Merge "ICU: Remove static library versions of ICU."
diff --git a/android_icu4j/src/main/java/android/icu/impl/CaseMap.java b/android_icu4j/src/main/java/android/icu/impl/CaseMapImpl.java
similarity index 69%
rename from android_icu4j/src/main/java/android/icu/impl/CaseMap.java
rename to android_icu4j/src/main/java/android/icu/impl/CaseMapImpl.java
index de940e8..97dddc7 100644
--- a/android_icu4j/src/main/java/android/icu/impl/CaseMap.java
+++ b/android_icu4j/src/main/java/android/icu/impl/CaseMapImpl.java
@@ -3,12 +3,17 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package android.icu.impl;
-import android.icu.util.ULocale;
+import java.io.IOException;
+
+import android.icu.lang.UCharacter;
+import android.icu.text.BreakIterator;
+import android.icu.text.Edits;
+import android.icu.util.ICUUncheckedIOException;
/**
* @hide Only a subset of ICU is exposed in Android
*/
-public final class CaseMap {
+public final class CaseMapImpl {
/**
* Implementation of UCaseProps.ContextIterator, iterates over a String.
* See ustrcase.c/utf16_caseContextIterator().
@@ -16,11 +21,11 @@
public static final class StringContextIterator implements UCaseProps.ContextIterator {
/**
* Constructor.
- * @param s String to iterate over.
+ * @param src Character sequence to iterate over.
*/
- public StringContextIterator(String s) {
- this.s=s;
- limit=s.length();
+ public StringContextIterator(CharSequence src) {
+ this.s=src;
+ limit=src.length();
cpStart=cpLimit=index=0;
dir=0;
}
@@ -64,7 +69,7 @@
public int nextCaseMapCP() {
cpStart=cpLimit;
if(cpLimit<limit) {
- int c=s.codePointAt(cpLimit);
+ int c=Character.codePointAt(s, cpLimit);
cpLimit+=Character.charCount(c);
return c;
} else {
@@ -88,6 +93,10 @@
return cpLimit;
}
+ public int getCPLength() {
+ return cpLimit-cpStart;
+ }
+
// implement UCaseProps.ContextIterator
// The following code is not used anywhere in this private class
@Override
@@ -112,11 +121,11 @@
int c;
if(dir>0 && index<s.length()) {
- c=s.codePointAt(index);
+ c=Character.codePointAt(s, index);
index+=Character.charCount(c);
return c;
} else if(dir<0 && index>0) {
- c=s.codePointBefore(index);
+ c=Character.codePointBefore(s, index);
index-=Character.charCount(c);
return c;
}
@@ -124,44 +133,242 @@
}
// variables
- protected String s;
+ protected CharSequence s;
protected int index, limit, cpStart, cpLimit;
protected int dir; // 0=initial state >0=forward <0=backward
}
- /** Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. */
- private static final void appendResult(int c, StringBuilder result) {
- // Decode the result.
- if (c < 0) {
- // (not) original code point
- result.appendCodePoint(~c);
- } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
- // The mapping has already been appended to result.
+ /**
+ * Omit unchanged text when case-mapping with Edits.
+ */
+ public static final int OMIT_UNCHANGED_TEXT = 0x4000;
+
+ private static int appendCodePoint(Appendable a, int c) throws IOException {
+ if (c <= Character.MAX_VALUE) {
+ a.append((char)c);
+ return 1;
} else {
- // Append the single-code point mapping.
- result.appendCodePoint(c);
+ a.append((char)(0xd7c0 + (c >> 10)));
+ a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
+ return 2;
}
}
- // TODO: Move the other string case mapping functions from UCharacter to here, too.
-
- public static String toUpper(ULocale locale, String str) {
- if (locale == null) {
- locale = ULocale.getDefault();
+ /**
+ * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
+ * @throws IOException if appending to {@code dest} fails
+ */
+ private static void appendResult(int result, Appendable dest,
+ int cpLength, int options, Edits edits) throws IOException {
+ // Decode the result.
+ if (result < 0) {
+ // (not) original code point
+ if (edits != null) {
+ edits.addUnchanged(cpLength);
+ if ((options & OMIT_UNCHANGED_TEXT) != 0) {
+ return;
+ }
+ }
+ appendCodePoint(dest, ~result);
+ } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
+ // The mapping has already been appended to result.
+ if (edits != null) {
+ edits.addReplace(cpLength, result);
+ }
+ } else {
+ // Append the single-code point mapping.
+ int length = appendCodePoint(dest, result);
+ if (edits != null) {
+ edits.addReplace(cpLength, length);
+ }
}
- int[] locCache = new int[] { UCaseProps.getCaseLocale(locale, null) };
- if (locCache[0] == UCaseProps.LOC_GREEK) {
- return GreekUpper.toUpper(str, locCache);
- }
+ }
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
+ private static final void appendUnchanged(CharSequence src, int start, int length,
+ Appendable dest, int options, Edits edits) throws IOException {
+ if (length > 0) {
+ if (edits != null) {
+ edits.addUnchanged(length);
+ if ((options & OMIT_UNCHANGED_TEXT) != 0) {
+ return;
+ }
+ }
+ dest.append(src, start, start + length);
+ }
+ }
+
+ private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
+ Appendable dest, Edits edits) throws IOException {
int c;
- while((c=iter.nextCaseMapCP())>=0) {
- c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
- appendResult(c, result);
+ while ((c = iter.nextCaseMapCP()) >= 0) {
+ c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
}
- return result.toString();
+ }
+
+ public static <A extends Appendable> A toLower(int caseLocale, int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ StringContextIterator iter = new StringContextIterator(src);
+ internalToLower(caseLocale, options, iter, dest, edits);
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+
+ public static <A extends Appendable> A toUpper(int caseLocale, int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ if (caseLocale == UCaseProps.LOC_GREEK) {
+ return GreekUpper.toUpper(options, src, dest, edits);
+ }
+ StringContextIterator iter = new StringContextIterator(src);
+ int c;
+ while ((c = iter.nextCaseMapCP()) >= 0) {
+ c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+
+ public static <A extends Appendable> A toTitle(
+ int caseLocale, int options, BreakIterator titleIter,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+
+ /* set up local variables */
+ StringContextIterator iter = new StringContextIterator(src);
+ int srcLength = src.length();
+ int prev=0;
+ boolean isFirstIndex=true;
+
+ /* titlecasing loop */
+ while(prev<srcLength) {
+ /* find next index where to titlecase */
+ int index;
+ if(isFirstIndex) {
+ isFirstIndex=false;
+ index=titleIter.first();
+ } else {
+ index=titleIter.next();
+ }
+ if(index==BreakIterator.DONE || index>srcLength) {
+ index=srcLength;
+ }
+
+ /*
+ * Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * In this implementation, segment [prev..index[ into 3 parts:
+ * a) uncased characters (copy as-is) [prev..titleStart[
+ * b) first case letter (titlecase) [titleStart..titleLimit[
+ * c) subsequent characters (lowercase) [titleLimit..index[
+ */
+ if(prev<index) {
+ // find and copy uncased characters [prev..titleStart[
+ int titleStart=prev;
+ iter.setLimit(index);
+ int c=iter.nextCaseMapCP();
+ if((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0
+ && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
+ // Adjust the titlecasing index (titleStart) to the next cased character.
+ while((c=iter.nextCaseMapCP())>=0
+ && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
+ // If c<0 then we have only uncased characters in [prev..index[
+ // and stopped with titleStart==titleLimit==index.
+ titleStart=iter.getCPStart();
+ appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
+ }
+
+ if(titleStart<index) {
+ int titleLimit=iter.getCPLimit();
+ // titlecase c which is from [titleStart..titleLimit[
+ c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
+
+ // Special case Dutch IJ titlecasing
+ if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
+ char c1 = src.charAt(titleStart);
+ if ((c1 == 'i' || c1 == 'I')) {
+ char c2 = src.charAt(titleStart+1);
+ if (c2 == 'j') {
+ dest.append('J');
+ if (edits != null) {
+ edits.addReplace(1, 1);
+ }
+ c = iter.nextCaseMapCP();
+ titleLimit++;
+ assert c == c2;
+ assert titleLimit == iter.getCPLimit();
+ } else if (c2 == 'J') {
+ // Keep the capital J from getting lowercased.
+ appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
+ c = iter.nextCaseMapCP();
+ titleLimit++;
+ assert c == c2;
+ assert titleLimit == iter.getCPLimit();
+ }
+ }
+ }
+
+ // lowercase [titleLimit..index[
+ if(titleLimit<index) {
+ if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
+ // Normal operation: Lowercase the rest of the word.
+ internalToLower(caseLocale, options, iter, dest, edits);
+ } else {
+ // Optionally just copy the rest of the word unchanged.
+ appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
+ iter.moveToLimit();
+ }
+ }
+ }
+ }
+
+ prev=index;
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+
+ public static <A extends Appendable> A fold(int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ int length = src.length();
+ for (int i = 0; i < length;) {
+ int c = Character.codePointAt(src, i);
+ int cpLength = Character.charCount(c);
+ i += cpLength;
+ c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
+ appendResult(c, dest, cpLength, options, edits);
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
private static final class GreekUpper {
@@ -665,12 +872,13 @@
* TODO: Try to re-consolidate one way or another with the non-Greek function.
*
* <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
+ * @throws IOException if appending to {@code dest} fails
*/
- private static String toUpper(CharSequence s, int[] locCache) {
- StringBuilder result = new StringBuilder(s.length());
+ private static <A extends Appendable> A toUpper(int options,
+ CharSequence src, A dest, Edits edits) throws IOException {
int state = 0;
- for (int i = 0; i < s.length();) {
- int c = Character.codePointAt(s, i);
+ for (int i = 0; i < src.length();) {
+ int c = Character.codePointAt(src, i);
int nextIndex = i + Character.charCount(c);
int nextState = 0;
int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
@@ -699,8 +907,8 @@
numYpogegrammeni = 1;
}
// Skip combining diacritics after this Greek letter.
- while (nextIndex < s.length()) {
- int diacriticData = getDiacriticData(s.charAt(nextIndex));
+ while (nextIndex < src.length()) {
+ int diacriticData = getDiacriticData(src.charAt(nextIndex));
if (diacriticData != 0) {
data |= diacriticData;
if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
@@ -720,7 +928,7 @@
(data & HAS_ACCENT) != 0 &&
numYpogegrammeni == 0 &&
(state & AFTER_CASED) == 0 &&
- !isFollowedByCasedLetter(s, nextIndex)) {
+ !isFollowedByCasedLetter(src, nextIndex)) {
// Keep disjunctive "or" with (only) a tonos.
// We use the same "word boundary" conditions as for the Final_Sigma test.
if (i == nextIndex) {
@@ -738,25 +946,59 @@
data &= ~HAS_EITHER_DIALYTIKA;
}
}
- result.appendCodePoint(upper);
- if ((data & HAS_EITHER_DIALYTIKA) != 0) {
- result.append('\u0308'); // restore or add a dialytika
+
+ boolean change;
+ if (edits == null) {
+ change = true; // common, simple usage
+ } else {
+ // Find out first whether we are changing the text.
+ change = src.charAt(i) != upper || numYpogegrammeni > 0;
+ int i2 = i + 1;
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
+ ++i2;
+ }
+ if (addTonos) {
+ change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
+ ++i2;
+ }
+ int oldLength = nextIndex - i;
+ int newLength = (i2 - i) + numYpogegrammeni;
+ change |= oldLength != newLength;
+ if (change) {
+ if (edits != null) {
+ edits.addReplace(oldLength, newLength);
+ }
+ } else {
+ if (edits != null) {
+ edits.addUnchanged(oldLength);
+ }
+ // Write unchanged text?
+ change = (options & OMIT_UNCHANGED_TEXT) == 0;
+ }
}
- if (addTonos) {
- result.append('\u0301');
- }
- while (numYpogegrammeni > 0) {
- result.append('Ι');
- --numYpogegrammeni;
+
+ if (change) {
+ dest.append((char)upper);
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ dest.append('\u0308'); // restore or add a dialytika
+ }
+ if (addTonos) {
+ dest.append('\u0301');
+ }
+ while (numYpogegrammeni > 0) {
+ dest.append('Ι');
+ --numYpogegrammeni;
+ }
}
} else {
- c = UCaseProps.INSTANCE.toFullUpper(c, null, result, null, locCache);
- appendResult(c, result);
+ c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
+ appendResult(c, dest, nextIndex - i, options, edits);
}
i = nextIndex;
state = nextState;
}
- return result.toString();
+ return dest;
}
}
}
diff --git a/android_icu4j/src/main/java/android/icu/impl/UCaseProps.java b/android_icu4j/src/main/java/android/icu/impl/UCaseProps.java
index bdc1ad6..5409429 100644
--- a/android_icu4j/src/main/java/android/icu/impl/UCaseProps.java
+++ b/android_icu4j/src/main/java/android/icu/impl/UCaseProps.java
@@ -25,6 +25,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
+import java.util.Locale;
import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
@@ -75,7 +76,7 @@
// read exceptions[]
count=indexes[IX_EXC_LENGTH];
if(count>0) {
- exceptions=ICUBinary.getChars(bytes, count, 0);
+ exceptions=ICUBinary.getString(bytes, count, 0);
}
// read unfold[]
@@ -154,7 +155,7 @@
*
* @param excWord (in) initial exceptions word
* @param index (in) desired slot index
- * @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
+ * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
* @return bits 31..0: slot value
* 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
*/
@@ -162,11 +163,11 @@
long value;
if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index);
- value=exceptions[excOffset];
+ value=exceptions.charAt(excOffset);
} else {
excOffset+=2*slotOffset(excWord, index);
- value=exceptions[excOffset++];
- value=(value<<16)|exceptions[excOffset];
+ value=exceptions.charAt(excOffset++);
+ value=(value<<16)|exceptions.charAt(excOffset);
}
return value |((long)excOffset<<32);
}
@@ -176,11 +177,11 @@
int value;
if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index);
- value=exceptions[excOffset];
+ value=exceptions.charAt(excOffset);
} else {
excOffset+=2*slotOffset(excWord, index);
- value=exceptions[excOffset++];
- value=(value<<16)|exceptions[excOffset];
+ value=exceptions.charAt(excOffset++);
+ value=(value<<16)|exceptions.charAt(excOffset);
}
return value;
}
@@ -195,7 +196,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_LOWER)) {
c=getSlotValue(excWord, EXC_LOWER, excOffset);
}
@@ -211,7 +212,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_UPPER)) {
c=getSlotValue(excWord, EXC_UPPER, excOffset);
}
@@ -227,7 +228,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index;
if(hasSlot(excWord, EXC_TITLE)) {
index=EXC_TITLE;
@@ -295,7 +296,7 @@
*/
int excOffset0, excOffset=getExceptionsOffset(props);
int closureOffset;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index, closureLength, fullLength, length;
excOffset0=excOffset;
@@ -338,7 +339,7 @@
/* add the full case folding string */
length=fullLength&0xf;
if(length!=0) {
- set.add(new String(exceptions, excOffset, length));
+ set.add(exceptions.substring(excOffset, excOffset+length));
excOffset+=length;
}
@@ -352,8 +353,9 @@
}
/* add each code point in the closure string */
- for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
- c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
+ int limit=closureOffset+closureLength;
+ for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
+ c=exceptions.codePointAt(index);
set.add(c);
}
}
@@ -472,7 +474,7 @@
if(!propsHasException(props)) {
return props&DOT_MASK;
} else {
- return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
+ return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
}
}
@@ -609,38 +611,49 @@
*/
public static final int MAX_STRING_LENGTH=0x1f;
- private static final int LOC_UNKNOWN=0;
- private static final int LOC_ROOT=1;
+ // private static final int LOC_UNKNOWN=0;
+ public static final int LOC_ROOT=1;
private static final int LOC_TURKISH=2;
private static final int LOC_LITHUANIAN=3;
static final int LOC_GREEK=4;
+ public static final int LOC_DUTCH=5;
- /*
- * Checks and caches the type of locale ID as it is relevant for case mapping.
- * If the locCache is not null, then it must be initialized with locCache[0]=0 .
- */
- static final int getCaseLocale(ULocale locale, int[] locCache) {
- int result;
-
- if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
- return result;
+ public static final int getCaseLocale(Locale locale) {
+ return getCaseLocale(locale.getLanguage());
+ }
+ public static final int getCaseLocale(ULocale locale) {
+ return getCaseLocale(locale.getLanguage());
+ }
+ /** Accepts both 2- and 3-letter language subtags. */
+ private static final int getCaseLocale(String language) {
+ // Check the subtag length to reduce the number of comparisons
+ // for locales without special behavior.
+ // Fastpath for English "en" which is often used for default (=root locale) case mappings,
+ // and for Chinese "zh" (caught by the first-letter > 't' test): very common but no special case mapping behavior.
+ if(language.length()==2) {
+ if(language.equals("en") || language.charAt(0)>'t') {
+ return LOC_ROOT;
+ } else if(language.equals("tr") || language.equals("az")) {
+ return LOC_TURKISH;
+ } else if(language.equals("el")) {
+ return LOC_GREEK;
+ } else if(language.equals("lt")) {
+ return LOC_LITHUANIAN;
+ } else if(language.equals("nl")) {
+ return LOC_DUTCH;
+ }
+ } else if(language.length()==3) {
+ if(language.equals("tur") || language.equals("aze")) {
+ return LOC_TURKISH;
+ } else if(language.equals("ell")) {
+ return LOC_GREEK;
+ } else if(language.equals("lit")) {
+ return LOC_LITHUANIAN;
+ } else if(language.equals("nld")) {
+ return LOC_DUTCH;
+ }
}
-
- result=LOC_ROOT;
-
- String language=locale.getLanguage();
- if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
- result=LOC_TURKISH;
- } else if(language.equals("el") || language.equals("ell")) {
- result=LOC_GREEK;
- } else if(language.equals("lt") || language.equals("lit")) {
- result=LOC_LITHUANIAN;
- }
-
- if(locCache!=null) {
- locCache[0]=result;
- }
- return result;
+ return LOC_ROOT;
}
/* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */
@@ -801,19 +814,14 @@
* See ContextIterator for details.
* If iter==null then a context-independent result is returned.
* @param out If the mapping result is a string, then it is appended to out.
- * @param locale Locale ID for locale-dependent mappings.
- * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
- * the locale ID for subsequent calls.
- * Can be null.
+ * @param caseLocale Case locale value from getCaseLocale() (ucase_getCaseLocale() in ICU4C).
* @return Output code point or string length, see MAX_STRING_LENGTH.
*
* @see ContextIterator
* @see #MAX_STRING_LENGTH
* @hide draft / provisional / internal are hidden on Android
*/
- public final int toFullLower(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
+ public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
int result, props;
result=c;
@@ -824,22 +832,20 @@
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full;
excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */
- int loc=getCaseLocale(locale, locCache);
-
/*
* Test for conditional mappings first
* (otherwise the unconditional default mappings are always taken),
* then test for characters that have unconditional mappings in SpecialCasing.txt,
* then get the UnicodeData.txt mappings.
*/
- if( loc==LOC_LITHUANIAN &&
+ if( caseLocale==LOC_LITHUANIAN &&
/* base characters, find accents above */
(((c==0x49 || c==0x4a || c==0x12e) &&
isFollowedByMoreAbove(iter)) ||
@@ -862,30 +868,34 @@
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
*/
- switch(c) {
- case 0x49: /* LATIN CAPITAL LETTER I */
- out.append(iDot);
- return 2;
- case 0x4a: /* LATIN CAPITAL LETTER J */
- out.append(jDot);
- return 2;
- case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
- out.append(iOgonekDot);
- return 2;
- case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
- out.append(iDotGrave);
- return 3;
- case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
- out.append(iDotAcute);
- return 3;
- case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
- out.append(iDotTilde);
- return 3;
- default:
- return 0; /* will not occur */
+ try {
+ switch(c) {
+ case 0x49: /* LATIN CAPITAL LETTER I */
+ out.append(iDot);
+ return 2;
+ case 0x4a: /* LATIN CAPITAL LETTER J */
+ out.append(jDot);
+ return 2;
+ case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
+ out.append(iOgonekDot);
+ return 2;
+ case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
+ out.append(iDotGrave);
+ return 3;
+ case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
+ out.append(iDotAcute);
+ return 3;
+ case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
+ out.append(iDotTilde);
+ return 3;
+ default:
+ return 0; /* will not occur */
+ }
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
}
/* # Turkish and Azeri */
- } else if(loc==LOC_TURKISH && c==0x130) {
+ } else if(caseLocale==LOC_TURKISH && c==0x130) {
/*
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases.
@@ -894,7 +904,7 @@
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
return 0x69;
- } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
+ } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
/*
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above
@@ -903,7 +913,7 @@
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/
return 0; /* remove the dot (continue without output) */
- } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
+ } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
/*
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
@@ -917,8 +927,12 @@
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
- out.append(iDot);
- return 2;
+ try {
+ out.append(iDot);
+ return 2;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
} else if( c==0x3a3 &&
!isFollowedByCasedLetter(iter, 1) &&
isFollowedByCasedLetter(iter, -1) /* -1=preceded */
@@ -940,11 +954,15 @@
/* start of full case mapping strings */
excOffset=(int)(value>>32)+1;
- /* set the output pointer to the lowercase mapping */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the lowercase mapping
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
@@ -958,8 +976,8 @@
/* internal */
private final int toUpperOrTitle(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache,
+ Appendable out,
+ int loc,
boolean upperNotTitle) {
int result;
int props;
@@ -972,15 +990,13 @@
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full, index;
excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */
- int loc=getCaseLocale(locale, locCache);
-
if(loc==LOC_TURKISH && c==0x69) {
/*
# Turkish and Azeri
@@ -1030,11 +1046,15 @@
}
if(full!=0) {
- /* set the output pointer to the result string */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the result string
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
@@ -1053,15 +1073,15 @@
}
public final int toFullUpper(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
- return toUpperOrTitle(c, iter, out, locale, locCache, true);
+ Appendable out,
+ int caseLocale) {
+ return toUpperOrTitle(c, iter, out, caseLocale, true);
}
public final int toFullTitle(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
- return toUpperOrTitle(c, iter, out, locale, locCache, false);
+ Appendable out,
+ int caseLocale) {
+ return toUpperOrTitle(c, iter, out, caseLocale, false);
}
/* case folding ------------------------------------------------------------- */
@@ -1121,7 +1141,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index;
if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
/* special case folding mappings, hardcoded */
@@ -1172,7 +1192,7 @@
* together in a way that they still fold to common result strings.
*/
- public final int toFullFolding(int c, StringBuilder out, int options) {
+ public final int toFullFolding(int c, Appendable out, int options) {
int result;
int props;
@@ -1184,7 +1204,7 @@
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full, index;
excOffset2=excOffset;
@@ -1198,8 +1218,12 @@
return 0x69;
} else if(c==0x130) {
/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
- out.append(iDot);
- return 2;
+ try {
+ out.append(iDot);
+ return 2;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
} else {
/* Turkic mappings */
@@ -1223,11 +1247,15 @@
full=(full>>4)&0xf;
if(full!=0) {
- /* set the output pointer to the result string */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the result string
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
@@ -1246,7 +1274,6 @@
/* case mapping properties API ---------------------------------------------- */
- private static final int[] rootLocCache = { LOC_ROOT };
/*
* We need a StringBuilder for multi-code point output from the
* full case mapping functions. However, we do not actually use that output,
@@ -1286,20 +1313,20 @@
*/
case UProperty.CHANGES_WHEN_LOWERCASED:
dummyStringBuilder.setLength(0);
- return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_UPPERCASED:
dummyStringBuilder.setLength(0);
- return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_TITLECASED:
dummyStringBuilder.setLength(0);
- return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
/* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
case UProperty.CHANGES_WHEN_CASEMAPPED:
dummyStringBuilder.setLength(0);
return
- toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
- toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
- toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+ toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+ toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
default:
return false;
}
@@ -1307,7 +1334,7 @@
// data members -------------------------------------------------------- ***
private int indexes[];
- private char exceptions[];
+ private String exceptions;
private char unfold[];
private Trie2_16 trie;
diff --git a/android_icu4j/src/main/java/android/icu/lang/UCharacter.java b/android_icu4j/src/main/java/android/icu/lang/UCharacter.java
index 2c144cc..fb2a50e 100644
--- a/android_icu4j/src/main/java/android/icu/lang/UCharacter.java
+++ b/android_icu4j/src/main/java/android/icu/lang/UCharacter.java
@@ -16,8 +16,7 @@
import java.util.Locale;
import java.util.Map;
-import android.icu.impl.CaseMap;
-import android.icu.impl.CaseMap.StringContextIterator;
+import android.icu.impl.CaseMapImpl;
import android.icu.impl.IllegalIcuArgumentException;
import android.icu.impl.Trie2;
import android.icu.impl.UBiDiProps;
@@ -30,6 +29,7 @@
import android.icu.lang.UCharacterEnums.ECharacterCategory;
import android.icu.lang.UCharacterEnums.ECharacterDirection;
import android.icu.text.BreakIterator;
+import android.icu.text.Edits;
import android.icu.text.Normalizer2;
import android.icu.util.RangeValueIterator;
import android.icu.util.ULocale;
@@ -4276,7 +4276,7 @@
*/
public static String toUpperCase(String str)
{
- return toUpperCase(ULocale.getDefault(), str);
+ return toUpperCase(getDefaultCaseLocale(), str);
}
/**
@@ -4287,7 +4287,7 @@
*/
public static String toLowerCase(String str)
{
- return toLowerCase(ULocale.getDefault(), str);
+ return toLowerCase(getDefaultCaseLocale(), str);
}
/**
@@ -4309,7 +4309,94 @@
*/
public static String toTitleCase(String str, BreakIterator breakiter)
{
- return toTitleCase(ULocale.getDefault(), str, breakiter);
+ return toTitleCase(Locale.getDefault(), str, breakiter, 0);
+ }
+
+    /**
+     * Looks up the case-locale code for the current default {@link Locale}.
+     *
+     * @return the value of UCaseProps.getCaseLocale() for Locale.getDefault()
+     */
+    private static int getDefaultCaseLocale() {
+        final Locale defaultLocale = Locale.getDefault();
+        return UCaseProps.getCaseLocale(defaultLocale);
+    }
+
+    /**
+     * Looks up the case-locale code for a Java locale.
+     *
+     * @param locale the locale; null selects Locale.getDefault()
+     * @return the value of UCaseProps.getCaseLocale() for the effective locale
+     */
+    private static int getCaseLocale(Locale locale) {
+        final Locale effective = (locale != null) ? locale : Locale.getDefault();
+        return UCaseProps.getCaseLocale(effective);
+    }
+
+    /**
+     * Looks up the case-locale code for an ICU locale.
+     *
+     * @param locale the locale; null selects ULocale.getDefault()
+     * @return the value of UCaseProps.getCaseLocale() for the effective locale
+     */
+    private static int getCaseLocale(ULocale locale) {
+        final ULocale effective = (locale != null) ? locale : ULocale.getDefault();
+        return UCaseProps.getCaseLocale(effective);
+    }
+
+    /**
+     * Lowercases {@code str} for the given case-locale code.
+     *
+     * <p>Strings of up to 100 chars are mapped with CaseMapImpl.OMIT_UNCHANGED_TEXT so that
+     * only the changes are collected and then merged with the original by applyEdits();
+     * longer strings are mapped directly into a builder sized to the input.
+     *
+     * @param caseLocale case-locale code, as returned by UCaseProps.getCaseLocale()
+     * @param str the string to lowercase; an empty string is returned as is
+     * @return the lowercased string
+     */
+    private static String toLowerCase(int caseLocale, String str) {
+        if (str.isEmpty()) {
+            return str;
+        }
+        if (str.length() > 100) {
+            // Long input: write the complete result in one pass.
+            final StringBuilder full = new StringBuilder(str.length());
+            return CaseMapImpl.toLower(caseLocale, 0, str, full, null).toString();
+        }
+        // Short input: collect and apply only changes.
+        // Good if no or few changes. Bad (slow) if many changes.
+        final Edits edits = new Edits();
+        final StringBuilder changedText = CaseMapImpl.toLower(
+                caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
+        return applyEdits(str, changedText, edits);
+    }
+
+    /**
+     * Uppercases {@code str} for the given case-locale code.
+     *
+     * <p>Strings of up to 100 chars are mapped with CaseMapImpl.OMIT_UNCHANGED_TEXT so that
+     * only the changes are collected and then merged with the original by applyEdits();
+     * longer strings are mapped directly into a builder sized to the input.
+     *
+     * @param caseLocale case-locale code, as returned by UCaseProps.getCaseLocale()
+     * @param str the string to uppercase; an empty string is returned as is
+     * @return the uppercased string
+     */
+    private static String toUpperCase(int caseLocale, String str) {
+        if (str.isEmpty()) {
+            return str;
+        }
+        if (str.length() > 100) {
+            // Long input: write the complete result in one pass.
+            final StringBuilder full = new StringBuilder(str.length());
+            return CaseMapImpl.toUpper(caseLocale, 0, str, full, null).toString();
+        }
+        // Short input: collect and apply only changes.
+        // Good if no or few changes. Bad (slow) if many changes.
+        final Edits edits = new Edits();
+        final StringBuilder changedText = CaseMapImpl.toUpper(
+                caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
+        return applyEdits(str, changedText, edits);
+    }
+
+    /**
+     * Titlecases {@code str} for the given case-locale code, using {@code titleIter}
+     * to find the places to titlecase.
+     *
+     * <p>Strings of up to 100 chars are mapped with CaseMapImpl.OMIT_UNCHANGED_TEXT added to
+     * {@code options} so that only the changes are collected and then merged with the
+     * original by applyEdits(); longer strings are mapped directly into a builder sized
+     * to the input.
+     *
+     * @param caseLocale case-locale code, as returned by UCaseProps.getCaseLocale()
+     * @param options titlecasing option bits, passed through to CaseMapImpl.toTitle()
+     * @param titleIter break iterator handed to CaseMapImpl.toTitle(); the public callers
+     *        in this file call setText(str) on it before delegating here
+     * @param str the string to titlecase; an empty string is returned as is
+     * @return the titlecased string
+     */
+    private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
+        if (str.isEmpty()) {
+            return str;
+        }
+        if (str.length() > 100) {
+            // Long input: write the complete result in one pass.
+            final StringBuilder full = new StringBuilder(str.length());
+            return CaseMapImpl.toTitle(caseLocale, options, titleIter, str, full, null).toString();
+        }
+        // Short input: collect and apply only changes.
+        // Good if no or few changes. Bad (slow) if many changes.
+        final Edits edits = new Edits();
+        final int changesOnly = options | CaseMapImpl.OMIT_UNCHANGED_TEXT;
+        final StringBuilder changedText = CaseMapImpl.toTitle(
+                caseLocale, changesOnly, titleIter, str, new StringBuilder(), edits);
+        return applyEdits(str, changedText, edits);
+    }
+
+    /**
+     * Builds the mapped string from the original text, the collected replacement text,
+     * and the edit records.
+     *
+     * @param str the original string
+     * @param replacementChars the replacement text, indexed by Edits.Iterator.replacementIndex()
+     * @param edits the edits recorded while mapping {@code str}
+     * @return {@code str} itself if {@code edits} has no changes, otherwise a new string in
+     *         which changed spans come from {@code replacementChars} and unchanged spans
+     *         come from {@code str}
+     */
+    private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
+        if (!edits.hasChanges()) {
+            return str;
+        }
+        final StringBuilder out = new StringBuilder(str.length() + edits.lengthDelta());
+        final Edits.Iterator spans = edits.getCoarseIterator();
+        while (spans.next()) {
+            if (spans.hasChange()) {
+                final int from = spans.replacementIndex();
+                out.append(replacementChars, from, from + spans.newLength());
+            } else {
+                final int from = spans.sourceIndex();
+                out.append(str, from, from + spans.oldLength());
+            }
+        }
+        return out.toString();
}
/**
@@ -4321,7 +4408,7 @@
*/
public static String toUpperCase(Locale locale, String str)
{
- return toUpperCase(ULocale.forLocale(locale), str);
+ return toUpperCase(getCaseLocale(locale), str);
}
/**
@@ -4332,7 +4419,7 @@
* @return uppercase version of the argument string
*/
public static String toUpperCase(ULocale locale, String str) {
- return CaseMap.toUpper(locale, str);
+ return toUpperCase(getCaseLocale(locale), str);
}
/**
@@ -4344,7 +4431,7 @@
*/
public static String toLowerCase(Locale locale, String str)
{
- return toLowerCase(ULocale.forLocale(locale), str);
+ return toLowerCase(getCaseLocale(locale), str);
}
/**
@@ -4355,31 +4442,7 @@
* @return lowercase version of the argument string
*/
public static String toLowerCase(ULocale locale, String str) {
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int[] locCache = new int[1];
- int c;
-
- if (locale == null) {
- locale = ULocale.getDefault();
- }
- locCache[0]=0;
-
- while((c=iter.nextCaseMapCP())>=0) {
- c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
-
- /* decode the result */
- if(c<0) {
- /* (not) original code point */
- c=~c;
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- continue;
- /* } else { append single-code point mapping */
- }
- result.appendCodePoint(c);
- }
- return result.toString();
+ return toLowerCase(getCaseLocale(locale), str);
}
/**
@@ -4403,7 +4466,7 @@
public static String toTitleCase(Locale locale, String str,
BreakIterator breakiter)
{
- return toTitleCase(ULocale.forLocale(locale), str, breakiter);
+ return toTitleCase(locale, str, breakiter, 0);
}
/**
@@ -4451,126 +4514,15 @@
* @see #TITLECASE_NO_BREAK_ADJUSTMENT
*/
public static String toTitleCase(ULocale locale, String str,
- BreakIterator titleIter,
- int options) {
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int[] locCache = new int[1];
- int c, nc, srcLength = str.length();
-
- if (locale == null) {
- locale = ULocale.getDefault();
- }
- locCache[0]=0;
-
+ BreakIterator titleIter, int options) {
if(titleIter == null) {
+ if (locale == null) {
+ locale = ULocale.getDefault();
+ }
titleIter = BreakIterator.getWordInstance(locale);
}
titleIter.setText(str);
-
- int prev, titleStart, index;
- boolean isFirstIndex;
- boolean isDutch = locale.getLanguage().equals("nl");
- boolean FirstIJ = true;
-
- /* set up local variables */
- prev=0;
- isFirstIndex=true;
-
- /* titlecasing loop */
- while(prev<srcLength) {
- /* find next index where to titlecase */
- if(isFirstIndex) {
- isFirstIndex=false;
- index=titleIter.first();
- } else {
- index=titleIter.next();
- }
- if(index==BreakIterator.DONE || index>srcLength) {
- index=srcLength;
- }
-
- /*
- * Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * In this implementation, segment [prev..index[ into 3 parts:
- * a) uncased characters (copy as-is) [prev..titleStart[
- * b) first case letter (titlecase) [titleStart..titleLimit[
- * c) subsequent characters (lowercase) [titleLimit..index[
- */
- if(prev<index) {
- /* find and copy uncased characters [prev..titleStart[ */
- iter.setLimit(index);
- c=iter.nextCaseMapCP();
- if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
- && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
- while((c=iter.nextCaseMapCP())>=0
- && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
- titleStart=iter.getCPStart();
- if(prev<titleStart) {
- result.append(str, prev, titleStart);
- }
- } else {
- titleStart=prev;
- }
-
- if(titleStart<index) {
- FirstIJ = true;
- /* titlecase c which is from titleStart */
- c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
-
- /* decode the result and lowercase up to index */
- for(;;) {
- if(c<0) {
- /* (not) original code point */
- c=~c;
- result.appendCodePoint(c);
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- } else {
- /* append single-code point mapping */
- result.appendCodePoint(c);
- }
-
- if((options&TITLECASE_NO_LOWERCASE)!=0) {
- /* Optionally just copy the rest of the word unchanged. */
-
- int titleLimit=iter.getCPLimit();
- if(titleLimit<index) {
- /* Special Case - Dutch IJ Titlecasing */
- if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
- result.append('J').append(str, titleLimit + 1, index);
- } else {
- result.append(str, titleLimit, index);
- }
- }
- iter.moveToLimit();
- break;
- } else if((nc=iter.nextCaseMapCP())>=0) {
- if (isDutch && (nc == 0x004A || nc == 0x006A)
- && (c == 0x0049) && (FirstIJ == true)) {
- c = 0x004A; /* J */
- FirstIJ = false;
- } else {
- /* Normal operation: Lowercase the rest of the word. */
- c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
- locCache);
- }
- } else {
- break;
- }
- }
- }
- }
-
- prev=index;
- }
- return result.toString();
+ return toTitleCase(getCaseLocale(locale), options, titleIter, str);
}
@@ -4673,7 +4625,11 @@
public static String toTitleCase(Locale locale, String str,
BreakIterator titleIter,
int options) {
- return toTitleCase(ULocale.forLocale(locale), str, titleIter, options);
+        // A null locale means the default locale, as in getCaseLocale(Locale). Resolve it
+        // before it can reach BreakIterator.getWordInstance(); the ULocale overload of this
+        // method does the same, and the code this replaces delegated to that overload.
+        if (locale == null) {
+            locale = Locale.getDefault();
+        }
+        if (titleIter == null) {
+            titleIter = BreakIterator.getWordInstance(locale);
+        }
+        // The iterator must be positioned on str before the implementation walks it.
+        titleIter.setText(str);
+        return toTitleCase(getCaseLocale(locale), options, titleIter, str);
}
/**
@@ -4783,27 +4739,19 @@
* @see #foldCase(int, boolean)
*/
public static final String foldCase(String str, int options) {
- StringBuilder result = new StringBuilder(str.length());
- int c, i, length;
-
- length = str.length();
- for(i=0; i<length;) {
- c=str.codePointAt(i);
- i+=Character.charCount(c);
- c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
-
- /* decode the result */
- if(c<0) {
- /* (not) original code point */
- c=~c;
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- continue;
- /* } else { append single-code point mapping */
+ if (str.length() <= 100) {
+ if (str.isEmpty()) {
+ return str;
}
- result.appendCodePoint(c);
+ // Collect and apply only changes.
+ // Good if no or few changes. Bad (slow) if many changes.
+ Edits edits = new Edits();
+ StringBuilder replacementChars = CaseMapImpl.fold(
+ options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
+ return applyEdits(str, replacementChars, edits);
+ } else {
+ return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
}
- return result.toString();
}
/**
diff --git a/android_icu4j/src/main/java/android/icu/text/CaseMap.java b/android_icu4j/src/main/java/android/icu/text/CaseMap.java
new file mode 100644
index 0000000..c6cc04a
--- /dev/null
+++ b/android_icu4j/src/main/java/android/icu/text/CaseMap.java
@@ -0,0 +1,320 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package android.icu.text;
+
+import java.util.Locale;
+
+import android.icu.impl.CaseMapImpl;
+import android.icu.impl.UCaseProps;
+import android.icu.lang.UCharacter;
+import android.icu.util.ULocale;
+
+/**
+ * Low-level case mapping options and methods. Immutable.
+ * "Setters" return instances with the union of the current and new options set.
+ *
+ * This class is not intended for public subclassing.
+ *
+ * @hide Only a subset of ICU is exposed in Android
+ * @hide draft / provisional / internal are hidden on Android
+ */
+public abstract class CaseMap {
+ /**
+ * @deprecated This API is ICU internal only.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ @Deprecated
+ protected int internalOptions;
+
+ private CaseMap(int opt) { internalOptions = opt; }
+
+ private static int getCaseLocale(Locale locale) {
+ if (locale == null) {
+ locale = Locale.getDefault();
+ }
+ return UCaseProps.getCaseLocale(locale);
+ }
+
+ /**
+ * @return Lowercasing object with default options.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static Lower toLower() { return Lower.DEFAULT; }
+ /**
+ * @return Uppercasing object with default options.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static Upper toUpper() { return Upper.DEFAULT; }
+ /**
+ * @return Titlecasing object with default options.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static Title toTitle() { return Title.DEFAULT; }
+ /**
+ * @return Case folding object with default options.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static Fold fold() { return Fold.DEFAULT; }
+
+ /**
+ * Returns an instance that behaves like this one but
+ * omits unchanged text when case-mapping with {@link Edits}.
+ *
+ * @return an options object with this option.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public abstract CaseMap omitUnchangedText();
+
+ /**
+ * Lowercasing options and methods. Immutable.
+ *
+ * @see #toLower()
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static final class Lower extends CaseMap {
+ private static final Lower DEFAULT = new Lower(0);
+ private static final Lower OMIT_UNCHANGED = new Lower(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Lower(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ @Override
+ public Lower omitUnchangedText() {
+ return OMIT_UNCHANGED;
+ }
+
+ /**
+ * Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
+ * (See {@link ULocale#toLocale}.)
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toLowerCase(Locale, String)
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public <A extends Appendable> A apply(
+ Locale locale, CharSequence src, A dest, Edits edits) {
+ return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src, dest, edits);
+ }
+ }
+
+ /**
+ * Uppercasing options and methods. Immutable.
+ *
+ * @see #toUpper()
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static final class Upper extends CaseMap {
+ private static final Upper DEFAULT = new Upper(0);
+ private static final Upper OMIT_UNCHANGED = new Upper(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Upper(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ @Override
+ public Upper omitUnchangedText() {
+ return OMIT_UNCHANGED;
+ }
+
+ /**
+ * Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
+ * (See {@link ULocale#toLocale}.)
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toUpperCase(Locale, String)
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public <A extends Appendable> A apply(
+ Locale locale, CharSequence src, A dest, Edits edits) {
+ return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src, dest, edits);
+ }
+ }
+
+ /**
+ * Titlecasing options and methods. Immutable.
+ *
+ * @see #toTitle()
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static final class Title extends CaseMap {
+ private static final Title DEFAULT = new Title(0);
+ private static final Title OMIT_UNCHANGED = new Title(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Title(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ @Override
+ public Title omitUnchangedText() {
+ if (internalOptions == 0 || internalOptions == CaseMapImpl.OMIT_UNCHANGED_TEXT) {
+ return OMIT_UNCHANGED;
+ }
+ return new Title(internalOptions | CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ }
+
+ /**
+ * Returns an instance that behaves like this one but
+ * does not lowercase non-initial parts of words when titlecasing.
+ *
+ * <p>By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @return an options object with this option.
+ * @see UCharacter#TITLECASE_NO_LOWERCASE
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Title noLowercase() {
+ return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
+ }
+
+ // TODO: update references to the Unicode Standard for recent version
+ /**
+ * Returns an instance that behaves like this one but
+ * does not adjust the titlecasing indexes from BreakIterator::next() indexes;
+ * titlecases exactly the characters at breaks from the iterator.
+ *
+ * <p>By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * <p>This follows Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @return an options object with this option.
+ * @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Title noBreakAdjustment() {
+ return new Title(internalOptions | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
+ }
+
+ /**
+ * Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * <p>Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
+ * (See {@link ULocale#toLocale}.)
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setText())
+ * and used one or more times for iteration (first() and next()).
+ * If null, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public <A extends Appendable> A apply(
+ Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
+ if (iter == null) {
+ iter = BreakIterator.getWordInstance(locale);
+ }
+ iter.setText(src.toString());
+ return CaseMapImpl.toTitle(
+ getCaseLocale(locale), internalOptions, iter, src, dest, edits);
+ }
+ }
+
+ /**
+ * Case folding options and methods. Immutable.
+ *
+ * @see #fold()
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static final class Fold extends CaseMap {
+ private static final Fold DEFAULT = new Fold(0);
+ private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
+ private static final Fold OMIT_UNCHANGED = new Fold(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private static final Fold TURKIC_OMIT_UNCHANGED = new Fold(
+ UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Fold(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ @Override
+ public Fold omitUnchangedText() {
+ return (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0 ?
+ OMIT_UNCHANGED : TURKIC_OMIT_UNCHANGED;
+ }
+
+ /**
+ * Returns an instance that behaves like this one but
+ * handles dotted I and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * <p>Uses the Unicode CaseFolding.txt mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @return an options object with this option.
+ * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Fold turkic() {
+ return (internalOptions & CaseMapImpl.OMIT_UNCHANGED_TEXT) == 0 ?
+ TURKIC : TURKIC_OMIT_UNCHANGED;
+ }
+
+ /**
+ * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
+ *
+ * <p>Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * <p>The result may be longer or shorter than the original.
+ *
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#foldCase(String, int)
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
+ return CaseMapImpl.fold(internalOptions, src, dest, edits);
+ }
+ }
+}
diff --git a/android_icu4j/src/main/java/android/icu/text/Edits.java b/android_icu4j/src/main/java/android/icu/text/Edits.java
new file mode 100644
index 0000000..f254a91
--- /dev/null
+++ b/android_icu4j/src/main/java/android/icu/text/Edits.java
@@ -0,0 +1,476 @@
+/* GENERATED SOURCE. DO NOT MODIFY. */
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package android.icu.text;
+
+import java.nio.BufferOverflowException;
+import java.util.Arrays;
+
+/**
+ * Records lengths of string edits but not replacement text.
+ * Supports replacements, insertions, deletions in linear progression.
+ * Does not support moving/reordering of text.
+ *
+ * @hide Only a subset of ICU is exposed in Android
+ * @hide draft / provisional / internal are hidden on Android
+ */
+public final class Edits {
+ // 0000uuuuuuuuuuuu records u+1 unchanged text units.
+ private static final int MAX_UNCHANGED_LENGTH = 0x1000;
+ private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
+
+ // 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
+ // No length change.
+ private static final int MAX_SHORT_WIDTH = 6;
+ private static final int MAX_SHORT_CHANGE_LENGTH = 0xfff;
+ private static final int MAX_SHORT_CHANGE = 0x6fff;
+
+ // 0111mmmmmmnnnnnn records a replacement of m text units with n.
+ // m or n = 61: actual length follows in the next edits array unit.
+ // m or n = 62..63: actual length follows in the next two edits array units.
+ // Bit 30 of the actual length is in the head unit.
+ // Trailing units have bit 15 set.
+ private static final int LENGTH_IN_1TRAIL = 61;
+ private static final int LENGTH_IN_2TRAIL = 62;
+
+ private static final int STACK_CAPACITY = 100;
+ private char[] array;
+ private int length;
+ private int delta;
+
+    /**
+     * Constructs an empty object whose unit array initially holds STACK_CAPACITY units.
+     * @hide draft / provisional / internal are hidden on Android
+     */
+    public Edits() {
+        this.array = new char[STACK_CAPACITY];
+    }
+
+    /**
+     * Resets the data but may not release memory:
+     * the record count and the length delta go back to zero, the array is kept.
+     * @hide draft / provisional / internal are hidden on Android
+     */
+    public void reset() {
+        length = 0;
+        delta = 0;
+    }
+
+    /** Overwrites the most recently written unit; callers must ensure there is one. */
+    private void setLastUnit(int last) {
+        final int lastIndex = length - 1;
+        array[lastIndex] = (char) last;
+    }
+    /**
+     * Returns the most recently written unit, or 0xffff if nothing has been written.
+     * 0xffff is above both merge thresholds tested in addUnchanged() and addReplace().
+     */
+    private int lastUnit() {
+        if (length > 0) {
+            return array[length - 1];
+        }
+        return 0xffff;
+    }
+
+    /**
+     * Adds a record for an unchanged segment of text.
+     * Normally called from inside ICU string transformation functions, not user code.
+     *
+     * @param unchangedLength number of unchanged text units; must not be negative
+     * @throws IllegalArgumentException if {@code unchangedLength} is negative
+     * @hide draft / provisional / internal are hidden on Android
+     */
+    public void addUnchanged(int unchangedLength) {
+        if (unchangedLength < 0) {
+            throw new IllegalArgumentException(
+                    "addUnchanged(" + unchangedLength + "): length must not be negative");
+        }
+        int pending = unchangedLength;
+        // First top up a preceding unchanged-text unit that is not yet full.
+        final int tail = lastUnit();
+        if (tail < MAX_UNCHANGED) {
+            final int room = MAX_UNCHANGED - tail;
+            if (pending <= room) {
+                setLastUnit(tail + pending);
+                return;
+            }
+            setLastUnit(MAX_UNCHANGED);
+            pending -= room;
+        }
+        // Emit one full unit per MAX_UNCHANGED_LENGTH text units.
+        for (; pending >= MAX_UNCHANGED_LENGTH; pending -= MAX_UNCHANGED_LENGTH) {
+            append(MAX_UNCHANGED);
+        }
+        // A unit stores its length minus one, so a zero remainder writes nothing.
+        if (pending > 0) {
+            append(pending - 1);
+        }
+    }
+
+ /**
+ * Adds a record for a text replacement/insertion/deletion.
+ * Normally called from inside ICU string transformation functions, not user code.
+ *
+ * @param oldLength number of text units being replaced; must not be negative
+ * @param newLength number of text units in the replacement; must not be negative
+ * @throws IllegalArgumentException if either length is negative
+ * @throws IndexOutOfBoundsException if the accumulated length delta would overflow an int
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public void addReplace(int oldLength, int newLength) {
+ if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
+ // Replacement of short oldLength text units by same-length new text.
+ // Merge into previous short-replacement record, if any.
+ int last = lastUnit();
+ // Merge only if the previous unit is a short-change unit of the same width
+ // whose 12-bit counter is not yet at its maximum.
+ if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
+ (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
+ setLastUnit(last + 1);
+ return;
+ }
+ // New short-change unit: width in the top bits, counter 0 (one replacement).
+ append(oldLength << 12);
+ return;
+ }
+
+ if(oldLength < 0 || newLength < 0) {
+ throw new IllegalArgumentException(
+ "addReplace(" + oldLength + ", " + newLength +
+ "): both lengths must be non-negative");
+ }
+ // Nothing replaced by nothing: no record needed.
+ if (oldLength == 0 && newLength == 0) {
+ return;
+ }
+ int newDelta = newLength - oldLength;
+ if (newDelta != 0) {
+ if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
+ (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
+ // Integer overflow or underflow.
+ throw new IndexOutOfBoundsException();
+ }
+ delta += newDelta;
+ }
+
+ // General change record: head unit 0111mmmmmmnnnnnn, optionally followed by trail units.
+ int head = 0x7000;
+ if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
+ // Both lengths fit into the head unit itself.
+ head |= oldLength << 6;
+ head |= newLength;
+ append(head);
+ } else if ((array.length - length) >= 5 || growArray()) {
+ // Up to 5 units are written here: the head plus at most two trail units per length.
+ int limit = length + 1;
+ if(oldLength < LENGTH_IN_1TRAIL) {
+ head |= oldLength << 6;
+ } else if(oldLength <= 0x7fff) {
+ head |= LENGTH_IN_1TRAIL << 6;
+ array[limit++] = (char)(0x8000 | oldLength);
+ } else {
+ // Bit 30 of the length goes into the head; the low 30 bits into two trail units.
+ head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
+ array[limit++] = (char)(0x8000 | (oldLength >> 15));
+ array[limit++] = (char)(0x8000 | oldLength);
+ }
+ if(newLength < LENGTH_IN_1TRAIL) {
+ head |= newLength;
+ } else if(newLength <= 0x7fff) {
+ head |= LENGTH_IN_1TRAIL;
+ array[limit++] = (char)(0x8000 | newLength);
+ } else {
+ head |= LENGTH_IN_2TRAIL + (newLength >> 30);
+ array[limit++] = (char)(0x8000 | (newLength >> 15));
+ array[limit++] = (char)(0x8000 | newLength);
+ }
+ // The head is stored last because its length fields are only complete now.
+ array[length] = (char)head;
+ length = limit;
+ }
+ }
+
+    /** Appends one unit, growing the array first if it is full. */
+    private void append(int r) {
+        final boolean hasRoom = length < array.length || growArray();
+        if (hasRoom) {
+            array[length++] = (char) r;
+        }
+    }
+
+    /**
+     * Replaces the unit array with a larger copy.
+     *
+     * @return true (failure is reported by exception)
+     * @throws BufferOverflowException if the array cannot grow by at least 5 units
+     */
+    private boolean growArray() {
+        final int current = array.length;
+        if (current == Integer.MAX_VALUE) {
+            throw new BufferOverflowException();
+        }
+        final int newCapacity;
+        if (current == STACK_CAPACITY) {
+            newCapacity = 2000;
+        } else if (current >= (Integer.MAX_VALUE / 2)) {
+            newCapacity = Integer.MAX_VALUE;
+        } else {
+            newCapacity = 2 * current;
+        }
+        // Grow by at least 5 units so that a maximal change record will fit.
+        final int gained = newCapacity - current;
+        if (gained < 5) {
+            throw new BufferOverflowException();
+        }
+        array = Arrays.copyOf(array, newCapacity);
+        return true;
+    }
+
+    /**
+     * How much longer is the new text compared with the old text?
+     * @return new length minus old length
+     * @hide draft / provisional / internal are hidden on Android
+     */
+    public int lengthDelta() {
+        return delta;
+    }
+    /**
+     * @return true if there are any change edits
+     * @hide draft / provisional / internal are hidden on Android
+     */
+    public boolean hasChanges() {
+        // A non-zero delta already proves a change; otherwise look for any unit
+        // above the unchanged-text range.
+        boolean found = delta != 0;
+        for (int i = 0; !found && i < length; ++i) {
+            found = array[i] > MAX_UNCHANGED;
+        }
+        return found;
+    }
+
+ /**
+ * Access to the list of edits.
+ * @see #getCoarseIterator
+ * @see #getFineIterator
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public static final class Iterator {
+ private final char[] array;
+ private int index;
+ private final int length;
+ private int remaining;
+ private final boolean onlyChanges_, coarse;
+
+ private boolean changed;
+ private int oldLength_, newLength_;
+ private int srcIndex, replIndex, destIndex;
+
+        /**
+         * @param units the edit units to iterate over
+         * @param unitCount number of units in use
+         * @param onlyChanges whether next() skips unchanged spans
+         * @param coarseGrained whether adjacent changes are combined into one span
+         */
+        private Iterator(char[] units, int unitCount, boolean onlyChanges, boolean coarseGrained) {
+            this.array = units;
+            this.length = unitCount;
+            this.onlyChanges_ = onlyChanges;
+            this.coarse = coarseGrained;
+        }
+
+        /**
+         * Decodes one 6-bit length field of a change head unit, consuming any trail units.
+         *
+         * @param head the 6-bit field taken from the head unit
+         * @return the decoded length
+         */
+        private int readLength(int head) {
+            if (head < LENGTH_IN_1TRAIL) {
+                // The field is the length itself.
+                return head;
+            }
+            if (head < LENGTH_IN_2TRAIL) {
+                // The length is in the low 15 bits of one trail unit.
+                assert(index < length);
+                assert(array[index] >= 0x8000);
+                final int trail = array[index++];
+                return trail & 0x7fff;
+            }
+            // Bit 30 comes from the field, the low 30 bits from two trail units.
+            assert((index + 2) <= length);
+            assert(array[index] >= 0x8000);
+            assert(array[index + 1] >= 0x8000);
+            final int bit30 = (head & 1) << 30;
+            final int high = (array[index] & 0x7fff) << 15;
+            final int low = array[index + 1] & 0x7fff;
+            index += 2;
+            return bit30 | high | low;
+        }
+
+        /**
+         * Moves the three string indexes past the current span.
+         * The replacement index advances only for a change span.
+         */
+        private void updateIndexes() {
+            destIndex += newLength_;
+            srcIndex += oldLength_;
+            replIndex += changed ? newLength_ : 0;
+        }
+
+        /** Sets the state to an empty, unchanged span and returns false. */
+        private boolean noNext() {
+            // No change beyond the string.
+            oldLength_ = 0;
+            newLength_ = 0;
+            changed = false;
+            return false;
+        }
+
+        /**
+         * Advances to the next edit, skipping unchanged spans if this iterator
+         * was created for changes only.
+         * @return true if there is another edit
+         * @hide draft / provisional / internal are hidden on Android
+         */
+        public boolean next() {
+            final boolean skipUnchanged = onlyChanges_;
+            return next(skipUnchanged);
+        }
+
+ // Advances to the next span. With onlyChanges set, a run of unchanged units is
+ // stepped over and the change that follows it (if any) becomes the current span.
+ // Returns false once all units have been consumed.
+ private boolean next(boolean onlyChanges) {
+ // Move the string indexes past the span that was current before this call.
+ updateIndexes();
+ if (remaining > 0) {
+ // Fine-grained iterator: Continue a sequence of equal-length changes.
+ --remaining;
+ return true;
+ }
+ if (index >= length) {
+ return noNext();
+ }
+ int u = array[index++];
+ if (u <= MAX_UNCHANGED) {
+ // Combine adjacent unchanged ranges.
+ changed = false;
+ oldLength_ = u + 1;
+ while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
+ ++index;
+ oldLength_ += u + 1;
+ }
+ newLength_ = oldLength_;
+ if (onlyChanges) {
+ // Skip the unchanged span: account for its length, then fall through to the change.
+ updateIndexes();
+ if (index >= length) {
+ return noNext();
+ }
+ // already fetched u > MAX_UNCHANGED at index
+ ++index;
+ } else {
+ return true;
+ }
+ }
+ changed = true;
+ if (u <= MAX_SHORT_CHANGE) {
+ if (coarse) {
+ // One unit stands for (count + 1) replacements of w units each.
+ int w = u >> 12;
+ int len = (u & 0xfff) + 1;
+ oldLength_ = newLength_ = len * w;
+ } else {
+ // Split a sequence of equal-length changes that was compressed into one unit.
+ oldLength_ = newLength_ = u >> 12;
+ remaining = u & 0xfff;
+ return true;
+ }
+ } else {
+ assert(u <= 0x7fff);
+ oldLength_ = readLength((u >> 6) & 0x3f);
+ newLength_ = readLength(u & 0x3f);
+ if (!coarse) {
+ return true;
+ }
+ }
+ // Combine adjacent changes.
+ while (index < length && (u = array[index]) > MAX_UNCHANGED) {
+ ++index;
+ if (u <= MAX_SHORT_CHANGE) {
+ int w = u >> 12;
+ int len = (u & 0xfff) + 1;
+ len = len * w;
+ oldLength_ += len;
+ newLength_ += len;
+ } else {
+ assert(u <= 0x7fff);
+ int oldLen = readLength((u >> 6) & 0x3f);
+ int newLen = readLength(u & 0x3f);
+ oldLength_ += oldLen;
+ newLength_ += newLen;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Finds the edit that contains the source index.
+ * The source index may be found in a non-change
+ * even if normal iteration would skip non-changes.
+ * Normal iteration can continue from a found edit.
+ *
+ * <p>The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * <p>The iterator state after this search is undefined
+ * if the source index is out of bounds for the source string.
+ *
+ * @param i source index
+ * @return true if the edit for the source index was found
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public boolean findSourceIndex(int i) {
+ // A negative index can never lie inside the source string.
+ if (i < 0) { return false; }
+ if (i < srcIndex) {
+ // Reset the iterator to the start.
+ // The target lies before the current edit and the search below only steps
+ // forward via next(), so start over from the first unit.
+ index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
+ } else if (i < (srcIndex + oldLength_)) {
+ // The index is in the current span.
+ return true;
+ }
+ // Iterate with onlyChanges=false regardless of how this iterator was created:
+ // the source index may fall inside an unchanged span.
+ while (next(false)) {
+ if (i < (srcIndex + oldLength_)) {
+ // The index is in the current span.
+ return true;
+ }
+ if (remaining > 0) {
+ // Is the index in one of the remaining compressed edits?
+ // srcIndex is the start of the current span, before the remaining ones.
+ int len = (remaining + 1) * oldLength_;
+ if (i < (srcIndex + len)) {
+ // Jump directly to the n-th following edit of the run.
+ // All three indexes advance by the same amount because next() sets
+ // remaining only for runs whose old and new lengths are equal.
+ int n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
+ len = n * oldLength_;
+ srcIndex += len;
+ replIndex += len;
+ destIndex += len;
+ remaining -= n;
+ return true;
+ }
+ // Make next() skip all of these edits at once.
+ oldLength_ = newLength_ = len;
+ remaining = 0;
+ }
+ }
+ // next() ran out of edits without reaching i.
+ return false;
+ }
+
+ /**
+ * @return true if this edit replaces oldLength() units with newLength() different ones.
+ * false if oldLength units remain unchanged.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public boolean hasChange() { return changed; }
+ /**
+ * @return the number of units in the original string which are replaced or remain unchanged.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public int oldLength() { return oldLength_; }
+ /**
+ * @return the number of units in the modified string, if hasChange() is true.
+ * Same as oldLength if hasChange() is false.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public int newLength() { return newLength_; }
+
+ /**
+ * @return the current index into the source string
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public int sourceIndex() { return srcIndex; }
+ /**
+ * @return the current index into the replacement-characters-only string,
+ * not counting unchanged spans
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public int replacementIndex() { return replIndex; }
+ /**
+ * @return the current index into the full destination string
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public int destinationIndex() { return destIndex; }
+ };
+
+ /**
+ * Returns an Iterator for coarse-grained changes for simple string updates.
+ * Skips non-changes.
+ * @return an Iterator that merges adjacent changes.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Iterator getCoarseChangesIterator() {
+ return new Iterator(array, length, true, true);
+ }
+
+ /**
+ * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
+ * @return an Iterator that merges adjacent changes.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Iterator getCoarseIterator() {
+ return new Iterator(array, length, false, true);
+ }
+
+ /**
+ * Returns an Iterator for fine-grained changes for modifying styled text.
+ * Skips non-changes.
+ * @return an Iterator that separates adjacent changes.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Iterator getFineChangesIterator() {
+ return new Iterator(array, length, true, false);
+ }
+
+ /**
+ * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
+ * @return an Iterator that separates adjacent changes.
+ * @hide draft / provisional / internal are hidden on Android
+ */
+ public Iterator getFineIterator() {
+ return new Iterator(array, length, false, false);
+ }
+}
diff --git a/android_icu4j/src/main/java/android/icu/text/LowercaseTransliterator.java b/android_icu4j/src/main/java/android/icu/text/LowercaseTransliterator.java
index 61e971f..768f43c 100644
--- a/android_icu4j/src/main/java/android/icu/text/LowercaseTransliterator.java
+++ b/android_icu4j/src/main/java/android/icu/text/LowercaseTransliterator.java
@@ -45,7 +45,7 @@
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
@@ -57,8 +57,7 @@
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
@@ -86,7 +85,7 @@
iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) {
- c=csp.toFullLower(c, iter, result, locale, locCache);
+ c=csp.toFullLower(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit
diff --git a/android_icu4j/src/main/java/android/icu/text/MessageFormat.java b/android_icu4j/src/main/java/android/icu/text/MessageFormat.java
index e384a2a..de9f019 100644
--- a/android_icu4j/src/main/java/android/icu/text/MessageFormat.java
+++ b/android_icu4j/src/main/java/android/icu/text/MessageFormat.java
@@ -140,8 +140,8 @@
* and unquoted {curly braces} must occur in matched pairs.
* </ul>
*
- * <p>Recommendation: Use the real apostrophe (single quote) character \\u2019 for
- * human-readable text, and use the ASCII apostrophe (\\u0027 ' )
+ * <p>Recommendation: Use the real apostrophe (single quote) character \u2019 for
+ * human-readable text, and use the ASCII apostrophe (\u0027 ' )
* only in program syntax, like quoting in MessageFormat.
* See the annotations for U+0027 Apostrophe in The Unicode Standard.
*
diff --git a/android_icu4j/src/main/java/android/icu/text/TitlecaseTransliterator.java b/android_icu4j/src/main/java/android/icu/text/TitlecaseTransliterator.java
index 40c8f4e..ad4c5a2 100644
--- a/android_icu4j/src/main/java/android/icu/text/TitlecaseTransliterator.java
+++ b/android_icu4j/src/main/java/android/icu/text/TitlecaseTransliterator.java
@@ -43,7 +43,7 @@
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
@@ -56,8 +56,7 @@
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
@@ -120,9 +119,9 @@
type=csp.getTypeOrIgnorable(c);
if(type>=0) { // not case-ignorable
if(doTitle) {
- c=csp.toFullTitle(c, iter, result, locale, locCache);
+ c=csp.toFullTitle(c, iter, result, caseLocale);
} else {
- c=csp.toFullLower(c, iter, result, locale, locCache);
+ c=csp.toFullLower(c, iter, result, caseLocale);
}
doTitle = type==0; // doTitle=isUncased
diff --git a/android_icu4j/src/main/java/android/icu/text/UnicodeSet.java b/android_icu4j/src/main/java/android/icu/text/UnicodeSet.java
index 966db5d..df7cd2d 100644
--- a/android_icu4j/src/main/java/android/icu/text/UnicodeSet.java
+++ b/android_icu4j/src/main/java/android/icu/text/UnicodeSet.java
@@ -3334,7 +3334,7 @@
* property alias, or a special ID. Special IDs are matched
* loosely and correspond to the following sets:
*
- * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ANY" = [\\u0000-\\u0010FFFF],
* "ASCII" = [\\u0000-\\u007F].
*
* @param valueAlias a value alias, either short or long. The
@@ -3783,7 +3783,6 @@
int n = getRangeCount();
int result;
StringBuilder full = new StringBuilder();
- int locCache[] = new int[1];
for (int i=0; i<n; ++i) {
int start = getRangeStart(i);
@@ -3798,13 +3797,13 @@
// add case mappings
// (does not add long s for regular s, or Kelvin for k, for example)
for (int cp=start; cp<=end; ++cp) {
- result = csp.toFullLower(cp, null, full, root, locCache);
+ result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
- result = csp.toFullTitle(cp, null, full, root, locCache);
+ result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
- result = csp.toFullUpper(cp, null, full, root, locCache);
+ result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
result = csp.toFullFolding(cp, full, 0);
@@ -3823,6 +3822,7 @@
} else {
BreakIterator bi = BreakIterator.getWordInstance(root);
for (String str : strings) {
+ // TODO: call lower-level functions
foldSet.add(UCharacter.toLowerCase(root, str));
foldSet.add(UCharacter.toTitleCase(root, str, bi));
foldSet.add(UCharacter.toUpperCase(root, str));
diff --git a/android_icu4j/src/main/java/android/icu/text/UppercaseTransliterator.java b/android_icu4j/src/main/java/android/icu/text/UppercaseTransliterator.java
index 76393f0..6fedd9a 100644
--- a/android_icu4j/src/main/java/android/icu/text/UppercaseTransliterator.java
+++ b/android_icu4j/src/main/java/android/icu/text/UppercaseTransliterator.java
@@ -42,7 +42,7 @@
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
@@ -53,8 +53,7 @@
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
@@ -82,7 +81,7 @@
iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) {
- c=csp.toFullUpper(c, iter, result, locale, locCache);
+ c=csp.toFullUpper(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit
diff --git a/android_icu4j/src/main/java/android/icu/util/ChineseCalendar.java b/android_icu4j/src/main/java/android/icu/util/ChineseCalendar.java
index 84428a7..b2ee08d 100644
--- a/android_icu4j/src/main/java/android/icu/util/ChineseCalendar.java
+++ b/android_icu4j/src/main/java/android/icu/util/ChineseCalendar.java
@@ -167,7 +167,7 @@
* @param year The value used to set the calendar's {@link #YEAR YEAR} time field.
* @param month The value used to set the calendar's {@link #MONTH MONTH} time field.
* The value is 0-based. e.g., 0 for January.
- * @param isLeapMonth The value used to set the Chinese calendar's (@link #IS_LEAP_MONTH)
+ * @param isLeapMonth The value used to set the Chinese calendar's {@link #IS_LEAP_MONTH}
* time field.
* @param date The value used to set the calendar's {@link #DATE DATE} time field.
* @see Category#FORMAT
@@ -223,7 +223,7 @@
* @param year The value used to set the calendar's {@link #YEAR YEAR} time field.
* @param month The value used to set the calendar's {@link #MONTH MONTH} time field.
* The value is 0-based. e.g., 0 for January.
- * @param isLeapMonth The value used to set the Chinese calendar's (@link #IS_LEAP_MONTH)
+ * @param isLeapMonth The value used to set the Chinese calendar's {@link #IS_LEAP_MONTH}
* time field.
* @param date The value used to set the calendar's {@link #DATE DATE} time field.
* @see Category#FORMAT
diff --git a/android_icu4j/src/main/tests/android/icu/dev/test/calendar/CalendarRegressionTest.java b/android_icu4j/src/main/tests/android/icu/dev/test/calendar/CalendarRegressionTest.java
index eb0fc08c..5d6ce4a 100644
--- a/android_icu4j/src/main/tests/android/icu/dev/test/calendar/CalendarRegressionTest.java
+++ b/android_icu4j/src/main/tests/android/icu/dev/test/calendar/CalendarRegressionTest.java
@@ -2171,7 +2171,7 @@
{"en@calendar=islamic", "gregorian"},
{"zh_TW", "gregorian", "roc", "chinese"},
{"ar_IR", "gregorian", "persian", "islamic", "islamic-civil", "islamic-tbla"},
- {"th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa"},
+ {"th@rg=SAZZZZ", "gregorian", "islamic-umalqura", "islamic", "islamic-rgsa"},
};
// Android patch end.
diff --git a/android_icu4j/src/main/tests/android/icu/dev/test/calendar/IBMCalendarTest.java b/android_icu4j/src/main/tests/android/icu/dev/test/calendar/IBMCalendarTest.java
index 7894088..5e243c5 100644
--- a/android_icu4j/src/main/tests/android/icu/dev/test/calendar/IBMCalendarTest.java
+++ b/android_icu4j/src/main/tests/android/icu/dev/test/calendar/IBMCalendarTest.java
@@ -1119,8 +1119,8 @@
"gregorian",
"gregorian", // iso8601 is a gregorian sub type
"gregorian",
- "islamic-umalqura",
- "islamic-umalqura",
+ "gregorian",
+ "gregorian",
"japanese",
"gregorian",
"gregorian",
diff --git a/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java b/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java
index ffb7a96..6afb411 100644
--- a/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java
+++ b/android_icu4j/src/main/tests/android/icu/dev/test/format/ListFormatterTest.java
@@ -54,6 +54,40 @@
}
}
+ // Tests resource loading and inheritance when region sublocale
+ // has only partial data for the listPattern element (overriding
+ // some of the parent data). #12994
+ String[] EnglishGBTestData = {
+ "",
+ "A",
+ "A and B",
+ "A, B and C",
+ "A, B, C and D",
+ "A, B, C, D and E"
+ };
+
+ @Test
+ public void TestEnglishGB() {
+ checkData(ListFormatter.getInstance(new ULocale("en_GB")), EnglishGBTestData);
+ }
+
+ // Tests resource loading and inheritance when region sublocale
+ // has only partial data for the listPattern element (overriding
+ // some of the parent data). #12994
+ String[] ChineseTradHKTestData = {
+ "",
+ "A",
+ "A\u53CAB",
+ "A\u3001B\u53CAC",
+ "A\u3001B\u3001C\u53CAD",
+ "A\u3001B\u3001C\u3001D\u53CAE"
+ };
+
+ @Test
+ public void TestChineseTradHK() {
+ checkData(ListFormatter.getInstance(new ULocale("zh_Hant_HK")), ChineseTradHKTestData);
+ }
+
String[] JapaneseTestData = {
"",
"A",
diff --git a/android_icu4j/src/main/tests/android/icu/dev/test/lang/UCharacterCaseTest.java b/android_icu4j/src/main/tests/android/icu/dev/test/lang/UCharacterCaseTest.java
index 3b2bdb7..b072863 100644
--- a/android_icu4j/src/main/tests/android/icu/dev/test/lang/UCharacterCaseTest.java
+++ b/android_icu4j/src/main/tests/android/icu/dev/test/lang/UCharacterCaseTest.java
@@ -25,6 +25,8 @@
import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
import android.icu.text.BreakIterator;
+import android.icu.text.CaseMap;
+import android.icu.text.Edits;
import android.icu.text.RuleBasedBreakIterator;
import android.icu.text.UTF16;
import android.icu.util.ULocale;
@@ -709,6 +711,191 @@
assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ");
}
+ private static final class EditChange {
+ private boolean change;
+ private int oldLength, newLength;
+ EditChange(boolean change, int oldLength, int newLength) {
+ this.change = change;
+ this.oldLength = oldLength;
+ this.newLength = newLength;
+ }
+ }
+
+ private static void checkEditsIter(
+ String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
+ EditChange[] expected, boolean withUnchanged) {
+ assertFalse(name, ei2.findSourceIndex(-1));
+
+ int expSrcIndex = 0;
+ int expDestIndex = 0;
+ int expReplIndex = 0;
+ for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
+ EditChange expect = expected[expIndex];
+ String msg = name + ' ' + expIndex;
+ if (withUnchanged || expect.change) {
+ assertTrue(msg, ei1.next());
+ assertEquals(msg, expect.change, ei1.hasChange());
+ assertEquals(msg, expect.oldLength, ei1.oldLength());
+ assertEquals(msg, expect.newLength, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+ }
+
+ if (expect.oldLength > 0) {
+ assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
+ assertEquals(msg, expect.change, ei2.hasChange());
+ assertEquals(msg, expect.oldLength, ei2.oldLength());
+ assertEquals(msg, expect.newLength, ei2.newLength());
+ assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+ assertEquals(msg, expDestIndex, ei2.destinationIndex());
+ assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that findSourceIndex() has to look before the current index.
+ ei2.next();
+ ei2.next();
+ }
+ }
+
+ expSrcIndex += expect.oldLength;
+ expDestIndex += expect.newLength;
+ if (expect.change) {
+ expReplIndex += expect.newLength;
+ }
+ }
+ String msg = name + " end";
+ assertFalse(msg, ei1.next());
+ assertFalse(msg, ei1.hasChange());
+ assertEquals(msg, 0, ei1.oldLength());
+ assertEquals(msg, 0, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+
+ assertFalse(name, ei2.findSourceIndex(expSrcIndex));
+ }
+
+ /**
+ * Builds one Edits object from a mix of unchanged spans and replacements, then checks
+ * hasChanges(), lengthDelta() and all four iterator flavors against hand-computed
+ * expectations, and finally checks that reset() empties the object.
+ */
+ @Test
+ public void TestEdits() {
+ Edits edits = new Edits();
+ assertFalse("new Edits", edits.hasChanges());
+ assertEquals("new Edits", 0, edits.lengthDelta());
+ edits.addUnchanged(1); // multiple unchanged ranges are combined
+ edits.addUnchanged(10000); // too long, and they are split
+ // The assertions below expect this zero-length replacement not to count as a change.
+ edits.addReplace(0, 0);
+ edits.addUnchanged(2);
+ // 1 + 10000 + 2 = 10003 unchanged units so far.
+ assertFalse("unchanged 10003", edits.hasChanges());
+ assertEquals("unchanged 10003", 0, edits.lengthDelta());
+ edits.addReplace(1, 1); // multiple short equal-length edits are compressed
+ // The fine-grained expectations below list three consecutive (1, 1) changes,
+ // i.e. this zero-length unchanged span is expected to leave no trace.
+ edits.addUnchanged(0);
+ edits.addReplace(1, 1);
+ edits.addReplace(1, 1);
+ edits.addReplace(0, 10);
+ edits.addReplace(100, 0);
+ edits.addReplace(3000, 4000); // variable-length encoding
+ edits.addReplace(100000, 100000);
+ assertTrue("some edits", edits.hasChanges());
+ // Delta contributions: (0 -> 10) = +10, (100 -> 0) = -100, (3000 -> 4000) = +1000;
+ // the equal-length replacements contribute nothing.
+ assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
+
+ // Coarse view: all unchanged spans merge into one, and all changes merge into one.
+ // old total = 1 + 1 + 1 + 0 + 100 + 3000 + 100000 = 103103
+ // new total = 1 + 1 + 1 + 10 + 0 + 4000 + 100000 = 104013
+ EditChange[] coarseExpectedChanges = new EditChange[] {
+ new EditChange(false, 10003, 10003),
+ new EditChange(true, 103103, 104013)
+ };
+ checkEditsIter("coarse",
+ edits.getCoarseIterator(), edits.getCoarseIterator(),
+ coarseExpectedChanges, true);
+ checkEditsIter("coarse changes",
+ edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
+ coarseExpectedChanges, false);
+
+ // Fine view: unchanged spans still merge, but every replacement is reported separately.
+ EditChange[] fineExpectedChanges = new EditChange[] {
+ new EditChange(false, 10003, 10003),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 0, 10),
+ new EditChange(true, 100, 0),
+ new EditChange(true, 3000, 4000),
+ new EditChange(true, 100000, 100000)
+ };
+ checkEditsIter("fine",
+ edits.getFineIterator(), edits.getFineIterator(),
+ fineExpectedChanges, true);
+ checkEditsIter("fine changes",
+ edits.getFineChangesIterator(), edits.getFineChangesIterator(),
+ fineExpectedChanges, false);
+
+ // After reset() the object must look freshly constructed.
+ edits.reset();
+ assertFalse("reset", edits.hasChanges());
+ assertEquals("reset", 0, edits.lengthDelta());
+ Edits.Iterator ei = edits.getCoarseChangesIterator();
+ assertFalse("reset then iterator", ei.next());
+ }
+
+ @Test
+ public void TestCaseMapWithEdits() {
+ StringBuilder sb = new StringBuilder();
+ Edits edits = new Edits();
+
+ sb = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", sb, edits);
+ assertEquals("toLower(Istanbul)", "ıb", sb.toString());
+ EditChange[] lowerExpectedChanges = new EditChange[] {
+ new EditChange(true, 1, 1),
+ new EditChange(false, 4, 4),
+ new EditChange(true, 1, 1),
+ new EditChange(false, 2, 2)
+ };
+ checkEditsIter("toLower(Istanbul)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ lowerExpectedChanges, true);
+
+ sb.delete(0, sb.length());
+ edits.reset();
+ sb = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", sb, edits);
+ assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", sb.toString());
+ EditChange[] upperExpectedChanges = new EditChange[] {
+ new EditChange(false, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1)
+ };
+ checkEditsIter("toUpper(Πατάτα)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ upperExpectedChanges, true);
+
+ sb.delete(0, sb.length());
+ edits.reset();
+ sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
+ new Locale("nl"), null, "IjssEL IglOo", sb, edits);
+ assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString());
+ EditChange[] titleExpectedChanges = new EditChange[] {
+ new EditChange(false, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(false, 10, 10)
+ };
+ checkEditsIter("toTitle(IjssEL IglOo)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ titleExpectedChanges, true);
+
+ sb.delete(0, sb.length());
+ edits.reset();
+ sb = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", sb, edits);
+ assertEquals("fold(IßtanBul)", "ıssb", sb.toString());
+ EditChange[] foldExpectedChanges = new EditChange[] {
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 2),
+ new EditChange(false, 3, 3),
+ new EditChange(true, 1, 1),
+ new EditChange(false, 2, 2)
+ };
+ checkEditsIter("fold(IßtanBul)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ foldExpectedChanges, true);
+ }
+
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
@@ -946,7 +1133,7 @@
// private methods -------------------------------------------------------
/**
- * Converting the hex numbers represented betwee n ';' to Unicode strings
+ * Converting the hex numbers represented between ';' to Unicode strings
* @param str string to break up into Unicode strings
* @return array of Unicode strings ending with a null
*/
diff --git a/icu4c/source/common/listformatter.cpp b/icu4c/source/common/listformatter.cpp
index 9225c22..263d5eb 100644
--- a/icu4c/source/common/listformatter.cpp
+++ b/icu4c/source/common/listformatter.cpp
@@ -25,6 +25,7 @@
#include "charstr.h"
#include "ucln_cmn.h"
#include "uresimp.h"
+#include "resource.h"
U_NAMESPACE_BEGIN
@@ -78,17 +79,6 @@
U_CDECL_END
-static ListFormatInternal* loadListFormatInternal(
- const Locale& locale,
- const char* style,
- UErrorCode& errorCode);
-
-static void getStringByKey(
- const UResourceBundle* rb,
- const char* key,
- UnicodeString& result,
- UErrorCode& errorCode);
-
ListFormatter::ListFormatter(const ListFormatter& other) :
owned(other.owned), data(other.data) {
if (other.owned != NULL) {
@@ -171,30 +161,100 @@
return result;
}
-static ListFormatInternal* loadListFormatInternal(
+static const UChar solidus = 0x2F;
+static const UChar aliasPrefix[] = { 0x6C,0x69,0x73,0x74,0x50,0x61,0x74,0x74,0x65,0x72,0x6E,0x2F }; // "listPattern/"
+enum {
+ kAliasPrefixLen = UPRV_LENGTHOF(aliasPrefix),
+ kStyleLenMax = 24 // longest currently is 14
+};
+
+// Resource sink that collects the four list patterns ("2", "start", "middle", "end")
+// for one list style, and records the name of another style when the data is an
+// alias of the form ".../listPattern/<style>...".
+struct ListFormatter::ListPatternsSink : public ResourceSink {
+ // Collected patterns; each stays empty until a non-alias value has been seen for it.
+ UnicodeString two, start, middle, end;
+ // NUL-terminated name of the style an alias pointed to, or "" if none was recorded.
+ char aliasedStyle[kStyleLenMax+1] = {0};
+
+ ListPatternsSink() {}
+ virtual ~ListPatternsSink();
+
+ // Extracts the path segment that follows "listPattern/" in alias (up to the next '/'
+ // or the end of the string) into aliasedStyle. Does nothing if the prefix is absent.
+ void setAliasedStyle(UnicodeString alias) {
+ int32_t startIndex = alias.indexOf(aliasPrefix, kAliasPrefixLen, 0);
+ if (startIndex < 0) {
+ return;
+ }
+ startIndex += kAliasPrefixLen;
+ int32_t endIndex = alias.indexOf(solidus, startIndex);
+ if (endIndex < 0) {
+ endIndex = alias.length();
+ }
+ alias.extract(startIndex, endIndex-startIndex, aliasedStyle, kStyleLenMax+1, US_INV);
+ // Force termination in case the extracted name filled the whole buffer.
+ aliasedStyle[kStyleLenMax] = 0;
+ }
+
+ // Stores value into pattern unless pattern already has content, so the first
+ // non-alias value seen for a pattern is the one that is kept.
+ // NOTE(review): presumably put() is called from most- to least-specific bundle,
+ // which would make the most specific locale win -- confirm against
+ // ures_getAllItemsWithFallback().
+ // An alias value is not stored; it only records the aliased style name, and only
+ // if no style name has been recorded yet during the current put() call.
+ void handleValueForPattern(ResourceValue &value, UnicodeString &pattern, UErrorCode &errorCode) {
+ if (pattern.isEmpty()) {
+ if (value.getType() == URES_ALIAS) {
+ if (aliasedStyle[0] == 0) {
+ setAliasedStyle(value.getAliasUnicodeString(errorCode));
+ }
+ } else {
+ pattern = value.getUnicodeString(errorCode);
+ }
+ }
+ }
+
+ // ResourceSink callback. value is either an alias for the whole style, or a table
+ // whose entries are dispatched by key to handleValueForPattern(); other keys are ignored.
+ virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
+ UErrorCode &errorCode) {
+ // Forget any alias recorded by an earlier put() call.
+ aliasedStyle[0] = 0;
+ if (value.getType() == URES_ALIAS) {
+ setAliasedStyle(value.getAliasUnicodeString(errorCode));
+ return;
+ }
+ ResourceTable listPatterns = value.getTable(errorCode);
+ // Note: the loop reuses the key and value parameters as its iteration variables.
+ for (int i = 0; U_SUCCESS(errorCode) && listPatterns.getKeyAndValue(i, key, value); ++i) {
+ if (uprv_strcmp(key, "2") == 0) {
+ handleValueForPattern(value, two, errorCode);
+ } else if (uprv_strcmp(key, "end") == 0) {
+ handleValueForPattern(value, end, errorCode);
+ } else if (uprv_strcmp(key, "middle") == 0) {
+ handleValueForPattern(value, middle, errorCode);
+ } else if (uprv_strcmp(key, "start") == 0) {
+ handleValueForPattern(value, start, errorCode);
+ }
+ }
+ }
+};
+
+// Virtual destructors must be defined out of line.
+ListFormatter::ListPatternsSink::~ListPatternsSink() {}
+
+ListFormatInternal* ListFormatter::loadListFormatInternal(
const Locale& locale, const char * style, UErrorCode& errorCode) {
UResourceBundle* rb = ures_open(NULL, locale.getName(), &errorCode);
- if (U_FAILURE(errorCode)) {
- ures_close(rb);
- return NULL;
- }
rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode);
- rb = ures_getByKeyWithFallback(rb, style, rb, &errorCode);
-
if (U_FAILURE(errorCode)) {
ures_close(rb);
return NULL;
}
- UnicodeString two, start, middle, end;
- getStringByKey(rb, "2", two, errorCode);
- getStringByKey(rb, "start", start, errorCode);
- getStringByKey(rb, "middle", middle, errorCode);
- getStringByKey(rb, "end", end, errorCode);
+ ListFormatter::ListPatternsSink sink;
+ char currentStyle[kStyleLenMax+1];
+ uprv_strncpy(currentStyle, style, kStyleLenMax);
+ currentStyle[kStyleLenMax] = 0;
+
+ for (;;) {
+ ures_getAllItemsWithFallback(rb, currentStyle, sink, errorCode);
+ if (U_FAILURE(errorCode) || sink.aliasedStyle[0] == 0 || uprv_strcmp(currentStyle, sink.aliasedStyle) == 0) {
+ break;
+ }
+ uprv_strcpy(currentStyle, sink.aliasedStyle);
+ }
ures_close(rb);
if (U_FAILURE(errorCode)) {
return NULL;
}
- ListFormatInternal* result = new ListFormatInternal(two, start, middle, end, errorCode);
+ if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) {
+ errorCode = U_MISSING_RESOURCE_ERROR;
+ return NULL;
+ }
+ ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode);
if (result == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return NULL;
@@ -206,15 +266,6 @@
return result;
}
-static void getStringByKey(const UResourceBundle* rb, const char* key, UnicodeString& result, UErrorCode& errorCode) {
- int32_t len;
- const UChar* ustr = ures_getStringByKeyWithFallback(rb, key, &len, &errorCode);
- if (U_FAILURE(errorCode)) {
- return;
- }
- result.setTo(ustr, len);
-}
-
ListFormatter* ListFormatter::createInstance(UErrorCode& errorCode) {
Locale locale; // The default locale.
return createInstance(locale, errorCode);
diff --git a/icu4c/source/common/unicode/listformatter.h b/icu4c/source/common/unicode/listformatter.h
index f2c8988..93eb7f3 100644
--- a/icu4c/source/common/unicode/listformatter.h
+++ b/icu4c/source/common/unicode/listformatter.h
@@ -157,6 +157,8 @@
private:
static void initializeHash(UErrorCode& errorCode);
static const ListFormatInternal* getListFormatInternal(const Locale& locale, const char *style, UErrorCode& errorCode);
+ struct ListPatternsSink;
+ static ListFormatInternal* loadListFormatInternal(const Locale& locale, const char* style, UErrorCode& errorCode);
ListFormatter();
diff --git a/icu4c/source/data/locales/ar_SA.txt b/icu4c/source/data/locales/ar_SA.txt
index cdf6f3c..1c82d9e 100644
--- a/icu4c/source/data/locales/ar_SA.txt
+++ b/icu4c/source/data/locales/ar_SA.txt
@@ -10,7 +10,7 @@
}
Version{"2.1.27.99"}
calendar{
- default{"islamic-umalqura"}
+ default{"gregorian"} // android-changed
gregorian{
dayPeriod{
format{
diff --git a/icu4c/source/data/misc/supplementalData.txt b/icu4c/source/data/misc/supplementalData.txt
index 8730c1c..29fb7cc 100644
--- a/icu4c/source/data/misc/supplementalData.txt
+++ b/icu4c/source/data/misc/supplementalData.txt
@@ -1796,8 +1796,10 @@
"islamic-tbla",
}
SA{
- "islamic-umalqura",
+// BEGIN android-changed
"gregorian",
+ "islamic-umalqura",
+// END android-changed
"islamic",
"islamic-rgsa",
}
diff --git a/icu4c/source/stubdata/icudt58l.dat b/icu4c/source/stubdata/icudt58l.dat
index 40d0b41..60e50fd 100644
--- a/icu4c/source/stubdata/icudt58l.dat
+++ b/icu4c/source/stubdata/icudt58l.dat
Binary files differ
diff --git a/icu4c/source/test/cintltst/ccaltst.c b/icu4c/source/test/cintltst/ccaltst.c
index 27d99ba..25d95b3 100644
--- a/icu4c/source/test/cintltst/ccaltst.c
+++ b/icu4c/source/test/cintltst/ccaltst.c
@@ -89,8 +89,8 @@
{ "th-TH-u-ca-gregory", UCAL_DEFAULT, "gregorian" },
{ "ja_JP@calendar=japanese", UCAL_GREGORIAN, "gregorian" },
{ "fr_CH", UCAL_DEFAULT, "gregorian" },
- { "fr_SA", UCAL_DEFAULT, "islamic-umalqura" },
- { "fr_CH@rg=sazzzz", UCAL_DEFAULT, "islamic-umalqura" },
+ { "fr_SA", UCAL_DEFAULT, "gregorian" }, // android-changed
+ { "fr_CH@rg=sazzzz", UCAL_DEFAULT, "gregorian" }, // android-changed
{ "fr_CH@calendar=japanese;rg=sazzzz", UCAL_DEFAULT, "japanese" },
{ "fr_TH@rg=SA", UCAL_DEFAULT, "gregorian" }, /* ignore malformed rg tag */ // android-changed
{ "th@rg=SA", UCAL_DEFAULT, "gregorian" }, /* ignore malformed rg tag */ // android-changed
@@ -1573,7 +1573,7 @@
{ "en@calendar=islamic", "gregorian", NULL, NULL, NULL, NULL },
{ "zh_TW", "gregorian", "roc", "chinese", NULL, NULL },
{ "ar_IR", "gregorian", "persian", "islamic", "islamic-civil", "islamic-tbla" }, // android-changed
- { "th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa", NULL },
+ { "th@rg=SAZZZZ", "gregorian", "islamic-umalqura", "islamic", "islamic-rgsa", NULL }, // android-changed
};
const int32_t EXPECTED_SIZE[PREFERRED_SIZE] = { 1, 1, 1, 1, 2, 2, 2, 5, 5, 2, 2, 2, 1, 3, 5, 4 };
UErrorCode status = U_ZERO_ERROR;
diff --git a/icu4c/source/test/intltest/listformattertest.cpp b/icu4c/source/test/intltest/listformattertest.cpp
index b7aaea2..ad0afa5 100644
--- a/icu4c/source/test/intltest/listformattertest.cpp
+++ b/icu4c/source/test/intltest/listformattertest.cpp
@@ -147,6 +147,48 @@
CheckFourCases("en_US", one, two, three, four, results);
}
+// Tests resource loading and inheritance when region sublocale
+// has only partial data for the listPattern element (overriding
+// some of the parent data). #12994
+void ListFormatterTest::TestEnglishGB() {
+ UnicodeString results[4] = {
+ one,
+ one + " and " + two,
+ one + ", " + two + " and " + three,
+ one + ", " + two + ", " + three + " and " + four
+ };
+
+ CheckFourCases("en_GB", one, two, three, four, results);
+}
+
+// Tests resource loading and inheritance when region sublocale
+// has only partial data for the listPattern element (overriding
+// some of the parent data). #12994
+void ListFormatterTest::TestNynorsk() {
+ UnicodeString results[4] = {
+ one,
+ one + " og " + two,
+ one + ", " + two + " og " + three,
+ one + ", " + two + ", " + three + " og " + four
+ };
+
+ CheckFourCases("nn", one, two, three, four, results);
+}
+
+// Tests resource loading and inheritance when region sublocale
+// has only partial data for the listPattern element (overriding
+// some of the parent data). #12994
+void ListFormatterTest::TestChineseTradHK() {
+ UnicodeString results[4] = {
+ one,
+ one + "\u53CA" + two,
+ one + "\u3001" + two + "\u53CA" + three,
+ one + "\u3001" + two + "\u3001" + three + "\u53CA" + four
+ };
+
+ CheckFourCases("zh_Hant_HK", one, two, three, four, results);
+}
+
// Formatting in Russian.
// "\\u0438" is used before the last element, and all elements up to (but not including) the penultimate are followed by a comma.
void ListFormatterTest::TestRussian() {
@@ -229,6 +271,9 @@
case 6: name = "TestZulu"; if (exec) TestZulu(); break;
case 7: name = "TestOutOfOrderPatterns"; if (exec) TestOutOfOrderPatterns(); break;
case 8: name = "Test9946"; if (exec) Test9946(); break;
+ case 9: name = "TestEnglishGB"; if (exec) TestEnglishGB(); break;
+ case 10: name = "TestNynorsk"; if (exec) TestNynorsk(); break;
+ case 11: name = "TestChineseTradHK"; if (exec) TestChineseTradHK(); break;
default: name = ""; break;
}
diff --git a/icu4c/source/test/intltest/listformattertest.h b/icu4c/source/test/intltest/listformattertest.h
index 1281306..70686d7 100644
--- a/icu4c/source/test/intltest/listformattertest.h
+++ b/icu4c/source/test/intltest/listformattertest.h
@@ -33,6 +33,9 @@
void TestBogus();
void TestEnglish();
void TestEnglishUS();
+ void TestEnglishGB();
+ void TestNynorsk();
+ void TestChineseTradHK();
void TestRussian();
void TestMalayalam();
void TestZulu();
diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp
index 510146b..801e590 100644
--- a/icu4c/source/test/intltest/measfmttest.cpp
+++ b/icu4c/source/test/intltest/measfmttest.cpp
@@ -1645,6 +1645,8 @@
helperTestManyLocaleDurations("de", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37");
helperTestManyLocaleDurations("en", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5h 37m");
helperTestManyLocaleDurations("en", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37");
+ helperTestManyLocaleDurations("en_GB", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5h 37m");
+ helperTestManyLocaleDurations("en_GB", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37");
helperTestManyLocaleDurations("es", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5h 37min");
helperTestManyLocaleDurations("es", UMEASFMT_WIDTH_NUMERIC, measures, UPRV_LENGTHOF(measures), "5:37");
helperTestManyLocaleDurations("fi", UMEASFMT_WIDTH_NARROW, measures, UPRV_LENGTHOF(measures), "5t 37min");
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java
similarity index 69%
rename from icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java
rename to icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java
index 0d1c259..f28e60e 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java
@@ -2,9 +2,14 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl;
-import com.ibm.icu.util.ULocale;
+import java.io.IOException;
-public final class CaseMap {
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.Edits;
+import com.ibm.icu.util.ICUUncheckedIOException;
+
+public final class CaseMapImpl {
/**
* Implementation of UCaseProps.ContextIterator, iterates over a String.
* See ustrcase.c/utf16_caseContextIterator().
@@ -12,11 +17,11 @@
public static final class StringContextIterator implements UCaseProps.ContextIterator {
/**
* Constructor.
- * @param s String to iterate over.
+ * @param src Text (any CharSequence) to iterate over.
*/
- public StringContextIterator(String s) {
- this.s=s;
- limit=s.length();
+ public StringContextIterator(CharSequence src) {
+ this.s=src;
+ limit=src.length();
cpStart=cpLimit=index=0;
dir=0;
}
@@ -60,7 +65,7 @@
public int nextCaseMapCP() {
cpStart=cpLimit;
if(cpLimit<limit) {
- int c=s.codePointAt(cpLimit);
+ int c=Character.codePointAt(s, cpLimit);
cpLimit+=Character.charCount(c);
return c;
} else {
@@ -84,6 +89,10 @@
return cpLimit;
}
+ public int getCPLength() { // length in chars (cpLimit-cpStart) of the code point last returned by nextCaseMapCP()
+ return cpLimit-cpStart;
+ }
+
// implement UCaseProps.ContextIterator
// The following code is not used anywhere in this private class
@Override
@@ -108,11 +117,11 @@
int c;
if(dir>0 && index<s.length()) {
- c=s.codePointAt(index);
+ c=Character.codePointAt(s, index);
index+=Character.charCount(c);
return c;
} else if(dir<0 && index>0) {
- c=s.codePointBefore(index);
+ c=Character.codePointBefore(s, index);
index-=Character.charCount(c);
return c;
}
@@ -120,44 +129,242 @@
}
// variables
- protected String s;
+ protected CharSequence s;
protected int index, limit, cpStart, cpLimit;
protected int dir; // 0=initial state >0=forward <0=backward
}
- /** Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. */
- private static final void appendResult(int c, StringBuilder result) {
- // Decode the result.
- if (c < 0) {
- // (not) original code point
- result.appendCodePoint(~c);
- } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
- // The mapping has already been appended to result.
+ /**
+ * Omit unchanged text when case-mapping with Edits.
+ */
+ public static final int OMIT_UNCHANGED_TEXT = 0x4000;
+ /** Appends code point c to a as one char (c <= Character.MAX_VALUE) or as a surrogate pair; returns the number of chars appended (1 or 2). */
+ private static int appendCodePoint(Appendable a, int c) throws IOException {
+ if (c <= Character.MAX_VALUE) {
+ a.append((char)c);
+ return 1;
} else {
- // Append the single-code point mapping.
- result.appendCodePoint(c);
+ a.append((char)(0xd7c0 + (c >> 10))); // lead surrogate: 0xd7c0 == 0xd800 - (0x10000 >> 10)
+ a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff))); // trail surrogate from the low 10 bits
+ return 2;
}
}
- // TODO: Move the other string case mapping functions from UCharacter to here, too.
-
- public static String toUpper(ULocale locale, String str) {
- if (locale == null) {
- locale = ULocale.getDefault();
+ /**
+ * Appends a full case mapping result to dest and records it in edits (if non-null), see {@link UCaseProps#MAX_STRING_LENGTH}.
+ * @throws IOException if appending to dest fails
+ */
+ private static void appendResult(int result, Appendable dest,
+ int cpLength, int options, Edits edits) throws IOException {
+ // Decode the result.
+ if (result < 0) {
+ // (not) original code point: result is the bitwise complement of the unchanged code point
+ if (edits != null) {
+ edits.addUnchanged(cpLength);
+ if ((options & OMIT_UNCHANGED_TEXT) != 0) { // only honored when edits are being recorded
+ return;
+ }
+ }
+ appendCodePoint(dest, ~result);
+ } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
+ // The mapping string has already been appended to dest; result is its length.
+ if (edits != null) {
+ edits.addReplace(cpLength, result);
+ }
+ } else {
+ // Append the single-code point mapping.
+ int length = appendCodePoint(dest, result);
+ if (edits != null) {
+ edits.addReplace(cpLength, length);
+ }
+ }
- int[] locCache = new int[] { UCaseProps.getCaseLocale(locale, null) };
- if (locCache[0] == UCaseProps.LOC_GREEK) {
- return GreekUpper.toUpper(str, locCache);
- }
+ }
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
+ private static final void appendUnchanged(CharSequence src, int start, int length,
+ Appendable dest, int options, Edits edits) throws IOException { // copies src[start, start+length) to dest as unchanged text
+ if (length > 0) { // nothing is recorded or copied for an empty span
+ if (edits != null) {
+ edits.addUnchanged(length);
+ if ((options & OMIT_UNCHANGED_TEXT) != 0) { // only honored when edits are being recorded
+ return;
+ }
+ }
+ dest.append(src, start, start + length);
+ }
+ }
+ /** Lowercases every code point that iter still yields (nextCaseMapCP() stops at iter's limit), appending to dest and recording into edits via appendResult. */
+ private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
+ Appendable dest, Edits edits) throws IOException {
int c;
- while((c=iter.nextCaseMapCP())>=0) {
- c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
- appendResult(c, result);
+ while ((c = iter.nextCaseMapCP()) >= 0) {
+ c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
}
- return result.toString();
+ }
+ /** Lowercases src into dest and returns dest; edits, if non-null, is reset first and then filled. An IOException from dest is rethrown as ICUUncheckedIOException. */
+ public static <A extends Appendable> A toLower(int caseLocale, int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ StringContextIterator iter = new StringContextIterator(src);
+ internalToLower(caseLocale, options, iter, dest, edits);
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+ /** Uppercases src into dest and returns dest; edits, if non-null, is reset first and then filled. An IOException from dest is rethrown as ICUUncheckedIOException. */
+ public static <A extends Appendable> A toUpper(int caseLocale, int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ if (caseLocale == UCaseProps.LOC_GREEK) { // Greek is handled entirely by the GreekUpper implementation
+ return GreekUpper.toUpper(options, src, dest, edits);
+ }
+ StringContextIterator iter = new StringContextIterator(src);
+ int c;
+ while ((c = iter.nextCaseMapCP()) >= 0) {
+ c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+ /** Titlecases src into dest and returns dest: each segment delimited by titleIter gets its first cased character (or simply its first character with TITLECASE_NO_BREAK_ADJUSTMENT) titlecased and the rest lowercased (or copied with TITLECASE_NO_LOWERCASE). */
+ public static <A extends Appendable> A toTitle(
+ int caseLocale, int options, BreakIterator titleIter,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+
+ /* set up local variables */
+ StringContextIterator iter = new StringContextIterator(src);
+ int srcLength = src.length();
+ int prev=0;
+ boolean isFirstIndex=true;
+
+ /* titlecasing loop */
+ while(prev<srcLength) {
+ /* find next index where to titlecase */
+ int index;
+ if(isFirstIndex) {
+ isFirstIndex=false;
+ index=titleIter.first();
+ } else {
+ index=titleIter.next();
+ }
+ if(index==BreakIterator.DONE || index>srcLength) {
+ index=srcLength;
+ }
+
+ /*
+ * Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * In this implementation, segment [prev..index[ into 3 parts:
+ * a) uncased characters (copy as-is) [prev..titleStart[
+ * b) first case letter (titlecase) [titleStart..titleLimit[
+ * c) subsequent characters (lowercase) [titleLimit..index[
+ */
+ if(prev<index) {
+ // find and copy uncased characters [prev..titleStart[
+ int titleStart=prev;
+ iter.setLimit(index);
+ int c=iter.nextCaseMapCP();
+ if((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0
+ && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
+ // Adjust the titlecasing index (titleStart) to the next cased character.
+ while((c=iter.nextCaseMapCP())>=0
+ && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
+ // If c<0 then we have only uncased characters in [prev..index[
+ // and stopped with titleStart==titleLimit==index.
+ titleStart=iter.getCPStart();
+ appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
+ }
+
+ if(titleStart<index) {
+ int titleLimit=iter.getCPLimit();
+ // titlecase c which is from [titleStart..titleLimit[
+ c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
+
+ // Special case Dutch IJ titlecasing
+ if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
+ char c1 = src.charAt(titleStart);
+ if ((c1 == 'i' || c1 == 'I')) {
+ char c2 = src.charAt(titleStart+1);
+ if (c2 == 'j') {
+ dest.append('J');
+ if (edits != null) {
+ edits.addReplace(1, 1); // 'j' -> 'J': one char replaced by one char
+ }
+ c = iter.nextCaseMapCP(); // consume the 'j' so it is not lowercased below
+ titleLimit++;
+ assert c == c2;
+ assert titleLimit == iter.getCPLimit();
+ } else if (c2 == 'J') {
+ // Keep the capital J from getting lowercased.
+ appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
+ c = iter.nextCaseMapCP();
+ titleLimit++;
+ assert c == c2;
+ assert titleLimit == iter.getCPLimit();
+ }
+ }
+ }
+
+ // lowercase [titleLimit..index[
+ if(titleLimit<index) {
+ if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
+ // Normal operation: Lowercase the rest of the word.
+ internalToLower(caseLocale, options, iter, dest, edits);
+ } else {
+ // Optionally just copy the rest of the word unchanged.
+ appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
+ iter.moveToLimit();
+ }
+ }
+ }
+ }
+
+ prev=index;
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
+ }
+ /** Case-folds src into dest and returns dest; edits, if non-null, is reset first and then filled. An IOException from dest is rethrown as ICUUncheckedIOException. */
+ public static <A extends Appendable> A fold(int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ int length = src.length();
+ for (int i = 0; i < length;) { // i is advanced inside the loop by the code point's char count
+ int c = Character.codePointAt(src, i);
+ int cpLength = Character.charCount(c);
+ i += cpLength;
+ c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
+ appendResult(c, dest, cpLength, options, edits);
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
private static final class GreekUpper {
@@ -661,12 +868,13 @@
* TODO: Try to re-consolidate one way or another with the non-Greek function.
*
* <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
+ * @throws IOException if appending to dest fails
*/
- private static String toUpper(CharSequence s, int[] locCache) {
- StringBuilder result = new StringBuilder(s.length());
+ private static <A extends Appendable> A toUpper(int options,
+ CharSequence src, A dest, Edits edits) throws IOException {
int state = 0;
- for (int i = 0; i < s.length();) {
- int c = Character.codePointAt(s, i);
+ for (int i = 0; i < src.length();) {
+ int c = Character.codePointAt(src, i);
int nextIndex = i + Character.charCount(c);
int nextState = 0;
int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
@@ -695,8 +903,8 @@
numYpogegrammeni = 1;
}
// Skip combining diacritics after this Greek letter.
- while (nextIndex < s.length()) {
- int diacriticData = getDiacriticData(s.charAt(nextIndex));
+ while (nextIndex < src.length()) {
+ int diacriticData = getDiacriticData(src.charAt(nextIndex));
if (diacriticData != 0) {
data |= diacriticData;
if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
@@ -716,7 +924,7 @@
(data & HAS_ACCENT) != 0 &&
numYpogegrammeni == 0 &&
(state & AFTER_CASED) == 0 &&
- !isFollowedByCasedLetter(s, nextIndex)) {
+ !isFollowedByCasedLetter(src, nextIndex)) {
// Keep disjunctive "or" with (only) a tonos.
// We use the same "word boundary" conditions as for the Final_Sigma test.
if (i == nextIndex) {
@@ -734,25 +942,59 @@
data &= ~HAS_EITHER_DIALYTIKA;
}
}
- result.appendCodePoint(upper);
- if ((data & HAS_EITHER_DIALYTIKA) != 0) {
- result.append('\u0308'); // restore or add a dialytika
+
+ boolean change;
+ if (edits == null) {
+ change = true; // common, simple usage
+ } else {
+ // Find out first whether we are changing the text.
+ change = src.charAt(i) != upper || numYpogegrammeni > 0;
+ int i2 = i + 1;
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
+ ++i2;
+ }
+ if (addTonos) {
+ change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
+ ++i2;
+ }
+ int oldLength = nextIndex - i;
+ int newLength = (i2 - i) + numYpogegrammeni;
+ change |= oldLength != newLength;
+ if (change) {
+ if (edits != null) {
+ edits.addReplace(oldLength, newLength);
+ }
+ } else {
+ if (edits != null) {
+ edits.addUnchanged(oldLength);
+ }
+ // Write unchanged text?
+ change = (options & OMIT_UNCHANGED_TEXT) == 0;
+ }
}
- if (addTonos) {
- result.append('\u0301');
- }
- while (numYpogegrammeni > 0) {
- result.append('Ι');
- --numYpogegrammeni;
+
+ if (change) {
+ dest.append((char)upper);
+ if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+ dest.append('\u0308'); // restore or add a dialytika
+ }
+ if (addTonos) {
+ dest.append('\u0301');
+ }
+ while (numYpogegrammeni > 0) {
+ dest.append('Ι');
+ --numYpogegrammeni;
+ }
}
} else {
- c = UCaseProps.INSTANCE.toFullUpper(c, null, result, null, locCache);
- appendResult(c, result);
+ c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
+ appendResult(c, dest, nextIndex - i, options, edits);
}
i = nextIndex;
state = nextState;
}
- return result.toString();
+ return dest;
}
}
}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
index 927cdc0..6b5619d 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
@@ -24,6 +24,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
+import java.util.Locale;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
@@ -71,7 +72,7 @@
// read exceptions[]
count=indexes[IX_EXC_LENGTH];
if(count>0) {
- exceptions=ICUBinary.getChars(bytes, count, 0);
+ exceptions=ICUBinary.getString(bytes, count, 0);
}
// read unfold[]
@@ -150,7 +151,7 @@
*
* @param excWord (in) initial exceptions word
* @param index (in) desired slot index
- * @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
+ * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
* @return bits 31..0: slot value
* 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
*/
@@ -158,11 +159,11 @@
long value;
if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index);
- value=exceptions[excOffset];
+ value=exceptions.charAt(excOffset);
} else {
excOffset+=2*slotOffset(excWord, index);
- value=exceptions[excOffset++];
- value=(value<<16)|exceptions[excOffset];
+ value=exceptions.charAt(excOffset++);
+ value=(value<<16)|exceptions.charAt(excOffset);
}
return value |((long)excOffset<<32);
}
@@ -172,11 +173,11 @@
int value;
if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index);
- value=exceptions[excOffset];
+ value=exceptions.charAt(excOffset);
} else {
excOffset+=2*slotOffset(excWord, index);
- value=exceptions[excOffset++];
- value=(value<<16)|exceptions[excOffset];
+ value=exceptions.charAt(excOffset++);
+ value=(value<<16)|exceptions.charAt(excOffset);
}
return value;
}
@@ -191,7 +192,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_LOWER)) {
c=getSlotValue(excWord, EXC_LOWER, excOffset);
}
@@ -207,7 +208,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_UPPER)) {
c=getSlotValue(excWord, EXC_UPPER, excOffset);
}
@@ -223,7 +224,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index;
if(hasSlot(excWord, EXC_TITLE)) {
index=EXC_TITLE;
@@ -291,7 +292,7 @@
*/
int excOffset0, excOffset=getExceptionsOffset(props);
int closureOffset;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index, closureLength, fullLength, length;
excOffset0=excOffset;
@@ -334,7 +335,7 @@
/* add the full case folding string */
length=fullLength&0xf;
if(length!=0) {
- set.add(new String(exceptions, excOffset, length));
+ set.add(exceptions.substring(excOffset, excOffset+length));
excOffset+=length;
}
@@ -348,8 +349,9 @@
}
/* add each code point in the closure string */
- for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
- c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
+ int limit=closureOffset+closureLength;
+ for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
+ c=exceptions.codePointAt(index);
set.add(c);
}
}
@@ -468,7 +470,7 @@
if(!propsHasException(props)) {
return props&DOT_MASK;
} else {
- return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
+ return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
}
}
@@ -605,38 +607,49 @@
*/
public static final int MAX_STRING_LENGTH=0x1f;
- private static final int LOC_UNKNOWN=0;
- private static final int LOC_ROOT=1;
+ // private static final int LOC_UNKNOWN=0;
+ public static final int LOC_ROOT=1;
private static final int LOC_TURKISH=2;
private static final int LOC_LITHUANIAN=3;
static final int LOC_GREEK=4;
+ public static final int LOC_DUTCH=5;
- /*
- * Checks and caches the type of locale ID as it is relevant for case mapping.
- * If the locCache is not null, then it must be initialized with locCache[0]=0 .
- */
- static final int getCaseLocale(ULocale locale, int[] locCache) {
- int result;
-
- if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
- return result;
+ public static final int getCaseLocale(Locale locale) { // maps locale's language subtag to a LOC_* constant; locale must be non-null
+ return getCaseLocale(locale.getLanguage());
+ }
+ public static final int getCaseLocale(ULocale locale) { // maps locale's language subtag to a LOC_* constant; locale must be non-null
+ return getCaseLocale(locale.getLanguage());
+ }
+ /** Maps a language subtag to a LOC_* case locale constant (LOC_ROOT when no special handling applies). Accepts both 2- and 3-letter language subtags. */
+ private static final int getCaseLocale(String language) {
+ // Check the subtag length to reduce the number of comparisons
+ // for locales without special behavior.
+ // Fastpath for English "en" which is often used for default (=root locale) case mappings,
+ // and for Chinese "zh": Very common but no special case mapping behavior.
+ if(language.length()==2) {
+ if(language.equals("en") || language.charAt(0)>'t') { // first letter after 't' (e.g. "zh"): none of tr, az, el, lt, nl can match
+ return LOC_ROOT;
+ } else if(language.equals("tr") || language.equals("az")) {
+ return LOC_TURKISH;
+ } else if(language.equals("el")) {
+ return LOC_GREEK;
+ } else if(language.equals("lt")) {
+ return LOC_LITHUANIAN;
+ } else if(language.equals("nl")) {
+ return LOC_DUTCH;
+ }
+ } else if(language.length()==3) {
+ if(language.equals("tur") || language.equals("aze")) {
+ return LOC_TURKISH;
+ } else if(language.equals("ell")) {
+ return LOC_GREEK;
+ } else if(language.equals("lit")) {
+ return LOC_LITHUANIAN;
+ } else if(language.equals("nld")) {
+ return LOC_DUTCH;
+ }
}
-
- result=LOC_ROOT;
-
- String language=locale.getLanguage();
- if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
- result=LOC_TURKISH;
- } else if(language.equals("el") || language.equals("ell")) {
- result=LOC_GREEK;
- } else if(language.equals("lt") || language.equals("lit")) {
- result=LOC_LITHUANIAN;
- }
-
- if(locCache!=null) {
- locCache[0]=result;
- }
- return result;
+ return LOC_ROOT;
}
/* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */
@@ -797,19 +810,14 @@
* See ContextIterator for details.
* If iter==null then a context-independent result is returned.
* @param out If the mapping result is a string, then it is appended to out.
- * @param locale Locale ID for locale-dependent mappings.
- * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
- * the locale ID for subsequent calls.
- * Can be null.
+ * @param caseLocale Case locale value from getCaseLocale() (ucase_getCaseLocale() in the C code).
* @return Output code point or string length, see MAX_STRING_LENGTH.
*
* @see ContextIterator
* @see #MAX_STRING_LENGTH
* @internal
*/
- public final int toFullLower(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
+ public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
int result, props;
result=c;
@@ -820,22 +828,20 @@
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full;
excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */
- int loc=getCaseLocale(locale, locCache);
-
/*
* Test for conditional mappings first
* (otherwise the unconditional default mappings are always taken),
* then test for characters that have unconditional mappings in SpecialCasing.txt,
* then get the UnicodeData.txt mappings.
*/
- if( loc==LOC_LITHUANIAN &&
+ if( caseLocale==LOC_LITHUANIAN &&
/* base characters, find accents above */
(((c==0x49 || c==0x4a || c==0x12e) &&
isFollowedByMoreAbove(iter)) ||
@@ -858,30 +864,34 @@
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
*/
- switch(c) {
- case 0x49: /* LATIN CAPITAL LETTER I */
- out.append(iDot);
- return 2;
- case 0x4a: /* LATIN CAPITAL LETTER J */
- out.append(jDot);
- return 2;
- case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
- out.append(iOgonekDot);
- return 2;
- case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
- out.append(iDotGrave);
- return 3;
- case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
- out.append(iDotAcute);
- return 3;
- case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
- out.append(iDotTilde);
- return 3;
- default:
- return 0; /* will not occur */
+ try {
+ switch(c) {
+ case 0x49: /* LATIN CAPITAL LETTER I */
+ out.append(iDot);
+ return 2;
+ case 0x4a: /* LATIN CAPITAL LETTER J */
+ out.append(jDot);
+ return 2;
+ case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
+ out.append(iOgonekDot);
+ return 2;
+ case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
+ out.append(iDotGrave);
+ return 3;
+ case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
+ out.append(iDotAcute);
+ return 3;
+ case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
+ out.append(iDotTilde);
+ return 3;
+ default:
+ return 0; /* will not occur */
+ }
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
}
/* # Turkish and Azeri */
- } else if(loc==LOC_TURKISH && c==0x130) {
+ } else if(caseLocale==LOC_TURKISH && c==0x130) {
/*
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases.
@@ -890,7 +900,7 @@
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
return 0x69;
- } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
+ } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
/*
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above
@@ -899,7 +909,7 @@
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/
return 0; /* remove the dot (continue without output) */
- } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
+ } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
/*
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
@@ -913,8 +923,12 @@
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
- out.append(iDot);
- return 2;
+ try {
+ out.append(iDot);
+ return 2;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
} else if( c==0x3a3 &&
!isFollowedByCasedLetter(iter, 1) &&
isFollowedByCasedLetter(iter, -1) /* -1=preceded */
@@ -936,11 +950,15 @@
/* start of full case mapping strings */
excOffset=(int)(value>>32)+1;
- /* set the output pointer to the lowercase mapping */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the lowercase mapping
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
@@ -954,8 +972,8 @@
/* internal */
private final int toUpperOrTitle(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache,
+ Appendable out,
+ int loc,
boolean upperNotTitle) {
int result;
int props;
@@ -968,15 +986,13 @@
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full, index;
excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */
- int loc=getCaseLocale(locale, locCache);
-
if(loc==LOC_TURKISH && c==0x69) {
/*
# Turkish and Azeri
@@ -1026,11 +1042,15 @@
}
if(full!=0) {
- /* set the output pointer to the result string */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the result string
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
@@ -1049,15 +1069,15 @@
}
public final int toFullUpper(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
- return toUpperOrTitle(c, iter, out, locale, locCache, true);
+ Appendable out,
+ int caseLocale) {
+ return toUpperOrTitle(c, iter, out, caseLocale, true);
}
public final int toFullTitle(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
- return toUpperOrTitle(c, iter, out, locale, locCache, false);
+ Appendable out,
+ int caseLocale) {
+ return toUpperOrTitle(c, iter, out, caseLocale, false);
}
/* case folding ------------------------------------------------------------- */
@@ -1117,7 +1137,7 @@
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index;
if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
/* special case folding mappings, hardcoded */
@@ -1168,7 +1188,7 @@
* together in a way that they still fold to common result strings.
*/
- public final int toFullFolding(int c, StringBuilder out, int options) {
+ public final int toFullFolding(int c, Appendable out, int options) {
int result;
int props;
@@ -1180,7 +1200,7 @@
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full, index;
excOffset2=excOffset;
@@ -1194,8 +1214,12 @@
return 0x69;
} else if(c==0x130) {
/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
- out.append(iDot);
- return 2;
+ try {
+ out.append(iDot);
+ return 2;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
} else {
/* Turkic mappings */
@@ -1219,11 +1243,15 @@
full=(full>>4)&0xf;
if(full!=0) {
- /* set the output pointer to the result string */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the result string
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
@@ -1242,7 +1270,6 @@
/* case mapping properties API ---------------------------------------------- */
- private static final int[] rootLocCache = { LOC_ROOT };
/*
* We need a StringBuilder for multi-code point output from the
* full case mapping functions. However, we do not actually use that output,
@@ -1282,20 +1309,20 @@
*/
case UProperty.CHANGES_WHEN_LOWERCASED:
dummyStringBuilder.setLength(0);
- return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_UPPERCASED:
dummyStringBuilder.setLength(0);
- return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_TITLECASED:
dummyStringBuilder.setLength(0);
- return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
/* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
case UProperty.CHANGES_WHEN_CASEMAPPED:
dummyStringBuilder.setLength(0);
return
- toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
- toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
- toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+ toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+ toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
default:
return false;
}
@@ -1303,7 +1330,7 @@
// data members -------------------------------------------------------- ***
private int indexes[];
- private char exceptions[];
+ private String exceptions;
private char unfold[];
private Trie2_16 trie;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
index 40fecc7..65cebb3 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
@@ -15,8 +15,7 @@
import java.util.Locale;
import java.util.Map;
-import com.ibm.icu.impl.CaseMap;
-import com.ibm.icu.impl.CaseMap.StringContextIterator;
+import com.ibm.icu.impl.CaseMapImpl;
import com.ibm.icu.impl.IllegalIcuArgumentException;
import com.ibm.icu.impl.Trie2;
import com.ibm.icu.impl.UBiDiProps;
@@ -29,6 +28,7 @@
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.Edits;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ULocale;
@@ -4875,7 +4875,7 @@
*/
public static String toUpperCase(String str)
{
- return toUpperCase(ULocale.getDefault(), str);
+ return toUpperCase(getDefaultCaseLocale(), str);
}
/**
@@ -4887,7 +4887,7 @@
*/
public static String toLowerCase(String str)
{
- return toLowerCase(ULocale.getDefault(), str);
+ return toLowerCase(getDefaultCaseLocale(), str);
}
/**
@@ -4910,7 +4910,94 @@
*/
public static String toTitleCase(String str, BreakIterator breakiter)
{
- return toTitleCase(ULocale.getDefault(), str, breakiter);
+ return toTitleCase(Locale.getDefault(), str, breakiter, 0);
+ }
+
+    /** Returns the case-locale value that UCaseProps derives from the default Locale. */
+    private static int getDefaultCaseLocale() {
+        return UCaseProps.getCaseLocale(Locale.getDefault());
+    }
+
+    // The getCaseLocale() overloads below treat a null locale as "use the default locale"
+    // before asking UCaseProps for the matching case-locale value.
+    private static int getCaseLocale(Locale locale) {
+        Locale effective = (locale != null) ? locale : Locale.getDefault();
+        return UCaseProps.getCaseLocale(effective);
+    }
+
+    /** ULocale flavor of {@link #getCaseLocale(Locale)}; null means ULocale.getDefault(). */
+    private static int getCaseLocale(ULocale locale) {
+        ULocale effective = (locale != null) ? locale : ULocale.getDefault();
+        return UCaseProps.getCaseLocale(effective);
+    }
+
+    private static String toLowerCase(int caseLocale, String str) {
+        final int srcLength = str.length();
+        if (srcLength == 0) {
+            return str;
+        }
+        if (srcLength > 100) {  // long text: map everything into a presized buffer
+            return CaseMapImpl.toLower(
+                    caseLocale, 0, str, new StringBuilder(srcLength), null).toString();
+        }
+        // Short text: collect only the changed pieces and splice them back into str.
+        // Cheap when little or nothing changes; slow when most of the text changes.
+        Edits changes = new Edits();
+        StringBuilder changedText = CaseMapImpl.toLower(
+                caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), changes);
+        return applyEdits(str, changedText, changes);
+    }
+
+    private static String toUpperCase(int caseLocale, String str) {
+        final int srcLength = str.length();
+        if (srcLength == 0) {
+            return str;
+        }
+        if (srcLength > 100) {  // long text: map everything into a presized buffer
+            return CaseMapImpl.toUpper(
+                    caseLocale, 0, str, new StringBuilder(srcLength), null).toString();
+        }
+        // Short text: collect only the changed pieces and splice them back into str.
+        // Cheap when little or nothing changes; slow when most of the text changes.
+        Edits changes = new Edits();
+        StringBuilder changedText = CaseMapImpl.toUpper(
+                caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), changes);
+        return applyEdits(str, changedText, changes);
+    }
+
+    private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
+        final int srcLength = str.length();
+        if (srcLength == 0) {
+            return str;
+        }
+        if (srcLength > 100) {  // long text: map everything into a presized buffer
+            return CaseMapImpl.toTitle(
+                    caseLocale, options, titleIter, str, new StringBuilder(srcLength), null).toString();
+        }
+        // Short text: collect only the changed pieces and splice them back into str.
+        // Cheap when little or nothing changes; slow when most of the text changes.
+        Edits changes = new Edits();
+        int changesOnly = options | CaseMapImpl.OMIT_UNCHANGED_TEXT;
+        StringBuilder changedText = CaseMapImpl.toTitle(
+                caseLocale, changesOnly, titleIter, str, new StringBuilder(), changes);
+        return applyEdits(str, changedText, changes);
+    }
+
+    private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
+        if (!edits.hasChanges()) {
+            return str;  // nothing was mapped: hand back the caller's own String
+        }
+        StringBuilder out = new StringBuilder(str.length() + edits.lengthDelta());
+        Edits.Iterator span = edits.getCoarseIterator();
+        while (span.next()) {
+            // Changed spans are copied from replacementChars, unchanged spans from str.
+            boolean changed = span.hasChange();
+            CharSequence from = changed ? replacementChars : str;
+            int start = changed ? span.replacementIndex() : span.sourceIndex();
+            int limit = start + (changed ? span.newLength() : span.oldLength());
+            out.append(from, start, limit);
+        }
+        return out.toString();
}
/**
@@ -4923,7 +5010,7 @@
*/
public static String toUpperCase(Locale locale, String str)
{
- return toUpperCase(ULocale.forLocale(locale), str);
+ return toUpperCase(getCaseLocale(locale), str);
}
/**
@@ -4935,7 +5022,7 @@
* @stable ICU 3.2
*/
public static String toUpperCase(ULocale locale, String str) {
- return CaseMap.toUpper(locale, str);
+ return toUpperCase(getCaseLocale(locale), str);
}
/**
@@ -4948,7 +5035,7 @@
*/
public static String toLowerCase(Locale locale, String str)
{
- return toLowerCase(ULocale.forLocale(locale), str);
+ return toLowerCase(getCaseLocale(locale), str);
}
/**
@@ -4960,31 +5047,7 @@
* @stable ICU 3.2
*/
public static String toLowerCase(ULocale locale, String str) {
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int[] locCache = new int[1];
- int c;
-
- if (locale == null) {
- locale = ULocale.getDefault();
- }
- locCache[0]=0;
-
- while((c=iter.nextCaseMapCP())>=0) {
- c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
-
- /* decode the result */
- if(c<0) {
- /* (not) original code point */
- c=~c;
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- continue;
- /* } else { append single-code point mapping */
- }
- result.appendCodePoint(c);
- }
- return result.toString();
+ return toLowerCase(getCaseLocale(locale), str);
}
/**
@@ -5009,7 +5072,7 @@
public static String toTitleCase(Locale locale, String str,
BreakIterator breakiter)
{
- return toTitleCase(ULocale.forLocale(locale), str, breakiter);
+ return toTitleCase(locale, str, breakiter, 0);
}
/**
@@ -5059,126 +5122,15 @@
* @see #TITLECASE_NO_BREAK_ADJUSTMENT
*/
public static String toTitleCase(ULocale locale, String str,
- BreakIterator titleIter,
- int options) {
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int[] locCache = new int[1];
- int c, nc, srcLength = str.length();
-
- if (locale == null) {
- locale = ULocale.getDefault();
- }
- locCache[0]=0;
-
+ BreakIterator titleIter, int options) {
if(titleIter == null) {
+ if (locale == null) {
+ locale = ULocale.getDefault();
+ }
titleIter = BreakIterator.getWordInstance(locale);
}
titleIter.setText(str);
-
- int prev, titleStart, index;
- boolean isFirstIndex;
- boolean isDutch = locale.getLanguage().equals("nl");
- boolean FirstIJ = true;
-
- /* set up local variables */
- prev=0;
- isFirstIndex=true;
-
- /* titlecasing loop */
- while(prev<srcLength) {
- /* find next index where to titlecase */
- if(isFirstIndex) {
- isFirstIndex=false;
- index=titleIter.first();
- } else {
- index=titleIter.next();
- }
- if(index==BreakIterator.DONE || index>srcLength) {
- index=srcLength;
- }
-
- /*
- * Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * In this implementation, segment [prev..index[ into 3 parts:
- * a) uncased characters (copy as-is) [prev..titleStart[
- * b) first case letter (titlecase) [titleStart..titleLimit[
- * c) subsequent characters (lowercase) [titleLimit..index[
- */
- if(prev<index) {
- /* find and copy uncased characters [prev..titleStart[ */
- iter.setLimit(index);
- c=iter.nextCaseMapCP();
- if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
- && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
- while((c=iter.nextCaseMapCP())>=0
- && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
- titleStart=iter.getCPStart();
- if(prev<titleStart) {
- result.append(str, prev, titleStart);
- }
- } else {
- titleStart=prev;
- }
-
- if(titleStart<index) {
- FirstIJ = true;
- /* titlecase c which is from titleStart */
- c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
-
- /* decode the result and lowercase up to index */
- for(;;) {
- if(c<0) {
- /* (not) original code point */
- c=~c;
- result.appendCodePoint(c);
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- } else {
- /* append single-code point mapping */
- result.appendCodePoint(c);
- }
-
- if((options&TITLECASE_NO_LOWERCASE)!=0) {
- /* Optionally just copy the rest of the word unchanged. */
-
- int titleLimit=iter.getCPLimit();
- if(titleLimit<index) {
- /* Special Case - Dutch IJ Titlecasing */
- if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
- result.append('J').append(str, titleLimit + 1, index);
- } else {
- result.append(str, titleLimit, index);
- }
- }
- iter.moveToLimit();
- break;
- } else if((nc=iter.nextCaseMapCP())>=0) {
- if (isDutch && (nc == 0x004A || nc == 0x006A)
- && (c == 0x0049) && (FirstIJ == true)) {
- c = 0x004A; /* J */
- FirstIJ = false;
- } else {
- /* Normal operation: Lowercase the rest of the word. */
- c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
- locCache);
- }
- } else {
- break;
- }
- }
- }
- }
-
- prev=index;
- }
- return result.toString();
+ return toTitleCase(getCaseLocale(locale), options, titleIter, str);
}
@@ -5281,7 +5233,11 @@
public static String toTitleCase(Locale locale, String str,
BreakIterator titleIter,
int options) {
- return toTitleCase(ULocale.forLocale(locale), str, titleIter, options);
+ if(titleIter == null) { // null locale: fall back to the default, as getCaseLocale() does
+ titleIter = BreakIterator.getWordInstance(locale != null ? locale : Locale.getDefault());
+ }
+ titleIter.setText(str);
+ return toTitleCase(getCaseLocale(locale), options, titleIter, str);
}
/**
@@ -5398,27 +5354,19 @@
* @stable ICU 2.6
*/
public static final String foldCase(String str, int options) {
- StringBuilder result = new StringBuilder(str.length());
- int c, i, length;
-
- length = str.length();
- for(i=0; i<length;) {
- c=str.codePointAt(i);
- i+=Character.charCount(c);
- c = UCaseProps.INSTANCE.toFullFolding(c, result, options);
-
- /* decode the result */
- if(c<0) {
- /* (not) original code point */
- c=~c;
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- continue;
- /* } else { append single-code point mapping */
+ if (str.length() <= 100) {
+ if (str.isEmpty()) {
+ return str;
}
- result.appendCodePoint(c);
+ // Collect and apply only changes.
+ // Good if no or few changes. Bad (slow) if many changes.
+ Edits edits = new Edits();
+ StringBuilder replacementChars = CaseMapImpl.fold(
+ options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
+ return applyEdits(str, replacementChars, edits);
+ } else {
+ return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString();
}
- return result.toString();
}
/**
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java
new file mode 100644
index 0000000..e998c66
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java
@@ -0,0 +1,339 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.text;
+
+import java.util.Locale;
+
+import com.ibm.icu.impl.CaseMapImpl;
+import com.ibm.icu.impl.UCaseProps;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.util.ULocale;
+
+/**
+ * Low-level case mapping options and methods. Immutable.
+ * "Setters" return instances with the union of the current and new options set.
+ *
+ * This class is not intended for public subclassing.
+ *
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+public abstract class CaseMap {
+ /**
+ * @internal
+ * @deprecated This API is ICU internal only.
+ */
+ @Deprecated
+ protected int internalOptions;
+
+ private CaseMap(int opt) { internalOptions = opt; }
+
+    private static int getCaseLocale(Locale locale) {
+        // The apply() methods document that a null locale is allowed;
+        // it stands for the current default locale.
+        Locale effective = (locale != null) ? locale : Locale.getDefault();
+        return UCaseProps.getCaseLocale(effective);
+    }
+
+ /**
+ * @return Lowercasing object with default options.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Lower toLower() { return Lower.DEFAULT; }
+ /**
+ * @return Uppercasing object with default options.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Upper toUpper() { return Upper.DEFAULT; }
+ /**
+ * @return Titlecasing object with default options.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Title toTitle() { return Title.DEFAULT; }
+ /**
+ * @return Case folding object with default options.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static Fold fold() { return Fold.DEFAULT; }
+
+ /**
+ * Returns an instance that behaves like this one but
+ * omits unchanged text when case-mapping with {@link Edits}.
+ *
+ * @return an options object with this option.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public abstract CaseMap omitUnchangedText();
+
+ /**
+ * Lowercasing options and methods. Immutable.
+ *
+ * @see #toLower()
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Lower extends CaseMap {
+ private static final Lower DEFAULT = new Lower(0);
+ private static final Lower OMIT_UNCHANGED = new Lower(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Lower(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public Lower omitUnchangedText() {
+ return OMIT_UNCHANGED;
+ }
+
+ /**
+ * Lowercases a string and optionally records edits (see {@link #omitUnchangedText}).
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
+ * (See {@link ULocale#toLocale}.)
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toLowerCase(Locale, String)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+        public <A extends Appendable> A apply(Locale locale, CharSequence src, A dest, Edits edits) {
+            int caseLocale = getCaseLocale(locale);  // a null locale selects the default locale
+            return CaseMapImpl.toLower(caseLocale, internalOptions, src, dest, edits);
+        }
+ }
+
+ /**
+ * Uppercasing options and methods. Immutable.
+ *
+ * @see #toUpper()
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Upper extends CaseMap {
+ private static final Upper DEFAULT = new Upper(0);
+ private static final Upper OMIT_UNCHANGED = new Upper(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Upper(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ @Override
+ public Upper omitUnchangedText() {
+ return OMIT_UNCHANGED;
+ }
+
+ /**
+ * Uppercases a string and optionally records edits (see {@link #omitUnchangedText}).
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
+ * (See {@link ULocale#toLocale}.)
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toUpperCase(Locale, String)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+        public <A extends Appendable> A apply(Locale locale, CharSequence src, A dest, Edits edits) {
+            int caseLocale = getCaseLocale(locale);  // a null locale selects the default locale
+            return CaseMapImpl.toUpper(caseLocale, internalOptions, src, dest, edits);
+        }
+ }
+
+ /**
+ * Titlecasing options and methods. Immutable.
+ *
+ * @see #toTitle()
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Title extends CaseMap {
+ private static final Title DEFAULT = new Title(0);
+ private static final Title OMIT_UNCHANGED = new Title(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Title(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+        @Override
+        public Title omitUnchangedText() {
+            // Without other option bits the shared constant already describes the result.
+            boolean plain = internalOptions == 0 || internalOptions == CaseMapImpl.OMIT_UNCHANGED_TEXT;
+            return plain ? OMIT_UNCHANGED
+                    : new Title(internalOptions | CaseMapImpl.OMIT_UNCHANGED_TEXT);
+        }
+
+ /**
+ * Returns an instance that behaves like this one but
+ * does not lowercase non-initial parts of words when titlecasing.
+ *
+ * <p>By default, titlecasing will titlecase the first cased character
+ * of a word and lowercase all other characters.
+ * With this option, the other characters will not be modified.
+ *
+ * @return an options object with this option.
+ * @see UCharacter#TITLECASE_NO_LOWERCASE
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Title noLowercase() {
+ return new Title(internalOptions | UCharacter.TITLECASE_NO_LOWERCASE);
+ }
+
+ // TODO: update references to the Unicode Standard for recent version
+ /**
+ * Returns an instance that behaves like this one but
+ * does not adjust the titlecasing indexes from {@link BreakIterator#next()} indexes;
+ * titlecases exactly the characters at breaks from the iterator.
+ *
+ * <p>By default, titlecasing will take each break iterator index,
+ * adjust it by looking for the next cased character, and titlecase that one.
+ * Other characters are lowercased.
+ *
+ * <p>This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * @return an options object with this option.
+ * @see UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Title noBreakAdjustment() {
+ return new Title(internalOptions | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
+ }
+
+ /**
+ * Titlecases a string and optionally records edits (see {@link #omitUnchangedText}).
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * <p>Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. Can be null for {@link Locale#getDefault}.
+ * (See {@link ULocale#toLocale}.)
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setText())
+ * and used one or more times for iteration (first() and next()).
+ * If null, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toTitleCase(Locale, String, BreakIterator, int)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+        public <A extends Appendable> A apply(
+                Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
+            if (iter == null) {
+                // A null locale is documented to mean Locale.getDefault(); resolve it before the lookup.
+                iter = BreakIterator.getWordInstance(locale != null ? locale : Locale.getDefault());
+            }
+            iter.setText(src.toString());
+            return CaseMapImpl.toTitle(getCaseLocale(locale), internalOptions, iter, src, dest, edits);
+        }
+ }
+
+ /**
+ * Case folding options and methods. Immutable.
+ *
+ * @see #fold()
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Fold extends CaseMap {
+ private static final Fold DEFAULT = new Fold(0);
+ private static final Fold TURKIC = new Fold(UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I);
+ private static final Fold OMIT_UNCHANGED = new Fold(CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private static final Fold TURKIC_OMIT_UNCHANGED = new Fold(
+ UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I | CaseMapImpl.OMIT_UNCHANGED_TEXT);
+ private Fold(int opt) { super(opt); }
+
+ /**
+ * {@inheritDoc}
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+        @Override
+        public Fold omitUnchangedText() {
+            boolean turkic = (internalOptions & UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I) != 0;
+            return turkic ? TURKIC_OMIT_UNCHANGED : OMIT_UNCHANGED;
+        }
+
+ /**
+ * Returns an instance that behaves like this one but
+ * handles dotted I and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * <p>Uses the Unicode CaseFolding.txt mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @return an options object with this option.
+ * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+        public Fold turkic() {
+            boolean omitUnchanged = (internalOptions & CaseMapImpl.OMIT_UNCHANGED_TEXT) != 0;
+            return omitUnchanged ? TURKIC_OMIT_UNCHANGED : TURKIC;
+        }
+
+ /**
+ * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}).
+ *
+ * <p>Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * <p>The result may be longer or shorter than the original.
+ *
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#foldCase(String, int)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public <A extends Appendable> A apply(CharSequence src, A dest, Edits edits) {
+ return CaseMapImpl.fold(internalOptions, src, dest, edits);
+ }
+ }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
new file mode 100644
index 0000000..f9cbf9f
--- /dev/null
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
@@ -0,0 +1,494 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html#License
+package com.ibm.icu.text;
+
+import java.nio.BufferOverflowException;
+import java.util.Arrays;
+
+/**
+ * Records lengths of string edits but not replacement text.
+ * Supports replacements, insertions, deletions in linear progression.
+ * Does not support moving/reordering of text.
+ *
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+public final class Edits {
+ // 0000uuuuuuuuuuuu records u+1 unchanged text units.
+ private static final int MAX_UNCHANGED_LENGTH = 0x1000;
+ private static final int MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1;
+
+ // 0wwwcccccccccccc with w=1..6 records ccc+1 replacements of w:w text units.
+ // No length change.
+ private static final int MAX_SHORT_WIDTH = 6;
+ private static final int MAX_SHORT_CHANGE_LENGTH = 0xfff;
+ private static final int MAX_SHORT_CHANGE = 0x6fff;
+
+ // 0111mmmmmmnnnnnn records a replacement of m text units with n.
+ // m or n = 61: actual length follows in the next edits array unit.
+ // m or n = 62..63: actual length follows in the next two edits array units.
+ // Bit 30 of the actual length is in the head unit.
+ // Trailing units have bit 15 set.
+ private static final int LENGTH_IN_1TRAIL = 61;
+ private static final int LENGTH_IN_2TRAIL = 62;
+
+ private static final int STACK_CAPACITY = 100;
+ private char[] array;
+ private int length;
+ private int delta;
+
+ /**
+ * Constructs an empty object.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Edits() {
+ array = new char[STACK_CAPACITY];
+ }
+
+ /**
+ * Resets the data but may not release memory.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public void reset() {
+ length = delta = 0;
+ }
+
+ private void setLastUnit(int last) {
+ array[length - 1] = (char)last;
+ }
+ private int lastUnit() {
+ return length > 0 ? array[length - 1] : 0xffff;
+ }
+
+ /**
+ * Adds a record for an unchanged segment of text.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+    public void addUnchanged(int unchangedLength) {
+        if (unchangedLength < 0) {
+            throw new IllegalArgumentException(
+                    "addUnchanged(" + unchangedLength + "): length must not be negative");
+        }
+        int pending = unchangedLength;
+        // First top up a trailing unchanged-text unit that still has room.
+        int tail = lastUnit();
+        if (tail < MAX_UNCHANGED) {
+            int room = MAX_UNCHANGED - tail;
+            if (pending <= room) {
+                setLastUnit(tail + pending);
+                return;
+            }
+            setLastUnit(MAX_UNCHANGED);
+            pending -= room;
+        }
+        // Then emit one full unit per MAX_UNCHANGED_LENGTH text units ...
+        for (; pending >= MAX_UNCHANGED_LENGTH; pending -= MAX_UNCHANGED_LENGTH) {
+            append(MAX_UNCHANGED);
+        }
+        // ... and finally a partial unit; a unit value u stands for u+1 text units.
+        if (pending > 0) {
+            append(pending - 1);
+        }
+    }
+
+ /**
+ * Adds a record for a text replacement/insertion/deletion.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public void addReplace(int oldLength, int newLength) {
+ if(oldLength == newLength && 0 < oldLength && oldLength <= MAX_SHORT_WIDTH) {
+ // Replacement of short oldLength text units by same-length new text.
+ // Merge into previous short-replacement record, if any.
+ int last = lastUnit();
+ if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE &&
+ (last >> 12) == oldLength && (last & 0xfff) < MAX_SHORT_CHANGE_LENGTH) {
+ setLastUnit(last + 1);
+ return;
+ }
+ append(oldLength << 12);
+ return;
+ }
+
+ if(oldLength < 0 || newLength < 0) {
+ throw new IllegalArgumentException(
+ "addReplace(" + oldLength + ", " + newLength +
+ "): both lengths must be non-negative");
+ }
+ if (oldLength == 0 && newLength == 0) {
+ return;
+ }
+ int newDelta = newLength - oldLength;
+ if (newDelta != 0) {
+ if ((newDelta > 0 && delta >= 0 && newDelta > (Integer.MAX_VALUE - delta)) ||
+ (newDelta < 0 && delta < 0 && newDelta < (Integer.MIN_VALUE - delta))) {
+ // Integer overflow or underflow.
+ throw new IndexOutOfBoundsException();
+ }
+ delta += newDelta;
+ }
+
+ int head = 0x7000;
+ if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) {
+ head |= oldLength << 6;
+ head |= newLength;
+ append(head);
+ } else if ((array.length - length) >= 5 || growArray()) {
+ int limit = length + 1;
+ if(oldLength < LENGTH_IN_1TRAIL) {
+ head |= oldLength << 6;
+ } else if(oldLength <= 0x7fff) {
+ head |= LENGTH_IN_1TRAIL << 6;
+ array[limit++] = (char)(0x8000 | oldLength);
+ } else {
+ head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6;
+ array[limit++] = (char)(0x8000 | (oldLength >> 15));
+ array[limit++] = (char)(0x8000 | oldLength);
+ }
+ if(newLength < LENGTH_IN_1TRAIL) {
+ head |= newLength;
+ } else if(newLength <= 0x7fff) {
+ head |= LENGTH_IN_1TRAIL;
+ array[limit++] = (char)(0x8000 | newLength);
+ } else {
+ head |= LENGTH_IN_2TRAIL + (newLength >> 30);
+ array[limit++] = (char)(0x8000 | (newLength >> 15));
+ array[limit++] = (char)(0x8000 | newLength);
+ }
+ array[length] = (char)head;
+ length = limit;
+ }
+ }
+
+ private void append(int r) {
+ if(length < array.length || growArray()) {
+ array[length++] = (char)r;
+ }
+ }
+
+    private boolean growArray() {
+        final int oldCapacity = array.length;
+        if (oldCapacity == Integer.MAX_VALUE) {
+            throw new BufferOverflowException();
+        }
+        final int newCapacity;
+        if (oldCapacity == STACK_CAPACITY) {
+            newCapacity = 2000;  // first growth jumps from the initial capacity
+        } else {  // double, clamped so that the doubling cannot overflow an int
+            newCapacity = oldCapacity >= (Integer.MAX_VALUE / 2)
+                    ? Integer.MAX_VALUE : 2 * oldCapacity;
+        }
+        if ((newCapacity - oldCapacity) < 5) {  // a maximal change record needs 5 units
+            throw new BufferOverflowException();
+        }
+        array = Arrays.copyOf(array, newCapacity);
+        return true;
+    }
+
+ /**
+ * How much longer is the new text compared with the old text?
+ * @return new length minus old length
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int lengthDelta() { return delta; }
+ /**
+ * @return true if there are any change edits
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+    public boolean hasChanges() {
+        // Replacements that alter the length show up in delta right away.
+        boolean found = delta != 0;
+        // Same-length replacements leave delta at 0,
+        // so also look for any recorded unit
+        // whose value lies above the unchanged-text range.
+        for (int i = 0; !found && i < length; ++i) {
+            found = array[i] > MAX_UNCHANGED;
+        }
+        return found;
+    }
+
+ /**
+ * Access to the list of edits.
+ * @see #getCoarseIterator
+ * @see #getFineIterator
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final class Iterator {
+ private final char[] array;
+ private int index;
+ private final int length;
+ private int remaining;
+ private final boolean onlyChanges_, coarse;
+
+ private boolean changed;
+ private int oldLength_, newLength_;
+ private int srcIndex, replIndex, destIndex;
+
+ private Iterator(char[] a, int len, boolean oc, boolean crs) {
+ array = a;
+ length = len;
+ onlyChanges_ = oc;
+ coarse = crs;
+ }
+
+ private int readLength(int head) {
+ if (head < LENGTH_IN_1TRAIL) {
+ return head;
+ } else if (head < LENGTH_IN_2TRAIL) {
+ assert(index < length);
+ assert(array[index] >= 0x8000);
+ return array[index++] & 0x7fff;
+ } else {
+ assert((index + 2) <= length);
+ assert(array[index] >= 0x8000);
+ assert(array[index + 1] >= 0x8000);
+ int len = ((head & 1) << 30) |
+ ((array[index] & 0x7fff) << 15) |
+ (array[index + 1] & 0x7fff);
+ index += 2;
+ return len;
+ }
+ }
+
+ private void updateIndexes() {
+ srcIndex += oldLength_;
+ if (changed) {
+ replIndex += newLength_;
+ }
+ destIndex += newLength_;
+ }
+
+ private boolean noNext() {
+ // No change beyond the string.
+ changed = false;
+ oldLength_ = newLength_ = 0;
+ return false;
+ }
+
+ /**
+ * Advances to the next edit.
+ * @return true if there is another edit
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean next() {
+ return next(onlyChanges_);
+ }
+
+ private boolean next(boolean onlyChanges) {
+ // Forward iteration: first move the string indexes past the current span,
+ // then read one or more array units to assemble the next span.
+ updateIndexes();
+ if (remaining > 0) {
+ // Fine-grained iterator: Continue a sequence of equal-length changes.
+ --remaining;
+ return true;
+ }
+ if (index >= length) {
+ return noNext();
+ }
+ int u = array[index++];
+ if (u <= MAX_UNCHANGED) {
+ // Combine adjacent unchanged ranges.
+ changed = false;
+ oldLength_ = u + 1;
+ while (index < length && (u = array[index]) <= MAX_UNCHANGED) {
+ ++index;
+ oldLength_ += u + 1;
+ }
+ newLength_ = oldLength_;
+ if (onlyChanges) {
+ updateIndexes();
+ if (index >= length) {
+ return noNext();
+ }
+ // already fetched u > MAX_UNCHANGED at index
+ ++index;
+ } else {
+ return true;
+ }
+ }
+ changed = true;
+ if (u <= MAX_SHORT_CHANGE) {
+ if (coarse) {
+ int w = u >> 12;
+ int len = (u & 0xfff) + 1;
+ oldLength_ = newLength_ = len * w;
+ } else {
+ // Split a sequence of equal-length changes that was compressed into one unit.
+ oldLength_ = newLength_ = u >> 12;
+ remaining = u & 0xfff;
+ return true;
+ }
+ } else {
+ assert(u <= 0x7fff);
+ oldLength_ = readLength((u >> 6) & 0x3f);
+ newLength_ = readLength(u & 0x3f);
+ if (!coarse) {
+ return true;
+ }
+ }
+ // Combine adjacent changes.
+ while (index < length && (u = array[index]) > MAX_UNCHANGED) {
+ ++index;
+ if (u <= MAX_SHORT_CHANGE) {
+ int w = u >> 12;
+ int len = (u & 0xfff) + 1;
+ len = len * w;
+ oldLength_ += len;
+ newLength_ += len;
+ } else {
+ assert(u <= 0x7fff);
+ int oldLen = readLength((u >> 6) & 0x3f);
+ int newLen = readLength(u & 0x3f);
+ oldLength_ += oldLen;
+ newLength_ += newLen;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Finds the edit that contains the source index.
+ * The source index may be found in a non-change
+ * even if normal iteration would skip non-changes.
+ * Normal iteration can continue from a found edit.
+ *
+ * <p>The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * <p>The iterator state after this search is undefined
+ * if the source index is out of bounds for the source string.
+ *
+ * @param i source index; a negative value makes this method return false without moving the iterator
+ * @return true if the edit for the source index was found
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean findSourceIndex(int i) {
+ if (i < 0) { return false; }
+ if (i < srcIndex) {
+ // Reset the iterator to the start.
+ index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0;
+ } else if (i < (srcIndex + oldLength_)) {
+ // The index is in the current span.
+ return true;
+ }
+ while (next(false)) { // false: also stop at unchanged spans, whatever onlyChanges_ says
+ if (i < (srcIndex + oldLength_)) {
+ // The index is in the current span.
+ return true;
+ }
+ if (remaining > 0) {
+ // Is the index in one of the remaining compressed edits?
+ // srcIndex is the start of the current span, before the remaining ones.
+ int len = (remaining + 1) * oldLength_; // current edit plus all pending ones
+ if (i < (srcIndex + len)) {
+ int n = (i - srcIndex) / oldLength_; // 1 <= n <= remaining
+ len = n * oldLength_; // old and new lengths are equal here, so one delta serves all offsets
+ srcIndex += len;
+ replIndex += len;
+ destIndex += len;
+ remaining -= n;
+ return true;
+ }
+ // Make next() skip all of these edits at once.
+ oldLength_ = newLength_ = len;
+ remaining = 0;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * @return true if this edit replaces oldLength() units with newLength() different ones.
+ * false if oldLength() units remain unchanged, and also once next() has returned false.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public boolean hasChange() { return changed; }
+ /**
+ * @return the number of units in the original string which are replaced or remain unchanged; 0 at the end.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int oldLength() { return oldLength_; }
+ /**
+ * @return the number of units in the modified string, if hasChange() is true.
+ * Same as oldLength() if hasChange() is false.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int newLength() { return newLength_; }
+
+ /**
+ * @return the index into the source string at which the current edit starts
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int sourceIndex() { return srcIndex; }
+ /**
+ * @return the index at which the current edit starts in the replacement-characters-only string,
+ * not counting unchanged spans
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int replacementIndex() { return replIndex; }
+ /**
+ * @return the index into the full destination string at which the current edit starts
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public int destinationIndex() { return destIndex; }
+ };
+
+ /**
+ * Returns an Iterator for coarse-grained changes for simple string updates.
+ * Skips non-changes.
+ * @return a new Iterator, positioned before the first edit, that merges adjacent changes.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Iterator getCoarseChangesIterator() {
+ return new Iterator(array, length, true, true); // onlyChanges=true, coarse=true
+ }
+
+ /**
+ * Returns an Iterator for coarse-grained changes and non-changes for simple string updates.
+ * @return a new Iterator, positioned before the first edit, that merges adjacent changes.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Iterator getCoarseIterator() {
+ return new Iterator(array, length, false, true); // onlyChanges=false, coarse=true
+ }
+
+ /**
+ * Returns an Iterator for fine-grained changes for modifying styled text.
+ * Skips non-changes.
+ * @return a new Iterator, positioned before the first edit, that separates adjacent changes.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Iterator getFineChangesIterator() {
+ return new Iterator(array, length, true, false); // onlyChanges=true, coarse=false
+ }
+
+ /**
+ * Returns an Iterator for fine-grained changes and non-changes for modifying styled text.
+ * @return a new Iterator, positioned before the first edit, that separates adjacent changes.
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public Iterator getFineIterator() {
+ return new Iterator(array, length, false, false); // onlyChanges=false, coarse=false
+ }
+}
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java b/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java
index f31f5d5..9b2dd30 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/MessageFormat.java
@@ -139,8 +139,8 @@
* and unquoted {curly braces} must occur in matched pairs.
* </ul>
*
- * <p>Recommendation: Use the real apostrophe (single quote) character \\u2019 for
- * human-readable text, and use the ASCII apostrophe (\\u0027 ' )
+ * <p>Recommendation: Use the real apostrophe (single quote) character \u2019 for
+ * human-readable text, and use the ASCII apostrophe (\u0027 ' )
* only in program syntax, like quoting in MessageFormat.
* See the annotations for U+0027 Apostrophe in The Unicode Standard.
*
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
index 5f6e140..106259f 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@@ -3409,7 +3409,7 @@
* property alias, or a special ID. Special IDs are matched
* loosely and correspond to the following sets:
*
- * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ANY" = [\\u0000-\\U0010FFFF],
* "ASCII" = [\\u0000-\\u007F].
*
* @param valueAlias a value alias, either short or long. The
@@ -3866,7 +3866,6 @@
int n = getRangeCount();
int result;
StringBuilder full = new StringBuilder();
- int locCache[] = new int[1];
for (int i=0; i<n; ++i) {
int start = getRangeStart(i);
@@ -3881,13 +3880,13 @@
// add case mappings
// (does not add long s for regular s, or Kelvin for k, for example)
for (int cp=start; cp<=end; ++cp) {
- result = csp.toFullLower(cp, null, full, root, locCache);
+ result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
- result = csp.toFullTitle(cp, null, full, root, locCache);
+ result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
- result = csp.toFullUpper(cp, null, full, root, locCache);
+ result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
result = csp.toFullFolding(cp, full, 0);
@@ -3906,6 +3905,7 @@
} else {
BreakIterator bi = BreakIterator.getWordInstance(root);
for (String str : strings) {
+ // TODO: call lower-level functions
foldSet.add(UCharacter.toLowerCase(root, str));
foldSet.add(UCharacter.toTitleCase(root, str, bi));
foldSet.add(UCharacter.toUpperCase(root, str));
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ChineseCalendar.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ChineseCalendar.java
index 13140f3..605479c 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/util/ChineseCalendar.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ChineseCalendar.java
@@ -169,7 +169,7 @@
* @param year The value used to set the calendar's {@link #YEAR YEAR} time field.
* @param month The value used to set the calendar's {@link #MONTH MONTH} time field.
* The value is 0-based. e.g., 0 for January.
- * @param isLeapMonth The value used to set the Chinese calendar's (@link #IS_LEAP_MONTH)
+ * @param isLeapMonth The value used to set the Chinese calendar's {@link #IS_LEAP_MONTH}
* time field.
* @param date The value used to set the calendar's {@link #DATE DATE} time field.
* @see Category#FORMAT
@@ -227,7 +227,7 @@
* @param year The value used to set the calendar's {@link #YEAR YEAR} time field.
* @param month The value used to set the calendar's {@link #MONTH MONTH} time field.
* The value is 0-based. e.g., 0 for January.
- * @param isLeapMonth The value used to set the Chinese calendar's (@link #IS_LEAP_MONTH)
+ * @param isLeapMonth The value used to set the Chinese calendar's {@link #IS_LEAP_MONTH}
* time field.
* @param date The value used to set the calendar's {@link #DATE DATE} time field.
* @see Category#FORMAT
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java
index 95bb60b..dfed352 100644
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java
@@ -44,7 +44,7 @@
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
@@ -56,8 +56,7 @@
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
@@ -85,7 +84,7 @@
iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) {
- c=csp.toFullLower(c, iter, result, locale, locCache);
+ c=csp.toFullLower(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java
index d3dc296..96f11c8 100644
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java
@@ -42,7 +42,7 @@
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
@@ -55,8 +55,7 @@
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
@@ -119,9 +118,9 @@
type=csp.getTypeOrIgnorable(c);
if(type>=0) { // not case-ignorable
if(doTitle) {
- c=csp.toFullTitle(c, iter, result, locale, locCache);
+ c=csp.toFullTitle(c, iter, result, caseLocale);
} else {
- c=csp.toFullLower(c, iter, result, locale, locCache);
+ c=csp.toFullLower(c, iter, result, caseLocale);
}
doTitle = type==0; // doTitle=isUncased
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java
index 77e2dfd..bd9e3fe 100644
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java
@@ -41,7 +41,7 @@
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
@@ -52,8 +52,7 @@
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
@@ -81,7 +80,7 @@
iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) {
- c=csp.toFullUpper(c, iter, result, locale, locCache);
+ c=csp.toFullUpper(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit
diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar
index e5ba5e5..348f30c 100644
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
Binary files differ
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java
index 07a7428..df95ccc 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/CalendarRegressionTest.java
@@ -2170,7 +2170,7 @@
{"en@calendar=islamic", "gregorian"},
{"zh_TW", "gregorian", "roc", "chinese"},
{"ar_IR", "gregorian", "persian", "islamic", "islamic-civil", "islamic-tbla"},
- {"th@rg=SAZZZZ", "islamic-umalqura", "gregorian", "islamic", "islamic-rgsa"},
+ {"th@rg=SAZZZZ", "gregorian", "islamic-umalqura", "islamic", "islamic-rgsa"},
};
// Android patch end.
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java
index bccfef6..dfefbf3 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/calendar/IBMCalendarTest.java
@@ -1118,8 +1118,8 @@
"gregorian",
"gregorian", // iso8601 is a gregorian sub type
"gregorian",
- "islamic-umalqura",
- "islamic-umalqura",
+ "gregorian",
+ "gregorian",
"japanese",
"gregorian",
"gregorian",
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java
index 2a42f7c..9f52775 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/format/ListFormatterTest.java
@@ -53,6 +53,40 @@
}
}
+ // Expected en_GB lists of 0 to 5 items. Tests resource loading and inheritance when a
+ // region sublocale has only partial data for the listPattern element (overriding
+ // some of the parent data). #12994
+ String[] EnglishGBTestData = {
+ "", // no items
+ "A",
+ "A and B",
+ "A, B and C", // no comma before "and"
+ "A, B, C and D",
+ "A, B, C, D and E"
+ };
+
+ @Test
+ public void TestEnglishGB() {
+ checkData(ListFormatter.getInstance(new ULocale("en_GB")), EnglishGBTestData); // checkData is declared outside this hunk
+ }
+
+ // Expected zh_Hant_HK lists of 0 to 5 items. Tests resource loading and inheritance when a
+ // region sublocale has only partial data for the listPattern element (overriding
+ // some of the parent data). #12994
+ String[] ChineseTradHKTestData = {
+ "", // no items
+ "A",
+ "A\u53CAB", // U+53CA joins the last two items
+ "A\u3001B\u53CAC", // U+3001 (ideographic comma) separates the earlier items
+ "A\u3001B\u3001C\u53CAD",
+ "A\u3001B\u3001C\u3001D\u53CAE"
+ };
+
+ @Test
+ public void TestChineseTradHK() {
+ checkData(ListFormatter.getInstance(new ULocale("zh_Hant_HK")), ChineseTradHKTestData); // checkData is declared outside this hunk
+ }
+
String[] JapaneseTestData = {
"",
"A",
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java
index 7ac358b..6f8a679 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java
@@ -24,6 +24,8 @@
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.CaseMap;
+import com.ibm.icu.text.Edits;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
@@ -708,6 +710,191 @@
assertGreekUpper("ρωμέικα", "ΡΩΜΕΪΚΑ");
}
+ private static final class EditChange { // one expected span, compared against an Edits.Iterator
+ private boolean change; // expected hasChange()
+ private int oldLength, newLength; // expected oldLength() / newLength()
+ EditChange(boolean change, int oldLength, int newLength) {
+ this.change = change;
+ this.oldLength = oldLength;
+ this.newLength = newLength;
+ }
+ }
+
+ private static void checkEditsIter( // ei1 is driven by next(), ei2 by findSourceIndex()
+ String name, Edits.Iterator ei1, Edits.Iterator ei2, // two equal iterators
+ EditChange[] expected, boolean withUnchanged) {
+ assertFalse(name, ei2.findSourceIndex(-1)); // a negative index is never found
+
+ int expSrcIndex = 0;
+ int expDestIndex = 0;
+ int expReplIndex = 0;
+ for (int expIndex = 0; expIndex < expected.length; ++expIndex) {
+ EditChange expect = expected[expIndex];
+ String msg = name + ' ' + expIndex;
+ if (withUnchanged || expect.change) { // a changes-only iterator never reports unchanged spans
+ assertTrue(msg, ei1.next());
+ assertEquals(msg, expect.change, ei1.hasChange());
+ assertEquals(msg, expect.oldLength, ei1.oldLength());
+ assertEquals(msg, expect.newLength, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+ }
+
+ if (expect.oldLength > 0) { // an edit that covers no source units is not searched for
+ assertTrue(msg, ei2.findSourceIndex(expSrcIndex));
+ assertEquals(msg, expect.change, ei2.hasChange());
+ assertEquals(msg, expect.oldLength, ei2.oldLength());
+ assertEquals(msg, expect.newLength, ei2.newLength());
+ assertEquals(msg, expSrcIndex, ei2.sourceIndex());
+ assertEquals(msg, expDestIndex, ei2.destinationIndex());
+ assertEquals(msg, expReplIndex, ei2.replacementIndex());
+ if (!withUnchanged) {
+ // For some iterators, move past the current range
+ // so that findSourceIndex() has to look before the current index.
+ ei2.next();
+ ei2.next();
+ }
+ }
+
+ expSrcIndex += expect.oldLength;
+ expDestIndex += expect.newLength;
+ if (expect.change) {
+ expReplIndex += expect.newLength; // replacement offset counts only changed text
+ }
+ }
+ String msg = name + " end";
+ assertFalse(msg, ei1.next()); // past the last edit: no change, zero lengths, final offsets
+ assertFalse(msg, ei1.hasChange());
+ assertEquals(msg, 0, ei1.oldLength());
+ assertEquals(msg, 0, ei1.newLength());
+ assertEquals(msg, expSrcIndex, ei1.sourceIndex());
+ assertEquals(msg, expDestIndex, ei1.destinationIndex());
+ assertEquals(msg, expReplIndex, ei1.replacementIndex());
+
+ assertFalse(name, ei2.findSourceIndex(expSrcIndex)); // the end-of-source index belongs to no edit
+ }
+
+ @Test
+ public void TestEdits() {
+ Edits edits = new Edits();
+ assertFalse("new Edits", edits.hasChanges());
+ assertEquals("new Edits", 0, edits.lengthDelta());
+ edits.addUnchanged(1); // multiple unchanged ranges are combined
+ edits.addUnchanged(10000); // too long, and they are split
+ edits.addReplace(0, 0); // expected to leave no trace: no empty change appears in the spans below
+ edits.addUnchanged(2);
+ assertFalse("unchanged 10003", edits.hasChanges());
+ assertEquals("unchanged 10003", 0, edits.lengthDelta());
+ edits.addReplace(1, 1); // multiple short equal-length edits are compressed
+ edits.addUnchanged(0); // expected not to interrupt the run of 1:1 changes
+ edits.addReplace(1, 1);
+ edits.addReplace(1, 1);
+ edits.addReplace(0, 10);
+ edits.addReplace(100, 0);
+ edits.addReplace(3000, 4000); // variable-length encoding
+ edits.addReplace(100000, 100000);
+ assertTrue("some edits", edits.hasChanges());
+ assertEquals("some edits", 10 - 100 + 1000, edits.lengthDelta());
+
+ EditChange[] coarseExpectedChanges = new EditChange[] {
+ new EditChange(false, 10003, 10003), // 1 + 10000 + 2
+ new EditChange(true, 103103, 104013) // all seven changes merged into one span
+ };
+ checkEditsIter("coarse",
+ edits.getCoarseIterator(), edits.getCoarseIterator(),
+ coarseExpectedChanges, true);
+ checkEditsIter("coarse changes",
+ edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
+ coarseExpectedChanges, false);
+
+ EditChange[] fineExpectedChanges = new EditChange[] {
+ new EditChange(false, 10003, 10003),
+ new EditChange(true, 1, 1), // the three 1:1 changes are reported one at a time
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 0, 10),
+ new EditChange(true, 100, 0),
+ new EditChange(true, 3000, 4000),
+ new EditChange(true, 100000, 100000)
+ };
+ checkEditsIter("fine",
+ edits.getFineIterator(), edits.getFineIterator(),
+ fineExpectedChanges, true);
+ checkEditsIter("fine changes",
+ edits.getFineChangesIterator(), edits.getFineChangesIterator(),
+ fineExpectedChanges, false);
+
+ edits.reset(); // after reset() the object must look like a new Edits
+ assertFalse("reset", edits.hasChanges());
+ assertEquals("reset", 0, edits.lengthDelta());
+ Edits.Iterator ei = edits.getCoarseChangesIterator();
+ assertFalse("reset then iterator", ei.next());
+ }
+
+ @Test
+ public void TestCaseMapWithEdits() {
+ StringBuilder sb = new StringBuilder();
+ Edits edits = new Edits();
+
+ sb = CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul", sb, edits);
+ assertEquals("toLower(Istanbul)", "ıb", sb.toString()); // only the two changed letters are written
+ EditChange[] lowerExpectedChanges = new EditChange[] {
+ new EditChange(true, 1, 1), // I
+ new EditChange(false, 4, 4), // stan
+ new EditChange(true, 1, 1), // B
+ new EditChange(false, 2, 2) // ul
+ };
+ checkEditsIter("toLower(Istanbul)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ lowerExpectedChanges, true);
+
+ sb.delete(0, sb.length()); // reuse the builder and the Edits for the next mapping
+ edits.reset();
+ sb = CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα", sb, edits);
+ assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", sb.toString());
+ EditChange[] upperExpectedChanges = new EditChange[] {
+ new EditChange(false, 1, 1), // the initial letter is already upper case
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1),
+ new EditChange(true, 1, 1)
+ };
+ checkEditsIter("toUpper(Πατάτα)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ upperExpectedChanges, true);
+
+ sb.delete(0, sb.length());
+ edits.reset();
+ sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply(
+ new Locale("nl"), null, "IjssEL IglOo", sb, edits);
+ assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString()); // only the j after the initial I changes
+ EditChange[] titleExpectedChanges = new EditChange[] {
+ new EditChange(false, 1, 1), // I
+ new EditChange(true, 1, 1), // j
+ new EditChange(false, 10, 10) // the rest is left alone
+ };
+ checkEditsIter("toTitle(IjssEL IglOo)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ titleExpectedChanges, true);
+
+ sb.delete(0, sb.length());
+ edits.reset();
+ sb = CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul", sb, edits);
+ assertEquals("fold(IßtanBul)", "ıssb", sb.toString());
+ EditChange[] foldExpectedChanges = new EditChange[] {
+ new EditChange(true, 1, 1), // I
+ new EditChange(true, 1, 2), // ß folds to two units
+ new EditChange(false, 3, 3), // tan
+ new EditChange(true, 1, 1), // B
+ new EditChange(false, 2, 2) // ul
+ };
+ checkEditsIter("fold(IßtanBul)",
+ edits.getFineIterator(), edits.getFineIterator(),
+ foldExpectedChanges, true);
+ }
+
// private data members - test data --------------------------------------
private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR");
@@ -945,7 +1132,7 @@
// private methods -------------------------------------------------------
/**
- * Converting the hex numbers represented betwee n ';' to Unicode strings
+ * Converting the hex numbers represented between ';' to Unicode strings
* @param str string to break up into Unicode strings
* @return array of Unicode strings ending with a null
*/