blob: a3e97779e901d3a73576d3a4b631870cbcee59ca [file] [log] [blame]
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
package com.android.providers.contacts;
import android.icu.text.AlphabeticIndex;
import android.icu.text.AlphabeticIndex.ImmutableIndex;
import android.icu.text.Transliterator;
import android.os.LocaleList;
import android.provider.ContactsContract.FullNameStyle;
import android.provider.ContactsContract.PhoneticNameStyle;
import android.text.TextUtils;
import android.util.ArraySet;
import android.util.Log;
import com.android.providers.contacts.HanziToPinyin.Token;
import com.google.common.annotations.VisibleForTesting;
import java.lang.Character.UnicodeBlock;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
/**
* This utility class provides specialized handling for locale specific
* information: labels, name lookup keys.
*/
public class ContactLocaleUtils {
// Log tag used by every Log call in this class.
public static final String TAG = "ContactLocale";
// Gates the verbose Log.d diagnostics emitted while building the index.
private static final boolean DEBUG = false; // don't submit with true
// Language-only locales referenced by sDefaultLabelLocales below.
public static final Locale LOCALE_ARABIC = new Locale("ar");
public static final Locale LOCALE_GREEK = new Locale("el");
public static final Locale LOCALE_HEBREW = new Locale("he");
// Serbian and Ukrainian labels are complementary supersets of Russian
public static final Locale LOCALE_SERBIAN = new Locale("sr");
public static final Locale LOCALE_UKRAINIAN = new Locale("uk");
public static final Locale LOCALE_THAI = new Locale("th");
// -- Note for adding locales to sDefaultLabelLocales --
//
// AlphabeticIndex.getBucketLabel() uses a binary search across
// the entire label set so care should be taken about growing this
// set too large. The following set determines for which locales
// we will show labels other than your primary locale. General rules
// of thumb for adding a locale: should be a supported locale; and
// should not be included if from a name it is not deterministic
// which way to label it (so e.g. Chinese cannot be added because
// the labeling of a Chinese character varies between Simplified,
// Traditional, and Japanese locales). Use English only for all
// Latin based alphabets. Ukrainian and Serbian are chosen for
// Cyrillic because their alphabets are complementary supersets
// of Russian.
//
// Order matters: getLocalesForBuckets() appends these, in array order,
// after the system locales when it assembles the locale list for the index.
private static final Locale[] sDefaultLabelLocales = new Locale[]{
Locale.ENGLISH,
Locale.JAPANESE,
Locale.KOREAN,
LOCALE_THAI,
LOCALE_ARABIC,
LOCALE_HEBREW,
LOCALE_GREEK,
LOCALE_UKRAINIAN,
LOCALE_SERBIAN,
};
/**
 * Writes the label of every bucket in {@code index} to the debug log as a
 * single comma-separated list.
 *
 * @param index the index whose bucket labels are logged
 */
@VisibleForTesting
static void dumpIndex(ImmutableIndex index) {
    final int bucketCount = index.getBucketCount();
    final StringBuilder joined = new StringBuilder();
    for (int bucket = 0; bucket < bucketCount; bucket++) {
        // Separator goes between labels only, never before the first one.
        if (bucket > 0) {
            joined.append(',');
        }
        joined.append(index.getBucket(bucket).getLabel());
    }
    Log.d(TAG, "Labels=[" + joined + "]");
}
/**
 * This class is the default implementation and should be the base class
 * for other locales.
 *
 * sortKey: same as name
 * nameLookupKeys: none
 * labels: uses ICU AlphabeticIndex for labels and extends by labeling
 *     phone numbers "#". E.g. English labels are: [A-Z], #, " "
 */
private static class ContactLocaleUtilsBase {
    private static final String EMPTY_STRING = "";
    private static final String NUMBER_STRING = "#";

    /** ICU index built from the system locales plus sDefaultLabelLocales. */
    protected final ImmutableIndex mAlphabeticIndex;
    /** Bucket count of {@link #mAlphabeticIndex}, cached at construction. */
    private final int mAlphabeticIndexBucketCount;
    /**
     * Index of the "#" bucket in this class's index space. It takes the slot
     * of the last AlphabeticIndex bucket, which is shifted up by one.
     */
    private final int mNumberBucketIndex;
    /** Whether names are run through HanziToPinyin before being bucketed. */
    private final boolean mUsePinyinTransliterator;

    /**
     * Builds the immutable alphabetic index for the given locale set.
     *
     * @param systemLocales supplies the system locales for the buckets and
     *         decides whether the pinyin transliterator is applied
     */
    public ContactLocaleUtilsBase(LocaleSet systemLocales) {
        mUsePinyinTransliterator = systemLocales.shouldPreferSimplifiedChinese();

        // Build the index buckets based on the current system locale set and
        // sDefaultLabelLocales.
        if (DEBUG) {
            Log.d(TAG, "Building index buckets...");
        }
        final List<Locale> locales = getLocalesForBuckets(systemLocales);

        // The first locale seeds the index; every other locale only adds labels.
        // The list is never empty because sDefaultLabelLocales is always appended.
        final AlphabeticIndex ai = new AlphabeticIndex(locales.get(0))
                .setMaxLabelCount(300);
        for (int i = 1; i < locales.size(); i++) {
            ai.addLabels(locales.get(i));
        }

        mAlphabeticIndex = ai.buildImmutableIndex();
        mAlphabeticIndexBucketCount = mAlphabeticIndex.getBucketCount();
        mNumberBucketIndex = mAlphabeticIndexBucketCount - 1;
        if (DEBUG) {
            dumpIndex(mAlphabeticIndex);
        }
    }

    /**
     * Creates the ordered list of locales used to generate the index buckets.
     *
     * - Source: the system locales followed by sDefaultLabelLocales.
     * - Rules:
     *   - Don't add the same locale multiple times.
     *   - Special rules for Chinese (b/31115382):
     *     - Don't add multiple Chinese locales.
     *     - Don't add any Chinese locales after Japanese.
     *
     * @param systemLocales the current system locale set
     * @return the de-duplicated, rule-filtered locale list
     */
    static List<Locale> getLocalesForBuckets(LocaleSet systemLocales) {
        // First, collect all the locales (allowing duplicates).
        final LocaleList localeList = systemLocales.getAllLocales();
        final List<Locale> locales = new ArrayList<>(
                localeList.size() + sDefaultLabelLocales.length);
        for (int i = 0; i < localeList.size(); i++) {
            locales.add(localeList.get(i));
        }
        for (int i = 0; i < sDefaultLabelLocales.length; i++) {
            locales.add(sDefaultLabelLocales[i]);
        }

        // Then apply the rules to generate the final list.
        final List<Locale> ret = new ArrayList<>(locales.size());
        boolean allowChinese = true;
        for (int i = 0; i < locales.size(); i++) {
            final Locale locale = locales.get(i);
            if (ret.contains(locale)) {
                continue;
            }
            if (LocaleSet.isLanguageChinese(locale)) {
                if (!allowChinese) {
                    continue;
                }
                // Only the first Chinese locale is accepted.
                allowChinese = false;
            }
            if (LocaleSet.isLanguageJapanese(locale)) {
                // No Chinese locale may follow Japanese.
                allowChinese = false;
            }
            if (DEBUG) {
                Log.d(TAG, " Adding locale: " + locale.toLanguageTag());
            }
            ret.add(locale);
        }
        return ret;
    }

    /** Returns the sort key for {@code name}, which is the name itself. */
    public String getSortKey(String name) {
        return name;
    }

    /** Returns the index of the "#" (phone number) bucket. */
    public int getNumberBucketIndex() {
        return mNumberBucketIndex;
    }

    /**
     * Returns the bucket index for the specified string. AlphabeticIndex
     * sorts strings into buckets numbered in order from 0 to N, where the
     * exact value of N depends on how many representative index labels are
     * used in a particular locale. This routine adds one additional bucket
     * for phone numbers. It attempts to detect phone numbers and shifts
     * the bucket indexes returned by AlphabeticIndex in order to make room
     * for the new # bucket, so the returned range becomes 0 to N+1.
     *
     * @param name the name to classify
     * @return the bucket index, or -1 if AlphabeticIndex reports a negative
     *         bucket
     */
    public int getBucketIndex(String name) {
        boolean prefixIsNumeric = false;
        final int length = name.length();
        int offset = 0;
        while (offset < length) {
            int codePoint = Character.codePointAt(name, offset);
            // Ignore standard phone number separators and identify any
            // string that otherwise starts with a number.
            if (Character.isDigit(codePoint)) {
                prefixIsNumeric = true;
                break;
            } else if (!Character.isSpaceChar(codePoint) &&
                       codePoint != '+' && codePoint != '(' &&
                       codePoint != ')' && codePoint != '.' &&
                       codePoint != '-' && codePoint != '#') {
                break;
            }
            offset += Character.charCount(codePoint);
        }
        if (prefixIsNumeric) {
            return mNumberBucketIndex;
        }

        // ICU 55 AlphabeticIndex doesn't support Simplified Chinese
        // as a secondary locale so it is necessary to use the
        // Pinyin transliterator. We also use this for a Simplified
        // Chinese primary locale because it gives more accurate letter
        // buckets. b/19835686
        if (mUsePinyinTransliterator) {
            name = HanziToPinyin.getInstance().transliterate(name);
        }
        final int bucket = mAlphabeticIndex.getBucketIndex(name);
        if (bucket < 0) {
            return -1;
        }
        // Buckets at or after the "#" slot move up by one to make room for it.
        if (bucket >= mNumberBucketIndex) {
            return bucket + 1;
        }
        return bucket;
    }

    /**
     * Returns the number of buckets in use (one more than AlphabeticIndex
     * uses, because this class adds a bucket for phone numbers).
     */
    public int getBucketCount() {
        return mAlphabeticIndexBucketCount + 1;
    }

    /**
     * Returns the label for the specified bucket index if a valid index,
     * otherwise returns an empty string. '#' is returned for the phone
     * number bucket; for all others, the AlphabeticIndex label is returned.
     *
     * @param bucketIndex an index in this class's own index space
     */
    public String getBucketLabel(int bucketIndex) {
        // Validate against this class's own bucket count, not the overridable
        // getBucketCount(). A subclass that adds buckets reports a larger
        // count, which would let an out-of-range index through to
        // mAlphabeticIndex.getBucket() and dereference its null result.
        final int ownBucketCount = mAlphabeticIndexBucketCount + 1;
        if (bucketIndex < 0 || bucketIndex >= ownBucketCount) {
            return EMPTY_STRING;
        } else if (bucketIndex == mNumberBucketIndex) {
            return NUMBER_STRING;
        } else if (bucketIndex > mNumberBucketIndex) {
            // Undo the shift applied by getBucketIndex().
            --bucketIndex;
        }
        return mAlphabeticIndex.getBucket(bucketIndex).getLabel();
    }

    /**
     * Returns the additional name lookup keys for {@code name}. The default
     * implementation produces none and returns null.
     */
    @SuppressWarnings("unused")
    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
        return null;
    }

    /**
     * Returns the labels of all buckets, in bucket order, using the
     * (possibly overridden) getBucketCount() and getBucketLabel().
     */
    public ArrayList<String> getLabels() {
        final int bucketCount = getBucketCount();
        final ArrayList<String> labels = new ArrayList<String>(bucketCount);
        for (int i = 0; i < bucketCount; ++i) {
            labels.add(getBucketLabel(i));
        }
        return labels;
    }
}
/**
 * Japanese specific locale overrides.
 *
 * sortKey: unchanged (same as name)
 * nameLookupKeys: Romaji keys for names whose style is
 *     PhoneticNameStyle.JAPANESE; none otherwise
 * labels: extends default labels by labeling unlabeled CJ characters
 *     with the Japanese character 他 ("misc"). Japanese labels are:
 *     あ, か, さ, た, な, は, ま, や, ら, わ, 他, [A-Z], #, " "
 */
private static class JapaneseContactUtils extends ContactLocaleUtilsBase {
    // \u4ed6 is Japanese character 他 ("misc")
    private static final String JAPANESE_MISC_LABEL = "\u4ed6";

    // Set of UnicodeBlocks for unified CJK (Chinese) characters and
    // Japanese characters. This includes all code blocks that might
    // contain a character used in Japanese (which is why unified CJK
    // blocks are included but Korean Hangul and jamo are not).
    private static final Set<Character.UnicodeBlock> CJ_BLOCKS;
    static {
        Set<UnicodeBlock> set = new ArraySet<>();
        set.add(UnicodeBlock.HIRAGANA);
        set.add(UnicodeBlock.KATAKANA);
        set.add(UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
        set.add(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS);
        set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
        set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
        set.add(UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B);
        set.add(UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION);
        set.add(UnicodeBlock.CJK_RADICALS_SUPPLEMENT);
        set.add(UnicodeBlock.CJK_COMPATIBILITY);
        set.add(UnicodeBlock.CJK_COMPATIBILITY_FORMS);
        set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
        set.add(UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT);
        CJ_BLOCKS = Collections.unmodifiableSet(set);
    }

    // Lazily initialized state, guarded by the JapaneseContactUtils.class lock.
    private static boolean sInitializedTransliterator;
    private static Transliterator sJapaneseTransliterator;

    /** Base-class bucket index into which unclassified Kanji are lumped. */
    private final int mMiscBucketIndex;

    public JapaneseContactUtils(LocaleSet locales) {
        super(locales);
        // Determine which bucket AlphabeticIndex is lumping unclassified
        // Japanese characters into by looking up the bucket index for
        // a representative Kanji/CJK unified ideograph (\u65e5 is the
        // character '日').
        mMiscBucketIndex = super.getBucketIndex("\u65e5");
    }

    /**
     * Helper routine to identify unlabeled Chinese or Japanese characters
     * to put in a 'misc' bucket.
     *
     * @return true if the specified Unicode code point is Chinese or
     * Japanese
     */
    private static boolean isChineseOrJapanese(int codePoint) {
        return CJ_BLOCKS.contains(UnicodeBlock.of(codePoint));
    }

    /**
     * Returns the bucket index for the specified string. Adds an
     * additional 'misc' bucket for Kanji characters to the base class set:
     * names the base class puts in the misc bucket stay there only if
     * their first code point is Chinese or Japanese; everything else at or
     * after that bucket is shifted up by one.
     */
    @Override
    public int getBucketIndex(String name) {
        final int bucketIndex = super.getBucketIndex(name);
        if (bucketIndex > mMiscBucketIndex) {
            return bucketIndex + 1;
        }
        // An empty name has no first code point to inspect, so it is treated
        // as non-CJ rather than letting Character.codePointAt() throw.
        if (bucketIndex == mMiscBucketIndex
                && (name.isEmpty()
                        || !isChineseOrJapanese(Character.codePointAt(name, 0)))) {
            return bucketIndex + 1;
        }
        return bucketIndex;
    }

    /**
     * Returns the number of buckets in use (one more than the base class
     * uses, because this class adds a bucket for Kanji).
     */
    @Override
    public int getBucketCount() {
        return super.getBucketCount() + 1;
    }

    /**
     * Returns the label for the specified bucket index if a valid index,
     * otherwise returns an empty string. '他' is returned for unclassified
     * Kanji; for all others, the label determined by the base class is
     * returned.
     */
    @Override
    public String getBucketLabel(int bucketIndex) {
        // Reject out-of-range indexes before translating them. Otherwise an
        // index equal to getBucketCount() is decremented into a value the
        // base class cannot tell apart from a valid one.
        if (bucketIndex < 0 || bucketIndex >= getBucketCount()) {
            return "";
        }
        if (bucketIndex == mMiscBucketIndex) {
            return JAPANESE_MISC_LABEL;
        } else if (bucketIndex > mMiscBucketIndex) {
            // Map back into the base class's index space.
            --bucketIndex;
        }
        return super.getBucketLabel(bucketIndex);
    }

    /**
     * Returns Romaji lookup keys when {@code nameStyle} is
     * PhoneticNameStyle.JAPANESE, otherwise null.
     */
    @Override
    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
        // Hiragana and Katakana will be positively identified as Japanese.
        if (nameStyle == PhoneticNameStyle.JAPANESE) {
            return getRomajiNameLookupKeys(name);
        }
        return null;
    }

    /**
     * Returns the shared Hiragana/Katakana-to-Latin transliterator, creating
     * it on first use. Creation is attempted only once; if it fails, a
     * warning is logged and null is returned on this and every later call.
     */
    private static Transliterator getJapaneseTransliterator() {
        synchronized (JapaneseContactUtils.class) {
            if (!sInitializedTransliterator) {
                sInitializedTransliterator = true;
                Transliterator t = null;
                try {
                    t = Transliterator.getInstance("Hiragana-Latin; Katakana-Latin;"
                            + " Latin-Ascii");
                } catch (IllegalArgumentException e) {
                    Log.w(TAG, "Hiragana/Katakana-Latin transliterator data"
                            + " is missing");
                }
                sJapaneseTransliterator = t;
            }
            return sJapaneseTransliterator;
        }
    }

    /**
     * Returns an iterator over the single Romaji transliteration of
     * {@code name}, or null when no transliterator is available, the
     * result is empty, or the result equals {@code name}.
     */
    public static Iterator<String> getRomajiNameLookupKeys(String name) {
        final Transliterator t = getJapaneseTransliterator();
        if (t == null) {
            return null;
        }
        final String romajiName = t.transliterate(name);
        if (TextUtils.isEmpty(romajiName) ||
                TextUtils.equals(name, romajiName)) {
            return null;
        }
        final ArraySet<String> keys = new ArraySet<>();
        keys.add(romajiName);
        return keys.iterator();
    }

    /**
     * Returns the number for "#" bucket index.
     * Adds an additional 'misc' bucket for Kanji characters to the base class set.
     */
    @Override
    public int getNumberBucketIndex() {
        final int numberBucketIndex = super.getNumberBucketIndex();
        if (numberBucketIndex > mMiscBucketIndex) {
            return numberBucketIndex + 1;
        }
        return numberBucketIndex;
    }
}
/**
 * Simplified Chinese specific locale overrides. Pinyin is obtained from
 * HanziToPinyin.
 *
 * sortKey: unchanged (same as name)
 * nameLookupKeys: adds additional name lookup keys
 *     - Chinese character's pinyin and pinyin's initial character.
 *     - Latin word and initial character.
 * labels: unchanged
 *     Simplified Chinese labels are the same as English: [A-Z], #, " "
 */
private static class SimplifiedChineseContactUtils
        extends ContactLocaleUtilsBase {

    public SimplifiedChineseContactUtils(LocaleSet locales) {
        super(locales);
    }

    /**
     * Returns pinyin lookup keys for every name style except
     * FullNameStyle.JAPANESE and FullNameStyle.KOREAN, for which it
     * returns null.
     */
    @Override
    public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
        final boolean japaneseOrKorean = nameStyle == FullNameStyle.JAPANESE
                || nameStyle == FullNameStyle.KOREAN;
        return japaneseOrKorean ? null : getPinyinNameLookupKeys(name);
    }

    /**
     * Builds the lookup keys for {@code name} by walking its tokens from
     * last to first. After each usable token three keys are recorded for
     * the suffix seen so far: the original text, its pinyin, and the
     * initials.
     */
    public static Iterator<String> getPinyinNameLookupKeys(String name) {
        // TODO : Reduce the object allocation.
        final ArraySet<String> lookupKeys = new ArraySet<>();
        final ArrayList<Token> tokens = HanziToPinyin.getInstance().getTokens(name);

        final StringBuilder pinyin = new StringBuilder();
        final StringBuilder initials = new StringBuilder();
        // There is no space among the Chinese Characters, the variant name
        // lookup key wouldn't work for Chinese. This builder accumulates the
        // original text so it can serve as a lookup key for itself.
        final StringBuilder original = new StringBuilder();

        int position = tokens.size();
        while (--position >= 0) {
            final Token current = tokens.get(position);
            if (current.type == Token.UNKNOWN) {
                continue;
            }
            if (current.type == Token.PINYIN) {
                pinyin.insert(0, current.target);
                initials.insert(0, current.target.charAt(0));
            } else if (current.type == Token.LATIN) {
                // Separate from what follows, but never leave a trailing space.
                if (pinyin.length() > 0) {
                    pinyin.insert(0, ' ');
                }
                pinyin.insert(0, current.source);
                initials.insert(0, current.source.charAt(0));
                if (original.length() > 0) {
                    original.insert(0, ' ');
                }
            }
            original.insert(0, current.source);

            lookupKeys.add(original.toString());
            lookupKeys.add(pinyin.toString());
            lookupKeys.add(initials.toString());
        }
        return lookupKeys.iterator();
    }
}
// Process-wide instance; created lazily by getInstance() and replaced by
// setLocales(), both of which are synchronized on the class.
private static ContactLocaleUtils sSingleton;
// Locale set this instance was built for; compared by isLocale().
private final LocaleSet mLocales;
// Locale-specific implementation chosen in the constructor; the public
// bucket, label and sort-key methods delegate to it.
private final ContactLocaleUtilsBase mUtils;
/**
 * Creates the utilities for {@code locales}, substituting
 * LocaleSet.newDefault() when {@code locales} is null, and logs the
 * resulting address book labels.
 */
private ContactLocaleUtils(LocaleSet locales) {
    mLocales = (locales != null) ? locales : LocaleSet.newDefault();
    mUtils = createUtilsFor(mLocales);
    Log.i(TAG, "AddressBook Labels [" + mLocales.toString() + "]: "
            + getLabels().toString());
}

/**
 * Picks the locale-specific implementation for {@code locales}. A Japanese
 * preference is checked before a Simplified Chinese one; anything else gets
 * the default implementation.
 */
private static ContactLocaleUtilsBase createUtilsFor(LocaleSet locales) {
    if (locales.shouldPreferJapanese()) {
        return new JapaneseContactUtils(locales);
    }
    if (locales.shouldPreferSimplifiedChinese()) {
        return new SimplifiedChineseContactUtils(locales);
    }
    return new ContactLocaleUtilsBase(locales);
}
/**
 * Returns whether this instance was built for {@code locales}, as decided
 * by {@code equals} on the locale set held by this instance.
 */
public boolean isLocale(LocaleSet locales) {
return mLocales.equals(locales);
}
/**
 * Returns the shared instance, creating it from LocaleSet.newDefault() on
 * first use.
 */
public static synchronized ContactLocaleUtils getInstance() {
    if (sSingleton != null) {
        return sSingleton;
    }
    sSingleton = new ContactLocaleUtils(LocaleSet.newDefault());
    return sSingleton;
}
/**
 * Creates a fresh instance for the given locales via LocaleSet.newForTest().
 * The shared singleton is neither read nor replaced.
 */
@VisibleForTesting
public static ContactLocaleUtils newInstanceForTest(Locale... locales) {
return new ContactLocaleUtils(LocaleSet.newForTest(locales));
}
/**
 * Test convenience: wraps {@code locales} with LocaleSet.newForTest() and
 * hands the result to setLocales().
 */
@VisibleForTesting
public static synchronized void setLocaleForTest(Locale... locales) {
setLocales(LocaleSet.newForTest(locales));
}
/**
 * Replaces the shared instance with one built for {@code locales}, unless
 * the current instance already reports (via isLocale) that it was built
 * for that locale set.
 */
public static synchronized void setLocales(LocaleSet locales) {
    final boolean alreadyCurrent = sSingleton != null && sSingleton.isLocale(locales);
    if (alreadyCurrent) {
        return;
    }
    if (DEBUG) {
        Log.d(TAG, "Setting locale(s) to " + locales);
    }
    sSingleton = new ContactLocaleUtils(locales);
}
/**
 * Returns the sort key for {@code name} as computed by the locale-specific
 * utilities. {@code nameStyle} is not used by this method.
 */
public String getSortKey(String name, int nameStyle) {
return mUtils.getSortKey(name);
}
/**
 * Returns the index of the bucket that {@code name} falls into, as computed
 * by the locale-specific utilities.
 */
public int getBucketIndex(String name) {
return mUtils.getBucketIndex(name);
}
/**
 * Returns the index of the "#" bucket, as reported by the locale-specific
 * utilities.
 */
public int getNumberBucketIndex() {
return mUtils.getNumberBucketIndex();
}
/** Returns the number of buckets reported by the locale-specific utilities. */
public int getBucketCount() {
return mUtils.getBucketCount();
}
/**
 * Returns the label for {@code bucketIndex}, as reported by the
 * locale-specific utilities.
 */
public String getBucketLabel(int bucketIndex) {
return mUtils.getBucketLabel(bucketIndex);
}
/**
 * Returns the label of the bucket that {@code name} falls into; equivalent
 * to getBucketLabel(getBucketIndex(name)).
 */
public String getLabel(String name) {
return getBucketLabel(getBucketIndex(name));
}
/**
 * Returns the list of bucket labels produced by the locale-specific
 * utilities. Also called from the constructor to log the labels.
 */
public ArrayList<String> getLabels() {
return mUtils.getLabels();
}
/**
 * Determines which utility generates the name lookup keys (i.e. whether
 * Romaji or Pinyin lookup keys are generated, or neither).
 *
 * When the primary locale is CJK, the locale's own utilities decide.
 *
 * Otherwise:
 *   a. If Simplified Chinese is preferred, names styled CHINESE or CJK
 *      get Pinyin lookup keys.
 *   b. If it is not, names styled JAPANESE get Romaji lookup keys.
 *   c. Every other combination falls back to the locale's own utilities.
 *
 * @return an iterator over the lookup keys, or null when none are generated
 */
public Iterator<String> getNameLookupKeys(String name, int nameStyle) {
    if (mLocales.isPrimaryLocaleCJK()) {
        return mUtils.getNameLookupKeys(name, nameStyle);
    }

    final boolean preferSimplifiedChinese = mLocales.shouldPreferSimplifiedChinese();
    if (preferSimplifiedChinese) {
        final boolean chineseOrUnclassified = nameStyle == FullNameStyle.CHINESE
                || nameStyle == FullNameStyle.CJK;
        if (chineseOrUnclassified) {
            return SimplifiedChineseContactUtils.getPinyinNameLookupKeys(name);
        }
    } else if (nameStyle == FullNameStyle.JAPANESE) {
        return JapaneseContactUtils.getRomajiNameLookupKeys(name);
    }

    return mUtils.getNameLookupKeys(name, nameStyle);
}
}