blob: ddd294f18ee34c76b589c55271ca71643b85bf29 [file] [log] [blame]
/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
package com.android.providers.contacts;
import android.content.ContentValues;
import android.provider.ContactsContract.CommonDataKinds.StructuredName;
import android.provider.ContactsContract.FullNameStyle;
import android.provider.ContactsContract.PhoneticNameStyle;
import android.text.TextUtils;
import android.util.ArraySet;
import com.android.providers.contacts.util.NeededForTesting;
import java.lang.Character.UnicodeBlock;
import java.util.Locale;
import java.util.StringTokenizer;
/**
* The purpose of this class is to split a full name into given names and last
* name. The logic only supports having a single last name. If the full name has
* multiple last names the output will be incorrect.
* <p>
* Core algorithm:
* <ol>
* <li>Remove the suffixes (III, Ph.D., M.D.).</li>
* <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
* <li>Assign the last remaining token as the last name.</li>
* <li>If the previous word to the last name is one from LASTNAME_PREFIXES, use
* this word also as the last name.</li>
* <li>Assign the rest of the words as the "given names".</li>
* </ol>
*/
public class NameSplitter {
/** Upper bound on the number of name tokens kept by NameTokenizer; sizes its token array. */
public static final int MAX_TOKENS = 10;
// Lower-cased language codes that getAdjustedFullNameStyle compares against mLanguage.
private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();
// This includes simplified and traditional Chinese
private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();
// Upper-cased, trimmed entries produced by convertToSet from the constructor arguments.
private final ArraySet<String> mPrefixesSet;
private final ArraySet<String> mSuffixesSet;
// Length of the longest entry in mSuffixesSet; longer candidates cannot be a known suffix.
private final int mMaxSuffixLength;
private final ArraySet<String> mLastNamePrefixesSet;
private final ArraySet<String> mConjuctions;
// Locale supplied to the constructor, or the default locale when null was passed.
private final Locale mLocale;
// Lower-cased language code of mLocale.
private final String mLanguage;
/**
* Two-character Korean family names, checked by splitKoreanName when the full name
* contains no separator.
* http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
*/
private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
"\uAC15\uC804", // Gang Jeon
"\uB0A8\uAD81", // Nam Goong
"\uB3C5\uACE0", // Dok Go
"\uB3D9\uBC29", // Dong Bang
"\uB9DD\uC808", // Mang Jeol
"\uC0AC\uACF5", // Sa Gong
"\uC11C\uBB38", // Seo Moon
"\uC120\uC6B0", // Seon Woo
"\uC18C\uBD09", // So Bong
"\uC5B4\uAE08", // Uh Geum
"\uC7A5\uACE1", // Jang Gok
"\uC81C\uAC08", // Je Gal
"\uD669\uBCF4" // Hwang Bo
};
public static class Name {
public String prefix;
public String givenNames;
public String middleName;
public String familyName;
public String suffix;
public int fullNameStyle;
public String phoneticFamilyName;
public String phoneticMiddleName;
public String phoneticGivenName;
public int phoneticNameStyle;
public Name() {
}
public Name(String prefix, String givenNames, String middleName, String familyName,
String suffix) {
this.prefix = prefix;
this.givenNames = givenNames;
this.middleName = middleName;
this.familyName = familyName;
this.suffix = suffix;
}
@NeededForTesting
public String getPrefix() {
return prefix;
}
public String getGivenNames() {
return givenNames;
}
public String getMiddleName() {
return middleName;
}
public String getFamilyName() {
return familyName;
}
@NeededForTesting
public String getSuffix() {
return suffix;
}
public int getFullNameStyle() {
return fullNameStyle;
}
public String getPhoneticFamilyName() {
return phoneticFamilyName;
}
public String getPhoneticMiddleName() {
return phoneticMiddleName;
}
public String getPhoneticGivenName() {
return phoneticGivenName;
}
public int getPhoneticNameStyle() {
return phoneticNameStyle;
}
public void fromValues(ContentValues values) {
prefix = values.getAsString(StructuredName.PREFIX);
givenNames = values.getAsString(StructuredName.GIVEN_NAME);
middleName = values.getAsString(StructuredName.MIDDLE_NAME);
familyName = values.getAsString(StructuredName.FAMILY_NAME);
suffix = values.getAsString(StructuredName.SUFFIX);
Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;
phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);
integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
}
public void toValues(ContentValues values) {
putValueIfPresent(values, StructuredName.PREFIX, prefix);
putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
putValueIfPresent(values, StructuredName.SUFFIX, suffix);
values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
}
private void putValueIfPresent(ContentValues values, String name, String value) {
if (value != null) {
values.put(name, value);
}
}
public void clear() {
prefix = null;
givenNames = null;
middleName = null;
familyName = null;
suffix = null;
fullNameStyle = FullNameStyle.UNDEFINED;
phoneticFamilyName = null;
phoneticMiddleName = null;
phoneticGivenName = null;
phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
}
public boolean isEmpty() {
return TextUtils.isEmpty(givenNames)
&& TextUtils.isEmpty(middleName)
&& TextUtils.isEmpty(familyName)
&& TextUtils.isEmpty(suffix)
&& TextUtils.isEmpty(phoneticFamilyName)
&& TextUtils.isEmpty(phoneticMiddleName)
&& TextUtils.isEmpty(phoneticGivenName);
}
@Override
public String toString() {
return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
+ " family: " + familyName + " suffix: " + suffix + " ph/given: "
+ phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
+ phoneticFamilyName + "]";
}
}
/**
 * Splits a full name on spaces, dots and commas, keeping at most MAX_TOKENS words and
 * remembering, per word, whether it was followed by a dot or a comma.
 */
private static class NameTokenizer extends StringTokenizer {
    private final String[] mTokens;
    // Bit i is set when token i was followed by a dot / comma respectively.
    private int mDotBitmask;
    private int mCommaBitmask;
    // Half-open window [mStartPointer, mEndPointer) of tokens not yet consumed by the parser.
    private int mStartPointer;
    private int mEndPointer;

    public NameTokenizer(String fullName) {
        // Delimiters are returned as tokens so that dots and commas can be recorded.
        super(fullName, " .,", true);
        mTokens = new String[MAX_TOKENS];

        while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
            final String piece = nextToken();
            // StringTokenizer never yields an empty token, so the first char always exists.
            final char head = piece.charAt(0);
            if (head == ' ') {
                continue;
            }

            final boolean followsToken = mEndPointer > 0;
            if (followsToken && head == '.') {
                mDotBitmask |= (1 << (mEndPointer - 1));
                continue;
            }
            if (followsToken && head == ',') {
                mCommaBitmask |= (1 << (mEndPointer - 1));
                continue;
            }

            // A word, or a dot/comma that precedes any word: stored as a token.
            mTokens[mEndPointer++] = piece;
        }
    }

    /**
     * Returns true if the token is followed by a dot in the original full name.
     */
    public boolean hasDot(int index) {
        return ((mDotBitmask >>> index) & 1) == 1;
    }

    /**
     * Returns true if the token is followed by a comma in the original full name.
     */
    public boolean hasComma(int index) {
        return ((mCommaBitmask >>> index) & 1) == 1;
    }
}
/**
 * Constructor.
 *
 * @param commonPrefixes comma-separated list of common prefixes,
 *            e.g. "Mr, Ms, Mrs"
 * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
 *            e.g. "d', st, st., von"
 * @param commonSuffixes comma-separated list of common suffixes,
 *            e.g. "Jr, M.D., MD, D.D.S."
 * @param commonConjunctions comma-separated list of common conjunctions,
 *            e.g. "AND, Or"
 * @param locale locale whose language drives the default name style; the default
 *            locale is used when this is null
 */
public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
        String commonSuffixes, String commonConjunctions, Locale locale) {
    // TODO: refactor this to use <string-array> resources
    mPrefixesSet = convertToSet(commonPrefixes);
    mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
    mSuffixesSet = convertToSet(commonSuffixes);
    mConjuctions = convertToSet(commonConjunctions);

    mLocale = (locale == null) ? Locale.getDefault() : locale;
    mLanguage = mLocale.getLanguage().toLowerCase();

    // Remember the longest known suffix so longer candidates can be rejected cheaply.
    int longest = 0;
    for (String knownSuffix : mSuffixesSet) {
        longest = Math.max(longest, knownSuffix.length());
    }
    mMaxSuffixLength = longest;
}
/**
 * Builds a set from a comma-separated list: each entry is trimmed and upper-cased.
 * A null list yields an empty set.
 */
private static ArraySet<String> convertToSet(String strings) {
    final ArraySet<String> result = new ArraySet<>();
    if (strings == null) {
        return result;
    }
    for (String entry : strings.split(",")) {
        result.add(entry.trim().toUpperCase());
    }
    return result;
}
/**
 * Tokenizes a full name on spaces, dots and commas and copies the resulting words
 * into {@code tokens}, starting at index 0.
 *
 * @param tokens destination array; at most {@link #MAX_TOKENS} entries are written,
 *            so it must have at least that much room
 * @param fullName the name to tokenize; may be null
 * @return the number of tokens written, 0 when {@code fullName} is null or has no words
 */
public int tokenize(String[] tokens, String fullName) {
    if (fullName == null) {
        return 0;
    }
    final NameTokenizer tokenizer = new NameTokenizer(fullName);
    int count = 0;
    // An empty token window simply skips the loop and reports 0.
    for (int i = tokenizer.mStartPointer; i < tokenizer.mEndPointer; i++) {
        tokens[count++] = tokenizer.mTokens[i];
    }
    return count;
}
/**
 * Guesses the name style from the characters of {@code fullName}, resolves a generic
 * CJK guess through the splitter's language, and stores the parsed components in
 * {@code name}. Does nothing when {@code fullName} is null.
 */
public void split(Name name, String fullName) {
    if (fullName != null) {
        final int guessedStyle = guessFullNameStyle(fullName);
        final int effectiveStyle = (guessedStyle == FullNameStyle.CJK)
                ? getAdjustedFullNameStyle(guessedStyle)
                : guessedStyle;
        split(name, fullName, effectiveStyle);
    }
}
/**
 * Stores {@code fullNameStyle} in {@code name} and parses {@code fullName} with the
 * splitter for that style. Any style other than CHINESE, JAPANESE or KOREAN is
 * handled as a Western name. Does nothing when {@code fullName} is null.
 */
public void split(Name name, String fullName, int fullNameStyle) {
    if (fullName == null) {
        return;
    }
    name.fullNameStyle = fullNameStyle;
    if (fullNameStyle == FullNameStyle.CHINESE) {
        splitChineseName(name, fullName);
    } else if (fullNameStyle == FullNameStyle.JAPANESE) {
        splitJapaneseName(name, fullName);
    } else if (fullNameStyle == FullNameStyle.KOREAN) {
        splitKoreanName(name, fullName);
    } else {
        splitWesternName(name, fullName);
    }
}
/**
 * Splits a full name composed according to the Western tradition:
 * <pre>
 *   [prefix] given name(s) [[middle name] family name] [, suffix]
 *   [prefix] family name, given name [middle name] [,suffix]
 * </pre>
 */
private void splitWesternName(Name name, String fullName) {
    final NameTokenizer tokens = new NameTokenizer(fullName);
    parsePrefix(name, tokens);

    // With only one or two tokens, never look for a suffix: in "John Ma" the word
    // "Ma" is a last name, not "M.A.".
    if (tokens.mEndPointer > 2) {
        parseSuffix(name, tokens);
    }

    final boolean loneWordWithoutPrefix =
            name.prefix == null && tokens.mEndPointer - tokens.mStartPointer == 1;
    if (loneWordWithoutPrefix) {
        name.givenNames = tokens.mTokens[tokens.mStartPointer];
        return;
    }

    parseLastName(name, tokens);
    parseMiddleName(name, tokens);
    parseGivenNames(name, tokens);
}
/**
 * Splits a full name composed according to the Chinese tradition:
 * <pre>
 *   [family name [middle name]] given name
 * </pre>
 * Whitespace-separated words are assigned in order: the last word is the given name,
 * the first is the family name, and anything in between is concatenated into the
 * middle name. A name written as a single word of two to four characters is split by
 * position instead.
 * <p>
 * The single-word split works on the word itself (not on the raw input) and counts
 * Unicode code points, so surrounding whitespace never becomes a name part and
 * supplementary-plane ideographs are not cut in the middle of a surrogate pair.
 */
private void splitChineseName(Name name, String fullName) {
    StringTokenizer tokenizer = new StringTokenizer(fullName);
    int tokenCount = 0;
    String lastToken = null;
    while (tokenizer.hasMoreTokens()) {
        String token = tokenizer.nextToken();
        lastToken = token;
        tokenCount++;
        if (name.givenNames == null) {
            name.givenNames = token;
        } else if (name.familyName == null) {
            name.familyName = name.givenNames;
            name.givenNames = token;
        } else if (name.middleName == null) {
            name.middleName = name.givenNames;
            name.givenNames = token;
        } else {
            name.middleName = name.middleName + name.givenNames;
            name.givenNames = token;
        }
    }

    // If the name is a single word, split that word up by character position.
    if (tokenCount == 1 && name.givenNames != null
            && name.familyName == null && name.middleName == null) {
        final String word = lastToken;
        final int length = word.codePointCount(0, word.length());
        if (length == 2) {
            final int afterFamily = word.offsetByCodePoints(0, 1);
            name.familyName = word.substring(0, afterFamily);
            name.givenNames = word.substring(afterFamily);
        } else if (length == 3) {
            final int afterFamily = word.offsetByCodePoints(0, 1);
            final int afterMiddle = word.offsetByCodePoints(0, 2);
            name.familyName = word.substring(0, afterFamily);
            name.middleName = word.substring(afterFamily, afterMiddle);
            name.givenNames = word.substring(afterMiddle);
        } else if (length == 4) {
            // Four characters: assume a two-character family name.
            final int afterFamily = word.offsetByCodePoints(0, 2);
            final int afterMiddle = word.offsetByCodePoints(0, 3);
            name.familyName = word.substring(0, afterFamily);
            name.middleName = word.substring(afterFamily, afterMiddle);
            name.givenNames = word.substring(afterMiddle);
        }
    }
}
/**
 * Splits a full name composed according to the Japanese tradition:
 * <pre>
 *   [family name] given name(s)
 * </pre>
 * The first of several words is the family name; the remaining words are joined
 * with single spaces into the given names. A lone word is the given name.
 */
private void splitJapaneseName(Name name, String fullName) {
    final StringTokenizer words = new StringTokenizer(fullName);
    while (words.hasMoreTokens()) {
        final String word = words.nextToken();
        if (name.givenNames != null && name.familyName != null) {
            // Family name already fixed: accumulate further given names.
            name.givenNames = name.givenNames + " " + word;
            continue;
        }
        if (name.givenNames != null) {
            // The word seen so far turns out to be the family name.
            name.familyName = name.givenNames;
        }
        name.givenNames = word;
    }
}
/**
 * Splits a full name composed according to the Korean tradition:
 * <pre>
 *   [family name] given name(s)
 * </pre>
 * When the name contains whitespace, the first word is the family name and the rest
 * are the given names. Otherwise the family name is guessed: two characters if the
 * word starts with a known two-character family name, one character if not.
 * <p>
 * An empty or whitespace-only {@code fullName} leaves {@code name} untouched, and the
 * guess is made on the word itself so surrounding whitespace is never taken as a
 * name part.
 */
private void splitKoreanName(Name name, String fullName) {
    StringTokenizer tokenizer = new StringTokenizer(fullName);
    final int tokenCount = tokenizer.countTokens();
    if (tokenCount == 0) {
        // Nothing to split; substring(0, 1) on an empty name would throw.
        return;
    }
    if (tokenCount > 1) {
        // Each name can be identified by separators.
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (name.givenNames == null) {
                name.givenNames = token;
            } else if (name.familyName == null) {
                name.familyName = name.givenNames;
                name.givenNames = token;
            } else {
                name.givenNames += " " + token;
            }
        }
    } else {
        // There is no separator. Try to guess family name.
        final String word = tokenizer.nextToken();
        // The length of most family names is 1.
        int familyNameLength = 1;
        // Compare with 2-length family names.
        for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) {
            if (word.startsWith(twoLengthFamilyName)) {
                familyNameLength = 2;
                break;
            }
        }
        name.familyName = word.substring(0, familyNameLength);
        if (word.length() > familyNameLength) {
            name.givenNames = word.substring(familyNameLength);
        }
    }
}
/**
 * Concatenates components of a name according to the rules dictated by the name style.
 * CJK, Chinese and Korean names are joined family-middle-given without separators;
 * Japanese names use the same order with spaces; every other style is joined
 * Western-fashion with a comma before the suffix.
 *
 * @param givenNameFirst is ignored for CJK display name styles
 * @param includePrefix whether the prefix is part of the result
 */
public String join(Name name, boolean givenNameFirst, boolean includePrefix) {
    final String prefix = includePrefix ? name.prefix : null;
    final int style = name.fullNameStyle;

    if (style == FullNameStyle.CJK
            || style == FullNameStyle.CHINESE
            || style == FullNameStyle.KOREAN) {
        return join(prefix, name.familyName, name.middleName, name.givenNames,
                name.suffix, false, false, false);
    }
    if (style == FullNameStyle.JAPANESE) {
        return join(prefix, name.familyName, name.middleName, name.givenNames,
                name.suffix, true, false, false);
    }
    if (givenNameFirst) {
        return join(prefix, name.givenNames, name.middleName, name.familyName,
                name.suffix, true, false, true);
    }
    // Family name first: "Family, Given Middle, Suffix".
    return join(prefix, name.familyName, name.givenNames, name.middleName,
            name.suffix, true, true, true);
}
/**
 * Concatenates components of the phonetic name following the CJK tradition:
 * family name + middle name + given name(s), separated by spaces and without
 * prefix, suffix or commas.
 */
public String joinPhoneticName(Name name) {
    final String family = name.phoneticFamilyName;
    final String middle = name.phoneticMiddleName;
    final String given = name.phoneticGivenName;
    return join(null, family, middle, given, null, true, false, false);
}
/**
 * Concatenates parts of a full name inserting spaces and commas as specified.
 * Every part is trimmed first and empty parts are skipped. Returns null when all
 * parts are empty; when exactly one part is present it is returned on its own
 * (a lone suffix is normalized first).
 *
 * @param useSpace whether a space separates part2, part3 and the suffix from what
 *            precedes them (prefix and part1 are always separated by a space)
 * @param useCommaAfterPart1 whether a comma is inserted before part2
 * @param useCommaAfterPart3 whether a comma is inserted before the suffix
 */
private String join(String prefix, String part1, String part2, String part3, String suffix,
        boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
    if (prefix != null) {
        prefix = prefix.trim();
    }
    if (part1 != null) {
        part1 = part1.trim();
    }
    if (part2 != null) {
        part2 = part2.trim();
    }
    if (part3 != null) {
        part3 = part3.trim();
    }
    if (suffix != null) {
        suffix = suffix.trim();
    }

    final boolean hasPrefix = !TextUtils.isEmpty(prefix);
    final boolean hasPart1 = !TextUtils.isEmpty(part1);
    final boolean hasPart2 = !TextUtils.isEmpty(part2);
    final boolean hasPart3 = !TextUtils.isEmpty(part3);
    final boolean hasSuffix = !TextUtils.isEmpty(suffix);

    int presentParts = 0;
    if (hasPrefix) {
        presentParts++;
    }
    if (hasPart1) {
        presentParts++;
    }
    if (hasPart2) {
        presentParts++;
    }
    if (hasPart3) {
        presentParts++;
    }
    if (hasSuffix) {
        presentParts++;
    }

    if (presentParts == 0) {
        return null;
    }
    if (presentParts == 1) {
        // A single component needs no separators at all.
        if (hasPrefix) {
            return prefix;
        }
        if (hasPart1) {
            return part1;
        }
        if (hasPart2) {
            return part2;
        }
        if (hasPart3) {
            return part3;
        }
        return normalizedSuffix(suffix);
    }

    // Every appended part is non-empty, so "builder is non-empty" is the same as
    // "some earlier part was present".
    final StringBuilder joined = new StringBuilder();
    if (hasPrefix) {
        joined.append(prefix);
    }
    if (hasPart1) {
        if (joined.length() > 0) {
            joined.append(' ');
        }
        joined.append(part1);
    }
    if (hasPart2) {
        if (joined.length() > 0) {
            if (useCommaAfterPart1) {
                joined.append(',');
            }
            if (useSpace) {
                joined.append(' ');
            }
        }
        joined.append(part2);
    }
    if (hasPart3) {
        if (useSpace && joined.length() > 0) {
            joined.append(' ');
        }
        joined.append(part3);
    }
    if (hasSuffix) {
        if (joined.length() > 0) {
            if (useCommaAfterPart3) {
                joined.append(',');
            }
            if (useSpace) {
                joined.append(' ');
            }
        }
        joined.append(normalizedSuffix(suffix));
    }
    return joined.toString();
}
/**
 * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
 * e.g. "Jr." and "Sr.", but not "I", "II" and "III". The dotted form is accepted
 * when its upper-cased spelling is in the known-suffix set; a suffix that is empty
 * or already ends with a dot is returned unchanged.
 */
private String normalizedSuffix(String suffix) {
    if (suffix.length() == 0 || suffix.endsWith(".")) {
        return suffix;
    }
    final String dotted = suffix + '.';
    return mSuffixesSet.contains(dotted.toUpperCase()) ? dotted : suffix;
}
/**
 * Resolves an ambiguous name style through the splitter's language. An undefined
 * style becomes JAPANESE, KOREAN or CHINESE for those languages and WESTERN
 * otherwise; a generic CJK style becomes JAPANESE or KOREAN for those languages and
 * CHINESE otherwise. Any other style is returned unchanged.
 *
 * @param nameStyle See {@link FullNameStyle}.
 */
public int getAdjustedFullNameStyle(int nameStyle) {
    final boolean isCjk = nameStyle == FullNameStyle.CJK;
    if (nameStyle != FullNameStyle.UNDEFINED && !isCjk) {
        return nameStyle;
    }
    if (JAPANESE_LANGUAGE.equals(mLanguage)) {
        return FullNameStyle.JAPANESE;
    }
    if (KOREAN_LANGUAGE.equals(mLanguage)) {
        return FullNameStyle.KOREAN;
    }
    // CJK text in any other language is taken as Chinese; undefined text only when
    // the language itself is Chinese.
    if (isCjk || CHINESE_LANGUAGE.equals(mLanguage)) {
        return FullNameStyle.CHINESE;
    }
    return FullNameStyle.WESTERN;
}
/**
 * Consumes the first remaining token as the name prefix when its upper-cased form is
 * a known prefix. The dot that followed the token, if any, is restored.
 */
private void parsePrefix(Name name, NameTokenizer tokens) {
    final int first = tokens.mStartPointer;
    if (first == tokens.mEndPointer) {
        return;
    }
    final String candidate = tokens.mTokens[first];
    if (!mPrefixesSet.contains(candidate.toUpperCase())) {
        return;
    }
    name.prefix = tokens.hasDot(first) ? candidate + '.' : candidate;
    tokens.mStartPointer = first + 1;
}
/**
* Parses the last word(s) from the name if it is a suffix.
* <p>
* Three cases are tried in order: a last token that is set off by a comma, a last
* token that is itself a known suffix, and a dotted suffix assembled from several
* trailing tokens (e.g. "M.D."). On success the suffix is stored in {@code name} and
* the consumed tokens are removed from the end of the token window.
*/
private void parseSuffix(Name name, NameTokenizer tokens) {
if (tokens.mStartPointer == tokens.mEndPointer) {
return;
}
String lastToken = tokens.mTokens[tokens.mEndPointer - 1];
// Take care of an explicit comma-separated suffix
// (more than two tokens remain and the one before the last is followed by a comma).
// The last token is accepted without consulting the known-suffix set.
if (tokens.mEndPointer - tokens.mStartPointer > 2
&& tokens.hasComma(tokens.mEndPointer - 2)) {
if (tokens.hasDot(tokens.mEndPointer - 1)) {
lastToken += '.';
}
name.suffix = lastToken;
tokens.mEndPointer--;
return;
}
// A token longer than the longest known suffix cannot match anything below.
if (lastToken.length() > mMaxSuffixLength) {
return;
}
String normalized = lastToken.toUpperCase();
// Direct match: the suffix is stored as typed, without any trailing dot.
if (mSuffixesSet.contains(normalized)) {
name.suffix = lastToken;
tokens.mEndPointer--;
return;
}
// lastToken keeps the original spelling (dot only if one was typed);
// normalized is the upper-cased lookup key and always ends with a dot.
if (tokens.hasDot(tokens.mEndPointer - 1)) {
lastToken += '.';
}
normalized += ".";
// Take care of suffixes like M.D. and D.D.S.
// Walk backwards, prepending one token per iteration, until the key matches,
// grows past the longest known suffix, or the first remaining token is reached.
int pos = tokens.mEndPointer - 1;
while (normalized.length() <= mMaxSuffixLength) {
if (mSuffixesSet.contains(normalized)) {
name.suffix = lastToken;
// Drop every token that became part of the suffix.
tokens.mEndPointer = pos;
return;
}
if (pos == tokens.mStartPointer) {
break;
}
pos--;
// The stored spelling uses a dot only where the input had one, a space otherwise;
// the lookup key is always joined with dots.
if (tokens.hasDot(pos)) {
lastToken = tokens.mTokens[pos] + "." + lastToken;
} else {
lastToken = tokens.mTokens[pos] + " " + lastToken;
}
normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
}
}
/**
 * Extracts the family name from the remaining tokens. A first word followed by a
 * comma is the family name, as is "prefix word," (e.g. "de Sade,"). Otherwise the
 * last word is the family name, extended by a preceding last-name prefix if present.
 */
private void parseLastName(Name name, NameTokenizer tokens) {
    final int start = tokens.mStartPointer;
    if (start == tokens.mEndPointer) {
        return;
    }

    // "Family, Given ..." form.
    if (tokens.hasComma(start)) {
        name.familyName = tokens.mTokens[start];
        tokens.mStartPointer = start + 1;
        return;
    }

    // "von Cliburn, Given ..." form: a last-name prefix plus a word followed by a comma.
    final int second = start + 1;
    if (second < tokens.mEndPointer
            && tokens.hasComma(second)
            && isFamilyNamePrefix(tokens.mTokens[start])) {
        final String leading = tokens.hasDot(start)
                ? tokens.mTokens[start] + '.'
                : tokens.mTokens[start];
        name.familyName = leading + " " + tokens.mTokens[second];
        tokens.mStartPointer = start + 2;
        return;
    }

    // Finally, assume that the last word is the last name.
    final int last = tokens.mEndPointer - 1;
    name.familyName = tokens.mTokens[last];
    tokens.mEndPointer = last;

    // Take care of last names like "de Sade" and "von Cliburn".
    if (last - start > 0) {
        final int prefixIndex = last - 1;
        String lastNamePrefix = tokens.mTokens[prefixIndex];
        if (isFamilyNamePrefix(lastNamePrefix)) {
            if (tokens.hasDot(prefixIndex)) {
                lastNamePrefix += '.';
            }
            name.familyName = lastNamePrefix + " " + name.familyName;
            tokens.mEndPointer = prefixIndex;
        }
    }
}
/**
 * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de".
 * The word is upper-cased and matched with and without a trailing dot.
 */
private boolean isFamilyNamePrefix(String word) {
    final String upper = word.toUpperCase();
    if (mLastNamePrefixesSet.contains(upper)) {
        return true;
    }
    return mLastNamePrefixesSet.contains(upper + ".");
}
/**
 * Takes the last remaining token as the middle name when at least two tokens are
 * left, unless more than two are left and the token before it is a conjunction
 * (so the words around the conjunction stay together as given names).
 */
private void parseMiddleName(Name name, NameTokenizer tokens) {
    final int remaining = tokens.mEndPointer - tokens.mStartPointer;
    if (remaining <= 1) {
        return;
    }
    if (remaining > 2
            && mConjuctions.contains(tokens.mTokens[tokens.mEndPointer - 2].toUpperCase())) {
        return;
    }

    final int middleIndex = tokens.mEndPointer - 1;
    String middle = tokens.mTokens[middleIndex];
    if (tokens.hasDot(middleIndex)) {
        middle += '.';
    }
    name.middleName = middle;
    tokens.mEndPointer = middleIndex;
}
/**
 * Stores all remaining tokens as the given names. A single token is stored as is;
 * several tokens are joined with spaces, each followed by a dot where the input
 * had one.
 */
private void parseGivenNames(Name name, NameTokenizer tokens) {
    final int start = tokens.mStartPointer;
    final int end = tokens.mEndPointer;
    if (start == end) {
        return;
    }
    if (end - start == 1) {
        name.givenNames = tokens.mTokens[start];
        return;
    }

    final StringBuilder given = new StringBuilder();
    for (int index = start; index < end; index++) {
        if (index > start) {
            given.append(' ');
        }
        given.append(tokens.mTokens[index]);
        if (tokens.hasDot(index)) {
            given.append('.');
        }
    }
    name.givenNames = given.toString();
}
/**
 * Makes the best guess at the expected full name style based on the character set
 * used in the supplied name. If the phonetic name is also supplied, tries to
 * differentiate between Chinese, Japanese and Korean based on the alphabet used
 * for the phonetic name.
 */
public void guessNameStyle(Name name) {
    guessFullNameStyle(name);
    guessPhoneticNameStyle(name);

    // Let a definite phonetic style settle an undecided full-name style.
    final int refinedStyle = getAdjustedNameStyleBasedOnPhoneticNameStyle(
            name.fullNameStyle, name.phoneticNameStyle);
    name.fullNameStyle = refinedStyle;
}
/**
 * Updates the display name style according to the phonetic name style if we
 * were unsure about display name style based on the name components, but
 * phonetic name makes it more definitive. Only an UNDEFINED or generic CJK name
 * style is ever changed; Pinyin upgrades CJK (but not UNDEFINED) to CHINESE.
 */
public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
    final boolean undecided =
            nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK;
    if (!undecided || phoneticNameStyle == PhoneticNameStyle.UNDEFINED) {
        return nameStyle;
    }

    switch (phoneticNameStyle) {
        case PhoneticNameStyle.JAPANESE:
            return FullNameStyle.JAPANESE;
        case PhoneticNameStyle.KOREAN:
            return FullNameStyle.KOREAN;
        case PhoneticNameStyle.PINYIN:
            return nameStyle == FullNameStyle.CJK ? FullNameStyle.CHINESE : nameStyle;
        default:
            return nameStyle;
    }
}
/**
 * Makes the best guess at the expected full name style based on the character set
 * used in the supplied name. Does nothing when the style is already defined.
 */
private void guessFullNameStyle(NameSplitter.Name name) {
    if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
        return;
    }

    int style = guessFullNameStyle(name.givenNames);

    // A mix of Hanzi and Latin characters is common in China, so unless the given
    // names are conclusively Japanese or Korean the family name is examined too.
    final boolean inconclusive = style == FullNameStyle.UNDEFINED
            || style == FullNameStyle.CJK
            || style == FullNameStyle.WESTERN;
    if (inconclusive) {
        final int familyStyle = guessFullNameStyle(name.familyName);
        if (familyStyle != FullNameStyle.UNDEFINED) {
            // Any defined guess from the family name overrides the given-name guess.
            style = familyStyle;
        }
    }
    name.fullNameStyle = style;
}
/**
 * Guesses the full name style from the letters of {@code name}. Scanning stops at
 * the first CJK ideograph (the rest of the name then decides between Japanese,
 * Korean and generic CJK), Japanese phonetic letter or Korean letter. If none is
 * found the result is WESTERN when the name has at least one letter and UNDEFINED
 * otherwise, including for a null name.
 */
public int guessFullNameStyle(String name) {
    if (name == null) {
        return FullNameStyle.UNDEFINED;
    }

    int style = FullNameStyle.UNDEFINED;
    final int length = name.length();
    int codePoint;
    for (int index = 0; index < length; index += Character.charCount(codePoint)) {
        codePoint = name.codePointAt(index);
        if (!Character.isLetter(codePoint)) {
            continue;
        }
        final UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (!isLatinUnicodeBlock(block)) {
            if (isCJKUnicodeBlock(block)) {
                // We don't know if this is Chinese, Japanese or Korean -
                // trying to figure out by looking at other characters in the name
                return guessCJKNameStyle(name, index + Character.charCount(codePoint));
            }
            if (isJapanesePhoneticUnicodeBlock(block)) {
                return FullNameStyle.JAPANESE;
            }
            if (isKoreanUnicodeBlock(block)) {
                return FullNameStyle.KOREAN;
            }
        }
        style = FullNameStyle.WESTERN;
    }
    return style;
}
/**
 * Scans {@code name} from {@code offset} for a letter that identifies the name as
 * Japanese or Korean. Returns the generic CJK style when no such letter is found.
 */
private int guessCJKNameStyle(String name, int offset) {
    final int length = name.length();
    int codePoint;
    for (int index = offset; index < length; index += Character.charCount(codePoint)) {
        codePoint = name.codePointAt(index);
        if (!Character.isLetter(codePoint)) {
            continue;
        }
        final UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (isJapanesePhoneticUnicodeBlock(block)) {
            return FullNameStyle.JAPANESE;
        }
        if (isKoreanUnicodeBlock(block)) {
            return FullNameStyle.KOREAN;
        }
    }
    return FullNameStyle.CJK;
}
/**
 * Guesses the phonetic name style from the phonetic family, given and middle names,
 * in that order, and stores the first defined guess in {@code name}. Does nothing
 * when the phonetic style is already defined; leaves it UNDEFINED when no component
 * yields a guess.
 */
private void guessPhoneticNameStyle(NameSplitter.Name name) {
    if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
        return;
    }

    // The per-string guess only returns PhoneticNameStyle values, so the first
    // non-UNDEFINED answer is final.
    final String[] candidates = {
        name.phoneticFamilyName,
        name.phoneticGivenName,
        name.phoneticMiddleName,
    };
    for (String candidate : candidates) {
        final int guess = guessPhoneticNameStyle(candidate);
        if (guess != PhoneticNameStyle.UNDEFINED) {
            name.phoneticNameStyle = guess;
            return;
        }
    }
}
/**
 * Guesses the phonetic name style from the first letter of {@code name} that is
 * Japanese phonetic (JAPANESE), Korean (KOREAN) or Latin (PINYIN). Returns UNDEFINED
 * when the name is null or contains no such letter.
 */
public int guessPhoneticNameStyle(String name) {
    if (name == null) {
        return PhoneticNameStyle.UNDEFINED;
    }

    final int length = name.length();
    int codePoint;
    for (int index = 0; index < length; index += Character.charCount(codePoint)) {
        codePoint = name.codePointAt(index);
        if (!Character.isLetter(codePoint)) {
            continue;
        }
        final UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (isJapanesePhoneticUnicodeBlock(block)) {
            return PhoneticNameStyle.JAPANESE;
        }
        if (isKoreanUnicodeBlock(block)) {
            return PhoneticNameStyle.KOREAN;
        }
        if (isLatinUnicodeBlock(block)) {
            return PhoneticNameStyle.PINYIN;
        }
    }
    return PhoneticNameStyle.UNDEFINED;
}
/** Returns true for the Basic Latin, Latin-1 Supplement and Latin Extended A/B/Additional blocks. */
private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
    if (unicodeBlock == UnicodeBlock.BASIC_LATIN) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B) {
        return true;
    }
    return unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
}
/**
 * Returns true for the CJK ideograph, radical, symbol and compatibility blocks, i.e.
 * blocks shared by Chinese, Japanese and Korean text.
 */
private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
    final boolean isIdeograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    if (isIdeograph) {
        return true;
    }

    final boolean isSymbolOrRadical = block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
            || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT;
    if (isSymbolOrRadical) {
        return true;
    }

    return block == UnicodeBlock.CJK_COMPATIBILITY
            || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
}
/** Returns true for the Hangul Syllables, Hangul Jamo and Hangul Compatibility Jamo blocks. */
private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
    if (unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.HANGUL_JAMO) {
        return true;
    }
    return unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
}
/**
 * Returns true for the Katakana, Katakana Phonetic Extensions, Hiragana and
 * Halfwidth and Fullwidth Forms blocks.
 */
private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
    if (unicodeBlock == UnicodeBlock.KATAKANA) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
        return true;
    }
    return unicodeBlock == UnicodeBlock.HIRAGANA;
}
}