src/com/android/providers/contacts/NameSplitter.java - platform/packages/providers/ContactsProvider - Git at Google

 /*
  * Copyright (C) 2009 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License
  */
 package com.android.providers.contacts;

 import android.content.ContentValues;
 import android.provider.ContactsContract.CommonDataKinds.StructuredName;
 import android.provider.ContactsContract.FullNameStyle;
 import android.provider.ContactsContract.PhoneticNameStyle;
 import android.text.TextUtils;
 import android.util.ArraySet;

 import com.android.providers.contacts.util.NeededForTesting;

 import java.lang.Character.UnicodeBlock;
 import java.util.Locale;
 import java.util.StringTokenizer;

 /**
  * The purpose of this class is to split a full name into given names and last
  * name. The logic only supports having a single last name. If the full name has
  * multiple last names the output will be incorrect.
  * <p>
  * Core algorithm:
  * <ol>
  * <li>Remove the suffixes (III, Ph.D., M.D.).</li>
  * <li>Remove the prefixes (Mr., Pastor, Reverend, Sir).</li>
  * <li>Assign the last remaining token as the last name.</li>
  * <li>If the word preceding the last name is a known last-name prefix (e.g. "von",
  * "de"), make that word part of the last name as well.</li>
  * <li>Assign the rest of the words as the "given names".</li>
  * </ol>
  */
 public class NameSplitter {

     /**
      * Capacity of the token array used by {@link NameTokenizer}; once this many tokens
      * have been collected the rest of the full name is not read.
      */
     public static final int MAX_TOKENS = 10;

     // Lower-cased language codes that are compared against mLanguage when a default
     // name style has to be derived from the locale.
     private static final String JAPANESE_LANGUAGE = Locale.JAPANESE.getLanguage().toLowerCase();
     private static final String KOREAN_LANGUAGE = Locale.KOREAN.getLanguage().toLowerCase();

     // This includes simplified and traditional Chinese
     private static final String CHINESE_LANGUAGE = Locale.CHINESE.getLanguage().toLowerCase();

     // Lookup sets built by convertToSet() from the comma-separated constructor arguments;
     // every entry is trimmed and upper-cased.
     private final ArraySet<String> mPrefixesSet;
     private final ArraySet<String> mSuffixesSet;
     // Length of the longest entry in mSuffixesSet, computed once in the constructor.
     private final int mMaxSuffixLength;
     private final ArraySet<String> mLastNamePrefixesSet;
     private final ArraySet<String> mConjuctions;
     // Locale passed to the constructor, or the default locale when null was passed.
     private final Locale mLocale;
     // Lower-cased language code of mLocale.
     private final String mLanguage;

     /**
      * Two-character Korean family names, checked when a Korean full name has no separator.
      * http://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EB%B3%B5%EC%84%B1
      */
     private static final String[] KOREAN_TWO_CHARCTER_FAMILY_NAMES = {
         "\uAC15\uC804", // Gang Jeon
         "\uB0A8\uAD81", // Nam Goong
         "\uB3C5\uACE0", // Dok Go
         "\uB3D9\uBC29", // Dong Bang
         "\uB9DD\uC808", // Mang Jeol
         "\uC0AC\uACF5", // Sa Gong
         "\uC11C\uBB38", // Seo Moon
         "\uC120\uC6B0", // Seon Woo
         "\uC18C\uBD09", // So Bong
         "\uC5B4\uAE08", // Uh Geum
         "\uC7A5\uACE1", // Jang Gok
         "\uC81C\uAC08", // Je Gal
         "\uD669\uBCF4"  // Hwang Bo
     };

     public static class Name {
         public String prefix;
         public String givenNames;
         public String middleName;
         public String familyName;
         public String suffix;

         public int fullNameStyle;

         public String phoneticFamilyName;
         public String phoneticMiddleName;
         public String phoneticGivenName;

         public int phoneticNameStyle;

         public Name() {
         }

         public Name(String prefix, String givenNames, String middleName, String familyName,
                 String suffix) {
             this.prefix = prefix;
             this.givenNames = givenNames;
             this.middleName = middleName;
             this.familyName = familyName;
             this.suffix = suffix;
         }

         @NeededForTesting
         public String getPrefix() {
             return prefix;
         }

         public String getGivenNames() {
             return givenNames;
         }

         public String getMiddleName() {
             return middleName;
         }

         public String getFamilyName() {
             return familyName;
         }

         @NeededForTesting
         public String getSuffix() {
             return suffix;
         }

         public int getFullNameStyle() {
             return fullNameStyle;
         }

         public String getPhoneticFamilyName() {
             return phoneticFamilyName;
         }

         public String getPhoneticMiddleName() {
             return phoneticMiddleName;
         }

         public String getPhoneticGivenName() {
             return phoneticGivenName;
         }

         public int getPhoneticNameStyle() {
             return phoneticNameStyle;
         }

         public void fromValues(ContentValues values) {
             prefix = values.getAsString(StructuredName.PREFIX);
             givenNames = values.getAsString(StructuredName.GIVEN_NAME);
             middleName = values.getAsString(StructuredName.MIDDLE_NAME);
             familyName = values.getAsString(StructuredName.FAMILY_NAME);
             suffix = values.getAsString(StructuredName.SUFFIX);

             Integer integer = values.getAsInteger(StructuredName.FULL_NAME_STYLE);
             fullNameStyle = integer == null ? FullNameStyle.UNDEFINED : integer;

             phoneticFamilyName = values.getAsString(StructuredName.PHONETIC_FAMILY_NAME);
             phoneticMiddleName = values.getAsString(StructuredName.PHONETIC_MIDDLE_NAME);
             phoneticGivenName = values.getAsString(StructuredName.PHONETIC_GIVEN_NAME);

             integer = values.getAsInteger(StructuredName.PHONETIC_NAME_STYLE);
             phoneticNameStyle = integer == null ? PhoneticNameStyle.UNDEFINED : integer;
         }

         public void toValues(ContentValues values) {
             putValueIfPresent(values, StructuredName.PREFIX, prefix);
             putValueIfPresent(values, StructuredName.GIVEN_NAME, givenNames);
             putValueIfPresent(values, StructuredName.MIDDLE_NAME, middleName);
             putValueIfPresent(values, StructuredName.FAMILY_NAME, familyName);
             putValueIfPresent(values, StructuredName.SUFFIX, suffix);
             values.put(StructuredName.FULL_NAME_STYLE, fullNameStyle);
             putValueIfPresent(values, StructuredName.PHONETIC_FAMILY_NAME, phoneticFamilyName);
             putValueIfPresent(values, StructuredName.PHONETIC_MIDDLE_NAME, phoneticMiddleName);
             putValueIfPresent(values, StructuredName.PHONETIC_GIVEN_NAME, phoneticGivenName);
             values.put(StructuredName.PHONETIC_NAME_STYLE, phoneticNameStyle);
         }

         private void putValueIfPresent(ContentValues values, String name, String value) {
             if (value != null) {
                 values.put(name, value);
             }
         }

         public void clear() {
             prefix = null;
             givenNames = null;
             middleName = null;
             familyName = null;
             suffix = null;
             fullNameStyle = FullNameStyle.UNDEFINED;
             phoneticFamilyName = null;
             phoneticMiddleName = null;
             phoneticGivenName = null;
             phoneticNameStyle = PhoneticNameStyle.UNDEFINED;
         }

         public boolean isEmpty() {
             return TextUtils.isEmpty(givenNames)
                     && TextUtils.isEmpty(middleName)
                     && TextUtils.isEmpty(familyName)
                     && TextUtils.isEmpty(suffix)
                     && TextUtils.isEmpty(phoneticFamilyName)
                     && TextUtils.isEmpty(phoneticMiddleName)
                     && TextUtils.isEmpty(phoneticGivenName);
         }

         @Override
         public String toString() {
             return "[prefix: " + prefix + " given: " + givenNames + " middle: " + middleName
                     + " family: " + familyName + " suffix: " + suffix + " ph/given: "
                     + phoneticGivenName + " ph/middle: " + phoneticMiddleName + " ph/family: "
                     + phoneticFamilyName + "]";
         }
     }

     /**
      * Splits a full name on spaces, dots and commas. Up to MAX_TOKENS words are kept in
      * {@code mTokens}; for each kept word a bit records whether a dot or a comma was seen
      * after it. {@code mStartPointer} (inclusive) and {@code mEndPointer} (exclusive)
      * delimit the tokens that have not been consumed yet.
      */
     private static class NameTokenizer extends StringTokenizer {
         private final String[] mTokens;
         private int mDotBitmask;
         private int mCommaBitmask;
         private int mStartPointer;
         private int mEndPointer;

         public NameTokenizer(String fullName) {
             // Delimiters are returned as tokens so that dots and commas can be recorded.
             super(fullName, " .,", true);

             mTokens = new String[MAX_TOKENS];

             while (hasMoreTokens() && mEndPointer < MAX_TOKENS) {
                 final String piece = nextToken();
                 // StringTokenizer never hands out an empty token.
                 final char lead = piece.charAt(0);
                 if (lead == ' ') {
                     continue;
                 }

                 // A dot or comma only counts as punctuation when a word precedes it;
                 // before the first word it is stored like any other token.
                 final boolean followsWord = mEndPointer > 0;
                 if (followsWord && lead == '.') {
                     mDotBitmask |= 1 << (mEndPointer - 1);
                 } else if (followsWord && lead == ',') {
                     mCommaBitmask |= 1 << (mEndPointer - 1);
                 } else {
                     mTokens[mEndPointer++] = piece;
                 }
             }
         }

         /**
          * Returns true if the token is followed by a dot in the original full name.
          */
         public boolean hasDot(int index) {
             return ((mDotBitmask >>> index) & 1) != 0;
         }

         /**
          * Returns true if the token is followed by a comma in the original full name.
          */
         public boolean hasComma(int index) {
             return ((mCommaBitmask >>> index) & 1) != 0;
         }
     }

     /**
      * Constructor.
      *
      * @param commonPrefixes comma-separated list of common prefixes,
      *            e.g. "Mr, Ms, Mrs"
      * @param commonLastNamePrefixes comma-separated list of common last name prefixes,
      *            e.g. "d', st, st., von"
      * @param commonSuffixes comma-separated list of common suffixes,
      *            e.g. "Jr, M.D., MD, D.D.S."
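      * @param commonConjunctions comma-separated list of common conjunctions,
      *            e.g. "AND, Or"
      * @param locale locale whose language is used to pick a default name style;
      *            the default locale is used when this is null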
      */
     public NameSplitter(String commonPrefixes, String commonLastNamePrefixes,
             String commonSuffixes, String commonConjunctions, Locale locale) {
         // TODO: refactor this to use <string-array> resources
         mPrefixesSet = convertToSet(commonPrefixes);
         mLastNamePrefixesSet = convertToSet(commonLastNamePrefixes);
         mSuffixesSet = convertToSet(commonSuffixes);
         mConjuctions = convertToSet(commonConjunctions);

         // Fall back to the default locale when the caller did not supply one.
         if (locale == null) {
             mLocale = Locale.getDefault();
         } else {
             mLocale = locale;
         }
         mLanguage = mLocale.getLanguage().toLowerCase();

         // Remember the longest known suffix so that longer tokens can be rejected early.
         int longest = 0;
         for (String knownSuffix : mSuffixesSet) {
             longest = Math.max(longest, knownSuffix.length());
         }
         mMaxSuffixLength = longest;
     }

     /**
      * Converts a comma-separated list of Strings to a set of Strings. Trims strings
      * and converts them to upper case.
      */
     private static ArraySet<String> convertToSet(String strings) {
         final ArraySet<String> result = new ArraySet<>();
         if (strings == null) {
             // No list supplied: the caller gets an empty set.
             return result;
         }
         for (String entry : strings.split(",")) {
             result.add(entry.trim().toUpperCase());
         }
         return result;
     }

     /**
      * Parses a full name and copies its tokens into the supplied array.
      *
      * @return the number of tokens written to {@code tokens}
      */
     public int tokenize(String[] tokens, String fullName) {
         if (fullName == null) {
             return 0;
         }

         final NameTokenizer tokenizer = new NameTokenizer(fullName);
         final int count = tokenizer.mEndPointer - tokenizer.mStartPointer;
         if (count == 0) {
             // The name contained no tokens; leave the output array untouched.
             return 0;
         }

         // Copy the parsed tokens to the front of the caller's array. The array must have
         // room for all of them (at most MAX_TOKENS); otherwise this throws.
         System.arraycopy(tokenizer.mTokens, tokenizer.mStartPointer, tokens, 0, count);
         return count;
     }


     /**
      * Parses a full name and returns parsed components in the Name object.
      */
     public void split(Name name, String fullName) {
         if (fullName != null) {
             final int guessedStyle = guessFullNameStyle(fullName);
             // A generic CJK guess is narrowed down using the language of the locale.
             final int style = (guessedStyle == FullNameStyle.CJK)
                     ? getAdjustedFullNameStyle(guessedStyle)
                     : guessedStyle;
             split(name, fullName, style);
         }
     }

     /**
      * Parses a full name and returns parsed components in the Name object
      * with a given fullNameStyle.
      */
     public void split(Name name, String fullName, int fullNameStyle) {
         if (fullName == null) {
             return;
         }

         name.fullNameStyle = fullNameStyle;

         // Dispatch to the splitter for the requested style; anything that is not
         // explicitly Chinese, Japanese or Korean is handled as a Western name.
         if (fullNameStyle == FullNameStyle.CHINESE) {
             splitChineseName(name, fullName);
         } else if (fullNameStyle == FullNameStyle.JAPANESE) {
             splitJapaneseName(name, fullName);
         } else if (fullNameStyle == FullNameStyle.KOREAN) {
             splitKoreanName(name, fullName);
         } else {
             splitWesternName(name, fullName);
         }
     }

     /**
      * Splits a full name composed according to the Western tradition:
      * <pre>
      *   [prefix] given name(s) [[middle name] family name] [, suffix]
      *   [prefix] family name, given name [middle name] [,suffix]
      * </pre>
      */
     private void splitWesternName(Name name, String fullName) {
         final NameTokenizer tokens = new NameTokenizer(fullName);
         parsePrefix(name, tokens);

         // If the name consists of just one or two tokens, treat them as first/last name,
         // not as suffix.  Example: John Ma; Ma is last name, not "M.A.".
         // Note that mEndPointer still counts a prefix token consumed above.
         final boolean moreThanTwoTokens = tokens.mEndPointer > 2;
         if (moreThanTwoTokens) {
             parseSuffix(name, tokens);
         }

         final int remaining = tokens.mEndPointer - tokens.mStartPointer;
         if (remaining == 1 && name.prefix == null) {
             // A lone word without a prefix is taken as the given name.
             name.givenNames = tokens.mTokens[tokens.mStartPointer];
             return;
         }

         // Each step consumes its tokens from the ends of the remaining range.
         parseLastName(name, tokens);
         parseMiddleName(name, tokens);
         parseGivenNames(name, tokens);
     }

     /**
      * Splits a full name composed according to the Chinese tradition:
      * <pre>
      *   [family name [middle name]] given name
      * </pre>
      */
     private void splitChineseName(Name name, String fullName) {
         // Whitespace-separated words: the first is the family name, the last is the given
         // name, and everything in between is concatenated into the middle name.
         StringTokenizer tokenizer = new StringTokenizer(fullName);
         while (tokenizer.hasMoreTokens()) {
             String token = tokenizer.nextToken();
             if (name.givenNames == null) {
                 name.givenNames = token;
             } else if (name.familyName == null) {
                 name.familyName = name.givenNames;
                 name.givenNames = token;
             } else if (name.middleName == null) {
                 name.middleName = name.givenNames;
                 name.givenNames = token;
             } else {
                 name.middleName = name.middleName + name.givenNames;
                 name.givenNames = token;
             }
         }

         // A single word is split by position. Work on the trimmed word rather than on the
         // raw full name so that surrounding whitespace neither skews the length check nor
         // ends up inside the name components.
         if (name.givenNames != null && name.familyName == null && name.middleName == null) {
             final String word = fullName.trim();
             final int length = word.length();
             if (length == 2) {
                 name.familyName = word.substring(0, 1);
                 name.givenNames = word.substring(1);
             } else if (length == 3) {
                 name.familyName = word.substring(0, 1);
                 name.middleName = word.substring(1, 2);
                 name.givenNames = word.substring(2);
             } else if (length == 4) {
                 // Four characters: assume a two-character family name.
                 name.familyName = word.substring(0, 2);
                 name.middleName = word.substring(2, 3);
                 name.givenNames = word.substring(3);
             }
             // Any other length keeps the whole word as the given name.
         }
     }

     /**
      * Splits a full name composed according to the Japanese tradition:
      * <pre>
      *   [family name] given name(s)
      * </pre>
      */
     private void splitJapaneseName(Name name, String fullName) {
         final StringTokenizer words = new StringTokenizer(fullName);
         while (words.hasMoreTokens()) {
             final String word = words.nextToken();
             if (name.givenNames == null) {
                 // First word: provisionally the given name.
                 name.givenNames = word;
                 continue;
             }
             if (name.familyName == null) {
                 // Second word: the first one was the family name after all.
                 name.familyName = name.givenNames;
                 name.givenNames = word;
                 continue;
             }
             // Every further word is appended to the given names.
             name.givenNames = name.givenNames + " " + word;
         }
     }

     /**
      * Splits a full name composed according to the Korean tradition:
      * <pre>
      *   [family name] given name(s)
      * </pre>
      */
     private void splitKoreanName(Name name, String fullName) {
         StringTokenizer tokenizer = new StringTokenizer(fullName);
         final int tokenCount = tokenizer.countTokens();
         if (tokenCount == 0) {
             // Empty or whitespace-only input: there is nothing to split, and slicing the
             // raw string would either throw or produce a blank family name.
             return;
         }

         if (tokenCount > 1) {
             // Each name can be identified by separators.
             while (tokenizer.hasMoreTokens()) {
                 String token = tokenizer.nextToken();
                 if (name.givenNames == null) {
                     name.givenNames = token;
                 } else if (name.familyName == null) {
                     name.familyName = name.givenNames;
                     name.givenNames = token;
                 } else {
                     name.givenNames += " " + token;
                 }
             }
         } else {
             // There is no separator. Try to guess family name from the single word,
             // which the tokenizer has already stripped of surrounding whitespace.
             final String word = tokenizer.nextToken();

             // The length of most family names is 1.
             int familyNameLength = 1;

             // Compare with 2-length family names.
             for (String twoLengthFamilyName : KOREAN_TWO_CHARCTER_FAMILY_NAMES) {
                 if (word.startsWith(twoLengthFamilyName)) {
                     familyNameLength = 2;
                     break;
                 }
             }

             name.familyName = word.substring(0, familyNameLength);
             if (word.length() > familyNameLength) {
                 name.givenNames = word.substring(familyNameLength);
             }
         }
     }

     /**
      * Concatenates components of a name according to the rules dictated by the name style.
      *
      * @param givenNameFirst is ignored for CJK display name styles
      */
     public String join(Name name, boolean givenNameFirst, boolean includePrefix) {
         final String prefix = includePrefix ? name.prefix : null;
         final int style = name.fullNameStyle;

         // Chinese, Korean and generic CJK names: family, middle, given with no separators.
         if (style == FullNameStyle.CJK
                 || style == FullNameStyle.CHINESE
                 || style == FullNameStyle.KOREAN) {
             return join(prefix, name.familyName, name.middleName, name.givenNames,
                     name.suffix, false, false, false);
         }

         // Japanese names: same order, separated by spaces.
         if (style == FullNameStyle.JAPANESE) {
             return join(prefix, name.familyName, name.middleName, name.givenNames,
                     name.suffix, true, false, false);
         }

         // Everything else: space-separated, with a comma before the suffix and, in
         // family-name-first order, a comma after the family name.
         if (!givenNameFirst) {
             return join(prefix, name.familyName, name.givenNames, name.middleName,
                     name.suffix, true, true, true);
         }
         return join(prefix, name.givenNames, name.middleName, name.familyName,
                 name.suffix, true, false, true);
     }

     /**
      * Concatenates components of the phonetic name following the CJK tradition:
      * family name + middle name + given name(s).
      */
     public String joinPhoneticName(Name name) {
         // No prefix or suffix; parts are separated by spaces and never by commas.
         final String family = name.phoneticFamilyName;
         final String middle = name.phoneticMiddleName;
         final String given = name.phoneticGivenName;
         return join(null, family, middle, given, null, true, false, false);
     }

     /**
      * Concatenates parts of a full name inserting spaces and commas as specified.
      */
     private String join(String prefix, String part1, String part2, String part3, String suffix,
             boolean useSpace, boolean useCommaAfterPart1, boolean useCommaAfterPart3) {
         // Strip surrounding whitespace from every supplied component.
         if (prefix != null) {
             prefix = prefix.trim();
         }
         if (part1 != null) {
             part1 = part1.trim();
         }
         if (part2 != null) {
             part2 = part2.trim();
         }
         if (part3 != null) {
             part3 = part3.trim();
         }
         if (suffix != null) {
             suffix = suffix.trim();
         }

         final boolean hasPrefix = !TextUtils.isEmpty(prefix);
         final boolean hasPart1 = !TextUtils.isEmpty(part1);
         final boolean hasPart2 = !TextUtils.isEmpty(part2);
         final boolean hasPart3 = !TextUtils.isEmpty(part3);
         final boolean hasSuffix = !TextUtils.isEmpty(suffix);

         int presentCount = 0;
         if (hasPrefix) {
             presentCount++;
         }
         if (hasPart1) {
             presentCount++;
         }
         if (hasPart2) {
             presentCount++;
         }
         if (hasPart3) {
             presentCount++;
         }
         if (hasSuffix) {
             presentCount++;
         }

         // Nothing to join.
         if (presentCount == 0) {
             return null;
         }

         // A single component is returned as is; only a lone suffix gets normalized.
         if (presentCount == 1) {
             if (hasPrefix) {
                 return prefix;
             }
             if (hasPart1) {
                 return part1;
             }
             if (hasPart2) {
                 return part2;
             }
             if (hasPart3) {
                 return part3;
             }
             return normalizedSuffix(suffix);
         }

         // Several components: a separator is emitted only when something was already
         // written, which is exactly when the builder is non-empty.
         final StringBuilder joined = new StringBuilder();

         if (hasPrefix) {
             joined.append(prefix);
         }

         if (hasPart1) {
             // The prefix is always followed by a space, regardless of useSpace.
             if (joined.length() > 0) {
                 joined.append(' ');
             }
             joined.append(part1);
         }

         if (hasPart2) {
             if (joined.length() > 0) {
                 if (useCommaAfterPart1) {
                     joined.append(',');
                 }
                 if (useSpace) {
                     joined.append(' ');
                 }
             }
             joined.append(part2);
         }

         if (hasPart3) {
             if (useSpace && joined.length() > 0) {
                 joined.append(' ');
             }
             joined.append(part3);
         }

         if (hasSuffix) {
             if (joined.length() > 0) {
                 if (useCommaAfterPart3) {
                     joined.append(',');
                 }
                 if (useSpace) {
                     joined.append(' ');
                 }
             }
             joined.append(normalizedSuffix(suffix));
         }

         return joined.toString();
     }

     /**
      * Puts a dot after the supplied suffix if that is the accepted form of the suffix,
      * e.g. "Jr." and "Sr.", but not "I", "II" and "III".
      */
     private String normalizedSuffix(String suffix) {
         // Empty suffixes and suffixes that already end with a dot are left alone.
         if (suffix.isEmpty() || suffix.endsWith(".")) {
             return suffix;
         }

         // Use the dotted form only when the suffix list contains it.
         final String dotted = suffix + '.';
         return mSuffixesSet.contains(dotted.toUpperCase()) ? dotted : suffix;
     }

     /**
      * If the supplied name style is undefined, returns a default based on the language,
      * otherwise returns the supplied name style itself.
      *
      * @param nameStyle See {@link FullNameStyle}.
      */
     public int getAdjustedFullNameStyle(int nameStyle) {
         final boolean isUndefined = nameStyle == FullNameStyle.UNDEFINED;
         final boolean isGenericCjk = nameStyle == FullNameStyle.CJK;

         // A concrete style is returned unchanged.
         if (!isUndefined && !isGenericCjk) {
             return nameStyle;
         }

         // Both adjustable styles map Japanese and Korean locales the same way.
         if (JAPANESE_LANGUAGE.equals(mLanguage)) {
             return FullNameStyle.JAPANESE;
         }
         if (KOREAN_LANGUAGE.equals(mLanguage)) {
             return FullNameStyle.KOREAN;
         }

         // An undefined style becomes Western unless the locale language is Chinese;
         // a generic CJK style falls back to Chinese for every remaining language.
         if (isUndefined && !CHINESE_LANGUAGE.equals(mLanguage)) {
             return FullNameStyle.WESTERN;
         }
         return FullNameStyle.CHINESE;
     }

     /**
      * Parses the first word from the name if it is a prefix.
      */
     private void parsePrefix(Name name, NameTokenizer tokens) {
         final int first = tokens.mStartPointer;
         if (first == tokens.mEndPointer) {
             return;
         }

         final String candidate = tokens.mTokens[first];
         if (!mPrefixesSet.contains(candidate.toUpperCase())) {
             return;
         }

         // Keep the dot that followed the prefix in the input, then consume the token.
         name.prefix = tokens.hasDot(first) ? candidate + '.' : candidate;
         tokens.mStartPointer = first + 1;
     }

     /**
      * Parses the last word(s) from the name if it is a suffix.
      */
     private void parseSuffix(Name name, NameTokenizer tokens) {
         // Nothing left to inspect.
         if (tokens.mStartPointer == tokens.mEndPointer) {
             return;
         }

         String lastToken = tokens.mTokens[tokens.mEndPointer - 1];

         // Take care of an explicit comma-separated suffix
         // With more than two tokens left and a comma after the second-to-last one, the
         // last token is taken as the suffix without consulting the suffix list.
         if (tokens.mEndPointer - tokens.mStartPointer > 2
                 && tokens.hasComma(tokens.mEndPointer - 2)) {
             if (tokens.hasDot(tokens.mEndPointer - 1)) {
                 lastToken += '.';
             }
             name.suffix = lastToken;
             tokens.mEndPointer--;
             return;
         }

         // A token longer than the longest known suffix cannot be a suffix.
         if (lastToken.length() > mMaxSuffixLength) {
             return;
         }

         // Exact match against the suffix list; the token is stored without a trailing dot.
         String normalized = lastToken.toUpperCase();
         if (mSuffixesSet.contains(normalized)) {
             name.suffix = lastToken;
             tokens.mEndPointer--;
             return;
         }

         // From here on lastToken holds the suffix as it will be stored (original case and
         // punctuation), while normalized holds the upper-cased, dot-joined lookup form.
         if (tokens.hasDot(tokens.mEndPointer - 1)) {
             lastToken += '.';
         }
         normalized += ".";

         // Take care of suffixes like M.D. and D.D.S.
         // pos is the index of the first token currently included in the candidate.
         int pos = tokens.mEndPointer - 1;
         while (normalized.length() <= mMaxSuffixLength) {

             if (mSuffixesSet.contains(normalized)) {
                 name.suffix = lastToken;
                 // Consume every token that became part of the suffix.
                 tokens.mEndPointer = pos;
                 return;
             }

             // No earlier token is available to extend the candidate.
             if (pos == tokens.mStartPointer) {
                 break;
             }

             // Prepend the previous token, keeping the separator seen in the input for the
             // stored form and always using a dot for the lookup form.
             pos--;
             if (tokens.hasDot(pos)) {
                 lastToken = tokens.mTokens[pos] + "." + lastToken;
             } else {
                 lastToken = tokens.mTokens[pos] + " " + lastToken;
             }

             normalized = tokens.mTokens[pos].toUpperCase() + "." + normalized;
         }
     }

     private void parseLastName(Name name, NameTokenizer tokens) {
         final int first = tokens.mStartPointer;
         if (first == tokens.mEndPointer) {
             return;
         }

         // If the first word is followed by a comma, assume that it's the family name
         if (tokens.hasComma(first)) {
             name.familyName = tokens.mTokens[first];
             tokens.mStartPointer = first + 1;
             return;
         }

         // If the second word is followed by a comma and the first word
         // is a last name prefix as in "de Sade" and "von Cliburn", treat
         // the first two words as the family name.
         final int second = first + 1;
         if (second < tokens.mEndPointer
                 && tokens.hasComma(second)
                 && isFamilyNamePrefix(tokens.mTokens[first])) {
             final String leadingWord = tokens.mTokens[first];
             final String particle = tokens.hasDot(first) ? leadingWord + '.' : leadingWord;
             name.familyName = particle + " " + tokens.mTokens[second];
             tokens.mStartPointer = first + 2;
             return;
         }

         // Finally, assume that the last word is the last name
         final int last = tokens.mEndPointer - 1;
         name.familyName = tokens.mTokens[last];
         tokens.mEndPointer = last;

         // Take care of last names like "de Sade" and "von Cliburn"
         if (last > first) {
             final int particleIndex = last - 1;
             String particle = tokens.mTokens[particleIndex];
             if (isFamilyNamePrefix(particle)) {
                 if (tokens.hasDot(particleIndex)) {
                     particle += '.';
                 }
                 name.familyName = particle + " " + name.familyName;
                 tokens.mEndPointer = particleIndex;
             }
         }
     }

     /**
      * Returns true if the supplied word is an accepted last name prefix, e.g. "von", "de"
      */
     private boolean isFamilyNamePrefix(String word) {
         final String upper = word.toUpperCase();
         // Accept the word itself as well as its dotted form, e.g. "st" and "st.".
         if (mLastNamePrefixesSet.contains(upper)) {
             return true;
         }
         return mLastNamePrefixesSet.contains(upper + ".");
     }


     private void parseMiddleName(Name name, NameTokenizer tokens) {
         final int remaining = tokens.mEndPointer - tokens.mStartPointer;
         // With fewer than two tokens left there is no middle name to take.
         if (remaining < 2) {
             return;
         }

         final int last = tokens.mEndPointer - 1;
         // With more than two tokens left, a conjunction right before the last token
         // keeps that token with the given names instead.
         if (remaining != 2
                 && mConjuctions.contains(tokens.mTokens[last - 1].toUpperCase())) {
             return;
         }

         String middle = tokens.mTokens[last];
         if (tokens.hasDot(last)) {
             middle += '.';
         }
         name.middleName = middle;
         tokens.mEndPointer = last;
     }

     private void parseGivenNames(Name name, NameTokenizer tokens) {
         final int first = tokens.mStartPointer;
         final int end = tokens.mEndPointer;
         if (first == end) {
             return;
         }

         // A single remaining token is used verbatim, without any trailing dot.
         if (end - first == 1) {
             name.givenNames = tokens.mTokens[first];
             return;
         }

         // Several tokens are joined with spaces, restoring the dots seen in the input.
         final StringBuilder joined = new StringBuilder();
         String separator = "";
         for (int i = first; i < end; i++) {
             joined.append(separator).append(tokens.mTokens[i]);
             if (tokens.hasDot(i)) {
                 joined.append('.');
             }
             separator = " ";
         }
         name.givenNames = joined.toString();
     }

     /**
      * Makes the best guess at the expected full name style based on the character set
      * used in the supplied name.  If the phonetic name is also supplied, tries to
      * differentiate between Chinese, Japanese and Korean based on the alphabet used
      * for the phonetic name.
      */
     public void guessNameStyle(Name name) {
         // Fill in both style fields first, then let the phonetic style refine the
         // full name style.
         guessFullNameStyle(name);
         guessPhoneticNameStyle(name);

         final int fullStyle = name.fullNameStyle;
         final int phoneticStyle = name.phoneticNameStyle;
         name.fullNameStyle =
                 getAdjustedNameStyleBasedOnPhoneticNameStyle(fullStyle, phoneticStyle);
     }

     /**
      * Refines an inconclusive display name style using the phonetic name style.
      * <p>
      * Only {@code UNDEFINED} and {@code CJK} name styles are ever changed: a Japanese or
      * Korean phonetic style maps to the matching full name style, and a Pinyin phonetic
      * style turns {@code CJK} (but not {@code UNDEFINED}) into {@code CHINESE}. In every
      * other case {@code nameStyle} is returned as is.
      */
     public int getAdjustedNameStyleBasedOnPhoneticNameStyle(int nameStyle, int phoneticNameStyle) {
         final boolean inconclusive =
                 nameStyle == FullNameStyle.UNDEFINED || nameStyle == FullNameStyle.CJK;
         if (!inconclusive || phoneticNameStyle == PhoneticNameStyle.UNDEFINED) {
             return nameStyle;
         }

         if (phoneticNameStyle == PhoneticNameStyle.JAPANESE) {
             return FullNameStyle.JAPANESE;
         }
         if (phoneticNameStyle == PhoneticNameStyle.KOREAN) {
             return FullNameStyle.KOREAN;
         }

         // Pinyin only settles the question when ideographs were actually seen.
         final boolean pinyinOverCjk = phoneticNameStyle == PhoneticNameStyle.PINYIN
                 && nameStyle == FullNameStyle.CJK;
         return pinyinOverCjk ? FullNameStyle.CHINESE : nameStyle;
     }

     /**
      * Makes the best guess at the expected full name style based on the character set
      * used in the given, family and middle names. Does nothing if the style is already set.
      * <p>
      * A component that is recognized as Japanese or Korean settles the style immediately.
      * Otherwise a CJK guess from any component outranks a Western guess from another one,
      * regardless of the order in which the components are examined.
      */
     private void guessFullNameStyle(NameSplitter.Name name) {
         if (name.fullNameStyle != FullNameStyle.UNDEFINED) {
             return;
         }

         // A mix of Hanzi and latin chars is common in China, so a WESTERN or CJK guess for one
         // component is never final: we have to go through all components unless one of them
         // is JAPANESE or KOREAN.
         final String[] components = { name.givenNames, name.familyName, name.middleName };
         int bestGuess = FullNameStyle.UNDEFINED;
         for (String component : components) {
             final int guess = guessFullNameStyle(component);
             if (guess == FullNameStyle.UNDEFINED) {
                 continue;
             }
             if (guess != FullNameStyle.CJK && guess != FullNameStyle.WESTERN) {
                 name.fullNameStyle = guess;
                 return;
             }
             // Never let a Western-looking component downgrade an earlier CJK guess. This keeps
             // the per-component result in line with guessFullNameStyle(String), where a single
             // ideograph outranks any number of Latin letters in the same string.
             if (guess == FullNameStyle.CJK || bestGuess == FullNameStyle.UNDEFINED) {
                 bestGuess = guess;
             }
         }

         name.fullNameStyle = bestGuess;
     }

     /**
      * Guesses the full name style of a single string from the Unicode blocks of its letters.
      * <p>
      * Non-letters are skipped. The first CJK ideograph hands the decision over to
      * {@link #guessCJKNameStyle}; the first Japanese phonetic or Korean letter decides
      * immediately. If only other letters are present the result is {@code WESTERN}, and
      * {@code UNDEFINED} if the string is {@code null} or has no letters at all.
      */
     public int guessFullNameStyle(String name) {
         if (name == null) {
             return FullNameStyle.UNDEFINED;
         }

         int style = FullNameStyle.UNDEFINED;
         final int end = name.length();
         for (int pos = 0, next; pos < end; pos = next) {
             final int codePoint = Character.codePointAt(name, pos);
             next = pos + Character.charCount(codePoint);
             if (!Character.isLetter(codePoint)) {
                 continue;
             }

             final UnicodeBlock block = UnicodeBlock.of(codePoint);
             if (!isLatinUnicodeBlock(block)) {
                 if (isCJKUnicodeBlock(block)) {
                     // Could be Chinese, Japanese or Korean - the rest of the name may tell.
                     return guessCJKNameStyle(name, next);
                 }
                 if (isJapanesePhoneticUnicodeBlock(block)) {
                     return FullNameStyle.JAPANESE;
                 }
                 if (isKoreanUnicodeBlock(block)) {
                     return FullNameStyle.KOREAN;
                 }
             }

             // Latin, or any other script not singled out above.
             style = FullNameStyle.WESTERN;
         }
         return style;
     }

     /**
      * Narrows down a name already known to contain a CJK ideograph by scanning the letters
      * from {@code offset} onwards: the first Japanese phonetic letter yields
      * {@code JAPANESE}, the first Korean letter yields {@code KOREAN}, and {@code CJK} is
      * returned when neither shows up.
      */
     private int guessCJKNameStyle(String name, int offset) {
         final int end = name.length();
         for (int pos = offset, next; pos < end; pos = next) {
             final int codePoint = Character.codePointAt(name, pos);
             next = pos + Character.charCount(codePoint);
             if (!Character.isLetter(codePoint)) {
                 continue;
             }

             final UnicodeBlock block = UnicodeBlock.of(codePoint);
             if (isJapanesePhoneticUnicodeBlock(block)) {
                 return FullNameStyle.JAPANESE;
             }
             if (isKoreanUnicodeBlock(block)) {
                 return FullNameStyle.KOREAN;
             }
         }

         // Ideographs only: still ambiguous between Chinese, Japanese and Korean.
         return FullNameStyle.CJK;
     }

     /**
      * Guesses the phonetic name style from the phonetic family, given and middle names, in
      * that order, and stores the first conclusive answer. Does nothing if the style is
      * already set, and leaves it {@code UNDEFINED} when no component is conclusive.
      */
     private void guessPhoneticNameStyle(NameSplitter.Name name) {
         if (name.phoneticNameStyle != PhoneticNameStyle.UNDEFINED) {
             return;
         }

         // guessPhoneticNameStyle(String) answers with PhoneticNameStyle values only, and every
         // answer other than UNDEFINED is conclusive, so the first such answer wins.
         final String[] components = {
                 name.phoneticFamilyName, name.phoneticGivenName, name.phoneticMiddleName
         };
         for (String component : components) {
             final int guess = guessPhoneticNameStyle(component);
             if (guess != PhoneticNameStyle.UNDEFINED) {
                 name.phoneticNameStyle = guess;
                 return;
             }
         }
     }

     /**
      * Guesses the phonetic name style of a single string from its first recognizable letter:
      * Japanese phonetic letters give {@code JAPANESE}, Korean letters give {@code KOREAN}
      * and Latin letters give {@code PINYIN}. Non-letters and letters of any other block are
      * skipped. Returns {@code UNDEFINED} for {@code null} or when nothing is recognized.
      */
     public int guessPhoneticNameStyle(String name) {
         if (name == null) {
             return PhoneticNameStyle.UNDEFINED;
         }

         final int end = name.length();
         for (int pos = 0, next; pos < end; pos = next) {
             final int codePoint = Character.codePointAt(name, pos);
             next = pos + Character.charCount(codePoint);
             if (!Character.isLetter(codePoint)) {
                 continue;
             }

             final UnicodeBlock block = UnicodeBlock.of(codePoint);
             if (isJapanesePhoneticUnicodeBlock(block)) {
                 return PhoneticNameStyle.JAPANESE;
             }
             if (isKoreanUnicodeBlock(block)) {
                 return PhoneticNameStyle.KOREAN;
             }
             if (isLatinUnicodeBlock(block)) {
                 return PhoneticNameStyle.PINYIN;
             }
         }

         return PhoneticNameStyle.UNDEFINED;
     }

     /**
      * Tells whether the block is Basic Latin, Latin-1 Supplement, Latin Extended-A,
      * Latin Extended-B or Latin Extended Additional.
      */
     private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
         if (unicodeBlock == UnicodeBlock.BASIC_LATIN
                 || unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
             return true;
         }
         // The extended Latin ranges.
         return unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A
                 || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B
                 || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
     }

     /**
      * Tells whether the block is one of the CJK blocks: the unified ideographs (including
      * extensions A and B), the compatibility blocks, the radicals supplement, or CJK symbols
      * and punctuation.
      */
     private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
         final boolean unifiedIdeograph =
                 block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                         || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                         || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
         if (unifiedIdeograph) {
             return true;
         }

         final boolean compatibility =
                 block == UnicodeBlock.CJK_COMPATIBILITY
                         || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
                         || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                         || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT;
         if (compatibility) {
             return true;
         }

         return block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                 || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT;
     }

     /**
      * Tells whether the block is one of the Hangul blocks: Hangul Syllables, Hangul Jamo or
      * Hangul Compatibility Jamo.
      */
     private static boolean isKoreanUnicodeBlock(UnicodeBlock unicodeBlock) {
         if (unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES) {
             return true;
         }
         return unicodeBlock == UnicodeBlock.HANGUL_JAMO
                 || unicodeBlock == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
     }

     /**
      * Tells whether the block is Hiragana, Katakana, Katakana Phonetic Extensions or
      * Halfwidth and Fullwidth Forms.
      */
     private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
         final boolean kana = unicodeBlock == UnicodeBlock.HIRAGANA
                 || unicodeBlock == UnicodeBlock.KATAKANA
                 || unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS;
         return kana || unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
     }
 }