| /* |
| ********************************************************************** |
| * Copyright (c) 2002-2011, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * Author: Mark Davis |
| ********************************************************************** |
| */ |
| package org.unicode.cldr.util; |
| |
| import java.util.ArrayList; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.EnumSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.NoSuchElementException; |
| import java.util.Set; |
| import java.util.StringTokenizer; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.regex.Pattern; |
| |
| import org.unicode.cldr.tool.LikelySubtags; |
| |
| import com.google.common.base.CharMatcher; |
| import com.google.common.base.Joiner; |
| import com.google.common.base.Splitter; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableMap; |
| import com.ibm.icu.dev.util.CollectionUtilities; |
| import com.ibm.icu.impl.Relation; |
| import com.ibm.icu.impl.Row.R2; |
| import com.ibm.icu.text.UnicodeSet; |
| |
| public class LanguageTagParser { |
| /** |
| * @return Returns the language, or "" if none. |
| */ |
| public String getLanguage() { |
| return language; |
| } |
| |
| /** |
| * @return Returns the script, or "" if none. |
| */ |
| public String getScript() { |
| return script; |
| } |
| |
| /** |
| * @return Returns the region, or "" if none. |
| */ |
| public String getRegion() { |
| return region; |
| } |
| |
| /** |
| * @return Returns the variants. |
| */ |
| public List<String> getVariants() { |
| return ImmutableList.copyOf(variants); |
| } |
| |
| /** |
| * @return Returns the grandfathered flag |
| */ |
| public boolean isGrandfathered() { |
| return grandfathered; |
| } |
| |
| /** |
| * @return Returns the extensions. |
| */ |
| @Deprecated |
| public Map<String, String> getExtensions() { |
| return OutputOption.ICU.convert(extensions); |
| } |
| |
| /** |
| * @return Returns the localeExtensions. |
| */ |
| @Deprecated |
| public Map<String, String> getLocaleExtensions() { |
| return OutputOption.ICU.convert(localeExtensions); |
| } |
| |
| /** |
| * @return Returns the extensions. |
| */ |
| public Map<String, List<String>> getExtensionsDetailed() { |
| return ImmutableMap.copyOf(extensions); |
| } |
| |
| /** |
| * @return Returns the localeExtensions. |
| */ |
| public Map<String, List<String>> getLocaleExtensionsDetailed() { |
| return ImmutableMap.copyOf(localeExtensions); |
| } |
| |
| /** |
| * @return Returns the original, preparsed language tag |
| */ |
| public String getOriginal() { |
| return original; |
| } |
| |
| /** |
| * @return Returns the language-script (or language) part of a tag. |
| */ |
| public String getLanguageScript() { |
| if (script.length() != 0) return language + "_" + script; |
| return language; |
| } |
| |
| /** |
| * @param in |
| * Collection of language tag strings |
| * @return Returns each of the language-script tags in the collection. |
| */ |
| public static Set<String> getLanguageScript(Collection<String> in) { |
| return getLanguageAndScript(in, null); |
| } |
| |
| /** |
| * @param in |
| * Collection of language tag strings |
| * @return Returns each of the language-script tags in the collection. |
| */ |
| public static Set<String> getLanguageAndScript(Collection<String> in, Set<String> output) { |
| if (output == null) output = new TreeSet<String>(); |
| LanguageTagParser lparser = new LanguageTagParser(); |
| for (Iterator<String> it = in.iterator(); it.hasNext();) { |
| output.add(lparser.set(it.next()).getLanguageScript()); |
| } |
| return output; |
| } |
| |
| // private fields |
| |
| private String original; |
| private boolean grandfathered = false; |
| private String language; |
| private String script; |
| private String region; |
| private Set<String> variants = new TreeSet<String>(); |
| private Map<String, List<String>> extensions = new TreeMap<String, List<String>>(); // use tree map |
| private Map<String, List<String>> localeExtensions = new TreeMap<String, List<String>>(); |
| |
| private static final UnicodeSet ALPHA = new UnicodeSet("[a-zA-Z]").freeze(); |
| private static final UnicodeSet DIGIT = new UnicodeSet("[0-9]").freeze(); |
| private static final UnicodeSet ALPHANUM = new UnicodeSet("[0-9a-zA-Z]").freeze(); |
| private static final UnicodeSet EXTENSION_VALUE = new UnicodeSet("[0-9a-zA-Z/_]").freeze(); |
| private static final UnicodeSet X = new UnicodeSet("[xX]").freeze(); |
| private static final UnicodeSet ALPHA_MINUS_X = new UnicodeSet(ALPHA).removeAll(X).freeze(); |
| private static StandardCodes standardCodes = StandardCodes.make(); |
| private static final Set<String> grandfatheredCodes = standardCodes.getAvailableCodes("grandfathered"); |
| private static final String separator = "-_"; // '-' alone for 3066bis language tags |
| private static final UnicodeSet SEPARATORS = new UnicodeSet().addAll(separator).freeze(); |
| private static final Splitter SPLIT_BAR = Splitter.on(CharMatcher.anyOf(separator)); |
| private static final Splitter SPLIT_COLON = Splitter.on(';'); |
| private static final Splitter SPLIT_EQUAL = Splitter.on('='); |
| private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance(); |
| private static final Relation<R2<String, String>, String> BCP47_ALIASES = SDI.getBcp47Aliases(); |
| |
| /** |
| * Parses out a language tag, setting a number of fields that can subsequently be retrieved. |
| * If a private-use field is found, it is returned as the last extension.<br> |
| * This only checks for well-formedness (syntax), not for validity (subtags in registry). For the latter, see |
| * isValid. |
| * |
| * @param languageTag |
| * @return |
| */ |
| public LanguageTagParser set(String languageTag) { |
| if (languageTag.length() == 0 || languageTag.equals("root")) { |
| // throw new IllegalArgumentException("Language tag cannot be empty"); |
| // |
| // With ICU 64 the language tag for root is normalized to empty string so we |
| // cannot throw for empty string as above. However, code here and in clients |
| // assumes a non-empty language tag, so for now just map "" or "root" to "und". |
| languageTag = "und"; |
| } else if (languageTag.startsWith("_") || languageTag.startsWith("-")) { |
| languageTag = "und" + languageTag; |
| } |
| languageTag = languageTag.toLowerCase(Locale.ROOT); |
| |
| // clear everything out |
| language = region = script = ""; |
| grandfathered = false; |
| variants.clear(); |
| extensions.clear(); |
| localeExtensions.clear(); |
| original = languageTag; |
| int localeExtensionsPosition = languageTag.indexOf('@'); // This does not work with BCP47 compliant IDs |
| if (localeExtensionsPosition >= 0) { |
| final String localeExtensionsString = languageTag.substring(localeExtensionsPosition + 1); |
| for (String keyValue : SPLIT_COLON.split(localeExtensionsString)) { |
| final Iterator<String> keyValuePair = SPLIT_EQUAL.split(keyValue).iterator(); |
| final String key = keyValuePair.next(); |
| final String value = keyValuePair.next(); |
| if (keyValuePair.hasNext() || !ALPHANUM.containsAll(key) || !EXTENSION_VALUE.containsAll(value)) { |
| throwError(keyValue, "Invalid key/value pair"); |
| } |
| localeExtensions.put(key, SPLIT_BAR.splitToList(value)); |
| } |
| languageTag = languageTag.substring(0, localeExtensionsPosition); |
| } |
| |
| // first test for grandfathered |
| if (grandfatheredCodes.contains(languageTag)) { |
| language = languageTag; |
| grandfathered = true; |
| return this; |
| } |
| |
| // each time we fetch a token, we check for length from 1..8, and all alphanum |
| StringTokenizer st = new StringTokenizer(languageTag, separator); |
| String subtag; |
| try { |
| subtag = getSubtag(st); |
| } catch (Exception e1) { |
| throw new IllegalArgumentException("Illegal language tag: " + languageTag, e1); |
| } |
| |
| // check for private use (x-...) and return if so |
| if (subtag.equalsIgnoreCase("x")) { |
| getExtension(subtag, st, 1); |
| return this; |
| } |
| |
| // check that language subtag is valid |
| if (!ALPHA.containsAll(subtag) || subtag.length() < 2) { |
| throwError(subtag, "Invalid language subtag"); |
| } |
| try { // The try block is to catch the out-of-tokens case. Easier than checking each time. |
| language = subtag; |
| subtag = getSubtag(st); // prepare for next |
| |
| // check for script, 4 letters |
| if (subtag.length() == 4 && ALPHA.containsAll(subtag)) { |
| script = subtag; |
| script = script.substring(0, 1).toUpperCase(Locale.ROOT) |
| + script.substring(1); |
| subtag = getSubtag(st); // prepare for next |
| } |
| |
| // check for region, 2 letters or 3 digits |
| if (subtag.length() == 2 && ALPHA.containsAll(subtag) |
| || subtag.length() == 3 && DIGIT.containsAll(subtag)) { |
| region = subtag.toUpperCase(Locale.ENGLISH); |
| subtag = getSubtag(st); // prepare for next |
| } |
| |
| // get variants: length > 4 or len=4 & starts with digit |
| while (isValidVariant(subtag)) { |
| variants.add(subtag); |
| subtag = getSubtag(st); // prepare for next |
| } |
| |
| // get extensions: singleton '-' subtag (2-8 long) |
| while (subtag.length() == 1 && ALPHA_MINUS_X.contains(subtag)) { |
| subtag = getExtension(subtag, st, 2); |
| if (subtag == null) return this; // done |
| } |
| |
| if (subtag.equalsIgnoreCase("x")) { |
| getExtension(subtag, st, 1); |
| return this; |
| } |
| |
| // if we make it to this point, then we have an error |
| throwError(subtag, "Illegal subtag"); |
| |
| } catch (NoSuchElementException e) { |
| // this exception just means we ran out of tokens. That's ok, so we just return. |
| } |
| return this; |
| } |
| |
| private boolean isValidVariant(String subtag) { |
| return subtag != null && ALPHANUM.containsAll(subtag) |
| && (subtag.length() > 4 || subtag.length() == 4 && DIGIT.contains(subtag.charAt(0))); |
| } |
| |
| /** |
| * |
| * @return true iff the language tag validates |
| */ |
| public boolean isValid() { |
| if (grandfathered) return true; // don't need further checking, since we already did so when parsing |
| if (!validates(language, "language")) return false; |
| if (!validates(script, "script")) return false; |
| if (!validates(region, "territory")) return false; |
| for (Iterator<String> it = variants.iterator(); it.hasNext();) { |
| if (!validates(it.next(), "variant")) return false; |
| } |
| return true; // passed the gauntlet |
| } |
| |
| public enum Status { |
| WELL_FORMED, VALID, CANONICAL, MINIMAL |
| } |
| |
| public Status getStatus(Set<String> errors) { |
| errors.clear(); |
| if (!isValid()) { |
| return Status.WELL_FORMED; |
| // TODO, check the bcp47 extension codes also |
| } |
| Map<String, Map<String, R2<List<String>, String>>> aliasInfo = SDI.getLocaleAliasInfo(); |
| Map<String, Map<String, String>> languageInfo = StandardCodes.getLStreg().get("language"); |
| |
| if (aliasInfo.get("language").containsKey(language)) { |
| errors.add("Non-canonical language: " + language); |
| } |
| Map<String, String> lstrInfo = languageInfo.get(language); |
| if (lstrInfo != null) { |
| String scope = lstrInfo.get("Scope"); |
| if ("collection".equals(scope)) { |
| errors.add("Collection language: " + language); |
| } |
| } |
| if (aliasInfo.get("script").containsKey(script)) { |
| errors.add("Non-canonical script: " + script); |
| } |
| if (aliasInfo.get("territory").containsKey(region)) { |
| errors.add("Non-canonical region: " + region); |
| } |
| if (!errors.isEmpty()) { |
| return Status.VALID; |
| } |
| String tag = language + (script.isEmpty() ? "" : "_" + script) + (region.isEmpty() ? "" : "_" + region); |
| String minimized = LikelySubtags.minimize(tag, SDI.getLikelySubtags(), false); |
| if (minimized == null) { |
| errors.add("No minimal data for:" + tag); |
| if (script.isEmpty() && region.isEmpty()) { |
| return Status.MINIMAL; |
| } else { |
| return Status.CANONICAL; |
| } |
| } |
| if (!tag.equals(minimized)) { |
| errors.add("Not minimal:" + tag + "-->" + minimized); |
| return Status.CANONICAL; |
| } |
| return Status.MINIMAL; |
| } |
| |
| /** |
| * @param subtag |
| * @param type |
| * @return true if the subtag is empty, or if it is in the registry |
| */ |
| private boolean validates(String subtag, String type) { |
| return subtag.length() == 0 || standardCodes.getAvailableCodes(type).contains(subtag); |
| } |
| |
| /** |
| * Internal method |
| * |
| * @param minLength |
| * TODO |
| */ |
| private String getExtension(String subtag, StringTokenizer st, int minLength) { |
| String base = subtag; |
| final char extension = subtag.charAt(0); |
| if (extensions.containsKey(subtag)) { |
| throwError(subtag, "Can't have two extensions with the same key"); |
| } |
| if (!st.hasMoreElements()) { |
| throwError(subtag, "Private Use / Extension requires subsequent subtag"); |
| } |
| boolean subkey = extension == 'u' || extension == 't'; |
| boolean firstT = extension == 't'; |
| boolean haveContents = false; |
| List<String> result = new ArrayList<>(); |
| try { |
| while (st.hasMoreElements()) { |
| subtag = getSubtag(st); |
| if (subtag.length() < minLength) { |
| return subtag; |
| } |
| if (subkey && subtag.length() == 2 && !firstT) { // start new key-value pair |
| if (!result.isEmpty() || base.length() != 1) { // don't add empty t- or u- |
| localeExtensions.put(base, ImmutableList.copyOf(result)); |
| haveContents = true; |
| result.clear(); |
| } |
| base = subtag; |
| continue; |
| } |
| firstT = false; |
| result.add(subtag); |
| } |
| return null; |
| } finally { |
| if (subkey) { |
| if (!result.isEmpty() || base.length() != 1) { // don't add empty t- or u- |
| localeExtensions.put(base, ImmutableList.copyOf(result)); |
| haveContents = true; |
| } |
| if (!haveContents) { |
| throw new IllegalArgumentException("extension must not be empty: " + base); |
| } |
| } else { |
| if (result.isEmpty()) { |
| throw new IllegalArgumentException("extension must not be empty: " + base); |
| } |
| extensions.put(base, ImmutableList.copyOf(result)); |
| } |
| } |
| } |
| |
| /** |
| * Internal method |
| */ |
| private String getSubtag(StringTokenizer st) { |
| String result = st.nextToken(); |
| if (result.length() < 1 || result.length() > 8) { |
| throwError(result, "Illegal length (must be 1..8)"); |
| } |
| if (!ALPHANUM.containsAll(result)) { |
| throwError(result, "Illegal characters (" + new UnicodeSet().addAll(result).removeAll(ALPHANUM) + ")"); |
| } |
| return result; |
| } |
| |
| /** |
| * Internal method |
| */ |
| private void throwError(String subtag, String errorText) { |
| throw new IllegalArgumentException(errorText + ": " + subtag + " in " + original); |
| } |
| |
| public LanguageTagParser setRegion(String region) { |
| this.region = region; |
| return this; |
| } |
| |
| public LanguageTagParser setScript(String script) { |
| this.script = script; |
| return this; |
| } |
| |
| public enum OutputOption { |
| ICU('_'), BCP47('-'); |
| final char separator; |
| final Joiner joiner; |
| |
| private OutputOption(char separator) { |
| this.separator = separator; |
| joiner = Joiner.on(separator); |
| } |
| |
| public Map<String, String> convert(Map<String, List<String>> mapToList) { |
| if (mapToList.isEmpty()) { |
| return Collections.emptyMap(); |
| } |
| ImmutableMap.Builder<String, String> builder = ImmutableMap.builder(); |
| for (Entry<String, List<String>> entry : mapToList.entrySet()) { |
| builder.put(entry.getKey(), joiner.join(entry.getValue())); |
| } |
| return builder.build(); |
| } |
| } |
| |
| public String toString() { |
| return toString(OutputOption.ICU); |
| } |
| |
| public String toString(OutputOption oo) { |
| StringBuilder result = new StringBuilder(language); // optimize for the simple cases |
| if (this.script.length() != 0) result.append(oo.separator).append(script); |
| if (this.region.length() != 0) result.append(oo.separator).append(region); |
| if (this.variants.size() != 0) { |
| for (String variant : variants) { |
| result.append(oo.separator).append(oo != OutputOption.ICU ? variant : variant.toUpperCase(Locale.ROOT)); |
| } |
| } |
| if (this.extensions.size() != 0) { |
| for (Entry<String, List<String>> extension : extensions.entrySet()) { |
| String key = extension.getKey(); |
| String value = oo.joiner.join(extension.getValue()); |
| result.append(oo.separator).append(key) |
| .append(oo.separator).append(value); |
| } |
| } |
| if (this.localeExtensions.size() != 0) { |
| if (oo == OutputOption.BCP47) { |
| List<String> tValue = localeExtensions.get("t"); |
| if (tValue != null) { |
| result.append(oo.separator).append('t') |
| .append(oo.separator).append(oo.joiner.join(tValue)); |
| for (Entry<String, List<String>> extension : localeExtensions.entrySet()) { |
| String key = extension.getKey(); |
| if (key.length() == 2 && key.charAt(1) < 'a') { |
| String value = oo.joiner.join(extension.getValue()); |
| result.append(oo.separator).append(key).append(oo.separator).append(value); |
| } |
| } |
| } |
| boolean haveU = false; |
| for (Entry<String, List<String>> extension : localeExtensions.entrySet()) { |
| if (!haveU) { |
| List<String> uValue = localeExtensions.get("u"); |
| result.append(oo.separator).append('u'); |
| if (uValue != null) { |
| result.append(oo.separator).append(oo.joiner.join(tValue)); |
| } |
| haveU = true; |
| } |
| String key = extension.getKey(); |
| if (key.length() == 2 && key.charAt(1) >= 'a') { |
| String value = oo.joiner.join(extension.getValue()); |
| result.append(oo.separator).append(key).append(oo.separator).append(value); |
| } |
| } |
| } else { |
| result.append('@'); |
| boolean needSep = false; |
| for (Entry<String, List<String>> extension : localeExtensions.entrySet()) { |
| if (needSep) { |
| result.append(";"); |
| } |
| String key = extension.getKey(); |
| String value = oo.joiner.join(extension.getValue()); |
| result.append(key.toUpperCase(Locale.ROOT)) |
| .append('=').append(value.toUpperCase(Locale.ROOT)); |
| needSep = true; |
| } |
| } |
| } |
| return result.toString(); |
| } |
| |
| /** |
| * Return just the language, script, and region (no variants or extensions) |
| * @return |
| */ |
| public String toLSR() { |
| String result = language; // optimize for the simple cases |
| if (this.script.length() != 0) result += "_" + script; |
| if (this.region.length() != 0) result += "_" + region; |
| return result; |
| } |
| |
| public enum Fields { |
| LANGUAGE, SCRIPT, REGION, VARIANTS |
| }; |
| |
| public static Set<Fields> LANGUAGE_SCRIPT = Collections.unmodifiableSet(EnumSet.of(Fields.LANGUAGE, Fields.SCRIPT)); |
| public static Set<Fields> LANGUAGE_REGION = Collections.unmodifiableSet(EnumSet.of(Fields.LANGUAGE, Fields.REGION)); |
| public static Set<Fields> LANGUAGE_SCRIPT_REGION = Collections.unmodifiableSet(EnumSet.of(Fields.LANGUAGE, |
| Fields.SCRIPT, Fields.REGION)); |
| |
| public String toString(Set<Fields> selection) { |
| String result = language; |
| if (selection.contains(Fields.SCRIPT) && script.length() != 0) result += "_" + script; |
| if (selection.contains(Fields.REGION) && region.length() != 0) result += "_" + region; |
| if (selection.contains(Fields.VARIANTS) && variants.size() != 0) { |
| for (String variant : (Collection<String>) variants) { |
| result += "_" + variant; |
| } |
| } |
| return result; |
| } |
| |
| public LanguageTagParser setLanguage(String language) { |
| if (SEPARATORS.containsSome(language)) { |
| String oldScript = script; |
| String oldRegion = region; |
| Set<String> oldVariants = variants; |
| set(language); |
| if (script.length() == 0) { |
| script = oldScript; |
| } |
| if (region.length() == 0) { |
| region = oldRegion; |
| } |
| if (oldVariants.size() != 0) { |
| variants = oldVariants; |
| } |
| } else { |
| this.language = language; |
| } |
| return this; |
| } |
| |
| public LanguageTagParser setLocaleExtensions(Map<String, String> localeExtensions) { |
| this.localeExtensions = expandMap(localeExtensions, 1, Integer.MAX_VALUE); |
| return this; |
| } |
| |
| public LanguageTagParser setVariants(Collection<String> newVariants) { |
| for (String variant : newVariants) { |
| if (!isValidVariant(variant)) { |
| throw new IllegalArgumentException("Illegal variant: " + variant); |
| } |
| } |
| variants.clear(); |
| variants.addAll(newVariants); |
| return this; |
| } |
| |
| static final Pattern EXTENSION_PATTERN = PatternCache.get("([0-9a-zA-Z]{2,8}(-[0-9a-zA-Z]{2,8})*)?"); |
| |
| public LanguageTagParser setExtensions(Map<String, String> newExtensions) { |
| this.extensions = expandMap(newExtensions, 2, 8); |
| return this; |
| } |
| |
| public static String getSimpleParent(String s) { |
| int lastBar = s.lastIndexOf('_'); |
| return lastBar >= 0 ? s.substring(0, lastBar) : ""; |
| } |
| |
| private Map<String, List<String>> expandMap(Map<String, String> newLocaleExtensions, int minLength, int maxLength) { |
| if (newLocaleExtensions.isEmpty()) { |
| return Collections.emptyMap(); |
| } |
| ImmutableMap.Builder<String, List<String>> result = ImmutableMap.builder(); |
| for (Entry<String, String> entry : newLocaleExtensions.entrySet()) { |
| result.put(entry.getKey(), split(entry.getValue(), minLength, maxLength)); |
| } |
| return result.build(); |
| } |
| |
| private List<String> split(String value, int minLength, int maxLength) { |
| List<String> values = SPLIT_BAR.splitToList(value); |
| for (String s : values) { |
| if (s.length() < minLength || s.length() > maxLength) { |
| throw new IllegalArgumentException("Illegal subtag length for: " + s); |
| } |
| if (!ALPHANUM.contains(s)) { |
| throw new IllegalArgumentException("Illegal locale character in: " + s); |
| } |
| } |
| return values; |
| } |
| |
| public enum Format {icu("_","_"), bcp47("-","-"), structure("; ", "="); |
| public final String separator; |
| public final String separator2; |
| private Format(String separator, String separator2) { |
| this.separator = separator; |
| this.separator2 = separator2; |
| } |
| }; |
| |
| public String toString(Format format) { |
| StringBuilder result = new StringBuilder(); |
| if (format == Format.structure) { |
| result.append("["); |
| } |
| appendField(format, result, "language", language); |
| appendField(format, result, "script", script); |
| appendField(format, result, "region", region); |
| appendField(format, result, "variants", variants); |
| appendField(format, result, "extensions", extensions, new UnicodeSet('a','s')); |
| appendField(format, result, "localeX", localeExtensions, null); |
| appendField(format, result, "extensions", extensions, new UnicodeSet('v','w', 'y','z')); |
| appendField(format, result, "extensions", extensions, new UnicodeSet('x','x')); |
| if (format == Format.structure) { |
| result.append("]"); |
| } |
| // if (script.length() != 0) { |
| // result. += "_" + script; |
| // } |
| // if (selection.contains(Fields.REGION) && region.length() != 0) result += "_" + region; |
| // if (selection.contains(Fields.VARIANTS) && variants.size() != 0) { |
| // for (String variant : (Collection<String>) variants) { |
| // result += "_" + variant; |
| // } |
| // } |
| return result.toString(); |
| } |
| |
| private void appendField(Format format, StringBuilder result, String fieldName, String fieldValue) { |
| if (!fieldValue.isEmpty()) { |
| if (result.length() > 1) { |
| result.append(format.separator); |
| } |
| if (format == Format.structure) { |
| result.append(fieldName).append("="); |
| } |
| result.append(fieldValue); |
| } |
| } |
| |
| private void appendFieldKey(Format format, StringBuilder result, String fieldName, String fieldValue) { |
| result.append(format.separator).append(fieldName).append(format.separator2).append(fieldValue); |
| } |
| |
| private void appendField(Format format, StringBuilder result, String fieldName, Collection<String> fieldValues) { |
| if (!fieldValues.isEmpty()) { |
| appendField(format, result, fieldName, CollectionUtilities.join(fieldValues, ",")); |
| } |
| } |
| |
| /** |
| * null match means it is -t- or -u- |
| */ |
| private void appendField(Format format, StringBuilder result, String fieldName, Map<String, List<String>> fieldValues, UnicodeSet match) { |
| if (match == null && format != Format.structure) { |
| List<String> tLang = fieldValues.get("t"); |
| List<String> uSpecial = fieldValues.get("u"); |
| boolean haveTLang = tLang != null; |
| boolean haveUSpecial = uSpecial != null; |
| |
| // do all the keys ending with digits first |
| boolean haveT = false; |
| boolean haveU = false; |
| StringBuilder result2 = new StringBuilder(); // put -u- at end |
| for (Entry<String, List<String>> entry : fieldValues.entrySet()) { |
| String key = entry.getKey(); |
| if (key.length() < 2) { |
| continue; |
| } |
| int lastChar = key.codePointBefore(key.length()); |
| if (lastChar < 'a') { |
| if (!haveT) { |
| result.append(format.separator).append('t'); |
| if (haveTLang) { // empty is illegal, but just in case |
| result.append(format.separator).append(CollectionUtilities.join(tLang, format.separator)); |
| haveTLang = false; |
| } |
| haveT = true; |
| } |
| appendFieldKey(format, result, entry.getKey(), CollectionUtilities.join(entry.getValue(), format.separator)); |
| } else { |
| if (!haveU) { |
| result2.append(format.separator).append('u'); |
| if (haveUSpecial) { // not yet valid, but just in case |
| result2.append(format.separator).append(CollectionUtilities.join(uSpecial, format.separator)); |
| haveUSpecial = false; |
| } |
| haveU = true; |
| } |
| appendFieldKey(format, result2, entry.getKey(), CollectionUtilities.join(entry.getValue(), format.separator)); |
| } |
| } |
| if (haveTLang) { |
| result.append(format.separator).append('t').append(format.separator).append(CollectionUtilities.join(tLang, format.separator)); |
| } |
| if (haveUSpecial) { |
| result2.append(format.separator).append('u').append(format.separator).append(CollectionUtilities.join(uSpecial, format.separator)); |
| } |
| result.append(result2); // put in right order |
| } else { |
| for (Entry<String, List<String>> entry : fieldValues.entrySet()) { |
| if (match == null || match.contains(entry.getKey())) { |
| appendFieldKey(format, result, entry.getKey(), CollectionUtilities.join(entry.getValue(), format.separator)); |
| } |
| } |
| } |
| } |
| } |