// tools/java/org/unicode/cldr/test/CheckForExemplars.java - platform/external/cldr - Git at Google

 /*
  ******************************************************************************
  * Copyright (C) 2005-2012, International Business Machines Corporation and        *
  * others. All Rights Reserved.                                               *
  ******************************************************************************
  */
 package org.unicode.cldr.test;

 import java.util.BitSet;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
 import org.unicode.cldr.util.CLDRFile;
 import org.unicode.cldr.util.CLDRFile.Status;
 import org.unicode.cldr.util.Factory;
 import org.unicode.cldr.util.InternalCldrException;
 import org.unicode.cldr.util.LocaleIDParser;
 import org.unicode.cldr.util.PatternCache;
 import org.unicode.cldr.util.PatternPlaceholders;
 import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
 import org.unicode.cldr.util.SupplementalDataInfo;
 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
 import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
 import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
 import org.unicode.cldr.util.XMLSource;
 import org.unicode.cldr.util.XPathParts;

 import com.ibm.icu.impl.Relation;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.DateTimePatternGenerator;
 import com.ibm.icu.text.Normalizer2;
 import com.ibm.icu.text.PluralRules;
 import com.ibm.icu.text.Transform;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ULocale;

 public class CheckForExemplars extends FactoryCheckCLDR {
     // Control characters that setCldrFileToCheck adds to the allowed set when the locale's
     // exemplars contain right-to-left characters.
     private static final UnicodeSet RTL_CONTROLS = new UnicodeSet("[\\u061C\\u200E\\u200F\\u202A-\\u202D\\u2066-\\u2069]");

     // Characters with bidi class AL or R; used to decide whether the exemplar set is right-to-left.
     private static final UnicodeSet RTL = new UnicodeSet("[[:bc=AL:][:bc=R:]]");

     // Substituted for placeholders and date-pattern fields before the remaining text is checked.
     private static final String STAND_IN = "#";

     // private final UnicodeSet commonAndInherited = new UnicodeSet(CheckExemplars.Allowed).complement();
     // "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
     // Path fragments that exempt a path from all checks in handleCheck: a path containing any
     // of these (as tested by containsPart) is returned early without being examined.
     static String[] EXEMPLAR_SKIPS = {
         "/currencySpacing",
         "/exemplarCharacters",
         // "/pattern",
         "/localizedPatternChars",
         "/segmentations",
         "/references",
         "/localeDisplayNames/variants/",
         "/commonlyUsed",
         "/defaultNumberingSystem",
         "/otherNumberingSystems",
         "/exponential",
         "/nan",
         "/scientificFormats",
         "/inText",
         "/orientation",
         "/symbol[@alt=\"narrow\"]",
         "/characters/parseLenients"
     };

     // Path fragments identifying date/time patterns. For a matching path, handleCheck reduces the
     // value to its literal text (extractDatePatternText) before running the character checks.
     // NOTE(review): "timeFormatLength" lacks the leading '/' that every other entry has; it still
     // matches "/timeFormatLength" by substring, but confirm whether the omission is intentional.
     static String[] DATE_PARTS = {
         "/hourFormat",
         "/dateFormatItem",
         "/intervalFormatItem",
         "/dateFormatLength",
         "timeFormatLength"
     };

     // Opening punctuation (general category Ps); marks the start of a parenthesized span.
     static final UnicodeSet START_PAREN = new UnicodeSet("[[:Ps:]]").freeze();
     // Closing punctuation (general category Pe); marks the end of a parenthesized span.
     static final UnicodeSet END_PAREN = new UnicodeSet("[[:Pe:]]").freeze();
     // All currency symbols (general category Sc); always tolerated in currency symbol values.
     static final UnicodeSet ALL_CURRENCY_SYMBOLS = new UnicodeSet("[[:Sc:]]").freeze();
     // ASCII letters; tolerated in currency symbols and GMT formats.
     static final UnicodeSet LETTER = new UnicodeSet("[[A-Za-z]]").freeze();
     // Any numeric character (general category N); not allowed as literal text in date patterns or code patterns.
     static final UnicodeSet NUMBERS = new UnicodeSet("[[:N:]]").freeze();
     // Every letter except 'H' and 'm'; none of these may appear as literal text in an hourFormat.
     static final UnicodeSet DISALLOWED_HOUR_FORMAT = new UnicodeSet("[[:letter:]]").remove('H').remove('m').freeze();
     // Letters (general category L); not allowed in numeric range patterns.
     static final UnicodeSet DISALLOWED_IN_RANGE = new UnicodeSet("[:L:]").freeze();

     // Characters accepted for the current locale; built and frozen in setCldrFileToCheck.
     private UnicodeSet exemplars;
     // exemplars plus printable ASCII; applied to text inside parentheses.
     private UnicodeSet exemplarsPlusAscii;
     //private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplars = new UnicodeSet("[()（）;,；，]").freeze();
     //private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplarsWithParens = new UnicodeSet("[;,；，]").freeze();

     // Hack until cldrbug 6566 is fixed. TODO
     // Matches an opening bracket, the stand-in character, and a closing bracket; such sequences
     // are removed from unit values before the parentheses check.
     private static final Pattern IGNORE_PLACEHOLDER_PARENTHESES = PatternCache.get("\\p{Ps}#\\p{Pe}");

     // private UnicodeSet currencySymbolExemplars;
     // True while no usable exemplar set is available; handleCheck then does nothing.
     private boolean skip;
     // Locale collator used to order characters in messages.
     private Collator col;
     // NOTE(review): configured with PRIMARY strength in setCldrFileToCheck but never read in this file.
     private Collator spaceCol;
     // Formats sets of offending characters for messages; only assigned when setup succeeds.
     UnicodeSetPrettyPrinter prettyPrint;
     // Receives path status details from getSourceLocaleID in handleCheck.
     private Status otherPathStatus = new Status();
     // Reusable matcher that finds placeholders in a value.
     private Matcher patternMatcher = ExampleGenerator.PARAMETER.matcher("");
     // True when the exemplarErrors option is present; lets exemplar problems be reported as errors.
     private boolean errorDefaultOption;

     // for extracting date pattern text
     private DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
     // Reusable buffer filled by extractDatePatternText.
     StringBuilder justText = new StringBuilder();

     // public static final Pattern SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN = PatternCache.get("/(" +
     // "codePattern" +
     // "|dateRangePattern" +
     // "|dateTimeFormat[^/]*?/pattern" +
     // "|appendItem" +
     // "|intervalFormatFallback" +
     // "|hoursFormat" +
     // "|gmtFormat" +
     // "|regionFormat" +
     // "|fallbackFormat" +
     // "|unitPattern.*@count=\"(zero|one|two|few|many|other)\"" +
     // "|localePattern" +
     // "|localeKeyTypePattern" +
     // "|listPatternPart" +
     // "|ellipsis" +
     // "|monthPattern" +
     // ")");
     // private Matcher supposedToBeMessageFormat = SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN.matcher("");

     // Paths on which a value may begin or end with whitespace without triggering an error.
     public static final Pattern LEAD_OR_TRAIL_WHITESPACE_OK = PatternCache.get("/(" +
         "references/reference" +
         "|insertBetween" +
         ")");
     // Reusable matcher for the pattern above; reset against each path in handleCheck.
     private Matcher leadOrTrailWhitespaceOk = LEAD_OR_TRAIL_WHITESPACE_OK.matcher("");

     // Code points U+0020 through U+007F.
     private static UnicodeSet ASCII = (UnicodeSet) new UnicodeSet("[\\u0020-\\u007F]").freeze();

     // Source of the placeholder requirements for each path.
     private PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
     // Supplemental data; assigned in the constructor.
     private SupplementalDataInfo sdi;
     // Lazily built by getCurrenciesForScript: script code -> currency codes.
     private Relation scriptToCurrencies;

     /**
      * Creates the check for the given factory and obtains the shared supplemental data instance.
      *
      * @param factory passed through to the superclass constructor
      */
     public CheckForExemplars(Factory factory) {
         super(factory);
         // patternPlaceholders = RegexLookup.of(new PlaceholderTransform())
         // .loadFromFile(PatternPlaceholders.class, "data/Placeholders.txt");
         sdi = SupplementalDataInfo.getInstance();
     }

     /**
      * Adapted from GenerateXMB.MapTransform.
      *
      * Splits the source on semicolons followed by whitespace and, for each piece, keeps the
      * trimmed text that precedes its '=' sign. The names are returned in order of first occurrence.
      *
      * @author jchye
      */
     static class PlaceholderTransform implements Transform<String, Set<String>> {
         @Override
         public Set<String> transform(String source) {
             final Set<String> names = new LinkedHashSet<String>();
             for (String entry : source.split(";\\s+")) {
                 // Everything before the '=' is the placeholder name.
                 final String name = entry.substring(0, entry.indexOf('=')).trim();
                 names.add(name);
             }
             return names;
         }
     }

     /**
      * Prepares this check for a file: builds the set of allowed characters from the resolved
      * file's main and auxiliary exemplars (plus punctuation, digits and the stand-in character when
      * CheckExemplars.USE_PUNCTUATION is set) and creates the pretty printer used in messages.
      * The check stays disabled ({@code skip == true}) for a null file, for root, and when no main
      * exemplar set is available; in the last case an error is added to possibleErrors.
      *
      * @param cldrFile the file to check; a null file is ignored
      * @param options the check options; the exemplarErrors option is read here
      * @param possibleErrors receives any setup errors
      * @return this
      */
     @Override
     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFile, Options options, List<CheckStatus> possibleErrors) {
         if (cldrFile == null) {
             return this;
         }
         // Disabled until setup completes successfully.
         skip = true;
         super.setCldrFileToCheck(cldrFile, options, possibleErrors);

         final String locale = cldrFile.getLocaleID();
         if (locale.equals("root")) {
             return this;
         }

         errorDefaultOption = options.get(Options.Option.exemplarErrors) != null;

         col = Collator.getInstance(new ULocale(locale));
         spaceCol = Collator.getInstance(new ULocale(locale));
         spaceCol.setStrength(Collator.PRIMARY);

         final CLDRFile resolvedFile = getResolvedCldrFileToCheck();
         final boolean[] ok = new boolean[1];
         exemplars = safeGetExemplars("", possibleErrors, resolvedFile, ok);
         if (exemplars == null) {
             possibleErrors.add(new CheckStatus()
                 .setCause(this)
                 .setMainType(CheckStatus.errorType)
                 .setSubtype(Subtype.noExemplarCharacters)
                 .setMessage("No Exemplar Characters: {0}", new Object[] { this.getClass().getName() }));
             return this;
         }
         // Work on a modifiable set: a copy when the lookup succeeded, otherwise an empty one.
         exemplars = ok[0] ? new UnicodeSet(exemplars) : new UnicodeSet();

         // Right-to-left locales may also use the bidi control characters.
         if (RTL.containsSome(exemplars)) {
             exemplars.addAll(RTL_CONTROLS);
         }

         final UnicodeSet auxiliary = safeGetExemplars("auxiliary", possibleErrors, resolvedFile, ok);
         if (auxiliary != null) {
             exemplars.addAll(auxiliary);
         }

         if (CheckExemplars.USE_PUNCTUATION) {
             final UnicodeSet punctuation = safeGetExemplars("punctuation", possibleErrors, resolvedFile, ok);
             if (punctuation != null) {
                 exemplars.addAll(punctuation);
             }

             exemplars.addAll(getNumberSystemExemplars());

             // TODO fix replacement character
             exemplars.add(STAND_IN);
         }

         exemplars.addAll(CheckExemplars.AlwaysOK).freeze();
         exemplarsPlusAscii = new UnicodeSet(exemplars).addAll(ASCII).freeze();

         skip = false;

         // NOTE(review): the space comparator receives col, not the PRIMARY-strength spaceCol
         // prepared above; confirm whether spaceCol was the intended argument.
         final Collator ordering = col != null ? col : Collator.getInstance(ULocale.ROOT);
         final Collator spacing = col != null
             ? col
             : Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY);
         prettyPrint = new UnicodeSetPrettyPrinter()
             .setOrdering(ordering)
             .setSpaceComparator(spacing)
             .setCompressRanges(true);
         return this;
     }

     /**
      * Returns the digits of the default numbering system declared in the file being checked.
      *
      * @return a new, modifiable set of the digit characters; empty when no digit string is
      *         available (presumably when the path has no value in the file or the numbering
      *         system has no simple digit string — TODO confirm against SupplementalDataInfo)
      */
     private UnicodeSet getNumberSystemExemplars() {
         String numberSystem = getCldrFileToCheck().getStringValue("//ldml/numbers/defaultNumberingSystem");
         String digits = sdi.getDigits(numberSystem);
         UnicodeSet result = new UnicodeSet();
         // Guard: adding a null character sequence to a UnicodeSet would throw.
         if (digits != null) {
             result.addAll(digits);
         }
         return result;
     }

     /**
      * Fetches the winning exemplar set of the given type, converting an
      * IllegalArgumentException into an error entry instead of propagating it.
      *
      * @param type the exemplar type ("" for main, or e.g. "auxiliary", "punctuation")
      * @param possibleErrors receives a couldNotAccessExemplars error on failure
      * @param resolvedFile the file to read from
      * @param ok single-element out flag: set to true on success, false on failure
      * @return the set returned by the file (may be null), or null on failure
      */
     private UnicodeSet safeGetExemplars(String type, List<CheckStatus> possibleErrors, CLDRFile resolvedFile,
         boolean[] ok) {
         try {
             final UnicodeSet found = resolvedFile.getExemplarSet(type, CLDRFile.WinningChoice.WINNING);
             ok[0] = true;
             return found;
         } catch (IllegalArgumentException iae) {
             final CheckStatus failure = new CheckStatus()
                 .setCause(this)
                 .setMainType(CheckStatus.errorType)
                 .setSubtype(Subtype.couldNotAccessExemplars)
                 .setMessage("Could not get exemplar set: " + iae.toString());
             possibleErrors.add(failure);
             ok[0] = false;
             return null;
         }
     }

     /**
      * Checks one value against the locale's exemplar characters and related rules.
      *
      * Steps, in order:
      * <ol>
      * <li>Return immediately for missing paths/values, while the check is disabled, for values
      * that come from code fallback, and for paths listed in EXEMPLAR_SKIPS.</li>
      * <li>Validate placeholders and replace them with the stand-in character.</li>
      * <li>Reject letters in numeric range patterns.</li>
      * <li>For date/time patterns, keep only the literal text, then reject digits and (for
      * hourFormat) disallowed letters.</li>
      * <li>Apply the path-specific exemplar test; characters inside parentheses may additionally
      * be ASCII.</li>
      * <li>Reject leading or trailing whitespace, except on paths where it is permitted.</li>
      * </ol>
      * Exemplar problems are reported as errors only when the exemplarErrors option was given and
      * the value comes from the locale being checked; otherwise they are warnings.
      *
      * @param path the distinguishing path; must not be null once the check is enabled
      * @param fullPath the full path; null means the path is absent and is skipped
      * @param value the value to check; null is skipped
      * @param options the check options (not read here)
      * @param result receives the problems found
      * @return this
      * @throws InternalCldrException if path is null or no file has been set
      */
     public CheckCLDR handleCheck(String path, String fullPath, String value,
         Options options, List<CheckStatus> result) {
         if (fullPath == null) return this; // skip paths that we don't have
         if (value == null) return this; // skip values that we don't have ?
         if (skip) return this;
         if (path == null) {
             throw new InternalCldrException("Empty path!");
         } else if (getCldrFileToCheck() == null) {
             throw new InternalCldrException("no file to check!");
         }
         String sourceLocale = getResolvedCldrFileToCheck().getSourceLocaleID(path, otherPathStatus);

         // Values that only exist through code fallback are not checked.
         if (XMLSource.CODE_FALLBACK_ID.equals(sourceLocale)) {
             return this;
         }

         if (containsPart(path, EXEMPLAR_SKIPS)) {
             return this;
         }

         // Short-circuit: the source locale is only consulted when errors were requested.
         CheckStatus.Type errorOption =
             errorDefaultOption && sourceLocale.equals(getResolvedCldrFileToCheck().getLocaleID())
                 ? CheckStatus.errorType
                 : CheckStatus.warningType;

         value = checkAndReplacePlaceholders(path, value, result);
         if (path.startsWith("//ldml/numbers/miscPatterns") && path.contains("[@type=\"range\"]")) {
             if (DISALLOWED_IN_RANGE.containsSome(value)) {
                 result.add(new CheckStatus()
                     .setCause(this)
                     .setMainType(CheckStatus.errorType)
                     .setSubtype(Subtype.illegalCharactersInPattern)
                     .setMessage(
                         "Range patterns should not have letters.",
                         new Object[] {}));
             }
         }

         // Now handle date patterns: only the literal text is checked from here on.
         if (containsPart(path, DATE_PARTS)) {
             if (!extractDatePatternText(value, STAND_IN, justText)) {
                 return this; // we are done, no text.
             }
             value = justText.toString();
             if (NUMBERS.containsSome(value)) {
                 UnicodeSet digits = new UnicodeSet().addAll(value).retainAll(NUMBERS);
                 addMissingMessage(digits, CheckStatus.errorType,
                     Subtype.patternCannotContainDigits,
                     Subtype.patternCannotContainDigits,
                     "cannot occur in date or time patterns", result);
             }
             if (path.endsWith("/hourFormat")) {
                 UnicodeSet badLetters = new UnicodeSet().addAll(value)
                     .retainAll(DISALLOWED_HOUR_FORMAT);
                 if (!badLetters.isEmpty()) {
                     addMissingMessage(badLetters, CheckStatus.errorType,
                         Subtype.patternContainsInvalidCharacters,
                         Subtype.patternContainsInvalidCharacters,
                         "cannot occur in the hour format", result);
                 }
             }
         }

         if (path.startsWith("//ldml/posix/messages")) return this;

         UnicodeSet disallowed;

         if (path.contains("/currency") && path.contains("/symbol")) {
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 disallowed.removeAll(ALL_CURRENCY_SYMBOLS);
                 disallowed.removeAll(LETTER); // Allow ASCII A-Z in currency symbols
                 if (disallowed.size() > 0) {
                     addNotInExemplarsMessage(disallowed, errorOption, result);
                 }
             }
         } else if (path.contains("/gmtFormat") || path.contains("/gmtZeroFormat")) {
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 disallowed.removeAll(LETTER); // Allow ASCII A-Z in gmtFormat and gmtZeroFormat
                 if (disallowed.size() > 0) {
                     addNotInExemplarsMessage(disallowed, errorOption, result);
                 }
             }
         } else if (path.contains("/months") || path.contains("/quarters")) {
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 disallowed.removeAll("IVXivx"); // Allow Roman-numeral letters in month or quarter names
                 if (path.contains("/calendar[@type=\"generic\"]/months")) {
                     disallowed.removeAll("M"); // Generic-calendar month names contain 'M' and do not get modified
                 }
                 if (disallowed.size() > 0) {
                     addNotInExemplarsMessage(disallowed, errorOption, result);
                 }
             }
         } else if (path.contains("/localeDisplayNames") && !path.contains("/localeDisplayPattern")) {
             // test first for outside of the set.
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 if (path.contains("[@type=\"iso8601\"]")) {
                     disallowed.removeAll("ISO"); // Name of ISO8601 calendar may contain "ISO" regardless of native script
                 }
                 if (disallowed.size() > 0) {
                     addNotInExemplarsMessage(disallowed, errorOption, result);
                 }
             }
             if (path.contains("/codePatterns")) {
                 disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
                 if (!disallowed.isEmpty()) {
                     addMissingMessage(disallowed, CheckStatus.errorType,
                         Subtype.patternCannotContainDigits,
                         Subtype.patternCannotContainDigits,
                         "cannot occur in locale fields", result);
                 }
             }
         } else if (path.contains("/units")) {
             // Brackets that directly wrap a placeholder are tolerated; any others are errors.
             String noValidParentheses = IGNORE_PLACEHOLDER_PARENTHESES.matcher(value).replaceAll("");
             disallowed = new UnicodeSet().addAll(START_PAREN).addAll(END_PAREN)
                 .retainAll(noValidParentheses);
             if (!disallowed.isEmpty()) {
                 addMissingMessage(disallowed, CheckStatus.errorType,
                     Subtype.parenthesesNotAllowed,
                     Subtype.parenthesesNotAllowed,
                     "cannot occur in units", result);
             }
         } else if (path.endsWith("/exemplarCity")) {
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 if ("root".equals(sourceLocale)) {
                     return this;
                 }
                 // Get script of locale, falling back to likely subtags when the ID has none.
                 LocaleIDParser parser = new LocaleIDParser().set(sourceLocale);
                 String script = parser.getScript();
                 if (script.length() == 0) {
                     String localeID = sdi.getLikelySubtags().get(sourceLocale);
                     if (localeID == null) {
                         localeID = sdi.getLikelySubtags().get(parser.getLanguage());
                         if (localeID == null) {
                             throw new IllegalArgumentException(
                                 "A likely subtag for " + parser.getLanguage() +
                                     " is required to get its script.");
                         }
                     }
                     script = parser.set(localeID).getScript();
                 }
                 // Characters in the locale's own script are accepted in city names.
                 int myscript = UScript.getCodeFromName(script);
                 UnicodeSet toRemove = new UnicodeSet();
                 for (int i = 0; i < disallowed.size(); i++) {
                     int c = disallowed.charAt(i);
                     if (UScript.getScript(c) == myscript) {
                         toRemove.add(c);
                     }
                 }
                 disallowed.removeAll(toRemove);
                 if (disallowed.size() > 0) {
                     addNotInExemplarsMessage(disallowed, errorOption, result);
                 }
             }
         } else if (path.contains("/annotations") && !path.contains("[@type")) {
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 // Untyped annotations are only ever reported as warnings.
                 addNotInExemplarsMessage(disallowed, CheckStatus.warningType, result);
             }
         } else {
             disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
             if (disallowed != null) {
                 addNotInExemplarsMessage(disallowed, errorOption, result);
             }
         }

         // check for spaces
         if (!value.equals(value.trim())) {
             if (!leadOrTrailWhitespaceOk.reset(path).find()) {
                 result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
                     .setSubtype(Subtype.mustNotStartOrEndWithSpace)
                     .setMessage("This item must not start or end with whitespace, or be empty."));
             }
         }
         return this;
     }

     /**
      * Reports characters that are outside the main/auxiliary exemplars, using the standard
      * subtype pair and wording shared by all exemplar branches of handleCheck.
      */
     private void addNotInExemplarsMessage(UnicodeSet disallowed, CheckStatus.Type type, List<CheckStatus> result) {
         addMissingMessage(disallowed, type,
             Subtype.charactersNotInMainOrAuxiliaryExemplars,
             Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars,
             "are not in the exemplar characters", result);
     }

     /**
      * Validates the placeholders in a value against what the path allows, then returns the
      * value with every placeholder replaced by the stand-in character so that later checks see
      * only the literal text.
      *
      * Problems reported: repeated placeholders (unless the path allows multiples), missing
      * placeholders, unexpected placeholders, and placeholders on a path where none are allowed.
      *
      * @param path the path the value belongs to
      * @param value the value to examine
      * @param result receives the problems found
      * @return the value with placeholders replaced, or the unchanged value when the path
      *         disallows placeholders
      */
     private String checkAndReplacePlaceholders(String path, String value, List<CheckStatus> result) {
         // Collect the distinct placeholders present, remembering any that repeat.
         final Matcher finder = patternMatcher.reset(value);
         final Set<String> found = new HashSet<String>();
         final StringBuilder repeated = new StringBuilder();
         while (finder.find()) {
             final String token = finder.group();
             if (!found.add(token)) {
                 repeated.append(", ").append(token);
             }
         }

         final PlaceholderStatus status = patternPlaceholders.getStatus(path);
         if (status == PlaceholderStatus.DISALLOWED) {
             // Not a message pattern: any placeholder is an error and the value is left alone.
             if (!found.isEmpty()) {
                 result.add(new CheckStatus()
                     .setCause(this)
                     .setMainType(CheckStatus.errorType)
                     .setSubtype(Subtype.shouldntHavePlaceholders)
                     .setMessage(
                         "This field is not a message pattern, and should not have '{0}, {1},' etc. See the English for an example.",
                         new Object[] {}));
             }
             return value;
         }

         final Set<String> expected = patternPlaceholders.get(path).keySet();

         // Duplicates are only acceptable where the path explicitly allows multiples.
         if (repeated.length() > 0 && status != PlaceholderStatus.MULTIPLE) {
             result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
                 .setSubtype(Subtype.extraPlaceholders)
                 .setMessage("Remove duplicates of{0}",
                     new Object[] { repeated.substring(1) }));
         }

         // Check that the needed placeholders are there.
         final StringBuilder absent = new StringBuilder();
         for (String name : expected) {
             if (!found.contains(name)) {
                 absent.append(", ").append(name);
             }
         }
         boolean placeholdersMissing = false;
         if (absent.length() > 0) {
             final boolean pluralPath = path.contains("[@count=") || path.contains("[@ordinal=");
             if (status == PlaceholderStatus.LOCALE_DEPENDENT && pluralPath) {
                 // A plural form may omit the placeholder only when its keyword has a unique value.
                 final PluralRules rules = PluralRules.forLocale(new ULocale(getCldrFileToCheck().getLocaleID()));
                 final XPathParts parts = XPathParts.getFrozenInstance(path);
                 String keyword = parts.getAttributeValue(-1, "count");
                 if (keyword == null) {
                     keyword = parts.getAttributeValue(-1, "ordinal");
                 }
                 placeholdersMissing = rules.getUniqueKeywordValue(keyword) == PluralRules.NO_UNIQUE_VALUE;
             } else {
                 placeholdersMissing = status == PlaceholderStatus.REQUIRED;
             }
         }
         if (placeholdersMissing) {
             result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
                 .setSubtype(Subtype.missingPlaceholders)
                 .setMessage("This message pattern is missing placeholder(s){0}. See the English for an example.",
                     new Object[] { absent.substring(1) }));
         }

         // Check for extra placeholders: whatever remains after removing the expected ones.
         found.removeAll(expected);
         if (!found.isEmpty()) {
             final String bracketed = found.toString();
             final String list = bracketed.substring(1, bracketed.length() - 1);
             result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
                 .setSubtype(Subtype.extraPlaceholders)
                 .setMessage("Extra placeholders {0} should be removed.",
                     new Object[] { list }));
         }

         // Replace the placeholders so that only the other characters get checked.
         return patternMatcher.replaceAll(STAND_IN);
     }

     /**
      * Checks if ASCII characters are allowed in a currency symbol in the specified locale.
      * ASCII is never allowed when the resolved file's character order is right-to-left, always
      * allowed for Latin-script locales, and otherwise disallowed only for currencies returned by
      * getCurrenciesForScript for the locale's script.
      *
      * @param localeID the locale ID that the currency is in
      * @param currency the currency to be checked
      * @return true if ASCII is not allowed
      * @throws IllegalArgumentException if the locale has no script and no likely subtags are
      *         available for it or for its language
      */
     private boolean asciiNotAllowed(String localeID, String currency) {
         // Don't allow ascii at all for bidi scripts.
         String charOrientation = getResolvedCldrFileToCheck().getStringValue(
             "//ldml/layout/orientation/characterOrder");
         // Constant-first comparison: a missing orientation value is treated as not right-to-left
         // instead of causing a NullPointerException.
         if ("right-to-left".equals(charOrientation)) {
             return true;
         }

         // Get script of locale. if Latn, quit.
         LocaleIDParser parser = new LocaleIDParser().set(localeID);
         String script = parser.getScript();
         if (script.length() == 0) {
             localeID = sdi.getLikelySubtags().get(localeID);
             if (localeID == null) {
                 localeID = sdi.getLikelySubtags().get(parser.getLanguage());
                 if (localeID == null) {
                     throw new IllegalArgumentException(
                         "A likely subtag for " + parser.getLanguage() +
                         " is required to get its script.");
                 }
             }
             script = parser.set(localeID).getScript();
         }
         if (script.equals("Latn")) {
             return false;
         }

         // Enforce checking of for other non-Latin scripts, for all currencies
         // whose countries use that script, e.g. Russian should have Cyrillic
         // currency symbols for modern currencies of countries with official
         // languages whose script is Cyrillic (Bulgaria, Serbia, ...).
         Set<String> currencies = getCurrenciesForScript(script);
         return currencies != null && currencies.contains(currency);
     }

     /**
      * Returns the currencies that are currently legal tender in any territory whose primary
      * languages use the given script. The script-to-currency mapping is computed on first use
      * and cached in scriptToCurrencies.
      *
      * @param script the script code to look up
      * @return the result of looking the script up in the cached relation
      *         (presumably null when the script has no entry — callers null-check it)
      */
     private Set<String> getCurrenciesForScript(String script) {
         if (scriptToCurrencies != null) return scriptToCurrencies.get(script);

         // Get mapping of scripts to the territories that use that script in
         // any of their primary languages.
         Relation scriptToTerritories = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
         for (String lang : sdi.getBasicLanguageDataLanguages()) {
             BasicLanguageData langData = sdi.getBasicLanguageDataMap(lang).get(Type.primary);
             if (langData == null) {
                 continue;
             }
             for (String curScript : langData.getScripts()) {
                 scriptToTerritories.putAll(curScript, langData.getTerritories());
             }
         }

         // For each territory, get all of its legal tender currencies. The mapping is built in a
         // local and published only once complete, so a failure part-way through cannot leave a
         // half-filled cache behind.
         Date now = new Date();
         Relation currenciesByScript = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
         for (Object curScript : scriptToTerritories.keySet()) {
             Set<String> territories = scriptToTerritories.get(curScript);
             Set<String> currencies = new HashSet<String>();
             for (String territory : territories) {
                 Set<CurrencyDateInfo> currencyInfo = sdi.getCurrencyDateInfo(territory);
                 if (currencyInfo == null) {
                     continue; // no currency data for this territory
                 }
                 for (CurrencyDateInfo info : currencyInfo) {
                     if (info.isLegalTender() && info.getEnd().compareTo(now) > 0) {
                         currencies.add(info.getCurrency());
                     }
                 }
             }
             currenciesByScript.putAll(curScript, currencies);
         }
         scriptToCurrencies = currenciesByScript;
         return scriptToCurrencies.get(script);
     }

     /**
      * Extracts just the literal text from a date pattern into justText, replacing each
      * variable field by variableReplacement.
      *
      * @param value the date pattern to parse
      * @param variableReplacement text written in place of each variable field; when null or
      *        empty, variable fields are simply dropped
      * @param justText output buffer; it is always cleared first, so it is empty when the
      *        pattern cannot be parsed
      * @return true if the pattern contains at least one literal text item; false if it has
      *         none or cannot be parsed (a different test will report that error)
      */
     public boolean extractDatePatternText(String value, String variableReplacement, StringBuilder justText) {
         // Clear up front so a failed parse never leaves text from an earlier call in the buffer.
         justText.setLength(0);
         try {
             formatParser.set(value);
         } catch (Exception e) {
             return false; // give up, it is illegal
         }
         boolean doReplacement = variableReplacement != null && variableReplacement.length() > 0;
         boolean haveText = false;
         for (Object item : formatParser.getItems()) {
             if (item instanceof String) {
                 // Literal text is kept as-is.
                 justText.append(item);
                 haveText = true;
             } else if (doReplacement) {
                 justText.append(variableReplacement);
             }
         }
         return haveText;
     }

     /**
      * Tests whether the source contains at least one of the given segments as a substring,
      * at any position including the very start.
      *
      * @param source the string to search, typically a path
      * @param segments the candidate substrings
      * @return true if any segment occurs in source; false otherwise, including when no
      *         segments are given
      */
     public boolean containsPart(String source, String... segments) {
         for (String segment : segments) {
             // contains() also accepts a match at index 0, which "indexOf(...) > 0" missed.
             if (source.contains(segment)) {
                 return true;
             }
         }
         return false;
     }

     // NOTE(review): not referenced anywhere in the visible part of this file; confirm whether it is still needed.
     static final String TEST = "؉";

     /**
      * Adds one status entry describing a set of offending characters. The message lists the
      * characters (pretty-printed), the names of their scripts in braces (Common and Inherited
      * are left out), and the given qualifier.
      *
      * @param missing the offending characters
      * @param warningVsError the main type of the status entry
      * @param subtype the subtype used when at least one character is outside ASCII
      * @param subtypeAscii the subtype used when every character is ASCII
      * @param qualifier the phrase completing the sentence, e.g. "are not in the exemplar characters"
      * @param result receives the new status entry
      */
     private void addMissingMessage(UnicodeSet missing, CheckStatus.Type warningVsError, Subtype subtype,
         Subtype subtypeAscii,
         String qualifier, List<CheckStatus> result) {
         final String prettyMissing = prettyPrint.format(missing);

         // Record the script of each item's first code point, ignoring Common and Inherited.
         final BitSet scriptCodes = new BitSet();
         for (String item : missing) {
             final int code = UScript.getScript(item.codePointAt(0));
             if (code != UScript.INHERITED && code != UScript.COMMON) {
                 scriptCodes.set(code);
             }
         }

         // Render the scripts as "{Name, Name}", or as an empty string when there are none.
         final StringBuilder scriptNames = new StringBuilder();
         if (!scriptCodes.isEmpty()) {
             scriptNames.append("{");
             int code = scriptCodes.nextSetBit(0);
             while (code >= 0) {
                 if (scriptNames.length() > 1) {
                     scriptNames.append(", ");
                 }
                 scriptNames.append(UScript.getName(code));
                 code = scriptCodes.nextSetBit(code + 1);
             }
             scriptNames.append("}");
         }

         final Subtype chosenSubtype = ASCII.containsAll(missing) ? subtypeAscii : subtype;
         final CheckStatus status = new CheckStatus()
             .setCause(this)
             .setMainType(warningVsError)
             .setSubtype(chosenSubtype)
             .setMessage(
                 "The characters \u200E{0}\u200E {1} {2}. "
                     +
                     "For what to do, see <i>Handling Warnings</i> in <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/characters#TOC-Handing-Warnings'>Characters</a>.",
                 new Object[] { prettyMissing, scriptNames, qualifier });
         result.add(status);
     }

     // NFC normalizer applied to a value before its parenthesized and unparenthesized parts are checked.
     static final Normalizer2 NFC = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);

     /**
      * Finds the characters of a value that are not allowed, applying a looser rule inside
      * parentheses: text outside any bracket pair must be in exemplarSet, while text between an
      * opening and the next closing bracket must be in exemplarSetPlusASCII. The value is
      * NFC-normalized before it is split, unless it passes the initial whole-value test.
      *
      * @param exemplarSet the characters allowed anywhere
      * @param exemplarSetPlusASCII the characters allowed inside parentheses
      * @param value the text to check
      * @return null if ok, otherwise UnicodeSet of bad characters
      */
     private UnicodeSet containsAllCountingParens(UnicodeSet exemplarSet, UnicodeSet exemplarSetPlusASCII, String value) {
         // Fast path: everything is acceptable without looking at brackets.
         if (exemplarSet.containsAll(value)) {
             return null;
         }

         final String text = NFC.normalize(value);
         final int length = text.length();
         UnicodeSet bad = null;
         int pos = 0;
         for (;;) {
             // Text up to the next opening bracket (or the end) is checked strictly.
             final int open = START_PAREN.findIn(text, pos, false);
             bad = addDisallowedItems(exemplarSet, text.substring(pos, open), bad);
             if (open == length) {
                 break; // no further opening bracket
             }
             // Text up to the next closing bracket (or the end) may also be ASCII.
             // Mismatched bracket kinds are not a concern here.
             final int innerStart = open + 1;
             final int close = END_PAREN.findIn(text, innerStart, false);
             bad = addDisallowedItems(exemplarSetPlusASCII, text.substring(innerStart, close), bad);
             if (close == length) {
                 break; // bracket never closed
             }
             pos = close + 1;
         }
         return bad;
     }

     /**
      * Adds to result every character of the text that is not in exemplarSet.
      *
      * @param exemplarSet the allowed characters
      * @param outside the text to check
      * @param result the set collected so far, or null if nothing has been collected yet
      * @return result unchanged when the whole text is allowed; otherwise result (created on
      *         demand) with the offending characters added
      */
     private UnicodeSet addDisallowedItems(UnicodeSet exemplarSet, String outside, UnicodeSet result) {
         if (exemplarSet.containsAll(outside)) {
             return result;
         }
         final UnicodeSet collected = (result == null) ? new UnicodeSet() : result;
         final UnicodeSet offenders = new UnicodeSet().addAll(outside).removeAll(exemplarSet);
         collected.addAll(offenders);
         return collected;
     }
 }