// blob: 36d6c8b3048681d16c5c3f7ba03a32891896a82b [file] [log] [blame]
/*
******************************************************************************
* Copyright (C) 2005-2012, International Business Machines Corporation and *
* others. All Rights Reserved. *
******************************************************************************
*/
package org.unicode.cldr.test;
import java.util.BitSet;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.InternalCldrException;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.PatternPlaceholders;
import org.unicode.cldr.util.PatternPlaceholders.PlaceholderStatus;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
import org.unicode.cldr.util.UnicodeSetPrettyPrinter;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.DateTimePatternGenerator;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.PluralRules;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
public class CheckForExemplars extends FactoryCheckCLDR {
// Bidirectional control characters (U+061C, U+200E, U+200F, U+202A-U+202D, U+2066-U+2069).
// setCldrFileToCheck adds these to the allowed set when the locale's exemplars contain RTL characters.
private static final UnicodeSet RTL_CONTROLS = new UnicodeSet("[\\u061C\\u200E\\u200F\\u202A-\\u202D\\u2066-\\u2069]");
// Characters whose bidi class is AL or R; used to detect a right-to-left exemplar set.
private static final UnicodeSet RTL = new UnicodeSet("[[:bc=AL:][:bc=R:]]");
// Stand-in substituted for message placeholders and date-pattern variable fields, so that only the
// remaining literal text is compared with the exemplar characters.
private static final String STAND_IN = "#";
// private final UnicodeSet commonAndInherited = new UnicodeSet(CheckExemplars.Allowed).complement();
// "[[:script=common:][:script=inherited:][:alphabetic=false:]]");
// Path fragments that exempt an item from this check: handleCheck returns without reporting anything
// when containsPart(path, EXEMPLAR_SKIPS) is true.
static String[] EXEMPLAR_SKIPS = {
"/currencySpacing",
"/exemplarCharacters",
// "/pattern",
"/localizedPatternChars",
"/segmentations",
"/references",
"/localeDisplayNames/variants/",
"/commonlyUsed",
"/defaultNumberingSystem",
"/otherNumberingSystems",
"/exponential",
"/nan",
"/scientificFormats",
"/inText",
"/orientation",
"/symbol[@alt=\"narrow\"]",
"/characters/parseLenients"
};
// Path fragments identifying date/time patterns: for these, handleCheck reduces the value to its
// literal text (via extractDatePatternText) before the character checks.
// NOTE(review): the last entry has no leading '/', unlike the others; matching is by substring, so it
// still matches any path containing "timeFormatLength".
static String[] DATE_PARTS = {
"/hourFormat",
"/dateFormatItem",
"/intervalFormatItem",
"/dateFormatLength",
"timeFormatLength"
};
// Opening punctuation (general category Ps); marks the start of a parenthesized run.
static final UnicodeSet START_PAREN = new UnicodeSet("[[:Ps:]]").freeze();
// Closing punctuation (general category Pe); marks the end of a parenthesized run.
static final UnicodeSet END_PAREN = new UnicodeSet("[[:Pe:]]").freeze();
// Currency symbols (general category Sc); always tolerated in currency symbol values.
static final UnicodeSet ALL_CURRENCY_SYMBOLS = new UnicodeSet("[[:Sc:]]").freeze();
// ASCII letters A-Z and a-z; tolerated in currency symbols and GMT formats.
static final UnicodeSet LETTER = new UnicodeSet("[[A-Za-z]]").freeze();
// All characters with a numeric general category (N); not permitted in date/time literal text or code patterns.
static final UnicodeSet NUMBERS = new UnicodeSet("[[:N:]]").freeze();
// Every letter except 'H' and 'm'; reported when found in the literal text of an hourFormat.
static final UnicodeSet DISALLOWED_HOUR_FORMAT = new UnicodeSet("[[:letter:]]").remove('H').remove('m').freeze();
// All letters (general category L); not permitted in numeric range patterns.
static final UnicodeSet DISALLOWED_IN_RANGE = new UnicodeSet("[:L:]").freeze();
// Characters allowed in values of the current locale; built and frozen by setCldrFileToCheck.
private UnicodeSet exemplars;
// exemplars plus ASCII; the more lenient set applied to text inside parentheses.
private UnicodeSet exemplarsPlusAscii;
//private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplars = new UnicodeSet("[()();,;,]").freeze();
//private static final UnicodeSet DISALLOWED_IN_scriptRegionExemplarsWithParens = new UnicodeSet("[;,;,]").freeze();
// Hack until cldrbug 6566 is fixed. TODO
// Matches a STAND_IN ('#') wrapped in one opening and one closing punctuation character; such
// sequences are removed from unit values before the parentheses check.
private static final Pattern IGNORE_PLACEHOLDER_PARENTHESES = PatternCache.get("\\p{Ps}#\\p{Pe}");
// private UnicodeSet currencySymbolExemplars;
// True while this check is disabled (set at the start of setCldrFileToCheck, cleared only once the
// exemplar sets have been built).
private boolean skip;
// Collator for the locale being checked; supplied to prettyPrint.
private Collator col;
// Primary-strength collator for the locale; assigned in setCldrFileToCheck but not read elsewhere in this file.
private Collator spaceCol;
// Formats the set of offending characters for inclusion in messages.
UnicodeSetPrettyPrinter prettyPrint;
// Output holder passed to getSourceLocaleID in handleCheck.
private Status otherPathStatus = new Status();
// Reusable matcher for placeholders (pattern defined by ExampleGenerator.PARAMETER).
private Matcher patternMatcher = ExampleGenerator.PARAMETER.matcher("");
// True when the exemplarErrors option is present; lets problems be reported as errors rather than warnings.
private boolean errorDefaultOption;
// for extracting date pattern text
private DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
// Scratch buffer that receives the literal text extracted from a date pattern.
StringBuilder justText = new StringBuilder();
// public static final Pattern SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN = PatternCache.get("/(" +
// "codePattern" +
// "|dateRangePattern" +
// "|dateTimeFormat[^/]*?/pattern" +
// "|appendItem" +
// "|intervalFormatFallback" +
// "|hoursFormat" +
// "|gmtFormat" +
// "|regionFormat" +
// "|fallbackFormat" +
// "|unitPattern.*@count=\"(zero|one|two|few|many|other)\"" +
// "|localePattern" +
// "|localeKeyTypePattern" +
// "|listPatternPart" +
// "|ellipsis" +
// "|monthPattern" +
// ")");
// private Matcher supposedToBeMessageFormat = SUPPOSED_TO_BE_MESSAGE_FORMAT_PATTERN.matcher("");
// Paths whose values may legitimately start or end with whitespace; all other paths get an error
// from handleCheck when value differs from value.trim().
public static final Pattern LEAD_OR_TRAIL_WHITESPACE_OK = PatternCache.get("/(" +
"references/reference" +
"|insertBetween" +
")");
// Reusable matcher over LEAD_OR_TRAIL_WHITESPACE_OK; reset with each path.
private Matcher leadOrTrailWhitespaceOk = LEAD_OR_TRAIL_WHITESPACE_OK.matcher("");
// U+0020 through U+007F. Used to build exemplarsPlusAscii and to choose the ASCII-specific subtype.
private static UnicodeSet ASCII = (UnicodeSet) new UnicodeSet("[\\u0020-\\u007F]").freeze();
// Source of the placeholder requirements (status and names) for each path.
private PatternPlaceholders patternPlaceholders = PatternPlaceholders.getInstance();
// Supplemental data (digits, likely subtags, language and currency data); set in the constructor.
private SupplementalDataInfo sdi;
// Cache built on first use by getCurrenciesForScript: script code -> currency codes. Declared as a raw type.
private Relation scriptToCurrencies;
/**
 * Creates the check for the given factory and stores the supplemental data instance
 * returned by SupplementalDataInfo.getInstance().
 *
 * @param factory handed unchanged to the superclass constructor
 */
public CheckForExemplars(Factory factory) {
    super(factory);
    this.sdi = SupplementalDataInfo.getInstance();
}
/**
 * Turns a placeholder definition string into the ordered set of placeholder names.
 * The source is split on ';' followed by whitespace, and the text before the first '='
 * of each piece (trimmed) is taken as the name.
 * Adapted from GenerateXMB.MapTransform.
 *
 * @author jchye
 */
static class PlaceholderTransform implements Transform<String, Set<String>> {
    @Override
    public Set<String> transform(String source) {
        final Set<String> names = new LinkedHashSet<String>();
        for (final String entry : source.split(";\\s+")) {
            // A piece without '=' makes indexOf return -1, so substring throws.
            final String name = entry.substring(0, entry.indexOf('=')).trim();
            names.add(name);
        }
        return names;
    }
}
/**
 * Prepares this check for a new file by building the sets of allowed characters.
 * <p>
 * The check stays disabled (skip == true) for a null file, for root, and when no main exemplar
 * set can be obtained; in the last case a noExemplarCharacters error is added to possibleErrors.
 * Otherwise the allowed set is the main exemplars, plus RTL controls for right-to-left sets, plus
 * auxiliary exemplars, plus (when CheckExemplars.USE_PUNCTUATION) punctuation, digits and the
 * stand-in character, plus CheckExemplars.AlwaysOK.
 *
 * @param cldrFile the file to check; null leaves the check untouched
 * @param options check options; only the presence of exemplarErrors is read here
 * @param possibleErrors receives any problems found while fetching exemplar sets
 * @return this
 */
@Override
public CheckCLDR setCldrFileToCheck(CLDRFile cldrFile, Options options, List<CheckStatus> possibleErrors) {
    if (cldrFile == null) {
        return this;
    }
    // Disabled until the exemplar sets below have been built.
    skip = true;
    super.setCldrFileToCheck(cldrFile, options, possibleErrors);
    final String locale = cldrFile.getLocaleID();
    if (locale.equals("root")) {
        return this;
    }
    errorDefaultOption = options.get(Options.Option.exemplarErrors) != null;

    final ULocale uLocale = new ULocale(locale);
    col = Collator.getInstance(uLocale);
    spaceCol = Collator.getInstance(uLocale);
    spaceCol.setStrength(Collator.PRIMARY);

    final CLDRFile resolved = getResolvedCldrFileToCheck();
    final boolean[] fetchedOk = new boolean[1];
    exemplars = safeGetExemplars("", possibleErrors, resolved, fetchedOk);
    if (exemplars == null) {
        final CheckStatus noExemplars = new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
            .setSubtype(Subtype.noExemplarCharacters)
            .setMessage("No Exemplar Characters: {0}", new Object[] { this.getClass().getName() });
        possibleErrors.add(noExemplars);
        return this;
    }
    // Work on a modifiable copy, or start from empty if the fetch was flagged as failed.
    exemplars = fetchedOk[0] ? new UnicodeSet(exemplars) : new UnicodeSet();

    if (RTL.containsSome(exemplars)) {
        exemplars.addAll(RTL_CONTROLS);
    }

    final UnicodeSet auxiliarySet = safeGetExemplars("auxiliary", possibleErrors, resolved, fetchedOk);
    if (auxiliarySet != null) {
        exemplars.addAll(auxiliarySet);
    }

    if (CheckExemplars.USE_PUNCTUATION) {
        final UnicodeSet punctuationSet = safeGetExemplars("punctuation", possibleErrors, resolved, fetchedOk);
        if (punctuationSet != null) {
            exemplars.addAll(punctuationSet);
        }
        exemplars.addAll(getNumberSystemExemplars());
        // TODO fix replacement character
        exemplars.add(STAND_IN);
    }

    exemplars.addAll(CheckExemplars.AlwaysOK).freeze();
    exemplarsPlusAscii = new UnicodeSet(exemplars).addAll(ASCII).freeze();
    skip = false;

    final Collator ordering = col != null ? col : Collator.getInstance(ULocale.ROOT);
    final Collator spaceComparator = col != null
        ? col
        : Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY);
    prettyPrint = new UnicodeSetPrettyPrinter()
        .setOrdering(ordering)
        .setSpaceComparator(spaceComparator)
        .setCompressRanges(true);
    return this;
}
/**
 * Returns the digits of the file's default numbering system as a new, modifiable set.
 * <p>
 * The numbering system is read from the (unresolved) file being checked, so it may be absent,
 * and the supplemental data may have no digit string for it. In either case an empty set is
 * returned instead of passing null on to UnicodeSet.addAll.
 *
 * @return a set of the digit characters, or an empty set when none are known
 */
private UnicodeSet getNumberSystemExemplars() {
    UnicodeSet digitSet = new UnicodeSet();
    String numberSystem = getCldrFileToCheck().getStringValue("//ldml/numbers/defaultNumberingSystem");
    if (numberSystem == null) {
        return digitSet;
    }
    String digits = sdi.getDigits(numberSystem);
    if (digits != null) {
        digitSet.addAll(digits);
    }
    return digitSet;
}
/**
 * Fetches the winning exemplar set of the given type without letting an
 * IllegalArgumentException escape.
 *
 * @param type exemplar type passed to getExemplarSet ("" is used by the caller for the main set)
 * @param possibleErrors receives a couldNotAccessExemplars error if the lookup throws
 * @param resolvedFile the file to read the exemplar set from
 * @param ok single-element out-parameter: ok[0] is set to true on success, false on failure
 * @return the exemplar set (possibly null) on success, or null if the lookup threw
 */
private UnicodeSet safeGetExemplars(String type, List<CheckStatus> possibleErrors, CLDRFile resolvedFile,
    boolean[] ok) {
    try {
        final UnicodeSet fetched = resolvedFile.getExemplarSet(type, CLDRFile.WinningChoice.WINNING);
        ok[0] = true;
        return fetched;
    } catch (IllegalArgumentException iae) {
        final CheckStatus failure = new CheckStatus()
            .setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.couldNotAccessExemplars)
            .setMessage("Could not get exemplar set: " + iae.toString());
        possibleErrors.add(failure);
        ok[0] = false;
        return null;
    }
}
/**
 * Checks one path/value pair against the locale's exemplar characters and related pattern rules.
 * <p>
 * Stages, in order: (1) return early for null fullPath/value, for the disabled state, for values
 * coming from code fallback, and for paths matching EXEMPLAR_SKIPS; (2) validate message
 * placeholders and replace them with STAND_IN; (3) reject letters in numeric range patterns;
 * (4) for date/time paths, reduce the value to its literal text and reject digits (plus
 * disallowed letters in hourFormat); (5) run a path-specific exemplar check; (6) report leading
 * or trailing whitespace.
 *
 * @param path path of the item; must not be null
 * @param fullPath full path of the item; null causes the item to be skipped
 * @param value value of the item; null causes the item to be skipped
 * @param options check options (not read in this method)
 * @param result list to which resulting CheckStatus items are appended
 * @return this
 * @throws InternalCldrException if path is null or no file to check has been set
 */
public CheckCLDR handleCheck(String path, String fullPath, String value,
Options options, List<CheckStatus> result) {
if (fullPath == null) return this; // skip paths that we don't have
if (value == null) return this; // skip values that we don't have ?
if (skip) return this;
if (path == null) {
throw new InternalCldrException("Empty path!");
} else if (getCldrFileToCheck() == null) {
throw new InternalCldrException("no file to check!");
}
String sourceLocale = getResolvedCldrFileToCheck().getSourceLocaleID(path, otherPathStatus);
// if we are an alias to another path, then skip
// if (!path.equals(otherPathStatus.pathWhereFound)) {
// return this;
// }
// now check locale source
if (XMLSource.CODE_FALLBACK_ID.equals(sourceLocale)) {
return this;
// } else if ("root".equals(sourceLocale)) {
// // skip eras for non-gregorian
// if (true) return this;
// if (path.indexOf("/calendar") >= 0 && path.indexOf("gregorian") <= 0) return this;
}
if (containsPart(path, EXEMPLAR_SKIPS)) {
return this;
}
// Exemplar problems are errors only when the exemplarErrors option is set AND the value is defined
// in this locale itself (not inherited); otherwise they are warnings.
// NOTE(review): '&' is non-short-circuit, so sourceLocale.equals(...) is always evaluated.
CheckStatus.Type errorOption = errorDefaultOption & sourceLocale.equals(getResolvedCldrFileToCheck().getLocaleID())
? CheckStatus.errorType : CheckStatus.warningType;
// From here on, value may have its placeholders replaced by STAND_IN.
value = checkAndReplacePlaceholders(path, value, result);
if (path.startsWith("//ldml/numbers/miscPatterns") && path.contains("[@type=\"range\"]")) {
if (DISALLOWED_IN_RANGE.containsSome(value)) {
result
.add(new CheckStatus()
.setCause(this)
.setMainType(CheckStatus.errorType)
.setSubtype(Subtype.illegalCharactersInPattern)
.setMessage(
"Range patterns should not have letters.",
new Object[] {}));
}
}
// Now handle date patterns.
if (containsPart(path, DATE_PARTS)) {
if (!extractDatePatternText(value, STAND_IN, justText)) {
return this; // we are done, no text.
}
// Only the literal text (with STAND_IN for each variable field) is checked from here on.
value = justText.toString();
if (NUMBERS.containsSome(value)) {
UnicodeSet disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
addMissingMessage(disallowed, CheckStatus.errorType,
Subtype.patternCannotContainDigits,
Subtype.patternCannotContainDigits,
"cannot occur in date or time patterns", result);
}
if (path.endsWith("/hourFormat")) {
UnicodeSet disallowed = new UnicodeSet().addAll(value)
.retainAll(DISALLOWED_HOUR_FORMAT);
if (!disallowed.isEmpty()) {
addMissingMessage(disallowed, CheckStatus.errorType,
Subtype.patternContainsInvalidCharacters,
Subtype.patternContainsInvalidCharacters,
"cannot occur in the hour format", result);
}
}
}
// POSIX messages get no exemplar check and no whitespace check.
if (path.startsWith("//ldml/posix/messages")) return this;
// Path-specific exemplar checks. In each branch, disallowed is the set of characters of value that
// are not permitted (null when everything is permitted), possibly reduced by branch-specific allowances.
UnicodeSet disallowed;
if (path.contains("/currency") && path.contains("/symbol")) {
if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
disallowed.removeAll(ALL_CURRENCY_SYMBOLS);
disallowed.removeAll(LETTER); // Allow ASCII A-Z in currency symbols
// String currency = new XPathParts().set(path).getAttributeValue(-2, "type");
if (disallowed.size() > 0) {
// && asciiNotAllowed(getCldrFileToCheck().getLocaleID(), currency)) {
addMissingMessage(disallowed, errorOption,
Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
result);
}
}
} else if (path.contains("/gmtFormat") || path.contains("/gmtZeroFormat")) {
if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
disallowed.removeAll(LETTER); // Allow ASCII A-Z in gmtFormat and gmtZeroFormat
if (disallowed.size() > 0) {
addMissingMessage(disallowed, errorOption,
Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
result);
}
}
} else if (path.contains("/months") || path.contains("/quarters")) {
if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
disallowed.removeAll("IVXivx"); // Allow Roman-numeral letters in month or quarter names
if (path.contains("/calendar[@type=\"generic\"]/months")) {
disallowed.removeAll("M"); // Generic-calendar month names contain 'M' and do not get modified
}
if (disallowed.size() > 0) {
addMissingMessage(disallowed, errorOption,
Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters",
result);
}
}
} else if (path.contains("/localeDisplayNames") && !path.contains("/localeDisplayPattern")) {
// test first for outside of the set.
if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
if (path.contains("[@type=\"iso8601\"]")) {
disallowed.removeAll("ISO"); // Name of ISO8601 calendar may contain "ISO" regardless of native script
}
if (disallowed.size() > 0) {
addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
}
}
// Code patterns additionally must not contain any digits.
if (path.contains("/codePatterns")) {
disallowed = new UnicodeSet().addAll(value).retainAll(NUMBERS);
if (!disallowed.isEmpty()) {
addMissingMessage(disallowed, CheckStatus.errorType,
Subtype.patternCannotContainDigits,
Subtype.patternCannotContainDigits,
"cannot occur in locale fields", result);
}
}
} else if (path.contains("/units")) {
// Units get no exemplar check here; only parentheses are rejected, except those that directly
// wrap a placeholder stand-in.
String noValidParentheses = IGNORE_PLACEHOLDER_PARENTHESES.matcher(value).replaceAll("");
disallowed = new UnicodeSet().addAll(START_PAREN).addAll(END_PAREN)
.retainAll(noValidParentheses);
if (!disallowed.isEmpty()) {
addMissingMessage(disallowed, CheckStatus.errorType,
Subtype.parenthesesNotAllowed,
Subtype.parenthesesNotAllowed,
"cannot occur in units", result);
}
} else if (path.endsWith("/exemplarCity")) {
disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value);
if (disallowed != null) {
// NOTE: values inherited from root return here, so they also bypass the whitespace check below.
if ("root".equals(sourceLocale)) {
return this;
}
// Get script of locale.
LocaleIDParser parser = new LocaleIDParser().set(sourceLocale);
String script = parser.getScript();
if (script.length() == 0) {
// No explicit script: look up likely subtags for the locale, then for its language alone.
String localeID = sdi.getLikelySubtags().get(sourceLocale);
if (localeID == null) {
localeID = sdi.getLikelySubtags().get(parser.getLanguage());
if (localeID == null) {
throw new IllegalArgumentException(
"A likely subtag for " + parser.getLanguage() +
" is required to get its script.");
}
}
script = parser.set(localeID).getScript();
}
// Characters in the locale's own script are tolerated in city names even if not in the exemplars.
int myscript = UScript.getCodeFromName(script);
UnicodeSet toRemove = new UnicodeSet();
for (int i = 0; i < disallowed.size(); i++) {
int c = disallowed.charAt(i);
if (UScript.getScript(c) == myscript) {
toRemove.add(c);
}
}
disallowed.removeAll(toRemove);
if (disallowed.size() > 0) {
addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
}
}
} else if (path.contains("/annotations") && !path.contains("[@type")) {
// For these annotation paths the problem is always reported as a warning, regardless of errorOption.
if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
addMissingMessage(disallowed, CheckStatus.warningType, Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
}
} else {
// Default: plain exemplar check with no extra allowances.
if (null != (disallowed = containsAllCountingParens(exemplars, exemplarsPlusAscii, value))) {
addMissingMessage(disallowed, errorOption, Subtype.charactersNotInMainOrAuxiliaryExemplars,
Subtype.asciiCharactersNotInMainOrAuxiliaryExemplars, "are not in the exemplar characters", result);
}
}
// check for spaces
if (!value.equals(value.trim())) {
if (!leadOrTrailWhitespaceOk.reset(path).find()) {
result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
.setSubtype(Subtype.mustNotStartOrEndWithSpace)
.setMessage("This item must not start or end with whitespace, or be empty."));
}
}
// if (value.contains(" ")) {
// result.add(new
// CheckStatus().setCause(this).setMainType(CheckStatus.errorType).setSubtype(Subtype.mustNotStartOrEndWithSpace)
// .setMessage("This item must not contain two space characters in a row."));
// }
return this;
}
/**
 * Validates the placeholders found in a value (as matched by ExampleGenerator.PARAMETER) against
 * what PatternPlaceholders specifies for the path, and adds an error to result for duplicated,
 * missing, or extra placeholders, or for placeholders in a field that must not have any.
 *
 * @param path path of the item, used to look up the placeholder requirements
 * @param value the value to inspect
 * @param result list to which any errors are appended
 * @return the value with every placeholder replaced by STAND_IN when placeholders are not disallowed
 *         for the path; otherwise the value unchanged
 */
private String checkAndReplacePlaceholders(String path, String value, List<CheckStatus> result) {
// add checks for patterns. Make sure that all and only the message format patterns have {n}
Matcher matcher = patternMatcher.reset(value);
// Distinct placeholders found in the value.
Set<String> matchList = new HashSet<String>();
// Accumulates ", name" entries; used first for duplicates, then reused for missing placeholders.
StringBuffer placeholderBuffer = new StringBuffer();
while (matcher.find()) {
// Look for duplicate values.
if (!matchList.add(matcher.group())) {
placeholderBuffer.append(", ").append(matcher.group());
}
}
// placeholders stays null only when placeholders are DISALLOWED for this path.
Set<String> placeholders = null;
PlaceholderStatus placeholderStatus = patternPlaceholders.getStatus(path);
if (placeholderStatus != PlaceholderStatus.DISALLOWED) {
placeholders = patternPlaceholders.get(path).keySet();
}
boolean supposedToHaveMessageFormatFields =
// supposedToBeMessageFormat.reset(path).find()
placeholders != null;
if (supposedToHaveMessageFormatFields) {
if (placeholderBuffer.length() > 0) {
// Duplicates are only an error when the status does not permit multiple occurrences.
if (placeholderStatus != PlaceholderStatus.MULTIPLE) {
result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
.setSubtype(Subtype.extraPlaceholders)
.setMessage("Remove duplicates of{0}",
new Object[] { placeholderBuffer.substring(1) }));
}
}
placeholderBuffer.setLength(0);
// Check that the needed placeholders are there.
// NOTE(review): placeholders cannot be null inside this branch, so the next assignment never runs.
if (placeholders == null) placeholders = new HashSet<String>();
for (String placeholder : placeholders) {
if (!matchList.contains(placeholder)) {
placeholderBuffer.append(", ").append(placeholder);
}
}
boolean placeholdersMissing = false;
if (placeholderBuffer.length() > 0) {
// Check
// For LOCALE_DEPENDENT plural/ordinal paths, a missing placeholder is an error only when the
// plural keyword has no unique value in this locale; otherwise only REQUIRED status makes it an error.
if (placeholderStatus == PlaceholderStatus.LOCALE_DEPENDENT && (path.contains("[@count=") || path.contains("[@ordinal="))) {
PluralRules rules = PluralRules.forLocale(new ULocale(getCldrFileToCheck().getLocaleID()));
XPathParts parts = XPathParts.getFrozenInstance(path);
String keyword = parts.getAttributeValue(-1, "count");
if (keyword == null) {
keyword = parts.getAttributeValue(-1, "ordinal");
}
placeholdersMissing = rules.getUniqueKeywordValue(keyword) == PluralRules.NO_UNIQUE_VALUE;
} else {
placeholdersMissing = placeholderStatus == PlaceholderStatus.REQUIRED;
}
}
if (placeholdersMissing) {
result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
.setSubtype(Subtype.missingPlaceholders)
.setMessage("This message pattern is missing placeholder(s){0}. See the English for an example.",
new Object[] { placeholderBuffer.substring(1) }));
}
// Check for extra placeholders.
matchList.removeAll(placeholders);
if (matchList.size() > 0) {
// Strip the surrounding '[' and ']' from the set's toString() form.
String list = matchList.toString();
list = list.substring(1, list.length() - 1);
result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
.setSubtype(Subtype.extraPlaceholders)
.setMessage("Extra placeholders {0} should be removed.",
new Object[] { list }));
}
// check the other characters in the message format patterns
// patternMatcher still has value as its input (set by reset above), so this replaces every
// placeholder in the original value.
value = patternMatcher.replaceAll(STAND_IN);
} else if (matchList.size() > 0 && placeholderStatus == PlaceholderStatus.DISALLOWED) { // non-message field has
// placeholder values
result.add(new CheckStatus()
.setCause(this)
.setMainType(CheckStatus.errorType)
.setSubtype(Subtype.shouldntHavePlaceholders)
.setMessage(
"This field is not a message pattern, and should not have '{0}, {1},' etc. See the English for an example.",
new Object[] {}));
// end checks for patterns
}
return value;
}
/**
 * Decides whether ASCII characters must be rejected in a currency symbol for a locale.
 * <p>
 * ASCII is rejected outright when the resolved file's character order is right-to-left, and is
 * always accepted when the locale's script is Latn. For any other script it is rejected only
 * for currencies returned by getCurrenciesForScript for that script. When the locale ID carries
 * no script, the script is taken from the likely subtags of the locale, or failing that, of its
 * language.
 *
 * @param localeID the locale ID that the currency is in
 * @param currency the currency to be checked
 * @return true if ASCII is not allowed
 * @throws IllegalArgumentException if no likely subtags are available to determine the script
 */
private boolean asciiNotAllowed(String localeID, String currency) {
    // Bidi scripts: never allow ASCII.
    final String characterOrder = getResolvedCldrFileToCheck().getStringValue(
        "//ldml/layout/orientation/characterOrder");
    if (characterOrder.equals("right-to-left")) {
        return true;
    }

    final LocaleIDParser idParser = new LocaleIDParser().set(localeID);
    String script = idParser.getScript();
    if (script.length() == 0) {
        String likely = sdi.getLikelySubtags().get(localeID);
        if (likely == null) {
            likely = sdi.getLikelySubtags().get(idParser.getLanguage());
        }
        if (likely == null) {
            throw new IllegalArgumentException(
                "A likely subtag for " + idParser.getLanguage() +
                    " is required to get its script.");
        }
        script = idParser.set(likely).getScript();
    }

    // Latin-script locales may always use ASCII.
    if (script.equals("Latn")) {
        return false;
    }

    // Other scripts: enforce only for currencies of territories whose primary languages use that
    // script (e.g. Cyrillic symbols for currencies of countries that write in Cyrillic).
    final Set<String> scriptCurrencies = getCurrenciesForScript(script);
    if (scriptCurrencies == null) {
        return false;
    }
    return scriptCurrencies.contains(currency);
}
/**
 * Returns the currencies associated with a script, building the script-to-currency table on
 * first use and reusing it afterwards.
 * <p>
 * The table maps each script of a language's primary data to the currencies that are legal
 * tender, with an end date after the time the table was built, in the territories listed for
 * that language.
 *
 * @param script script code to look up
 * @return whatever the table holds for the script (the caller treats null as "no currencies")
 */
private Set<String> getCurrenciesForScript(String script) {
    if (scriptToCurrencies == null) {
        // Step 1: script -> territories, from the primary language data.
        Relation territoriesByScript = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
        for (String language : sdi.getBasicLanguageDataLanguages()) {
            BasicLanguageData primaryData = sdi.getBasicLanguageDataMap(language).get(Type.primary);
            if (primaryData != null) {
                for (String languageScript : primaryData.getScripts()) {
                    territoriesByScript.putAll(languageScript, primaryData.getTerritories());
                }
            }
        }

        // Step 2: script -> current legal-tender currencies of those territories.
        Date now = new Date();
        scriptToCurrencies = new Relation(new HashMap<String, Set<String>>(), HashSet.class);
        for (Object scriptKey : territoriesByScript.keySet()) {
            Set<String> scriptTerritories = territoriesByScript.get(scriptKey);
            Set<String> legalTender = new HashSet<String>();
            for (String territory : scriptTerritories) {
                Set<CurrencyDateInfo> infos = sdi.getCurrencyDateInfo(territory);
                for (CurrencyDateInfo info : infos) {
                    boolean stillCurrent = info.isLegalTender() && info.getEnd().compareTo(now) > 0;
                    if (stillCurrent) {
                        legalTender.add(info.getCurrency());
                    }
                }
            }
            scriptToCurrencies.putAll(scriptKey, legalTender);
        }
    }
    return scriptToCurrencies.get(script);
}
/**
 * Copies the literal text of a date/time pattern into justText, optionally writing
 * variableReplacement in place of each non-literal (variable) item.
 * <p>
 * If the pattern cannot be parsed, false is returned and justText is left untouched (a
 * different test is expected to report the malformed pattern). Otherwise justText is cleared
 * before being filled.
 *
 * @param value the date/time pattern to parse
 * @param variableReplacement text written for each variable item; null or empty means
 *        variable items are dropped
 * @param justText buffer receiving the result
 * @return true if the pattern contained at least one literal text item; false if it had none
 *         or could not be parsed
 */
public boolean extractDatePatternText(String value, String variableReplacement, StringBuilder justText) {
    try {
        formatParser.set(value);
    } catch (Exception e) {
        return false; // give up, it is illegal
    }
    final boolean substitute = variableReplacement != null && variableReplacement.length() > 0;
    justText.setLength(0);
    boolean sawLiteral = false;
    for (Object item : formatParser.getItems()) {
        if (!(item instanceof String)) {
            // Variable field: replace it, or drop it when no replacement was requested.
            if (substitute) {
                justText.append(variableReplacement);
            }
            continue;
        }
        justText.append(item);
        sawLiteral = true;
    }
    return sawLiteral;
}
/**
 * Tests whether source contains at least one of the given segments as a substring.
 * <p>
 * A match at the very start of source counts. The previous {@code indexOf(...) > 0} test
 * silently ignored matches at index 0.
 *
 * @param source the string to search (in this class, an XPath)
 * @param segments candidate substrings
 * @return true if any segment occurs anywhere in source; false otherwise, including when no
 *         segments are given
 */
public boolean containsPart(String source, String... segments) {
    for (String segment : segments) {
        // indexOf returns -1 only when the segment is absent; 0 is a valid match position.
        if (source.indexOf(segment) >= 0) {
            return true;
        }
    }
    return false;
}
// Single non-ASCII sample character; not referenced anywhere else in this file.
// NOTE(review): presumably a leftover from manual testing — confirm before removing.
static final String TEST = "؉";
/**
 * Appends one CheckStatus describing a set of offending characters.
 * <p>
 * The message lists the characters (formatted by prettyPrint), the names of their scripts in
 * braces (Common and Inherited are left out; the braces are omitted when no script remains),
 * and the supplied qualifier.
 *
 * @param missing the offending characters
 * @param warningVsError main type (severity) of the status
 * @param subtype subtype used when at least one offending character is outside ASCII
 * @param subtypeAscii subtype used when every offending character is in ASCII
 * @param qualifier phrase completing the sentence "The characters ... {qualifier}."
 * @param result list receiving the new status
 */
private void addMissingMessage(UnicodeSet missing, CheckStatus.Type warningVsError, Subtype subtype,
    Subtype subtypeAscii,
    String qualifier, List<CheckStatus> result) {
    final String formattedMissing = prettyPrint.format(missing);

    // Collect the script codes of the offending characters, ignoring Common and Inherited.
    final BitSet scriptCodes = new BitSet();
    for (String element : missing) {
        final int scriptCode = UScript.getScript(element.codePointAt(0));
        if (scriptCode != UScript.INHERITED && scriptCode != UScript.COMMON) {
            scriptCodes.set(scriptCode);
        }
    }

    // Render them as "{Name1, Name2}", or leave the text empty when there are none.
    final StringBuilder scriptNames = new StringBuilder();
    if (!scriptCodes.isEmpty()) {
        scriptNames.append("{");
        int next = scriptCodes.nextSetBit(0);
        while (next >= 0) {
            if (scriptNames.length() > 1) {
                scriptNames.append(", ");
            }
            scriptNames.append(UScript.getName(next));
            next = scriptCodes.nextSetBit(next + 1);
        }
        scriptNames.append("}");
    }

    final Subtype reportedSubtype;
    if (ASCII.containsAll(missing)) {
        reportedSubtype = subtypeAscii;
    } else {
        reportedSubtype = subtype;
    }

    final CheckStatus status = new CheckStatus()
        .setCause(this)
        .setMainType(warningVsError)
        .setSubtype(reportedSubtype)
        .setMessage(
            "The characters \u200E{0}\u200E {1} {2}. "
                +
                "For what to do, see <i>Handling Warnings</i> in <a target='CLDR-ST-DOCS' href='http://cldr.org/translation/characters#TOC-Handing-Warnings'>Characters</a>.",
            new Object[] { formattedMissing, scriptNames, qualifier });
    result.add(status);
}
// NFC composing normalizer; containsAllCountingParens normalizes a value with it before scanning for
// characters outside the exemplar set.
static final Normalizer2 NFC = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE);
/**
 * Finds the characters of a value that are not permitted, treating parenthesized text more
 * leniently than the rest.
 * <p>
 * If the whole value is already within exemplarSet, nothing more is done. Otherwise the value
 * is NFC-normalized and scanned: text outside opening/closing punctuation must be within
 * exemplarSet, text between an opening and the next closing punctuation must be within
 * exemplarSetPlusASCII. The bracket characters themselves are not checked, and mismatched
 * bracket kinds are not distinguished.
 *
 * @param exemplarSet characters permitted outside parentheses
 * @param exemplarSetPlusASCII characters permitted inside parentheses
 * @param value the text to examine
 * @return null if everything is permitted, otherwise the set of offending characters
 */
private UnicodeSet containsAllCountingParens(UnicodeSet exemplarSet, UnicodeSet exemplarSetPlusASCII, String value) {
    if (exemplarSet.containsAll(value)) {
        return null;
    }
    final String text = NFC.normalize(value);
    final int length = text.length();
    UnicodeSet offenders = null;
    int cursor = 0;
    for (;;) {
        // Text up to the next opening bracket (or to the end) is "outside".
        final int open = START_PAREN.findIn(text, cursor, false);
        offenders = addDisallowedItems(exemplarSet, text.substring(cursor, open), offenders);
        if (open == length) {
            break;
        }
        // Text after the opening bracket up to the next closing bracket (or the end) is "inside".
        final int insideStart = open + 1;
        final int close = END_PAREN.findIn(text, insideStart, false);
        offenders = addDisallowedItems(exemplarSetPlusASCII, text.substring(insideStart, close), offenders);
        if (close == length) {
            break;
        }
        cursor = close + 1;
    }
    return offenders;
}
/**
 * Adds to an accumulator the characters of a text fragment that are not in the allowed set.
 *
 * @param exemplarSet the allowed characters
 * @param outside the text fragment to examine
 * @param result accumulator so far; may be null when nothing has been collected yet
 * @return result unchanged if the fragment is fully allowed; otherwise the accumulator
 *         (created on demand) with the fragment's offending characters added
 */
private UnicodeSet addDisallowedItems(UnicodeSet exemplarSet, String outside, UnicodeSet result) {
    if (exemplarSet.containsAll(outside)) {
        return result;
    }
    final UnicodeSet accumulator = (result != null) ? result : new UnicodeSet();
    final UnicodeSet offending = new UnicodeSet().addAll(outside).removeAll(exemplarSet);
    accumulator.addAll(offending);
    return accumulator;
}
}