blob: 3c2ab1dfe3a8fe2fe10b6f11559b51376e890388 [file] [log] [blame]
package org.unicode.cldr.json;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonPrimitive;
import com.ibm.icu.number.IntegerWidth;
import com.ibm.icu.number.LocalizedNumberFormatter;
import com.ibm.icu.number.NumberFormatter;
import com.ibm.icu.number.Precision;
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.util.NoUnit;
import com.ibm.icu.util.ULocale;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.draft.ScriptMetadata;
import org.unicode.cldr.draft.ScriptMetadata.Info;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRTool;
import org.unicode.cldr.util.CLDRURLS;
import org.unicode.cldr.util.CalculatedCoverageLevels;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.CoverageInfo;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.GlossonymConstructor;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.Timer;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;
/**
* Utility methods to extract data from CLDR repository and export it in JSON format.
*
* @author shanjian / emmons
*/
@CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON")
public class Ldml2JsonConverter {
// Icons
private static final String DONE_ICON = "✅";
private static final String GEAR_ICON = "⚙️";
private static final String NONE_ICON = "∅";
private static final String PACKAGE_ICON = "📦";
private static final String SECTION_ICON = "📍";
private static final String TYPE_ICON = "📂";
private static final String WARN_ICON = "⚠️";
// File prefix
private static final String CLDR_PKG_PREFIX = "cldr-";
private static final String FULL_TIER_SUFFIX = "-full";
private static final String MODERN_TIER_SUFFIX = "-modern";
private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName());
enum RunType {
all, // number zero
main,
supplemental(false, false), // aka 'cldr-core'
segments,
rbnf(false, true),
annotations,
annotationsDerived,
bcp47(false, false);
private final boolean isTiered;
private final boolean hasLocales;
RunType() {
this.isTiered = true;
this.hasLocales = true;
}
RunType(boolean isTiered, boolean hasLocales) {
this.isTiered = isTiered;
this.hasLocales = hasLocales;
}
/**
* Is it split into modern/full?
*
* @return
*/
public boolean tiered() {
return isTiered;
}
/**
* Does it have locale IDs?
*
* @return
*/
public boolean locales() {
return hasLocales;
}
/**
* return the options as a pipe-delimited list
*
* @return
*/
public static String valueList() {
return String.join(
"|",
Lists.newArrayList(RunType.values()).stream()
.map(t -> t.name())
.toArray(String[]::new));
}
}
private static final StandardCodes sc = StandardCodes.make();
private Set<String> defaultContentLocales =
SupplementalDataInfo.getInstance().getDefaultContentLocales();
private Set<String> skippedDefaultContentLocales = new TreeSet<>();
private class AvailableLocales {
Set<String> modern = new TreeSet<>();
Set<String> full = new TreeSet<>();
}
private AvailableLocales avl = new AvailableLocales();
private Gson gson = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
private static final Options options =
new Options(
"Usage: LDML2JsonConverter [OPTIONS] [FILES]\n"
+ "This program converts CLDR data to the JSON format.\n"
+ "Please refer to the following options. \n"
+ "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy")
.add(
"bcp47",
'B',
"(true|false)",
"true",
"Whether to strictly use BCP47 tags in filenames and data. Defaults to true.")
.add(
"bcp47-no-subtags",
'T',
"(true|false)",
"true",
"In BCP47 mode, ignore locales with subtags such as en-US-u-va-posix. Defaults to true.")
.add(
"commondir",
'c',
".*",
CLDRPaths.COMMON_DIRECTORY,
"Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY")
.add(
"destdir",
'd',
".*",
CLDRPaths.GEN_DIRECTORY,
"Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY")
.add(
"match",
'm',
".*",
".*",
"Regular expression to define only specific locales or files to be generated")
.add(
"type",
't',
"(" + RunType.valueList() + ")",
"all",
"Type of CLDR data being generated, such as main, supplemental, or segments. All gets all.")
.add(
"resolved",
'r',
"(true|false)",
"false",
"Whether the output JSON for the main directory should be based on resolved or unresolved data")
.add(
"Redundant",
'R',
"(true|false)",
"false",
"Include redundant data from code-fallback and constructed")
.add(
"draftstatus",
's',
"(approved|contributed|provisional|unconfirmed)",
"unconfirmed",
"The minimum draft status of the output data")
.add(
"coverage",
'l',
"(minimal|basic|moderate|modern|comprehensive|optional)",
"optional",
"The maximum coverage level of the output data")
.add(
"packagelist",
'P',
"(true|false)",
"true",
"Whether to output PACKAGES.md and cldr-core/cldr-packages.json (during supplemental/cldr-core)")
.add(
"fullnumbers",
'n',
"(true|false)",
"false",
"Whether the output JSON should output data for all numbering systems, even those not used in the locale")
.add(
"other",
'o',
"(true|false)",
"false",
"Whether to write out the 'other' section, which contains any unmatched paths")
.add(
"packages",
'p',
"(true|false)",
"false",
"Whether to group data files into installable packages")
.add(
"identity",
'i',
"(true|false)",
"true",
"Whether to copy the identity info into all sections containing data")
.add("konfig", 'k', ".*", null, "LDML to JSON configuration file")
.add(
"pkgversion",
'V',
".*",
getDefaultVersion(),
"Version to be used in writing package files")
.add(
"Modern",
'M',
"(true|false)",
"true",
"Whether to include the -modern tier")
// Primarily useful for non-Maven build systems where CldrUtility.LICENSE may
// not be available as it is put in place by pom.xml
.add(
"license-file",
'L',
".*",
"",
"Override the license file included in the bundle");
public static void main(String[] args) throws Exception {
System.out.println(GEAR_ICON + " " + Ldml2JsonConverter.class.getName() + " options:");
options.parse(args, true);
Timer overallTimer = new Timer();
overallTimer.start();
final String rawType = options.get("type").getValue();
if (RunType.all.name().equals(rawType)) {
// Running all types
for (final RunType t : RunType.values()) {
if (t == RunType.all) continue;
System.out.println();
System.out.println(
TYPE_ICON + "####################### " + t + " #######################");
Timer subTimer = new Timer();
subTimer.start();
processType(t.name());
System.out.println(
TYPE_ICON + " " + t + "\tFinished in " + subTimer.toMeasureString());
System.out.println();
}
} else {
processType(rawType);
}
System.out.println(
"\n\n###\n\n"
+ DONE_ICON
+ " Finished everything in "
+ overallTimer.toMeasureString());
}
static void processType(final String runType) throws Exception {
Ldml2JsonConverter l2jc =
new Ldml2JsonConverter(
options.get("commondir").getValue(),
options.get("destdir").getValue(),
runType,
Boolean.parseBoolean(options.get("fullnumbers").getValue()),
Boolean.parseBoolean(options.get("resolved").getValue()),
options.get("coverage").getValue(),
options.get("match").getValue(),
Boolean.parseBoolean(options.get("packages").getValue()),
options.get("konfig").getValue(),
options.get("pkgversion").getValue(),
Boolean.parseBoolean(options.get("bcp47").getValue()),
Boolean.parseBoolean(options.get("bcp47-no-subtags").getValue()),
Boolean.parseBoolean(options.get("Modern").getValue()),
Boolean.parseBoolean(options.get("Redundant").getValue()),
Optional.ofNullable(options.get("license-file").getValue())
.filter(s -> !s.isEmpty()));
DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue());
l2jc.processDirectory(runType, status);
}
// The CLDR file directory where those official XML files will be found.
private String cldrCommonDir;
// Where the generated JSON files will be stored.
private String outputDir;
// Whether data in main should output all numbering systems, even those not in use in the
// locale.
private boolean fullNumbers;
// Whether data in main should be resolved for output.
private boolean resolve;
// Used to match specific locales for output
private String match;
// Used to filter based on coverage
private int coverageValue;
// Whether we should write output files into installable packages
private boolean writePackages;
// Type of run for this converter: main, supplemental, or segments
private final RunType type;
// include Redundant data such as apc="apc", en_US="en (US)"
private boolean includeRedundant;
static class JSONSection implements Comparable<JSONSection> {
public String section;
public Pattern pattern;
public String packageName;
@Override
public int compareTo(JSONSection other) {
return section.compareTo(other.section);
}
}
private Map<String, String> dependencies;
private List<JSONSection> sections;
private Set<String> packages;
private final String pkgVersion;
private final boolean strictBcp47;
private final boolean writeModernPackage;
private final Optional<String> licenseFile;
private final boolean skipBcp47LocalesWithSubtags;
private LdmlConfigFileReader configFileReader;
public Ldml2JsonConverter(
String cldrDir,
String outputDir,
String runType,
boolean fullNumbers,
boolean resolve,
String coverage,
String match,
boolean writePackages,
String configFile,
String pkgVersion,
boolean strictBcp47,
boolean skipBcp47LocalesWithSubtags,
boolean writeModernPackage,
boolean includeRedundant,
Optional<String> licenseFile) {
this.writeModernPackage = writeModernPackage;
this.strictBcp47 = strictBcp47;
this.skipBcp47LocalesWithSubtags = strictBcp47 && skipBcp47LocalesWithSubtags;
this.cldrCommonDir = cldrDir;
this.outputDir = outputDir;
try {
this.type = RunType.valueOf(runType);
} catch (IllegalArgumentException | NullPointerException e) {
throw new RuntimeException(
"runType (-t) invalid: " + runType + " must be one of " + RunType.valueList(),
e);
}
this.fullNumbers = fullNumbers;
this.resolve = resolve;
this.match = match;
this.writePackages = writePackages;
this.coverageValue = Level.get(coverage).getLevel();
this.pkgVersion = pkgVersion;
LdmlConvertRules.addVersionHandler(pkgVersion.split("\\.")[0]);
configFileReader = new LdmlConfigFileReader();
configFileReader.read(configFile, type);
this.dependencies = configFileReader.getDependencies();
this.sections = configFileReader.getSections();
this.packages = new TreeSet<>();
this.includeRedundant = includeRedundant;
this.licenseFile = licenseFile;
}
/**
* @see XPathParts#addInternal
*/
static final Pattern ANNOTATION_CP_REMAP =
PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$");
/**
* Transform the path by applying PATH_TRANSFORMATIONS rules.
*
* @param pathStr The path string being transformed.
* @return The transformed path.
*/
private String transformPath(final String pathStr, final String pathPrefix) {
String result = pathStr;
// handle annotation cp value
Matcher cpm = ANNOTATION_CP_REMAP.matcher(result);
if (cpm.matches()) {
// We need to avoid breaking the syntax not just of JSON, but of XPATH.
final String badCodepointRange = cpm.group(2);
StringBuilder sb = new StringBuilder(cpm.group(1)).append("[@cp=\"");
// JSON would handle a wide range of things if escaped, but XPATH will not.
if (badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) {
// forbid more than one U+ (because we will have to unescape it.)
throw new IllegalArgumentException(
"Need exactly one codepoint in the @cp string, but got "
+ badCodepointRange
+ " in xpath "
+ pathStr);
}
badCodepointRange
.codePoints()
.forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase()));
sb.append("\"]").append(cpm.group(3));
result = sb.toString();
}
logger.finest(" IN pathStr : " + result);
result = LdmlConvertRules.PathTransformSpec.applyAll(result);
result = result.replaceFirst("/ldml/", pathPrefix);
result = result.replaceFirst("/supplementalData/", pathPrefix);
if (result.startsWith("//cldr/supplemental/references/reference")) {
// no change
} else if (strictBcp47) {
// Look for something like <!--@MATCH:set/validity/locale--> in DTD
if (result.contains("localeDisplayNames/languages/language")) {
if (result.contains("type=\"root\"")) {
// This is strictBcp47
// Drop translation for 'root' as it conflicts with 'und'
return ""; // 'drop this path'
}
result = fixXpathBcp47(result, "language", "type");
} else if (result.contains("likelySubtags/likelySubtag")) {
if (!result.contains("\"iw\"")
&& !result.contains("\"in\"")
&& !result.contains("\"ji\"")) {
// Special case: preserve 'iw' and 'in' likely subtags
result = fixXpathBcp47(result, "likelySubtag", "from", "to");
} else {
result = underscoreToHypen(result);
logger.warning("Including aliased likelySubtags: " + result);
}
} else if (result.startsWith("//cldr/supplemental/weekData/weekOfPreference")) {
result = fixXpathBcp47(result, "weekOfPreference", "locales");
} else if (result.startsWith("//cldr/supplemental/metadata/defaultContent")) {
result = fixXpathBcp47(result, "defaultContent", "locales");
} else if (result.startsWith("//cldr/supplemental/grammatical")
&& result.contains("Data/grammaticalFeatures")) {
result = fixXpathBcp47(result, "grammaticalFeatures", "locales");
} else if (result.startsWith("//cldr/supplemental/grammatical")
&& result.contains("Data/grammaticalDerivations")) {
result = fixXpathBcp47(result, "grammaticalDerivations", "locales");
} else if (result.startsWith("//cldr/supplemental/dayPeriodRuleSet")) {
result = fixXpathBcp47(result, "dayPeriodRules", "locales");
} else if (result.startsWith("//cldr/supplemental/plurals")) {
result = fixXpathBcp47(result, "pluralRules", "locales");
} else if (result.startsWith("//cldr/supplemental/timeData/hours")) {
result = fixXpathBcp47MishMash(result, "hours", "regions");
} else if (result.startsWith("//cldr/supplemental/parentLocales/parentLocale")) {
result = fixXpathBcp47(result, "parentLocale", "parent", "locales");
} else if (result.startsWith(
"//cldr/supplemental/territoryInfo/territory/languagePopulation")) {
result = fixXpathBcp47(result, "languagePopulation", "type");
} else if (result.contains("languages")
|| result.contains("languageAlias")
|| result.contains("languageMatches")
|| result.contains("likelySubtags")
|| result.contains("parentLocale")
|| result.contains("locales=")) {
final String oldResult = result;
result = underscoreToHypen(result);
if (!oldResult.equals(result)) {
logger.fine(oldResult + " => " + result);
}
}
} else if (result.contains("languages")
|| result.contains("languageAlias")
|| result.contains("languageMatches")
|| result.contains("likelySubtags")
|| result.contains("parentLocale")
|| result.contains("locales=")) {
// old behavior: just munge paths..
result = underscoreToHypen(result);
}
logger.finest("OUT pathStr : " + result);
logger.finest("result: " + result);
return result;
}
/** Read all paths in the file, and assign each to a JSONSection. Return the map. */
private Map<JSONSection, List<CldrItem>> mapPathsToSections(
AtomicInteger readCount,
int totalCount,
CLDRFile file,
String pathPrefix,
SupplementalDataInfo sdi)
throws IOException, ParseException {
final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>();
String locID = file.getLocaleID();
Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher("");
Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher("");
Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher("");
Set<String> activeNumberingSystems = new TreeSet<>();
activeNumberingSystems.add("latn"); // Always include latin script numbers
for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) {
String ns = file.getWinningValue(np);
if (ns != null && ns.length() > 0) {
activeNumberingSystems.add(ns);
}
}
final DtdType fileDtdType = file.getDtdType();
CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
// read paths in DTD order. The order is critical for JSON processing.
final CLDRFile.Status status = new CLDRFile.Status();
for (Iterator<String> it =
file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null));
it.hasNext(); ) {
int cv = Level.UNDETERMINED.getLevel();
final String path = it.next();
// Check for code-fallback and constructed first, even before fullpath and value
final String localeWhereFound = file.getSourceLocaleID(path, status);
if (!includeRedundant
&& (localeWhereFound.equals(XMLSource.CODE_FALLBACK_ID)
|| // language[@type="apc"] = apc : missing
status.pathWhereFound.equals(
GlossonymConstructor
.PSEUDO_PATH))) { // language[@type="fa_AF"] = fa (AF)
// or Farsi (Afghanistan) : missing
// Don't include these paths.
continue;
}
// now get the fullpath and value
String fullPath = file.getFullXPath(path);
String value = file.getWinningValue(path);
if (fullPath == null) {
fullPath = path;
}
if (!CLDRFile.isSupplementalName(locID)
&& path.startsWith("//ldml/")
&& !path.contains("/identity")) {
cv = covInfo.getCoverageValue(path, locID);
}
if (cv > coverageValue) {
continue;
}
// Discard root identity element unless the locale is root
rootIdentityMatcher.reset(fullPath);
if (rootIdentityMatcher.matches() && !"root".equals(locID)) {
continue;
}
// automatically filter out number symbols and formats without a numbering system
noNumberingSystemMatcher.reset(fullPath);
if (noNumberingSystemMatcher.matches()) {
continue;
}
// Filter out non-active numbering systems data unless fullNumbers is specified.
numberingSystemMatcher.reset(fullPath);
if (numberingSystemMatcher.matches() && !fullNumbers) {
XPathParts xpp = XPathParts.getFrozenInstance(fullPath);
String currentNS = xpp.getAttributeValue(2, "numberSystem");
if (currentNS != null && !activeNumberingSystems.contains(currentNS)) {
continue;
}
}
// Handle the no inheritance marker.
if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) {
continue;
}
// discard draft before transforming
final String pathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(path).replaceAll("");
final String fullPathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(fullPath).replaceAll("");
final String pathNoXmlSpace =
CLDRFile.XML_SPACE_PATTERN.matcher(pathNoDraft).replaceAll("");
final String fullPathNoXmlSpace =
CLDRFile.XML_SPACE_PATTERN.matcher(fullPathNoDraft).replaceAll("");
final String transformedPath = transformPath(pathNoXmlSpace, pathPrefix);
final String transformedFullPath = transformPath(fullPathNoXmlSpace, pathPrefix);
if (transformedPath.isEmpty()) {
continue; // skip this path
}
for (JSONSection js :
sections) { // TODO: move to subfunction, error if >1 section matches
if (js.pattern.matcher(transformedPath).matches()) {
CldrItem item =
new CldrItem(
transformedPath, transformedFullPath, path, fullPath, value);
List<CldrItem> cldrItems = sectionItems.get(js);
if (cldrItems == null) {
cldrItems = new ArrayList<>();
}
cldrItems.add(item);
sectionItems.put(js, cldrItems);
break;
}
}
}
// TODO: move matcher out of inner loop
final Matcher versionInfoMatcher = VERSION_INFO_PATTERN.matcher("");
// Automatically copy the version info to any sections that had real data in them.
JSONSection otherSection = sections.get(sections.size() - 1);
List<CldrItem> others = sectionItems.get(otherSection);
if (others == null) {
return sectionItems;
}
List<CldrItem> otherSectionItems = new ArrayList<>(others);
int addedItemCount = 0;
boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue());
for (CldrItem item : otherSectionItems) {
String thisPath = item.getPath();
versionInfoMatcher.reset(thisPath);
if (versionInfoMatcher.matches()) {
for (JSONSection js : sections) {
if (sectionItems.get(js) != null
&& !js.section.equals("other")
&& copyIdentityInfo) {
List<CldrItem> hit = sectionItems.get(js);
hit.add(addedItemCount, item);
sectionItems.put(js, hit);
}
if (js.section.equals("other")) { // did not match one of the regular sections
List<CldrItem> hit = sectionItems.get(js);
hit.remove(item);
sectionItems.put(js, hit);
}
}
addedItemCount++;
}
}
return sectionItems;
}
static final Pattern VERSION_INFO_PATTERN = PatternCache.get(".*/(identity|version).*");
static final Pattern HAS_SUBTAG = PatternCache.get(".*-[a-z]-.*");
/**
* Convert CLDR's XML data to JSON format.
*
* @param file CLDRFile object.
* @param outFilename The file name used to save JSON data.
* @throws IOException
* @throws ParseException
* @return total items written in all files. (if 0, file had no effect)
*/
private int convertCldrItems(
AtomicInteger readCount,
int totalCount,
String dirName,
String filename,
String pathPrefix,
final Map<JSONSection, List<CldrItem>> sectionItems)
throws IOException, ParseException {
// zone and timezone items are queued for sorting first before they are
// processed.
final String filenameAsLangTag = unicodeLocaleToString(filename);
if (skipBcp47LocalesWithSubtags
&& type.locales()
&& HAS_SUBTAG.matcher(filenameAsLangTag).matches()) {
// Has a subtag, so skip it.
// It will show up in the "no output" list.
return 0;
}
int totalItemsInFile = 0;
List<Pair<String, Integer>> outputProgress = new LinkedList<>();
for (JSONSection js : sections) {
if (js.section.equals("IGNORE")) {
continue;
}
String outFilename;
if (type == RunType.rbnf) {
outFilename = filenameAsLangTag + ".json";
} else if (type == RunType.bcp47) {
outFilename = filename + ".json";
} else if (js.section.equals("other")) {
// If you see other-___.json, it means items that were missing from
// JSON_config_*.txt
outFilename = js.section + "-" + filename + ".json"; // Use original filename
} else {
outFilename = js.section + ".json";
}
String tier = "";
boolean writeOther = Boolean.parseBoolean(options.get("other").getValue());
if (js.section.equals("other") && !writeOther) {
continue;
} else {
StringBuilder outputDirname = new StringBuilder(outputDir);
if (writePackages) {
if (type.tiered()) {
LocaleIDParser lp = new LocaleIDParser();
lp.set(filename);
if (defaultContentLocales.contains(filename)
&& lp.getRegion().length() > 0) {
if (type == RunType.main) {
skippedDefaultContentLocales.add(filenameAsLangTag);
}
continue;
}
final boolean isModernTier = localeIsModernTier(filename);
if (isModernTier && writeModernPackage) {
tier = MODERN_TIER_SUFFIX;
if (type == RunType.main) {
avl.modern.add(filenameAsLangTag);
}
} else {
tier = FULL_TIER_SUFFIX;
}
if (type == RunType.main) {
avl.full.add(filenameAsLangTag);
}
} else if (type == RunType.rbnf) {
js.packageName = "rbnf";
tier = "";
} else if (type == RunType.bcp47) {
js.packageName = "bcp47";
tier = "";
}
if (js.packageName != null) {
String packageName = CLDR_PKG_PREFIX + js.packageName + tier;
outputDirname.append("/" + packageName);
packages.add(packageName);
}
outputDirname.append("/" + dirName + "/");
if (type.tiered()) {
outputDirname.append(filenameAsLangTag);
}
logger.fine("outDir: " + outputDirname);
logger.fine("pack: " + js.packageName);
logger.fine("dir: " + dirName);
} else {
outputDirname.append("/" + filename);
}
assert (tier.isEmpty() == !type.tiered());
List<String> outputDirs = new ArrayList<>();
outputDirs.add(outputDirname.toString());
if (writePackages && tier.equals(MODERN_TIER_SUFFIX) && js.packageName != null) {
// if it is in 'modern', add it to 'full' and core also.
outputDirs.add(
outputDirname
.toString()
.replaceFirst(MODERN_TIER_SUFFIX, FULL_TIER_SUFFIX));
// Also need to make sure that the full and core package is added
packages.add(CLDR_PKG_PREFIX + js.packageName + FULL_TIER_SUFFIX);
}
for (String outputDir : outputDirs) {
List<CldrItem> theItems = sectionItems.get(js);
if (theItems == null || theItems.size() == 0) {
logger.fine(
() ->
">"
+ progressPrefix(readCount, totalCount)
+ outputDir
+ " - no items to write in "
+ js.section); // mostly noise
continue;
}
logger.fine(
() ->
("?"
+ progressPrefix(
readCount, totalCount, filename, js.section)
+ " - "
+ theItems.size()
+ " item(s)"
+ "\r"));
// Create the output dir if it doesn't exist
File dir = new File(outputDir.toString());
if (!dir.exists()) {
dir.mkdirs();
}
JsonObject out = new JsonObject(); // root object for writing
ArrayList<CldrItem> sortingItems = new ArrayList<>();
ArrayList<CldrItem> arrayItems = new ArrayList<>();
ArrayList<CldrNode> nodesForLastItem = new ArrayList<>();
String lastLeadingArrayItemPath = null;
String leadingArrayItemPath = "";
int valueCount = 0;
String previousIdentityPath = null;
for (CldrItem item : theItems) {
if (item.getPath().isEmpty()) {
throw new IllegalArgumentException(
"empty xpath in "
+ filename
+ " section "
+ js.packageName
+ "/"
+ js.section);
}
if (type == RunType.rbnf) {
item.adjustRbnfPath();
}
// items in the identity section of a file should only ever contain the
// lowest level, even if using
// resolving source, so if we have duplicates ( caused by attributes used as
// a value ) then suppress
// them here.
if (item.getPath().contains("/identity/")) {
String[] parts = item.getPath().split("\\[");
if (parts[0].equals(previousIdentityPath)) {
continue;
} else {
XPathParts xpp = XPathParts.getFrozenInstance(item.getPath());
String territory = xpp.findAttributeValue("territory", "type");
LocaleIDParser lp = new LocaleIDParser().set(filename);
if (territory != null
&& territory.length() > 0
&& !territory.equals(lp.getRegion())) {
continue;
}
previousIdentityPath = parts[0];
}
}
// some items need to be split to multiple item before processing. None
// of those items need to be sorted.
// Applies to SPLITTABLE_ATTRS attributes.
CldrItem[] items = item.split();
if (items == null) {
// Nothing to split. Make it a 1-element array.
items = new CldrItem[1];
items[0] = item;
}
valueCount += items.length;
// Hard code this part.
if (item.getUntransformedPath().contains("unitPreference")) {
// Need to do more transforms on this one, so just output version/etc
// here.
continue;
}
for (CldrItem newItem : items) {
// alias will be dropped in conversion, don't count it.
if (newItem.isAliasItem()) {
valueCount--;
}
// Items like zone items need to be sorted first before write them out.
if (newItem.needsSort()) {
resolveArrayItems(out, nodesForLastItem, arrayItems);
sortingItems.add(newItem);
} else {
Matcher matcher =
LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(
newItem.getPath());
if (matcher.matches()) {
resolveSortingItems(out, nodesForLastItem, sortingItems);
leadingArrayItemPath = matcher.group(1);
if (lastLeadingArrayItemPath != null
&& !lastLeadingArrayItemPath.equals(
leadingArrayItemPath)) {
resolveArrayItems(out, nodesForLastItem, arrayItems);
}
lastLeadingArrayItemPath = leadingArrayItemPath;
arrayItems.add(newItem);
} else {
// output a single item
resolveSortingItems(out, nodesForLastItem, sortingItems);
resolveArrayItems(out, nodesForLastItem, arrayItems);
outputCldrItem(out, nodesForLastItem, newItem);
lastLeadingArrayItemPath = "";
}
}
}
}
resolveSortingItems(out, nodesForLastItem, sortingItems);
resolveArrayItems(out, nodesForLastItem, arrayItems);
if (js.section.contains("unitPreferenceData")) {
outputUnitPreferenceData(js, theItems, out, nodesForLastItem);
}
// closeNodes(out, nodesForLastItem.size() - 2, 0);
// write JSON
try (PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename)) {
outf.println(gson.toJson(out));
}
String outPath =
new File(outputDir.substring(this.outputDir.length()), outFilename)
.getPath();
outputProgress.add(
Pair.of(String.format("%20s %s", js.section, outPath), valueCount));
logger.fine(
">"
+ progressPrefix(readCount, totalCount, filename, js.section)
+ String.format("…%s (%d values)", outPath, valueCount));
totalItemsInFile += valueCount;
}
}
} // this is the only normal output with debug off
StringBuilder outStr = new StringBuilder();
if (!outputProgress.isEmpty()) {
// Put these first, so the percent is at the end.
for (final Pair<String, Integer> outputItem : outputProgress) {
outStr.append(
String.format("\t%6d %s\n", outputItem.getSecond(), outputItem.getFirst()));
}
outStr.append(
String.format(
"%s%-12s\t %s\n",
progressPrefix(readCount, totalCount),
filename,
valueSectionsFormat(totalItemsInFile, outputProgress.size())));
} else {
outStr.append(
String.format(
"%s%-12s\t" + NONE_ICON + " (no output)\n",
progressPrefix(readCount, totalCount),
filename));
}
synchronized (readCount) { // to prevent interleaved output
System.out.print(outStr);
}
return totalItemsInFile;
}
private static String valueSectionsFormat(int values, int sections) {
return MessageFormat.format(
"({0, plural, one {# value} other {# values}} in {1, plural, one {# section} other {# sections}})",
values,
sections);
}
private boolean localeIsModernTier(String filename) {
Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename);
if (lev == null) return false;
return lev.isAtLeast(Level.MODERN);
}
private boolean localeIsBasicTier(String filename) {
Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename);
if (lev == null) return false;
return lev.isAtLeast(Level.BASIC);
}
/**
* Entire xpaths and random short strings are passed through this function. Not really Locale ID
* to Language Tag.
*
* @param filename
* @return
*/
private String underscoreToHypen(String filename) {
return filename.replaceAll("_", "-");
}
/**
* Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or
* data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia)
*
* @param locale
* @return
*/
private final String unicodeLocaleToString(String locale) {
if (strictBcp47) {
return CLDRLocale.toLanguageTag(locale);
} else {
return underscoreToHypen(locale);
}
}
Pattern IS_REGION_CODE = PatternCache.get("([A-Z][A-Z])|([0-9][0-9][0-9])");
/**
* Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or
* data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia)
* Differs from unicodeLocaleToString in that it will preserve all uppercase region ids
*
* @param locale
* @return
*/
private final String unicodeLocaleMishMashToString(String locale) {
if (strictBcp47) {
if (IS_REGION_CODE.matcher(locale).matches()) {
return locale;
} else {
return CLDRLocale.toLanguageTag(locale);
}
} else {
return underscoreToHypen(locale);
}
}
/**
* Fixup a path to be BCP47 compliant
*
* @param path XPath (usually ends in elementName, but not necessarily)
* @param elementName element to fixup
* @param attributeNames list of attributes to fix
* @return new path
*/
final String fixXpathBcp47(final String path, String elementName, String... attributeNames) {
final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
for (final String attributeName : attributeNames) {
final String oldValue = xpp.findAttributeValue(elementName, attributeName);
if (oldValue == null) continue;
final String oldValues[] = oldValue.split(" ");
String newValue =
Arrays.stream(oldValues)
.map((String s) -> unicodeLocaleToString(s))
.collect(Collectors.joining(" "));
if (!oldValue.equals(newValue)) {
xpp.setAttribute(elementName, attributeName, newValue);
logger.finest(attributeName + " = " + oldValue + " -> " + newValue);
}
}
return xpp.toString();
}
/**
* Fixup a path to be BCP47 compliant …but support a mishmash of regions and locale ids
* CLDR-15069
*
* @param path XPath (usually ends in elementName, but not necessarily)
* @param elementName element to fixup
* @param attributeNames list of attributes to fix
* @return new path
*/
final String fixXpathBcp47MishMash(
final String path, String elementName, String... attributeNames) {
final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
for (final String attributeName : attributeNames) {
final String oldValue = xpp.findAttributeValue(elementName, attributeName);
if (oldValue == null) continue;
final String oldValues[] = oldValue.split(" ");
String newValue =
Arrays.stream(oldValues)
.map((String s) -> unicodeLocaleMishMashToString(s))
.collect(Collectors.joining(" "));
if (!oldValue.equals(newValue)) {
xpp.setAttribute(elementName, attributeName, newValue);
logger.finest(attributeName + " = " + oldValue + " -> " + newValue);
}
}
return xpp.toString();
}
private void outputUnitPreferenceData(
JSONSection js,
List<CldrItem> theItems,
JsonObject out,
ArrayList<CldrNode> nodesForLastItem)
throws ParseException, IOException {
// handle these specially.
// redo earlier loop somewhat.
CldrNode supplementalNode = CldrNode.createNode("cldr", "supplemental", "supplemental");
JsonElement supplementalObject = startNonleafNode(out, supplementalNode);
CldrNode unitPrefNode = CldrNode.createNode("supplemental", js.section, js.section);
final JsonElement o = startNonleafNode(supplementalObject, unitPrefNode);
// We'll directly write to 'out'
// Unit preference sorting is a bit more complicated, so we're going to use the CldrItems,
// but collect the results more directly.
Map<Pair<String, String>, Map<String, List<CldrItem>>> catUsagetoRegionItems =
new TreeMap<>();
for (CldrItem item : theItems) {
if (!item.getUntransformedPath().contains("unitPref")) {
continue;
}
CldrItem[] items = item.split();
if (items == null) {
throw new IllegalArgumentException("expected unit pref to split: " + item);
}
for (final CldrItem subItem : items) {
// step 1: make sure the category/usage is there
final XPathParts xpp = XPathParts.getFrozenInstance(subItem.getPath());
final String category = xpp.findFirstAttributeValue("category");
final String usage = xpp.findFirstAttributeValue("usage");
final String region =
xpp.findFirstAttributeValue("regions"); // actually one region (split)
Pair<String, String> key = Pair.of(category, usage);
Map<String, List<CldrItem>> regionMap =
catUsagetoRegionItems.computeIfAbsent(key, ignored -> new TreeMap<>());
List<CldrItem> perRegion =
regionMap.computeIfAbsent(region, ignored -> new ArrayList<>());
perRegion.add(subItem);
}
}
// OK, now start outputting
// Traverse categories/usage/regions
// unitPreferenceData is already open {
catUsagetoRegionItems.keySet().stream()
.map(p -> p.getFirst())
.distinct() // for each category
.forEach(
category -> {
JsonObject oo = new JsonObject();
o.getAsJsonObject().add(category, oo);
catUsagetoRegionItems.entrySet().stream()
.filter(p -> p.getKey().getFirst().equals(category))
.forEach(
ent -> {
final String usage = ent.getKey().getSecond();
JsonObject ooo = new JsonObject();
oo.getAsJsonObject().add(usage, ooo);
ent.getValue()
.forEach(
(region, list) -> {
JsonArray array =
new JsonArray();
ooo.getAsJsonObject()
.add(region, array);
list.forEach(
item -> {
final XPathParts
xpp =
XPathParts
.getFrozenInstance(
item
.getPath());
JsonObject u =
new JsonObject();
array.add(u);
u.addProperty(
"unit",
item
.getValue());
if (xpp
.containsAttribute(
"geq")) {
u.addProperty(
"geq",
Double
.parseDouble(
xpp
.findFirstAttributeValue(
"geq")));
}
});
});
});
});
// Computer, switch to 'automatic' navigation
// We'll let closeNodes take over.
nodesForLastItem.add(unitPrefNode); // unitPreferenceData }
}
/**
* Creates the packaging files ( i.e. package.json ) for a particular package
*
* @param packageName The name of the installable package
*/
public void writePackagingFiles(String outputDir, String packageName) throws IOException {
File dir = new File(outputDir.toString());
if (!dir.exists()) {
dir.mkdirs();
}
writePackageJson(outputDir, packageName);
writeBowerJson(outputDir, packageName);
writeReadme(outputDir, packageName);
}
/** Write the ## License section */
public void writeCopyrightSection(PrintWriter out) {
out.println(
CldrUtility.getCopyrightMarkdown()
+ "\n"
+ "A copy of the license is included as [LICENSE](./LICENSE).");
}
/**
* Write the readme fragment from cldr-json-readme.md plus the copyright
*
* @param outf
* @throws IOException
*/
private void writeReadmeSection(PrintWriter outf) throws IOException {
FileCopier.copy(CldrUtility.getUTF8Data("cldr-json-readme.md"), outf);
outf.println();
writeCopyrightSection(outf);
}
public void writeReadme(String outputDir, String packageName) throws IOException {
final String basePackageName = getBasePackageName(packageName);
try (PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "README.md"); ) {
outf.println("# " + packageName);
outf.println();
outf.println(configFileReader.getPackageDescriptions().get(basePackageName));
outf.println();
if (packageName.endsWith(FULL_TIER_SUFFIX)) {
outf.println("This package contains all locales.");
outf.println();
} else if (packageName.endsWith(MODERN_TIER_SUFFIX)) {
outf.println(
"**Deprecated** This package contains only the set of locales listed as modern coverage. Use `"
+ CLDR_PKG_PREFIX
+ basePackageName
+ FULL_TIER_SUFFIX
+ "` and locale coverage data instead. The -modern packages are scheduled to be removed in v46, see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465).");
outf.println();
}
outf.println();
outf.println(getNpmBadge(packageName));
outf.println();
writeReadmeSection(outf);
}
try (PrintWriter outf =
FileUtilities.openUTF8Writer(
outputDir + "/" + packageName, CldrUtility.LICENSE); ) {
if (licenseFile.isPresent()) {
try (BufferedReader br = FileUtilities.openUTF8Reader("", licenseFile.get()); ) {
FileCopier.copy(br, outf);
}
} else {
FileCopier.copy(CldrUtility.getUTF8Data(CldrUtility.LICENSE), outf);
}
}
}
String getBasePackageName(final String packageName) {
String basePackageName = packageName;
if (basePackageName.startsWith(CLDR_PKG_PREFIX)) {
basePackageName = basePackageName.substring(CLDR_PKG_PREFIX.length());
}
if (basePackageName.endsWith(FULL_TIER_SUFFIX)) {
basePackageName =
basePackageName.substring(
0, basePackageName.length() - FULL_TIER_SUFFIX.length());
} else if (basePackageName.endsWith(MODERN_TIER_SUFFIX)) {
basePackageName =
basePackageName.substring(
0, basePackageName.length() - MODERN_TIER_SUFFIX.length());
}
return basePackageName;
}
public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) {
obj.addProperty("name", packageName);
obj.addProperty("version", pkgVersion);
String[] packageNameParts = packageName.split("-");
String dependency = dependencies.get(packageNameParts[1]);
if (dependency != null) {
String[] dependentPackageNames = new String[1];
String tier = packageNameParts[packageNameParts.length - 1];
if (dependency.equals("core") || dependency.equals("bcp47")) {
dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency;
} else {
dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency + "-" + tier;
}
JsonObject dependencies = new JsonObject();
for (String dependentPackageName : dependentPackageNames) {
if (dependentPackageName != null) {
dependencies.addProperty(dependentPackageName, pkgVersion);
}
}
obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies);
}
}
/**
* Default for version string
*
* @return
*/
private static String getDefaultVersion() {
String versionString = CLDRFile.GEN_VERSION;
while (versionString.split("\\.").length < 3) {
versionString = versionString + ".0";
}
return versionString;
}
public void writePackageJson(String outputDir, String packageName) throws IOException {
PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json");
logger.fine(
PACKAGE_ICON
+ " Creating packaging file => "
+ outputDir
+ File.separator
+ packageName
+ File.separator
+ "package.json");
JsonObject obj = new JsonObject();
writeBasicInfo(obj, packageName, true);
JsonArray maintainers = new JsonArray();
JsonObject primaryMaintainer = new JsonObject();
JsonObject secondaryMaintainer = new JsonObject();
final String basePackageName = getBasePackageName(packageName);
String description = configFileReader.getPackageDescriptions().get(basePackageName);
if (packageName.endsWith(MODERN_TIER_SUFFIX)) {
description = description + " (modern only: deprecated)";
}
obj.addProperty("description", description);
obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE);
obj.addProperty("author", CLDRURLS.UNICODE_CONSORTIUM);
primaryMaintainer.addProperty("name", "Steven R. Loomis");
primaryMaintainer.addProperty("email", "srloomis@unicode.org");
maintainers.add(primaryMaintainer);
secondaryMaintainer.addProperty("name", "John Emmons");
secondaryMaintainer.addProperty("email", "emmo@us.ibm.com");
secondaryMaintainer.addProperty("url", "https://github.com/JCEmmons");
maintainers.add(secondaryMaintainer);
obj.add("maintainers", maintainers);
JsonObject repository = new JsonObject();
repository.addProperty("type", "git");
repository.addProperty("url", "git://github.com/unicode-cldr/cldr-json.git");
obj.add("repository", repository);
obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL);
final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
obj.addProperty("cldrVersion", sdi.getCldrVersionString());
obj.addProperty("unicodeVersion", sdi.getUnicodeVersionString());
outf.println(gson.toJson(obj));
outf.close();
}
public void writeBowerJson(String outputDir, String packageName) throws IOException {
PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json");
logger.fine(
PACKAGE_ICON
+ " Creating packaging file => "
+ outputDir
+ File.separator
+ packageName
+ File.separator
+ "bower.json");
JsonObject obj = new JsonObject();
writeBasicInfo(obj, packageName, false);
if (type == RunType.supplemental) {
JsonArray mainPaths = new JsonArray();
mainPaths.add(new JsonPrimitive("availableLocales.json"));
mainPaths.add(new JsonPrimitive("defaultContent.json")); // Handled specially
mainPaths.add(new JsonPrimitive("scriptMetadata.json"));
mainPaths.add(new JsonPrimitive(type.toString() + "/*.json"));
obj.add("main", mainPaths);
} else if (type == RunType.rbnf) {
obj.addProperty("main", type.toString() + "/*.json");
} else {
obj.addProperty("main", type.toString() + "/**/*.json");
}
JsonArray ignorePaths = new JsonArray();
ignorePaths.add(new JsonPrimitive(".gitattributes"));
ignorePaths.add(new JsonPrimitive("README.md"));
obj.add("ignore", ignorePaths);
obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
outf.println(gson.toJson(obj));
outf.close();
}
public void writeDefaultContent(String outputDir) throws IOException {
PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json");
System.out.println(
PACKAGE_ICON
+ " Creating packaging file => "
+ outputDir
+ "/cldr-core"
+ File.separator
+ "defaultContent.json");
JsonObject obj = new JsonObject();
obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales));
outf.println(gson.toJson(obj));
outf.close();
}
public void writeCoverageLevels(String outputDir) throws IOException {
try (PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "coverageLevels.json"); ) {
final Map<String, String> covlocs = new TreeMap<>();
System.out.println(
PACKAGE_ICON
+ " Creating packaging file => "
+ outputDir
+ "/cldr-core"
+ File.separator
+ "coverageLevels.json from coverageLevels.txt");
CalculatedCoverageLevels ccl = CalculatedCoverageLevels.getInstance();
for (final Map.Entry<String, org.unicode.cldr.util.Level> e :
ccl.getLevels().entrySet()) {
final String uloc = e.getKey();
final String level = e.getValue().name().toLowerCase();
final String bcp47loc = unicodeLocaleToString(uloc);
if (covlocs.put(bcp47loc, level) != null) {
throw new IllegalArgumentException(
"coverageLevels.txt: duplicate locale " + bcp47loc);
}
}
final Map<String, String> effectiveCovlocs = new TreeMap<>();
avl.full.forEach(
loc -> {
final String uloc = ULocale.forLanguageTag(loc).toString();
final Level lev = ccl.getEffectiveCoverageLevel(uloc);
if (lev != null) {
effectiveCovlocs.put(loc, lev.name().toLowerCase());
}
});
JsonObject obj = new JsonObject();
// exactly what is in CLDR .txt file
obj.add("coverageLevels", gson.toJsonTree(covlocs));
// resolved, including all available locales
obj.add("effectiveCoverageLevels", gson.toJsonTree(effectiveCovlocs));
outf.println(gson.toJson(obj));
}
}
public void writeAvailableLocales(String outputDir) throws IOException {
PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json");
System.out.println(
PACKAGE_ICON
+ " Creating packaging file => "
+ outputDir
+ "/cldr-core"
+ File.separator
+ "availableLocales.json");
JsonObject obj = new JsonObject();
obj.add("availableLocales", gson.toJsonTree(avl));
outf.println(gson.toJson(obj));
outf.close();
}
public void writeScriptMetadata(String outputDir) throws IOException {
PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json");
System.out.println(
"Creating script metadata file => "
+ outputDir
+ File.separator
+ "cldr-core"
+ File.separator
+ "scriptMetadata.json");
Map<String, Info> scriptInfo = new TreeMap<>();
for (String script : ScriptMetadata.getScripts()) {
Info i = ScriptMetadata.getInfo(script);
scriptInfo.put(script, i);
}
if (ScriptMetadata.errors.size() > 0) {
System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors));
// throw new IllegalArgumentException();
}
JsonObject obj = new JsonObject();
obj.add("scriptMetadata", gson.toJsonTree(scriptInfo));
outf.println(gson.toJson(obj));
outf.close();
}
public void writePackageList(String outputDir) throws IOException {
PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "cldr-packages.json");
System.out.println(
PACKAGE_ICON
+ " Creating packaging metadata file => "
+ outputDir
+ File.separator
+ "cldr-core"
+ File.separator
+ "cldr-packages.json and PACKAGES.md");
PrintWriter pkgs = FileUtilities.openUTF8Writer(outputDir + "/..", "PACKAGES.md");
pkgs.println("# CLDR JSON Packages");
pkgs.println();
LdmlConfigFileReader uberReader = new LdmlConfigFileReader();
for (RunType r : RunType.values()) {
if (r == RunType.all) continue;
uberReader.read(null, r);
}
TreeMap<String, String> pkgsToDesc = new TreeMap<>();
JsonObject obj = new JsonObject();
obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL);
obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE);
obj.addProperty("version", pkgVersion);
JsonArray packages = new JsonArray();
for (Map.Entry<String, String> e : uberReader.getPackageDescriptions().entrySet()) {
final String baseName = e.getKey();
if (baseName.equals("IGNORE") || baseName.equals("cal")) continue;
if (baseName.equals("core") || baseName.equals("rbnf") || baseName.equals("bcp47")) {
JsonObject packageEntry = new JsonObject();
packageEntry.addProperty("description", e.getValue());
packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName);
packages.add(packageEntry);
pkgsToDesc.put(
packageEntry.get("name").getAsString(),
packageEntry.get("description").getAsString());
} else {
{
JsonObject packageEntry = new JsonObject();
packageEntry.addProperty("description", e.getValue());
packageEntry.addProperty("tier", "full");
packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName + FULL_TIER_SUFFIX);
packages.add(packageEntry);
pkgsToDesc.put(
packageEntry.get("name").getAsString(),
packageEntry.get("description").getAsString());
}
{
JsonObject packageEntry = new JsonObject();
packageEntry.addProperty("description", e.getValue() + " modern (deprecated)");
packageEntry.addProperty("tier", "modern");
packageEntry.addProperty(
"name", CLDR_PKG_PREFIX + baseName + MODERN_TIER_SUFFIX);
packages.add(packageEntry);
pkgsToDesc.put(
packageEntry.get("name").getAsString(),
packageEntry.get("description").getAsString());
}
}
}
pkgs.println();
for (Map.Entry<String, String> e : pkgsToDesc.entrySet()) {
pkgs.println("### [" + e.getKey() + "](./cldr-json/" + e.getKey() + "/)");
pkgs.println();
if (e.getKey().contains("-modern")) {
pkgs.println(
" - **Note: Deprecated** see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465).");
}
pkgs.println(" - " + e.getValue());
pkgs.println(" - " + getNpmBadge(e.getKey()));
pkgs.println();
}
obj.add("packages", packages);
outf.println(gson.toJson(obj));
outf.close();
pkgs.println("## JSON Metadata");
pkgs.println();
pkgs.println(
"Package metadata is available at [`cldr-core`/cldr-packages.json](./cldr-json/cldr-core/cldr-packages.json)");
pkgs.println();
writeReadmeSection(pkgs);
pkgs.close();
}
private String getNpmBadge(final String packageName) {
return String.format(
"[![NPM version](https://img.shields.io/npm/v/%s.svg?style=flat)](https://www.npmjs.org/package/%s)",
packageName, packageName);
}
/**
* Process the pending sorting items.
*
* @param out The ArrayList to hold all output lines.
* @param nodesForLastItem All the nodes from last item.
* @param sortingItems The item list that should be sorted before output.
* @throws IOException
* @throws ParseException
*/
private void resolveSortingItems(
JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)
throws IOException, ParseException {
ArrayList<CldrItem> arrayItems = new ArrayList<>();
String lastLeadingArrayItemPath = null;
if (!sortingItems.isEmpty()) {
Collections.sort(sortingItems);
for (CldrItem item : sortingItems) {
Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath());
if (matcher.matches()) {
String leadingArrayItemPath = matcher.group(1);
if (lastLeadingArrayItemPath != null
&& !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) {
resolveArrayItems(out, nodesForLastItem, arrayItems);
}
lastLeadingArrayItemPath = leadingArrayItemPath;
arrayItems.add(item);
} else {
outputCldrItem(out, nodesForLastItem, item);
}
}
sortingItems.clear();
resolveArrayItems(out, nodesForLastItem, arrayItems);
}
}
/**
* Process the pending array items.
*
* @param out The ArrayList to hold all output lines.
* @param nodesForLastItem All the nodes from last item.
* @param arrayItems The item list that should be output as array.
* @throws IOException
* @throws ParseException
*/
private void resolveArrayItems(
JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)
throws IOException, ParseException {
if (!arrayItems.isEmpty()) {
CldrItem firstItem = arrayItems.get(0);
if (firstItem.needsSort()) {
Collections.sort(arrayItems);
firstItem = arrayItems.get(0);
}
int arrayLevel = getArrayIndentLevel(firstItem); // only used for trim
JsonArray array = outputStartArray(out, nodesForLastItem, firstItem, arrayLevel);
// Previous statement closed for first element, trim nodesForLastItem
// so that it will not happen again inside.
int len = nodesForLastItem.size();
while (len > arrayLevel) {
nodesForLastItem.remove(len - 1);
len--;
}
for (CldrItem insideItem : arrayItems) {
outputArrayItem(array, insideItem, nodesForLastItem, arrayLevel);
}
arrayItems.clear();
int lastLevel = nodesForLastItem.size() - 1;
// closeNodes(out, lastLevel, arrayLevel);
// out.endArray();
for (int i = arrayLevel - 1; i < lastLevel; i++) {
nodesForLastItem.remove(i);
}
}
}
/**
* Find the indent level on which array should be inserted.
*
* @param item The CldrItem being examined.
* @return The array indent level.
* @throws ParseException
*/
private int getArrayIndentLevel(CldrItem item) throws ParseException {
Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath());
if (!matcher.matches()) {
System.out.println("No match found for " + item.getPath() + ", this shouldn't happen.");
return 0;
}
String leadingPath = matcher.group(1);
CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, "");
return fakeItem.getNodesInPath().size() - 1;
}
/**
* Write the start of an array.
*
* @param out The root object
* @param nodesForLastItem Nodes in path for last CldrItem.
* @param item The CldrItem to be processed.
* @param arrayLevel The level on which array is laid out.
* @throws IOException
* @throws ParseException
*/
private JsonArray outputStartArray(
JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)
throws IOException, ParseException {
ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
JsonElement o = out;
// final CldrNode last = nodesInPath.get(nodesInPath.size()-1);
// Output nodes up to parent of 'arrayLevel'
for (int i = 1; i < arrayLevel - 1; i++) {
final CldrNode node = nodesInPath.get(i);
o = startNonleafNode(o, node);
}
// at arrayLevel, we have a named Array.
// Get the name of the parent of the array
String objName = nodesInPath.get(arrayLevel - 1).getNodeKeyName();
JsonArray array = new JsonArray();
o.getAsJsonObject().add(objName, array);
return array;
}
/**
* Write a CLDR item to file.
*
* <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be
* dropped.
*
* @param out The ArrayList to hold all output lines.
* @param nodesForLastItem
* @param item The CldrItem to be processed.
* @throws IOException
* @throws ParseException
*/
private void outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)
throws IOException, ParseException {
// alias has been resolved, no need to keep it.
if (item.isAliasItem()) {
return;
}
ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
int arraySize = nodesInPath.size();
int i = 0;
if (i == nodesInPath.size() && type != RunType.rbnf) {
System.err.println(
"This nodes and last nodes has identical path. ("
+ item.getPath()
+ ") Some distinguishing attributes wrongly removed?");
return;
}
// close previous nodes
// closeNodes(out, nodesForLastItem.size() - 2, i);
JsonElement o = out;
for (; i < nodesInPath.size() - 1; ++i) {
o = startNonleafNode(o, nodesInPath.get(i));
}
writeLeafNode(o, nodesInPath.get(i), item.getValue());
nodesForLastItem.clear();
nodesForLastItem.addAll(nodesInPath);
}
/**
* Start a non-leaf node, adding it if not there.
*
* @param out The input JsonObject
* @param node The node being written.
* @throws IOException
*/
private JsonElement startNonleafNode(JsonElement out, final CldrNode node) throws IOException {
String objName = node.getNodeKeyName();
// Some node should be skipped as indicated by objName being null.
logger.finest(() -> "objName= " + objName + " for path " + node.getUntransformedPath());
if (objName == null
|| objName.equals("cldr")
|| objName.equals("ldmlBCP47")) { // Skip root 'cldr' node
return out;
}
Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
String name;
if (type == RunType.annotations || type == RunType.annotationsDerived) {
if (objName.startsWith("U+")) {
// parse U+22 -> " etc
name = (com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16)));
} else {
name = (objName);
}
} else {
name = (objName);
}
JsonElement o = out.getAsJsonObject().get(name);
if (o == null) {
o = new JsonObject();
out.getAsJsonObject().add(name, o);
}
for (final String key : attrAsValueMap.keySet()) {
logger.finest(() -> "Non-Leaf Node: " + node.getUntransformedPath() + " ." + key);
String rawAttrValue = attrAsValueMap.get(key);
String value = escapeValue(rawAttrValue);
// attribute is prefixed with "_" when being used as key.
String attrAsKey = "_" + key;
if (LdmlConvertRules.attrIsBooleanOmitFalse(
node.getUntransformedPath(), node.getName(), node.getParent(), key)) {
final Boolean v = Boolean.parseBoolean(rawAttrValue);
if (v) {
o.getAsJsonObject().addProperty(attrAsKey, v);
} // else, omit
} else {
// hack for localeRules
if (attrAsKey.equals("_localeRules")) {
// find the _localeRules object, add if it didn't exist
JsonElement localeRules = out.getAsJsonObject().get(attrAsKey);
if (localeRules == null) {
localeRules = new JsonObject();
out.getAsJsonObject().add(attrAsKey, localeRules);
}
// find the sibling object, add if it did't exist ( this will be parentLocale or
// collations etc.)
JsonElement sibling = localeRules.getAsJsonObject().get(name);
if (sibling == null) {
sibling = new JsonObject();
localeRules.getAsJsonObject().add(name, sibling);
}
// get the 'parent' attribute, which wil be the value
final String parent =
XPathParts.getFrozenInstance(node.getUntransformedPath())
.getAttributeValue(-1, "parent");
// finally, we add something like "nonLikelyScript: und"
sibling.getAsJsonObject().addProperty(value, parent);
} else {
o.getAsJsonObject().addProperty(attrAsKey, value);
}
}
}
return o;
}
/**
* Write a CLDR item to file.
*
* <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be
* dropped.
*
* @param out The ArrayList to hold all output lines.
* @param item The CldrItem to be processed.
* @param nodesForLastItem Nodes in path for last item.
* @param arrayLevel The indentation level in which array exists.
* @throws IOException
* @throws ParseException
*/
private void outputArrayItem(
JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)
throws IOException, ParseException {
// This method is more complicated that outputCldrItem because it needs to
// handle 3 different cases.
// 1. When difference is found below array item, this item will be of the
// same array item. Inside the array item, it is about the same as
// outputCldrItem, just with one more level of indentation because of
// the array.
// 2. The array item is the leaf item with no attribute, simplify it as
// an object with one name/value pair.
// 3. The array item is the leaf item with attribute, an embedded object
// will be created inside the array item object.
ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
String value = escapeValue(item.getValue());
int nodesNum = nodesInPath.size();
// case 1
// int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath);
CldrNode cldrNode = nodesInPath.get(nodesNum - 1);
// if (diff > arrayLevel) {
// // close previous nodes
// closeNodes(out, nodesForLastItem.size() - 1, diff + 1);
// for (int i = diff; i < nodesNum - 1; i++) {
// startNonleafNode(out, nodesInPath.get(i), i + 1);
// }
// writeLeafNode(out, cldrNode, value, nodesNum);
// return;
// }
if (arrayLevel == nodesNum - 1) {
// case 2
// close previous nodes
// if (nodesForLastItem.size() - 1 - arrayLevel > 0) {
// closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
// }
String objName = cldrNode.getNodeKeyName();
int pos = objName.indexOf('-');
if (pos > 0) {
objName = objName.substring(0, pos);
}
Map<String, String> attrAsValueMap = cldrNode.getAttrAsValueMap();
if (attrAsValueMap.isEmpty()) {
JsonObject o = new JsonObject();
out.add(o);
o.addProperty(objName, value);
} else if (objName.equals("rbnfrule")) {
writeRbnfLeafNode(out, item, attrAsValueMap);
} else {
JsonObject o = new JsonObject();
writeLeafNode(
o,
objName,
attrAsValueMap,
value,
cldrNode.getName(),
cldrNode.getParent(),
cldrNode);
out.add(o);
}
// the last node is closed, remove it.
nodesInPath.remove(nodesNum - 1);
} else {
// case 3
// close previous nodes
// if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) {
// closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
// }
JsonObject o = new JsonObject();
out.add(o);
CldrNode node = nodesInPath.get(arrayLevel);
String objName = node.getNodeKeyName();
int pos = objName.indexOf('-');
if (pos > 0) {
objName = objName.substring(0, pos);
}
Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
JsonObject oo = new JsonObject();
o.add(objName, oo);
for (String key : attrAsValueMap.keySet()) {
// attribute is prefixed with "_" when being used as key.
oo.addProperty("_" + key, escapeValue(attrAsValueMap.get(key)));
}
JsonElement o2 = out;
System.err.println("PROBLEM at " + cldrNode.getUntransformedPath());
// TODO ?!!
for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) {
o2 = startNonleafNode(o2, nodesInPath.get(i));
}
writeLeafNode(o2, cldrNode, value);
}
nodesForLastItem.clear();
nodesForLastItem.addAll(nodesInPath);
}
private void writeRbnfLeafNode(
JsonElement out, CldrItem item, Map<String, String> attrAsValueMap) throws IOException {
if (attrAsValueMap.size() != 1) {
throw new IllegalArgumentException(
"Error, attributes seem wrong for RBNF " + item.getUntransformedPath());
}
Entry<String, String> entry = attrAsValueMap.entrySet().iterator().next();
JsonArray arr = new JsonArray();
arr.add(entry.getKey());
arr.add(entry.getValue());
out.getAsJsonArray().add(arr);
}
private String progressPrefix(
AtomicInteger readCount, int totalCount, String filename, String section) {
return progressPrefix(readCount.get(), totalCount, filename, section);
}
private String progressPrefix(int readCount, int totalCount, String filename, String section) {
return progressPrefix(readCount, totalCount) + filename + "\t" + section + "\t";
}
private final String progressPrefix(AtomicInteger readCount, int totalCount) {
return progressPrefix(readCount.get(), totalCount);
}
final LocalizedNumberFormatter percentFormatter =
NumberFormatter.withLocale(Locale.ENGLISH)
.unit(NoUnit.PERCENT)
.integerWidth(IntegerWidth.zeroFillTo(3))
.precision(Precision.integer());
private final String progressPrefix(int readCount, int totalCount) {
double asPercent = ((double) readCount / (double) totalCount) * 100.0;
return String.format(
SECTION_ICON + " %s (step %d/%d)\t[%s]:\t",
type,
type.ordinal(),
RunType.values().length
- 1, // which 'type' are we on? (all=0, minus one to get the count right)
percentFormatter.format(asPercent));
}
/**
* Process files in a directory of CLDR file tree.
*
* @param dirName The directory in which xml file will be transformed.
* @param minimalDraftStatus The minimumDraftStatus that will be accepted.
* @throws IOException
* @throws ParseException
*/
public void processDirectory(String dirName, DraftStatus minimalDraftStatus)
throws IOException, ParseException {
SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental");
Factory cldrFactory = Factory.make(cldrCommonDir + dirName + "/", ".*");
Set<String> files =
cldrFactory
.getAvailable()
// filter these out early so our work count is correct
.stream()
.filter(
filename ->
filename.matches(match)
&& !LdmlConvertRules.IGNORE_FILE_SET.contains(
filename))
.collect(Collectors.toSet());
final int total = files.size();
AtomicInteger readCount = new AtomicInteger(0);
Map<String, Throwable> errs = new TreeMap<>();
// This takes a long time (minutes, in 2020), so run it in parallel forkJoinPool threads.
// The result of this pipeline is an array of toString()-able filenames of XML files which
// produced no JSON output, just as a warning.
System.out.println(
progressPrefix(0, total)
+ " "
+ MessageFormat.format(
GEAR_ICON
+ " Beginning parallel process of {0, plural, one {# file} other {# files}}",
total));
Object noOutputFiles[] =
files.parallelStream()
.unordered()
.map(
filename -> {
String pathPrefix;
CLDRFile file =
cldrFactory.make(
filename,
resolve && type == RunType.main,
minimalDraftStatus);
// Print 'reading' after the make, to stagger the output a
// little bit.
// Otherwise, the printout happens before any work happens, and
// is easily out of order.
readCount.incrementAndGet();
logger.fine(
() ->
"<"
+ progressPrefix(
readCount, total, dirName,
filename)
+ "\r");
if (type == RunType.main) {
pathPrefix =
"/cldr/"
+ dirName
+ "/"
+ unicodeLocaleToString(filename)
+ "/";
} else {
pathPrefix = "/cldr/" + dirName + "/";
}
int totalForThisFile = 0;
try {
totalForThisFile =
convertCldrItems(
readCount,
total,
dirName,
filename,
pathPrefix,
mapPathsToSections(
readCount,
total,
file,
pathPrefix,
sdi));
} catch (IOException | ParseException t) {
t.printStackTrace();
System.err.println(
"!"
+ progressPrefix(readCount, total)
+ filename
+ " - err - "
+ t);
errs.put(filename, t);
} finally {
logger.fine(
() ->
"."
+ progressPrefix(readCount, total)
+ "Completing "
+ dirName
+ "/"
+ filename);
}
return new Pair<>(dirName + "/" + filename, totalForThisFile);
})
.filter(p -> p.getSecond() == 0) // filter out only files which produced no
// output
.map(p -> p.getFirst())
.toArray();
System.out.println(
progressPrefix(total, total)
+ " "
+ DONE_ICON
+ MessageFormat.format(
"Completed parallel process of {0, plural, one {# file} other {# files}}",
total));
if (noOutputFiles.length > 0) {
System.err.println(
WARN_ICON
+ MessageFormat.format(
" Warning: {0, plural, one {# file} other {# files}} did not produce any output (check JSON config):",
noOutputFiles.length));
for (final Object f : noOutputFiles) {
final String loc = f.toString();
final String uloc = unicodeLocaleToString(f.toString());
if (skipBcp47LocalesWithSubtags
&& type.locales()
&& HAS_SUBTAG.matcher(uloc).matches()) {
System.err.println(
"\t- " + loc + " ❎ (Skipped due to '-T true': " + uloc + ")");
} else {
System.err.println("\t- " + loc);
}
}
}
if (!errs.isEmpty()) {
System.err.println("Errors in these files:");
for (Map.Entry<String, Throwable> e : errs.entrySet()) {
System.err.println(e.getKey() + " - " + e.getValue());
}
// rethrow
for (Map.Entry<String, Throwable> e : errs.entrySet()) {
if (e.getValue() instanceof IOException) {
throw (IOException) e.getValue(); // throw the first one
} else if (e.getValue() instanceof ParseException) {
throw (ParseException) e.getValue(); // throw the first one
} else {
throw new RuntimeException("Other exception thrown: " + e.getValue());
}
/* NOTREACHED */
}
}
if (writePackages) {
for (String currentPackage : packages) {
writePackagingFiles(outputDir, currentPackage);
}
if (type == RunType.main) {
writeDefaultContent(outputDir);
writeAvailableLocales(outputDir);
writeCoverageLevels(outputDir);
} else if (type == RunType.supplemental) {
writeScriptMetadata(outputDir);
if (Boolean.parseBoolean(options.get("packagelist").getValue())) {
writePackageList(outputDir);
}
}
}
}
/** Replacement pattern for escaping. */
private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)");
/**
* Escape \ in value string. \ should be replaced by \\, except in case of \u1234 In following
* code, \\\\ represent one \, because java compiler and regular expression compiler each do one
* round of escape.
*
* @param value Input string.
* @return escaped string.
*/
private String escapeValue(String value) {
Matcher match = escapePattern.matcher(value);
String ret = match.replaceAll("\\\\");
return ret.replace("\n", " ").replace("\t", " ");
}
/**
* Write the value to output.
*
* @param out The ArrayList to hold all output lines.
* @param node The CldrNode being written.
* @param value The value part for this element.
* @param level Indent level.
* @throws IOException
*/
private void writeLeafNode(JsonElement out, CldrNode node, String value) throws IOException {
String objName = node.getNodeKeyName();
Map<String, String> attrAsValueMaps = node.getAttrAsValueMap();
writeLeafNode(out, objName, attrAsValueMaps, value, node.getName(), node.getParent(), node);
}
/**
* Write the value to output.
*
* @param out The ArrayList to hold all output lines.
* @param objName The node's node.
* @param attrAsValueMap Those attributes that will be treated as values.
* @param value The value part for this element.
* @param level Indent level.
* @param nodeName the original nodeName (not distinguished)
* @throws IOException
*/
private void writeLeafNode(
JsonElement out,
String objName,
Map<String, String> attrAsValueMap,
String value,
final String nodeName,
String parent,
CldrNode node)
throws IOException {
if (objName == null) {
return;
}
value = escapeValue(value);
final boolean valueIsSpacesepArray =
LdmlConvertRules.valueIsSpacesepArray(nodeName, parent);
if (attrAsValueMap.isEmpty()) {
// out.name(objName);
if (value.isEmpty()) {
if (valueIsSpacesepArray) {
// empty value, output as empty space-sep array: []
out.getAsJsonObject().add(objName, new JsonArray());
} else {
// empty value.
if (objName.endsWith("SpaceReplacement")) { // foreignSpaceReplacement or
// nativeSpaceReplacement
out.getAsJsonObject().addProperty(objName, "");
} else {
out.getAsJsonObject().add(objName, new JsonObject());
}
}
} else if (type == RunType.annotations || type == RunType.annotationsDerived) {
JsonArray a = new JsonArray();
// split this, so "a | b | c" becomes ["a","b","c"]
for (final String s : Annotations.splitter.split(value.trim())) {
a.add(s);
}
out.getAsJsonObject().add(objName, a);
} else if (valueIsSpacesepArray) {
outputSpaceSepArray(out, objName, value);
} else {
// normal value
out.getAsJsonObject().addProperty(objName, value);
}
return;
}
// If there is no value, but a attribute being treated as value,
// simplify the output.
if (value.isEmpty() && attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) {
String v = attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY);
// out.name(objName);
if (valueIsSpacesepArray) {
outputSpaceSepArray(out, objName, v);
} else {
out.getAsJsonObject().addProperty(objName, v);
}
return;
}
JsonObject o = new JsonObject();
out.getAsJsonObject().add(objName, o);
if (!value.isEmpty()) {
o.addProperty("_value", value);
}
for (final String key : attrAsValueMap.keySet()) {
String rawAttrValue = attrAsValueMap.get(key);
String attrValue = escapeValue(rawAttrValue);
// attribute is prefixed with "_" when being used as key.
String attrAsKey = "_" + key;
if (node != null) {
logger.finest(() -> "Leaf Node: " + node.getUntransformedPath() + " ." + key);
}
if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) {
String[] strings = attrValue.trim().split("\\s+");
JsonArray a = new JsonArray();
o.add(attrAsKey, a);
for (String s : strings) {
a.add(s);
}
} else if (node != null
&& LdmlConvertRules.attrIsBooleanOmitFalse(
node.getUntransformedPath(), nodeName, parent, key)) {
final Boolean v = Boolean.parseBoolean(rawAttrValue);
if (v) {
o.addProperty(attrAsKey, v);
} // else: omit falsy value
} else {
o.addProperty(attrAsKey, attrValue);
}
}
}
private void outputSpaceSepArray(JsonElement out, String objName, String v) throws IOException {
JsonArray a = new JsonArray();
out.getAsJsonObject().add(objName, a);
// split this, so "a b c" becomes ["a","b","c"]
for (final String s : v.trim().split(" ")) {
if (!s.isEmpty()) {
a.add(s);
}
}
}
}