blob: 79f66a6735c07f5856e605ad7aa39d2d4b4f2154 [file] [log] [blame]
package org.unicode.cldr.tool;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import java.io.File;
import java.io.StringWriter;
import java.util.Collection;
import java.util.Comparator;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.TempPrintWriter;
import org.unicode.cldr.util.personname.PersonNameFormatter;
import org.unicode.cldr.util.personname.PersonNameFormatter.Field;
import org.unicode.cldr.util.personname.PersonNameFormatter.Formality;
import org.unicode.cldr.util.personname.PersonNameFormatter.FormatParameters;
import org.unicode.cldr.util.personname.PersonNameFormatter.Length;
import org.unicode.cldr.util.personname.PersonNameFormatter.ModifiedField;
import org.unicode.cldr.util.personname.PersonNameFormatter.Modifier;
import org.unicode.cldr.util.personname.PersonNameFormatter.Order;
import org.unicode.cldr.util.personname.PersonNameFormatter.SampleType;
import org.unicode.cldr.util.personname.PersonNameFormatter.Usage;
import org.unicode.cldr.util.personname.SimpleNameObject;
public class GeneratePersonNameTestData {
private static final Joiner COMMA_JOINER = Joiner.on(", ");
private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
private static final CLDRFile ENGLISH = CLDR_CONFIG.getEnglish();
static final Comparator<String> LENGTH_FIRST =
Comparator.comparingInt(String::length)
.reversed()
.thenComparing(Collator.getInstance(Locale.ROOT))
.thenComparing(Comparator.naturalOrder());
enum Options {
none,
sorting
}
static File dir = new File(CLDRPaths.TEST_DATA, "personNameTest");
static final Set<String> REQUIRED_PATHS =
ImmutableSet.of(
"//ldml/personNames/nameOrderLocales[@order=\"givenFirst\"]",
"//ldml/personNames/nameOrderLocales[@order=\"surnameFirst\"]",
"//ldml/personNames/parameterDefault[@parameter=\"formality\"]",
"//ldml/personNames/parameterDefault[@parameter=\"length\"]",
"//ldml/personNames/foreignSpaceReplacement",
"//ldml/personNames/nativeSpaceReplacement",
"//ldml/personNames/initialPattern[@type=\"initial\"]",
"//ldml/personNames/initialPattern[@type=\"initialSequence\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/personName[@order=\"givenFirst\"][@length=\"long\"][@usage=\"referring\"][@formality=\"formal\"]/namePattern"
//
);
public static void main(String[] args) {
Factory factory = CLDR_CONFIG.getCldrFactory();
Matcher localeMatcher = null;
if (args.length >= 1) {
localeMatcher = Pattern.compile(args[0]).matcher("");
}
ULocale undLocale = new ULocale("und");
main:
for (String locale : factory.getAvailableLanguages()) {
if (localeMatcher != null && !localeMatcher.reset(locale).lookingAt()) {
continue;
}
try {
CLDRFile cldrFile =
factory.make(locale, true, DraftStatus.contributed); // don't include
// draft=unconfirmed/provisional
CLDRFile unresolved = cldrFile.getUnresolved();
// Check that we have sufficient person data
if (!locale.equals("en")) {
for (String path : REQUIRED_PATHS) {
String value = unresolved.getStringValue(path);
if (value == null) {
removeTestFile(locale);
continue main; // skip unless we have person data
}
}
}
// Load the samples, and exit if there is a problem
Map<SampleType, SimpleNameObject> names;
PersonNameFormatter formatter;
try {
names = PersonNameFormatter.loadSampleNames(cldrFile);
formatter = new PersonNameFormatter(cldrFile);
} catch (Exception e) {
removeTestFile(locale);
continue;
}
if (names.isEmpty()) {
removeTestFile(locale);
continue;
}
// We have to jump through some hoops to get locales corresponding to the order
// First get the locale for native sample names
ULocale myLocale = new ULocale(locale);
Order myOrder = formatter.getOrderFromLocale(myLocale);
if (myOrder == null) {
formatter.getOrderFromLocale(myLocale);
throw new IllegalArgumentException("Missing order for: " + locale);
}
// Now get the locale for non-native sample names
// We see if we can get a locale of the other direction
// Otherwise we pick either English or German
Order otherOrder =
myOrder == Order.givenFirst ? Order.surnameFirst : Order.givenFirst;
Map<ULocale, Order> localeToOrder =
formatter.getNamePatternData().getLocaleToOrder();
Multimap<Order, ULocale> orderToLocale =
Multimaps.invertFrom(
Multimaps.forMap(localeToOrder), TreeMultimap.create());
ULocale otherLocale = null;
for (ULocale tryLocale : orderToLocale.get(otherOrder)) {
if (!undLocale.equals(tryLocale)) {
otherLocale = tryLocale;
break;
}
}
if (otherLocale == null) {
otherLocale = myLocale.equals(ULocale.FRENCH) ? ULocale.GERMAN : ULocale.FRENCH;
}
// now change region to AQ, just to check for inheritance
myLocale = addRegionIfMissing(myLocale, "AQ");
otherLocale = addRegionIfMissing(otherLocale, "AQ");
// Start collecting output
StringWriter output = new StringWriter();
output.write("\n");
writeChoices("field", Field.ALL, output);
writeChoices("modifiers", Modifier.ALL, output);
writeChoices("order", Order.ALL, output);
writeChoices("length", Length.ALL, output);
writeChoices("usage", Usage.ALL, output);
writeChoices("formality", Formality.ALL, output);
for (Entry<SampleType, SimpleNameObject> entry : names.entrySet()) {
// write the name information
SampleType sampleType = entry.getKey();
if (!sampleType.isNative() && otherLocale == null) {
continue;
}
final SimpleNameObject nameObject = entry.getValue();
output.write("\n");
output.write("# " + sampleType + "\n");
for (Entry<ModifiedField, String> x :
nameObject.getModifiedFieldToValue().entrySet()) {
output.write("name ; " + x.getKey() + "; " + x.getValue() + "\n");
}
// handle the situation that ICU's formatter doesn't give us low-level access
// so we have to use the name locale to set the direction
Order nameOrder;
if (sampleType.isNative()) {
output.write("name ; " + "locale" + "; " + myLocale + "\n");
nameOrder = myOrder;
} else {
output.write("name ; " + "locale" + "; " + otherLocale + "\n");
nameOrder = otherOrder;
}
// Group the formatted names, longest first
Multimap<String, FormatParameters> valueToSources =
TreeMultimap.create(LENGTH_FIRST, Comparator.naturalOrder());
for (FormatParameters parameters : FormatParameters.allCldr()) {
// boolean debugPoint = locale.startsWith("th") &&
// parameters.equals(testParameters)
// && sampleType == SampleType.nativeGS;
// if (debugPoint) {
// System.out.println(sampleType + "; " +
// nameObject + "; " + testParameters);
// int debug = 0;
// }
String formatted =
formatter.formatWithoutSuperscripts(nameObject, parameters);
if (formatted.isEmpty()) {
continue;
}
valueToSources.put(formatted, parameters);
}
// write out the result, and then all the parameters that give produce it.
for (Entry<String, Collection<FormatParameters>> entry2 :
valueToSources.asMap().entrySet()) {
final String expectedResult = entry2.getKey();
output.write("\nexpectedResult; " + expectedResult + "\n\n");
entry2.getValue()
.forEach(
x -> {
output.write(
"parameters; "
+ x.getOrder()
+ "; "
+ x.getLength()
+ "; "
+ x.getUsage()
+ "; "
+ x.getFormality()
+ "\n");
});
}
output.write("\nendName\n");
}
try (TempPrintWriter output2 =
TempPrintWriter.openUTF8Writer(dir.toString(), locale + ".txt"); ) {
output2.write(
"# Test data for Person Name Data\n"
+ CldrUtility.getCopyrightString("# ")
+ "\n# CLDR person name formatting test data for: "
+ locale
+ "\n#"
+ "\n# Test lines have the following structure:"
+ "\n#"
+ "\n# enum ; <type> ; <value>(', ' <value)"
+ "\n# For all the elements in <…> below, the possible choices that could appear in the file."
+ "\n# For example, <field> could be any of title, given, … credentials."
+ "\n# Verify that all of these values work with the implementation."
+ "\n#"
+ "\n# name ; <field>('-'<modifier>) ; <value>"
+ "\n# A sequence of these is to be used to build a person name object with the given field values."
+ "\n# If the <field> is 'locale', then the value is the locale of the name."
+ "\n# That will always be the last field in the name."
+ "\n# NOTE: the locale for the name (where different than the test file's locale) will generally not match the text."
+ "\n# It is chosen to exercise the person name formatting, by having a different given-surname order than the file's locale."
+ "\n#"
+ "\n# expectedResult; <value>"
+ "\n# This line follows a sequence of name lines, and indicates the that all the following parameter lines have this expected value."
+ "\n#"
+ "\n# parameters; <options>; <length>; <usage>; <formality>"
+ "\n# Each of these parameter lines should be tested to see that when formatting the current name with these parameters, "
+ "\n# the expected value is produced."
+ "\n#"
+ "\n# endName"
+ "\n# Indicates the end of the values to be tested with the current name."
+ "\n#"
+ "\n# ====="
+ "\n# Example:"
+ "\n# enum ; field ; title, given, given2, surname, surname2, generation, credentials"
+ "\n# …"
+ "\n#"
+ "\n# name ; given; Iris"
+ "\n# name ; surname; Falke"
+ "\n# name ; locale; de"
+ "\n#"
+ "\n# expectedResult; Falke, Iris"
+ "\n#"
+ "\n# parameters; sorting; long; referring; formal"
+ "\n# parameters; sorting; medium; referring; informal"
+ "\n#"
+ "\n# endName"
+ "\n#"
+ "\n# name ; given; Max"
+ "\n# name ; given2; Ben"
+ "\n# name ; surname; Mustermann"
+ "\n# …"
+ "\n# ====="
+ "\n");
output2.write(output.toString());
}
} catch (Exception e) {
System.out.println("Skipping " + locale);
e.printStackTrace();
removeTestFile(locale);
continue;
}
}
}
private static void removeTestFile(String locale) {
File file = new File(dir.toString(), locale + ".txt");
if (file.exists()) {
System.out.println("Removing " + file);
file.delete();
}
}
public static ULocale addRegionIfMissing(ULocale myLocale, String region) {
return !myLocale.getCountry().isEmpty()
? myLocale
: new ULocale.Builder().setLocale(myLocale).setRegion(region).build();
}
public static <T> void writeChoices(String kind, Collection<T> choices, StringWriter output) {
output.write("enum ; " + kind + " ; " + COMMA_JOINER.join(choices) + "\n");
}
}