blob: ad60c98af77bb3a0c234dc2b18946bdc53518041 [file] [log] [blame]
package org.unicode.cldr.tool;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSet.Builder;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.util.ICUUncheckedIOException;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Consumer;
import java.util.function.Function;
import org.apache.jena.query.QuerySolution;
import org.apache.jena.query.ResultSet;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.rdf.QueryClient;
import org.unicode.cldr.rdf.TsvWriter;
import org.unicode.cldr.util.*;
import org.unicode.cldr.util.Iso639Data.Type;
import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity.Status;
/**
* This code generates language group containment based on Wikidata. For example, it finds: root >
* Indo-European [Other] (ine) > Germanic [Other] (gem) > West Germanic languages (gmw) > English
* (en)
*
* <p>To do this, it reads three tables from Wikidata, and combines them. The combination is not
* trivial, because wikidata offers multiple "parents" for the same language, and many of the
* parents do not have ISO codes. For the first problem, the software computes the possible parent
* chains and picks among them. For the second problem, any parents without ISO codes are skipped
* (after forming the chains, so the ultimate ancestors are still found). <br>
* A number of debugging files are written to the external directory.
*
* <p>Some failures will be exposed by running this tool. Examples: <br>
* <b>wikidata-entityToCode Multiple values:</b> Cebaara [Q1097512] [sef, sev]. <br>
* If these are not CLDR languages then they do not need to be fixed. <br>
* <b>wikidata-childToParent Multiple values:</b> Q118712 [Q118712] [German [de, Q18], English [en,
* Q186]] <br>
* Normally these don't need to be fixed; the generation code works around them. <br>
* <b>Cycle in [dng, zhx]</b> from [[http://www.wikidata.org/entity/Q33050, <br>
* These indicate that the Wikidata has a cycle in it. A => B => C => A. Ignore these unless the
* cases are worth investigating.
*
* <p>Others are exposed by running TestLanguageGroup.java <br>
* Error: (TestLanguageGroup.java:55) Single ancestor but not in ISOLATES: ce [Chechen] [ce] <br>
* Check to see if the language has a language group (in this case not, so add to
* TestLanguageGroup.ISOLATES). <br>
* For kea [Kabuverdianu] [kea], you can add cpp as the parent, as follows. <br>
* <b>Missing.</b> If a child-parent relation is missing, you can add it to EXTRA_PARENT_CHILDREN so
* that it shows up. For example, .put("gmw", "lb") says that West Germanic is the parent of
* Luxembourgish. <br>
* <b>Extra.</b> Sometimes wikidata has conflicting or erroneous entries. Those can be fixed by
* adding to REMOVE_PARENT_CHILDREN. Use * to remove all children, such as .put("crp", "*") <br>
* Sometimes the tool fails with JsonParseExceptions, but works if you rerun. <br>
* Cycle in [dng, zhx] from ... Will be fixed by giving the language 'no parent' (mul)
*
* <p>
*/
public class GenerateLanguageContainment {
    static {
        // Printed eagerly (at class load) so anyone running the tool sees the troubleshooting
        // pointer before any query or diagnostic output.
        System.out.println(
                "See the class description for GenerateLanguageContainment.java about fixing problems.");
    }

    // When true, restrict processing to ISO 639 "Living" languages only.
    private static final boolean ONLY_LIVING = false;

    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();

    // SPARQL client used for all Wikidata queries.
    private static final QueryClient queryClient = QueryClient.getInstance();

    static final Splitter TAB = Splitter.on('\t').trimResults();

    // English CLDRFile, used to produce human-readable language names in diagnostics.
    static final CLDRFile ENGLISH = CONFIG.getEnglish();

    // NOTE(review): appears unused in this file — confirm before removing.
    static final String relDir = "../util/data/languages/";

    // CLDR language alias data: code => (replacement list, reason); used to canonicalize
    // the codes returned by Wikidata.
    static final Map<String, R2<List<String>, String>> ALIAS_MAP =
            CONFIG.getSupplementalDataInfo().getLocaleAliasInfo().get("language");
    /**
     * Loads the three Wikidata SPARQL query results (entity=>label, entity=>code,
     * child=>parent) in one place, so that any exception during loading can be caught and
     * reported together.
     */
    static final class QueryHelper {
        /** Wikidata entity URI => English label. */
        public final Map<String, String> entityToLabel;

        /** Wikidata entity URI => canonicalized ISO language code. */
        public final Map<String, String> entityToCode;

        /** Inverse of entityToCode: ISO language code => Wikidata entity URI(s). */
        public final ImmutableMultimap<String, String> codeToEntity;

        /** Wikidata child entity URI => parent entity URI(s). */
        public final Multimap<String, String> childToParent;

        QueryHelper() {
            try {
                entityToLabel =
                        loadQueryPairsUnique(
                                GenerateLanguageContainment.class,
                                "wikidata-entityToLabel",
                                null,
                                null,
                                null);
                entityToCode =
                        loadQueryPairsUnique(
                                GenerateLanguageContainment.class,
                                "wikidata-entityToCode",
                                // Canonicalize each code via CLDR alias data; keep the raw
                                // code when the replacement contains "_" (i.e. is not a plain
                                // language subtag).
                                code -> {
                                    code = code.replace("\"", "");
                                    R2<List<String>, String> v = ALIAS_MAP.get(code);
                                    String result = v == null ? code : v.get0().get(0);
                                    result = result.contains("_") ? code : result;
                                    return result;
                                },
                                code -> showNameAndCode(code),
                                NAME);
                codeToEntity =
                        ImmutableMultimap.copyOf(
                                Multimaps.invertFrom(
                                        Multimaps.forMap(entityToCode),
                                        LinkedHashMultimap.create()));
                childToParent =
                        loadQueryPairs(
                                GenerateLanguageContainment.class,
                                "wikidata-childToParent",
                                code -> showNameAndCode(code),
                                code -> showNameAndCode(code));
            } catch (Throwable t) {
                // Fail fast: the tool cannot proceed without all three tables.
                t.printStackTrace();
                throw new RuntimeException(t);
            }
        }

        /**
         * Best-effort display name for an entity: the CLDR English name of its code if
         * available, else its Wikidata label, else the trailing portion of the entity URI.
         */
        String getEntityName(String key) {
            String code = getEntityCode(key);
            if (code != null) {
                try {
                    String name = NAME.apply(code);
                    if (name != null) {
                        return name;
                    }
                } catch (Exception e) {
                    // TODO: Why would NAME.apply throw?
                    // TODO: Need better handling here?
                }
            }
            String name = entityToLabel.get(key);
            if (name != null) {
                return name;
            }
            return afterLastSlash(key);
        }

        private String getEntityCode(String key) {
            // entityToCode can legitimately be null here: showNameAndCode is passed as a
            // mapper into loadQueryPairsUnique while the constructor is still in the middle
            // of building entityToCode itself.
            return entityToCode == null ? null : entityToCode.get(key);
        }

        private String afterLastSlash(String key) {
            // NOTE(review): this also drops the FINAL character of the key (length() - 1) —
            // presumably stripping a trailing delimiter from the entity URI; confirm.
            return key.substring(key.lastIndexOf('/') + 1, key.length() - 1);
        }

        /** Writes the raw and code-annotated query results as TSV debugging files. */
        public void writeTsvs() throws IOException {
            TsvWriter.writeTsv("childToParent.tsv", childToParent, "child", "parent");
            TsvWriter.writeTsv("entityToCode.tsv", entityToCode, "lang", "langCode");
            TsvWriter.writeTsv("entityToLabel.tsv", entityToLabel, "lang", "langLabel");
            SortedSetMultimap<String, String> childToParentWithCodes = TreeMultimap.create();
            for (Entry<String, String> entry : childToParent.entries()) {
                String child = entry.getKey();
                String parent = entry.getValue();
                childToParentWithCodes.put(showNameAndCode(child), showNameAndCode(parent));
            }
            TsvWriter.writeTsv(
                    "childToParentWithCodes.tsv",
                    childToParentWithCodes,
                    "childCode\tLabel",
                    "parentCode\tLabel");
        }

        /** Formats one entity as "Name (code, Qxx)"; the code part is omitted if unknown. */
        public String showNameAndCode(String qid) {
            return getEntityName(qid)
                    + " ("
                    + (getEntityCode(qid) == null ? "" : getEntityCode(qid) + ", ")
                    + afterLastSlash(qid)
                    + ")";
        }

        /** Formats an iterable of entities, comma-separated. */
        public <T extends Iterable<String>> String showNameAndCode(T qids) {
            StringBuilder b = new StringBuilder();
            qids.forEach(
                    qid -> {
                        if (b.length() != 0) b.append(", ");
                        b.append(showNameAndCode(qid));
                    });
            return b.toString();
        }

        /** Formats an iterable of entity lists, semicolon-separated. */
        public <T extends Iterable<String>, U extends Iterable<T>> String showNameAndCode2(U qids) {
            StringBuilder b = new StringBuilder();
            qids.forEach(
                    qid -> {
                        if (b.length() != 0) b.append("; ");
                        b.append(showNameAndCode(qid));
                    });
            return b.toString();
        }
    }
    /** Singleton holding all loaded Wikidata query data; constructed eagerly at class load. */
    static final QueryHelper QUERY_HELPER = new QueryHelper();

    /** Maps a language code to "English Name (code)"; the root code "mul" maps to "root". */
    static final Function<String, String> NAME =
            code ->
                    code.equals(LocaleNames.MUL)
                            ? LocaleNames.ROOT
                            : ENGLISH.getName(code) + " (" + code + ")";

    /** Language codes whose LSTR Scope is "Collection" (language groups, not languages). */
    static final Set<String> COLLECTIONS;

    static {
        Map<String, Map<LstrField, String>> languages =
                StandardCodes.getEnumLstreg().get(LstrType.language);
        Builder<String> _collections = ImmutableSet.<String>builder();
        for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) {
            String scope = e.getValue().get(LstrField.Scope);
            if (scope != null && "Collection".equalsIgnoreCase(scope)) {
                _collections.add(e.getKey());
            }
        }
        COLLECTIONS = _collections.build();
    }
    /**
     * NOTE(review): appears to be dead/unfinished code — {@code add} reverses its argument
     * but never stores it anywhere, and {@code leaves} is never populated or read. No callers
     * are visible in this file; candidate for removal.
     */
    static class Tree {
        Set<String> leaves = new LinkedHashSet<>();

        void add(List<String> chain) {
            Collections.reverse(chain);
        }
    }
/** To add parent-child relations to Wikidata */
static final Multimap<String, String> EXTRA_PARENT_CHILDREN =
ImmutableMultimap.<String, String>builder()
.put("alv", "agq")
.put("alv", "cch") // Atlantic–Congo <= cch [Atsam]
.put("alv", "kcg") // Atlantic–Congo <= kcg [Tyap]
.put("alv", "ken") // Atlantic–Congo <= ken [Kenyang]
.put("alv", "ngb")
.put("alv", "yav")
.put("ber", "zgh")
.put("bnt", "asa")
.put("bnt", "bez")
.put("bnt", "cgg")
.put("bnt", "ebu")
.put("bnt", "jmc")
.put("bnt", "ksb")
.put("bnt", "lag")
.put("bnt", "mer")
.put("bnt", "mgh")
.put("bnt", "nmg")
.put("bnt", "rof")
.put("bnt", "rwk")
.put("bnt", "sbp")
.put("bnt", "seh")
.put("bnt", "vun")
.put("bnt", "xog")
.put("cpp", "kea")
.put("euq", "eu")
// gmw = West Germanic
.put("gmw", "ksh")
.put("gmw", "lb")
.put("gmw", "wae")
.put("grk", "el")
.put("grk", "gmy")
.put("grk", "grc")
.put("ira", "lrc")
.put("ira", "bgn") // Iranian <= Western Balochi
.put("inc", "trw") // Indo-Aryan <= Torwali
.put("jpx", "ja")
.put(LocaleNames.MUL, "art")
.put(LocaleNames.MUL, "euq")
.put(LocaleNames.MUL, "jpx")
.put(LocaleNames.MUL, "tai")
.put("ngb", "sg")
.put("roa", "cpf")
.put("roa", "cpp")
.put("roa", "cpp")
.put("sdv", "saq")
.put("son", "khq")
.put("sw", "swc")
.put("tai", "blt") // tai [Tai] <= blt [Tai Dam]
.put("tai", "lo")
.put("tai", "th")
.put("zlw", "szl") // West Slavic <= Silesian
.build();
    /**
     * Manual removals of parent => child relations from Wikidata, e.g. where a child has two
     * parents and that causes problems. A child value of "*" removes ALL children of that
     * parent (see the handling in {@code run}).
     */
    static final Multimap<String, String> REMOVE_PARENT_CHILDREN =
            ImmutableMultimap.<String, String>builder()
                    .put("alv", "ukg") // ngf [Trans-New Guinea languages] <= ukg [Ukuriguma]
                    .put(
                            "crp",
                            "*") // general Creole group interferes with French/Spanish/... language
                    // grouping
                    .put("cus", "mhd") // bnt [Bantu] <= mhd [Mbugu] (not cus [Cushitic])
                    .put("gmw", "pih") // cpe [Creoles and pidgins, English based] <= pih
                    // [Pitcairn-Norfolk]
                    .put("inc", "rmg")
                    // Indo-European
                    .put("ine", "el")
                    .put("ine", "gmy")
                    .put("ine", "grc")
                    .put("ine", "trw") // inc [Indic] <= trw [Torwali]
                    .put(LocaleNames.MUL, "crp")
                    .put(LocaleNames.MUL, "cpp") // Creoles and pidgins, Portuguese-based
                    .put(LocaleNames.MUL, LocaleNames.UND) // anomaly
                    .put("nic", "kcp") // ssa [Nilo-Saharan] <= kcp [Kanga]
                    .put("nic", "kec") // ssa [Nilo-Saharan] <= kec [Keiga]
                    .put("nic", "kgo") // ssa [Nilo-Saharan] <= kgo [Krongo]
                    .put("nic", "rof") // ssa [Nilo-Saharan] <= rof [Rombo]
                    .put("nic", "tbr") // ssa [Nilo-Saharan] <= tbr [Tumtum]
                    .put("nic", "tey") // ssa [Nilo-Saharan] <= tey [Tulishi]
                    .put("sit", "th") // sit <= tbq <= th
                    .put("sit", "dz") // sit <= tbq <= dz
                    .put("sit", "zh")
                    .put("sla", "cu")
                    .put("tbq", "psq") // paa [Papuan]; for psq [Pasi] - not tbq [Tibeto-Burman
                    // languages]; (There is also a variety of the Sino-Tibetan Adi
                    // language called Pasi.
                    .build();
public static void main(String[] args) throws IOException {
new GenerateLanguageContainment().run(args);
if (Containment.hadErrors) {
System.err.println("ERROR: Containment Errors detected, see errors above.");
System.exit(1);
}
}
void run(String[] args) throws IOException {
if (true) {
// check on items
for (String check : Arrays.asList("sw", "km", "ksh", "wae", "kea", "mfe", "th", "lo")) {
System.out.println("Checking " + ENGLISH.getName(check) + "[" + check + "]");
Collection<String> entities = QUERY_HELPER.codeToEntity.get(check);
if (entities.isEmpty()) {
System.out.println("no code for " + check + ": " + entities);
continue;
}
for (String entity : entities) {
Set<List<String>> ancestors = getAllAncestors(entity);
showEntityLists(entity + " parents ", ancestors);
System.out.println();
}
}
}
Map<Status, Set<String>> table = Validity.getInstance().getStatusToCodes(LstrType.language);
TreeMultimap<String, String> _parentToChild = TreeMultimap.create();
TreeSet<String> missing = new TreeSet<>(table.get(Status.regular));
_parentToChild.put(LocaleNames.MUL, LocaleNames.UND);
Set<String> skipping = new LinkedHashSet<>();
for (String code : table.get(Status.regular)) {
if (ONLY_LIVING) {
Type type = Iso639Data.getType(code);
if (type != Type.Living) {
continue;
}
}
if (code.compareTo("hdz") > 0) {
int debug = 0;
}
// if (COLLECTIONS.contains(code)) {
// continue;
// }
Collection<String> entities = QUERY_HELPER.codeToEntity.get(code);
if (entities.isEmpty()) {
continue;
}
for (String entity : entities) {
if (QUERY_HELPER.childToParent.get(entity).isEmpty()) {
continue;
}
Set<Set<String>> chains = getAncestors(entity, skipping);
if (chains.size() > 1) {
int debug = 0;
}
for (Set<String> chain : chains) {
String last = null;
for (String link : chain) {
if (last != null) {
_parentToChild.put(link, last);
}
last = link;
}
}
}
}
System.out.println("Writing " + "skippingCodes.tsv");
try (PrintWriter w =
FileUtilities.openUTF8Writer(TsvWriter.getTsvDir(), "skippingCodes.tsv")) {
// TsvWriter.writeRow(w, "childCode\tLabel", "parentCode\tLabel"); // header
skipping.forEach(e -> w.println(e));
}
for (Entry<String, Collection<String>> entity : REMOVE_PARENT_CHILDREN.asMap().entrySet()) {
String key = entity.getKey();
for (String value : entity.getValue()) {
if (value.equals("*")) {
_parentToChild.removeAll(key);
} else {
_parentToChild.remove(key, value);
}
}
}
_parentToChild.putAll(EXTRA_PARENT_CHILDREN);
// special code for artificial
for (String code : Iso639Data.getAvailable()) {
Type type = Iso639Data.getType(code);
if (type == Type.Constructed) {
_parentToChild.put("art", code);
}
}
Multimap<String, String> parentToChild = ImmutableMultimap.copyOf(_parentToChild);
Multimap<String, String> childToParent =
ImmutableMultimap.copyOf(
Multimaps.invertFrom(parentToChild, TreeMultimap.create()));
System.out.println(
"Checking " + "he" + "\t" + Containment.getAllDirected(childToParent, "he"));
try (PrintWriter w =
FileUtilities.openUTF8Writer(TsvWriter.getTsvDir(), "RawLanguageContainment.txt")) {
print(w, parentToChild, new ArrayList<>(Arrays.asList(LocaleNames.MUL)));
}
SimpleXMLSource xmlSource = new SimpleXMLSource("languageGroup");
xmlSource.setNonInheriting(true); // should be gotten from DtdType...
CLDRFile newFile = new CLDRFile(xmlSource);
newFile.setDtdType(DtdType.supplementalData);
newFile.add("//" + DtdType.supplementalData + "/version[@number='$Revision$']", "");
printXML(newFile, parentToChild);
try (PrintWriter outFile =
FileUtilities.openUTF8Writer(
CLDRPaths.SUPPLEMENTAL_DIRECTORY, "languageGroup.xml")) {
newFile.write(outFile);
} catch (IOException e1) {
throw new ICUUncheckedIOException("Can't write to languageGroup.xml", e1);
}
// for (Entry<String,String> entry : childToParent.entries()) {
// String childNames = getName(entityToCode, entityToLabel, entry.getKey());
// String parentNames = getName(entityToCode, entityToLabel, entry.getValue());
// System.out.println(entry.getKey() + "\t" + entry.getValue() + "\t" +
// childNames + "\t" + parentNames);
// }
QUERY_HELPER.writeTsvs();
}
private static void showEntityLists(String title, Set<List<String>> ancestors) {
ancestors.forEach(
new Consumer<List<String>>() {
@Override
public void accept(List<String> item) {
item.forEach(
new Consumer<String>() {
@Override
public void accept(String t) {
System.out.println(
t
+ "\t"
+ QUERY_HELPER.entityToCode.get(t)
+ "\t"
+ QUERY_HELPER.entityToLabel.get(t));
}
});
System.out.println();
}
});
}
    /** Writes the full language-group containment into newFile, starting from the root (mul). */
    private static void printXML(CLDRFile newFile, Multimap<String, String> parentToChild) {
        printXML(newFile, parentToChild, LocaleNames.MUL);
    }
private static void printXML(
CLDRFile newFile, Multimap<String, String> parentToChild, String base) {
Collection<String> children = parentToChild.get(base);
if (children.isEmpty()) {
return;
}
if (base.equals(LocaleNames.UND)) {
// skip, no good info
} else {
newFile.add(
"//"
+ DtdType.supplementalData
+ "/languageGroups/languageGroup[@parent=\""
+ base
+ "\"]",
Joiner.on(" ").join(children));
}
for (String child : children) {
printXML(newFile, parentToChild, child);
}
}
private static void print(
Writer out, Multimap<String, String> parentToChild, List<String> line) {
String current = line.get(line.size() - 1);
Collection<String> children = parentToChild.get(current);
if (children.isEmpty()) {
try {
String sep = "";
for (String item : line) {
out.append(sep).append(NAME.apply(item));
sep = " > ";
}
out.append('\n');
out.flush();
} catch (IOException e) {
}
} else {
for (String child : children) {
line.add(child);
print(out, parentToChild, line);
line.remove(line.size() - 1);
}
}
}
    /**
     * Computes the candidate ancestor chains (leaf -> ... -> mul) for one Wikidata entity.
     *
     * <p>Each raw chain from Wikidata is filtered: entities without ISO codes are dropped;
     * non-collection codes appearing after the first link are skipped and recorded in {@code
     * skipping}, except that "zh" is rewritten to the collection code "zhx". Chains containing
     * a cycle are discarded entirely. Finally, chains that are strict subsets of longer chains
     * are removed.
     *
     * @param leaf the Wikidata entity URI to start from
     * @param skipping receives human-readable notes about skipped inheritance links
     * @return the surviving chains, each ordered leaf-first and ending in mul (root)
     */
    private static Set<Set<String>> getAncestors(String leaf, Set<String> skipping) {
        Set<List<String>> items = Containment.getAllDirected(QUERY_HELPER.childToParent, leaf);
        Set<Set<String>> itemsFixed = new LinkedHashSet<>();
        main:
        for (List<String> item : items) {
            Set<String> chain = new LinkedHashSet<>();
            for (String id : item) {
                String code = QUERY_HELPER.entityToCode.get(id);
                if (code == null) {
                    // Parent entity has no ISO code: skip it but keep walking up the chain.
                    continue;
                }
                // skip leaf (non-collection) nodes after the first
                if (!chain.isEmpty() && !COLLECTIONS.contains(code)) {
                    if (code.equals("zh")) {
                        code = "zhx"; // rewrite collections usage
                    } else {
                        skipping.add(
                                "Skipping inheritance from\t"
                                        + chain
                                        + "\t"
                                        + code
                                        + "\tfrom\t"
                                        + QUERY_HELPER.showNameAndCode2(items));
                        continue;
                    }
                }
                // check for cycle, and skip the whole chain if we have one
                boolean changed = chain.add(code);
                if (!changed) {
                    log("Cycle in\t" + chain + "\tfrom\t" + QUERY_HELPER.showNameAndCode2(items));
                    continue main;
                }
            }
            // Only keep chains with at least one real parent link; cap them with mul (root).
            if (chain.size() > 1) {
                chain.add(LocaleNames.MUL); // root
                itemsFixed.add(chain);
            }
        }
        // remove subsets
        // eg [[smp, he, mul], [smp, he, sem, afa, mul]]
        // => [[smp, he, sem, afa, mul]]
        if (itemsFixed.size() > 1) {
            Set<Set<String>> removals = new HashSet<>();
            for (Set<String> chain1 : itemsFixed) {
                for (Set<String> chain2 : itemsFixed) {
                    if (chain1.containsAll(chain2) && !chain2.containsAll(chain1)) {
                        removals.add(chain2);
                    }
                }
            }
            itemsFixed.removeAll(removals);
        }
        return itemsFixed;
    }
    /** Logs a diagnostic message; currently just prints to stdout. */
    private static void log(String string) {
        System.out.println(string);
    }
// TODO: This function is only called by other commented-out code above.
// private static String getBest(Collection<String> parents) {
// for (String parent : parents) {
// String code = QUERY_HELPER.entityToCode.get(parent);
// if (code == null) continue;
// Type type = Iso639Data.getType(code);
// if (type != Type.Living) {
// continue;
// }
// return parent;
// }
// // failed
// return parents.iterator().next();
// }
private static Multimap<String, String> loadQueryPairs(
Class<?> class1,
String file,
Function<String, String> keyMapper,
Function<String, String> valueMapper)
throws IOException {
System.out.println("QUERY: " + file);
ResultSet rs = queryClient.execSelectFromSparql(file, QueryClient.WIKIDATA_SPARQL_SERVER);
// the query must return exactly two variables.
List<String> resultVars = rs.getResultVars();
assertTwoVars(resultVars);
final String keyName = resultVars.get(0);
final String valueName = resultVars.get(1);
ImmutableMultimap.Builder<String, String> _keyToValues = ImmutableMultimap.builder();
for (; rs.hasNext(); ) {
final QuerySolution qs = rs.next();
String key = QueryClient.getStringOrNull(qs, keyName);
String value = QueryClient.getStringOrNull(qs, valueName);
_keyToValues.put(key, value);
}
ImmutableMultimap<String, String> result = _keyToValues.build();
showDups(file, result, keyMapper, valueMapper);
System.out.println("LOADED: " + file + " with rows " + rs.getRowNumber());
return result;
}
    /**
     * Runs the named SPARQL query (which must return exactly two variables) and collects the
     * results as a unique key=value map, where (with one exception, see below) the FIRST value
     * seen for a key wins. Keys with multiple values are reported via showDups.
     *
     * @param class1 unused; kept for signature symmetry with loadQueryPairs
     * @param file name of a sparql query, such as 'wikidata-childToParent'
     * @param fixValue optional transform applied to each raw value before storing
     * @param keyMapper used only for duplicate-value reporting
     * @param valueMapper used only for duplicate-value reporting
     * @return immutable map of key to its first (post-fixValue) value
     * @throws IOException if the query cannot be loaded
     */
    private static Map<String, String> loadQueryPairsUnique(
            Class<?> class1,
            String file,
            Function<String, String> fixValue,
            Function<String, String> keyMapper,
            Function<String, String> valueMapper)
            throws IOException {
        System.out.println("QUERY: " + file);
        ResultSet rs = queryClient.execSelectFromSparql(file, QueryClient.WIKIDATA_SPARQL_SERVER);
        // the query must return exactly two variables.
        List<String> resultVars = rs.getResultVars();
        assertTwoVars(resultVars);
        final String keyName = resultVars.get(0);
        final String valueName = resultVars.get(1);
        Map<String, String> _keyToValue = new TreeMap<>();
        // All values per key are also retained, solely for duplicate reporting below.
        Multimap<String, String> _keyToValues = TreeMultimap.create();
        for (; rs.hasNext(); ) {
            final QuerySolution qs = rs.next();
            String key = QueryClient.getStringOrNull(qs, keyName);
            String value = QueryClient.getStringOrNull(qs, valueName);
            if (fixValue != null) {
                value = fixValue.apply(value);
            }
            _keyToValues.put(key, value);
            String oldValue = _keyToValue.get(key);
            // First value wins, EXCEPT that a previously stored "kxm" may be overwritten.
            // NOTE(review): the "kxm" special case looks like a one-off data workaround —
            // confirm whether it is still needed.
            if (oldValue == null || oldValue.equals("kxm")) {
                _keyToValue.put(key, value);
            }
        }
        _keyToValue = ImmutableMap.copyOf(_keyToValue);
        showDups(file, _keyToValues, keyMapper, valueMapper);
        System.out.println("LOADED: " + file + " with rows " + rs.getRowNumber());
        return _keyToValue;
    }
private static void assertTwoVars(List<String> resultVars) {
if (resultVars.size() != 2) {
throw new IllegalArgumentException(
"expected 2 result vars but got " + resultVars.size() + ": " + resultVars);
}
}
private static void showDups(
String file,
Multimap<String, String> _keyToValues,
Function<String, String> keyMapper,
Function<String, String> valueMapper) {
for (Entry<String, Collection<String>> entry : _keyToValues.asMap().entrySet()) {
Collection<String> valueSet = entry.getValue();
if (valueSet.size() > 1) {
String key = entry.getKey();
key = keyMapper == null ? key : keyMapper.apply(key);
if (valueMapper != null) {
Set<String> result = new LinkedHashSet<>();
valueSet.stream().map(valueMapper).forEach(x -> result.add(x));
valueSet = result;
}
log(file + "\tMultiple values: " + key + "\t" + valueSet);
}
}
}
    /** Returns all directed ancestor chains for the given entity, per the Wikidata childToParent data. */
    static Set<List<String>> getAllAncestors(String lang) {
        return Containment.getAllDirected(QUERY_HELPER.childToParent, lang);
    }
}