blob: 7605f5629893ca182e0bd316dd77f374f5bbfa87 [file] [log] [blame]
package org.unicode.cldr.tool;
import java.util.Set;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
/**
 * Command-line tool that prints a tab-separated table of languages with an estimated
 * population and an approximate GDP share for each.
 *
 * <p>Per-territory population data from {@link SupplementalDataInfo} is accumulated into
 * per-language totals. While accumulating, territories whose listed languages cover less than
 * {@link #MIN_LANGUAGE_COVERAGE} of the territory population are reported on
 * {@code System.out} as suspect, and their official-language figures are scaled up to
 * compensate.
 */
public class GetLanguageData {
    /**
     * Minimum fraction of a territory's population that its listed languages are expected to
     * account for; below this the territory is reported and its official languages scaled up.
     */
    private static final double MIN_LANGUAGE_COVERAGE = 0.75;

    /**
     * Language code left out of every per-territory computation.
     * NOTE(review): presumably skipped because it duplicates another code's data - confirm.
     */
    private static final String SKIPPED_LANGUAGE = "tl";

    /** Key under which population not attributed to any listed language is accumulated. */
    private static final String UNKNOWN_LANGUAGE = "und";

    SupplementalDataInfo sdata = SupplementalDataInfo
        .getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY);
    Factory cldrFactory = Factory
        .make(CLDRPaths.MAIN_DIRECTORY, ".*");
    CLDRFile english = cldrFactory.make("en", true);
    /** Territories that the supplemental data lists as contained in "EU". */
    Set<String> euCountries = sdata.getContained("EU");
    /** Accumulated approximate GDP per language code, filled by {@link #findSuspectData()}. */
    Counter<String> languageToGdp = new Counter<String>();
    /** Accumulated population per language code, filled by {@link #findSuspectData()}. */
    Counter<String> languageToPop = new Counter<String>();

    /**
     * Entry point; builds the tool and prints its report.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new GetLanguageData().run();
    }

    /**
     * Accumulates the per-language totals, then prints one row per language returned by
     * {@code sdata.getLanguages()}. Languages with no accumulated population get only the
     * code and name columns.
     */
    private void run() {
        findSuspectData();
        // The header names all six columns that a fully populated row emits.
        System.out.println("Code\tLang\tLpop\tApprox. Gdp\tOfficial Status\tEU Territory");
        for (String language : sdata.getLanguages()) {
            final long pop = languageToPop.getCount(language);
            System.out.print(language + "\t" + english.getName(language));
            if (pop > 0) {
                final Pair<OfficialStatus, String> status = isOfficialLanguageOfEUCountry(language);
                final OfficialStatus bestStatus = status.getFirst();
                System.out.print("\t" + pop
                    + "\t" + languageToGdp.getCount(language)
                    // Only official statuses are shown; anything else prints as blank.
                    + "\t" + (bestStatus.isOfficial() ? bestStatus : "")
                    + "\t" + status.getSecond());
            }
            System.out.println();
        }
    }

    /**
     * Walks every territory that has population data, reports territories whose language
     * figures look incomplete, and adds each language's population and proportional GDP share
     * to {@link #languageToPop} and {@link #languageToGdp}.
     *
     * <p>A territory is reported when the sum of its language populations is below
     * {@link #MIN_LANGUAGE_COVERAGE} of the territory population. In that case the official
     * languages are scaled so that official + non-official reaches that fraction, provided
     * there is any official-language population to scale. Whatever part of the territory
     * population is still unattributed afterwards is added under {@link #UNKNOWN_LANGUAGE}.
     */
    private void findSuspectData() {
        for (String territory : sdata.getTerritoriesWithPopulationData()) {
            final PopulationData territoryData = sdata.getPopulationDataForTerritory(territory);
            final double gdp = territoryData.getGdp();
            final double territoryPop = territoryData.getPopulation();
            final Set<String> languages = sdata.getLanguagesForTerritoryWithPopulationData(territory);

            // First pass: total and official-only language population for the territory.
            double langPop = 0;
            double officialLangPop = 0;
            for (String language : languages) {
                if (SKIPPED_LANGUAGE.equals(language)) {
                    continue;
                }
                final PopulationData pop2 = sdata.getLanguageAndTerritoryPopulationData(language, territory);
                langPop += pop2.getPopulation();
                if (pop2.getOfficialStatus().isOfficial()) {
                    officialLangPop += pop2.getPopulation();
                }
            }

            double scale = 1.0;
            final double missing = MIN_LANGUAGE_COVERAGE * territoryPop - langPop;
            if (missing > 0) {
                System.out.println(territory
                    + "\t" + english.getName("territory", territory)
                    + "\t" + territoryPop
                    + "\t" + langPop
                    + "\t" + gdp);
                if (officialLangPop > 0) {
                    // Scale up the official languages so that
                    // official + non-official = MIN_LANGUAGE_COVERAGE of the total.
                    scale = 1 + missing / officialLangPop;
                    System.out.println("\tScaling " + territory + "\t" + scale * 100 + "%");
                } else {
                    // Nothing to scale: dividing by zero here would only yield Infinity/NaN.
                    System.out.println("\tNot scaling " + territory + "\tno official-language population");
                }
            }

            // Second pass: record the (possibly scaled) figures per language.
            long langUnknown = (long) territoryPop;
            for (String language : languages) {
                if (SKIPPED_LANGUAGE.equals(language)) {
                    continue;
                }
                final PopulationData pop2 = sdata.getLanguageAndTerritoryPopulationData(language, territory);
                double langPop2 = pop2.getPopulation();
                if (pop2.getOfficialStatus().isOfficial()) {
                    langPop2 *= scale;
                }
                final long recordedPop = (long) langPop2;
                languageToGdp.add(language, gdpShare(gdp, langPop2, territoryPop));
                languageToPop.add(language, recordedPop);
                // Subtract exactly what was recorded, so the unknown remainder plus the
                // recorded populations add up to the territory population.
                langUnknown -= recordedPop;
            }
            if (langUnknown > 0) {
                languageToGdp.add(UNKNOWN_LANGUAGE, gdpShare(gdp, langUnknown, territoryPop));
                languageToPop.add(UNKNOWN_LANGUAGE, langUnknown);
            }
        }
    }

    /**
     * Returns the part of a territory's GDP proportional to {@code population}, truncated to a
     * {@code long}.
     *
     * @param gdp the territory's GDP
     * @param population the population whose share is wanted
     * @param territoryPop the territory's total population
     * @return {@code gdp * population / territoryPop}, or 0 when {@code territoryPop} is not
     *     positive (avoids casting Infinity/NaN to a long)
     */
    private static long gdpShare(double gdp, double population, double territoryPop) {
        if (territoryPop <= 0) {
            return 0;
        }
        return (long) (gdp * population / territoryPop);
    }

    /**
     * Summarizes a language's status across all territories that have population data for it.
     *
     * @param language the language code to look up
     * @return a pair whose first element is the greatest {@link OfficialStatus} (by enum
     *     {@code compareTo}) seen in any territory, starting from {@code unknown}, and whose
     *     second element is the territory in {@link #euCountries} where the language's status
     *     is major and its literate population is largest, or {@code ""} if there is none
     */
    private Pair<OfficialStatus, String> isOfficialLanguageOfEUCountry(String language) {
        OfficialStatus bestStatus = OfficialStatus.unknown;
        String eu = "";
        double bestEuPop = 0;
        for (String territory : sdata.getTerritoriesForPopulationData(language)) {
            final PopulationData pop = sdata.getLanguageAndTerritoryPopulationData(language, territory);
            final OfficialStatus status = pop.getOfficialStatus();
            if (bestStatus.compareTo(status) < 0) {
                bestStatus = status;
            }
            if (status.isMajor() && euCountries.contains(territory)) {
                final double literatePop = pop.getLiteratePopulation();
                if (literatePop > bestEuPop) {
                    bestEuPop = literatePop;
                    eu = territory;
                }
            }
        }
        return Pair.of(bestStatus, eu);
    }
}