// blob: b898fe5586b7f91e5b9539776b154c63269fb10e [file] [log] [blame]
package org.unicode.cldr.tool;
import java.io.BufferedReader;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.ChainedMap;
import org.unicode.cldr.util.ChainedMap.M5;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.VoteResolver.Level;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XPathParts;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.SimpleDateFormat;
public class ReadSql {
/** Compile-time switch: when true (and verbose is set), main also echoes lines that are neither INSERT nor CREATE statements. */
static final boolean DEBUG = false;
/** User records read from cldr/users.xml under CLDRPaths.DATA_DIRECTORY when this class is initialized; used to resolve owner ids. */
static UserMap umap = new UserMap(CLDRPaths.DATA_DIRECTORY + "cldr/users.xml");
/**
 * Command-line options understood by this tool. Each constant owns one {@link Option},
 * and all of them are registered in the shared {@link #options} set.
 */
enum MyOptions {
    organization(".*", "google", "organization"),
    verbose("", "", "verbose"),
    ;

    // BOILERPLATE TO COPY
    final Option option;

    /**
     * @param argumentPattern pattern handed to the Option for this constant
     * @param defaultArgument default value handed to the Option
     * @param helpText help text handed to the Option
     */
    private MyOptions(String argumentPattern, String defaultArgument, String helpText) {
        option = new Option(this, argumentPattern, defaultArgument, helpText);
    }

    static Options options = new Options();
    static {
        // Register every constant's Option so that parse() knows about all of them.
        for (MyOptions option : MyOptions.values()) {
            options.add(option, option.option);
        }
    }

    /**
     * Parses the command line into the registered options.
     *
     * @param args command-line arguments
     * @param showArguments forwarded to {@code Options.parse}; previously ignored in favor of a
     *        hard-coded {@code true}
     * @return whatever {@code Options.parse} returns for these arguments
     */
    private static Set<String> parse(String[] args, boolean showArguments) {
        // NOTE(review): the first constant is passed as the first argument; presumably it designates
        // the help option - confirm against Options.parse.
        return options.parse(MyOptions.values()[0], args, showArguments);
    }
}
/** Organization whose users Data.show reports on; set in main from the "organization" option. */
static Organization organization;
/** Whether to print extra output (CREATE statements and a sample of parsed rows); set in main from the "verbose" option. */
static boolean verbose;
/**
 * Entry point: reads the SQL dump line by line, hands every INSERT line to {@link Data#parseLine},
 * optionally echoes CREATE statements, then prints the number of kept rows per table (tables with
 * a positive count only) and the per-user report for tables whose name matches "_30".
 *
 * @param args command-line arguments, see {@link MyOptions}
 * @throws IOException if the dump file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
    MyOptions.parse(args, true);
    organization = Organization.valueOf(MyOptions.organization.option.getValue());
    verbose = MyOptions.verbose.option.doesOccur();

    final long maxItems = 10;
    boolean insideCreate = false;
    try (BufferedReader reader = FileUtilities.openFile(CLDRPaths.DATA_DIRECTORY, "cldr/cldr-DUMP-20160817.sql")) {
        // The counter only bounds the number of lines read; in practice end-of-file ends the loop.
        for (long remaining = Long.MAX_VALUE - 1; remaining > 0; --remaining) {
            final String line = reader.readLine();
            if (line == null) {
                break;
            }
            if (line.startsWith("INSERT")) {
                Data.parseLine(line, maxItems);
                continue;
            }
            // Decide whether this non-INSERT line is a candidate for echoing.
            final boolean echo;
            if (line.startsWith("CREATE")) {
                insideCreate = true;
                echo = true;
            } else if (insideCreate) {
                echo = true;
                // The ") ENGINE" line closes the CREATE statement.
                if (line.startsWith(") ENGINE")) {
                    insideCreate = false;
                }
            } else {
                echo = DEBUG;
            }
            if (echo && verbose) {
                System.out.println(line);
            }
        }
    }

    // Per-table counts of kept rows; zero counts are not printed.
    for (R2<Long, String> entry : Data.getKeys().getEntrySetSortedByCount(false, null)) {
        if (entry.get0() > 0) {
            System.out.println(CldrUtility.toString(entry));
        }
    }
    Data.show("_30");
}
/**
 * Shortens {@code line} to at most {@code len} UTF-16 code units and appends an ellipsis if
 * anything was cut off. A surrogate pair is never split: if the cut would fall between its two
 * halves, the whole pair is dropped.
 *
 * @param line the text to shorten
 * @param len maximum number of code units to keep from {@code line}
 * @return {@code line} itself if it is short enough, otherwise the shortened text plus "…"
 */
private static String trunc(String line, int len) {
    if (line.length() <= len) {
        return line;
    }
    int end = len;
    // Cutting between a high and a low surrogate would leave an unpaired surrogate in the result.
    if (end > 0
        && Character.isHighSurrogate(line.charAt(end - 1))
        && Character.isLowSurrogate(line.charAt(end))) {
        --end;
    }
    return line.substring(0, end) + "…";
}
/** Matches the head of an INSERT statement up to and including VALUES; group 1 is the back-quoted table name. */
static final Pattern INSERT = Pattern.compile("INSERT\\s+INTO\\s+`([^`]+)`\\s+VALUES\\s*");
/** Format used to parse and print row timestamps. */
static final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // 2014-05-01 17:19:57
/**
 * One parsed row of a table: the raw field strings, the timestamp parsed from the last field,
 * and the user looked up from the owner field.
 */
static class Items {
    /** Timestamp parsed from the last raw field, or null if that field is not a timestamp. */
    final Date date;
    /** User found for the owner field; null when the date is missing or no such user is known. */
    final User owner;
    /** The row's fields exactly as parsed. */
    final List<String> raw;

    /**
     * Builds an Items for a row, reporting (rather than propagating) any failure.
     *
     * @param key the table name
     * @param raw the row's fields
     * @return the new instance, or null if construction threw
     */
    public static Items of(String key, List<String> raw) {
        Items built = null;
        try {
            built = new Items(key, raw);
        } catch (Exception e) {
            System.out.println("No user for: " + key + ": " + raw);
        }
        return built;
    }

    private Items(String key, List<String> raw) {
        Date parsed = null;
        try {
            parsed = df.parse(raw.get(raw.size() - 1));
        } catch (ParseException e) {
            // Last field is not a timestamp: keep the date (and therefore the owner) null.
        }

        User resolved = null;
        if (parsed != null) {
            // The owner id is field 1 for FEEDBACK and field 2 for every other table.
            final int ownerIndex = key.equals("FEEDBACK") ? 1 : 2;
            resolved = umap.get(raw.get(ownerIndex));
        }

        this.date = parsed;
        this.raw = raw;
        this.owner = resolved;
    }

    @Override
    public String toString() {
        final String when = date != null ? df.format(date) : "???";
        return when + ";\t" + owner + ";\t" + CldrUtility.toString(raw);
    }
}
/**
 * Collects dates grouped by year, month and day, and renders them compactly, e.g. a year followed
 * by its months, each followed by its days with a "(n)" suffix when a day occurs more than once.
 */
static class DateMap {
    /** year -> month -> day -> running index -> TRUE; the innermost map's size is the count for that day. */
    M5<Integer, Integer, Integer, Integer, Boolean> yearMonthDays = ChainedMap.of(new TreeMap<>(), new TreeMap(), new TreeMap(), new TreeMap(),
        Boolean.class);
    /** Running index, so that every added date gets its own innermost key. */
    int current = 0;

    /** Records one occurrence of the given date (time of day is ignored). */
    void add(Date d) {
        final int year = d.getYear() + 1900;
        final int month = d.getMonth() + 1;
        final int day = d.getDate();
        yearMonthDays.put(year, month, day, current++, Boolean.TRUE);
    }

    static DateFormat monthFormat = new SimpleDateFormat("MMM");

    @Override
    public String toString() {
        final StringBuilder out = new StringBuilder();
        String yearSeparator = "";
        for (Entry<Integer, Map<Integer, Map<Integer, Map<Integer, Boolean>>>> yearEntry : yearMonthDays) {
            final int year = yearEntry.getKey();
            out.append(yearSeparator).append(year).append(": ");
            yearSeparator = "; ";
            appendMonths(out, year, yearEntry.getValue());
        }
        return out.toString();
    }

    /** Appends the months of one year, separated by "; ", each followed by its days. */
    private static void appendMonths(StringBuilder out, int year, Map<Integer, Map<Integer, Map<Integer, Boolean>>> months) {
        String monthSeparator = "";
        for (Entry<Integer, Map<Integer, Map<Integer, Boolean>>> monthEntry : months.entrySet()) {
            final int month = monthEntry.getKey();
            out.append(monthSeparator)
                .append(monthFormat.format(new Date(year - 1900, month - 1, 1)))
                .append(": ");
            monthSeparator = "; ";
            appendDays(out, monthEntry.getValue());
        }
    }

    /** Appends the days of one month, separated by ", ", with "(n)" after days that occur n > 1 times. */
    private static void appendDays(StringBuilder out, Map<Integer, Map<Integer, Boolean>> days) {
        String daySeparator = "";
        for (Entry<Integer, Map<Integer, Boolean>> dayEntry : days.entrySet()) {
            final int day = dayEntry.getKey();
            out.append(daySeparator).append(day);
            daySeparator = ", ";
            final int count = dayEntry.getValue().size();
            if (count > 1) {
                out.append('(').append(count).append(")");
            }
        }
    }
}
/**
 * All kept rows of one table of the dump, plus the static registry of tables by name and the
 * parser that fills it from INSERT statements.
 */
static class Data {
    /** Table name, taken from the INSERT statement. */
    final String key;
    /** Rows of this table for which an owner could be resolved. */
    final List<Items> dataItems = new ArrayList<Items>();
    /** Registry of all tables seen so far, sorted by table name. */
    static Map<String, Data> map = new TreeMap<>();

    public Data(String key) {
        this.key = key;
    }

    /**
     * @return a counter mapping each table name to the number of rows kept for it
     */
    public static Counter<String> getKeys() {
        Counter<String> items = new Counter<>();
        for (Entry<String, Data> e : map.entrySet()) {
            items.add(e.getKey(), e.getValue().dataItems.size());
        }
        return items;
    }

    /**
     * For every table whose name contains a match for {@code regex}, prints one line per user of
     * the selected {@code organization}: the table name, the user's row count, the user, and the
     * dates of that user's rows. Users are printed in the order returned by
     * {@code getEntrySetSortedByCount(false, null)}.
     *
     * @param regex regular expression searched for (not fully matched) in each table name
     */
    public static void show(String regex) {
        Matcher m = Pattern.compile(regex).matcher("");
        for (Entry<String, Data> e : map.entrySet()) {
            Data data = e.getValue();
            if (!m.reset(data.key).find()) {
                continue;
            }
            Counter<User> counter = new Counter<>();
            Map<User, DateMap> dateMaps = new HashMap<>();
            for (Items item : data.dataItems) {
                // add() only keeps rows with a non-null owner, so owner can be dereferenced here.
                if (item.owner.org == organization) {
                    counter.add(item.owner, 1);
                    DateMap dateMap = dateMaps.get(item.owner);
                    if (dateMap == null) {
                        dateMaps.put(item.owner, dateMap = new DateMap());
                    }
                    dateMap.add(item.date);
                }
            }
            for (R2<Long, User> item : counter.getEntrySetSortedByCount(false, null)) {
                final Long count = item.get0();
                final User user = item.get1();
                System.out.println("key: " + data.key + "; count: " + count + "; " + user + "\t" + dateMaps.get(user));
            }
        }
    }

    @Override
    public String toString() {
        return key + "=" + CldrUtility.toString(dataItems);
    }

    /**
     * Converts one row's fields to an Items and keeps it if an owner was resolved.
     *
     * @param items the row's fields
     * @return the kept Items, or null if the row could not be built or has no owner
     */
    public Items add(ArrayList<String> items) {
        final Items items2 = Items.of(key, items);
        if (items2 != null && items2.owner != null) {
            dataItems.add(items2);
            return items2;
        }
        return null;
    }

    /**
     * Parses one {@code INSERT INTO `table` VALUES (...),(...)} line and adds each parenthesized
     * tuple as a row of that table. Lines for the FEEDBACK and sf_fora tables are ignored.
     * A backslash makes the following character literal; escape sequences are not decoded
     * (for example, backslash-n yields the letter n). A backslash that is the very last
     * character of the line is kept as a literal backslash.
     *
     * @param line the INSERT statement
     * @param maxItems while verbose, roughly the first {@code maxItems - 1} tuples of this line
     *        are echoed (only those that were kept)
     * @throws IllegalArgumentException if the line does not start with an INSERT statement head
     */
    static void parseLine(String line, long maxItems) {
        Matcher m = INSERT.matcher(line);
        if (!m.lookingAt()) {
            throw new IllegalArgumentException(
                "Not an INSERT INTO ... VALUES statement: " + line.substring(0, Math.min(line.length(), 100)));
        }
        final String key = m.group(1);
        int i = m.end();
        if (key.equals("FEEDBACK") || key.equals("sf_fora")) { // cf. private FeedBack.TABLE_FEEDBACK and public SurveyForum.DB_FORA
            return; // old format
        }
        boolean inQuote = false;
        // True between tuples, so that the comma separating "(...),(...)" does not produce a field.
        boolean skipComma = true;
        StringBuilder buffer = new StringBuilder();
        ArrayList<String> items = new ArrayList<>();
        Data current = map.get(key);
        if (current == null) {
            map.put(key, current = new Data(key));
        }
        while (i < line.length()) {
            int cp = line.codePointAt(i);
            i += Character.charCount(cp);
            if (inQuote) {
                switch (cp) {
                case '\'':
                    inQuote = false;
                    break;
                case '\\':
                    // Take the escaped character literally; a dangling backslash stays a backslash.
                    if (i < line.length()) {
                        cp = line.codePointAt(i);
                        i += Character.charCount(cp);
                    }
                    // fall through
                default:
                    buffer.appendCodePoint(cp);
                    break;
                }
            } else {
                switch (cp) {
                case '\'':
                    inQuote = true;
                    break;
                case ',':
                    // A comma inside a tuple ends the current field.
                    if (!skipComma) {
                        items.add(buffer.toString());
                        buffer.setLength(0);
                    }
                    break;
                case ' ':
                case '\t':
                case '\n':
                case '(':
                    // Not added to the field; commas after this point separate fields again.
                    skipComma = false;
                    break;
                case ')':
                    // End of tuple: flush the last field and hand the row to the table.
                    skipComma = true;
                    items.add(buffer.toString());
                    buffer.setLength(0);
                    Items lastItem = current.add(items);
                    if (--maxItems > 0 && lastItem != null) {
                        if (verbose) System.out.println(key + "\t" + lastItem);
                    }
                    items = new ArrayList<>();
                    break;
                case '\\':
                    // Same escape handling as inside quotes.
                    if (i < line.length()) {
                        cp = line.codePointAt(i);
                        i += Character.charCount(cp);
                    }
                    // fall through
                default:
                    buffer.appendCodePoint(cp);
                    break;
                }
            }
        }
    }
}
/**
 * One user record, populated from the attributes of a user element's path
 * (id, email, level, name, org, locales).
 */
static class User {
    final int id;
    final String email;
    final Level level;
    final String name;
    final Organization org;
    /** Locale codes from the "locales" attribute, split on commas and/or spaces. */
    final Set<String> locales;

    /**
     * @param parts parsed path whose last element carries the user's attributes
     */
    public User(XPathParts parts) {
        this.id = Integer.parseInt(attribute(parts, "id"));
        this.email = attribute(parts, "email");
        this.level = Level.valueOf(attribute(parts, "level"));
        this.name = attribute(parts, "name");
        this.org = Organization.fromString(attribute(parts, "org"));
        final String[] localeCodes = attribute(parts, "locales").split("[, ]+");
        this.locales = ImmutableSet.copyOf(Arrays.asList(localeCodes));
    }

    /** Reads the named attribute from the last element of the path. */
    private static String attribute(XPathParts parts, String attributeName) {
        return parts.getAttributeValue(-1, attributeName);
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder();
        text.append("id: ").append(id);
        text.append("; email: ").append(email);
        text.append("; name: ").append(name);
        text.append("; level: ").append(level);
        text.append("; org: ").append(org);
        text.append("; locales: ").append(locales);
        return text.toString();
    }
}
/**
 * Lookup table from numeric user id to {@link User}, loaded from an XML file of user elements.
 */
static class UserMap {
    Map<Integer, User> map = new HashMap<>();

    /**
     * Loads every path of the given file and registers the user described by it.
     *
     * @param filename the XML file to read
     */
    UserMap(String filename) {
        final List<Pair<String, String>> pathValues = new ArrayList<>();
        XMLFileReader.loadPathValues(filename, pathValues, false);
        // Each path describes one element such as:
        // <user id="1271" email="..." level="tc" name="..." org="adobe" locales="pt"/>
        for (Pair<String, String> pathValue : pathValues) {
            final User user = new User(XPathParts.getInstance(pathValue.getFirst()));
            map.put(user.id, user);
        }
    }

    /**
     * @param string a user id in decimal notation
     * @return the user with that id, or null if there is none
     * @throws NumberFormatException if {@code string} is not a valid integer
     */
    public User get(String string) {
        final Integer id = Integer.valueOf(string);
        return map.get(id);
    }
}
}