blob: 2b14d19f7a5238bd55084eb6d377315b1f971464 [file] [log] [blame]
package org.unicode.cldr.util;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.google.common.collect.TreeMultiset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Helper class that allows logging the use of regular expressions. A class that summarizes them
* will get a NavigableSet of PatternCountInterface instances.
*
* @author ribnitz
*/
public class RegexLogger {
/** Compile-time switch: when {@code false}, a do-nothing logger is handed out. */
private static final boolean DEBUG = false;

/** Shared logger; stays {@code null} until {@link #getInstance()} is first called. */
private static RegexLoggerInterface instance = null;

/**
 * Returns the shared logger, creating it on first use.
 *
 * @return a recording logger when {@code DEBUG} is set, otherwise the null implementation
 */
public static RegexLoggerInterface getInstance() {
    if (instance != null) {
        return instance;
    }
    instance = DEBUG ? new RegexLoggerImpl() : new NullRegexLogger();
    return instance;
}
/**
 * Key for a logged pattern: the trimmed pattern text plus a flag telling whether the call was
 * made from RegexFinder.
 *
 * <p>Equality and hashing use both the pattern and the flag. The natural ordering uses the
 * pattern text only, so sorted collections treat two keys with the same pattern as the same
 * element regardless of the flag.
 */
private static class PatternStringWithBoolean implements Comparable<PatternStringWithBoolean> {
    private final String pattern;
    private final boolean calledFromRegexFinder;
    /** Computed once in the constructor; both hashed fields are final. */
    private final int hashCode;

    /**
     * @param patternStr the pattern text; surrounding whitespace is trimmed (must not be null)
     * @param calledFromRegexFinder whether the call originated from RegexFinder
     */
    public PatternStringWithBoolean(String patternStr, boolean calledFromRegexFinder) {
        this.pattern = patternStr.trim();
        this.calledFromRegexFinder = calledFromRegexFinder;
        hashCode = Objects.hash(this.pattern, this.calledFromRegexFinder);
    }

    @Override
    public int hashCode() {
        return hashCode;
    }

    /** @return the trimmed pattern text */
    public String getPattern() {
        return pattern;
    }

    /** @return the flag passed to the constructor */
    public boolean isCalledFromRegexFinder() {
        return calledFromRegexFinder;
    }

    /**
     * Two keys are equal when they have the same class, the same flag and the same pattern text.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        PatternStringWithBoolean other = (PatternStringWithBoolean) obj;
        // Cheap rejections first; the string comparison only runs when flag and hash agree.
        return calledFromRegexFinder == other.calledFromRegexFinder
                && hashCode == other.hashCode
                && pattern.equals(other.pattern);
    }

    /**
     * Orders keys by pattern text only; a {@code null} argument sorts before every key.
     *
     * <p>NOTE(review): this ordering ignores the flag and is therefore not consistent with
     * {@link #equals(Object)}; confirm whether sorted collections are meant to merge keys that
     * differ only in the flag.
     */
    @Override
    public int compareTo(PatternStringWithBoolean o) {
        if (o == null) {
            return 1;
        }
        if (this == o) {
            return 0;
        }
        return pattern.compareTo(o.pattern);
    }
}
/**
* Interface used for logging the use of regular expressions and for retrieving the collected
* per-pattern statistics.
*
* @author ribnitz
*/
public static interface RegexLoggerInterface {
/**
* Log that the given pattern was applied on the given matchStr, whether it matched, and
* what the type of the log was. Cls contains the calling class.
*
* @param pattern the regular expression, as a string
* @param matchStr the string the pattern was applied to
* @param matched whether the pattern matched
* @param type whether the operation was a FIND or a MATCH
* @param cls the calling class
*/
void log(String pattern, String matchStr, boolean matched, LogType type, Class<?> cls);
/**
* Variant of the log call that takes the pattern from a Matcher.
*
* @param matcher the matcher whose pattern is logged
* @param matchStr the string the pattern was applied to
* @param matched whether the pattern matched
* @param type whether the operation was a FIND or a MATCH
* @param cls the calling class
*/
void log(Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls);
/**
* Variant of the log call that takes a compiled Pattern.
*
* @param pattern the compiled pattern that is logged
* @param matchStr the string the pattern was applied to
* @param matched whether the pattern matched
* @param type whether the operation was a FIND or a MATCH
* @param cls the calling class
*/
void log(Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls);
/**
* Variant of the log call that additionally carries a time value.
*
* @param pattern the regular expression, as a string
* @param matchStr the string the pattern was applied to
* @param matched whether the pattern matched
* @param time a time value for the operation; NOTE(review): the unit is not documented and
*     the implementations in this file do not use it
* @param type whether the operation was a FIND or a MATCH
* @param cls the calling class
*/
void log(
String pattern,
String matchStr,
boolean matched,
double time,
LogType type,
Class<?> cls);
/**
* Get all the entries that matched
*
* @return the collected entries
*/
NavigableSet<PatternCountInterface> getEntries();
/**
* Get the entries that occurred at least minCount times. If there are no matches, an empty
* set is returned
*
* @param minCount the minimum number of occurrences an entry needs to be returned
* @return the entries that meet the threshold; empty if there are none
*/
NavigableSet<PatternCountInterface> getEntries(final int minCount);
/**
* Tell whether this logger is enabled.
*
* @return true if logging is enabled (the implementations in this file report the DEBUG flag)
*/
boolean isEnabled();
}
/**
 * Skeleton logger: the convenience overloads funnel into the six-argument {@code log} call, so a
 * concrete subclass only has to supply that method and {@code getEntries(int)}.
 *
 * @author ribnitz
 */
private abstract static class AbstractRegexLogger implements RegexLoggerInterface {
    /** Unwraps the matcher's pattern and forwards to the {@link Pattern} overload. */
    @Override
    public void log(
            Matcher matcher, String matchStr, boolean matched, LogType type, Class<?> cls) {
        final Pattern compiled = matcher.pattern();
        log(compiled, matchStr, matched, type, cls);
    }

    /** Extracts the pattern text and forwards to the {@link String} overload. */
    @Override
    public void log(
            Pattern pattern, String matchStr, boolean matched, LogType type, Class<?> cls) {
        final String regex = pattern.pattern();
        log(regex, matchStr, matched, type, cls);
    }

    /** Forwards to the timed overload with a time of zero. */
    @Override
    public void log(
            String pattern, String matchStr, boolean matched, LogType type, Class<?> cls) {
        final double noTime = 0;
        log(pattern, matchStr, matched, noTime, type, cls);
    }

    /** Returns the entries that occurred at least once. */
    @Override
    public NavigableSet<PatternCountInterface> getEntries() {
        final int atLeastOnce = 1;
        return getEntries(atLeastOnce);
    }

    /** Reports the value of the enclosing class's {@code DEBUG} flag. */
    @Override
    public boolean isEnabled() {
        return DEBUG;
    }
}
/**
 * Null implementation: logging calls are ignored and no entries are ever reported.
 *
 * @author ribnitz
 */
private static class NullRegexLogger extends AbstractRegexLogger {
    /** Ignores the call; nothing is recorded. */
    @Override
    public void log(
            String pattern,
            String matchStr,
            boolean matched,
            double time,
            LogType type,
            Class<?> cls) {
        // do nothing
    }

    /**
     * @param minCount ignored
     * @return a new, empty set on every call
     */
    @Override
    public NavigableSet<PatternCountInterface> getEntries(int minCount) {
        // Typed construction instead of copying the raw Collections.EMPTY_SET, which forced an
        // unchecked conversion.
        return new TreeSet<>();
    }
}
/**
* Interface used for the entries returned by the RegexLogger
*
* @author ribnitz
*/
public static interface PatternCountInterface {
/**
* Get the pattern used
*
* @return the pattern text
*/
String getPattern();
/**
* Get the number of successful matches obtained through FIND
*
* @return the count of FIND operations that matched
*/
int getNumberOfFindMatches();
/**
* Get the number of unsuccessful matches obtained through FIND
*
* @return the count of FIND operations that did not match
*/
int getNumberOfFindFailures();
/**
* Get the number of successful matches obtained through MATCH
*
* @return the count of MATCH operations that matched
*/
int getNumberOfMatchMatches();
/**
* Get the number of unsuccessful matches obtained through MATCH
*
* @return the count of MATCH operations that did not match
*/
int getNumberOfMatchFailures();
/**
* Return true if this call was made from RegexFinder
*
* @return true if the call was made from RegexFinder
*/
boolean isCalledFromRegexFinder();
/**
* Get a set of all call locations
*
* @return the call locations recorded for this pattern
*/
Set<String> getCallLocations();
}
/**
 * Used by getEntries to turn the keys of the count multisets into result entries. Each key
 * produces at most one entry, no matter how many times it is offered.
 *
 * <p>This implementation accepts every key; it does not consult {@code minCount}.
 *
 * @author ribnitz
 */
private static class AddAllEntryProcessor {
    protected final int minCount;
    protected final CountSets c;
    /** Keys that have already produced an entry. */
    protected final Set<PatternStringWithBoolean> seen = new HashSet<>();
    protected final NavigableSet<PatternCountInterface> result = new TreeSet<>();

    /**
     * @param minCount threshold kept for subclasses; not used by this class itself
     * @param c the count sets from which each entry's numbers are read
     */
    public AddAllEntryProcessor(int minCount, CountSets c) {
        this.c = c;
        this.minCount = minCount;
    }

    /** @return the entries collected so far (the live set, not a copy) */
    public NavigableSet<PatternCountInterface> getResult() {
        return result;
    }

    /**
     * Adds an entry for {@code item} unless one was already added.
     *
     * @param item the key to turn into an entry
     * @param countSet the multiset the key came from; not used by this implementation
     */
    public void process(
            PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet) {
        if (seen.contains(item)) {
            return;
        }
        final RegexKeyWithCount entry = new RegexKeyWithCount(item, c);
        result.add(entry);
        seen.add(item);
    }
}
/**
 * Filtering variant used when getEntries is called with a minCount: a key is only passed on when
 * it occurs at least minCount times in the multiset it is offered with.
 *
 * @author ribnitz
 */
private static class EntryProcessor extends AddAllEntryProcessor {
    public EntryProcessor(int minCount, CountSets c) {
        super(minCount, c);
    }

    /**
     * Forwards {@code item} to the parent only if {@code countSet} holds it often enough.
     *
     * @param item the key to consider
     * @param countSet the multiset whose count for {@code item} is tested against minCount
     */
    @Override
    public void process(
            PatternStringWithBoolean item, Multiset<PatternStringWithBoolean> countSet) {
        final int timesSeen = countSet.count(item);
        if (timesSeen < minCount) {
            return;
        }
        super.process(item, countSet);
    }
}
/**
 * Since all the inner classes are static, this holder is used to pass around the references to
 * the four count multisets and the recorded call locations.
 *
 * @author ribnitz
 */
private static class CountSets {
    final Multiset<PatternStringWithBoolean> matchedFindSet;
    final Multiset<PatternStringWithBoolean> failedFindSet;
    final Multiset<PatternStringWithBoolean> matchedMatchSet;
    final Multiset<PatternStringWithBoolean> failedMatchSet;
    final Multimap<PatternStringWithBoolean, String> stacktraces;

    /**
     * Stores the given references as they are; nothing is copied.
     *
     * @param matchedFindSet counts of FIND operations that matched
     * @param failedFindSet counts of FIND operations that did not match
     * @param matchedMatchSet counts of MATCH operations that matched
     * @param failedMatchSet counts of MATCH operations that did not match
     * @param occurrences call locations per key; exposed as {@code stacktraces}
     */
    public CountSets(
            Multiset<PatternStringWithBoolean> matchedFindSet,
            Multiset<PatternStringWithBoolean> failedFindSet,
            Multiset<PatternStringWithBoolean> matchedMatchSet,
            Multiset<PatternStringWithBoolean> failedMatchSet,
            Multimap<PatternStringWithBoolean, String> occurrences) {
        this.matchedFindSet = matchedFindSet;
        this.failedFindSet = failedFindSet;
        this.matchedMatchSet = matchedMatchSet;
        this.failedMatchSet = failedMatchSet;
        this.stacktraces = occurrences;
    }
}
/**
 * Immutable snapshot of the statistics for one pattern key, taken from the count sets when the
 * object is constructed.
 *
 * <p>The natural ordering sorts by the total number of logged operations. Ties are broken by
 * pattern text and then by the RegexFinder flag, so that a sorted set keeps entries for different
 * patterns that happen to share the same total.
 */
private static class RegexKeyWithCount
        implements PatternCountInterface, Comparable<PatternCountInterface> {
    private final String pattern;
    private final int findMatchCount;
    private final int findFailCount;
    private final int matchMatchCount;
    private final int matchFailCount;
    private final boolean calledFromRegexFinder;
    private final Set<String> callLocations = new HashSet<>();
    /** Computed once at the end of the constructor from all other fields. */
    private final int hashCode;

    /**
     * @param key the pattern key whose statistics are captured
     * @param bean the count sets and call locations to read the numbers from
     */
    public RegexKeyWithCount(PatternStringWithBoolean key, CountSets bean) {
        this.pattern = key.getPattern();
        this.calledFromRegexFinder = key.isCalledFromRegexFinder();
        this.findMatchCount = bean.matchedFindSet.count(key);
        this.findFailCount = bean.failedFindSet.count(key);
        this.matchMatchCount = bean.matchedMatchSet.count(key);
        this.matchFailCount = bean.failedMatchSet.count(key);
        // A set already ignores duplicates, so no explicit contains() check is needed.
        callLocations.addAll(bean.stacktraces.get(key));
        this.hashCode =
                Objects.hash(
                        this.pattern,
                        this.findMatchCount,
                        this.findFailCount,
                        this.matchFailCount,
                        this.matchMatchCount,
                        this.calledFromRegexFinder,
                        this.callLocations);
    }

    @Override
    public String getPattern() {
        return pattern;
    }

    @Override
    public int hashCode() {
        return hashCode;
    }

    @Override
    public int getNumberOfFindMatches() {
        return findMatchCount;
    }

    @Override
    public int getNumberOfFindFailures() {
        return findFailCount;
    }

    @Override
    public int getNumberOfMatchMatches() {
        return matchMatchCount;
    }

    @Override
    public int getNumberOfMatchFailures() {
        return matchFailCount;
    }

    /**
     * Two entries are equal when all four counts, the pattern, the flag and the contents of the
     * call-location sets agree.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (hashCode != obj.hashCode()) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        RegexKeyWithCount other = (RegexKeyWithCount) obj;
        return matchFailCount == other.matchFailCount
                && matchMatchCount == other.matchMatchCount
                && findFailCount == other.findFailCount
                && findMatchCount == other.findMatchCount
                && calledFromRegexFinder == other.calledFromRegexFinder
                && pattern.equals(other.pattern)
                // Compare set contents; a reference comparison would make two independently
                // built but identical entries unequal.
                && callLocations.equals(other.callLocations);
    }

    /**
     * Orders by total number of logged operations, then by pattern text, then by the RegexFinder
     * flag. A {@code null} argument sorts before every entry.
     */
    @Override
    public int compareTo(PatternCountInterface o) {
        if (o == null) {
            return 1;
        }
        int byTotal = Long.compare(totalCount(this), totalCount(o));
        if (byTotal != 0) {
            return byTotal;
        }
        int byPattern = pattern.compareTo(o.getPattern());
        if (byPattern != 0) {
            return byPattern;
        }
        return Boolean.compare(calledFromRegexFinder, o.isCalledFromRegexFinder());
    }

    /** Sums the four counters of an entry; uses long so the sum cannot overflow. */
    private static long totalCount(PatternCountInterface entry) {
        return (long) entry.getNumberOfFindFailures()
                + entry.getNumberOfFindMatches()
                + entry.getNumberOfMatchFailures()
                + entry.getNumberOfMatchMatches();
    }

    @Override
    public boolean isCalledFromRegexFinder() {
        return calledFromRegexFinder;
    }

    /** @return the internal set of call locations (not a copy) */
    @Override
    public Set<String> getCallLocations() {
        return callLocations;
    }
}
/**
* Kind of regular-expression operation a log call reports. The recording logger in this file uses
* it to choose between its find counters and its match counters.
*/
public enum LogType {
/** A find-style operation (presumably Matcher.find(); confirm against callers). */
FIND,
/** A match-style operation (presumably Matcher.matches(); confirm against callers). */
MATCH
}
/**
* Converts one sequence of elements into another.
*
* @param <E> element type of the input
* @param <F> element type of the output
*/
private static interface IterableTransformer<E, F> {
/**
* Produce the transformed sequence for the given input.
*
* @param input the elements to transform
* @return the transformed elements; the implementations in this file may leave elements out, so
*     the result need not have the same length as the input
*/
Iterable<F> transform(Iterable<E> input);
}
/**
 * Condenses a list of stack-frame descriptions (in this file: {@code className:lineNumber}
 * strings) into the short caller description stored for a pattern.
 *
 * <p>Only the first two elements are ever examined:
 *
 * <ul>
 *   <li>A first element that starts with {@code org.unicode.cldr.test.CheckCLDR} is remembered
 *       but left out of the result. Any other first element has a leading {@code
 *       org.unicode.cldr.util.} removed and is added unless it then starts with {@code
 *       RegexLookup}.
 *   <li>If a second element exists, the remembered first element is added when it starts with
 *       {@code RegexLookup} and the second element does not start with {@code
 *       org.unicode.cldr.util.RegexLookup}. Processing then stops.
 * </ul>
 *
 * <p>NOTE(review): stopping after the second element is what the code has always done, but it
 * looks unintentional when compared with ClassnameOnlyStringTransformer, which walks the whole
 * input. Confirm the intended behaviour before relying on it.
 */
private static class StringIterableTransformer implements IterableTransformer<String, String> {
    private static final String UTIL_PACKAGE = "org.unicode.cldr.util.";
    private static final String CHECK_CLDR = "org.unicode.cldr.test.CheckCLDR";
    private static final String REGEX_LOOKUP = "RegexLookup";

    @Override
    public Iterable<String> transform(Iterable<String> input) {
        List<String> returned = new ArrayList<>(Iterables.size(input));
        String lastClass = null;
        for (String current : input) {
            if (lastClass != null) {
                // Second element: decide whether the remembered RegexLookup frame is kept.
                if (lastClass.startsWith(REGEX_LOOKUP)
                        && !current.startsWith(UTIL_PACKAGE + REGEX_LOOKUP)) {
                    returned.add(lastClass);
                }
                break;
            }
            // From here on lastClass is known to be null, i.e. this is the first element.
            if (current.startsWith(CHECK_CLDR)) {
                lastClass = current;
                // leave out
                continue;
            }
            // remove org.unicode.cldr.util.
            String transformed =
                    current.startsWith(UTIL_PACKAGE)
                            ? current.substring(UTIL_PACKAGE.length())
                            : current;
            // only the last RegexLookup will be added
            if (!transformed.startsWith(REGEX_LOOKUP)) {
                returned.add(transformed);
            }
            lastClass = transformed;
        }
        return returned;
    }
}
/**
 * Condenses a list of stack-frame descriptions down to simple class names (everything up to and
 * including the last {@code '.'} is removed from each element).
 *
 * <p>Consecutive {@code RegexLookup} frames are collapsed so that only the last one of a run is
 * kept. A {@code CheckCLDR} frame that follows a non-{@code CheckCLDR} frame is left out, and
 * processing stops at the element that follows a {@code VettingViewer} frame.
 */
private static class ClassnameOnlyStringTransformer
        implements IterableTransformer<String, String> {
    @Override
    public Iterable<String> transform(Iterable<String> input) {
        List<String> returned = new ArrayList<>(Iterables.size(input));
        String lastClass = null;
        for (String current : input) {
            int lastDot = current.lastIndexOf('.');
            if (lastDot >= 0) {
                // Skip the dot itself; keeping it would stop every startsWith check below
                // from matching a qualified name.
                current = current.substring(lastDot + 1);
            }
            if (lastClass != null) {
                if (lastClass.startsWith("RegexLookup") && !current.startsWith("RegexLookup")) {
                    returned.add(lastClass);
                }
                if (lastClass.startsWith("VettingViewer")) {
                    break;
                }
                if (current.startsWith("CheckCLDR") && !lastClass.startsWith("CheckCLDR")) {
                    lastClass = current;
                    // leave out
                    continue;
                }
            }
            // only the last RegexLookup will be added
            if (!current.startsWith("RegexLookup")) {
                returned.add(current);
            }
            lastClass = current;
        }
        return returned;
    }
}
/**
 * This is the class doing the bulk of the work: it counts, per pattern key, how often FIND and
 * MATCH operations succeeded or failed, and records the call locations for selected patterns.
 *
 * <p>The counts are held in sorted multisets, so keys are distinguished by their natural
 * ordering.
 *
 * @author ribnitz
 */
private static class RegexLoggerImpl extends AbstractRegexLogger {
    /** Class-name prefix looked for in stack traces. */
    private static final String REGEX_LOOKUP_CLASS = "org.unicode.cldr.util.RegexLookup";

    /*
     * Each has more than 1m hits, together they account for about 14m (of the 26m total)
     */
    private static final Set<String> exactMatchSet =
            new HashSet<>(
                    Arrays.asList(
                            "^//ldml.*",
                            "^//ldml/dates.*",
                            "^//ldml/units.*",
                            "^//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]",
                            "^//ldml/characters.*",
                            "^//ldml/listPatterns/listPattern.*",
                            "^//ldml/units/unitLength[@type=\"(long|short|narrow)\"].*"));

    /** Literal prefixes (compared with startsWith, not as regular expressions). */
    private static final Set<String> patternSet =
            new HashSet<>(
                    Arrays.asList(
                            "^//ldml/dates/fields",
                            "^//ldml/dates/calendars/calendar",
                            "/(availableFormats"));

    private static final Joiner JOINER = Joiner.on(";");

    private final Multiset<PatternStringWithBoolean> matchedFindSet = TreeMultiset.create();
    private final Multiset<PatternStringWithBoolean> failedFindSet = TreeMultiset.create();
    private final Multiset<PatternStringWithBoolean> matchedMatchSet = TreeMultiset.create();
    private final Multiset<PatternStringWithBoolean> failedMatchSet = TreeMultiset.create();
    private final Multimap<PatternStringWithBoolean, String> occurrences =
            TreeMultimap.create();
    private final IterableTransformer<String, String> transformer =
            new StringIterableTransformer();

    /**
     * Counts one operation for the pattern and, for selected patterns, records where the call
     * came from.
     *
     * @param pattern the regular expression, as a string
     * @param matchStr not used by this implementation
     * @param matched whether the operation matched
     * @param time not used by this implementation
     * @param type whether the operation was a FIND or a MATCH
     * @param cls not used by this implementation
     */
    @Override
    public void log(
            String pattern,
            String matchStr,
            boolean matched,
            double time,
            LogType type,
            Class<?> cls) {
        boolean isRegexFinder = findClassName(REGEX_LOOKUP_CLASS, 10);
        PatternStringWithBoolean key = new PatternStringWithBoolean(pattern, isRegexFinder);
        Collection<PatternStringWithBoolean> collectionToAdd =
                determineCollectionToUse(matched, type);
        if (collectionToAdd != null) {
            collectionToAdd.add(key);
        }
        if (shouldLogPattern(pattern, isRegexFinder)) {
            addElementToList(key);
        }
    }

    /**
     * Picks the multiset that counts the given outcome.
     *
     * @return the matching multiset, or {@code null} for a type that is neither FIND nor MATCH
     */
    private Collection<PatternStringWithBoolean> determineCollectionToUse(
            boolean matched, LogType type) {
        switch (type) {
            case FIND:
                return matched ? matchedFindSet : failedFindSet;
            case MATCH:
                return matched ? matchedMatchSet : failedMatchSet;
            default:
                return null;
        }
    }

    /**
     * Decides whether call locations are recorded for a pattern: always when the call did not
     * come through RegexLookup, otherwise only for the patterns listed in {@code exactMatchSet}
     * or starting with one of the prefixes in {@code patternSet}.
     */
    private boolean shouldLogPattern(String pattern, boolean isRegexFinder) {
        if (!isRegexFinder || exactMatchSet.contains(pattern)) {
            return true;
        }
        for (String prefix : patternSet) {
            if (pattern.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks for a frame whose class name starts with {@code className} among the first {@code
     * depth} frames of the current stack trace, after skipping the first two frames when the
     * trace is longer than two.
     */
    private boolean findClassName(String className, int depth) {
        StackTraceElement[] st = Thread.currentThread().getStackTrace();
        int startPos = (st.length > 2) ? 2 : 0;
        int endPos = Math.min(st.length, startPos + depth);
        for (int i = startPos; i < endPos; i++) {
            if (st[i].getClassName().startsWith(className)) {
                return true;
            }
        }
        return false;
    }

    /** Records the condensed current call stack as a call location for {@code key}. */
    private void addElementToList(PatternStringWithBoolean key) {
        List<String> stList = processStackTrace(REGEX_LOOKUP_CLASS, 0);
        if (!stList.isEmpty()) {
            occurrences.put(key, JOINER.join(transformer.transform(stList)));
        }
    }

    /**
     * Returns {@code className:lineNumber} strings for the current stack trace, beginning at the
     * first frame whose class name starts with {@code classNameToStartAt}.
     *
     * @param classNameToStartAt class-name prefix of the frame to start at
     * @param depth maximum number of frames to return; {@code 0} means "up to the end of the
     *     trace"; a negative value restricts the search for the starting frame to the last
     *     {@code |depth|} frames
     * @return the frame descriptions, or an empty list if no frame matches
     */
    private List<String> processStackTrace(String classNameToStartAt, int depth) {
        StackTraceElement[] st = Thread.currentThread().getStackTrace();
        if (depth == 0) {
            depth = st.length;
        }
        int startPos;
        if (depth < 0) {
            // Clamp so that a depth larger than the trace cannot index before the array.
            startPos = Math.max(0, depth + st.length);
            depth = Math.abs(depth);
        } else {
            startPos = (st.length > 2) ? 2 : 0;
        }
        int pos = startPos;
        while (pos < st.length && !st[pos].getClassName().startsWith(classNameToStartAt)) {
            pos++;
        }
        if (pos >= st.length) {
            return Collections.emptyList();
        }
        // The window starts at the frame that was found (pos), not at the search start.
        int frameCount = Math.min(depth, st.length - pos);
        int endPos = pos + frameCount;
        List<String> ret = new ArrayList<>(frameCount);
        for (int i = pos; i < endPos; i++) {
            StackTraceElement cur = st[i];
            ret.add(cur.getClassName() + ":" + cur.getLineNumber());
        }
        return ret;
    }

    /**
     * Builds one entry per logged key. With {@code minCount == 1} every key is included;
     * otherwise a key is included when at least one of the four multisets holds it {@code
     * minCount} times or more.
     *
     * @param minCount the occurrence threshold
     * @return an unmodifiable view of the resulting entries
     */
    @Override
    public NavigableSet<PatternCountInterface> getEntries(final int minCount) {
        CountSets c =
                new CountSets(
                        matchedFindSet,
                        failedFindSet,
                        matchedMatchSet,
                        failedMatchSet,
                        occurrences);
        final AddAllEntryProcessor processor =
                (minCount == 1)
                        ? new AddAllEntryProcessor(minCount, c)
                        : new EntryProcessor(minCount, c);
        processDistinct(processor, matchedFindSet);
        processDistinct(processor, failedFindSet);
        processDistinct(processor, matchedMatchSet);
        processDistinct(processor, failedMatchSet);
        return Sets.unmodifiableNavigableSet(processor.getResult());
    }

    /**
     * Offers each distinct key of {@code countSet} to the processor once. Iterating the multiset
     * itself would visit a key once per occurrence, which the processor would only discard.
     */
    private static void processDistinct(
            AddAllEntryProcessor processor, Multiset<PatternStringWithBoolean> countSet) {
        for (PatternStringWithBoolean item : countSet.elementSet()) {
            processor.process(item, countSet);
        }
    }
}
}