blob: 1d0df9ae48047cb4d658c9e80e5b4f66573eaa49 [file] [log] [blame]
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.loganalysis.util;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* The RegexTrie is a trie where each _stored_ segment of the key is a regex {@link Pattern}. Thus,
* the full _stored_ key is a List<Pattern> rather than a String as in a standard trie. Note that
* the {@link #get(Object key)} method requires a List<String>, which will be matched against the
* {@link Pattern}s, rather than checked for equality as in a standard trie. It will likely perform
* poorly for large datasets.
* <p />
* One can also use a {@code null} entry in the {@code Pattern} sequence to serve as a wildcard. If
* a {@code null} is encountered, all subsequent entries in the sequence will be ignored.
* When the retrieval code encounters a {@code null} {@code Pattern}, it will first wait to see if a
* more-specific entry matches the sequence. If one does, that more-specific entry will proceed,
* even if it subsequently fails to match.
* <p />
* If no more-specific entry matches, the wildcard match will add all remaining {@code String}s
* to the list of captures (if enabled) and return the value associated with the wildcard.
* <p />
* A short sample of the wildcard functionality:
* <pre>
* List<List<String>> captures = new LinkedList<List<String>>();
* RegexTrie<Integer> trie = new RegexTrie<Integer>();
* trie.put(2, "a", null);
* trie.put(4, "a", "b");
* trie.retrieve(captures, "a", "c", "e");
* // returns 2. captures is now [[], ["c"], ["e"]]
* trie.retrieve(captures, "a", "b");
* // returns 4. captures is now [[], []]
* trie.retrieve(captures, "a", "b", "c");
* // returns null. captures is now [[], []]
* </pre>
*/
//TODO: Use libTF once this is copied over.
public class RegexTrie<V> {
private V mValue = null;
private Map<CompPattern, RegexTrie<V>> mChildren =
new LinkedHashMap<CompPattern, RegexTrie<V>>();
/**
* Patterns aren't comparable by default, which prevents you from retrieving them from a
* HashTable. This is a simple stub class that makes a Pattern with a working
* {@link CompPattern#equals()} method.
*/
static class CompPattern {
protected final Pattern mPattern;
CompPattern(Pattern pattern) {
if (pattern == null) {
throw new NullPointerException();
}
mPattern = pattern;
}
@Override
public boolean equals(Object other) {
Pattern otherPat;
if (other instanceof Pattern) {
otherPat = (Pattern) other;
} else if (other instanceof CompPattern) {
CompPattern otherCPat = (CompPattern) other;
otherPat = otherCPat.mPattern;
} else {
return false;
}
return mPattern.toString().equals(otherPat.toString());
}
@Override
public int hashCode() {
return mPattern.toString().hashCode();
}
@Override
public String toString() {
return String.format("CP(%s)", mPattern.toString());
}
public Matcher matcher(String string) {
return mPattern.matcher(string);
}
}
public void clear() {
mValue = null;
for (RegexTrie<V> child : mChildren.values()) {
child.clear();
}
mChildren.clear();
}
boolean containsKey(String... strings) {
return retrieve(strings) != null;
}
V recursivePut(V value, List<CompPattern> patterns) {
// Cases:
// 1) patterns is empty -- set our value
// 2) patterns is non-empty -- recurse downward, creating a child if necessary
if (patterns.isEmpty()) {
V oldValue = mValue;
mValue = value;
return oldValue;
} else {
CompPattern curKey = patterns.get(0);
List<CompPattern> nextKeys = patterns.subList(1, patterns.size());
// Create a new child to handle
RegexTrie<V> nextChild = mChildren.get(curKey);
if (nextChild == null) {
nextChild = new RegexTrie<V>();
mChildren.put(curKey, nextChild);
}
return nextChild.recursivePut(value, nextKeys);
}
}
/**
* A helper method to consolidate validation before adding an entry to the trie.
*
* @param value The value to set
* @param patterns The sequence of {@link CompPattern}s that must be sequentially matched to
* retrieve the associated {@code value}
*/
private V validateAndPut(V value, List<CompPattern> pList) {
if (pList.size() == 0) {
throw new IllegalArgumentException("pattern list must be non-empty.");
}
return recursivePut(value, pList);
}
/**
* Add an entry to the trie.
*
* @param value The value to set
* @param patterns The sequence of {@link Pattern}s that must be sequentially matched to
* retrieve the associated {@code value}
*/
public V put(V value, Pattern... patterns) {
List<CompPattern> pList = new ArrayList<CompPattern>(patterns.length);
for (Pattern pat : patterns) {
if (pat == null) {
pList.add(null);
break;
}
pList.add(new CompPattern(pat));
}
return validateAndPut(value, pList);
}
/**
* This helper method takes a list of regular expressions as {@link String}s and compiles them
* on-the-fly before adding the subsequent {@link Pattern}s to the trie
*
* @param value The value to set
* @param patterns The sequence of regular expressions (as {@link String}s) that must be
* sequentially matched to retrieve the associated {@code value}. Each String will be
* compiled as a {@link Pattern} before invoking {@link #put(V, Pattern...)}.
*/
public V put(V value, String... regexen) {
List<CompPattern> pList = new ArrayList<CompPattern>(regexen.length);
for (String regex : regexen) {
if (regex == null) {
pList.add(null);
break;
}
Pattern pat = Pattern.compile(regex);
pList.add(new CompPattern(pat));
}
return validateAndPut(value, pList);
}
V recursiveRetrieve(List<List<String>> captures, List<String> strings) {
// Cases:
// 1) strings is empty -- return our value
// 2) strings is non-empty -- find the first child that matches, recurse downward
if (strings.isEmpty()) {
return mValue;
} else {
boolean wildcardMatch = false;
V wildcardValue = null;
String curKey = strings.get(0);
List<String> nextKeys = strings.subList(1, strings.size());
for (Map.Entry<CompPattern, RegexTrie<V>> child : mChildren.entrySet()) {
CompPattern pattern = child.getKey();
if (pattern == null) {
wildcardMatch = true;
wildcardValue = child.getValue().getValue();
continue;
}
Matcher matcher = pattern.matcher(curKey);
if (matcher.matches()) {
if (captures != null) {
List<String> curCaptures = new ArrayList<String>(matcher.groupCount());
for (int i = 0; i < matcher.groupCount(); i++) {
// i+1 since group 0 is the entire matched string
curCaptures.add(matcher.group(i+1));
}
captures.add(curCaptures);
}
return child.getValue().recursiveRetrieve(captures, nextKeys);
}
}
if (wildcardMatch) {
// Stick the rest of the query string into the captures list and return
if (captures != null) {
for (String str : strings) {
captures.add(Arrays.asList(str));
}
}
return wildcardValue;
}
// no match
return null;
}
}
/**
* Fetch a value from the trie, by matching the provided sequence of {@link String}s to a
* sequence of {@link Pattern}s stored in the trie.
*
* @param strings A sequence of {@link String}s to match
* @return The associated value, or {@code null} if no value was found
*/
public V retrieve(String... strings) {
return retrieve(null, strings);
}
/**
* Fetch a value from the trie, by matching the provided sequence of {@link String}s to a
* sequence of {@link Pattern}s stored in the trie. This version of the method also returns
* a {@link List} of capture groups for each {@link Pattern} that was matched.
* <p />
* Each entry in the outer List corresponds to one level of {@code Pattern} in the trie.
* For each level, the list of capture groups will be stored. If there were no captures
* for a particular level, an empty list will be stored.
* <p />
* Note that {@code captures} will be {@link List#clear()}ed before the retrieval begins.
* Also, if the retrieval fails after a partial sequence of matches, {@code captures} will
* still reflect the capture groups from the partial match.
*
* @param captures A {@code List<List<String>>} through which capture groups will be returned.
* @param strings A sequence of {@link String}s to match
* @return The associated value, or {@code null} if no value was found
*/
public V retrieve(List<List<String>> captures, String... strings) {
if (strings.length == 0) {
throw new IllegalArgumentException("string list must be non-empty");
}
List<String> sList = Arrays.asList(strings);
if (captures != null) {
captures.clear();
}
return recursiveRetrieve(captures, sList);
}
private V getValue() {
return mValue;
}
@Override
public String toString() {
return String.format("{V: %s, C: %s}", mValue, mChildren);
}
}