blob: 5d16ddbf6c0509b409d5295f15cfef1a3e2abf75 [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.common.jimfs;
import static com.google.common.base.Preconditions.checkNotNull;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.regex.PatternSyntaxException;
/**
 * Translates globs to regex patterns.
 *
 * <p>The translation is a small push-down state machine: the glob is read one character at a
 * time, the state on top of the stack decides what regex text to emit for that character, and
 * states push/pop each other as bracket expressions, brace groups, escapes and stars open and
 * close.
 *
 * @author Colin Decker
 */
final class GlobToRegex {

  /**
   * Converts the given glob to a regular expression pattern. The given separators determine what
   * characters the resulting expression breaks on for glob expressions such as * which should not
   * cross directory boundaries.
   *
   * <p>Basic conversions (assuming / as only separator):
   *
   * <pre>{@code
   * ?          = [^/]
   * *          = [^/]*
   * **         = .*
   * [a-z]      = [[^/]&&[a-z]]
   * [!a-z]     = [[^/]&&[^a-z]]
   * {a,b,c}    = (a|b|c)
   * }</pre>
   *
   * <p>Any other character that is a regex metacharacter (including {@code |}) is emitted with a
   * preceding backslash so that it matches itself.
   *
   * @param glob the glob expression to translate
   * @param separators the characters that act as name separators for the path type
   * @return the source of a regular expression equivalent to {@code glob}
   * @throws PatternSyntaxException if the glob is malformed: an empty or unclosed {@code []}, an
   *     unclosed or nested <code>{}</code>, or a trailing {@code \}
   * @throws NullPointerException if {@code glob} or {@code separators} is null
   */
  public static String toRegex(String glob, String separators) {
    return new GlobToRegex(glob, separators).convert();
  }

  /**
   * Characters that must be backslash-escaped when they appear as ordinary text outside of a
   * character class. {@code |} is part of this set: left unescaped, a glob such as {@code a|b}
   * would turn into a regex alternation instead of matching the literal name.
   */
  private static final InternalCharMatcher REGEX_RESERVED =
      InternalCharMatcher.anyOf("^$.?+*\\[]{}()|");

  /** The glob being converted. */
  private final String glob;

  /** The separator characters, in the order given by the caller. */
  private final String separators;

  /** Matcher built from {@link #separators}; used to recognise separators in the glob. */
  private final InternalCharMatcher separatorMatcher;

  /** Accumulates the regex produced so far. */
  private final StringBuilder builder = new StringBuilder();

  /** State stack; the head of the deque is the current state. */
  private final Deque<State> states = new ArrayDeque<>();

  /** Index in {@link #glob} of the character being processed; reported in syntax errors. */
  private int index;

  private GlobToRegex(String glob, String separators) {
    this.glob = checkNotNull(glob);
    // Fail here, with the same exception type as for glob, rather than somewhere downstream.
    this.separators = checkNotNull(separators);
    this.separatorMatcher = InternalCharMatcher.anyOf(separators);
  }

  /**
   * Converts the glob to a regex one character at a time. A state stack (states) is maintained,
   * with the state at the top of the stack being the current state at any given time. The current
   * state is always used to process the next character. When a state processes a character, it may
   * pop the current state or push a new state as the current state. The resulting regex is written
   * to {@code builder}.
   *
   * <p>Once the input is exhausted the current state's {@code finish} hook runs; states that
   * represent an unterminated construct use it to report a syntax error.
   */
  private String convert() {
    pushState(NORMAL);
    for (index = 0; index < glob.length(); index++) {
      currentState().process(this, glob.charAt(index));
    }
    currentState().finish(this);
    return builder.toString();
  }

  /** Enters the given state. The current state becomes the previous state. */
  private void pushState(State state) {
    states.push(state);
  }

  /** Returns to the previous state. */
  private void popState() {
    states.pop();
  }

  /** Returns the current state. */
  private State currentState() {
    return states.peek();
  }

  /**
   * Creates a {@link PatternSyntaxException} for the glob at the current index. The exception is
   * returned rather than thrown so that call sites can write {@code throw syntaxError(...)} and
   * the compiler sees the control flow end there.
   */
  private PatternSyntaxException syntaxError(String desc) {
    return new PatternSyntaxException(desc, glob, index);
  }

  /** Appends the given character as-is to the regex. */
  private void appendExact(char c) {
    builder.append(c);
  }

  /** Appends the regex form of the given normal character or separator from the glob. */
  private void append(char c) {
    if (separatorMatcher.matches(c)) {
      appendSeparator();
    } else {
      appendNormal(c);
    }
  }

  /**
   * Appends the regex form of the given normal character from the glob, escaping it if it is a
   * regex metacharacter.
   */
  private void appendNormal(char c) {
    if (REGEX_RESERVED.matches(c)) {
      builder.append('\\');
    }
    builder.append(c);
  }

  /**
   * Appends the regex form matching the separators for the path type: the single separator
   * itself, or a character class of all separators when there is more than one.
   */
  private void appendSeparator() {
    if (separators.length() == 1) {
      appendNormal(separators.charAt(0));
    } else {
      builder.append('[');
      for (int i = 0; i < separators.length(); i++) {
        appendInBracket(separators.charAt(i));
      }
      builder.append("]");
    }
  }

  /** Appends the regex form that matches anything except the separators for the path type. */
  private void appendNonSeparator() {
    builder.append("[^");
    for (int i = 0; i < separators.length(); i++) {
      appendInBracket(separators.charAt(i));
    }
    builder.append(']');
  }

  /** Appends the regex form of the glob ? character. */
  private void appendQuestionMark() {
    appendNonSeparator();
  }

  /** Appends the regex form of the glob * character. */
  private void appendStar() {
    appendNonSeparator();
    builder.append('*');
  }

  /** Appends the regex form of the glob ** pattern. */
  private void appendStarStar() {
    builder.append(".*");
  }

  /**
   * Appends the regex form of the start of a glob [] section. The user's class is intersected
   * with "not a separator" so that a bracket expression can never match a separator.
   */
  private void appendBracketStart() {
    builder.append('[');
    appendNonSeparator();
    builder.append("&&[");
  }

  /** Appends the regex form of the end of a glob [] section. */
  private void appendBracketEnd() {
    builder.append("]]");
  }

  /**
   * Appends the regex form of the given character within a glob [] section.
   *
   * <p>Characters that would otherwise be interpreted by the regex character-class syntax are
   * escaped so that they match themselves: {@code \} (escape), {@code [} (nested class),
   * {@code &} (so that {@code &&} is not read as an intersection) and {@code ^} (so that it is
   * not read as negation when it is the first character). A backslash before a non-alphabetic
   * character is always a valid regex escape, so escaping these unconditionally is safe.
   */
  private void appendInBracket(char c) {
    switch (c) {
      case '\\':
      case '[':
      case '&':
      case '^':
        builder.append('\\');
        break;
      default:
        break;
    }
    builder.append(c);
  }

  /** Appends the regex form of the start of a glob {} section. */
  private void appendCurlyBraceStart() {
    builder.append('(');
  }

  /** Appends the regex form of the separator (,) within a glob {} section. */
  private void appendSubpatternSeparator() {
    builder.append('|');
  }

  /** Appends the regex form of the end of a glob {} section. */
  private void appendCurlyBraceEnd() {
    builder.append(')');
  }

  /** Converter state. */
  private abstract static class State {
    /**
     * Process the next character with the current state, transitioning the converter to a new
     * state if necessary.
     */
    abstract void process(GlobToRegex converter, char c);

    /**
     * Called on the current state after all characters have been read. Does nothing by default;
     * states in which the glob may not legally end override this to throw.
     */
    void finish(GlobToRegex converter) {}
  }

  /** Normal state. */
  private static final State NORMAL =
      new State() {
        @Override
        void process(GlobToRegex converter, char c) {
          switch (c) {
            case '?':
              converter.appendQuestionMark();
              return;
            case '[':
              converter.appendBracketStart();
              converter.pushState(BRACKET_FIRST_CHAR);
              return;
            case '{':
              converter.appendCurlyBraceStart();
              converter.pushState(CURLY_BRACE);
              return;
            case '*':
              // Emit nothing yet: whether this is * or ** depends on the next character.
              converter.pushState(STAR);
              return;
            case '\\':
              converter.pushState(ESCAPE);
              return;
            default:
              converter.append(c);
          }
        }

        @Override
        public String toString() {
          return "NORMAL";
        }
      };

  /** State following the reading of a single \. */
  private static final State ESCAPE =
      new State() {
        @Override
        void process(GlobToRegex converter, char c) {
          converter.append(c);
          converter.popState();
        }

        @Override
        void finish(GlobToRegex converter) {
          throw converter.syntaxError("Hanging escape (\\) at end of pattern");
        }

        @Override
        public String toString() {
          return "ESCAPE";
        }
      };

  /** State following the reading of a single *. */
  private static final State STAR =
      new State() {
        @Override
        void process(GlobToRegex converter, char c) {
          if (c == '*') {
            converter.appendStarStar();
            converter.popState();
          } else {
            // It was a lone *; emit it and let the enclosing state handle this character.
            converter.appendStar();
            converter.popState();
            converter.currentState().process(converter, c);
          }
        }

        @Override
        void finish(GlobToRegex converter) {
          converter.appendStar();
          // A trailing * must not hide the state it was read in: hand control back so that,
          // for example, "{a*" is still reported as an unclosed { instead of silently
          // producing an unbalanced regex.
          converter.popState();
          converter.currentState().finish(converter);
        }

        @Override
        public String toString() {
          return "STAR";
        }
      };

  /** State immediately following the reading of a [. */
  private static final State BRACKET_FIRST_CHAR =
      new State() {
        @Override
        void process(GlobToRegex converter, char c) {
          if (c == ']') {
            // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for example)
            // but doesn't work for the default java.nio.file implementations. In the cases of "[]]" it
            // produces:
            // java.util.regex.PatternSyntaxException: Unclosed character class near index 13
            // ^[[^/]&&[]]\]$
            //              ^
            // The error here is slightly different, but trying to make this work would require some
            // kind of lookahead and break the simplicity of char-by-char conversion here. Also, if
            // someone wants to include a ']' inside a character class, they should escape it.
            throw converter.syntaxError("Empty []");
          }
          if (c == '!') {
            // Glob negation; the regex spelling is ^.
            converter.appendExact('^');
          } else if (c == '-') {
            // A leading hyphen is a literal hyphen, in globs and in regex classes alike.
            converter.appendExact(c);
          } else {
            // Everything else, including a literal ^, goes through the escaping path.
            converter.appendInBracket(c);
          }
          converter.popState();
          converter.pushState(BRACKET);
        }

        @Override
        void finish(GlobToRegex converter) {
          throw converter.syntaxError("Unclosed [");
        }

        @Override
        public String toString() {
          return "BRACKET_FIRST_CHAR";
        }
      };

  /** State inside [brackets], but not at the first character inside the brackets. */
  private static final State BRACKET =
      new State() {
        @Override
        void process(GlobToRegex converter, char c) {
          if (c == ']') {
            converter.appendBracketEnd();
            converter.popState();
          } else {
            converter.appendInBracket(c);
          }
        }

        @Override
        void finish(GlobToRegex converter) {
          throw converter.syntaxError("Unclosed [");
        }

        @Override
        public String toString() {
          return "BRACKET";
        }
      };

  /** State inside {curly braces}. */
  private static final State CURLY_BRACE =
      new State() {
        @Override
        void process(GlobToRegex converter, char c) {
          switch (c) {
            case '?':
              converter.appendQuestionMark();
              break;
            case '[':
              converter.appendBracketStart();
              converter.pushState(BRACKET_FIRST_CHAR);
              break;
            case '{':
              throw converter.syntaxError("{ not allowed in subpattern group");
            case '*':
              converter.pushState(STAR);
              break;
            case '\\':
              converter.pushState(ESCAPE);
              break;
            case '}':
              converter.appendCurlyBraceEnd();
              converter.popState();
              break;
            case ',':
              converter.appendSubpatternSeparator();
              break;
            default:
              converter.append(c);
          }
        }

        @Override
        void finish(GlobToRegex converter) {
          throw converter.syntaxError("Unclosed {");
        }

        @Override
        public String toString() {
          return "CURLY_BRACE";
        }
      };
}