| /* |
| * Copyright 2013 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.google.common.jimfs; |
| |
| import static com.google.common.base.Preconditions.checkNotNull; |
| |
| import java.util.ArrayDeque; |
| import java.util.Deque; |
| import java.util.regex.PatternSyntaxException; |
| |
| /** |
| * Translates globs to regex patterns. |
| * |
| * @author Colin Decker |
| */ |
| final class GlobToRegex { |
| |
| /** |
| * Converts the given glob to a regular expression pattern. The given separators determine what |
| * characters the resulting expression breaks on for glob expressions such as * which should not |
| * cross directory boundaries. |
| * |
| * <p>Basic conversions (assuming / as only separator): |
| * |
| * <pre>{@code |
| * ? = [^/] |
| * * = [^/]* |
| * ** = .* |
| * [a-z] = [[^/]&&[a-z]] |
| * [!a-z] = [[^/]&&[^a-z]] |
| * {a,b,c} = (a|b|c) |
| * }</pre> |
| */ |
| public static String toRegex(String glob, String separators) { |
| return new GlobToRegex(glob, separators).convert(); |
| } |
| |
| private static final InternalCharMatcher REGEX_RESERVED = |
| InternalCharMatcher.anyOf("^$.?+*\\[]{}()"); |
| |
| private final String glob; |
| private final String separators; |
| private final InternalCharMatcher separatorMatcher; |
| |
| private final StringBuilder builder = new StringBuilder(); |
| private final Deque<State> states = new ArrayDeque<>(); |
| private int index; |
| |
| private GlobToRegex(String glob, String separators) { |
| this.glob = checkNotNull(glob); |
| this.separators = separators; |
| this.separatorMatcher = InternalCharMatcher.anyOf(separators); |
| } |
| |
| /** |
| * Converts the glob to a regex one character at a time. A state stack (states) is maintained, |
| * with the state at the top of the stack being the current state at any given time. The current |
| * state is always used to process the next character. When a state processes a character, it may |
| * pop the current state or push a new state as the current state. The resulting regex is written |
| * to {@code builder}. |
| */ |
| private String convert() { |
| pushState(NORMAL); |
| for (index = 0; index < glob.length(); index++) { |
| currentState().process(this, glob.charAt(index)); |
| } |
| currentState().finish(this); |
| return builder.toString(); |
| } |
| |
| /** |
| * Enters the given state. The current state becomes the previous state. |
| */ |
| private void pushState(State state) { |
| states.push(state); |
| } |
| |
| /** |
| * Returns to the previous state. |
| */ |
| private void popState() { |
| states.pop(); |
| } |
| |
| /** |
| * Returns the current state. |
| */ |
| private State currentState() { |
| return states.peek(); |
| } |
| |
| /** |
| * Throws a {@link PatternSyntaxException}. |
| */ |
| private PatternSyntaxException syntaxError(String desc) { |
| throw new PatternSyntaxException(desc, glob, index); |
| } |
| |
| /** |
| * Appends the given character as-is to the regex. |
| */ |
| private void appendExact(char c) { |
| builder.append(c); |
| } |
| |
| /** |
| * Appends the regex form of the given normal character or separator from the glob. |
| */ |
| private void append(char c) { |
| if (separatorMatcher.matches(c)) { |
| appendSeparator(); |
| } else { |
| appendNormal(c); |
| } |
| } |
| |
| /** |
| * Appends the regex form of the given normal character from the glob. |
| */ |
| private void appendNormal(char c) { |
| if (REGEX_RESERVED.matches(c)) { |
| builder.append('\\'); |
| } |
| builder.append(c); |
| } |
| |
| /** |
| * Appends the regex form matching the separators for the path type. |
| */ |
| private void appendSeparator() { |
| if (separators.length() == 1) { |
| appendNormal(separators.charAt(0)); |
| } else { |
| builder.append('['); |
| for (int i = 0; i < separators.length(); i++) { |
| appendInBracket(separators.charAt(i)); |
| } |
| builder.append("]"); |
| } |
| } |
| |
| /** |
| * Appends the regex form that matches anything except the separators for the path type. |
| */ |
| private void appendNonSeparator() { |
| builder.append("[^"); |
| for (int i = 0; i < separators.length(); i++) { |
| appendInBracket(separators.charAt(i)); |
| } |
| builder.append(']'); |
| } |
| |
| /** |
| * Appends the regex form of the glob ? character. |
| */ |
| private void appendQuestionMark() { |
| appendNonSeparator(); |
| } |
| |
| /** |
| * Appends the regex form of the glob * character. |
| */ |
| private void appendStar() { |
| appendNonSeparator(); |
| builder.append('*'); |
| } |
| |
| /** |
| * Appends the regex form of the glob ** pattern. |
| */ |
| private void appendStarStar() { |
| builder.append(".*"); |
| } |
| |
| /** |
| * Appends the regex form of the start of a glob [] section. |
| */ |
| private void appendBracketStart() { |
| builder.append('['); |
| appendNonSeparator(); |
| builder.append("&&["); |
| } |
| |
| /** |
| * Appends the regex form of the end of a glob [] section. |
| */ |
| private void appendBracketEnd() { |
| builder.append("]]"); |
| } |
| |
| /** |
| * Appends the regex form of the given character within a glob [] section. |
| */ |
| private void appendInBracket(char c) { |
| // escape \ in regex character class |
| if (c == '\\') { |
| builder.append('\\'); |
| } |
| |
| builder.append(c); |
| } |
| |
| /** |
| * Appends the regex form of the start of a glob {} section. |
| */ |
| private void appendCurlyBraceStart() { |
| builder.append('('); |
| } |
| |
| /** |
| * Appends the regex form of the separator (,) within a glob {} section. |
| */ |
| private void appendSubpatternSeparator() { |
| builder.append('|'); |
| } |
| |
| /** |
| * Appends the regex form of the end of a glob {} section. |
| */ |
| private void appendCurlyBraceEnd() { |
| builder.append(')'); |
| } |
| |
| /** |
| * Converter state. |
| */ |
| private abstract static class State { |
| /** |
| * Process the next character with the current state, transitioning the converter to a new |
| * state if necessary. |
| */ |
| abstract void process(GlobToRegex converter, char c); |
| |
| /** |
| * Called after all characters have been read. |
| */ |
| void finish(GlobToRegex converter) {} |
| } |
| |
| /** |
| * Normal state. |
| */ |
| private static final State NORMAL = |
| new State() { |
| @Override |
| void process(GlobToRegex converter, char c) { |
| switch (c) { |
| case '?': |
| converter.appendQuestionMark(); |
| return; |
| case '[': |
| converter.appendBracketStart(); |
| converter.pushState(BRACKET_FIRST_CHAR); |
| return; |
| case '{': |
| converter.appendCurlyBraceStart(); |
| converter.pushState(CURLY_BRACE); |
| return; |
| case '*': |
| converter.pushState(STAR); |
| return; |
| case '\\': |
| converter.pushState(ESCAPE); |
| return; |
| default: |
| converter.append(c); |
| } |
| } |
| |
| @Override |
| public String toString() { |
| return "NORMAL"; |
| } |
| }; |
| |
| /** |
| * State following the reading of a single \. |
| */ |
| private static final State ESCAPE = |
| new State() { |
| @Override |
| void process(GlobToRegex converter, char c) { |
| converter.append(c); |
| converter.popState(); |
| } |
| |
| @Override |
| void finish(GlobToRegex converter) { |
| throw converter.syntaxError("Hanging escape (\\) at end of pattern"); |
| } |
| |
| @Override |
| public String toString() { |
| return "ESCAPE"; |
| } |
| }; |
| |
| /** |
| * State following the reading of a single *. |
| */ |
| private static final State STAR = |
| new State() { |
| @Override |
| void process(GlobToRegex converter, char c) { |
| if (c == '*') { |
| converter.appendStarStar(); |
| converter.popState(); |
| } else { |
| converter.appendStar(); |
| converter.popState(); |
| converter.currentState().process(converter, c); |
| } |
| } |
| |
| @Override |
| void finish(GlobToRegex converter) { |
| converter.appendStar(); |
| } |
| |
| @Override |
| public String toString() { |
| return "STAR"; |
| } |
| }; |
| |
| /** |
| * State immediately following the reading of a [. |
| */ |
| private static final State BRACKET_FIRST_CHAR = |
| new State() { |
| @Override |
| void process(GlobToRegex converter, char c) { |
| if (c == ']') { |
| // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for example) |
| // but doesn't work for the default java.nio.file implementations. In the cases of "[]]" it |
| // produces: |
| // java.util.regex.PatternSyntaxException: Unclosed character class near index 13 |
| // ^[[^/]&&[]]\]$ |
| // ^ |
| // The error here is slightly different, but trying to make this work would require some |
| // kind of lookahead and break the simplicity of char-by-char conversion here. Also, if |
| // someone wants to include a ']' inside a character class, they should escape it. |
| throw converter.syntaxError("Empty []"); |
| } |
| if (c == '!') { |
| converter.appendExact('^'); |
| } else if (c == '-') { |
| converter.appendExact(c); |
| } else { |
| converter.appendInBracket(c); |
| } |
| converter.popState(); |
| converter.pushState(BRACKET); |
| } |
| |
| @Override |
| void finish(GlobToRegex converter) { |
| throw converter.syntaxError("Unclosed ["); |
| } |
| |
| @Override |
| public String toString() { |
| return "BRACKET_FIRST_CHAR"; |
| } |
| }; |
| |
| /** |
| * State inside [brackets], but not at the first character inside the brackets. |
| */ |
| private static final State BRACKET = |
| new State() { |
| @Override |
| void process(GlobToRegex converter, char c) { |
| if (c == ']') { |
| converter.appendBracketEnd(); |
| converter.popState(); |
| } else { |
| converter.appendInBracket(c); |
| } |
| } |
| |
| @Override |
| void finish(GlobToRegex converter) { |
| throw converter.syntaxError("Unclosed ["); |
| } |
| |
| @Override |
| public String toString() { |
| return "BRACKET"; |
| } |
| }; |
| |
| /** |
| * State inside {curly braces}. |
| */ |
| private static final State CURLY_BRACE = |
| new State() { |
| @Override |
| void process(GlobToRegex converter, char c) { |
| switch (c) { |
| case '?': |
| converter.appendQuestionMark(); |
| break; |
| case '[': |
| converter.appendBracketStart(); |
| converter.pushState(BRACKET_FIRST_CHAR); |
| break; |
| case '{': |
| throw converter.syntaxError("{ not allowed in subpattern group"); |
| case '*': |
| converter.pushState(STAR); |
| break; |
| case '\\': |
| converter.pushState(ESCAPE); |
| break; |
| case '}': |
| converter.appendCurlyBraceEnd(); |
| converter.popState(); |
| break; |
| case ',': |
| converter.appendSubpatternSeparator(); |
| break; |
| default: |
| converter.append(c); |
| } |
| } |
| |
| @Override |
| void finish(GlobToRegex converter) { |
| throw converter.syntaxError("Unclosed {"); |
| } |
| |
| @Override |
| public String toString() { |
| return "CURLY_BRACE"; |
| } |
| }; |
| } |