jimfs/src/main/java/com/google/common/jimfs/GlobToRegex.java - platform/external/jimfs - Git at Google

 /*
  * Copyright 2013 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.google.common.jimfs;

 import static com.google.common.base.Preconditions.checkNotNull;

 import java.util.ArrayDeque;
 import java.util.Deque;
 import java.util.regex.PatternSyntaxException;

 /**
  * Translates globs to regex patterns.
  *
  * <p>The translation is a single left-to-right pass over the glob driven by a stack of {@link
  * State}s. Each instance is used for exactly one conversion; the only entry point is {@link
  * #toRegex(String, String)}.
  *
  * @author Colin Decker
  */
 final class GlobToRegex {

   /**
    * Converts the given glob to a regular expression pattern. The given separators determine what
    * characters the resulting expression breaks on for glob expressions such as * which should not
    * cross directory boundaries.
    *
    * <p>Basic conversions (assuming / as only separator):
    *
    * <pre>{@code
    * ?        = [^/]
    * *        = [^/]*
    * **       = .*
    * [a-z]    = [[^/]&&[a-z]]
    * [!a-z]   = [[^/]&&[^a-z]]
    * {a,b,c}  = (a|b|c)
    * }</pre>
    *
    * @param glob the glob to translate; must not be null
    * @param separators the characters that act as path separators
    * @return the source of a regular expression equivalent to {@code glob}
    * @throws PatternSyntaxException if the glob ends with a hanging escape, contains an empty or
    *     unclosed bracket expression, or contains a nested or unclosed brace group
    */
   public static String toRegex(String glob, String separators) {
     return new GlobToRegex(glob, separators).convert();
   }

   /** Characters that must be backslash-escaped when emitted outside a regex character class. */
   private static final InternalCharMatcher REGEX_RESERVED =
       InternalCharMatcher.anyOf("^$.?+*\\[]{}()");

   private final String glob;
   private final String separators;
   private final InternalCharMatcher separatorMatcher;

   /** Accumulates the regex being produced. */
   private final StringBuilder builder = new StringBuilder();

   /** State stack; the head of the deque is the current state. */
   private final Deque<State> states = new ArrayDeque<>();

   /** Position in {@code glob} of the character being processed; reported in syntax errors. */
   private int index;

   private GlobToRegex(String glob, String separators) {
     this.glob = checkNotNull(glob);
     this.separators = separators;
     this.separatorMatcher = InternalCharMatcher.anyOf(separators);
   }

   /**
    * Converts the glob to a regex one character at a time. A state stack (states) is maintained,
    * with the state at the top of the stack being the current state at any given time. The current
    * state is always used to process the next character. When a state processes a character, it may
    * pop the current state or push a new state as the current state. The resulting regex is written
    * to {@code builder}.
    *
    * <p>Once the input is exhausted, every state still on the stack is finished, innermost first.
    * Finishing only the top state would let an enclosing unterminated construct go unreported: for
    * the glob <code>{a*</code> the pending STAR state would emit its regex and the unclosed brace
    * group beneath it would never be noticed, yielding an unbalanced regex.
    *
    * @throws PatternSyntaxException if any construct is still open at the end of the glob
    */
   private String convert() {
     pushState(NORMAL);
     for (index = 0; index < glob.length(); index++) {
       currentState().process(this, glob.charAt(index));
     }
     // Unwind the whole stack so that outer states also get to validate/flush themselves.
     while (!states.isEmpty()) {
       currentState().finish(this);
       popState();
     }
     return builder.toString();
   }

   /**
    * Enters the given state. The current state becomes the previous state.
    */
   private void pushState(State state) {
     states.push(state);
   }

   /**
    * Returns to the previous state.
    */
   private void popState() {
     states.pop();
   }

   /**
    * Returns the current state.
    */
   private State currentState() {
     return states.peek();
   }

   /**
    * Throws a {@link PatternSyntaxException} for the glob at the current index. This method never
    * returns normally; the declared return type only exists so that callers can write
    * {@code throw converter.syntaxError(...)} and have the compiler see the statement as terminal.
    */
   private PatternSyntaxException syntaxError(String desc) {
     throw new PatternSyntaxException(desc, glob, index);
   }

   /**
    * Appends the given character as-is to the regex.
    */
   private void appendExact(char c) {
     builder.append(c);
   }

   /**
    * Appends the regex form of the given normal character or separator from the glob.
    */
   private void append(char c) {
     if (separatorMatcher.matches(c)) {
       appendSeparator();
     } else {
       appendNormal(c);
     }
   }

   /**
    * Appends the regex form of the given normal character from the glob, escaping it if it is a
    * regex metacharacter.
    */
   private void appendNormal(char c) {
     if (REGEX_RESERVED.matches(c)) {
       builder.append('\\');
     }
     builder.append(c);
   }

   /**
    * Appends the regex form matching the separators for the path type: the single separator itself,
    * or a character class of all separators when there is more than one.
    */
   private void appendSeparator() {
     if (separators.length() == 1) {
       appendNormal(separators.charAt(0));
     } else {
       builder.append('[');
       for (int i = 0; i < separators.length(); i++) {
         appendInBracket(separators.charAt(i));
       }
       builder.append("]");
     }
   }

   /**
    * Appends the regex form that matches anything except the separators for the path type.
    */
   private void appendNonSeparator() {
     builder.append("[^");
     for (int i = 0; i < separators.length(); i++) {
       appendInBracket(separators.charAt(i));
     }
     builder.append(']');
   }

   /**
    * Appends the regex form of the glob ? character.
    */
   private void appendQuestionMark() {
     appendNonSeparator();
   }

   /**
    * Appends the regex form of the glob * character.
    */
   private void appendStar() {
     appendNonSeparator();
     builder.append('*');
   }

   /**
    * Appends the regex form of the glob ** pattern.
    */
   private void appendStarStar() {
     builder.append(".*");
   }

   /**
    * Appends the regex form of the start of a glob [] section: an intersection of "not a separator"
    * with the class that the following characters describe.
    */
   private void appendBracketStart() {
     builder.append('[');
     appendNonSeparator();
     builder.append("&&[");
   }

   /**
    * Appends the regex form of the end of a glob [] section, closing both the inner class and the
    * enclosing intersection opened by {@link #appendBracketStart()}.
    */
   private void appendBracketEnd() {
     builder.append("]]");
   }

   /**
    * Appends the regex form of the given character within a glob [] section.
    */
   private void appendInBracket(char c) {
     // escape \ in regex character class
     if (c == '\\') {
       builder.append('\\');
     }

     builder.append(c);
   }

   /**
    * Appends the regex form of the start of a glob {} section.
    */
   private void appendCurlyBraceStart() {
     builder.append('(');
   }

   /**
    * Appends the regex form of the separator (,) within a glob {} section.
    */
   private void appendSubpatternSeparator() {
     builder.append('|');
   }

   /**
    * Appends the regex form of the end of a glob {} section.
    */
   private void appendCurlyBraceEnd() {
     builder.append(')');
   }

   /**
    * Converter state.
    */
   private abstract static class State {
     /**
      * Process the next character with the current state, transitioning the converter to a new
      * state if necessary.
      */
     abstract void process(GlobToRegex converter, char c);

     /**
      * Called after all characters have been read, for each state still on the stack. The default
      * does nothing; states that represent an unterminated construct throw instead.
      */
     void finish(GlobToRegex converter) {}
   }

   /**
    * Normal state.
    */
   private static final State NORMAL =
       new State() {
         @Override
         void process(GlobToRegex converter, char c) {
           switch (c) {
             case '?':
               converter.appendQuestionMark();
               return;
             case '[':
               converter.appendBracketStart();
               converter.pushState(BRACKET_FIRST_CHAR);
               return;
             case '{':
               converter.appendCurlyBraceStart();
               converter.pushState(CURLY_BRACE);
               return;
             case '*':
               // Output is deferred until the next character shows whether this is * or **.
               converter.pushState(STAR);
               return;
             case '\\':
               converter.pushState(ESCAPE);
               return;
             default:
               converter.append(c);
           }
         }

         @Override
         public String toString() {
           return "NORMAL";
         }
       };

   /**
    * State following the reading of a single \.
    */
   private static final State ESCAPE =
       new State() {
         @Override
         void process(GlobToRegex converter, char c) {
           converter.append(c);
           converter.popState();
         }

         @Override
         void finish(GlobToRegex converter) {
           throw converter.syntaxError("Hanging escape (\\) at end of pattern");
         }

         @Override
         public String toString() {
           return "ESCAPE";
         }
       };

   /**
    * State following the reading of a single *.
    */
   private static final State STAR =
       new State() {
         @Override
         void process(GlobToRegex converter, char c) {
           if (c == '*') {
             converter.appendStarStar();
             converter.popState();
           } else {
             // A lone *: emit it, then let the enclosing state handle the character just read.
             converter.appendStar();
             converter.popState();
             converter.currentState().process(converter, c);
           }
         }

         @Override
         void finish(GlobToRegex converter) {
           // The glob ended right after a single *, which is still a valid wildcard.
           converter.appendStar();
         }

         @Override
         public String toString() {
           return "STAR";
         }
       };

   /**
    * State immediately following the reading of a [.
    */
   private static final State BRACKET_FIRST_CHAR =
       new State() {
         @Override
         void process(GlobToRegex converter, char c) {
           if (c == ']') {
             // A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for example)
             // but doesn't work for the default java.nio.file implementations. In the cases of "[]]" it
             // produces:
             // java.util.regex.PatternSyntaxException: Unclosed character class near index 13
             // ^[[^/]&&[]]\]$
             //              ^
             // The error here is slightly different, but trying to make this work would require some
             // kind of lookahead and break the simplicity of char-by-char conversion here. Also, if
             // someone wants to include a ']' inside a character class, they should escape it.
             throw converter.syntaxError("Empty []");
           }
           if (c == '!') {
             // A leading ! negates the glob class; the regex spelling of that is ^.
             converter.appendExact('^');
           } else if (c == '-') {
             converter.appendExact(c);
           } else {
             converter.appendInBracket(c);
           }
           converter.popState();
           converter.pushState(BRACKET);
         }

         @Override
         void finish(GlobToRegex converter) {
           throw converter.syntaxError("Unclosed [");
         }

         @Override
         public String toString() {
           return "BRACKET_FIRST_CHAR";
         }
       };

   /**
    * State inside [brackets], but not at the first character inside the brackets.
    */
   private static final State BRACKET =
       new State() {
         @Override
         void process(GlobToRegex converter, char c) {
           if (c == ']') {
             converter.appendBracketEnd();
             converter.popState();
           } else {
             converter.appendInBracket(c);
           }
         }

         @Override
         void finish(GlobToRegex converter) {
           throw converter.syntaxError("Unclosed [");
         }

         @Override
         public String toString() {
           return "BRACKET";
         }
       };

   /**
    * State inside {curly braces}.
    */
   private static final State CURLY_BRACE =
       new State() {
         @Override
         void process(GlobToRegex converter, char c) {
           switch (c) {
             case '?':
               converter.appendQuestionMark();
               break;
             case '[':
               converter.appendBracketStart();
               converter.pushState(BRACKET_FIRST_CHAR);
               break;
             case '{':
               throw converter.syntaxError("{ not allowed in subpattern group");
             case '*':
               converter.pushState(STAR);
               break;
             case '\\':
               converter.pushState(ESCAPE);
               break;
             case '}':
               converter.appendCurlyBraceEnd();
               converter.popState();
               break;
             case ',':
               converter.appendSubpatternSeparator();
               break;
             default:
               converter.append(c);
           }
         }

         @Override
         void finish(GlobToRegex converter) {
           throw converter.syntaxError("Unclosed {");
         }

         @Override
         public String toString() {
           return "CURLY_BRACE";
         }
       };
 }
	/*
	* Copyright 2013 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.google.common.jimfs;

	import static com.google.common.base.Preconditions.checkNotNull;

	import java.util.ArrayDeque;
	import java.util.Deque;
	import java.util.regex.PatternSyntaxException;

	/**
	* Translates globs to regex patterns.
	*
	* @author Colin Decker
	*/
	final class GlobToRegex {

	/**
	* Converts the given glob to a regular expression pattern. The given separators determine what
	* characters the resulting expression breaks on for glob expressions such as * which should not
	* cross directory boundaries.
	*
	* <p>Basic conversions (assuming / as only separator):
	*
	* <pre>{@code
	* ? = [^/]
	* * = [^/]*
	* ** = .*
	* [a-z] = [[^/]&&[a-z]]
	* [!a-z] = [[^/]&&[^a-z]]
	* {a,b,c} = (a\|b\|c)
	* }</pre>
	*/
	public static String toRegex(String glob, String separators) {
	return new GlobToRegex(glob, separators).convert();
	}

	private static final InternalCharMatcher REGEX_RESERVED =
	InternalCharMatcher.anyOf("^$.?+*\\[]{}()");

	private final String glob;
	private final String separators;
	private final InternalCharMatcher separatorMatcher;

	private final StringBuilder builder = new StringBuilder();
	private final Deque<State> states = new ArrayDeque<>();
	private int index;

	private GlobToRegex(String glob, String separators) {
	this.glob = checkNotNull(glob);
	this.separators = separators;
	this.separatorMatcher = InternalCharMatcher.anyOf(separators);
	}

	/**
	* Converts the glob to a regex one character at a time. A state stack (states) is maintained,
	* with the state at the top of the stack being the current state at any given time. The current
	* state is always used to process the next character. When a state processes a character, it may
	* pop the current state or push a new state as the current state. The resulting regex is written
	* to {@code builder}.
	*/
	private String convert() {
	pushState(NORMAL);
	for (index = 0; index < glob.length(); index++) {
	currentState().process(this, glob.charAt(index));
	}
	currentState().finish(this);
	return builder.toString();
	}

	/**
	* Enters the given state. The current state becomes the previous state.
	*/
	private void pushState(State state) {
	states.push(state);
	}

	/**
	* Returns to the previous state.
	*/
	private void popState() {
	states.pop();
	}

	/**
	* Returns the current state.
	*/
	private State currentState() {
	return states.peek();
	}

	/**
	* Throws a {@link PatternSyntaxException}.
	*/
	private PatternSyntaxException syntaxError(String desc) {
	throw new PatternSyntaxException(desc, glob, index);
	}

	/**
	* Appends the given character as-is to the regex.
	*/
	private void appendExact(char c) {
	builder.append(c);
	}

	/**
	* Appends the regex form of the given normal character or separator from the glob.
	*/
	private void append(char c) {
	if (separatorMatcher.matches(c)) {
	appendSeparator();
	} else {
	appendNormal(c);
	}
	}

	/**
	* Appends the regex form of the given normal character from the glob.
	*/
	private void appendNormal(char c) {
	if (REGEX_RESERVED.matches(c)) {
	builder.append('\\');
	}
	builder.append(c);
	}

	/**
	* Appends the regex form matching the separators for the path type.
	*/
	private void appendSeparator() {
	if (separators.length() == 1) {
	appendNormal(separators.charAt(0));
	} else {
	builder.append('[');
	for (int i = 0; i < separators.length(); i++) {
	appendInBracket(separators.charAt(i));
	}
	builder.append("]");
	}
	}

	/**
	* Appends the regex form that matches anything except the separators for the path type.
	*/
	private void appendNonSeparator() {
	builder.append("[^");
	for (int i = 0; i < separators.length(); i++) {
	appendInBracket(separators.charAt(i));
	}
	builder.append(']');
	}

	/**
	* Appends the regex form of the glob ? character.
	*/
	private void appendQuestionMark() {
	appendNonSeparator();
	}

	/**
	* Appends the regex form of the glob * character.
	*/
	private void appendStar() {
	appendNonSeparator();
	builder.append('*');
	}

	/**
	* Appends the regex form of the glob ** pattern.
	*/
	private void appendStarStar() {
	builder.append(".*");
	}

	/**
	* Appends the regex form of the start of a glob [] section.
	*/
	private void appendBracketStart() {
	builder.append('[');
	appendNonSeparator();
	builder.append("&&[");
	}

	/**
	* Appends the regex form of the end of a glob [] section.
	*/
	private void appendBracketEnd() {
	builder.append("]]");
	}

	/**
	* Appends the regex form of the given character within a glob [] section.
	*/
	private void appendInBracket(char c) {
	// escape \ in regex character class
	if (c == '\\') {
	builder.append('\\');
	}

	builder.append(c);
	}

	/**
	* Appends the regex form of the start of a glob {} section.
	*/
	private void appendCurlyBraceStart() {
	builder.append('(');
	}

	/**
	* Appends the regex form of the separator (,) within a glob {} section.
	*/
	private void appendSubpatternSeparator() {
	builder.append('\|');
	}

	/**
	* Appends the regex form of the end of a glob {} section.
	*/
	private void appendCurlyBraceEnd() {
	builder.append(')');
	}

	/**
	* Converter state.
	*/
	private abstract static class State {
	/**
	* Process the next character with the current state, transitioning the converter to a new
	* state if necessary.
	*/
	abstract void process(GlobToRegex converter, char c);

	/**
	* Called after all characters have been read.
	*/
	void finish(GlobToRegex converter) {}
	}

	/**
	* Normal state.
	*/
	private static final State NORMAL =
	new State() {
	@Override
	void process(GlobToRegex converter, char c) {
	switch (c) {
	case '?':
	converter.appendQuestionMark();
	return;
	case '[':
	converter.appendBracketStart();
	converter.pushState(BRACKET_FIRST_CHAR);
	return;
	case '{':
	converter.appendCurlyBraceStart();
	converter.pushState(CURLY_BRACE);
	return;
	case '*':
	converter.pushState(STAR);
	return;
	case '\\':
	converter.pushState(ESCAPE);
	return;
	default:
	converter.append(c);
	}
	}

	@Override
	public String toString() {
	return "NORMAL";
	}
	};

	/**
	* State following the reading of a single \.
	*/
	private static final State ESCAPE =
	new State() {
	@Override
	void process(GlobToRegex converter, char c) {
	converter.append(c);
	converter.popState();
	}

	@Override
	void finish(GlobToRegex converter) {
	throw converter.syntaxError("Hanging escape (\\) at end of pattern");
	}

	@Override
	public String toString() {
	return "ESCAPE";
	}
	};

	/**
	* State following the reading of a single *.
	*/
	private static final State STAR =
	new State() {
	@Override
	void process(GlobToRegex converter, char c) {
	if (c == '*') {
	converter.appendStarStar();
	converter.popState();
	} else {
	converter.appendStar();
	converter.popState();
	converter.currentState().process(converter, c);
	}
	}

	@Override
	void finish(GlobToRegex converter) {
	converter.appendStar();
	}

	@Override
	public String toString() {
	return "STAR";
	}
	};

	/**
	* State immediately following the reading of a [.
	*/
	private static final State BRACKET_FIRST_CHAR =
	new State() {
	@Override
	void process(GlobToRegex converter, char c) {
	if (c == ']') {
	// A glob like "[]]" or "[]q]" is apparently fine in Unix (when used with ls for example)
	// but doesn't work for the default java.nio.file implementations. In the cases of "[]]" it
	// produces:
	// java.util.regex.PatternSyntaxException: Unclosed character class near index 13
	// ^[[^/]&&[]]\]$
	// ^
	// The error here is slightly different, but trying to make this work would require some
	// kind of lookahead and break the simplicity of char-by-char conversion here. Also, if
	// someone wants to include a ']' inside a character class, they should escape it.
	throw converter.syntaxError("Empty []");
	}
	if (c == '!') {
	converter.appendExact('^');
	} else if (c == '-') {
	converter.appendExact(c);
	} else {
	converter.appendInBracket(c);
	}
	converter.popState();
	converter.pushState(BRACKET);
	}

	@Override
	void finish(GlobToRegex converter) {
	throw converter.syntaxError("Unclosed [");
	}

	@Override
	public String toString() {
	return "BRACKET_FIRST_CHAR";
	}
	};

	/**
	* State inside [brackets], but not at the first character inside the brackets.
	*/
	private static final State BRACKET =
	new State() {
	@Override
	void process(GlobToRegex converter, char c) {
	if (c == ']') {
	converter.appendBracketEnd();
	converter.popState();
	} else {
	converter.appendInBracket(c);
	}
	}

	@Override
	void finish(GlobToRegex converter) {
	throw converter.syntaxError("Unclosed [");
	}

	@Override
	public String toString() {
	return "BRACKET";
	}
	};

	/**
	* State inside {curly braces}.
	*/
	private static final State CURLY_BRACE =
	new State() {
	@Override
	void process(GlobToRegex converter, char c) {
	switch (c) {
	case '?':
	converter.appendQuestionMark();
	break;
	case '[':
	converter.appendBracketStart();
	converter.pushState(BRACKET_FIRST_CHAR);
	break;
	case '{':
	throw converter.syntaxError("{ not allowed in subpattern group");
	case '*':
	converter.pushState(STAR);
	break;
	case '\\':
	converter.pushState(ESCAPE);
	break;
	case '}':
	converter.appendCurlyBraceEnd();
	converter.popState();
	break;
	case ',':
	converter.appendSubpatternSeparator();
	break;
	default:
	converter.append(c);
	}
	}

	@Override
	void finish(GlobToRegex converter) {
	throw converter.syntaxError("Unclosed {");
	}

	@Override
	public String toString() {
	return "CURLY_BRACE";
	}
	};
	}