src/com/android/tradefed/util/QuotationAwareTokenizer.java - platform/tools/tradefederation - Git at Google

 /*
  * Copyright (C) 2010 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package com.android.tradefed.util;

 import com.android.ddmlib.Log;

 import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 /**
  * Static helpers for splitting a command line into tokens while honoring double-quoted sections,
  * and for joining tokens back into a single line.
  */
 public class QuotationAwareTokenizer {
     private static final String LOG_TAG = "TOKEN";

     /**
      * Matches one "character" of input: either a backslash followed by any character (an escaped
      * character) or any single character.  The escaped alternative is listed first so that it
      * takes precedence.  Compiled once because the pattern never changes between calls.
      */
     private static final Pattern CHAR_PATTERN = Pattern.compile("\\\\.|.");

     /** Matches a single whitespace character; decides whether a token must be quoted. */
     private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");

     /**
      * Tokenizes the string, splitting on spaces that are not inside a double-quoted section.
      * <p>
      * How the tokenizer works:
      * <ol>
      *     <li> The line is consumed one "character" at a time, where a "character" is either an
      *          escaped character (a backslash plus the character that follows it) or a single
      *          plain character.
      *     <li> For each "character":
      *     <ol>
      *         <li> A space ends the current token, unless we are inside a quotation, in which
      *              case the space becomes part of the token.  Runs of spaces never produce empty
      *              tokens.
      *         <li> An unescaped double-quote mark toggles the "inside a quotation" state and is
      *              not added to the token.
      *         <li> Anything else, including an escaped character with its backslash, is appended
      *              to the token unchanged.
      *     </ol>
      *     <li> At the end of the line:
      *     <ol>
      *         <li> If the last "character" is a lone backslash, or a quotation is still open, an
      *              exception is thrown.
      *         <li> Otherwise the token being built, if non-empty, becomes the final token.
      *     </ol>
      * </ol>
      * <p>
      * Note that the regex {@code .} does not match line terminators, so a line terminator in the
      * input is skipped rather than added to a token.
      *
      * @param line A {@link String} to be tokenized; must not be {@code null}
      * @return The tokens found in {@code line}, in order; empty if the line holds no tokens
      * @throws IllegalArgumentException if {@code line} is {@code null}, ends inside a quotation,
      *         or ends with a lone escape character
      */
     public static String[] tokenizeLine(String line) throws IllegalArgumentException {
         if (line == null) {
             throw new IllegalArgumentException("line is null");
         }

         ArrayList<String> tokens = new ArrayList<String>();
         StringBuilder token = new StringBuilder();
         final Matcher charMatcher = CHAR_PATTERN.matcher(line);
         // Holds the most recent "character"; inspected after the loop to detect a trailing escape
         String aChar = "";
         boolean quotation = false;

         Log.d(LOG_TAG, String.format("Trying to tokenize the line '%s'", line));
         while (charMatcher.find()) {
             aChar = charMatcher.group();
             Log.v(LOG_TAG, String.format("Got a character: '%s'", aChar));

             if (" ".equals(aChar)) {
                 if (quotation) {
                     // inside a quotation; treat spaces as part of the token
                     token.append(aChar);
                 } else if (token.length() > 0) {
                     // this is the end of a non-empty token; dump it in our list of tokens,
                     // clear our temp storage, and keep rolling
                     Log.v(LOG_TAG, String.format("Finished token '%s'", token.toString()));
                     tokens.add(token.toString());
                     token.setLength(0);
                 }
                 // otherwise, this is the non-first in a sequence of spaces; ignore.
             } else if ("\"".equals(aChar)) {
                 // unescaped quotation mark; flip quotation state
                 Log.v(LOG_TAG, "Flipped quotation state");
                 quotation = !quotation;
             } else {
                 // default case: add the character to the token being built
                 Log.v(LOG_TAG, String.format("Adding character '%s' to token '%s'", aChar, token));
                 token.append(aChar);
             }
         }

         if (quotation || "\\".equals(aChar)) {
             // We ended in a quotation or with an escape character; this is not valid
             throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " +
                     "character");
         }

         // Add the final token to the tokens array.
         if (token.length() > 0) {
             Log.v(LOG_TAG, String.format("Finished final token '%s'", token.toString()));
             tokens.add(token.toString());
         }

         return tokens.toArray(new String[tokens.size()]);
     }

     /**
      * Performs the reverse of {@link #tokenizeLine(String)}: joins the given tokens into a single
      * line, separated by single spaces.
      * <p>
      * A token that contains any whitespace character is wrapped in double-quote marks so that it
      * stays a single token.  Token contents are emitted verbatim; no escaping is applied, so a
      * token that itself contains a double-quote mark is written out as-is.
      *
      * @param tokens the tokens to combine; neither the array nor its elements may be
      *        {@code null}
      * @return the combined line; an empty string if no tokens are given
      */
     public static String combineTokens(String... tokens) {
         StringBuilder sb = new StringBuilder();
         for (int i = 0; i < tokens.length; i++) {
             if (i > 0) {
                 // separate tokens with a single space; nothing before the first token
                 sb.append(' ');
             }
             final String token = tokens[i];
             if (WHITESPACE_PATTERN.matcher(token).find()) {
                 sb.append('"').append(token).append('"');
             } else {
                 sb.append(token);
             }
         }
         return sb.toString();
     }
 }
	/*
	* Copyright (C) 2010 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package com.android.tradefed.util;

	import com.android.ddmlib.Log;

	import java.util.ArrayList;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	public class QuotationAwareTokenizer {
	private static final String LOG_TAG = "TOKEN";

	/**
	* Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
	* double-quote marks.
	* <p/>
	* How the tokenizer works:
	* <ol>
	* <li> Split the string into "characters" where each "character" is either an escaped
	* character like \" (that is, "\\\"") or a single real character like f (just "f").
	* <li> For each "character"
	* <ol>
	* <li> If it's a space, finish a token unless we're being quoted
	* <li> If it's a quotation mark, flip the "we're being quoted" bit
	* <li> Otherwise, add it to the token being built
	* </ol>
	* <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
	* <ol>
	* <li> If the last "character" is an escape character, throw an exception; that's not
	* valid
	* <li> If we're in the middle of a quotation, throw an exception; that's not valid
	* <li> Otherwise, add the final token to (tokens)
	* </ol>
	* <li> Return a String[] version of (tokens)
	* </ol>
	*
	* @param line A {@link String} to be tokenized
	* @return A tokenized version of the string
	* @throws IllegalArgumentException if the line cannot be parsed
	*/
	public static String[] tokenizeLine(String line) throws IllegalArgumentException {
	if (line == null) {
	throw new IllegalArgumentException("line is null");
	}

	ArrayList<String> tokens = new ArrayList<String>();
	StringBuilder token = new StringBuilder();
	// This pattern matches an escaped character or a character. Escaped char takes precedence
	final Pattern charPattern = Pattern.compile("\\\\.\|.");
	final Matcher charMatcher = charPattern.matcher(line);
	String aChar = "";
	boolean quotation = false;

	Log.d(LOG_TAG, String.format("Trying to tokenize the line '%s'", line));
	while (charMatcher.find()) {
	aChar = charMatcher.group();
	Log.v(LOG_TAG, String.format("Got a character: '%s'", aChar));

	if (" ".equals(aChar)) {
	if (quotation) {
	// inside a quotation; treat spaces as part of the token
	token.append(aChar);
	} else {
	if (token.length() > 0) {
	// this is the end of a non-empty token; dump it in our list of tokens,
	// clear our temp storage, and keep rolling
	Log.v(LOG_TAG, String.format("Finished token '%s'", token.toString()));
	tokens.add(token.toString());
	token.delete(0, token.length());
	}
	// otherwise, this is the non-first in a sequence of spaces; ignore.
	}
	} else if ("\"".equals(aChar)) {
	// unescaped quotation mark; flip quotation state
	Log.v(LOG_TAG, "Flipped quotation state");
	quotation ^= true;
	} else {
	// default case: add the character to the token being built
	Log.v(LOG_TAG, String.format("Adding character '%s' to token '%s'", aChar, token));
	token.append(aChar);
	}
	}

	if (quotation \|\| "\\".equals(aChar)) {
	// We ended in a quotation or with an escape character; this is not valid
	throw new IllegalArgumentException("Unexpected EOL in a quotation or after an escape " +
	"character");
	}

	// Add the final token to the tokens array.
	if (token.length() > 0) {
	Log.v(LOG_TAG, String.format("Finished final token '%s'", token.toString()));
	tokens.add(token.toString());
	token.delete(0, token.length());
	}

	String[] tokensArray = new String[tokens.size()];
	return tokens.toArray(tokensArray);
	}

	/**
	* Perform the reverse of {@link tokenizeLine}. <br/>
	* Given array of tokens, combine them into a single line.
	*
	* @param tokens
	* @return
	*/
	public static String combineTokens(String... tokens) {
	final Pattern wsPattern = Pattern.compile("\\s");
	StringBuilder sb = new StringBuilder();
	for (int i=0; i < tokens.length; i++) {
	final String token = tokens[i];
	final Matcher wsMatcher = wsPattern.matcher(token);
	if (wsMatcher.find()) {
	sb.append('"');
	sb.append(token);
	sb.append('"');
	} else {
	sb.append(token);
	}
	if (i < (tokens.length - 1)) {
	// don't output space after last token
	sb.append(' ');
	}
	}
	return sb.toString();
	}
	}