src/com/android/tradefed/util/QuotationAwareTokenizer.java - platform/tools/tradefederation - Git at Google

 /*
  * Copyright (C) 2010 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package com.android.tradefed.util;

 import com.android.ddmlib.Log;
 import com.android.tradefed.error.HarnessRuntimeException;
 import com.android.tradefed.result.error.InfraErrorIdentifier;

 import java.util.ArrayList;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 public class QuotationAwareTokenizer {
     private static final String LOG_TAG = "TOKEN";

     /** Matches an escaped character (backslash plus one char) or a single character. */
     private static final Pattern CHAR_PATTERN = Pattern.compile("\\\\.|.");

     /** Matches any whitespace character; tokens containing one are quoted when combined. */
     private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");

     /** A finished token consisting of exactly this text is replaced by an empty string. */
     private static final String EMPTY_TOKEN_MARKER = "''";

     /**
      * Tokenizes the string, splitting on the specified delimiter. Does not split inside a pair of
      * unescaped double-quote marks.
      *
      * <p>How the tokenizer works:
      *
      * <ol>
      *   <li>Split the string into "characters", where each "character" is either an escaped
      *       character like \" (a backslash followed by one char) or a single real character.
      *   <li>For each "character":
      *       <ol>
      *         <li>If it equals {@code delim}, finish the current token unless we are inside a
      *             quotation, in which case it is kept as part of the token.
      *         <li>If it is an unescaped quotation mark, flip the "inside a quotation" state; the
      *             quotation mark itself is not added to the token.
      *         <li>Otherwise, add it to the token being built. Escaped characters are added
      *             verbatim, including the backslash.
      *       </ol>
      *   <li>At the end of the line, fail if a quotation is still open or if the last "character"
      *       is a lone backslash; otherwise finish the last token.
      *   <li>Return the tokens as a {@code String[]}.
      * </ol>
      *
      * <p>Empty tokens are never produced by consecutive delimiters. A token whose text is exactly
      * {@code ''} (two single quotes) is returned as the empty string.
      *
      * <p>Because {@code delim} is compared against each "character", it only ever matches when it
      * is a single character (or a two-character escape sequence). Note also that the character
      * pattern does not match line terminators, so any line terminator in {@code line} is skipped
      * rather than added to a token.
      *
      * @param line A {@link String} to be tokenized
      * @param delim the delimiter to split on
      * @param logging whether or not to log operations
      * @return A tokenized version of the string
      * @throws IllegalArgumentException if {@code line} or {@code delim} is null
      * @throws HarnessRuntimeException if the line ends inside a quotation or right after an escape
      *     character
      */
     public static String[] tokenizeLine(String line, String delim, boolean logging)
             throws IllegalArgumentException {
         if (line == null) {
             throw new IllegalArgumentException("line is null");
         }
         if (delim == null) {
             throw new IllegalArgumentException("delim is null");
         }

         ArrayList<String> tokens = new ArrayList<String>();
         StringBuilder token = new StringBuilder();
         final Matcher charMatcher = CHAR_PATTERN.matcher(line);
         String aChar = "";
         boolean quotation = false;

         if (logging) {
             // Only build the message when it will actually be logged.
             log(String.format("Trying to tokenize the line '%s'", line), true);
         }
         while (charMatcher.find()) {
             aChar = charMatcher.group();

             if (delim.equals(aChar)) {
                 if (quotation) {
                     // inside a quotation; treat the delimiter as part of the token
                     token.append(aChar);
                 } else {
                     // end of a token; a no-op for the non-first delimiter in a sequence
                     finishToken(token, tokens, "Finished token '%s'", logging);
                 }
             } else if ("\"".equals(aChar)) {
                 // unescaped quotation mark; flip quotation state
                 log("Flipped quotation state", logging);
                 quotation = !quotation;
             } else {
                 // default case: add the character to the token being built
                 token.append(aChar);
             }
         }

         if (quotation || "\\".equals(aChar)) {
             // We ended in a quotation or with an escape character; this is not valid
             throw new HarnessRuntimeException(
                     "Unexpected EOL in a quotation or after an escape character",
                     InfraErrorIdentifier.OPTION_CONFIGURATION_ERROR);
         }

         // The last token is not followed by a delimiter, so it has not been added yet.
         finishToken(token, tokens, "Finished final token '%s'", logging);

         return tokens.toArray(new String[tokens.size()]);
     }

     /**
      * Moves the token being built into the list of tokens and clears the builder. Does nothing
      * when the builder is empty.
      *
      * @param token the builder holding the current token; emptied when a token is added
      * @param tokens the list receiving the finished token
      * @param logFormat a format string with a single {@code %s} for the token text
      * @param logging whether or not to log the finished token
      */
     private static void finishToken(
             StringBuilder token, ArrayList<String> tokens, String logFormat, boolean logging) {
         if (token.length() == 0) {
             return;
         }
         final String text = token.toString();
         if (logging) {
             log(String.format(logFormat, text), true);
         }
         // '' is the way to spell an explicit empty string; restore it to an empty token.
         tokens.add(EMPTY_TOKEN_MARKER.equals(text) ? "" : text);
         token.setLength(0);
     }

     /**
      * Tokenizes the string, splitting on spaces, with logging enabled. Does not split inside a
      * pair of unescaped double-quote marks.
      *
      * <p>See also {@link #tokenizeLine(String, String, boolean)}
      *
      * @param line A {@link String} to be tokenized
      * @return A tokenized version of the string
      */
     public static String[] tokenizeLine(String line) {
         return tokenizeLine(line, " ", true);
     }

     /**
      * Tokenizes the string, splitting on the specified delimiter, with logging enabled. Does not
      * split inside a pair of unescaped double-quote marks.
      *
      * <p>See also {@link #tokenizeLine(String, String, boolean)}
      *
      * @param line A {@link String} to be tokenized
      * @param delim the delimiter to split on
      * @return A tokenized version of the string
      */
     public static String[] tokenizeLine(String line, String delim) {
         return tokenizeLine(line, delim, true);
     }

     /**
      * Tokenizes the string, splitting on spaces. Does not split inside a pair of unescaped
      * double-quote marks.
      *
      * <p>See also {@link #tokenizeLine(String, String, boolean)}
      *
      * @param line A {@link String} to be tokenized
      * @param logging whether or not to log operations
      * @return A tokenized version of the string
      */
     public static String[] tokenizeLine(String line, boolean logging) {
         return tokenizeLine(line, " ", logging);
     }

     /**
      * Perform the reverse of {@link #tokenizeLine(String)}.
      *
      * <p>Given an array of tokens, combine them into a single line separated by single spaces.
      * A token containing whitespace is wrapped in double quotes. Quotation marks inside a token
      * are not escaped and an empty token is emitted as nothing, so such tokens do not survive a
      * round trip through {@link #tokenizeLine(String)}.
      *
      * @param tokens the tokens to combine, in order
      * @return A {@link String} created from all the tokens.
      */
     public static String combineTokens(String... tokens) {
         StringBuilder sb = new StringBuilder();
         for (int i = 0; i < tokens.length; i++) {
             final String token = tokens[i];
             if (WHITESPACE_PATTERN.matcher(token).find()) {
                 sb.append('"').append(token).append('"');
             } else {
                 sb.append(token);
             }
             if (i < (tokens.length - 1)) {
                 // don't output space after last token
                 sb.append(' ');
             }
         }
         return sb.toString();
     }

     /**
      * Logs the message at verbose level under {@link #LOG_TAG} when {@code display} is true.
      *
      * @param message the message to log
      * @param display whether the message should be logged at all
      */
     private static void log(String message, boolean display) {
         if (display) {
             Log.v(LOG_TAG, message);
         }
     }
 }
	/*
	* Copyright (C) 2010 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package com.android.tradefed.util;

	import com.android.ddmlib.Log;
	import com.android.tradefed.error.HarnessRuntimeException;
	import com.android.tradefed.result.error.InfraErrorIdentifier;

	import java.util.ArrayList;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	public class QuotationAwareTokenizer {
	private static final String LOG_TAG = "TOKEN";

	/**
	* Tokenizes the string, splitting on specified delimiter. Does not split between consecutive,
	* unquoted double-quote marks.
	*
	* <p>How the tokenizer works:
	*
	* <ol>
	* <li> Split the string into "characters" where each "character" is either an escaped
	* character like \" (that is, "\\\"") or a single real character like f (just "f").
	* <li> For each "character"
	* <ol>
	* <li> If it's a space, finish a token unless we're being quoted
	* <li> If it's a quotation mark, flip the "we're being quoted" bit
	* <li> Otherwise, add it to the token being built
	* </ol>
	*
	* <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
	* <ol>
	* <li> If the last "character" is an escape character, throw an exception; that's not
	* valid
	* <li> If we're in the middle of a quotation, throw an exception; that's not valid
	* <li> Otherwise, add the final token to (tokens)
	* </ol>
	*
	* <li> Return a String[] version of (tokens)
	* </ol>
	*
	* @param line A {@link String} to be tokenized
	* @param delim the delimiter to split on
	* @param logging whether or not to log operations
	* @return A tokenized version of the string
	* @throws IllegalArgumentException if the line cannot be parsed
	*/
	public static String[] tokenizeLine(String line, String delim, boolean logging)
	throws IllegalArgumentException {
	if (line == null) {
	throw new IllegalArgumentException("line is null");
	}

	ArrayList<String> tokens = new ArrayList<String>();
	StringBuilder token = new StringBuilder();
	// This pattern matches an escaped character or a character. Escaped char takes precedence
	final Pattern charPattern = Pattern.compile("\\\\.\|.");
	final Matcher charMatcher = charPattern.matcher(line);
	String aChar = "";
	boolean quotation = false;

	log(String.format("Trying to tokenize the line '%s'", line), logging);
	while (charMatcher.find()) {
	aChar = charMatcher.group();

	if (delim.equals(aChar)) {
	if (quotation) {
	// inside a quotation; treat spaces as part of the token
	token.append(aChar);
	} else {
	if (token.length() > 0) {
	// this is the end of a non-empty token; dump it in our list of tokens,
	// clear our temp storage, and keep rolling
	log(String.format("Finished token '%s'", token.toString()), logging);
	// Handle escaped empty string by '' to restore and empty string
	if (token.toString().equals("''")) {
	tokens.add("");
	} else {
	tokens.add(token.toString());
	}
	token.delete(0, token.length());
	}
	// otherwise, this is the non-first in a sequence of spaces; ignore.
	}
	} else if ("\"".equals(aChar)) {
	// unescaped quotation mark; flip quotation state
	log("Flipped quotation state", logging);
	quotation ^= true;
	} else {
	// default case: add the character to the token being built
	token.append(aChar);
	}
	}

	if (quotation \|\| "\\".equals(aChar)) {
	// We ended in a quotation or with an escape character; this is not valid
	throw new HarnessRuntimeException(
	"Unexpected EOL in a quotation or after an escape " + "character",
	InfraErrorIdentifier.OPTION_CONFIGURATION_ERROR);
	}

	// Add the final token to the tokens array.
	if (token.length() > 0) {
	log(String.format("Finished final token '%s'", token.toString()), logging);
	// Handle escaped empty string by '' to restore and empty string
	if (token.toString().equals("''")) {
	tokens.add("");
	} else {
	tokens.add(token.toString());
	}
	token.delete(0, token.length());
	}

	String[] tokensArray = new String[tokens.size()];
	return tokens.toArray(tokensArray);
	}

	/**
	* Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
	* double-quote marks.
	*
	* <p>See also {@link #tokenizeLine(String, String)}
	*/
	public static String[] tokenizeLine(String line) {
	return tokenizeLine(line, " ", true);
	}

	public static String[] tokenizeLine(String line, String delim) {
	return tokenizeLine(line, delim, true);
	}

	/**
	* Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
	* double-quote marks.
	*
	* <p>See also {@link #tokenizeLine(String, String)}
	*/
	public static String[] tokenizeLine(String line, boolean logging) {
	return tokenizeLine(line, " ", logging);
	}

	/**
	* Perform the reverse of {@link #tokenizeLine(String)}. <br/>
	* Given array of tokens, combine them into a single line.
	*
	* @param tokens
	* @return A {@link String} created from all the tokens.
	*/
	public static String combineTokens(String... tokens) {
	final Pattern wsPattern = Pattern.compile("\\s");
	StringBuilder sb = new StringBuilder();
	for (int i=0; i < tokens.length; i++) {
	final String token = tokens[i];
	final Matcher wsMatcher = wsPattern.matcher(token);
	if (wsMatcher.find()) {
	sb.append('"');
	sb.append(token);
	sb.append('"');
	} else {
	sb.append(token);
	}
	if (i < (tokens.length - 1)) {
	// don't output space after last token
	sb.append(' ');
	}
	}
	return sb.toString();
	}

	private static void log(String message, boolean display) {
	if (display) {
	Log.v(LOG_TAG, message);
	}
	}
	}