// src/lib.rs - platform/external/rust/crates/shlex - Git at Google

 // Copyright 2015 Nicholas Allegra (comex).
 // Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
 // the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
 // copied, modified, or distributed except according to those terms.

 //! Same idea as (but implementation not directly based on) the Python shlex module.  However, this
 //! implementation does not support any of the Python module's customization because it makes
 //! parsing slower and is fairly useless.  You only get the default settings of shlex.split, which
 //! mimic the POSIX shell:
 //! http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
 //!
 //! This implementation also deviates from the Python version in not treating \r specially, which I
 //! believe is more compliant.
 //!
 //! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
 //! directly as a micro-optimization.

 use std::borrow::Cow;

 /// An iterator that takes an input string and splits it into words using the same syntax as
 /// the POSIX shell.
 ///
 /// The lexer borrows the input for its whole lifetime and consumes it one byte at a time.
 #[derive(Debug, Clone)]
 pub struct Shlex<'a> {
     /// The input bytes that have not been consumed yet.
     in_iter: std::str::Bytes<'a>,
     /// The number of newlines read so far, plus one.
     pub line_no: usize,
     /// An input string is erroneous if it ends while inside a quotation or right after an
     /// unescaped backslash.  Since Iterator does not have a mechanism to return an error, if that
     /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
     /// true; best to check it after you're done iterating.
     pub had_error: bool,
 }

 impl<'a> Shlex<'a> {
     /// Creates a lexer positioned at the start of `in_str`, with `line_no` set to 1 and no error
     /// recorded.
     pub fn new(in_str: &'a str) -> Self {
         Shlex {
             had_error: false,
             line_no: 1,
             in_iter: in_str.bytes(),
         }
     }

     /// Reads one word whose first byte, `ch`, has already been taken from the input.
     ///
     /// Consumes input up to and including the unquoted space, tab or newline that ends the word,
     /// or up to the end of input.  Returns `None` and sets `had_error` when the input ends inside
     /// a quotation or directly after a backslash.
     fn parse_word(&mut self, mut ch: u8) -> Option<String> {
         let mut word: Vec<u8> = Vec::new();
         loop {
             let outcome = match ch {
                 b'"' => self.parse_double(&mut word),
                 b'\'' => self.parse_single(&mut word),
                 b'\\' => match self.next_char() {
                     // backslash-newline is a line continuation and contributes nothing
                     Some(b'\n') => Ok(()),
                     Some(escaped) => {
                         word.push(escaped);
                         Ok(())
                     }
                     // dangling backslash at end of input
                     None => Err(()),
                 },
                 b' ' | b'\t' | b'\n' => break,
                 plain => {
                     word.push(plain);
                     Ok(())
                 }
             };
             if outcome.is_err() {
                 self.had_error = true;
                 return None;
             }
             match self.next_char() {
                 Some(following) => ch = following,
                 None => break,
             }
         }
         // SAFETY: the bytes originate from a `&str` and are copied in their original order; the
         // only bytes this parser ever leaves out are ASCII (quotes, backslashes, escaped newlines
         // and the delimiter), so multi-byte sequences stay intact and the buffer is valid UTF-8.
         Some(unsafe { String::from_utf8_unchecked(word) })
     }

     /// Copies the body of a double-quoted section into `result`; the opening `"` has already
     /// been consumed.  Returns `Err(())` if the input ends before the closing quote.
     fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
         loop {
             match self.next_char().ok_or(())? {
                 b'"' => return Ok(()),
                 b'\\' => {
                     let escaped = self.next_char().ok_or(())?;
                     match escaped {
                         // \<newline> => nothing
                         b'\n' => {}
                         // \$ => $
                         b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
                         // \x => \x (the backslash is kept)
                         _ => {
                             result.push(b'\\');
                             result.push(escaped);
                         }
                     }
                 }
                 literal => result.push(literal),
             }
         }
     }

     /// Copies the body of a single-quoted section into `result`; the opening `'` has already
     /// been consumed.  Returns `Err(())` if the input ends before the closing quote.
     ///
     /// NOTE(review): POSIX gives backslash no special meaning inside single quotes, whereas this
     /// code lets `\'` and `\\` act as escapes.  The behaviour is pinned by the test table in this
     /// file, so it is preserved here; confirm whether the deviation is intended.
     fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
         loop {
             match self.next_char().ok_or(())? {
                 b'\'' => return Ok(()),
                 b'\\' => {
                     let escaped = self.next_char().ok_or(())?;
                     match escaped {
                         // for single quotes, only these can be escaped
                         b'\'' | b'\\' => result.push(escaped),
                         _ => {
                             result.push(b'\\');
                             result.push(escaped);
                         }
                     }
                 }
                 literal => result.push(literal),
             }
         }
     }

     /// Takes the next input byte, bumping `line_no` whenever that byte is a newline.
     fn next_char(&mut self) -> Option<u8> {
         let byte = self.in_iter.next()?;
         if byte == b'\n' {
             self.line_no += 1;
         }
         Some(byte)
     }
 }

 impl<'a> Iterator for Shlex<'a> {
     type Item = String;

     /// Produces the next word, or `None` when the input is exhausted or the word was malformed
     /// (in which case `had_error` is set by `parse_word`).
     ///
     /// Spaces, tabs and newlines before a word are skipped, and a `#` found where a word would
     /// begin discards everything through the next newline or the end of input.
     fn next(&mut self) -> Option<String> {
         loop {
             let byte = self.next_char()?;
             match byte {
                 // separators between words
                 b' ' | b'\t' | b'\n' => {}
                 // comment: throw away the rest of the line
                 b'#' => {
                     while let Some(skipped) = self.next_char() {
                         if skipped == b'\n' {
                             break;
                         }
                     }
                 }
                 // first byte of a real word
                 _ => return self.parse_word(byte),
             }
         }
     }
 }

 /// Splits all of `in_str` into words in one call.
 ///
 /// Returns `Some(words)` when the lexer finished without setting `had_error`, and `None` when
 /// the input was erroneous.
 pub fn split(in_str: &str) -> Option<Vec<String>> {
     let mut lexer = Shlex::new(in_str);
     let mut words = Vec::new();
     // Iterate through a mutable borrow so the lexer's error flag can be inspected afterwards.
     for word in &mut lexer {
         words.push(word);
     }
     if lexer.had_error {
         return None;
     }
     Some(words)
 }

 /// Given a single word, return a string suitable to encode it as a shell argument.
 ///
 /// * An empty input becomes `""`.
 /// * An input containing none of the characters treated as special below is returned borrowed
 ///   and unchanged.
 /// * Any other input is wrapped in double quotes, with a backslash inserted before each `$`,
 ///   `` ` ``, `"` and `\`.
 pub fn quote(in_str: &str) -> Cow<str> {
     if in_str.is_empty() {
         return "\"\"".into();
     }
     let needs_quoting = in_str.bytes().any(|b| match b {
         b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
         | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%'
         // braces would otherwise be subject to brace expansion in shells that support it
         | b'{' | b'}' => true,
         _ => false,
     });
     if !needs_quoting {
         return in_str.into();
     }
     // Two extra bytes for the surrounding quotes; escapes may grow the buffer further.
     let mut out = String::with_capacity(in_str.len() + 2);
     out.push('"');
     for c in in_str.chars() {
         match c {
             // these keep their special meaning inside double quotes, so escape them
             '$' | '`' | '"' | '\\' => out.push('\\'),
             _ => {}
         }
         out.push(c);
     }
     out.push('"');
     out.into()
 }

 // Input strings paired with the words `split` is expected to return (`None` = erroneous input).
 #[cfg(test)]
 static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
     // `$` is not special to the splitter
     ("foo$baz", Some(&["foo$baz"])),
     // whitespace separates words; adjacent quoted and unquoted parts join into one word
     ("foo baz", Some(&["foo", "baz"])),
     ("foo\"bar\"baz", Some(&["foobarbaz"])),
     ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
     ("   foo \nbar", Some(&["foo", "bar"])),
     // backslash-newline disappears, both bare and inside double quotes
     ("foo\\\nbar", Some(&["foobar"])),
     ("\"foo\\\nbar\"", Some(&["foobar"])),
     // backslashes inside single quotes
     ("'baz\\$b'", Some(&["baz\\$b"])),
     ("'baz\\\''", Some(&["baz\'"])),
     // input ending after a backslash or inside a quotation is an error
     ("\\", None),
     ("\"\\", None),
     ("'\\", None),
     ("\"", None),
     ("'", None),
     // `#` starts a comment only where a word would begin
     ("foo #bar\nbaz", Some(&["foo", "baz"])),
     ("foo #bar", Some(&["foo"])),
     ("foo#bar", Some(&["foo#bar"])),
     ("foo\"#bar", None),
 ];

 // Runs `split` over every entry of SPLIT_TEST_ITEMS and compares against the expected words.
 #[test]
 fn test_split() {
     for case in SPLIT_TEST_ITEMS.iter() {
         let (input, expected) = *case;
         // Turn the borrowed expectation into the owned shape that `split` returns.
         let expected: Option<Vec<String>> =
             expected.map(|words| words.iter().map(|word| (*word).to_owned()).collect());
         assert_eq!(split(input), expected);
     }
 }

 // Checks that `line_no` reflects the newlines consumed by the time "bar" is produced.
 #[test]
 fn test_lineno() {
     let mut sh = Shlex::new("\nfoo\nbar");
     let mut saw_bar = false;
     while let Some(word) = sh.next() {
         if word == "bar" {
             saw_bar = true;
             assert_eq!(sh.line_no, 3);
         }
     }
     // Without this check the test would pass vacuously if "bar" were never produced.
     assert!(saw_bar, "expected the word \"bar\" to be produced");
 }

 // Checks `quote` on a plain word, a word with a space, a lone double quote and the empty string.
 #[test]
 fn test_quote() {
     let cases = [
         ("foobar", "foobar"),
         ("foo bar", "\"foo bar\""),
         ("\"", "\"\\\"\""),
         ("", "\"\""),
     ];
     for &(input, expected) in cases.iter() {
         assert_eq!(quote(input), expected);
     }
 }
	// Copyright 2015 Nicholas Allegra (comex).
	// Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
	// the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
	// copied, modified, or distributed except according to those terms.

	//! Same idea as (but implementation not directly based on) the Python shlex module. However, this
	//! implementation does not support any of the Python module's customization because it makes
	//! parsing slower and is fairly useless. You only get the default settings of shlex.split, which
	//! mimic the POSIX shell:
	//! http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
	//!
	//! This implementation also deviates from the Python version in not treating \r specially, which I
	//! believe is more compliant.
	//!
	//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
	//! directly as a micro-optimization.

	use std::borrow::Cow;

	/// An iterator that takes an input string and splits it into words using the same syntax as
	/// the POSIX shell.
	///
	/// The lexer borrows the input for its whole lifetime and consumes it one byte at a time.
	#[derive(Debug, Clone)]
	pub struct Shlex<'a> {
	    /// The input bytes that have not been consumed yet.
	    in_iter: std::str::Bytes<'a>,
	    /// The number of newlines read so far, plus one.
	    pub line_no: usize,
	    /// An input string is erroneous if it ends while inside a quotation or right after an
	    /// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
	    /// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
	    /// true; best to check it after you're done iterating.
	    pub had_error: bool,
	}

	impl<'a> Shlex<'a> {
	    /// Creates a lexer positioned at the start of `in_str`, with `line_no` set to 1 and no error
	    /// recorded.
	    pub fn new(in_str: &'a str) -> Self {
	        Shlex {
	            in_iter: in_str.bytes(),
	            line_no: 1,
	            had_error: false,
	        }
	    }

	    /// Reads one word whose first byte, `ch`, has already been taken from the input.
	    ///
	    /// Consumes input up to and including the unquoted space, tab or newline that ends the word,
	    /// or up to the end of input. Returns `None` and sets `had_error` when the input ends inside
	    /// a quotation or directly after a backslash.
	    fn parse_word(&mut self, mut ch: u8) -> Option<String> {
	        let mut word: Vec<u8> = Vec::new();
	        loop {
	            let outcome = match ch {
	                b'"' => self.parse_double(&mut word),
	                b'\'' => self.parse_single(&mut word),
	                b'\\' => match self.next_char() {
	                    // backslash-newline is a line continuation and contributes nothing
	                    Some(b'\n') => Ok(()),
	                    Some(escaped) => {
	                        word.push(escaped);
	                        Ok(())
	                    }
	                    // dangling backslash at end of input
	                    None => Err(()),
	                },
	                b' ' | b'\t' | b'\n' => break,
	                plain => {
	                    word.push(plain);
	                    Ok(())
	                }
	            };
	            if outcome.is_err() {
	                self.had_error = true;
	                return None;
	            }
	            match self.next_char() {
	                Some(following) => ch = following,
	                None => break,
	            }
	        }
	        // SAFETY: the bytes originate from a `&str` and are copied in their original order; the
	        // only bytes this parser ever leaves out are ASCII (quotes, backslashes, escaped newlines
	        // and the delimiter), so multi-byte sequences stay intact and the buffer is valid UTF-8.
	        Some(unsafe { String::from_utf8_unchecked(word) })
	    }

	    /// Copies the body of a double-quoted section into `result`; the opening `"` has already
	    /// been consumed. Returns `Err(())` if the input ends before the closing quote.
	    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
	        loop {
	            match self.next_char().ok_or(())? {
	                b'"' => return Ok(()),
	                b'\\' => {
	                    let escaped = self.next_char().ok_or(())?;
	                    match escaped {
	                        // \<newline> => nothing
	                        b'\n' => {}
	                        // \$ => $
	                        b'$' | b'`' | b'"' | b'\\' => result.push(escaped),
	                        // \x => \x (the backslash is kept)
	                        _ => {
	                            result.push(b'\\');
	                            result.push(escaped);
	                        }
	                    }
	                }
	                literal => result.push(literal),
	            }
	        }
	    }

	    /// Copies the body of a single-quoted section into `result`; the opening `'` has already
	    /// been consumed. Returns `Err(())` if the input ends before the closing quote.
	    ///
	    /// NOTE(review): POSIX gives backslash no special meaning inside single quotes, whereas this
	    /// code lets `\'` and `\\` act as escapes. The behaviour is pinned by the test table in this
	    /// file, so it is preserved here; confirm whether the deviation is intended.
	    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
	        loop {
	            match self.next_char().ok_or(())? {
	                b'\'' => return Ok(()),
	                b'\\' => {
	                    let escaped = self.next_char().ok_or(())?;
	                    match escaped {
	                        // for single quotes, only these can be escaped
	                        b'\'' | b'\\' => result.push(escaped),
	                        _ => {
	                            result.push(b'\\');
	                            result.push(escaped);
	                        }
	                    }
	                }
	                literal => result.push(literal),
	            }
	        }
	    }

	    /// Takes the next input byte, bumping `line_no` whenever that byte is a newline.
	    fn next_char(&mut self) -> Option<u8> {
	        let byte = self.in_iter.next()?;
	        if byte == b'\n' {
	            self.line_no += 1;
	        }
	        Some(byte)
	    }
	}

	impl<'a> Iterator for Shlex<'a> {
	type Item = String;
	fn next(&mut self) -> Option<String> {
	if let Some(mut ch) = self.next_char() {
	// skip initial whitespace
	loop {
	match ch as char {
	' ' \| '\t' \| '\n' => {},
	'#' => {
	while let Some(ch2) = self.next_char() {
	if ch2 as char == '\n' { break; }
	}
	},
	_ => { break; }
	}
	if let Some(ch2) = self.next_char() { ch = ch2; } else { return None; }
	}
	self.parse_word(ch)
	} else { // no initial character
	None
	}
	}

	}

	/// Splits all of `in_str` into words in one call.
	///
	/// Returns `Some(words)` when the lexer finished without setting `had_error`, and `None` when
	/// the input was erroneous.
	pub fn split(in_str: &str) -> Option<Vec<String>> {
	    let mut lexer = Shlex::new(in_str);
	    let mut words = Vec::new();
	    // Iterate through a mutable borrow so the lexer's error flag can be inspected afterwards.
	    for word in &mut lexer {
	        words.push(word);
	    }
	    if lexer.had_error {
	        return None;
	    }
	    Some(words)
	}

	/// Given a single word, return a string suitable to encode it as a shell argument.
	///
	/// * An empty input becomes `""`.
	/// * An input containing none of the characters treated as special below is returned borrowed
	///   and unchanged.
	/// * Any other input is wrapped in double quotes, with a backslash inserted before each `$`,
	///   `` ` ``, `"` and `\`.
	pub fn quote(in_str: &str) -> Cow<str> {
	    if in_str.is_empty() {
	        return "\"\"".into();
	    }
	    let needs_quoting = in_str.bytes().any(|b| match b {
	        b'|' | b'&' | b';' | b'<' | b'>' | b'(' | b')' | b'$' | b'`' | b'\\' | b'"' | b'\''
	        | b' ' | b'\t' | b'\r' | b'\n' | b'*' | b'?' | b'[' | b'#' | b'~' | b'=' | b'%'
	        // braces would otherwise be subject to brace expansion in shells that support it
	        | b'{' | b'}' => true,
	        _ => false,
	    });
	    if !needs_quoting {
	        return in_str.into();
	    }
	    // Two extra bytes for the surrounding quotes; escapes may grow the buffer further.
	    let mut out = String::with_capacity(in_str.len() + 2);
	    out.push('"');
	    for c in in_str.chars() {
	        match c {
	            // these keep their special meaning inside double quotes, so escape them
	            '$' | '`' | '"' | '\\' => out.push('\\'),
	            _ => {}
	        }
	        out.push(c);
	    }
	    out.push('"');
	    out.into()
	}

	// Input strings paired with the words `split` is expected to return (`None` = erroneous input).
	#[cfg(test)]
	static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
	    // `$` is not special to the splitter
	    ("foo$baz", Some(&["foo$baz"])),
	    // whitespace separates words; adjacent quoted and unquoted parts join into one word
	    ("foo baz", Some(&["foo", "baz"])),
	    ("foo\"bar\"baz", Some(&["foobarbaz"])),
	    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
	    (" foo \nbar", Some(&["foo", "bar"])),
	    // backslash-newline disappears, both bare and inside double quotes
	    ("foo\\\nbar", Some(&["foobar"])),
	    ("\"foo\\\nbar\"", Some(&["foobar"])),
	    // backslashes inside single quotes
	    ("'baz\\$b'", Some(&["baz\\$b"])),
	    ("'baz\\\''", Some(&["baz\'"])),
	    // input ending after a backslash or inside a quotation is an error
	    ("\\", None),
	    ("\"\\", None),
	    ("'\\", None),
	    ("\"", None),
	    ("'", None),
	    // `#` starts a comment only where a word would begin
	    ("foo #bar\nbaz", Some(&["foo", "baz"])),
	    ("foo #bar", Some(&["foo"])),
	    ("foo#bar", Some(&["foo#bar"])),
	    ("foo\"#bar", None),
	];

	// Runs `split` over every entry of SPLIT_TEST_ITEMS and compares against the expected words.
	#[test]
	fn test_split() {
	    for case in SPLIT_TEST_ITEMS.iter() {
	        let (input, expected) = *case;
	        // Turn the borrowed expectation into the owned shape that `split` returns.
	        let expected: Option<Vec<String>> =
	            expected.map(|words| words.iter().map(|word| (*word).to_owned()).collect());
	        assert_eq!(split(input), expected);
	    }
	}

	// Checks that `line_no` reflects the newlines consumed by the time "bar" is produced.
	#[test]
	fn test_lineno() {
	    let mut sh = Shlex::new("\nfoo\nbar");
	    let mut saw_bar = false;
	    while let Some(word) = sh.next() {
	        if word == "bar" {
	            saw_bar = true;
	            assert_eq!(sh.line_no, 3);
	        }
	    }
	    // Without this check the test would pass vacuously if "bar" were never produced.
	    assert!(saw_bar, "expected the word \"bar\" to be produced");
	}

	// Checks `quote` on a plain word, a word with a space, a lone double quote and the empty string.
	#[test]
	fn test_quote() {
	    let cases = [
	        ("foobar", "foobar"),
	        ("foo bar", "\"foo bar\""),
	        ("\"", "\"\\\"\""),
	        ("", "\"\""),
	    ];
	    for &(input, expected) in cases.iter() {
	        assert_eq!(quote(input), expected);
	    }
	}