blob: aa5c3067af82eb996a6780f04fd9891a2567e54e [file] [log] [blame]
// Copyright 2015 Nicholas Allegra (comex).
// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//! Parse strings like, and escape strings for, POSIX shells.
//!
//! Same idea as (but implementation not directly based on) the Python shlex module.
//!
//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
//!
//! ## <span style="color:red">Warning</span>
//!
//! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they
//! cannot be quoted portably).
//!
//! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or
//! even scripts `source`d from interactive shells).
//!
//! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce
//! ugly outputs (which may not be copy-pastable).
//!
//! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin
//! of an interactive shell, you should stop, because control characters can lead to arbitrary
//! command injection.
//!
//! For more information, and for information about more minor issues, please see [quoting_warning].
//!
//! ## Compatibility
//!
//! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**;
//! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not
//! POSIX-compatible but close enough).
//!
//! It also aims to be compatible with Python `shlex` and C `wordexp`.
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
use alloc::vec::Vec;
use alloc::borrow::Cow;
use alloc::string::String;
#[cfg(test)]
use alloc::vec;
#[cfg(test)]
use alloc::borrow::ToOwned;
pub mod bytes;
#[cfg(all(doc, not(doctest)))]
#[path = "quoting_warning.md"]
pub mod quoting_warning;
/// An iterator that takes an input string and splits it into the words using the same syntax as
/// the POSIX shell.
///
/// See [`bytes::Shlex`].
pub struct Shlex<'a>(bytes::Shlex<'a>);
impl<'a> Shlex<'a> {
pub fn new(in_str: &'a str) -> Self {
Self(bytes::Shlex::new(in_str.as_bytes()))
}
}
impl<'a> Iterator for Shlex<'a> {
type Item = String;
fn next(&mut self) -> Option<String> {
self.0.next().map(|byte_word| {
// Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8.
unsafe { String::from_utf8_unchecked(byte_word) }
})
}
}
impl<'a> core::ops::Deref for Shlex<'a> {
type Target = bytes::Shlex<'a>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl<'a> core::ops::DerefMut for Shlex<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
/// Convenience function that consumes the whole string at once. Returns None if the input was
/// erroneous.
pub fn split(in_str: &str) -> Option<Vec<String>> {
let mut shl = Shlex::new(in_str);
let res = shl.by_ref().collect();
if shl.had_error { None } else { Some(res) }
}
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
///
/// By default, the only error that can be returned is [`QuoteError::Nul`]. If you call
/// `allow_nul(true)`, then no errors can be returned at all. Any error variants added in the
/// future will not be enabled by default; they will be enabled through corresponding non-default
/// [`Quoter`] options.
///
/// ...In theory. In the unlikely event that additional classes of inputs are discovered that,
/// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
/// default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
/// The input contained a nul byte. In most cases, shells fundamentally [cannot handle strings
/// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted. But if
/// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let
/// them pass through.
Nul,
}
impl core::fmt::Display for QuoteError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"),
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
/// A more configurable interface to quote strings. If you only want the default settings you can
/// use the convenience functions [`try_quote`] and [`try_join`].
///
/// The bytes equivalent is [`bytes::Quoter`].
#[derive(Default, Debug, Clone)]
pub struct Quoter {
inner: bytes::Quoter,
}
impl Quoter {
/// Create a new [`Quoter`] with default settings.
#[inline]
pub fn new() -> Self {
Self::default()
}
/// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not
/// allowed and will result in an error of [`QuoteError::Nul`].
#[inline]
pub fn allow_nul(mut self, allow: bool) -> Self {
self.inner = self.inner.allow_nul(allow);
self
}
/// Convenience function that consumes an iterable of words and turns it into a single string,
/// quoting words when necessary. Consecutive words will be separated by a single space.
pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> {
// Safety: given valid UTF-8, bytes::join() will always return valid UTF-8.
self.inner.join(words.into_iter().map(|s| s.as_bytes()))
.map(|bytes| unsafe { String::from_utf8_unchecked(bytes) })
}
/// Given a single word, return a string suitable to encode it as a shell argument.
pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> {
Ok(match self.inner.quote(in_str.as_bytes())? {
Cow::Borrowed(out) => {
// Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
unsafe { core::str::from_utf8_unchecked(out) }.into()
}
Cow::Owned(out) => {
// Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
unsafe { String::from_utf8_unchecked(out) }.into()
}
})
}
}
impl From<bytes::Quoter> for Quoter {
fn from(inner: bytes::Quoter) -> Quoter {
Quoter { inner }
}
}
impl From<Quoter> for bytes::Quoter {
fn from(quoter: Quoter) -> bytes::Quoter {
quoter.inner
}
}
/// Convenience function that consumes an iterable of words and turns it into a single string,
/// quoting words when necessary. Consecutive words will be separated by a single space.
///
/// Uses default settings except that nul bytes are passed through, which [may be
/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
///
/// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter).
///
/// (That configuration never returns `Err`, so this function does not panic.)
///
/// The bytes equivalent is [bytes::join].
#[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
Quoter::new().allow_nul(true).join(words).unwrap()
}
/// Convenience function that consumes an iterable of words and turns it into a single string,
/// quoting words when necessary. Consecutive words will be separated by a single space.
///
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
///
/// Equivalent to [`Quoter::new().join(words)`](Quoter).
///
/// The bytes equivalent is [bytes::try_join].
pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> {
Quoter::new().join(words)
}
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// Uses default settings except that nul bytes are passed through, which [may be
/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
///
/// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter).
///
/// (That configuration never returns `Err`, so this function does not panic.)
///
/// The bytes equivalent is [bytes::quote].
#[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
pub fn quote(in_str: &str) -> Cow<str> {
Quoter::new().allow_nul(true).quote(in_str).unwrap()
}
/// Given a single word, return a string suitable to encode it as a shell argument.
///
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
///
/// Equivalent to [`Quoter::new().quote(in_str)`](Quoter).
///
/// (That configuration never returns `Err`, so this function does not panic.)
///
/// The bytes equivalent is [bytes::try_quote].
pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> {
Quoter::new().quote(in_str)
}
#[cfg(test)]
static SPLIT_TEST_ITEMS: &'static [(&'static str, Option<&'static [&'static str]>)] = &[
("foo$baz", Some(&["foo$baz"])),
("foo baz", Some(&["foo", "baz"])),
("foo\"bar\"baz", Some(&["foobarbaz"])),
("foo \"bar\"baz", Some(&["foo", "barbaz"])),
(" foo \nbar", Some(&["foo", "bar"])),
("foo\\\nbar", Some(&["foobar"])),
("\"foo\\\nbar\"", Some(&["foobar"])),
("'baz\\$b'", Some(&["baz\\$b"])),
("'baz\\\''", None),
("\\", None),
("\"\\", None),
("'\\", None),
("\"", None),
("'", None),
("foo #bar\nbaz", Some(&["foo", "baz"])),
("foo #bar", Some(&["foo"])),
("foo#bar", Some(&["foo#bar"])),
("foo\"#bar", None),
("'\\n'", Some(&["\\n"])),
("'\\\\n'", Some(&["\\\\n"])),
];
#[test]
fn test_split() {
for &(input, output) in SPLIT_TEST_ITEMS {
assert_eq!(split(input), output.map(|o| o.iter().map(|&x| x.to_owned()).collect()));
}
}
#[test]
fn test_lineno() {
let mut sh = Shlex::new("\nfoo\nbar");
while let Some(word) = sh.next() {
if word == "bar" {
assert_eq!(sh.line_no, 3);
}
}
}
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
// This is a list of (unquoted, quoted) pairs.
// But it's using a single long (raw) string literal with an ad-hoc format, just because it's
// hard to read if we have to put the test strings through Rust escaping on top of the escaping
// being tested. (Even raw string literals are noisy for short strings).
// Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
let tests = r#"
<> => <''>
<foobar> => <foobar>
<foo bar> => <'foo bar'>
<"foo bar'"> => <"\"foo bar'\"">
<'foo bar'> => <"'foo bar'">
<"> => <'"'>
<"'> => <"\"'">
<hello!world> => <'hello!world'>
<'hello!world> => <"'hello"'!world'>
<'hello!> => <"'hello"'!'>
<hello ^ world> => <'hello ''^ world'>
<hello^> => <hello'^'>
<!world'> => <'!world'"'">
<{a, b}> => <'{a, b}'>
<NL> => <'NL'>
<^> => <'^'>
<foo^bar> => <foo'^bar'>
<NLx^> => <'NLx''^'>
<NL^x> => <'NL''^x'>
<NL ^x> => <'NL ''^x'>
<{a,b}> => <'{a,b}'>
<a,b> => <'a,b'>
<a..b => <a..b>
<'$> => <"'"'$'>
<"^> => <'"''^'>
"#;
let mut ok = true;
for test in tests.trim().split('\n') {
let parts: Vec<String> = test
.replace("NL", "\n")
.split("=>")
.map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
.collect();
assert!(parts.len() == 2);
let unquoted = &*parts[0];
let quoted_expected = &*parts[1];
let quoted_actual = try_quote(&parts[0]).unwrap();
if quoted_expected != quoted_actual {
#[cfg(not(feature = "std"))]
panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
unquoted, quoted_expected, quoted_actual);
#[cfg(feature = "std")]
println!("FAIL: for input <{}>, expected <{}>, got <{}>",
unquoted, quoted_expected, quoted_actual);
ok = false;
}
}
assert!(ok);
}
#[test]
#[allow(deprecated)]
fn test_join() {
assert_eq!(join(vec![]), "");
assert_eq!(join(vec![""]), "''");
assert_eq!(join(vec!["a", "b"]), "a b");
assert_eq!(join(vec!["foo bar", "baz"]), "'foo bar' baz");
}
#[test]
fn test_fallible() {
assert_eq!(try_join(vec!["\0"]), Err(QuoteError::Nul));
assert_eq!(try_quote("\0"), Err(QuoteError::Nul));
}