Change the interface of the Split function.
To allow the caller to detect incomplete quotation marks, Split now returns a
Boolean flag signifying whether the result is "valid".
In implementing this, I found a few other bugs, which are now fixed and the
tests updated to catch regressions.
diff --git a/shell.go b/shell.go
index 9ec14d3..dd0ab46 100644
--- a/shell.go
+++ b/shell.go
@@ -6,9 +6,13 @@
// characters that would otherwise be subject to shell evaluation, and the Join
// function concatenates quoted strings with spaces between them.
//
-// The relationship between Split and Join is:
+// The relationship between Split and Join is that given
//
-// Split(Join(ss)) == ss
+// fields, ok := Split(Join(ss))
+//
+// the following relationship will hold:
+//
+// fields == ss && ok
//
package shell
@@ -37,6 +41,7 @@
const (
stNone state = iota
stBreak
+ stBreakQ
stWord
stWordQ
stSingle
@@ -54,7 +59,6 @@
clSingle
clDouble
clOther
- clEnd
)
type action int
@@ -73,11 +77,19 @@
stBreak: {
clBreak: {stBreak, drop},
clNewline: {stBreak, drop},
- clQuote: {stWordQ, drop},
+ clQuote: {stBreakQ, drop},
clSingle: {stSingle, drop},
clDouble: {stDouble, drop},
clOther: {stWord, push},
},
+ stBreakQ: {
+ clBreak: {stWord, push},
+ clNewline: {stBreak, drop},
+ clQuote: {stWord, push},
+ clSingle: {stWord, push},
+ clDouble: {stWord, push},
+ clOther: {stWord, push},
+ },
stWord: {
clBreak: {stBreak, emit},
clNewline: {stBreak, emit},
@@ -120,6 +132,10 @@
},
}
+func init() {
+ update[stNone] = update[stBreak]
+}
+
var byteClass = map[byte]class{
' ': clBreak,
'\t': clBreak,
@@ -137,22 +153,23 @@
}
// Split partitions s into fields divided on space, tab, and newline
-// characters. Single and double quotation marks will be handled as described
-// in
+// characters. Leading and trailing whitespace are discarded before
+// splitting. Single and double quotation marks will be handled as described in
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02.
-func Split(s string) []string {
- buf := bufio.NewReader(strings.NewReader(s))
+//
+// The Boolean flag reports whether the split is "valid", meaning there were no
+// unclosed quotations in the string.
+func Split(s string) ([]string, bool) {
+ buf := bufio.NewReader(strings.NewReader(strings.TrimSpace(s)))
var cur bytes.Buffer
var ss []string
pop := func() {
- if cur.Len() != 0 {
- ss = append(ss, cur.String())
- cur.Reset()
- }
+ ss = append(ss, cur.String())
+ cur.Reset()
}
- st := stBreak
- for st != stEnd {
+ st := stNone
+ for {
c, err := buf.ReadByte()
if err == io.EOF {
break
@@ -175,8 +192,10 @@
}
st = next.state
}
- pop()
- return ss
+ if st != stNone {
+ pop()
+ }
+ return ss, st == stNone || st == stWord
}
func quotable(s string) (hasQ, hasOther bool) {
@@ -190,6 +209,9 @@
// Quote returns a copy of s in which shell metacharacters are quoted to
// protect them from evaluation.
func Quote(s string) string {
+ if s == "" {
+ return "''"
+ }
hasQ, hasOther := quotable(s)
if !hasQ && !hasOther {
return s // fast path: nothing needs quotation
diff --git a/shell_test.go b/shell_test.go
index c369047..9770fc7 100644
--- a/shell_test.go
+++ b/shell_test.go
@@ -9,8 +9,8 @@
tests := []struct {
in, want string
}{
- {"", ""}, // nothing to quote
- {"abc", "abc"}, // "
+ {"", "''"}, // empty is special
+ {"abc", "abc"}, // nothing to quote
{"--flag", "--flag"}, // "
{"'abc", `\'abc`}, // single quote only
{"abc'", `abc\'`}, // "
@@ -32,10 +32,63 @@
}
}
+func TestSplit(t *testing.T) {
+ tests := []struct {
+ in string
+ want []string
+ ok bool
+ }{
+ // Variations of empty input yield an empty split.
+ {"", nil, true},
+ {" ", nil, true},
+ {"\t", nil, true},
+ {"\n ", nil, true},
+
+ // Leading and trailing whitespace are discarded.
+ {"a", []string{"a"}, true},
+ {" a", []string{"a"}, true},
+ {"a\n", []string{"a"}, true},
+
+ // Escaped newlines are magic in the correct ways.
+ {"a\\\nb", []string{"ab"}, true},
+ {"a \\\n b\tc", []string{"a", "b", "c"}, true},
+
+ // Various splits with and without quotes. Quoted whitespace is
+ // preserved.
+ {"a b c", []string{"a", "b", "c"}, true},
+ {`a 'b c'`, []string{"a", "b c"}, true},
+ {"\"a\nb\"cd e'f'", []string{"a\nbcd", "ef"}, true},
+ {"'\n \t '", []string{"\n \t "}, true},
+
+ // Quoted empty strings are preserved in various places.
+ {"''", []string{""}, true},
+ {"a ''", []string{"a", ""}, true},
+ {" a \"\" b ", []string{"a", "", "b"}, true},
+
+ // Unbalanced quotation marks and escapes are detected.
+ {"\\", []string{""}, false},
+ {"'", []string{""}, false},
+ {`"`, []string{""}, false},
+ {"a 'b c", []string{"a", "b c"}, false},
+ {`a "b c`, []string{"a", "b c"}, false},
+ {`a "b \"`, []string{"a", `b "`}, false},
+ }
+ for _, test := range tests {
+ got, ok := Split(test.in)
+ if ok != test.ok {
+ t.Errorf("Split %#q: got valid=%v, want %v", test.in, ok, test.ok)
+ }
+ if !reflect.DeepEqual(got, test.want) {
+ t.Errorf("Split %#q: got %+q, want %+q", test.in, got, test.want)
+ }
+ }
+}
+
func TestRoundTrip(t *testing.T) {
tests := [][]string{
nil,
{"a"},
+ {"a "},
{"a", "b", "c"},
{"a", "b c"},
{"--flag=value"},
@@ -47,7 +100,11 @@
for _, test := range tests {
s := Join(test)
t.Logf("Join %#q = %v", test, s)
- if got := Split(s); !reflect.DeepEqual(got, test) {
+ got, ok := Split(s)
+ if !ok {
+ t.Errorf("Split %+q: should be valid, but is not", s)
+ }
+ if !reflect.DeepEqual(got, test) {
t.Errorf("Split %+q: got %q, want %q", s, got, test)
}
}