Move split functionality into a new *Scanner type.

Using a scanner allows incremental processing of an io.Reader. The Split
function is now implemented using a scanner internally. This commit also adds
runnable examples using the scanner type.
diff --git a/shell.go b/shell.go
index dd0ab46..c787468 100644
--- a/shell.go
+++ b/shell.go
@@ -47,7 +47,6 @@
stSingle
stDouble
stDoubleQ
- stEnd
)
type class int
@@ -132,10 +131,6 @@
},
}
-func init() {
- update[stNone] = update[stBreak]
-}
-
var byteClass = map[byte]class{
' ': clBreak,
'\t': clBreak,
@@ -152,50 +147,89 @@
return clOther
}
-// Split partitions s into fields divided on space, tab, and newline
-// characters. Leading and trailing whitespace are discarded before
-// splitting. Single and double quotation marks will be handled as described in
-// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02.
-//
-// The Boolean flag reports whether the split is "valid", meaning there were no
-// unclosed quotations in the string.
-func Split(s string) ([]string, bool) {
- buf := bufio.NewReader(strings.NewReader(strings.TrimSpace(s)))
- var cur bytes.Buffer
- var ss []string
+// A Scanner partitions input from a reader into tokens divided on space, tab,
+// and newline characters. Single and double quotation marks are handled as
+// described in http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02.
+type Scanner struct {
+ buf *bufio.Reader
+ cur bytes.Buffer
+ st state
+ err error
+}
- pop := func() {
- ss = append(ss, cur.String())
- cur.Reset()
+// NewScanner returns a Scanner that reads input from r.
+func NewScanner(r io.Reader) *Scanner {
+ return &Scanner{
+ buf: bufio.NewReader(r),
+ st: stBreak,
}
- st := stNone
+}
+
+// Next advances the scanner and reports whether there are any further tokens
+// to be consumed.
+func (s *Scanner) Next() bool {
+ if s.err != nil {
+ return false
+ }
+ s.cur.Reset()
for {
- c, err := buf.ReadByte()
+ c, err := s.buf.ReadByte()
+ s.err = err
if err == io.EOF {
break
} else if err != nil {
- panic(err)
+ return false
}
-
- next := update[st][classOf(c)]
+ next := update[s.st][classOf(c)]
+ s.st = next.state
switch next.action {
case push:
- cur.WriteByte(c)
+ s.cur.WriteByte(c)
case xpush:
- cur.Write([]byte{'\\', c})
+ s.cur.Write([]byte{'\\', c})
case emit:
- pop()
+ return true // s.cur has a complete token
case drop:
break
default:
panic("unknown action")
}
- st = next.state
}
- if st != stNone {
- pop()
+ return s.st != stBreak
+}
+
+// Text returns the text of the current token, or "" if there is none.
+func (s *Scanner) Text() string { return s.cur.String() }
+
+// Err returns the error, if any, from the most recent action; it is io.EOF once the input is exhausted.
+func (s *Scanner) Err() error { return s.err }
+
+// Complete reports whether the current token is complete, meaning that it is
+// unquoted or its quotes were balanced.
+func (s *Scanner) Complete() bool { return s.st == stBreak || s.st == stWord }
+
+// Rest returns an io.Reader for the remainder of the unconsumed input in s.
+// After calling this method, Next will always return false. The remainder
+// does not include the text of the current token at the time Rest is called.
+func (s *Scanner) Rest() io.Reader {
+ s.st = stNone
+ s.cur.Reset()
+ s.err = io.EOF
+ return s.buf
+}
+
+// Split partitions s into tokens divided on space, tab, and newline characters
+// using a *Scanner. Leading and trailing whitespace are ignored.
+//
+// The Boolean flag reports whether the final token is "valid", meaning there
+// were no unclosed quotations in the string.
+func Split(s string) ([]string, bool) {
+ var ss []string
+ sc := NewScanner(strings.NewReader(s))
+ for sc.Next() {
+ ss = append(ss, sc.Text())
}
- return ss, st == stNone || st == stWord
+ return ss, sc.Complete()
}
func quotable(s string) (hasQ, hasOther bool) {
diff --git a/shell_test.go b/shell_test.go
index 9770fc7..d72c62a 100644
--- a/shell_test.go
+++ b/shell_test.go
@@ -1,7 +1,11 @@
package shell
import (
+ "fmt"
+ "io/ioutil"
+ "log"
"reflect"
+ "strings"
"testing"
)
@@ -109,3 +113,32 @@
}
}
}
+
+func ExampleScanner() {
+ const input = `a "free range" exploration of soi\ disant novelties`
+ s := NewScanner(strings.NewReader(input))
+ sum, count := 0, 0
+ for s.Next() {
+ count++
+ sum += len(s.Text())
+ }
+ fmt.Println(len(input), count, sum, s.Complete(), s.Err())
+ // Output: 51 6 43 true EOF
+}
+
+func ExampleScanner_Rest() {
+ const input = `things 'and stuff' %end% all the remaining stuff`
+ s := NewScanner(strings.NewReader(input))
+ for s.Next() {
+ if s.Text() == "%end%" {
+ fmt.Print("found marker; ")
+ break
+ }
+ }
+ rest, err := ioutil.ReadAll(s.Rest())
+ if err != nil {
+ log.Fatal(err)
+ }
+ fmt.Println(string(rest))
+ // Output: found marker; all the remaining stuff
+}