proto/text_parser.go - platform/tools/external/go/src/github.com/golang/protobuf - Git at Google

 // Go support for Protocol Buffers - Google's data interchange format
 //
 // Copyright 2010 The Go Authors.  All rights reserved.
 // http://code.google.com/p/goprotobuf/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 package proto

 // Functions for parsing the Text protocol buffer format.
 // TODO: message sets.

 import (
 	"encoding"
 	"errors"
 	"fmt"
 	"reflect"
 	"strconv"
 	"strings"
 	"unicode/utf8"
 )

 // ParseError is the error type produced by the text-format parser. It carries
 // a human-readable message together with the position recorded for the token
 // that was current when the error was raised.
 type ParseError struct {
 	Message string
 	Line    int // 1-based line number
 	Offset  int // 0-based byte offset from start of input
 }

 // Error implements the error interface. An error on the first line is
 // rendered as "line 1.<offset>: <message>"; an error on any other line is
 // rendered with the line number only.
 func (p *ParseError) Error() string {
 	switch p.Line {
 	case 1:
 		// The byte offset is only shown for the first line.
 		return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
 	default:
 		return fmt.Sprintf("line %d: %v", p.Line, p.Message)
 	}
 }

 // token is a single lexical element read by textParser, or the error that
 // was raised while trying to read one.
 type token struct {
 	value    string
 	err      *ParseError
 	line     int    // line number
 	offset   int    // byte number from start of input, not start of line
 	unquoted string // the unquoted version of value, if it was a quoted string
 }

 // String renders the token for debugging: either the parse error it carries
 // or its quoted value together with its line and byte offset.
 func (t *token) String() string {
 	if t.err != nil {
 		return fmt.Sprintf("parse error: %v", t.err)
 	}
 	return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
 }

 // textParser is the tokenizer and parser state for one text-format input.
 // offset and line track the position of the start of s within the original
 // input; cur is the most recently read token.
 type textParser struct {
 	s            string // remaining input
 	done         bool   // whether the parsing is finished (success or error)
 	backed       bool   // whether back() was called
 	offset, line int
 	cur          token
 }

 // newTextParser returns a parser positioned at the start of s. Line numbers
 // are 1-based, so both the parser and its initial token start on line 1.
 func newTextParser(s string) *textParser {
 	return &textParser{
 		s:    s,
 		line: 1,
 		cur:  token{line: 1},
 	}
 }

 // errorf builds a ParseError from the formatted message and the position of
 // the current token, stores it on the current token, marks the parser as
 // done, and returns the error.
 func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
 	msg := fmt.Sprintf(format, a...)
 	p.cur.err = &ParseError{
 		Message: msg,
 		Line:    p.cur.line,
 		Offset:  p.cur.offset,
 	}
 	p.done = true
 	return p.cur.err
 }

 // isIdentOrNumberChar reports whether c may appear in an identifier or a
 // number, i.e. whether it matches [-+._A-Za-z0-9].
 func isIdentOrNumberChar(c byte) bool {
 	if c >= 'a' && c <= 'z' {
 		return true
 	}
 	if c >= 'A' && c <= 'Z' {
 		return true
 	}
 	if c >= '0' && c <= '9' {
 		return true
 	}
 	return c == '-' || c == '+' || c == '.' || c == '_'
 }

 // isWhitespace reports whether c is a space, tab, newline or carriage return.
 func isWhitespace(c byte) bool {
 	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
 }

 // skipWhitespace consumes leading whitespace and '#' comments from the
 // remaining input, updating the line counter and byte offset as it goes.
 // If nothing is left afterwards the parser is marked as done.
 func (p *textParser) skipWhitespace() {
 	pos := 0
 	for pos < len(p.s) {
 		c := p.s[pos]
 		if c == '#' {
 			// A comment runs up to, but not including, the next newline.
 			for pos < len(p.s) && p.s[pos] != '\n' {
 				pos++
 			}
 			// Loop again so the newline, if any, is counted like any other.
 			continue
 		}
 		if !isWhitespace(c) {
 			break
 		}
 		if c == '\n' {
 			p.line++
 		}
 		pos++
 	}
 	p.s = p.s[pos:]
 	p.offset += pos
 	if len(p.s) == 0 {
 		p.done = true
 	}
 }

 // advance reads the next token from the input into p.cur.
 //
 // Leading whitespace and comments are skipped first; if that exhausts the
 // input, advance returns with p.done set and p.cur untouched. Otherwise the
 // token is one of: a single punctuation symbol, a quoted string (whose
 // unescaped text is stored in p.cur.unquoted), or a run of identifier/number
 // characters. On a lexical error the error is recorded through errorf and
 // the input position is left at the start of the offending token.
 func (p *textParser) advance() {
 	p.skipWhitespace()
 	if p.done {
 		return
 	}

 	// Reset the current token and stamp it with the starting position.
 	p.cur.err = nil
 	p.cur.unquoted = ""
 	p.cur.line = p.line
 	p.cur.offset = p.offset

 	first := p.s[0]
 	width := 0 // number of bytes the token occupies in p.s
 	switch first {
 	case '<', '>', '{', '}', ':', '[', ']', ';', ',':
 		// Single symbol
 		width = 1
 	case '"', '\'':
 		// Quoted string: scan for the matching quote on the same line.
 		end := 1
 		for end < len(p.s) {
 			c := p.s[end]
 			if c == first || c == '\n' {
 				break
 			}
 			if c == '\\' && end+1 < len(p.s) {
 				// skip escaped char
 				end++
 			}
 			end++
 		}
 		if end >= len(p.s) || p.s[end] != first {
 			p.errorf("unmatched quote")
 			return
 		}
 		unq, err := unquoteC(p.s[1:end], rune(first))
 		if err != nil {
 			p.errorf("invalid quoted string %v", p.s[0:end+1])
 			return
 		}
 		p.cur.unquoted = unq
 		width = end + 1
 	default:
 		for width < len(p.s) && isIdentOrNumberChar(p.s[width]) {
 			width++
 		}
 		if width == 0 {
 			p.errorf("unexpected byte %#x", first)
 			return
 		}
 	}
 	p.cur.value = p.s[:width]
 	p.s = p.s[width:]
 	p.offset += width
 }

 // Sentinel errors returned while unquoting string literals.
 var (
 	// errBadUTF8 is returned when the quoted text is not valid UTF-8.
 	errBadUTF8 = errors.New("proto: bad UTF-8")
 	// errBadHex is returned when a \u or \U escape contains a
 	// non-hexadecimal digit.
 	errBadHex  = errors.New("proto: bad hexadecimal")
 )

 // unquoteC returns the unescaped form of s, the text found between a pair of
 // quote characters.
 //
 // This is based on C++'s tokenizer.cc. Despite its name, this is *not*
 // parsing C syntax. For instance, "\0" is an invalid quoted string.
 //
 // A string containing neither a backslash nor the quote character is
 // returned unchanged (and unvalidated). Otherwise the string is decoded rune
 // by rune: invalid UTF-8 yields errBadUTF8, and every backslash sequence is
 // expanded by unescape, whose errors are returned as-is.
 func unquoteC(s string, quote rune) (string, error) {
 	// Avoid allocation in trivial cases.
 	needsWork := false
 	for _, r := range s {
 		if r == quote || r == '\\' {
 			needsWork = true
 			break
 		}
 	}
 	if !needsWork {
 		return s, nil
 	}

 	out := make([]byte, 0, 3*len(s)/2)
 	rest := s
 	for len(rest) > 0 {
 		r, size := utf8.DecodeRuneInString(rest)
 		switch {
 		case r == utf8.RuneError && size == 1:
 			return "", errBadUTF8
 		case r == '\\':
 			// Hand everything after the backslash to unescape, which
 			// returns the expansion and the unconsumed remainder.
 			ch, tail, err := unescape(rest[size:])
 			if err != nil {
 				return "", err
 			}
 			out = append(out, ch...)
 			rest = tail
 		default:
 			out = append(out, string(r)...)
 			rest = rest[size:]
 		}
 	}
 	return string(out), nil
 }

 // unescape decodes a single escape sequence. s is the text immediately
 // following a backslash; the returned ch is the expansion of the escape and
 // tail is whatever follows it in s.
 //
 // Supported escapes are the single-character forms (\a \b \f \n \r \t \v \?
 // \' \" \\), a three-digit octal byte (\ooo), a two-digit hex byte (\xhh or
 // \Xhh) and Unicode code points (\uhhhh and \Uhhhhhhhh). Octal and hex
 // escapes yield one raw byte; Unicode escapes yield the UTF-8 encoding of
 // the code point.
 //
 // Errors: errBadUTF8 if s does not start with valid UTF-8, errBadHex if a
 // Unicode escape contains a non-hex digit, and a descriptive error for
 // truncated, out-of-range or unknown escapes.
 func unescape(s string) (ch string, tail string, err error) {
 	r, n := utf8.DecodeRuneInString(s)
 	if r == utf8.RuneError && n == 1 {
 		return "", "", errBadUTF8
 	}
 	s = s[n:]
 	switch r {
 	case 'a':
 		return "\a", s, nil
 	case 'b':
 		return "\b", s, nil
 	case 'f':
 		return "\f", s, nil
 	case 'n':
 		return "\n", s, nil
 	case 'r':
 		return "\r", s, nil
 	case 't':
 		return "\t", s, nil
 	case 'v':
 		return "\v", s, nil
 	case '?':
 		return "?", s, nil // trigraph workaround
 	case '\'', '"', '\\':
 		return string(r), s, nil
 	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
 		if len(s) < 2 {
 			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
 		}
 		base := 8
 		ss := s[:2]
 		s = s[2:]
 		if r == 'x' || r == 'X' {
 			base = 16
 		} else {
 			// For octal the introducing digit is part of the value.
 			ss = string(r) + ss
 		}
 		i, err := strconv.ParseUint(ss, base, 8)
 		if err != nil {
 			return "", "", err
 		}
 		return string([]byte{byte(i)}), s, nil
 	case 'u', 'U':
 		n := 4
 		if r == 'U' {
 			n = 8
 		}
 		if len(s) < n {
 			return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
 		}

 		// Accumulate the hex digits into a code point. uint32 holds all
 		// eight digits of a \U escape without sign trouble.
 		var cp uint32
 		for i := 0; i < n; i++ {
 			d, ok := unhex(s[i])
 			if !ok {
 				return "", "", errBadHex
 			}
 			cp = cp<<4 | uint32(d)
 		}
 		// Reject values beyond U+10FFFF and lone surrogate halves: neither
 		// can be encoded as UTF-8.
 		if cp > utf8.MaxRune || !utf8.ValidRune(rune(cp)) {
 			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, s[:n])
 		}
 		return string(rune(cp)), s[n:], nil
 	}
 	return "", "", fmt.Errorf(`unknown escape \%c`, r)
 }

 // unhex returns the numeric value of the hexadecimal digit b. ok is false,
 // and v is 0, when b is not one of 0-9, a-f or A-F.
 //
 // Adapted from src/pkg/strconv/quote.go.
 func unhex(b byte) (v byte, ok bool) {
 	if b >= '0' && b <= '9' {
 		return b - '0', true
 	}
 	// Setting bit 0x20 folds 'A'-'F' onto 'a'-'f' and leaves 'a'-'f' as is.
 	if lower := b | 0x20; lower >= 'a' && lower <= 'f' {
 		return lower - 'a' + 10, true
 	}
 	return 0, false
 }

 // back un-reads the current token: the following call to next() returns it
 // again instead of advancing. It may only be used between calls to next(),
 // and calling it more than once before the next next() has no extra effect.
 func (p *textParser) back() {
 	p.backed = true
 }

 // next advances the parser and returns the new current token.
 //
 // If back() was called since the previous call, or parsing has already
 // finished, the current token is returned again without reading any input.
 // At end of input the returned token has an empty value.
 //
 // Adjacent quoted strings separated only by whitespace or comments are
 // merged into one token: its value is the individual literals joined by a
 // single space and its unquoted text is their concatenation. Single- and
 // double-quoted literals both take part, in any mix. If one of the later
 // literals is malformed, the token carrying that error is returned.
 func (p *textParser) next() *token {
 	if p.backed || p.done {
 		p.backed = false
 		return &p.cur
 	}
 	p.advance()
 	if p.done {
 		p.cur.value = ""
 		return &p.cur
 	}

 	isQuote := func(c byte) bool { return c == '"' || c == '\'' }
 	if len(p.cur.value) == 0 || !isQuote(p.cur.value[0]) {
 		return &p.cur
 	}

 	// Look for multiple quoted strings separated by whitespace,
 	// and concatenate them.
 	cat := p.cur
 	for {
 		p.skipWhitespace()
 		if p.done || !isQuote(p.s[0]) {
 			break
 		}
 		p.advance()
 		if p.cur.err != nil {
 			return &p.cur
 		}
 		cat.value += " " + p.cur.value
 		cat.unquoted += p.cur.unquoted
 	}
 	p.done = false // parser may have seen EOF, but we want to return cat
 	p.cur = cat
 	return &p.cur
 }

 // missingRequiredFieldError builds the RequiredNotSetError for sv by looking
 // for the first struct field that is nil and marked as required in the
 // struct's properties. If no such field is found (which should not happen)
 // the error names the struct type with a placeholder field name.
 func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
 	st := sv.Type()
 	sprops := GetProperties(st)
 	for i, n := 0, st.NumField(); i < n; i++ {
 		if isNil(sv.Field(i)) && sprops.Prop[i].Required {
 			return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, sprops.Prop[i].OrigName)}
 		}
 	}
 	return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
 }

 // structFieldByName looks up the field called name in the properties of
 // struct type st. It returns the field's index in the struct, its parsed
 // tag properties, and true; or -1, nil and false if there is no such field.
 func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
 	sprops := GetProperties(st)
 	if idx, found := sprops.decoderOrigNames[name]; found {
 		return idx, sprops.Prop[idx], true
 	}
 	return -1, nil, false
 }

 // checkForColon consumes a ':' from the input stream if the next token is a
 // colon. If it is anything else the token is pushed back, and an error is
 // returned when the field described by props and typ requires a colon.
 //
 // The colon is optional when the field is a group or a message.
 func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
 	tok := p.next()
 	if tok.err != nil {
 		return tok.err
 	}
 	if tok.value == ":" {
 		return nil
 	}

 	colonOptional := false
 	switch props.Wire {
 	case "group":
 		colonOptional = true
 	case "bytes":
 		// A "bytes" field is either a message, a string, or a repeated field;
 		// those three become *T, *string and []T respectively, so a pointer
 		// to anything but a string, or a slice of pointers, is a message.
 		switch typ.Kind() {
 		case reflect.Ptr:
 			// *T or *string
 			colonOptional = typ.Elem().Kind() != reflect.String
 		case reflect.Slice:
 			// []T or []*T
 			colonOptional = typ.Elem().Kind() == reflect.Ptr
 		default:
 			colonOptional = true
 		}
 	}
 	if !colonOptional {
 		return p.errorf("expected ':', found %q", tok.value)
 	}
 	p.back()
 	return nil
 }

 // readStruct parses a sequence of fields from the token stream into the
 // struct sv, stopping when it reads a token whose value equals terminator.
 // UnmarshalText passes "" for the top-level message, which matches the empty
 // token value that next() produces at end of input.
 //
 // Each field is either "name: value" for a regular field or
 // "[extension.name]: value" for a registered extension, optionally followed
 // by ';' or ','.
 //
 // Any error other than a *RequiredNotSetError aborts parsing immediately.
 // A *RequiredNotSetError coming from a nested value is remembered and only
 // returned once the whole struct has been read; if one of this struct's own
 // required fields is missing, that is reported instead.
 func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
 	st := sv.Type()
 	// Number of required fields that still have to be parsed successfully.
 	reqCount := GetProperties(st).reqCount
 	var reqFieldErr error
 	// Names of the fields seen so far, used to reject a non-repeated field
 	// that appears more than once.
 	fieldSet := make(map[string]bool)
 	// A struct is a sequence of "name: value", terminated by one of
 	// '>' or '}', or the end of the input.  A name may also be
 	// "[extension]".
 	for {
 		tok := p.next()
 		if tok.err != nil {
 			return tok.err
 		}
 		if tok.value == terminator {
 			break
 		}
 		if tok.value == "[" {
 			// Looks like an extension.
 			//
 			// TODO: Check whether we need to handle
 			// namespace rooted names (e.g. ".something.Foo").
 			tok = p.next()
 			if tok.err != nil {
 				return tok.err
 			}
 			var desc *ExtensionDesc
 			// This could be faster, but it's functional.
 			// TODO: Do something smarter than a linear scan.
 			for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
 				if d.Name == tok.value {
 					desc = d
 					break
 				}
 			}
 			if desc == nil {
 				return p.errorf("unrecognized extension %q", tok.value)
 			}
 			// Check the extension terminator.
 			tok = p.next()
 			if tok.err != nil {
 				return tok.err
 			}
 			if tok.value != "]" {
 				return p.errorf("unrecognized extension terminator %q", tok.value)
 			}

 			props := &Properties{}
 			props.Parse(desc.Tag)

 			typ := reflect.TypeOf(desc.ExtensionType)
 			if err := p.checkForColon(props, typ); err != nil {
 				return err
 			}

 			rep := desc.repeated()

 			// Read the extension structure, and set it in
 			// the value we're constructing.
 			// For a repeated extension only a single element is read here;
 			// it is appended to the stored slice below.
 			var ext reflect.Value
 			if !rep {
 				ext = reflect.New(typ).Elem()
 			} else {
 				ext = reflect.New(typ.Elem()).Elem()
 			}
 			if err := p.readAny(ext, props); err != nil {
 				if _, ok := err.(*RequiredNotSetError); !ok {
 					return err
 				}
 				reqFieldErr = err
 			}
 			ep := sv.Addr().Interface().(extendableProto)
 			if !rep {
 				SetExtension(ep, desc, ext.Interface())
 			} else {
 				old, err := GetExtension(ep, desc)
 				var sl reflect.Value
 				if err == nil {
 					sl = reflect.ValueOf(old) // existing slice
 				} else {
 					sl = reflect.MakeSlice(typ, 0, 1)
 				}
 				sl = reflect.Append(sl, ext)
 				SetExtension(ep, desc, sl.Interface())
 			}
 		} else {
 			// This is a normal, non-extension field.
 			name := tok.value
 			fi, props, ok := structFieldByName(st, name)
 			if !ok {
 				return p.errorf("unknown field name %q in %v", name, st)
 			}

 			dst := sv.Field(fi)

 			// Check that it's not already set if it's not a repeated field.
 			if !props.Repeated && fieldSet[name] {
 				return p.errorf("non-repeated field %q was repeated", name)
 			}

 			if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
 				return err
 			}

 			// Parse into the field.
 			fieldSet[name] = true
 			if err := p.readAny(dst, props); err != nil {
 				if _, ok := err.(*RequiredNotSetError); !ok {
 					return err
 				}
 				reqFieldErr = err
 			} else if props.Required {
 				reqCount--
 			}
 		}

 		// For backward compatibility, permit a semicolon or comma after a field.
 		tok = p.next()
 		if tok.err != nil {
 			return tok.err
 		}
 		if tok.value != ";" && tok.value != "," {
 			p.back()
 		}
 	}

 	if reqCount > 0 {
 		return p.missingRequiredFieldError(sv)
 	}
 	return reqFieldErr
 }

 // readAny reads the next value from the token stream and stores it in v,
 // interpreting the token according to v's reflect.Kind.
 //
 // A []byte takes the unquoted text of a string token. Any other slice is
 // grown by one element, and a pointer is given a freshly allocated target;
 // in both cases the token is pushed back and the element is read by a
 // recursive call. A struct must start with '{' or '<' and is read by
 // readStruct up to the matching terminator. props supplies the enum name
 // used to resolve symbolic values for int32 fields.
 //
 // A token that cannot be interpreted for v's kind falls out of the switch
 // and produces an "invalid <type>: <token>" *ParseError.
 func (p *textParser) readAny(v reflect.Value, props *Properties) error {
 	tok := p.next()
 	if tok.err != nil {
 		return tok.err
 	}
 	if tok.value == "" {
 		return p.errorf("unexpected EOF")
 	}

 	switch fv := v; fv.Kind() {
 	case reflect.Slice:
 		at := v.Type()
 		if at.Elem().Kind() == reflect.Uint8 {
 			// Special case for []byte
 			if tok.value[0] != '"' && tok.value[0] != '\'' {
 				// Deliberately written out here, as the error after
 				// this switch statement would write "invalid []byte: ...",
 				// which is not as user-friendly.
 				return p.errorf("invalid string: %v", tok.value)
 			}
 			bytes := []byte(tok.unquoted)
 			fv.Set(reflect.ValueOf(bytes))
 			return nil
 		}
 		// Repeated field. May already exist.
 		flen := fv.Len()
 		if flen == fv.Cap() {
 			// Out of capacity: move to a larger backing array first.
 			nav := reflect.MakeSlice(at, flen, 2*flen+1)
 			reflect.Copy(nav, fv)
 			fv.Set(nav)
 		}
 		fv.SetLen(flen + 1)

 		// Read one.
 		p.back()
 		return p.readAny(fv.Index(flen), props)
 	case reflect.Bool:
 		// Either "true", "false", 1 or 0.
 		switch tok.value {
 		case "true", "1":
 			fv.SetBool(true)
 			return nil
 		case "false", "0":
 			fv.SetBool(false)
 			return nil
 		}
 	case reflect.Float32, reflect.Float64:
 		v := tok.value
 		// Ignore 'f' for compatibility with output generated by C++, but don't
 		// remove 'f' when the value is "-inf" or "inf".
 		if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
 			v = v[:len(v)-1]
 		}
 		if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
 			fv.SetFloat(f)
 			return nil
 		}
 	case reflect.Int32:
 		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
 			fv.SetInt(x)
 			return nil
 		}

 		// Not a number: try to resolve the token as an enum value name.
 		if len(props.Enum) == 0 {
 			break
 		}
 		m, ok := enumValueMaps[props.Enum]
 		if !ok {
 			break
 		}
 		x, ok := m[tok.value]
 		if !ok {
 			break
 		}
 		fv.SetInt(int64(x))
 		return nil
 	case reflect.Int64:
 		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
 			fv.SetInt(x)
 			return nil
 		}

 	case reflect.Ptr:
 		// A basic field (indirected through pointer), or a repeated message/group
 		p.back()
 		fv.Set(reflect.New(fv.Type().Elem()))
 		return p.readAny(fv.Elem(), props)
 	case reflect.String:
 		if tok.value[0] == '"' || tok.value[0] == '\'' {
 			fv.SetString(tok.unquoted)
 			return nil
 		}
 	case reflect.Struct:
 		var terminator string
 		switch tok.value {
 		case "{":
 			terminator = "}"
 		case "<":
 			terminator = ">"
 		default:
 			return p.errorf("expected '{' or '<', found %q", tok.value)
 		}
 		// TODO: Handle nested messages which implement encoding.TextUnmarshaler.
 		return p.readStruct(fv, terminator)
 	case reflect.Uint32:
 		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
 			fv.SetUint(uint64(x))
 			return nil
 		}
 	case reflect.Uint64:
 		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
 			fv.SetUint(x)
 			return nil
 		}
 	}
 	return p.errorf("invalid %v: %v", v.Type(), tok.value)
 }

 // UnmarshalText reads a protocol buffer in Text format.
 //
 // If pb implements encoding.TextUnmarshaler, parsing is delegated entirely
 // to pb's own UnmarshalText method. Otherwise pb is reset before parsing, so
 // any existing data in pb is always removed, and s is parsed into it.
 // If a required field is not set and no other error occurs,
 // UnmarshalText returns *RequiredNotSetError.
 func UnmarshalText(s string, pb Message) error {
 	if um, ok := pb.(encoding.TextUnmarshaler); ok {
 		return um.UnmarshalText([]byte(s))
 	}
 	pb.Reset()
 	root := reflect.ValueOf(pb).Elem()
 	// The empty terminator makes the top-level message end at end of input.
 	return newTextParser(s).readStruct(root, "")
 }
	// Go support for Protocol Buffers - Google's data interchange format
	//
	// Copyright 2010 The Go Authors. All rights reserved.
	// http://code.google.com/p/goprotobuf/
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following disclaimer
	// in the documentation and/or other materials provided with the
	// distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	package proto

	// Functions for parsing the Text protocol buffer format.
	// TODO: message sets.

	import (
	"encoding"
	"errors"
	"fmt"
	"reflect"
	"strconv"
	"strings"
	"unicode/utf8"
	)

	// ParseError is the error type produced by the text-format parser. It carries
	// a human-readable message together with the position recorded for the token
	// that was current when the error was raised.
	type ParseError struct {
	Message string
	Line int // 1-based line number
	Offset int // 0-based byte offset from start of input
	}

	// Error implements the error interface. An error on the first line is
	// rendered as "line 1.<offset>: <message>"; an error on any other line is
	// rendered with the line number only.
	func (p *ParseError) Error() string {
		switch p.Line {
		case 1:
			// The byte offset is only shown for the first line.
			return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
		default:
			return fmt.Sprintf("line %d: %v", p.Line, p.Message)
		}
	}

	// token is a single lexical element read by textParser, or the error that
	// was raised while trying to read one.
	type token struct {
	value string
	err *ParseError
	line int // line number
	offset int // byte number from start of input, not start of line
	unquoted string // the unquoted version of value, if it was a quoted string
	}

	// String renders the token for debugging: either the parse error it carries
	// or its quoted value together with its line and byte offset.
	func (t *token) String() string {
		if t.err != nil {
			return fmt.Sprintf("parse error: %v", t.err)
		}
		return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
	}

	// textParser is the tokenizer and parser state for one text-format input.
	// offset and line track the position of the start of s within the original
	// input; cur is the most recently read token.
	type textParser struct {
	s string // remaining input
	done bool // whether the parsing is finished (success or error)
	backed bool // whether back() was called
	offset, line int
	cur token
	}

	// newTextParser returns a parser positioned at the start of s. Line numbers
	// are 1-based, so both the parser and its initial token start on line 1.
	func newTextParser(s string) *textParser {
		return &textParser{
			s:    s,
			line: 1,
			cur:  token{line: 1},
		}
	}

	// errorf builds a ParseError from the formatted message and the position of
	// the current token, stores it on the current token, marks the parser as
	// done, and returns the error.
	//
	// The receiver and the result must both be pointers: the method mutates the
	// parser, and callers return the result where a *ParseError is expected.
	func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
		pe := &ParseError{
			Message: fmt.Sprintf(format, a...),
			Line:    p.cur.line,
			Offset:  p.cur.offset,
		}
		p.cur.err = pe
		p.done = true
		return pe
	}

	// isIdentOrNumberChar reports whether c may appear in an identifier or a
	// number, i.e. whether it matches [-+._A-Za-z0-9].
	func isIdentOrNumberChar(c byte) bool {
		if c >= 'a' && c <= 'z' {
			return true
		}
		if c >= 'A' && c <= 'Z' {
			return true
		}
		if c >= '0' && c <= '9' {
			return true
		}
		return c == '-' || c == '+' || c == '.' || c == '_'
	}

	// isWhitespace reports whether c is a space, tab, newline or carriage return.
	func isWhitespace(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}

	func (p *textParser) skipWhitespace() {
	i := 0
	for i < len(p.s) && (isWhitespace(p.s[i]) \|\| p.s[i] == '#') {
	if p.s[i] == '#' {
	// comment; skip to end of line or input
	for i < len(p.s) && p.s[i] != '\n' {
	i++
	}
	if i == len(p.s) {
	break
	}
	}
	if p.s[i] == '\n' {
	p.line++
	}
	i++
	}
	p.offset += i
	p.s = p.s[i:len(p.s)]
	if len(p.s) == 0 {
	p.done = true
	}
	}

	func (p *textParser) advance() {
	// Skip whitespace
	p.skipWhitespace()
	if p.done {
	return
	}

	// Start of non-whitespace
	p.cur.err = nil
	p.cur.offset, p.cur.line = p.offset, p.line
	p.cur.unquoted = ""
	switch p.s[0] {
	case '<', '>', '{', '}', ':', '[', ']', ';', ',':
	// Single symbol
	p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
	case '"', '\'':
	// Quoted string
	i := 1
	for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
	if p.s[i] == '\\' && i+1 < len(p.s) {
	// skip escaped char
	i++
	}
	i++
	}
	if i >= len(p.s) \|\| p.s[i] != p.s[0] {
	p.errorf("unmatched quote")
	return
	}
	unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
	if err != nil {
	p.errorf("invalid quoted string %v", p.s[0:i+1])
	return
	}
	p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
	p.cur.unquoted = unq
	default:
	i := 0
	for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
	i++
	}
	if i == 0 {
	p.errorf("unexpected byte %#x", p.s[0])
	return
	}
	p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
	}
	p.offset += len(p.cur.value)
	}

	// Sentinel errors returned while unquoting string literals.
	var (
	// errBadUTF8 is returned when the quoted text is not valid UTF-8.
	errBadUTF8 = errors.New("proto: bad UTF-8")
	// errBadHex is returned when a \u or \U escape contains a
	// non-hexadecimal digit.
	errBadHex = errors.New("proto: bad hexadecimal")
	)

	func unquoteC(s string, quote rune) (string, error) {
	// This is based on C++'s tokenizer.cc.
	// Despite its name, this is not parsing C syntax.
	// For instance, "\0" is an invalid quoted string.

	// Avoid allocation in trivial cases.
	simple := true
	for _, r := range s {
	if r == '\\' \|\| r == quote {
	simple = false
	break
	}
	}
	if simple {
	return s, nil
	}

	buf := make([]byte, 0, 3*len(s)/2)
	for len(s) > 0 {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
	return "", errBadUTF8
	}
	s = s[n:]
	if r != '\\' {
	if r < utf8.RuneSelf {
	buf = append(buf, byte(r))
	} else {
	buf = append(buf, string(r)...)
	}
	continue
	}

	ch, tail, err := unescape(s)
	if err != nil {
	return "", err
	}
	buf = append(buf, ch...)
	s = tail
	}
	return string(buf), nil
	}

	func unescape(s string) (ch string, tail string, err error) {
	r, n := utf8.DecodeRuneInString(s)
	if r == utf8.RuneError && n == 1 {
	return "", "", errBadUTF8
	}
	s = s[n:]
	switch r {
	case 'a':
	return "\a", s, nil
	case 'b':
	return "\b", s, nil
	case 'f':
	return "\f", s, nil
	case 'n':
	return "\n", s, nil
	case 'r':
	return "\r", s, nil
	case 't':
	return "\t", s, nil
	case 'v':
	return "\v", s, nil
	case '?':
	return "?", s, nil // trigraph workaround
	case '\'', '"', '\\':
	return string(r), s, nil
	case '0', '1', '2', '3', '4', '5', '6', '7', 'x', 'X':
	if len(s) < 2 {
	return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
	}
	base := 8
	ss := s[:2]
	s = s[2:]
	if r == 'x' \|\| r == 'X' {
	base = 16
	} else {
	ss = string(r) + ss
	}
	i, err := strconv.ParseUint(ss, base, 8)
	if err != nil {
	return "", "", err
	}
	return string([]byte{byte(i)}), s, nil
	case 'u', 'U':
	n := 4
	if r == 'U' {
	n = 8
	}
	if len(s) < n {
	return "", "", fmt.Errorf(`\%c requires %d digits`, r, n)
	}

	bs := make([]byte, n/2)
	for i := 0; i < n; i += 2 {
	a, ok1 := unhex(s[i])
	b, ok2 := unhex(s[i+1])
	if !ok1 \|\| !ok2 {
	return "", "", errBadHex
	}
	bs[i/2] = a<<4 \| b
	}
	s = s[n:]
	return string(bs), s, nil
	}
	return "", "", fmt.Errorf(`unknown escape \%c`, r)
	}

	// unhex returns the numeric value of the hexadecimal digit b. ok is false,
	// and v is 0, when b is not one of 0-9, a-f or A-F.
	//
	// Adapted from src/pkg/strconv/quote.go.
	func unhex(b byte) (v byte, ok bool) {
		if b >= '0' && b <= '9' {
			return b - '0', true
		}
		// Setting bit 0x20 folds 'A'-'F' onto 'a'-'f' and leaves 'a'-'f' as is.
		if lower := b | 0x20; lower >= 'a' && lower <= 'f' {
			return lower - 'a' + 10, true
		}
		return 0, false
	}

	// back un-reads the current token: the following call to next() returns it
	// again instead of advancing. It may only be used between calls to next(),
	// and calling it more than once before the next next() has no extra effect.
	func (p *textParser) back() {
		p.backed = true
	}

	// Advances the parser and returns the new current token.
	func (p textParser) next() token {
	if p.backed \|\| p.done {
	p.backed = false
	return &p.cur
	}
	p.advance()
	if p.done {
	p.cur.value = ""
	} else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
	// Look for multiple quoted strings separated by whitespace,
	// and concatenate them.
	cat := p.cur
	for {
	p.skipWhitespace()
	if p.done \|\| p.s[0] != '"' {
	break
	}
	p.advance()
	if p.cur.err != nil {
	return &p.cur
	}
	cat.value += " " + p.cur.value
	cat.unquoted += p.cur.unquoted
	}
	p.done = false // parser may have seen EOF, but we want to return cat
	p.cur = cat
	}
	return &p.cur
	}

	// missingRequiredFieldError builds the RequiredNotSetError for sv by looking
	// for the first struct field that is nil and marked as required in the
	// struct's properties. If no such field is found (which should not happen)
	// the error names the struct type with a placeholder field name.
	//
	// The result is a pointer, matching the &RequiredNotSetError{...} values
	// constructed below.
	func (p *textParser) missingRequiredFieldError(sv reflect.Value) *RequiredNotSetError {
		st := sv.Type()
		sprops := GetProperties(st)
		for i := 0; i < st.NumField(); i++ {
			if !isNil(sv.Field(i)) {
				continue
			}

			props := sprops.Prop[i]
			if props.Required {
				return &RequiredNotSetError{fmt.Sprintf("%v.%v", st, props.OrigName)}
			}
		}
		return &RequiredNotSetError{fmt.Sprintf("%v.<unknown field name>", st)} // should not happen
	}

	// structFieldByName looks up the field called name in the properties of
	// struct type st. It returns the field's index in the struct, its parsed
	// tag properties, and true; or -1, nil and false if there is no such field.
	func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
		sprops := GetProperties(st)
		if idx, found := sprops.decoderOrigNames[name]; found {
			return idx, sprops.Prop[idx], true
		}
		return -1, nil, false
	}

	// checkForColon consumes a ':' from the input stream if the next token is a
	// colon. If it is anything else the token is pushed back, and an error is
	// returned when the field described by props and typ requires a colon.
	//
	// The colon is optional when the field is a group or a message.
	//
	// The receiver is a pointer because next() and back() mutate the parser,
	// and props is a *Properties because that is what callers pass.
	func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
		tok := p.next()
		if tok.err != nil {
			return tok.err
		}
		if tok.value == ":" {
			return nil
		}

		colonOptional := false
		switch props.Wire {
		case "group":
			colonOptional = true
		case "bytes":
			// A "bytes" field is either a message, a string, or a repeated field;
			// those three become *T, *string and []T respectively, so a pointer
			// to anything but a string, or a slice of pointers, is a message.
			switch typ.Kind() {
			case reflect.Ptr:
				// *T or *string
				colonOptional = typ.Elem().Kind() != reflect.String
			case reflect.Slice:
				// []T or []*T
				colonOptional = typ.Elem().Kind() == reflect.Ptr
			default:
				colonOptional = true
			}
		}
		if !colonOptional {
			return p.errorf("expected ':', found %q", tok.value)
		}
		p.back()
		return nil
	}

	// readStruct parses a sequence of fields from the token stream into the
	// struct sv, stopping when it reads a token whose value equals terminator.
	// UnmarshalText passes "" for the top-level message, which matches the empty
	// token value that next() produces at end of input.
	//
	// Each field is either "name: value" for a regular field or
	// "[extension.name]: value" for a registered extension, optionally followed
	// by ';' or ','.
	//
	// Any error other than a *RequiredNotSetError aborts parsing immediately.
	// A *RequiredNotSetError coming from a nested value is remembered and only
	// returned once the whole struct has been read; if one of this struct's own
	// required fields is missing, that is reported instead.
	func (p *textParser) readStruct(sv reflect.Value, terminator string) error {
	st := sv.Type()
	// Number of required fields that still have to be parsed successfully.
	reqCount := GetProperties(st).reqCount
	var reqFieldErr error
	// Names of the fields seen so far, used to reject a non-repeated field
	// that appears more than once.
	fieldSet := make(map[string]bool)
	// A struct is a sequence of "name: value", terminated by one of
	// '>' or '}', or the end of the input. A name may also be
	// "[extension]".
	for {
	tok := p.next()
	if tok.err != nil {
	return tok.err
	}
	if tok.value == terminator {
	break
	}
	if tok.value == "[" {
	// Looks like an extension.
	//
	// TODO: Check whether we need to handle
	// namespace rooted names (e.g. ".something.Foo").
	tok = p.next()
	if tok.err != nil {
	return tok.err
	}
	var desc *ExtensionDesc
	// This could be faster, but it's functional.
	// TODO: Do something smarter than a linear scan.
	for _, d := range RegisteredExtensions(reflect.New(st).Interface().(Message)) {
	if d.Name == tok.value {
	desc = d
	break
	}
	}
	if desc == nil {
	return p.errorf("unrecognized extension %q", tok.value)
	}
	// Check the extension terminator.
	tok = p.next()
	if tok.err != nil {
	return tok.err
	}
	if tok.value != "]" {
	return p.errorf("unrecognized extension terminator %q", tok.value)
	}

	props := &Properties{}
	props.Parse(desc.Tag)

	typ := reflect.TypeOf(desc.ExtensionType)
	if err := p.checkForColon(props, typ); err != nil {
	return err
	}

	rep := desc.repeated()

	// Read the extension structure, and set it in
	// the value we're constructing.
	// For a repeated extension only a single element is read here;
	// it is appended to the stored slice below.
	var ext reflect.Value
	if !rep {
	ext = reflect.New(typ).Elem()
	} else {
	ext = reflect.New(typ.Elem()).Elem()
	}
	if err := p.readAny(ext, props); err != nil {
	if _, ok := err.(*RequiredNotSetError); !ok {
	return err
	}
	reqFieldErr = err
	}
	ep := sv.Addr().Interface().(extendableProto)
	if !rep {
	SetExtension(ep, desc, ext.Interface())
	} else {
	old, err := GetExtension(ep, desc)
	var sl reflect.Value
	if err == nil {
	sl = reflect.ValueOf(old) // existing slice
	} else {
	sl = reflect.MakeSlice(typ, 0, 1)
	}
	sl = reflect.Append(sl, ext)
	SetExtension(ep, desc, sl.Interface())
	}
	} else {
	// This is a normal, non-extension field.
	name := tok.value
	fi, props, ok := structFieldByName(st, name)
	if !ok {
	return p.errorf("unknown field name %q in %v", name, st)
	}

	dst := sv.Field(fi)

	// Check that it's not already set if it's not a repeated field.
	if !props.Repeated && fieldSet[name] {
	return p.errorf("non-repeated field %q was repeated", name)
	}

	if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
	return err
	}

	// Parse into the field.
	fieldSet[name] = true
	if err := p.readAny(dst, props); err != nil {
	if _, ok := err.(*RequiredNotSetError); !ok {
	return err
	}
	reqFieldErr = err
	} else if props.Required {
	reqCount--
	}
	}

	// For backward compatibility, permit a semicolon or comma after a field.
	tok = p.next()
	if tok.err != nil {
	return tok.err
	}
	if tok.value != ";" && tok.value != "," {
	p.back()
	}
	}

	if reqCount > 0 {
	return p.missingRequiredFieldError(sv)
	}
	return reqFieldErr
	}

	func (p textParser) readAny(v reflect.Value, props Properties) error {
	tok := p.next()
	if tok.err != nil {
	return tok.err
	}
	if tok.value == "" {
	return p.errorf("unexpected EOF")
	}

	switch fv := v; fv.Kind() {
	case reflect.Slice:
	at := v.Type()
	if at.Elem().Kind() == reflect.Uint8 {
	// Special case for []byte
	if tok.value[0] != '"' && tok.value[0] != '\'' {
	// Deliberately written out here, as the error after
	// this switch statement would write "invalid []byte: ...",
	// which is not as user-friendly.
	return p.errorf("invalid string: %v", tok.value)
	}
	bytes := []byte(tok.unquoted)
	fv.Set(reflect.ValueOf(bytes))
	return nil
	}
	// Repeated field. May already exist.
	flen := fv.Len()
	if flen == fv.Cap() {
	nav := reflect.MakeSlice(at, flen, 2*flen+1)
	reflect.Copy(nav, fv)
	fv.Set(nav)
	}
	fv.SetLen(flen + 1)

	// Read one.
	p.back()
	return p.readAny(fv.Index(flen), props)
	case reflect.Bool:
	// Either "true", "false", 1 or 0.
	switch tok.value {
	case "true", "1":
	fv.SetBool(true)
	return nil
	case "false", "0":
	fv.SetBool(false)
	return nil
	}
	case reflect.Float32, reflect.Float64:
	v := tok.value
	// Ignore 'f' for compatibility with output generated by C++, but don't
	// remove 'f' when the value is "-inf" or "inf".
	if strings.HasSuffix(v, "f") && tok.value != "-inf" && tok.value != "inf" {
	v = v[:len(v)-1]
	}
	if f, err := strconv.ParseFloat(v, fv.Type().Bits()); err == nil {
	fv.SetFloat(f)
	return nil
	}
	case reflect.Int32:
	if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
	fv.SetInt(x)
	return nil
	}

	if len(props.Enum) == 0 {
	break
	}
	m, ok := enumValueMaps[props.Enum]
	if !ok {
	break
	}
	x, ok := m[tok.value]
	if !ok {
	break
	}
	fv.SetInt(int64(x))
	return nil
	case reflect.Int64:
	if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
	fv.SetInt(x)
	return nil
	}

	case reflect.Ptr:
	// A basic field (indirected through pointer), or a repeated message/group
	p.back()
	fv.Set(reflect.New(fv.Type().Elem()))
	return p.readAny(fv.Elem(), props)
	case reflect.String:
	if tok.value[0] == '"' \|\| tok.value[0] == '\'' {
	fv.SetString(tok.unquoted)
	return nil
	}
	case reflect.Struct:
	var terminator string
	switch tok.value {
	case "{":
	terminator = "}"
	case "<":
	terminator = ">"
	default:
	return p.errorf("expected '{' or '<', found %q", tok.value)
	}
	// TODO: Handle nested messages which implement encoding.TextUnmarshaler.
	return p.readStruct(fv, terminator)
	case reflect.Uint32:
	if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
	fv.SetUint(uint64(x))
	return nil
	}
	case reflect.Uint64:
	if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
	fv.SetUint(x)
	return nil
	}
	}
	return p.errorf("invalid %v: %v", v.Type(), tok.value)
	}

	// UnmarshalText reads a protocol buffer in Text format.
	//
	// If pb implements encoding.TextUnmarshaler, parsing is delegated entirely
	// to pb's own UnmarshalText method. Otherwise pb is reset before parsing, so
	// any existing data in pb is always removed, and s is parsed into it.
	// If a required field is not set and no other error occurs,
	// UnmarshalText returns *RequiredNotSetError.
	func UnmarshalText(s string, pb Message) error {
		if um, ok := pb.(encoding.TextUnmarshaler); ok {
			return um.UnmarshalText([]byte(s))
		}
		pb.Reset()
		root := reflect.ValueOf(pb).Elem()
		// The empty terminator makes the top-level message end at end of input.
		return newTextParser(s).readStruct(root, "")
	}