syntax/scan.go - platform/external/starlark-go - Git at Google

 // Copyright 2017 The Bazel Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package syntax

 // A lexical scanner for Skylark.

 import (
 	"fmt"
 	"io"
 	"io/ioutil"
 	"log"
 	"strconv"
 	"strings"
 	"unicode"
 	"unicode/utf8"
 )

 // A Token represents a Skylark lexical token.
 type Token int8

 const (
 	ILLEGAL Token = iota
 	EOF

 	NEWLINE
 	INDENT
 	OUTDENT

 	// Tokens with values
 	IDENT  // x
 	INT    // 123
 	FLOAT  // 1.23e45
 	STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"

 	// Punctuation
 	PLUS          // +
 	MINUS         // -
 	STAR          // *
 	SLASH         // /
 	SLASHSLASH    // //
 	PERCENT       // %
 	AMP           // &
 	PIPE          // |
 	DOT           // .
 	COMMA         // ,
 	EQ            // =
 	SEMI          // ;
 	COLON         // :
 	LPAREN        // (
 	RPAREN        // )
 	LBRACK        // [
 	RBRACK        // ]
 	LBRACE        // {
 	RBRACE        // }
 	LT            // <
 	GT            // >
 	GE            // >=
 	LE            // <=
 	EQL           // ==
 	NEQ           // !=
 	PLUS_EQ       // +=    (keep order consistent with PLUS..PERCENT)
 	MINUS_EQ      // -=
 	STAR_EQ       // *=
 	SLASH_EQ      // /=
 	SLASHSLASH_EQ // //=
 	PERCENT_EQ    // %=
 	STARSTAR      // **

 	// Keywords
 	AND
 	BREAK
 	CONTINUE
 	DEF
 	ELIF
 	ELSE
 	FOR
 	IF
 	IN
 	LAMBDA
 	LOAD
 	NOT
 	NOT_IN // synthesized by parser from NOT IN
 	OR
 	PASS
 	RETURN

 	maxToken
 )

 func (tok Token) String() string { return tokenNames[tok] }

 // GoString is like String but quotes punctuation tokens.
 // Use Sprintf("%#v", tok) when constructing error messages.
 func (tok Token) GoString() string {
 	if tok >= PLUS && tok <= STARSTAR {
 		return "'" + tokenNames[tok] + "'"
 	}
 	return tokenNames[tok]
 }

 var tokenNames = [...]string{
 	ILLEGAL:       "illegal token",
 	EOF:           "end of file",
 	NEWLINE:       "newline",
 	INDENT:        "indent",
 	OUTDENT:       "outdent",
 	IDENT:         "identifier",
 	INT:           "int literal",
 	FLOAT:         "float literal",
 	STRING:        "string literal",
 	PLUS:          "+",
 	MINUS:         "-",
 	STAR:          "*",
 	SLASH:         "/",
 	SLASHSLASH:    "//",
 	PERCENT:       "%",
 	AMP:           "&",
 	PIPE:          "|",
 	DOT:           ".",
 	COMMA:         ",",
 	EQ:            "=",
 	SEMI:          ";",
 	COLON:         ":",
 	LPAREN:        "(",
 	RPAREN:        ")",
 	LBRACK:        "[",
 	RBRACK:        "]",
 	LBRACE:        "{",
 	RBRACE:        "]",
 	LT:            "<",
 	GT:            ">",
 	GE:            ">=",
 	LE:            "<=",
 	EQL:           "==",
 	NEQ:           "!=",
 	PLUS_EQ:       "+=",
 	MINUS_EQ:      "-=",
 	STAR_EQ:       "*=",
 	SLASH_EQ:      "/=",
 	SLASHSLASH_EQ: "//=",
 	PERCENT_EQ:    "%=",
 	STARSTAR:      "**",
 	AND:           "and",
 	BREAK:         "break",
 	CONTINUE:      "continue",
 	DEF:           "def",
 	ELIF:          "elif",
 	ELSE:          "else",
 	FOR:           "for",
 	IF:            "if",
 	IN:            "in",
 	LAMBDA:        "lambda",
 	LOAD:          "load",
 	NOT:           "not",
 	NOT_IN:        "not in",
 	OR:            "or",
 	PASS:          "pass",
 	RETURN:        "return",
 }

 // A Position describes the location of a rune of input.
 type Position struct {
 	file *string // filename (indirect for compactness)
 	Line int32   // 1-based line number
 	Col  int32   // 1-based column number (strictly: rune)
 }

 // IsValid reports whether the position is valid.
 func (p Position) IsValid() bool {
 	return p.Line >= 1
 }

 // Filename returns the name of the file containing this position.
 func (p Position) Filename() string {
 	if p.file != nil {
 		return *p.file
 	}
 	return "<unknown>"
 }

 // add returns the position at the end of s, assuming it starts at p.
 func (p Position) add(s string) Position {
 	if n := strings.Count(s, "\n"); n > 0 {
 		p.Line += int32(n)
 		s = s[strings.LastIndex(s, "\n")+1:]
 		p.Col = 1
 	}
 	p.Col += int32(utf8.RuneCountInString(s))
 	return p
 }

 func (p Position) String() string {
 	return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col)
 }

 // An scanner represents a single input file being parsed.
 type scanner struct {
 	complete  []byte   // entire input
 	rest      []byte   // rest of input
 	token     []byte   // token being scanned
 	pos       Position // current input position
 	depth     int      // nesting of [ ] { } ( )
 	indentstk []int    // stack of indentation levels
 	dents     int      // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
 	lineStart bool     // after NEWLINE; convert spaces to indentation tokens
 }

 func newScanner(filename string, src interface{}) (*scanner, error) {
 	data, err := readSource(filename, src)
 	if err != nil {
 		return nil, err
 	}
 	return &scanner{
 		complete:  data,
 		rest:      data,
 		pos:       Position{file: &filename, Line: 1, Col: 1},
 		indentstk: make([]int, 1, 10), // []int{0} + spare capacity
 		lineStart: true,
 	}, nil
 }

 func readSource(filename string, src interface{}) (data []byte, err error) {
 	switch src := src.(type) {
 	case string:
 		data = []byte(src)
 	case []byte:
 		data = src
 	case io.Reader:
 		data, err = ioutil.ReadAll(src)
 	case nil:
 		data, err = ioutil.ReadFile(filename)
 	default:
 		return nil, fmt.Errorf("invalid source: %T", src)
 	}
 	if err != nil {
 		return nil, fmt.Errorf("reading %s: %s", filename, err)
 	}
 	return data, nil
 }

 // An Error describes the nature and position of a scanner or parser error.
 type Error struct {
 	Pos Position
 	Msg string
 }

 func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }

 // errorf is called to report an error.
 // errorf does not return: it panics.
 func (sc *scanner) error(pos Position, s string) {
 	panic(Error{pos, s})
 }

 func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
 	sc.error(pos, fmt.Sprintf(format, args...))
 }

 func (sc *scanner) recover(err *error) {
 	// The scanner and parser panic both for routine errors like
 	// syntax errors and for programmer bugs like array index
 	// errors.  Turn both into error returns.  Catching bug panics
 	// is especially important when processing many files.
 	switch e := recover().(type) {
 	case nil:
 		// no panic
 	case Error:
 		*err = e
 	default:
 		*err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
 		if debug {
 			log.Fatal(*err)
 		}
 	}
 }

 // eof reports whether the input has reached end of file.
 func (sc *scanner) eof() bool {
 	return len(sc.rest) == 0
 }

 // peekRune returns the next rune in the input without consuming it.
 // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
 func (sc *scanner) peekRune() rune {
 	if len(sc.rest) == 0 {
 		return 0
 	}

 	// fast path: ASCII
 	if b := sc.rest[0]; b < utf8.RuneSelf {
 		if b == '\r' {
 			return '\n'
 		}
 		return rune(b)
 	}

 	r, _ := utf8.DecodeRune(sc.rest)
 	return r
 }

 // readRune consumes and returns the next rune in the input.
 // Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
 func (sc *scanner) readRune() rune {
 	if len(sc.rest) == 0 {
 		sc.error(sc.pos, "internal scanner error: readRune at EOF")
 	}

 	// fast path: ASCII
 	if b := sc.rest[0]; b < utf8.RuneSelf {
 		r := rune(b)
 		sc.rest = sc.rest[1:]
 		if r == '\r' {
 			if len(sc.rest) > 0 && sc.rest[0] == '\n' {
 				sc.rest = sc.rest[1:]
 			}
 			r = '\n'
 		}
 		if r == '\n' {
 			sc.pos.Line++
 			sc.pos.Col = 1
 		} else {
 			sc.pos.Col++
 		}
 		return r
 	}

 	r, size := utf8.DecodeRune(sc.rest)
 	sc.rest = sc.rest[size:]
 	sc.pos.Col++
 	return r
 }

 // tokenValue records the position and value associated with each token.
 type tokenValue struct {
 	raw    string   // raw text of token
 	int    int64    // decoded int
 	float  float64  // decoded float
 	string string   // decoded string
 	pos    Position // start position of token
 	triple bool     // was string triple quoted?
 }

 // startToken marks the beginning of the next input token.
 // It must be followed by a call to endToken once the token has
 // been consumed using readRune.
 func (sc *scanner) startToken(val *tokenValue) {
 	sc.token = sc.rest
 	val.raw = ""
 	val.pos = sc.pos
 }

 // endToken marks the end of an input token.
 // It records the actual token string in val.raw if the caller
 // has not done that already.
 func (sc *scanner) endToken(val *tokenValue) {
 	if val.raw == "" {
 		val.raw = string(sc.token[:len(sc.token)-len(sc.rest)])
 	}
 }

 // nextToken is called by the parser to obtain the next input token.
 // It returns the token value and sets val to the data associated with
 // the token.
 //
 // For all our input tokens, the associated data is val.pos (the
 // position where the token begins), val.raw (the input string
 // corresponding to the token).  For string and int tokens, the string
 // and int fields additionally contain the token's interpreted value.
 func (sc *scanner) nextToken(val *tokenValue) Token {

 	// The following distribution of tokens guides case ordering:
 	//
 	//      COMMA          27   %
 	//      STRING         23   %
 	//      IDENT          15   %
 	//      EQL            11   %
 	//      LBRACK          5.5 %
 	//      RBRACK          5.5 %
 	//      NEWLINE         3   %
 	//      LPAREN          2.9 %
 	//      RPAREN          2.9 %
 	//      INT             2   %
 	//      others        < 1   %
 	//
 	// Although NEWLINE tokens are infrequent, and lineStart is
 	// usually (~97%) false on entry, skipped newlines account for
 	// about 50% of all iterations of the 'start' loop.

 start:
 	var c rune

 	// Deal with leading spaces and indentation.
 	blank := false
 	savedLineStart := sc.lineStart
 	if sc.lineStart {
 		sc.lineStart = false
 		col := 0
 		for {
 			c = sc.peekRune()
 			if c == ' ' {
 				col++
 				sc.readRune()
 			} else if c == '\t' {
 				const tab = 8
 				col += int(tab - (sc.pos.Col-1)%tab)
 				sc.readRune()
 			} else {
 				break
 			}
 		}
 		// The third clause is "trailing spaces without newline at EOF".
 		if c == '#' || c == '\n' || c == 0 && col > 0 {
 			blank = true
 		}

 		// Compute indentation level for non-blank lines not
 		// inside an expression.  This is not the common case.
 		if !blank && sc.depth == 0 {
 			cur := sc.indentstk[len(sc.indentstk)-1]
 			if col > cur {
 				// indent
 				sc.dents++
 				sc.indentstk = append(sc.indentstk, col)
 			} else if col < cur {
 				// dedent(s)
 				for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
 					sc.dents--
 					sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
 				}
 				if col != sc.indentstk[len(sc.indentstk)-1] {
 					sc.error(sc.pos, "unindent does not match any outer indentation level")
 				}
 			}
 		}
 	}

 	// Return saved indentation tokens.
 	if sc.dents != 0 {
 		sc.startToken(val)
 		sc.endToken(val)
 		if sc.dents < 0 {
 			sc.dents++
 			return OUTDENT
 		} else {
 			sc.dents--
 			return INDENT
 		}
 	}

 	// start of line proper
 	c = sc.peekRune()

 	// Skip spaces.
 	for c == ' ' || c == '\t' {
 		sc.readRune()
 		c = sc.peekRune()
 	}

 	// comment
 	if c == '#' {
 		// Consume up to (but not including) newline.
 		for c != 0 && c != '\n' {
 			sc.readRune()
 			c = sc.peekRune()
 		}
 	}

 	// newline
 	if c == '\n' {
 		sc.lineStart = true
 		if blank || sc.depth > 0 {
 			// Ignore blank lines, or newlines within expressions (common case).
 			sc.readRune()
 			goto start
 		}
 		// At top-level (not in an expression).
 		sc.startToken(val)
 		sc.readRune()
 		val.raw = "\n"
 		return NEWLINE
 	}

 	// end of file
 	if c == 0 {
 		// Emit OUTDENTs for unfinished indentation,
 		// preceded by a NEWLINE if we haven't just emitted one.
 		if len(sc.indentstk) > 1 {
 			if savedLineStart {
 				sc.dents = 1 - len(sc.indentstk)
 				sc.indentstk = sc.indentstk[1:]
 				goto start
 			} else {
 				sc.lineStart = true
 				sc.startToken(val)
 				val.raw = "\n"
 				return NEWLINE
 			}
 		}

 		sc.startToken(val)
 		sc.endToken(val)
 		return EOF
 	}

 	// line continuation
 	if c == '\\' {
 		sc.readRune()
 		if sc.peekRune() != '\n' {
 			sc.errorf(sc.pos, "stray backslash in program")
 		}
 		sc.readRune()
 		goto start
 	}

 	// start of the next token
 	sc.startToken(val)

 	// comma (common case)
 	if c == ',' {
 		sc.readRune()
 		sc.endToken(val)
 		return COMMA
 	}

 	// string literal
 	if c == '"' || c == '\'' {
 		return sc.scanString(val, c)
 	}

 	// identifier or keyword
 	if isIdentStart(c) {
 		// raw string literal
 		if c == 'r' && len(sc.rest) > 1 && (sc.rest[1] == '"' || sc.rest[1] == '\'') {
 			sc.readRune()
 			c = sc.peekRune()
 			return sc.scanString(val, c)
 		}

 		for isIdent(c) {
 			sc.readRune()
 			c = sc.peekRune()
 		}
 		sc.endToken(val)
 		if k, ok := keywordToken[val.raw]; ok {
 			return k
 		}

 		return IDENT
 	}

 	// brackets
 	switch c {
 	case '[', '(', '{':
 		sc.depth++
 		sc.readRune()
 		sc.endToken(val)
 		switch c {
 		case '[':
 			return LBRACK
 		case '(':
 			return LPAREN
 		case '{':
 			return LBRACE
 		}
 		panic("unreachable")

 	case ']', ')', '}':
 		if sc.depth == 0 {
 			sc.error(sc.pos, "indentation error")
 		} else {
 			sc.depth--
 		}
 		sc.readRune()
 		sc.endToken(val)
 		switch c {
 		case ']':
 			return RBRACK
 		case ')':
 			return RPAREN
 		case '}':
 			return RBRACE
 		}
 		panic("unreachable")
 	}

 	// int or float literal, or period
 	if isdigit(c) || c == '.' {
 		return sc.scanNumber(val, c)
 	}

 	// other punctuation
 	defer sc.endToken(val)
 	switch c {
 	case '=', '<', '>', '!', '+', '-', '%', '/': // possibly followed by '='
 		start := sc.pos
 		sc.readRune()
 		if sc.peekRune() == '=' {
 			sc.readRune()
 			switch c {
 			case '<':
 				return LE
 			case '>':
 				return GE
 			case '=':
 				return EQL
 			case '!':
 				return NEQ
 			case '+':
 				return PLUS_EQ
 			case '-':
 				return MINUS_EQ
 			case '/':
 				return SLASH_EQ
 			case '%':
 				return PERCENT_EQ
 			}
 		}
 		switch c {
 		case '=':
 			return EQ
 		case '<':
 			return LT
 		case '>':
 			return GT
 		case '!':
 			sc.error(start, "unexpected input character '!'")
 		case '+':
 			return PLUS
 		case '-':
 			return MINUS
 		case '/':
 			if sc.peekRune() == '/' {
 				sc.readRune()
 				if sc.peekRune() == '=' {
 					sc.readRune()
 					return SLASHSLASH_EQ
 				} else {
 					return SLASHSLASH
 				}
 			}
 			return SLASH
 		case '%':
 			return PERCENT
 		}
 		panic("unreachable")

 	case ':', ';', '|', '&': // single-char tokens (except comma)
 		sc.readRune()
 		switch c {
 		case ':':
 			return COLON
 		case ';':
 			return SEMI
 		case '|':
 			return PIPE
 		case '&':
 			return AMP
 		}
 		panic("unreachable")

 	case '*': // possibly followed by '*' or '='
 		sc.readRune()
 		switch sc.peekRune() {
 		case '*':
 			sc.readRune()
 			return STARSTAR
 		case '=':
 			sc.readRune()
 			return STAR_EQ
 		}
 		return STAR
 	}

 	sc.errorf(sc.pos, "unexpected input character %#q", c)
 	panic("unreachable")
 }

 func (sc *scanner) scanString(val *tokenValue, quote rune) Token {
 	start := sc.pos
 	triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
 	sc.readRune()
 	if triple {
 		sc.readRune()
 		sc.readRune()
 	}

 	quoteCount := 0
 	for {
 		if sc.eof() {
 			sc.error(val.pos, "unexpected EOF in string")
 		}
 		c := sc.readRune()
 		if c == '\n' && !triple {
 			sc.error(val.pos, "unexpected newline in string")
 		}
 		if c == quote {
 			quoteCount++
 			if !triple || quoteCount == 3 {
 				break
 			}
 		} else {
 			quoteCount = 0
 		}
 		if c == '\\' {
 			if sc.eof() {
 				sc.error(val.pos, "unexpected EOF in string")
 			}
 			sc.readRune()
 		}
 	}

 	sc.endToken(val)
 	s, _, err := unquote(val.raw)
 	if err != nil {
 		sc.error(start, err.Error())
 	}
 	val.string = s
 	return STRING
 }

 func (sc *scanner) scanNumber(val *tokenValue, c rune) Token {
 	// https://github.com/google/skylark/blob/master/doc/spec.md#lexical-elements
 	//
 	// Python features not supported:
 	// - integer literals of >64 bits of precision
 	// - 123L or 123l long suffix
 	// - traditional octal: 0755
 	// https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals

 	start := sc.pos
 	fraction, exponent := false, false

 	if c == '.' {
 		// dot or start of fraction
 		sc.readRune()
 		c = sc.peekRune()
 		if !isdigit(c) {
 			sc.endToken(val)
 			return DOT
 		}
 		fraction = true
 	} else if c == '0' {
 		// hex, octal, binary or float
 		sc.readRune()
 		c = sc.peekRune()

 		if c == '.' {
 			fraction = true
 		} else if c == 'x' || c == 'X' {
 			// hex
 			sc.readRune()
 			c = sc.peekRune()
 			if !isxdigit(c) {
 				sc.error(start, "invalid hex literal")
 			}
 			for isxdigit(c) {
 				sc.readRune()
 				c = sc.peekRune()
 			}
 		} else if c == 'o' || c == 'O' {
 			// octal
 			sc.readRune()
 			c = sc.peekRune()
 			if !isodigit(c) {
 				sc.error(sc.pos, "invalid octal literal")
 			}
 			for isodigit(c) {
 				sc.readRune()
 				c = sc.peekRune()
 			}
 		} else if c == 'b' || c == 'B' {
 			// binary
 			sc.readRune()
 			c = sc.peekRune()
 			if !isbdigit(c) {
 				sc.error(sc.pos, "invalid binary literal")
 			}
 			for isbdigit(c) {
 				sc.readRune()
 				c = sc.peekRune()
 			}
 		} else {
 			// float (or obsolete octal "0755")
 			allzeros, octal := true, true
 			for isdigit(c) {
 				if c != '0' {
 					allzeros = false
 				}
 				if c > '7' {
 					octal = false
 				}
 				sc.readRune()
 				c = sc.peekRune()
 			}
 			if c == '.' {
 				fraction = true
 			} else if c == 'e' || c == 'E' {
 				exponent = true
 			} else if octal && !allzeros {
 				// We must support old octal until the Java
 				// implementation groks the new one.
 				// TODO(adonovan): reenable the check.
 				if false {
 					sc.endToken(val)
 					sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:])
 				}
 			}
 		}
 	} else {
 		// decimal
 		for isdigit(c) {
 			sc.readRune()
 			c = sc.peekRune()
 		}

 		if c == '.' {
 			fraction = true
 		} else if c == 'e' || c == 'E' {
 			exponent = true
 		}
 	}

 	if fraction {
 		sc.readRune() // consume '.'
 		c = sc.peekRune()
 		for isdigit(c) {
 			sc.readRune()
 			c = sc.peekRune()
 		}

 		if c == 'e' || c == 'E' {
 			exponent = true
 		}
 	}

 	if exponent {
 		sc.readRune() // consume [eE]
 		c = sc.peekRune()
 		if c == '+' || c == '-' {
 			sc.readRune()
 			c = sc.peekRune()
 			if !isdigit(c) {
 				sc.error(sc.pos, "invalid float literal")
 			}
 		}
 		for isdigit(c) {
 			sc.readRune()
 			c = sc.peekRune()
 		}
 	}

 	sc.endToken(val)
 	if fraction || exponent {
 		var err error
 		val.float, err = strconv.ParseFloat(val.raw, 64)
 		if err != nil {
 			sc.error(sc.pos, "invalid float literal")
 		}
 		return FLOAT
 	} else {
 		var err error
 		s := val.raw
 		if len(s) > 2 && s[0] == '0' && (s[1] == 'o' || s[1] == 'O') {
 			val.int, err = strconv.ParseInt(s[2:], 8, 64)
 		} else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' || s[1] == 'B') {
 			val.int, err = strconv.ParseInt(s[2:], 2, 64)
 		} else {
 			val.int, err = strconv.ParseInt(s, 0, 64)
 		}
 		if err != nil {
 			sc.error(start, "invalid int literal")
 		}
 		return INT
 	}
 }

 // isIdent reports whether c is an identifier rune.
 func isIdent(c rune) bool {
 	return isdigit(c) || isIdentStart(c)
 }

 func isIdentStart(c rune) bool {
 	return 'a' <= c && c <= 'z' ||
 		'A' <= c && c <= 'Z' ||
 		c == '_' ||
 		unicode.IsLetter(c)
 }

 func isdigit(c rune) bool  { return '0' <= c && c <= '9' }
 func isodigit(c rune) bool { return '0' <= c && c <= '7' }
 func isxdigit(c rune) bool { return isdigit(c) || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' }
 func isbdigit(c rune) bool { return '0' == c || c == '1' }

 // keywordToken records the special tokens for
 // strings that should not be treated as ordinary identifiers.
 var keywordToken = map[string]Token{
 	"and":      AND,
 	"break":    BREAK,
 	"continue": CONTINUE,
 	"def":      DEF,
 	"elif":     ELIF,
 	"else":     ELSE,
 	"for":      FOR,
 	"if":       IF,
 	"in":       IN,
 	"lambda":   LAMBDA,
 	"load":     LOAD,
 	"not":      NOT,
 	"or":       OR,
 	"pass":     PASS,
 	"return":   RETURN,

 	// reserved words:
 	"as": ILLEGAL,
 	// "assert":   ILLEGAL, // heavily used by our tests
 	"class":    ILLEGAL,
 	"del":      ILLEGAL,
 	"except":   ILLEGAL,
 	"finally":  ILLEGAL,
 	"from":     ILLEGAL,
 	"global":   ILLEGAL,
 	"import":   ILLEGAL,
 	"is":       ILLEGAL,
 	"nonlocal": ILLEGAL,
 	"raise":    ILLEGAL,
 	"try":      ILLEGAL,
 	"while":    ILLEGAL,
 	"with":     ILLEGAL,
 	"yield":    ILLEGAL,
 }
	// Copyright 2017 The Bazel Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package syntax

	// A lexical scanner for Skylark.

	import (
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
	)

	// A Token represents a Skylark lexical token.
	type Token int8

	const (
	ILLEGAL Token = iota
	EOF

	NEWLINE
	INDENT
	OUTDENT

	// Tokens with values
	IDENT // x
	INT // 123
	FLOAT // 1.23e45
	STRING // "foo" or 'foo' or '''foo''' or r'foo' or r"foo"

	// Punctuation
	PLUS // +
	MINUS // -
	STAR // *
	SLASH // /
	SLASHSLASH // //
	PERCENT // %
	AMP // &
	PIPE // \|
	DOT // .
	COMMA // ,
	EQ // =
	SEMI // ;
	COLON // :
	LPAREN // (
	RPAREN // )
	LBRACK // [
	RBRACK // ]
	LBRACE // {
	RBRACE // }
	LT // <
	GT // >
	GE // >=
	LE // <=
	EQL // ==
	NEQ // !=
	PLUS_EQ // += (keep order consistent with PLUS..PERCENT)
	MINUS_EQ // -=
	STAR_EQ // *=
	SLASH_EQ // /=
	SLASHSLASH_EQ // //=
	PERCENT_EQ // %=
	STARSTAR // **

	// Keywords
	AND
	BREAK
	CONTINUE
	DEF
	ELIF
	ELSE
	FOR
	IF
	IN
	LAMBDA
	LOAD
	NOT
	NOT_IN // synthesized by parser from NOT IN
	OR
	PASS
	RETURN

	maxToken
	)

	func (tok Token) String() string { return tokenNames[tok] }

	// GoString is like String but quotes punctuation tokens.
	// Use Sprintf("%#v", tok) when constructing error messages.
	func (tok Token) GoString() string {
	if tok >= PLUS && tok <= STARSTAR {
	return "'" + tokenNames[tok] + "'"
	}
	return tokenNames[tok]
	}

	var tokenNames = [...]string{
	ILLEGAL: "illegal token",
	EOF: "end of file",
	NEWLINE: "newline",
	INDENT: "indent",
	OUTDENT: "outdent",
	IDENT: "identifier",
	INT: "int literal",
	FLOAT: "float literal",
	STRING: "string literal",
	PLUS: "+",
	MINUS: "-",
	STAR: "*",
	SLASH: "/",
	SLASHSLASH: "//",
	PERCENT: "%",
	AMP: "&",
	PIPE: "\|",
	DOT: ".",
	COMMA: ",",
	EQ: "=",
	SEMI: ";",
	COLON: ":",
	LPAREN: "(",
	RPAREN: ")",
	LBRACK: "[",
	RBRACK: "]",
	LBRACE: "{",
	RBRACE: "]",
	LT: "<",
	GT: ">",
	GE: ">=",
	LE: "<=",
	EQL: "==",
	NEQ: "!=",
	PLUS_EQ: "+=",
	MINUS_EQ: "-=",
	STAR_EQ: "*=",
	SLASH_EQ: "/=",
	SLASHSLASH_EQ: "//=",
	PERCENT_EQ: "%=",
	STARSTAR: "**",
	AND: "and",
	BREAK: "break",
	CONTINUE: "continue",
	DEF: "def",
	ELIF: "elif",
	ELSE: "else",
	FOR: "for",
	IF: "if",
	IN: "in",
	LAMBDA: "lambda",
	LOAD: "load",
	NOT: "not",
	NOT_IN: "not in",
	OR: "or",
	PASS: "pass",
	RETURN: "return",
	}

	// A Position describes the location of a rune of input.
	type Position struct {
	file *string // filename (indirect for compactness)
	Line int32 // 1-based line number
	Col int32 // 1-based column number (strictly: rune)
	}

	// IsValid reports whether the position is valid.
	func (p Position) IsValid() bool {
	return p.Line >= 1
	}

	// Filename returns the name of the file containing this position.
	func (p Position) Filename() string {
	if p.file != nil {
	return *p.file
	}
	return "<unknown>"
	}

	// add returns the position at the end of s, assuming it starts at p.
	func (p Position) add(s string) Position {
	if n := strings.Count(s, "\n"); n > 0 {
	p.Line += int32(n)
	s = s[strings.LastIndex(s, "\n")+1:]
	p.Col = 1
	}
	p.Col += int32(utf8.RuneCountInString(s))
	return p
	}

	func (p Position) String() string {
	return fmt.Sprintf("%s:%d:%d", p.Filename(), p.Line, p.Col)
	}

	// An scanner represents a single input file being parsed.
	type scanner struct {
	complete []byte // entire input
	rest []byte // rest of input
	token []byte // token being scanned
	pos Position // current input position
	depth int // nesting of [ ] { } ( )
	indentstk []int // stack of indentation levels
	dents int // number of saved INDENT (>0) or OUTDENT (<0) tokens to return
	lineStart bool // after NEWLINE; convert spaces to indentation tokens
	}

	func newScanner(filename string, src interface{}) (*scanner, error) {
	data, err := readSource(filename, src)
	if err != nil {
	return nil, err
	}
	return &scanner{
	complete: data,
	rest: data,
	pos: Position{file: &filename, Line: 1, Col: 1},
	indentstk: make([]int, 1, 10), // []int{0} + spare capacity
	lineStart: true,
	}, nil
	}

	func readSource(filename string, src interface{}) (data []byte, err error) {
	switch src := src.(type) {
	case string:
	data = []byte(src)
	case []byte:
	data = src
	case io.Reader:
	data, err = ioutil.ReadAll(src)
	case nil:
	data, err = ioutil.ReadFile(filename)
	default:
	return nil, fmt.Errorf("invalid source: %T", src)
	}
	if err != nil {
	return nil, fmt.Errorf("reading %s: %s", filename, err)
	}
	return data, nil
	}

	// An Error describes the nature and position of a scanner or parser error.
	type Error struct {
	Pos Position
	Msg string
	}

	func (e Error) Error() string { return e.Pos.String() + ": " + e.Msg }

	// errorf is called to report an error.
	// errorf does not return: it panics.
	func (sc *scanner) error(pos Position, s string) {
	panic(Error{pos, s})
	}

	func (sc *scanner) errorf(pos Position, format string, args ...interface{}) {
	sc.error(pos, fmt.Sprintf(format, args...))
	}

	func (sc scanner) recover(err error) {
	// The scanner and parser panic both for routine errors like
	// syntax errors and for programmer bugs like array index
	// errors. Turn both into error returns. Catching bug panics
	// is especially important when processing many files.
	switch e := recover().(type) {
	case nil:
	// no panic
	case Error:
	*err = e
	default:
	*err = Error{sc.pos, fmt.Sprintf("internal error: %v", e)}
	if debug {
	log.Fatal(*err)
	}
	}
	}

	// eof reports whether the input has reached end of file.
	func (sc *scanner) eof() bool {
	return len(sc.rest) == 0
	}

	// peekRune returns the next rune in the input without consuming it.
	// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
	func (sc *scanner) peekRune() rune {
	if len(sc.rest) == 0 {
	return 0
	}

	// fast path: ASCII
	if b := sc.rest[0]; b < utf8.RuneSelf {
	if b == '\r' {
	return '\n'
	}
	return rune(b)
	}

	r, _ := utf8.DecodeRune(sc.rest)
	return r
	}

	// readRune consumes and returns the next rune in the input.
	// Newlines in Unix, DOS, or Mac format are treated as one rune, '\n'.
	func (sc *scanner) readRune() rune {
	if len(sc.rest) == 0 {
	sc.error(sc.pos, "internal scanner error: readRune at EOF")
	}

	// fast path: ASCII
	if b := sc.rest[0]; b < utf8.RuneSelf {
	r := rune(b)
	sc.rest = sc.rest[1:]
	if r == '\r' {
	if len(sc.rest) > 0 && sc.rest[0] == '\n' {
	sc.rest = sc.rest[1:]
	}
	r = '\n'
	}
	if r == '\n' {
	sc.pos.Line++
	sc.pos.Col = 1
	} else {
	sc.pos.Col++
	}
	return r
	}

	r, size := utf8.DecodeRune(sc.rest)
	sc.rest = sc.rest[size:]
	sc.pos.Col++
	return r
	}

	// tokenValue records the position and value associated with each token.
	type tokenValue struct {
	raw string // raw text of token
	int int64 // decoded int
	float float64 // decoded float
	string string // decoded string
	pos Position // start position of token
	triple bool // was string triple quoted?
	}

	// startToken marks the beginning of the next input token.
	// It must be followed by a call to endToken once the token has
	// been consumed using readRune.
	func (sc scanner) startToken(val tokenValue) {
	sc.token = sc.rest
	val.raw = ""
	val.pos = sc.pos
	}

	// endToken marks the end of an input token.
	// It records the actual token string in val.raw if the caller
	// has not done that already.
	func (sc scanner) endToken(val tokenValue) {
	if val.raw == "" {
	val.raw = string(sc.token[:len(sc.token)-len(sc.rest)])
	}
	}

	// nextToken is called by the parser to obtain the next input token.
	// It returns the token value and sets val to the data associated with
	// the token.
	//
	// For all our input tokens, the associated data is val.pos (the
	// position where the token begins), val.raw (the input string
	// corresponding to the token). For string and int tokens, the string
	// and int fields additionally contain the token's interpreted value.
	func (sc scanner) nextToken(val tokenValue) Token {

	// The following distribution of tokens guides case ordering:
	//
	// COMMA 27 %
	// STRING 23 %
	// IDENT 15 %
	// EQL 11 %
	// LBRACK 5.5 %
	// RBRACK 5.5 %
	// NEWLINE 3 %
	// LPAREN 2.9 %
	// RPAREN 2.9 %
	// INT 2 %
	// others < 1 %
	//
	// Although NEWLINE tokens are infrequent, and lineStart is
	// usually (~97%) false on entry, skipped newlines account for
	// about 50% of all iterations of the 'start' loop.

	start:
	var c rune

	// Deal with leading spaces and indentation.
	blank := false
	savedLineStart := sc.lineStart
	if sc.lineStart {
	sc.lineStart = false
	col := 0
	for {
	c = sc.peekRune()
	if c == ' ' {
	col++
	sc.readRune()
	} else if c == '\t' {
	const tab = 8
	col += int(tab - (sc.pos.Col-1)%tab)
	sc.readRune()
	} else {
	break
	}
	}
	// The third clause is "trailing spaces without newline at EOF".
	if c == '#' \|\| c == '\n' \|\| c == 0 && col > 0 {
	blank = true
	}

	// Compute indentation level for non-blank lines not
	// inside an expression. This is not the common case.
	if !blank && sc.depth == 0 {
	cur := sc.indentstk[len(sc.indentstk)-1]
	if col > cur {
	// indent
	sc.dents++
	sc.indentstk = append(sc.indentstk, col)
	} else if col < cur {
	// dedent(s)
	for len(sc.indentstk) > 0 && col < sc.indentstk[len(sc.indentstk)-1] {
	sc.dents--
	sc.indentstk = sc.indentstk[:len(sc.indentstk)-1] // pop
	}
	if col != sc.indentstk[len(sc.indentstk)-1] {
	sc.error(sc.pos, "unindent does not match any outer indentation level")
	}
	}
	}
	}

	// Return saved indentation tokens.
	if sc.dents != 0 {
	sc.startToken(val)
	sc.endToken(val)
	if sc.dents < 0 {
	sc.dents++
	return OUTDENT
	} else {
	sc.dents--
	return INDENT
	}
	}

	// start of line proper
	c = sc.peekRune()

	// Skip spaces.
	for c == ' ' \|\| c == '\t' {
	sc.readRune()
	c = sc.peekRune()
	}

	// comment
	if c == '#' {
	// Consume up to (but not including) newline.
	for c != 0 && c != '\n' {
	sc.readRune()
	c = sc.peekRune()
	}
	}

	// newline
	if c == '\n' {
	sc.lineStart = true
	if blank \|\| sc.depth > 0 {
	// Ignore blank lines, or newlines within expressions (common case).
	sc.readRune()
	goto start
	}
	// At top-level (not in an expression).
	sc.startToken(val)
	sc.readRune()
	val.raw = "\n"
	return NEWLINE
	}

	// end of file
	if c == 0 {
	// Emit OUTDENTs for unfinished indentation,
	// preceded by a NEWLINE if we haven't just emitted one.
	if len(sc.indentstk) > 1 {
	if savedLineStart {
	sc.dents = 1 - len(sc.indentstk)
	sc.indentstk = sc.indentstk[1:]
	goto start
	} else {
	sc.lineStart = true
	sc.startToken(val)
	val.raw = "\n"
	return NEWLINE
	}
	}

	sc.startToken(val)
	sc.endToken(val)
	return EOF
	}

	// line continuation
	if c == '\\' {
	sc.readRune()
	if sc.peekRune() != '\n' {
	sc.errorf(sc.pos, "stray backslash in program")
	}
	sc.readRune()
	goto start
	}

	// start of the next token
	sc.startToken(val)

	// comma (common case)
	if c == ',' {
	sc.readRune()
	sc.endToken(val)
	return COMMA
	}

	// string literal
	if c == '"' \|\| c == '\'' {
	return sc.scanString(val, c)
	}

	// identifier or keyword
	if isIdentStart(c) {
	// raw string literal
	if c == 'r' && len(sc.rest) > 1 && (sc.rest[1] == '"' \|\| sc.rest[1] == '\'') {
	sc.readRune()
	c = sc.peekRune()
	return sc.scanString(val, c)
	}

	for isIdent(c) {
	sc.readRune()
	c = sc.peekRune()
	}
	sc.endToken(val)
	if k, ok := keywordToken[val.raw]; ok {
	return k
	}

	return IDENT
	}

	// brackets
	switch c {
	case '[', '(', '{':
	sc.depth++
	sc.readRune()
	sc.endToken(val)
	switch c {
	case '[':
	return LBRACK
	case '(':
	return LPAREN
	case '{':
	return LBRACE
	}
	panic("unreachable")

	case ']', ')', '}':
	if sc.depth == 0 {
	sc.error(sc.pos, "indentation error")
	} else {
	sc.depth--
	}
	sc.readRune()
	sc.endToken(val)
	switch c {
	case ']':
	return RBRACK
	case ')':
	return RPAREN
	case '}':
	return RBRACE
	}
	panic("unreachable")
	}

	// int or float literal, or period
	if isdigit(c) \|\| c == '.' {
	return sc.scanNumber(val, c)
	}

	// other punctuation
	defer sc.endToken(val)
	switch c {
	case '=', '<', '>', '!', '+', '-', '%', '/': // possibly followed by '='
	start := sc.pos
	sc.readRune()
	if sc.peekRune() == '=' {
	sc.readRune()
	switch c {
	case '<':
	return LE
	case '>':
	return GE
	case '=':
	return EQL
	case '!':
	return NEQ
	case '+':
	return PLUS_EQ
	case '-':
	return MINUS_EQ
	case '/':
	return SLASH_EQ
	case '%':
	return PERCENT_EQ
	}
	}
	switch c {
	case '=':
	return EQ
	case '<':
	return LT
	case '>':
	return GT
	case '!':
	sc.error(start, "unexpected input character '!'")
	case '+':
	return PLUS
	case '-':
	return MINUS
	case '/':
	if sc.peekRune() == '/' {
	sc.readRune()
	if sc.peekRune() == '=' {
	sc.readRune()
	return SLASHSLASH_EQ
	} else {
	return SLASHSLASH
	}
	}
	return SLASH
	case '%':
	return PERCENT
	}
	panic("unreachable")

	case ':', ';', '\|', '&': // single-char tokens (except comma)
	sc.readRune()
	switch c {
	case ':':
	return COLON
	case ';':
	return SEMI
	case '\|':
	return PIPE
	case '&':
	return AMP
	}
	panic("unreachable")

	case '': // possibly followed by '' or '='
	sc.readRune()
	switch sc.peekRune() {
	case '*':
	sc.readRune()
	return STARSTAR
	case '=':
	sc.readRune()
	return STAR_EQ
	}
	return STAR
	}

	sc.errorf(sc.pos, "unexpected input character %#q", c)
	panic("unreachable")
	}

	func (sc scanner) scanString(val tokenValue, quote rune) Token {
	start := sc.pos
	triple := len(sc.rest) >= 3 && sc.rest[0] == byte(quote) && sc.rest[1] == byte(quote) && sc.rest[2] == byte(quote)
	sc.readRune()
	if triple {
	sc.readRune()
	sc.readRune()
	}

	quoteCount := 0
	for {
	if sc.eof() {
	sc.error(val.pos, "unexpected EOF in string")
	}
	c := sc.readRune()
	if c == '\n' && !triple {
	sc.error(val.pos, "unexpected newline in string")
	}
	if c == quote {
	quoteCount++
	if !triple \|\| quoteCount == 3 {
	break
	}
	} else {
	quoteCount = 0
	}
	if c == '\\' {
	if sc.eof() {
	sc.error(val.pos, "unexpected EOF in string")
	}
	sc.readRune()
	}
	}

	sc.endToken(val)
	s, _, err := unquote(val.raw)
	if err != nil {
	sc.error(start, err.Error())
	}
	val.string = s
	return STRING
	}

	func (sc scanner) scanNumber(val tokenValue, c rune) Token {
	// https://github.com/google/skylark/blob/master/doc/spec.md#lexical-elements
	//
	// Python features not supported:
	// - integer literals of >64 bits of precision
	// - 123L or 123l long suffix
	// - traditional octal: 0755
	// https://docs.python.org/2/reference/lexical_analysis.html#integer-and-long-integer-literals

	start := sc.pos
	fraction, exponent := false, false

	if c == '.' {
	// dot or start of fraction
	sc.readRune()
	c = sc.peekRune()
	if !isdigit(c) {
	sc.endToken(val)
	return DOT
	}
	fraction = true
	} else if c == '0' {
	// hex, octal, binary or float
	sc.readRune()
	c = sc.peekRune()

	if c == '.' {
	fraction = true
	} else if c == 'x' \|\| c == 'X' {
	// hex
	sc.readRune()
	c = sc.peekRune()
	if !isxdigit(c) {
	sc.error(start, "invalid hex literal")
	}
	for isxdigit(c) {
	sc.readRune()
	c = sc.peekRune()
	}
	} else if c == 'o' \|\| c == 'O' {
	// octal
	sc.readRune()
	c = sc.peekRune()
	if !isodigit(c) {
	sc.error(sc.pos, "invalid octal literal")
	}
	for isodigit(c) {
	sc.readRune()
	c = sc.peekRune()
	}
	} else if c == 'b' \|\| c == 'B' {
	// binary
	sc.readRune()
	c = sc.peekRune()
	if !isbdigit(c) {
	sc.error(sc.pos, "invalid binary literal")
	}
	for isbdigit(c) {
	sc.readRune()
	c = sc.peekRune()
	}
	} else {
	// float (or obsolete octal "0755")
	allzeros, octal := true, true
	for isdigit(c) {
	if c != '0' {
	allzeros = false
	}
	if c > '7' {
	octal = false
	}
	sc.readRune()
	c = sc.peekRune()
	}
	if c == '.' {
	fraction = true
	} else if c == 'e' \|\| c == 'E' {
	exponent = true
	} else if octal && !allzeros {
	// We must support old octal until the Java
	// implementation groks the new one.
	// TODO(adonovan): reenable the check.
	if false {
	sc.endToken(val)
	sc.errorf(sc.pos, "obsolete form of octal literal; use 0o%s", val.raw[1:])
	}
	}
	}
	} else {
	// decimal
	for isdigit(c) {
	sc.readRune()
	c = sc.peekRune()
	}

	if c == '.' {
	fraction = true
	} else if c == 'e' \|\| c == 'E' {
	exponent = true
	}
	}

	if fraction {
	sc.readRune() // consume '.'
	c = sc.peekRune()
	for isdigit(c) {
	sc.readRune()
	c = sc.peekRune()
	}

	if c == 'e' \|\| c == 'E' {
	exponent = true
	}
	}

	if exponent {
	sc.readRune() // consume [eE]
	c = sc.peekRune()
	if c == '+' \|\| c == '-' {
	sc.readRune()
	c = sc.peekRune()
	if !isdigit(c) {
	sc.error(sc.pos, "invalid float literal")
	}
	}
	for isdigit(c) {
	sc.readRune()
	c = sc.peekRune()
	}
	}

	sc.endToken(val)
	if fraction \|\| exponent {
	var err error
	val.float, err = strconv.ParseFloat(val.raw, 64)
	if err != nil {
	sc.error(sc.pos, "invalid float literal")
	}
	return FLOAT
	} else {
	var err error
	s := val.raw
	if len(s) > 2 && s[0] == '0' && (s[1] == 'o' \|\| s[1] == 'O') {
	val.int, err = strconv.ParseInt(s[2:], 8, 64)
	} else if len(s) > 2 && s[0] == '0' && (s[1] == 'b' \|\| s[1] == 'B') {
	val.int, err = strconv.ParseInt(s[2:], 2, 64)
	} else {
	val.int, err = strconv.ParseInt(s, 0, 64)
	}
	if err != nil {
	sc.error(start, "invalid int literal")
	}
	return INT
	}
	}

	// isIdent reports whether c is an identifier rune.
	func isIdent(c rune) bool {
	return isdigit(c) \|\| isIdentStart(c)
	}

	func isIdentStart(c rune) bool {
	return 'a' <= c && c <= 'z' \|\|
	'A' <= c && c <= 'Z' \|\|
	c == '_' \|\|
	unicode.IsLetter(c)
	}

	func isdigit(c rune) bool { return '0' <= c && c <= '9' }
	func isodigit(c rune) bool { return '0' <= c && c <= '7' }
	func isxdigit(c rune) bool { return isdigit(c) \|\| 'A' <= c && c <= 'F' \|\| 'a' <= c && c <= 'f' }
	func isbdigit(c rune) bool { return '0' == c \|\| c == '1' }

	// keywordToken records the special tokens for
	// strings that should not be treated as ordinary identifiers.
	var keywordToken = map[string]Token{
	"and": AND,
	"break": BREAK,
	"continue": CONTINUE,
	"def": DEF,
	"elif": ELIF,
	"else": ELSE,
	"for": FOR,
	"if": IF,
	"in": IN,
	"lambda": LAMBDA,
	"load": LOAD,
	"not": NOT,
	"or": OR,
	"pass": PASS,
	"return": RETURN,

	// reserved words:
	"as": ILLEGAL,
	// "assert": ILLEGAL, // heavily used by our tests
	"class": ILLEGAL,
	"del": ILLEGAL,
	"except": ILLEGAL,
	"finally": ILLEGAL,
	"from": ILLEGAL,
	"global": ILLEGAL,
	"import": ILLEGAL,
	"is": ILLEGAL,
	"nonlocal": ILLEGAL,
	"raise": ILLEGAL,
	"try": ILLEGAL,
	"while": ILLEGAL,
	"with": ILLEGAL,
	"yield": ILLEGAL,
	}