| // Copyright 2017 syzkaller project authors. All rights reserved. |
| // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
| package ast |
| |
| import ( |
| "fmt" |
| "os" |
| "strconv" |
| ) |
| |
// token identifies the lexical class of an item returned by scanner.Scan.
type token int

// Token kinds. tokIllegal is deliberately the zero value: any byte that has
// no entry in the punctuation table maps to it.
const (
	tokIllegal token = iota

	// Comments, identifiers, keywords and literals.
	tokComment
	tokIdent
	tokInclude
	tokIncdir
	tokDefine
	tokResource
	tokString
	tokCExpr
	tokInt

	// Single-byte punctuation (see the punctuation table).
	tokNewLine
	tokLParen
	tokRParen
	tokLBrack
	tokRBrack
	tokLBrace
	tokRBrace
	tokEq
	tokComma
	tokColon

	// tokEOF is the last token kind.
	tokEOF
)
| |
| var punctuation = [256]token{ |
| '\n': tokNewLine, |
| '(': tokLParen, |
| ')': tokRParen, |
| '[': tokLBrack, |
| ']': tokRBrack, |
| '{': tokLBrace, |
| '}': tokRBrace, |
| '=': tokEq, |
| ',': tokComma, |
| ':': tokColon, |
| } |
| |
| var tok2str = [...]string{ |
| tokIllegal: "ILLEGAL", |
| tokComment: "comment", |
| tokIdent: "identifier", |
| tokInclude: "include", |
| tokIncdir: "incdir", |
| tokDefine: "define", |
| tokResource: "resource", |
| tokString: "string", |
| tokCExpr: "CEXPR", |
| tokInt: "int", |
| tokNewLine: "NEWLINE", |
| tokEOF: "EOF", |
| } |
| |
| func init() { |
| for ch, tok := range punctuation { |
| if tok == tokIllegal { |
| continue |
| } |
| tok2str[tok] = fmt.Sprintf("%q", ch) |
| } |
| } |
| |
| var keywords = map[string]token{ |
| "include": tokInclude, |
| "incdir": tokIncdir, |
| "define": tokDefine, |
| "resource": tokResource, |
| } |
| |
| func (tok token) String() string { |
| return tok2str[tok] |
| } |
| |
| type scanner struct { |
| data []byte |
| filename string |
| errorHandler ErrorHandler |
| |
| ch byte |
| off int |
| line int |
| col int |
| |
| prev1 token |
| prev2 token |
| |
| errors int |
| } |
| |
| func newScanner(data []byte, filename string, errorHandler ErrorHandler) *scanner { |
| if errorHandler == nil { |
| errorHandler = LoggingHandler |
| } |
| s := &scanner{ |
| data: data, |
| filename: filename, |
| errorHandler: errorHandler, |
| off: -1, |
| } |
| s.next() |
| return s |
| } |
| |
| type ErrorHandler func(pos Pos, msg string) |
| |
| func LoggingHandler(pos Pos, msg string) { |
| fmt.Fprintf(os.Stderr, "%v: %v\n", pos, msg) |
| } |
| |
| func (pos Pos) String() string { |
| return fmt.Sprintf("%v:%v:%v", pos.File, pos.Line, pos.Col) |
| } |
| |
| func (s *scanner) Scan() (tok token, lit string, pos Pos) { |
| s.skipWhitespace() |
| pos = s.pos() |
| switch { |
| case s.ch == 0: |
| tok = tokEOF |
| s.next() |
| case s.ch == '`': |
| tok = tokCExpr |
| lit = s.scanCExpr(pos) |
| case s.prev2 == tokDefine && s.prev1 == tokIdent: |
| // Note: the old form for C expressions, not really lexable. |
| // TODO(dvyukov): get rid of this eventually. |
| tok = tokCExpr |
| for ; s.ch != '\n'; s.next() { |
| } |
| lit = string(s.data[pos.Off:s.off]) |
| case s.ch == '#': |
| tok = tokComment |
| for s.next(); s.ch != '\n'; s.next() { |
| } |
| lit = string(s.data[pos.Off+1 : s.off]) |
| case s.ch == '"' || s.ch == '<': |
| tok = tokString |
| lit = s.scanStr(pos) |
| case s.ch >= '0' && s.ch <= '9' || s.ch == '-': |
| tok = tokInt |
| lit = s.scanInt(pos) |
| case s.ch == '\'': |
| tok = tokInt |
| lit = s.scanChar(pos) |
| case s.ch == '_' || s.ch >= 'a' && s.ch <= 'z' || s.ch >= 'A' && s.ch <= 'Z': |
| tok, lit = s.scanIdent(pos) |
| default: |
| tok = punctuation[s.ch] |
| if tok == tokIllegal { |
| s.Error(pos, "illegal character %#U", s.ch) |
| } |
| s.next() |
| } |
| s.prev2 = s.prev1 |
| s.prev1 = tok |
| return |
| } |
| |
| func (s *scanner) scanCExpr(pos Pos) string { |
| for s.next(); s.ch != '`' && s.ch != '\n'; s.next() { |
| } |
| if s.ch == '\n' { |
| s.Error(pos, "C expression is not terminated") |
| return "" |
| } |
| lit := string(s.data[pos.Off+1 : s.off]) |
| s.next() |
| return lit |
| } |
| |
| func (s *scanner) scanStr(pos Pos) string { |
| // TODO(dvyukov): get rid of <...> strings, that's only includes |
| closing := byte('"') |
| if s.ch == '<' { |
| closing = '>' |
| } |
| for s.next(); s.ch != closing; s.next() { |
| if s.ch == 0 || s.ch == '\n' { |
| s.Error(pos, "string literal is not terminated") |
| return "" |
| } |
| } |
| lit := string(s.data[pos.Off+1 : s.off]) |
| for i := 0; i < len(lit); i++ { |
| //lit[i] |
| if lit[i] < 0x20 || lit[i] >= 0x80 { |
| pos1 := pos |
| pos1.Col += i + 1 |
| pos1.Off += i + 1 |
| s.Error(pos1, "illegal character %#U in string literal", lit[i]) |
| break |
| } |
| } |
| s.next() |
| return lit |
| } |
| |
| func (s *scanner) scanInt(pos Pos) string { |
| for s.ch >= '0' && s.ch <= '9' || |
| s.ch >= 'a' && s.ch <= 'f' || |
| s.ch >= 'A' && s.ch <= 'F' || |
| s.ch == 'x' || s.ch == '-' { |
| s.next() |
| } |
| lit := string(s.data[pos.Off:s.off]) |
| if _, err := strconv.ParseUint(lit, 10, 64); err == nil { |
| return lit |
| } |
| if len(lit) > 1 && lit[0] == '-' { |
| if _, err := strconv.ParseInt(lit, 10, 64); err == nil { |
| return lit |
| } |
| } |
| if len(lit) > 2 && lit[0] == '0' && lit[1] == 'x' { |
| if _, err := strconv.ParseUint(lit[2:], 16, 64); err == nil { |
| return lit |
| } |
| } |
| s.Error(pos, fmt.Sprintf("bad integer %q", lit)) |
| return "0" |
| } |
| |
| func (s *scanner) scanChar(pos Pos) string { |
| s.next() |
| s.next() |
| if s.ch != '\'' { |
| s.Error(pos, "char literal is not terminated") |
| return "0" |
| } |
| s.next() |
| return string(s.data[pos.Off : pos.Off+3]) |
| } |
| |
| func (s *scanner) scanIdent(pos Pos) (tok token, lit string) { |
| tok = tokIdent |
| for s.ch == '_' || s.ch == '$' || |
| s.ch >= 'a' && s.ch <= 'z' || |
| s.ch >= 'A' && s.ch <= 'Z' || |
| s.ch >= '0' && s.ch <= '9' { |
| s.next() |
| } |
| lit = string(s.data[pos.Off:s.off]) |
| if key, ok := keywords[lit]; ok { |
| tok = key |
| } |
| return |
| } |
| |
| func (s *scanner) Error(pos Pos, msg string, args ...interface{}) { |
| s.errors++ |
| s.errorHandler(pos, fmt.Sprintf(msg, args...)) |
| } |
| |
| func (s *scanner) Ok() bool { |
| return s.errors == 0 |
| } |
| |
| func (s *scanner) next() { |
| s.off++ |
| for s.off < len(s.data) && s.data[s.off] == '\r' { |
| s.off++ |
| } |
| if s.off == len(s.data) { |
| // Always emit NEWLINE before EOF. |
| // Makes lots of things simpler as we always |
| // want to treat EOF as NEWLINE as well. |
| s.ch = '\n' |
| s.off++ |
| return |
| } |
| if s.off > len(s.data) { |
| s.ch = 0 |
| return |
| } |
| if s.off == 0 || s.data[s.off-1] == '\n' { |
| s.line++ |
| s.col = 0 |
| } |
| s.ch = s.data[s.off] |
| s.col++ |
| if s.ch == 0 { |
| s.Error(s.pos(), "illegal character \\x00") |
| } |
| } |
| |
| func (s *scanner) skipWhitespace() { |
| for s.ch == ' ' || s.ch == '\t' { |
| s.next() |
| } |
| } |
| |
| func (s *scanner) pos() Pos { |
| return Pos{ |
| File: s.filename, |
| Off: s.off, |
| Line: s.line, |
| Col: s.col, |
| } |
| } |