blob: 241f15240f24bd7c64ad48e484f32edb0d8af500 [file] [log] [blame]
// Copyright (C) 2014 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parse
import (
"bytes"
"unicode"
"unicode/utf8"
)
// Reader is a rune-based scanner used to split an input string into tokens.
// The token currently being scanned starts at offset and ends at cursor;
// Token reports these two positions as the token's Start and End.
type Reader struct {
	runes  []rune // The input being parsed, decoded into runes.
	offset int    // The start of the current token.
	cursor int    // The offset of the next unparsed rune.
}
// setData replaces the reader's input with the runes decoded from data.
// The offset and cursor positions are not modified.
func (r *Reader) setData(data string) {
	raw := []byte(data)
	r.runes = bytes.Runes(raw)
}
// Token returns the token that has been scanned so far, spanning from the
// last consume point up to the cursor. The reader itself is left untouched.
func (r *Reader) Token() Token {
	tok := Token{Runes: r.runes}
	tok.Start, tok.End = r.offset, r.cursor
	return tok
}
// Consume returns the current token and starts a new, empty one at the
// cursor by moving the token start up to the cursor position.
func (r *Reader) Consume() Token {
	consumed := r.Token()
	r.offset = r.cursor
	return consumed
}
// Advance moves the cursor forward by a single rune. It does nothing when
// the cursor is already at the end of the input.
func (r *Reader) Advance() {
	if r.cursor >= len(r.runes) {
		return
	}
	r.cursor++
}
// Rollback discards everything scanned since the last consume point by
// moving the cursor back to the start of the current token.
func (r *Reader) Rollback() {
	lastConsumed := r.offset
	r.cursor = lastConsumed
}
// IsEOF reports whether the cursor has reached the end of the input, i.e.
// there are no unparsed runes left.
func (r *Reader) IsEOF() bool {
	return len(r.runes) <= r.cursor
}
// GuessNextToken makes a best-effort attempt at consuming one arbitrary token
// from the stream. It is intended for error handlers that need to indicate
// where an error occurred.
//
// If a token is already pending (the cursor is ahead of the last consume
// point) that token is consumed as is. Otherwise leading whitespace is
// skipped and the first of the following that matches is taken: an
// identifier, a number, or a run of non-whitespace runes. When none of them
// match, a single rune is taken instead, so that at least one rune is
// consumed whenever the stream is not finished. Whitespace skipped here is
// part of the returned token.
func (r *Reader) GuessNextToken() Token {
	if r.cursor != r.offset {
		return r.Consume()
	}
	r.Space()
	// The scanners are tried in order and the chain stops at the first one
	// that succeeds, so later scanners never run on an already-moved cursor.
	if !r.AlphaNumeric() && r.Numeric() == NotNumeric && !r.NotSpace() {
		r.Advance()
	}
	return r.Consume()
}
// Peek returns the rune at the cursor without moving the cursor. At the end
// of the input it returns utf8.RuneError.
func (r *Reader) Peek() rune {
	if r.cursor < len(r.runes) {
		return r.runes[r.cursor]
	}
	return utf8.RuneError
}
// Rune checks whether the rune at the cursor equals value. If it does, the
// cursor is moved past it and true is returned; otherwise (including at the
// end of the input) the cursor is left alone and false is returned.
func (r *Reader) Rune(value rune) bool {
	matched := r.cursor < len(r.runes) && r.runes[r.cursor] == value
	if matched {
		r.cursor++
	}
	return matched
}
// SeekRune searches forward from the cursor for the first occurrence of
// value. When found, the cursor is placed on that rune (not past it) and true
// is returned. When the end of the input is reached without a match, the
// cursor is left where it was and false is returned.
func (r *Reader) SeekRune(value rune) bool {
	for pos := r.cursor; pos < len(r.runes); pos++ {
		if r.runes[pos] != value {
			continue
		}
		r.cursor = pos
		return true
	}
	return false
}
// String checks whether value occurs at the cursor. If it does, the cursor is
// advanced past it and true is returned; otherwise the cursor is left
// untouched and false is returned. An empty value always matches.
//
// The comparison is done rune by rune. The position within the input is
// tracked separately from the byte offsets produced by ranging over value,
// so values containing multi-byte (non-ASCII) characters are matched and
// skipped correctly.
func (r *Reader) String(value string) bool {
	pos := r.cursor
	for _, want := range value {
		if pos >= len(r.runes) || r.runes[pos] != want {
			return false
		}
		pos++
	}
	r.cursor = pos
	return true
}
// Space moves the cursor past a run of whitespace and reports whether the
// cursor moved. The scan stops at RuneEOL, so the end-of-line rune is never
// skipped even though it may itself be whitespace.
func (r *Reader) Space() bool {
	start := r.cursor
	end := start
	for end < len(r.runes) {
		c := r.runes[end]
		if c == RuneEOL || !unicode.IsSpace(c) {
			break
		}
		end++
	}
	if end == start {
		return false
	}
	r.cursor = end
	return true
}
// NotSpace moves the cursor past a run of non-whitespace runes and reports
// whether the cursor moved. The scan stops at the first whitespace rune or at
// the end of the input.
func (r *Reader) NotSpace() bool {
	start := r.cursor
	end := start
	for end < len(r.runes) && !unicode.IsSpace(r.runes[end]) {
		end++
	}
	if end == start {
		return false
	}
	r.cursor = end
	return true
}
// NumberKind is a type used by Reader.Numeric for identifying various kinds of numbers.
type NumberKind uint8

const (
	// NotNumeric means that no number was found.
	NotNumeric NumberKind = iota
	// Decimal is a decimal number.
	Decimal
	// Octal is an octal number, starting with "0". Note that a lone "0" is
	// classified as octal.
	Octal
	// Hexadecimal is a hexadecimal number, starting with "0x".
	Hexadecimal
	// Floating is a floating point number: "123.456". The whole and the
	// fractional parts are optional (but not both at the same time).
	Floating
	// Scientific is a floating point number in scientific notation:
	// "123.456e±789". The fractional part, the dot and the exponent sign are
	// all optional.
	Scientific

	// The remaining values are intermediate states of the scanner in
	// Reader.Numeric. They are never returned by it.
	atDot   // Internally used to represent the state after reading ".".
	atE     // Internally used to represent the state after reading "e".
	atESign // Internally used to represent the state after reading "e±".
)
// Numeric tries to move the cursor past a number that starts at the cursor.
// It returns a constant of type NumberKind describing the kind of number it
// found. When NotNumeric is returned the cursor has not been moved.
//
// The scan is a small state machine that reuses the NumberKind values (plus
// the internal atDot, atE and atESign values) as its states. Every rune is
// lower-cased before it is examined, so "0X1F" and "1E5" are accepted just
// like "0x1f" and "1e5". Decimal, Octal and Hexadecimal numbers may be
// followed by a single 'u' (or 'U'), which is consumed as part of the number.
//
// An exponent marker that is not followed by at least one digit makes the
// whole scan fail: "1e" and "1.5e+" return NotNumeric rather than Decimal or
// Floating.
//
// NOTE(review): 'x' is accepted after any run of octal digits, not only
// directly after the leading "0" (so "017x1f" scans as Hexadecimal), and "0x"
// with no digits after it is still reported as Hexadecimal. Confirm that this
// leniency is intended.
func (r *Reader) Numeric() NumberKind {
	state := NotNumeric
	i := r.cursor
	for {
		// '?' stands in for the end of the input. No state accepts it, so
		// running off the end always terminates the scan through one of the
		// default branches below.
		var next = '?'
		if i < len(r.runes) {
			next = unicode.ToLower(r.runes[i])
		}
		// From here on i is the index just past next: "r.cursor = i" consumes
		// next, "r.cursor = i - 1" stops right before it.
		i++
		switch state {
		case NotNumeric:
			switch {
			case next == '0':
				state = Octal
			case next >= '1' && next <= '9':
				state = Decimal
			case next == '.':
				state = atDot
			default:
				return NotNumeric // We have read nothing
			}
		case Decimal:
			switch {
			case next >= '0' && next <= '9': // do nothing
			case next == '.':
				state = atDot
			case next == 'e':
				state = atE
			case next == 'u':
				// The 'u' suffix is part of the number.
				r.cursor = i
				return Decimal
			default:
				r.cursor = i - 1
				return Decimal
			}
		case Octal:
			switch {
			case next >= '0' && next <= '7': // do nothing
			case next == 'x':
				state = Hexadecimal
			case next == 'u':
				// The 'u' suffix is part of the number.
				r.cursor = i
				return Octal
			case next == '.' && i == r.cursor+2: // We have read "0."
				state = atDot
			default:
				r.cursor = i - 1
				return Octal
			}
		case Hexadecimal:
			switch {
			case (next >= '0' && next <= '9') || (next >= 'a' && next <= 'f'): // do nothing
			case next == 'u':
				// The 'u' suffix is part of the number.
				r.cursor = i
				return Hexadecimal
			default:
				r.cursor = i - 1
				return Hexadecimal
			}
		case atDot:
			switch {
			case next >= '0' && next <= '9':
				state = Floating
			case i > r.cursor+2: // There is at least one digit before the dot.
				if next == 'e' {
					state = atE
				} else {
					// A number such as "12." with nothing after the dot.
					r.cursor = i - 1
					return Floating
				}
			default:
				return NotNumeric // We have only read ".". This is bad.
			}
		case Floating:
			switch {
			case next >= '0' && next <= '9': // do nothing
			case next == 'e':
				state = atE
			default:
				r.cursor = i - 1
				return Floating
			}
		case atE:
			switch {
			case next >= '0' && next <= '9':
				state = Scientific
			case next == '+' || next == '-':
				state = atESign
			default:
				return NotNumeric // We need at least one digit after "e"
			}
		case atESign:
			switch {
			case next >= '0' && next <= '9':
				state = Scientific
			default:
				return NotNumeric // We need at least one digit after "e±"
			}
		case Scientific:
			switch {
			case next >= '0' && next <= '9': // do nothing
			default:
				r.cursor = i - 1
				return Scientific
			}
		}
	}
}
// AlphaNumeric moves the cursor past an identifier-like pattern: a letter or
// underscore followed by any number of letters, digits or underscores. It
// returns true and advances the cursor if the pattern was matched; otherwise
// it returns false and leaves the cursor where it was.
func (r *Reader) AlphaNumeric() bool {
	start := r.cursor
	if start >= len(r.runes) {
		return false
	}
	// The first rune is stricter than the rest: it may not be a digit.
	if first := r.runes[start]; first != '_' && !unicode.IsLetter(first) {
		return false
	}
	end := start + 1
	for end < len(r.runes) {
		c := r.runes[end]
		if c != '_' && !unicode.IsLetter(c) && !unicode.IsDigit(c) {
			break
		}
		end++
	}
	r.cursor = end
	return true
}
// NewReader returns a new Reader that reads from the supplied string, with
// both the token start and the cursor positioned at the beginning.
func NewReader(data string) *Reader {
	reader := new(Reader)
	reader.setData(data)
	return reader
}