blob: fc5d53ff0fb01e54e8d03bd38012269dc26dcbdf [file] [log] [blame]
// Copyright 2020 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package starlarkjson defines utilities for converting Starlark values
// to/from JSON strings. The most recent IETF standard for JSON is
// https://www.ietf.org/rfc/rfc7159.txt.
package starlarkjson // import "go.starlark.net/starlarkjson"
import (
"bytes"
"encoding/json"
"fmt"
"log"
"math"
"math/big"
"sort"
"strconv"
"strings"
"unicode/utf8"
"go.starlark.net/starlark"
"go.starlark.net/starlarkstruct"
)
// Module json is a Starlark module of JSON-related functions.
//
// json = module(
// encode,
// decode,
// indent,
// )
//
// def encode(x):
//
// The encode function accepts one required positional argument,
// which it converts to JSON by cases:
// - A Starlark value that implements Go's standard json.Marshal
// interface defines its own JSON encoding.
// - None, True, and False are converted to null, true, and false, respectively.
// - Starlark int values, no matter how large, are encoded as decimal integers.
// Some decoders may not be able to decode very large integers.
// - Starlark float values are encoded using decimal point notation,
// even if the value is an integer.
// It is an error to encode a non-finite floating-point value.
// - Starlark strings are encoded as JSON strings, using UTF-16 escapes.
// - a Starlark IterableMapping (e.g. dict) is encoded as a JSON object.
// It is an error if any key is not a string.
// - any other Starlark Iterable (e.g. list, tuple) is encoded as a JSON array.
// - a Starlark HasAttrs (e.g. struct) is encoded as a JSON object.
// It an application-defined type matches more than one the cases describe above,
// (e.g. it implements both Iterable and HasFields), the first case takes precedence.
// Encoding any other value yields an error.
//
// def decode(x):
//
// The decode function accepts one positional parameter, a JSON string.
// It returns the Starlark value that the string denotes.
// - Numbers are parsed as int or float, depending on whether they
// contain a decimal point.
// - JSON objects are parsed as new unfrozen Starlark dicts.
// - JSON arrays are parsed as new unfrozen Starlark lists.
// Decoding fails if x is not a valid JSON string.
//
// def indent(str, *, prefix="", indent="\t"):
//
// The indent function pretty-prints a valid JSON encoding,
// and returns a string containing the indented form.
// It accepts one required positional parameter, the JSON string,
// and two optional keyword-only string parameters, prefix and indent,
// that specify a prefix of each new line, and the unit of indentation.
//
var Module = &starlarkstruct.Module{
Name: "json",
Members: starlark.StringDict{
"encode": starlark.NewBuiltin("json.encode", encode),
"decode": starlark.NewBuiltin("json.decode", decode),
"indent": starlark.NewBuiltin("json.indent", indent),
},
}
func encode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
var x starlark.Value
if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &x); err != nil {
return nil, err
}
buf := new(bytes.Buffer)
var quoteSpace [128]byte
quote := func(s string) {
// Non-trivial escaping is handled by Go's encoding/json.
if isPrintableASCII(s) {
buf.Write(strconv.AppendQuote(quoteSpace[:0], s))
} else {
// TODO(adonovan): opt: RFC 8259 mandates UTF-8 for JSON.
// Can we avoid this call?
data, _ := json.Marshal(s)
buf.Write(data)
}
}
var emit func(x starlark.Value) error
emit = func(x starlark.Value) error {
switch x := x.(type) {
case json.Marshaler:
// Application-defined starlark.Value types
// may define their own JSON encoding.
data, err := x.MarshalJSON()
if err != nil {
return err
}
buf.Write(data)
case starlark.NoneType:
buf.WriteString("null")
case starlark.Bool:
if x {
buf.WriteString("true")
} else {
buf.WriteString("false")
}
case starlark.Int:
fmt.Fprint(buf, x)
case starlark.Float:
if !isFinite(float64(x)) {
return fmt.Errorf("cannot encode non-finite float %v", x)
}
fmt.Fprintf(buf, "%g", x) // always contains a decimal point
case starlark.String:
quote(string(x))
case starlark.IterableMapping:
// e.g. dict (must have string keys)
buf.WriteByte('{')
items := x.Items()
for _, item := range items {
if _, ok := item[0].(starlark.String); !ok {
return fmt.Errorf("%s has %s key, want string", x.Type(), item[0].Type())
}
}
sort.Slice(items, func(i, j int) bool {
return items[i][0].(starlark.String) < items[j][0].(starlark.String)
})
for i, item := range items {
if i > 0 {
buf.WriteByte(',')
}
k, _ := starlark.AsString(item[0])
quote(k)
buf.WriteByte(':')
if err := emit(item[1]); err != nil {
return fmt.Errorf("in %s key %s: %v", x.Type(), item[0], err)
}
}
buf.WriteByte('}')
case starlark.Iterable:
// e.g. tuple, list
buf.WriteByte('[')
iter := x.Iterate()
defer iter.Done()
var elem starlark.Value
for i := 0; iter.Next(&elem); i++ {
if i > 0 {
buf.WriteByte(',')
}
if err := emit(elem); err != nil {
return fmt.Errorf("at %s index %d: %v", x.Type(), i, err)
}
}
buf.WriteByte(']')
case starlark.HasAttrs:
// e.g. struct
buf.WriteByte('{')
var names []string
names = append(names, x.AttrNames()...)
sort.Strings(names)
for i, name := range names {
v, err := x.Attr(name)
if err != nil || v == nil {
log.Fatalf("internal error: dir(%s) includes %q but value has no .%s field", x.Type(), name, name)
}
if i > 0 {
buf.WriteByte(',')
}
quote(name)
buf.WriteByte(':')
if err := emit(v); err != nil {
return fmt.Errorf("in field .%s: %v", name, err)
}
}
buf.WriteByte('}')
default:
return fmt.Errorf("cannot encode %s as JSON", x.Type())
}
return nil
}
if err := emit(x); err != nil {
return nil, fmt.Errorf("%s: %v", b.Name(), err)
}
return starlark.String(buf.String()), nil
}
// isPrintableASCII reports whether s contains only printable ASCII.
func isPrintableASCII(s string) bool {
for i := 0; i < len(s); i++ {
b := s[i]
if b < 0x20 || b >= 0x80 {
return false
}
}
return true
}
// isFinite reports whether f represents a finite rational value.
// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0).
func isFinite(f float64) bool {
return math.Abs(f) <= math.MaxFloat64
}
func indent(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
prefix, indent := "", "\t" // keyword-only
if err := starlark.UnpackArgs(b.Name(), nil, kwargs,
"prefix?", &prefix,
"indent?", &indent,
); err != nil {
return nil, err
}
var str string // positional-only
if err := starlark.UnpackPositionalArgs(b.Name(), args, nil, 1, &str); err != nil {
return nil, err
}
buf := new(bytes.Buffer)
if err := json.Indent(buf, []byte(str), prefix, indent); err != nil {
return nil, fmt.Errorf("%s: %v", b.Name(), err)
}
return starlark.String(buf.String()), nil
}
func decode(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (_ starlark.Value, err error) {
var s string
if err := starlark.UnpackPositionalArgs(b.Name(), args, kwargs, 1, &s); err != nil {
return nil, err
}
// The decoder necessarily makes certain representation choices
// such as list vs tuple, struct vs dict, int vs float.
// In principle, we could parameterize it to allow the caller to
// control the returned types, but there's no compelling need yet.
// Use panic/recover with a distinguished type (failure) for error handling.
type failure string
fail := func(format string, args ...interface{}) {
panic(failure(fmt.Sprintf(format, args...)))
}
i := 0
// skipSpace consumes leading spaces, and reports whether there is more input.
skipSpace := func() bool {
for ; i < len(s); i++ {
b := s[i]
if b != ' ' && b != '\t' && b != '\n' && b != '\r' {
return true
}
}
return false
}
// next consumes leading spaces and returns the first non-space.
// It panics if at EOF.
next := func() byte {
if skipSpace() {
return s[i]
}
fail("unexpected end of file")
panic("unreachable")
}
// parse returns the next JSON value from the input.
// It consumes leading but not trailing whitespace.
// It panics on error.
var parse func() starlark.Value
parse = func() starlark.Value {
b := next()
switch b {
case '"':
// string
// Find end of quotation.
// Also, record whether trivial unquoting is safe.
// Non-trivial unquoting is handled by Go's encoding/json.
safe := true
closed := false
j := i + 1
for ; j < len(s); j++ {
b := s[j]
if b == '\\' {
safe = false
j++ // skip x in \x
} else if b == '"' {
closed = true
j++ // skip '"'
break
} else if b >= utf8.RuneSelf {
safe = false
}
}
if !closed {
fail("unclosed string literal")
}
r := s[i:j]
i = j
// unquote
if safe {
r = r[1 : len(r)-1]
} else if err := json.Unmarshal([]byte(r), &r); err != nil {
fail("%s", err)
}
return starlark.String(r)
case 'n':
if strings.HasPrefix(s[i:], "null") {
i += len("null")
return starlark.None
}
case 't':
if strings.HasPrefix(s[i:], "true") {
i += len("true")
return starlark.True
}
case 'f':
if strings.HasPrefix(s[i:], "false") {
i += len("false")
return starlark.False
}
case '[':
// array
var elems []starlark.Value
i++ // '['
b = next()
if b != ']' {
for {
elem := parse()
elems = append(elems, elem)
b = next()
if b != ',' {
if b != ']' {
fail("got %q, want ',' or ']'", b)
}
break
}
i++ // ','
}
}
i++ // ']'
return starlark.NewList(elems)
case '{':
// object
dict := new(starlark.Dict)
i++ // '{'
b = next()
if b != '}' {
for {
key := parse()
if _, ok := key.(starlark.String); !ok {
fail("got %s for object key, want string", key.Type())
}
b = next()
if b != ':' {
fail("after object key, got %q, want ':' ", b)
}
i++ // ':'
value := parse()
dict.SetKey(key, value) // can't fail
b = next()
if b != ',' {
if b != '}' {
fail("in object, got %q, want ',' or '}'", b)
}
break
}
i++ // ','
}
}
i++ // '}'
return dict
default:
// number?
if isdigit(b) || b == '-' {
// scan literal. Allow [0-9+-eE.] for now.
float := false
var j int
for j = i + 1; j < len(s); j++ {
b = s[j]
if isdigit(b) {
// ok
} else if b == '.' ||
b == 'e' ||
b == 'E' ||
b == '+' ||
b == '-' {
float = true
} else {
break
}
}
num := s[i:j]
i = j
// Unlike most C-like languages,
// JSON disallows a leading zero before a digit.
digits := num
if num[0] == '-' {
digits = num[1:]
}
if digits == "" || digits[0] == '0' && len(digits) > 1 && isdigit(digits[1]) {
fail("invalid number: %s", num)
}
// parse literal
if float {
x, err := strconv.ParseFloat(num, 64)
if err != nil {
fail("invalid number: %s", num)
}
return starlark.Float(x)
} else {
x, ok := new(big.Int).SetString(num, 10)
if !ok {
fail("invalid number: %s", num)
}
return starlark.MakeBigInt(x)
}
}
}
fail("unexpected character %q", b)
panic("unreachable")
}
defer func() {
x := recover()
switch x := x.(type) {
case failure:
err = fmt.Errorf("json.decode: at offset %d, %s", i, x)
case nil:
// nop
default:
panic(x) // unexpected panic
}
}()
x := parse()
if skipSpace() {
fail("unexpected character %q after value", s[i])
}
return x, nil
}
func isdigit(b byte) bool {
return b >= '0' && b <= '9'
}