int: reduce allocation by representing small ints as pointers (#280) * int: consolidate field accessors This change defines low-level accessors for the small and big arms of the int union so that the representation can be easily changed in a follow-up. Change-Id: I7c4ae279a6d2e7b76e102ba5d01a3cd1c56fb368 * int: improve performance by avoiding allocation This change defines a new representation for Int on 64-bit machines running a POSIX operating system, by reserving a 4GB portion of the address space. Pointers to addresses in this region represent int32 values, and are disjoint from all *big.Int pointers, allowing us to represent the union in a single pointer. This means the conversion from Int to Value does not allocate. The gauss benchmark (added in this CL) shows -40% ns, -48% bytes, -63% allocs: Benchmark/bench_gauss-12 84 13648744 ns/op 3175816 B/op 105862 allocs/op (before) Benchmark/bench_gauss-12 55 24283703 ns/op 6119844 B/op 289862 allocs/op (after) On 32-bit machines, or those running a non-POSIX system, we continue to use the old representation.

commit: c6daab680f283fdf16a79c99d8e86a1562400761 [log] [tgz]
author: alandonovan <adonovan@google.com> Wed Jun 17 14:27:56 2020 -0400
committer: GitHub <noreply@github.com> Wed Jun 17 14:27:56 2020 -0400
tree: 3a69918222b2653c9f7fd98f2ed22b846b6a1756
parent: fd77f1382f3580e9991b1b20a9985b14dd670dda [diff]
diff --git a/go.mod b/go.mod
index 2e64352..a9577a6 100644
--- a/go.mod
+++ b/go.mod

@@ -6,5 +6,5 @@
 	github.com/chzyer/logex v1.1.10 // indirect
 	github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
 	github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 // indirect
-	golang.org/x/sys v0.0.0-20191002063906-3421d5a6bb1c // indirect
+	golang.org/x/sys v0.0.0-20191002063906-3421d5a6bb1c
 )

diff --git a/starlark/int.go b/starlark/int.go
index 35bd42b..c40532a 100644
--- a/starlark/int.go
+++ b/starlark/int.go

@@ -14,27 +14,11 @@
 )
 
 // Int is the type of a Starlark int.
-type Int struct {
-	// We use only the signed 32 bit range of small to ensure
-	// that small+small and small*small do not overflow.
+//
+// The zero value is not a legal value; use MakeInt(0).
+type Int struct{ impl intImpl }
 
-	small int64    // minint32 <= small <= maxint32
-	big   *big.Int // big != nil <=> value is not representable as int32
-}
-
-// newBig allocates a new big.Int.
-func newBig(x int64) *big.Int {
-	if 0 <= x && int64(big.Word(x)) == x {
-		// x is guaranteed to fit into a single big.Word.
-		// Most starlark ints are small,
-		// but math/big assumes that since you've chosen to use math/big,
-		// your big.Ints will probably grow, so it over-allocates.
-		// Avoid that over-allocation by manually constructing a single-word slice.
-		// See https://golang.org/cl/150999, which will hopefully land in Go 1.13.
-		return new(big.Int).SetBits([]big.Word{big.Word(x)})
-	}
-	return big.NewInt(x)
-}
+// --- high-level accessors ---
 
 // MakeInt returns a Starlark int for the specified signed integer.
 func MakeInt(x int) Int { return MakeInt64(int64(x)) }
@@ -42,9 +26,9 @@
 // MakeInt64 returns a Starlark int for the specified int64.
 func MakeInt64(x int64) Int {
 	if math.MinInt32 <= x && x <= math.MaxInt32 {
-		return Int{small: x}
+		return makeSmallInt(x)
 	}
-	return Int{big: newBig(x)}
+	return makeBigInt(big.NewInt(x))
 }
 
 // MakeUint returns a Starlark int for the specified unsigned integer.
@@ -53,27 +37,23 @@
 // MakeUint64 returns a Starlark int for the specified uint64.
 func MakeUint64(x uint64) Int {
 	if x <= math.MaxInt32 {
-		return Int{small: int64(x)}
+		return makeSmallInt(int64(x))
 	}
-	if uint64(big.Word(x)) == x {
-		// See comment in newBig for an explanation of this optimization.
-		return Int{big: new(big.Int).SetBits([]big.Word{big.Word(x)})}
-	}
-	return Int{big: new(big.Int).SetUint64(x)}
+	return makeBigInt(new(big.Int).SetUint64(x))
 }
 
 // MakeBigInt returns a Starlark int for the specified big.Int.
 // The caller must not subsequently modify x.
 func MakeBigInt(x *big.Int) Int {
 	if n := x.BitLen(); n < 32 || n == 32 && x.Int64() == math.MinInt32 {
-		return Int{small: x.Int64()}
+		return makeSmallInt(x.Int64())
 	}
-	return Int{big: x}
+	return makeBigInt(x)
 }
 
 var (
-	zero, one = Int{small: 0}, Int{small: 1}
-	oneBig    = newBig(1)
+	zero, one = makeSmallInt(0), makeSmallInt(1)
+	oneBig    = big.NewInt(1)
 
 	_ HasUnary = Int{}
 )
@@ -94,39 +74,42 @@
 // Int64 returns the value as an int64.
 // If it is not exactly representable the result is undefined and ok is false.
 func (i Int) Int64() (_ int64, ok bool) {
-	if i.big != nil {
-		x, acc := bigintToInt64(i.big)
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		x, acc := bigintToInt64(iBig)
 		if acc != big.Exact {
 			return // inexact
 		}
 		return x, true
 	}
-	return i.small, true
+	return iSmall, true
 }
 
 // BigInt returns the value as a big.Int.
 // The returned variable must not be modified by the client.
 func (i Int) BigInt() *big.Int {
-	if i.big != nil {
-		return i.big
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		return iBig
 	}
-	return newBig(i.small)
+	return big.NewInt(iSmall)
 }
 
 // Uint64 returns the value as a uint64.
 // If it is not exactly representable the result is undefined and ok is false.
 func (i Int) Uint64() (_ uint64, ok bool) {
-	if i.big != nil {
-		x, acc := bigintToUint64(i.big)
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		x, acc := bigintToUint64(iBig)
 		if acc != big.Exact {
 			return // inexact
 		}
 		return x, true
 	}
-	if i.small < 0 {
+	if iSmall < 0 {
 		return // inexact
 	}
-	return uint64(i.small), true
+	return uint64(iSmall), true
 }
 
 // The math/big API should provide this function.
@@ -163,103 +146,125 @@
 )
 
 func (i Int) Format(s fmt.State, ch rune) {
-	if i.big != nil {
-		i.big.Format(s, ch)
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		iBig.Format(s, ch)
 		return
 	}
-	newBig(i.small).Format(s, ch)
+	big.NewInt(iSmall).Format(s, ch)
 }
 func (i Int) String() string {
-	if i.big != nil {
-		return i.big.Text(10)
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		return iBig.Text(10)
 	}
-	return strconv.FormatInt(i.small, 10)
+	return strconv.FormatInt(iSmall, 10)
 }
 func (i Int) Type() string { return "int" }
 func (i Int) Freeze()      {} // immutable
 func (i Int) Truth() Bool  { return i.Sign() != 0 }
 func (i Int) Hash() (uint32, error) {
+	iSmall, iBig := i.get()
 	var lo big.Word
-	if i.big != nil {
-		lo = i.big.Bits()[0]
+	if iBig != nil {
+		lo = iBig.Bits()[0]
 	} else {
-		lo = big.Word(i.small)
+		lo = big.Word(iSmall)
 	}
 	return 12582917 * uint32(lo+3), nil
 }
 func (x Int) CompareSameType(op syntax.Token, v Value, depth int) (bool, error) {
 	y := v.(Int)
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		return threeway(op, x.BigInt().Cmp(y.BigInt())), nil
 	}
-	return threeway(op, signum64(x.small-y.small)), nil
+	return threeway(op, signum64(xSmall-ySmall)), nil
 }
 
 // Float returns the float value nearest i.
 func (i Int) Float() Float {
-	if i.big != nil {
-		f, _ := new(big.Float).SetInt(i.big).Float64()
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		f, _ := new(big.Float).SetInt(iBig).Float64()
 		return Float(f)
 	}
-	return Float(i.small)
+	return Float(iSmall)
 }
 
 func (x Int) Sign() int {
-	if x.big != nil {
-		return x.big.Sign()
+	xSmall, xBig := x.get()
+	if xBig != nil {
+		return xBig.Sign()
 	}
-	return signum64(x.small)
+	return signum64(xSmall)
 }
 
 func (x Int) Add(y Int) Int {
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		return MakeBigInt(new(big.Int).Add(x.BigInt(), y.BigInt()))
 	}
-	return MakeInt64(x.small + y.small)
+	return MakeInt64(xSmall + ySmall)
 }
 func (x Int) Sub(y Int) Int {
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		return MakeBigInt(new(big.Int).Sub(x.BigInt(), y.BigInt()))
 	}
-	return MakeInt64(x.small - y.small)
+	return MakeInt64(xSmall - ySmall)
 }
 func (x Int) Mul(y Int) Int {
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		return MakeBigInt(new(big.Int).Mul(x.BigInt(), y.BigInt()))
 	}
-	return MakeInt64(x.small * y.small)
+	return MakeInt64(xSmall * ySmall)
 }
 func (x Int) Or(y Int) Int {
-	if x.big != nil || y.big != nil {
-		return Int{big: new(big.Int).Or(x.BigInt(), y.BigInt())}
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
+		return MakeBigInt(new(big.Int).Or(x.BigInt(), y.BigInt()))
 	}
-	return Int{small: x.small | y.small}
+	return makeSmallInt(xSmall | ySmall)
 }
 func (x Int) And(y Int) Int {
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		return MakeBigInt(new(big.Int).And(x.BigInt(), y.BigInt()))
 	}
-	return Int{small: x.small & y.small}
+	return makeSmallInt(xSmall & ySmall)
 }
 func (x Int) Xor(y Int) Int {
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		return MakeBigInt(new(big.Int).Xor(x.BigInt(), y.BigInt()))
 	}
-	return Int{small: x.small ^ y.small}
+	return makeSmallInt(xSmall ^ ySmall)
 }
 func (x Int) Not() Int {
-	if x.big != nil {
-		return MakeBigInt(new(big.Int).Not(x.big))
+	xSmall, xBig := x.get()
+	if xBig != nil {
+		return MakeBigInt(new(big.Int).Not(xBig))
 	}
-	return Int{small: ^x.small}
+	return makeSmallInt(^xSmall)
 }
 func (x Int) Lsh(y uint) Int { return MakeBigInt(new(big.Int).Lsh(x.BigInt(), y)) }
 func (x Int) Rsh(y uint) Int { return MakeBigInt(new(big.Int).Rsh(x.BigInt(), y)) }
 
 // Precondition: y is nonzero.
 func (x Int) Div(y Int) Int {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
 	// http://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html
-	if x.big != nil || y.big != nil {
+	if xBig != nil || yBig != nil {
 		xb, yb := x.BigInt(), y.BigInt()
 
 		var quo, rem big.Int
@@ -269,9 +274,9 @@
 		}
 		return MakeBigInt(&quo)
 	}
-	quo := x.small / y.small
-	rem := x.small % y.small
-	if (x.small < 0) != (y.small < 0) && rem != 0 {
+	quo := xSmall / ySmall
+	rem := xSmall % ySmall
+	if (xSmall < 0) != (ySmall < 0) && rem != 0 {
 		quo -= 1
 	}
 	return MakeInt64(quo)
@@ -279,7 +284,9 @@
 
 // Precondition: y is nonzero.
 func (x Int) Mod(y Int) Int {
-	if x.big != nil || y.big != nil {
+	xSmall, xBig := x.get()
+	ySmall, yBig := y.get()
+	if xBig != nil || yBig != nil {
 		xb, yb := x.BigInt(), y.BigInt()
 
 		var quo, rem big.Int
@@ -289,18 +296,19 @@
 		}
 		return MakeBigInt(&rem)
 	}
-	rem := x.small % y.small
-	if (x.small < 0) != (y.small < 0) && rem != 0 {
-		rem += y.small
+	rem := xSmall % ySmall
+	if (xSmall < 0) != (ySmall < 0) && rem != 0 {
+		rem += ySmall
 	}
-	return Int{small: rem}
+	return makeSmallInt(rem)
 }
 
 func (i Int) rational() *big.Rat {
-	if i.big != nil {
-		return new(big.Rat).SetInt(i.big)
+	iSmall, iBig := i.get()
+	if iBig != nil {
+		return new(big.Rat).SetInt(iBig)
 	}
-	return new(big.Rat).SetInt64(i.small)
+	return new(big.Rat).SetInt64(iSmall)
 }
 
 // AsInt32 returns the value of x if is representable as an int32.
@@ -309,10 +317,11 @@
 	if !ok {
 		return 0, fmt.Errorf("got %s, want int", x.Type())
 	}
-	if i.big != nil {
+	iSmall, iBig := i.get()
+	if iBig != nil {
 		return 0, fmt.Errorf("%s out of range", i)
 	}
-	return int(i.small), nil
+	return int(iSmall), nil
 }
 
 // NumberToInt converts a number x to an integer value.

diff --git a/starlark/int_generic.go b/starlark/int_generic.go
new file mode 100644
index 0000000..a472a4c
--- /dev/null
+++ b/starlark/int_generic.go

@@ -0,0 +1,33 @@
+//+build !linux,!darwin !amd64,!arm64,!mips64x,!ppc64x
+
+package starlark
+
+// generic Int implementation as a union
+
+import "math/big"
+
+type intImpl struct {
+	// We use only the signed 32-bit range of small to ensure
+	// that small+small and small*small do not overflow.
+	small_ int64    // minint32 <= small <= maxint32
+	big_   *big.Int // big != nil <=> value is not representable as int32
+}
+
+// --- low-level accessors ---
+
+// get returns the small and big components of the Int.
+// small is defined only if big is nil.
+// small is sign-extended to 64 bits for ease of subsequent arithmetic.
+func (i Int) get() (small int64, big *big.Int) {
+	return i.small_, i.big_
+}
+
+// Precondition: math.MinInt32 <= x && x <= math.MaxInt32
+func makeSmallInt(x int64) Int {
+	return Int{intImpl{small_: x}}
+}
+
+// Precondition: x cannot be represented as int32.
+func makeBigInt(x *big.Int) Int {
+	return Int{intImpl{big_: x}}
+}

diff --git a/starlark/int_posix64.go b/starlark/int_posix64.go
new file mode 100644
index 0000000..9e979d3
--- /dev/null
+++ b/starlark/int_posix64.go

@@ -0,0 +1,59 @@
+//+build linux darwin
+//+build amd64 arm64 mips64x ppc64x
+
+package starlark
+
+// This file defines an optimized Int implementation for 64-bit machines
+// running POSIX. It reserves a 4GB portion of the address space using
+// mmap and represents int32 values as addresses within that range. This
+// disambiguates int32 values from *big.Int pointers, letting all Int
+// values be represented as an unsafe.Pointer, so that Int-to-Value
+// interface conversion need not allocate.
+
+import (
+	"log"
+	"math"
+	"math/big"
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+)
+
+// intImpl represents a union of (int32, *big.Int) in a single pointer,
+// so that Int-to-Value conversions need not allocate.
+//
+// The pointer is either a *big.Int, if the value is big, or a pointer into a
+// reserved portion of the address space (smallints), if the value is small.
+//
+// See int_generic.go for the basic representation concepts.
+type intImpl unsafe.Pointer
+
+// get returns the (small, big) arms of the union.
+func (i Int) get() (int64, *big.Int) {
+	ptr := uintptr(i.impl)
+	if ptr >= smallints && ptr < smallints+1<<32 {
+		return math.MinInt32 + int64(ptr-smallints), nil
+	}
+	return 0, (*big.Int)(i.impl)
+}
+
+// Precondition: math.MinInt32 <= x && x <= math.MaxInt32
+func makeSmallInt(x int64) Int {
+	return Int{intImpl(uintptr(x-math.MinInt32) + smallints)}
+}
+
+// Precondition: x cannot be represented as int32.
+func makeBigInt(x *big.Int) Int { return Int{intImpl(x)} }
+
+// smallints is the base address of a 2^32 byte memory region.
+// Pointers to addresses in this region represent int32 values.
+// We assume smallints is not at the very top of the address space.
+var smallints = reserveAddresses(1 << 32)
+
+func reserveAddresses(len int) uintptr {
+	b, err := unix.Mmap(-1, 0, len, unix.PROT_READ, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
+	if err != nil {
+		log.Fatalf("mmap: %v", err)
+	}
+	return uintptr(unsafe.Pointer(&b[0]))
+}

diff --git a/starlark/int_test.go b/starlark/int_test.go
index 985bc38..6725616 100644
--- a/starlark/int_test.go
+++ b/starlark/int_test.go

@@ -56,16 +56,17 @@
 		if got := fmt.Sprintf("%x", test.val); got != test.want {
 			t.Errorf("%d equals %s, want %s", i, got, test.want)
 		}
-		if test.val.small < math.MinInt32 || math.MaxInt32 < test.val.small {
+		small, big := test.val.get()
+		if small < math.MinInt32 || math.MaxInt32 < small {
 			t.Errorf("expected big, %d %s", i, test.val)
 		}
-		if test.val.big == nil {
+		if big == nil {
 			continue
 		}
-		if test.val.small != 0 {
-			t.Errorf("expected 0 small, %d %s with %d", i, test.val, test.val.small)
+		if small != 0 {
+			t.Errorf("expected 0 small, %d %s with %d", i, test.val, small)
 		}
-		if b := test.val.big; b.Cmp(left) >= 0 && b.Cmp(right) <= 0 {
+		if big.Cmp(left) >= 0 && big.Cmp(right) <= 0 {
 			t.Errorf("expected small, %d %s", i, test.val)
 		}
 	}

diff --git a/starlark/testdata/benchmark.star b/starlark/testdata/benchmark.star
index 2fc6f02..18a1a2c 100644
--- a/starlark/testdata/benchmark.star
+++ b/starlark/testdata/benchmark.star

@@ -32,6 +32,12 @@
         a += 1
 
 def bench_bigint():
-    a = 1 << 31 # maxint32 + 1
+    a = 1 << 31  # maxint32 + 1
     for _ in range1000:
         a += 1
+
+def bench_gauss():
+    # Sum of arithmetic series. All results fit in int32.
+    acc = 0
+    for x in range(92000):
+        acc += x
commit	c6daab680f283fdf16a79c99d8e86a1562400761	[log] [tgz]
author	alandonovan <adonovan@google.com>	Wed Jun 17 14:27:56 2020 -0400
committer	GitHub <noreply@github.com>	Wed Jun 17 14:27:56 2020 -0400
tree	3a69918222b2653c9f7fd98f2ed22b846b6a1756
parent	fd77f1382f3580e9991b1b20a9985b14dd670dda [diff]