int: reduce allocation by representing small ints as pointers (#280)
* int: consolidate field accessors
This change defines low-level accessors for the small and big
arms of the int union so that the representation can be easily
changed in a follow-up.
Change-Id: I7c4ae279a6d2e7b76e102ba5d01a3cd1c56fb368
* int: improve performance by avoiding allocation
This change defines a new representation for Int on 64-bit machines
running a POSIX operating system, by reserving a 4GB portion of the
address space.
Pointers to addresses in this region represent int32 values,
and are disjoint from all *big.Int pointers,
allowing us to represent the union in a single pointer.
This means the conversion from Int to Value does not allocate.
The gauss benchmark (added in this CL) shows -40% ns, -48% bytes, -63% allocs:
Benchmark/bench_gauss-12 84 13648744 ns/op 3175816 B/op 105862 allocs/op (before)
Benchmark/bench_gauss-12 55 24283703 ns/op 6119844 B/op 289862 allocs/op (after)
On 32-bit machines, or those running a non-POSIX system,
we continue to use the old representation.
diff --git a/go.mod b/go.mod
index 2e64352..a9577a6 100644
--- a/go.mod
+++ b/go.mod
@@ -6,5 +6,5 @@
github.com/chzyer/logex v1.1.10 // indirect
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 // indirect
- golang.org/x/sys v0.0.0-20191002063906-3421d5a6bb1c // indirect
+ golang.org/x/sys v0.0.0-20191002063906-3421d5a6bb1c
)
diff --git a/starlark/int.go b/starlark/int.go
index 35bd42b..c40532a 100644
--- a/starlark/int.go
+++ b/starlark/int.go
@@ -14,27 +14,11 @@
)
// Int is the type of a Starlark int.
-type Int struct {
- // We use only the signed 32 bit range of small to ensure
- // that small+small and small*small do not overflow.
+//
+// The zero value is not a legal value; use MakeInt(0).
+type Int struct{ impl intImpl }
- small int64 // minint32 <= small <= maxint32
- big *big.Int // big != nil <=> value is not representable as int32
-}
-
-// newBig allocates a new big.Int.
-func newBig(x int64) *big.Int {
- if 0 <= x && int64(big.Word(x)) == x {
- // x is guaranteed to fit into a single big.Word.
- // Most starlark ints are small,
- // but math/big assumes that since you've chosen to use math/big,
- // your big.Ints will probably grow, so it over-allocates.
- // Avoid that over-allocation by manually constructing a single-word slice.
- // See https://golang.org/cl/150999, which will hopefully land in Go 1.13.
- return new(big.Int).SetBits([]big.Word{big.Word(x)})
- }
- return big.NewInt(x)
-}
+// --- high-level accessors ---
// MakeInt returns a Starlark int for the specified signed integer.
func MakeInt(x int) Int { return MakeInt64(int64(x)) }
@@ -42,9 +26,9 @@
// MakeInt64 returns a Starlark int for the specified int64.
func MakeInt64(x int64) Int {
if math.MinInt32 <= x && x <= math.MaxInt32 {
- return Int{small: x}
+ return makeSmallInt(x)
}
- return Int{big: newBig(x)}
+ return makeBigInt(big.NewInt(x))
}
// MakeUint returns a Starlark int for the specified unsigned integer.
@@ -53,27 +37,23 @@
// MakeUint64 returns a Starlark int for the specified uint64.
func MakeUint64(x uint64) Int {
if x <= math.MaxInt32 {
- return Int{small: int64(x)}
+ return makeSmallInt(int64(x))
}
- if uint64(big.Word(x)) == x {
- // See comment in newBig for an explanation of this optimization.
- return Int{big: new(big.Int).SetBits([]big.Word{big.Word(x)})}
- }
- return Int{big: new(big.Int).SetUint64(x)}
+ return makeBigInt(new(big.Int).SetUint64(x))
}
// MakeBigInt returns a Starlark int for the specified big.Int.
// The caller must not subsequently modify x.
func MakeBigInt(x *big.Int) Int {
if n := x.BitLen(); n < 32 || n == 32 && x.Int64() == math.MinInt32 {
- return Int{small: x.Int64()}
+ return makeSmallInt(x.Int64())
}
- return Int{big: x}
+ return makeBigInt(x)
}
var (
- zero, one = Int{small: 0}, Int{small: 1}
- oneBig = newBig(1)
+ zero, one = makeSmallInt(0), makeSmallInt(1)
+ oneBig = big.NewInt(1)
_ HasUnary = Int{}
)
@@ -94,39 +74,42 @@
// Int64 returns the value as an int64.
// If it is not exactly representable the result is undefined and ok is false.
func (i Int) Int64() (_ int64, ok bool) {
- if i.big != nil {
- x, acc := bigintToInt64(i.big)
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ x, acc := bigintToInt64(iBig)
if acc != big.Exact {
return // inexact
}
return x, true
}
- return i.small, true
+ return iSmall, true
}
// BigInt returns the value as a big.Int.
// The returned variable must not be modified by the client.
func (i Int) BigInt() *big.Int {
- if i.big != nil {
- return i.big
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ return iBig
}
- return newBig(i.small)
+ return big.NewInt(iSmall)
}
// Uint64 returns the value as a uint64.
// If it is not exactly representable the result is undefined and ok is false.
func (i Int) Uint64() (_ uint64, ok bool) {
- if i.big != nil {
- x, acc := bigintToUint64(i.big)
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ x, acc := bigintToUint64(iBig)
if acc != big.Exact {
return // inexact
}
return x, true
}
- if i.small < 0 {
+ if iSmall < 0 {
return // inexact
}
- return uint64(i.small), true
+ return uint64(iSmall), true
}
// The math/big API should provide this function.
@@ -163,103 +146,125 @@
)
func (i Int) Format(s fmt.State, ch rune) {
- if i.big != nil {
- i.big.Format(s, ch)
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ iBig.Format(s, ch)
return
}
- newBig(i.small).Format(s, ch)
+ big.NewInt(iSmall).Format(s, ch)
}
func (i Int) String() string {
- if i.big != nil {
- return i.big.Text(10)
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ return iBig.Text(10)
}
- return strconv.FormatInt(i.small, 10)
+ return strconv.FormatInt(iSmall, 10)
}
func (i Int) Type() string { return "int" }
func (i Int) Freeze() {} // immutable
func (i Int) Truth() Bool { return i.Sign() != 0 }
func (i Int) Hash() (uint32, error) {
+ iSmall, iBig := i.get()
var lo big.Word
- if i.big != nil {
- lo = i.big.Bits()[0]
+ if iBig != nil {
+ lo = iBig.Bits()[0]
} else {
- lo = big.Word(i.small)
+ lo = big.Word(iSmall)
}
return 12582917 * uint32(lo+3), nil
}
func (x Int) CompareSameType(op syntax.Token, v Value, depth int) (bool, error) {
y := v.(Int)
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
return threeway(op, x.BigInt().Cmp(y.BigInt())), nil
}
- return threeway(op, signum64(x.small-y.small)), nil
+ return threeway(op, signum64(xSmall-ySmall)), nil
}
// Float returns the float value nearest i.
func (i Int) Float() Float {
- if i.big != nil {
- f, _ := new(big.Float).SetInt(i.big).Float64()
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ f, _ := new(big.Float).SetInt(iBig).Float64()
return Float(f)
}
- return Float(i.small)
+ return Float(iSmall)
}
func (x Int) Sign() int {
- if x.big != nil {
- return x.big.Sign()
+ xSmall, xBig := x.get()
+ if xBig != nil {
+ return xBig.Sign()
}
- return signum64(x.small)
+ return signum64(xSmall)
}
func (x Int) Add(y Int) Int {
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
return MakeBigInt(new(big.Int).Add(x.BigInt(), y.BigInt()))
}
- return MakeInt64(x.small + y.small)
+ return MakeInt64(xSmall + ySmall)
}
func (x Int) Sub(y Int) Int {
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
return MakeBigInt(new(big.Int).Sub(x.BigInt(), y.BigInt()))
}
- return MakeInt64(x.small - y.small)
+ return MakeInt64(xSmall - ySmall)
}
func (x Int) Mul(y Int) Int {
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
return MakeBigInt(new(big.Int).Mul(x.BigInt(), y.BigInt()))
}
- return MakeInt64(x.small * y.small)
+ return MakeInt64(xSmall * ySmall)
}
func (x Int) Or(y Int) Int {
- if x.big != nil || y.big != nil {
- return Int{big: new(big.Int).Or(x.BigInt(), y.BigInt())}
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
+ return MakeBigInt(new(big.Int).Or(x.BigInt(), y.BigInt()))
}
- return Int{small: x.small | y.small}
+ return makeSmallInt(xSmall | ySmall)
}
func (x Int) And(y Int) Int {
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
return MakeBigInt(new(big.Int).And(x.BigInt(), y.BigInt()))
}
- return Int{small: x.small & y.small}
+ return makeSmallInt(xSmall & ySmall)
}
func (x Int) Xor(y Int) Int {
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
return MakeBigInt(new(big.Int).Xor(x.BigInt(), y.BigInt()))
}
- return Int{small: x.small ^ y.small}
+ return makeSmallInt(xSmall ^ ySmall)
}
func (x Int) Not() Int {
- if x.big != nil {
- return MakeBigInt(new(big.Int).Not(x.big))
+ xSmall, xBig := x.get()
+ if xBig != nil {
+ return MakeBigInt(new(big.Int).Not(xBig))
}
- return Int{small: ^x.small}
+ return makeSmallInt(^xSmall)
}
func (x Int) Lsh(y uint) Int { return MakeBigInt(new(big.Int).Lsh(x.BigInt(), y)) }
func (x Int) Rsh(y uint) Int { return MakeBigInt(new(big.Int).Rsh(x.BigInt(), y)) }
// Precondition: y is nonzero.
func (x Int) Div(y Int) Int {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
// http://python-history.blogspot.com/2010/08/why-pythons-integer-division-floors.html
- if x.big != nil || y.big != nil {
+ if xBig != nil || yBig != nil {
xb, yb := x.BigInt(), y.BigInt()
var quo, rem big.Int
@@ -269,9 +274,9 @@
}
return MakeBigInt(&quo)
}
- quo := x.small / y.small
- rem := x.small % y.small
- if (x.small < 0) != (y.small < 0) && rem != 0 {
+ quo := xSmall / ySmall
+ rem := xSmall % ySmall
+ if (xSmall < 0) != (ySmall < 0) && rem != 0 {
quo -= 1
}
return MakeInt64(quo)
@@ -279,7 +284,9 @@
// Precondition: y is nonzero.
func (x Int) Mod(y Int) Int {
- if x.big != nil || y.big != nil {
+ xSmall, xBig := x.get()
+ ySmall, yBig := y.get()
+ if xBig != nil || yBig != nil {
xb, yb := x.BigInt(), y.BigInt()
var quo, rem big.Int
@@ -289,18 +296,19 @@
}
return MakeBigInt(&rem)
}
- rem := x.small % y.small
- if (x.small < 0) != (y.small < 0) && rem != 0 {
- rem += y.small
+ rem := xSmall % ySmall
+ if (xSmall < 0) != (ySmall < 0) && rem != 0 {
+ rem += ySmall
}
- return Int{small: rem}
+ return makeSmallInt(rem)
}
func (i Int) rational() *big.Rat {
- if i.big != nil {
- return new(big.Rat).SetInt(i.big)
+ iSmall, iBig := i.get()
+ if iBig != nil {
+ return new(big.Rat).SetInt(iBig)
}
- return new(big.Rat).SetInt64(i.small)
+ return new(big.Rat).SetInt64(iSmall)
}
// AsInt32 returns the value of x if is representable as an int32.
@@ -309,10 +317,11 @@
if !ok {
return 0, fmt.Errorf("got %s, want int", x.Type())
}
- if i.big != nil {
+ iSmall, iBig := i.get()
+ if iBig != nil {
return 0, fmt.Errorf("%s out of range", i)
}
- return int(i.small), nil
+ return int(iSmall), nil
}
// NumberToInt converts a number x to an integer value.
diff --git a/starlark/int_generic.go b/starlark/int_generic.go
new file mode 100644
index 0000000..a472a4c
--- /dev/null
+++ b/starlark/int_generic.go
@@ -0,0 +1,33 @@
+//+build !linux,!darwin !amd64,!arm64,!mips64x,!ppc64x
+
+package starlark
+
+// generic Int implementation as a union
+
+import "math/big"
+
+type intImpl struct {
+ // We use only the signed 32-bit range of small to ensure
+ // that small+small and small*small do not overflow.
+ small_ int64 // minint32 <= small <= maxint32
+ big_ *big.Int // big != nil <=> value is not representable as int32
+}
+
+// --- low-level accessors ---
+
+// get returns the small and big components of the Int.
+// small is defined only if big is nil.
+// small is sign-extended to 64 bits for ease of subsequent arithmetic.
+func (i Int) get() (small int64, big *big.Int) {
+ return i.small_, i.big_
+}
+
+// Precondition: math.MinInt32 <= x && x <= math.MaxInt32
+func makeSmallInt(x int64) Int {
+ return Int{intImpl{small_: x}}
+}
+
+// Precondition: x cannot be represented as int32.
+func makeBigInt(x *big.Int) Int {
+ return Int{intImpl{big_: x}}
+}
diff --git a/starlark/int_posix64.go b/starlark/int_posix64.go
new file mode 100644
index 0000000..9e979d3
--- /dev/null
+++ b/starlark/int_posix64.go
@@ -0,0 +1,59 @@
+//+build linux darwin
+//+build amd64 arm64 mips64x ppc64x
+
+package starlark
+
+// This file defines an optimized Int implementation for 64-bit machines
+// running POSIX. It reserves a 4GB portion of the address space using
+// mmap and represents int32 values as addresses within that range. This
+// disambiguates int32 values from *big.Int pointers, letting all Int
+// values be represented as an unsafe.Pointer, so that Int-to-Value
+// interface conversion need not allocate.
+
+import (
+ "log"
+ "math"
+ "math/big"
+ "unsafe"
+
+ "golang.org/x/sys/unix"
+)
+
+// intImpl represents a union of (int32, *big.Int) in a single pointer,
+// so that Int-to-Value conversions need not allocate.
+//
+// The pointer is either a *big.Int, if the value is big, or a pointer into a
+// reserved portion of the address space (smallints), if the value is small.
+//
+// See int_generic.go for the basic representation concepts.
+type intImpl unsafe.Pointer
+
+// get returns the (small, big) arms of the union.
+func (i Int) get() (int64, *big.Int) {
+ ptr := uintptr(i.impl)
+ if ptr >= smallints && ptr < smallints+1<<32 {
+ return math.MinInt32 + int64(ptr-smallints), nil
+ }
+ return 0, (*big.Int)(i.impl)
+}
+
+// Precondition: math.MinInt32 <= x && x <= math.MaxInt32
+func makeSmallInt(x int64) Int {
+ return Int{intImpl(uintptr(x-math.MinInt32) + smallints)}
+}
+
+// Precondition: x cannot be represented as int32.
+func makeBigInt(x *big.Int) Int { return Int{intImpl(x)} }
+
+// smallints is the base address of a 2^32 byte memory region.
+// Pointers to addresses in this region represent int32 values.
+// We assume smallints is not at the very top of the address space.
+var smallints = reserveAddresses(1 << 32)
+
+func reserveAddresses(len int) uintptr {
+ b, err := unix.Mmap(-1, 0, len, unix.PROT_READ, unix.MAP_PRIVATE|unix.MAP_ANONYMOUS)
+ if err != nil {
+ log.Fatalf("mmap: %v", err)
+ }
+ return uintptr(unsafe.Pointer(&b[0]))
+}
diff --git a/starlark/int_test.go b/starlark/int_test.go
index 985bc38..6725616 100644
--- a/starlark/int_test.go
+++ b/starlark/int_test.go
@@ -56,16 +56,17 @@
if got := fmt.Sprintf("%x", test.val); got != test.want {
t.Errorf("%d equals %s, want %s", i, got, test.want)
}
- if test.val.small < math.MinInt32 || math.MaxInt32 < test.val.small {
+ small, big := test.val.get()
+ if small < math.MinInt32 || math.MaxInt32 < small {
t.Errorf("expected big, %d %s", i, test.val)
}
- if test.val.big == nil {
+ if big == nil {
continue
}
- if test.val.small != 0 {
- t.Errorf("expected 0 small, %d %s with %d", i, test.val, test.val.small)
+ if small != 0 {
+ t.Errorf("expected 0 small, %d %s with %d", i, test.val, small)
}
- if b := test.val.big; b.Cmp(left) >= 0 && b.Cmp(right) <= 0 {
+ if big.Cmp(left) >= 0 && big.Cmp(right) <= 0 {
t.Errorf("expected small, %d %s", i, test.val)
}
}
diff --git a/starlark/testdata/benchmark.star b/starlark/testdata/benchmark.star
index 2fc6f02..18a1a2c 100644
--- a/starlark/testdata/benchmark.star
+++ b/starlark/testdata/benchmark.star
@@ -32,6 +32,12 @@
a += 1
def bench_bigint():
- a = 1 << 31 # maxint32 + 1
+ a = 1 << 31 # maxint32 + 1
for _ in range1000:
a += 1
+
+def bench_gauss():
+ # Sum of arithmetic series. All results fit in int32.
+ acc = 0
+ for x in range(92000):
+ acc += x