blob: 36ebe18b82f3fd29b01318bf413423e124382a1d [file] [log] [blame]
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package inlheur
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/pgo"
"cmd/compile/internal/typecheck"
"fmt"
"os"
"strings"
)
type callSiteAnalyzer struct {
fn *ir.Func
*nameFinder
}
type callSiteTableBuilder struct {
fn *ir.Func
*nameFinder
cstab CallSiteTab
ptab map[ir.Node]pstate
nstack []ir.Node
loopNest int
isInit bool
}
func makeCallSiteAnalyzer(fn *ir.Func) *callSiteAnalyzer {
return &callSiteAnalyzer{
fn: fn,
nameFinder: newNameFinder(fn),
}
}
func makeCallSiteTableBuilder(fn *ir.Func, cstab CallSiteTab, ptab map[ir.Node]pstate, loopNestingLevel int, nf *nameFinder) *callSiteTableBuilder {
isInit := fn.IsPackageInit() || strings.HasPrefix(fn.Sym().Name, "init.")
return &callSiteTableBuilder{
fn: fn,
cstab: cstab,
ptab: ptab,
isInit: isInit,
loopNest: loopNestingLevel,
nstack: []ir.Node{fn},
nameFinder: nf,
}
}
// computeCallSiteTable builds and returns a table of call sites for
// the specified region in function fn. A region here corresponds to a
// specific subtree within the AST for a function. The main intended
// use cases are for 'region' to be either A) an entire function body,
// or B) an inlined call expression.
func computeCallSiteTable(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, ptab map[ir.Node]pstate, loopNestingLevel int, nf *nameFinder) CallSiteTab {
cstb := makeCallSiteTableBuilder(fn, cstab, ptab, loopNestingLevel, nf)
var doNode func(ir.Node) bool
doNode = func(n ir.Node) bool {
cstb.nodeVisitPre(n)
ir.DoChildren(n, doNode)
cstb.nodeVisitPost(n)
return false
}
for _, n := range region {
doNode(n)
}
return cstb.cstab
}
func (cstb *callSiteTableBuilder) flagsForNode(call *ir.CallExpr) CSPropBits {
var r CSPropBits
if debugTrace&debugTraceCalls != 0 {
fmt.Fprintf(os.Stderr, "=-= analyzing call at %s\n",
fmtFullPos(call.Pos()))
}
// Set a bit if this call is within a loop.
if cstb.loopNest > 0 {
r |= CallSiteInLoop
}
// Set a bit if the call is within an init function (either
// compiler-generated or user-written).
if cstb.isInit {
r |= CallSiteInInitFunc
}
// Decide whether to apply the panic path heuristic. Hack: don't
// apply this heuristic in the function "main.main" (mostly just
// to avoid annoying users).
if !isMainMain(cstb.fn) {
r = cstb.determinePanicPathBits(call, r)
}
return r
}
// determinePanicPathBits updates the CallSiteOnPanicPath bit within
// "r" if we think this call is on an unconditional path to
// panic/exit. Do this by walking back up the node stack to see if we
// can find either A) an enclosing panic, or B) a statement node that
// we've determined leads to a panic/exit.
func (cstb *callSiteTableBuilder) determinePanicPathBits(call ir.Node, r CSPropBits) CSPropBits {
cstb.nstack = append(cstb.nstack, call)
defer func() {
cstb.nstack = cstb.nstack[:len(cstb.nstack)-1]
}()
for ri := range cstb.nstack[:len(cstb.nstack)-1] {
i := len(cstb.nstack) - ri - 1
n := cstb.nstack[i]
_, isCallExpr := n.(*ir.CallExpr)
_, isStmt := n.(ir.Stmt)
if isCallExpr {
isStmt = false
}
if debugTrace&debugTraceCalls != 0 {
ps, inps := cstb.ptab[n]
fmt.Fprintf(os.Stderr, "=-= callpar %d op=%s ps=%s inptab=%v stmt=%v\n", i, n.Op().String(), ps.String(), inps, isStmt)
}
if n.Op() == ir.OPANIC {
r |= CallSiteOnPanicPath
break
}
if v, ok := cstb.ptab[n]; ok {
if v == psCallsPanic {
r |= CallSiteOnPanicPath
break
}
if isStmt {
break
}
}
}
return r
}
// propsForArg returns property bits for a given call argument expression arg.
func (cstb *callSiteTableBuilder) propsForArg(arg ir.Node) ActualExprPropBits {
if cval := cstb.constValue(arg); cval != nil {
return ActualExprConstant
}
if cstb.isConcreteConvIface(arg) {
return ActualExprIsConcreteConvIface
}
fname := cstb.funcName(arg)
if fname != nil {
if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) {
return ActualExprIsInlinableFunc
}
return ActualExprIsFunc
}
return 0
}
// argPropsForCall returns a slice of argument properties for the
// expressions being passed to the callee in the specific call
// expression; these will be stored in the CallSite object for a given
// call and then consulted when scoring. If no arg has any interesting
// properties we try to save some space and return a nil slice.
func (cstb *callSiteTableBuilder) argPropsForCall(ce *ir.CallExpr) []ActualExprPropBits {
rv := make([]ActualExprPropBits, len(ce.Args))
somethingInteresting := false
for idx := range ce.Args {
argProp := cstb.propsForArg(ce.Args[idx])
somethingInteresting = somethingInteresting || (argProp != 0)
rv[idx] = argProp
}
if !somethingInteresting {
return nil
}
return rv
}
func (cstb *callSiteTableBuilder) addCallSite(callee *ir.Func, call *ir.CallExpr) {
flags := cstb.flagsForNode(call)
argProps := cstb.argPropsForCall(call)
if debugTrace&debugTraceCalls != 0 {
fmt.Fprintf(os.Stderr, "=-= props %+v for call %v\n", argProps, call)
}
// FIXME: maybe bulk-allocate these?
cs := &CallSite{
Call: call,
Callee: callee,
Assign: cstb.containingAssignment(call),
ArgProps: argProps,
Flags: flags,
ID: uint(len(cstb.cstab)),
}
if _, ok := cstb.cstab[call]; ok {
fmt.Fprintf(os.Stderr, "*** cstab duplicate entry at: %s\n",
fmtFullPos(call.Pos()))
fmt.Fprintf(os.Stderr, "*** call: %+v\n", call)
panic("bad")
}
// Set initial score for callsite to the cost computed
// by CanInline; this score will be refined later based
// on heuristics.
cs.Score = int(callee.Inl.Cost)
if cstb.cstab == nil {
cstb.cstab = make(CallSiteTab)
}
cstb.cstab[call] = cs
if debugTrace&debugTraceCalls != 0 {
fmt.Fprintf(os.Stderr, "=-= added callsite: caller=%v callee=%v n=%s\n",
cstb.fn, callee, fmtFullPos(call.Pos()))
}
}
func (cstb *callSiteTableBuilder) nodeVisitPre(n ir.Node) {
switch n.Op() {
case ir.ORANGE, ir.OFOR:
if !hasTopLevelLoopBodyReturnOrBreak(loopBody(n)) {
cstb.loopNest++
}
case ir.OCALLFUNC:
ce := n.(*ir.CallExpr)
callee := pgo.DirectCallee(ce.Fun)
if callee != nil && callee.Inl != nil {
cstb.addCallSite(callee, ce)
}
}
cstb.nstack = append(cstb.nstack, n)
}
func (cstb *callSiteTableBuilder) nodeVisitPost(n ir.Node) {
cstb.nstack = cstb.nstack[:len(cstb.nstack)-1]
switch n.Op() {
case ir.ORANGE, ir.OFOR:
if !hasTopLevelLoopBodyReturnOrBreak(loopBody(n)) {
cstb.loopNest--
}
}
}
func loopBody(n ir.Node) ir.Nodes {
if forst, ok := n.(*ir.ForStmt); ok {
return forst.Body
}
if rst, ok := n.(*ir.RangeStmt); ok {
return rst.Body
}
return nil
}
// hasTopLevelLoopBodyReturnOrBreak examines the body of a "for" or
// "range" loop to try to verify that it is a real loop, as opposed to
// a construct that is syntactically loopy but doesn't actually iterate
// multiple times, like:
//
// for {
// blah()
// return 1
// }
//
// [Remark: the pattern above crops up quite a bit in the source code
// for the compiler itself, e.g. the auto-generated rewrite code]
//
// Note that we don't look for GOTO statements here, so it's possible
// we'll get the wrong result for a loop with complicated control
// jumps via gotos.
func hasTopLevelLoopBodyReturnOrBreak(loopBody ir.Nodes) bool {
for _, n := range loopBody {
if n.Op() == ir.ORETURN || n.Op() == ir.OBREAK {
return true
}
}
return false
}
// containingAssignment returns the top-level assignment statement
// for a statement level function call "n". Examples:
//
// x := foo()
// x, y := bar(z, baz())
// if blah() { ...
//
// Here the top-level assignment statement for the foo() call is the
// statement assigning to "x"; the top-level assignment for "bar()"
// call is the assignment to x,y. For the baz() and blah() calls,
// there is no top level assignment statement.
//
// The unstated goal here is that we want to use the containing
// assignment to establish a connection between a given call and the
// variables to which its results/returns are being assigned.
//
// Note that for the "bar" command above, the front end sometimes
// decomposes this into two assignments, the first one assigning the
// call to a pair of auto-temps, then the second one assigning the
// auto-temps to the user-visible vars. This helper will return the
// second (outer) of these two.
func (cstb *callSiteTableBuilder) containingAssignment(n ir.Node) ir.Node {
parent := cstb.nstack[len(cstb.nstack)-1]
// assignsOnlyAutoTemps returns TRUE of the specified OAS2FUNC
// node assigns only auto-temps.
assignsOnlyAutoTemps := func(x ir.Node) bool {
alst := x.(*ir.AssignListStmt)
oa2init := alst.Init()
if len(oa2init) == 0 {
return false
}
for _, v := range oa2init {
d := v.(*ir.Decl)
if !ir.IsAutoTmp(d.X) {
return false
}
}
return true
}
// Simple case: x := foo()
if parent.Op() == ir.OAS {
return parent
}
// Multi-return case: x, y := bar()
if parent.Op() == ir.OAS2FUNC {
// Hack city: if the result vars are auto-temps, try looking
// for an outer assignment in the tree. The code shape we're
// looking for here is:
//
// OAS1({x,y},OCONVNOP(OAS2FUNC({auto1,auto2},OCALLFUNC(bar))))
//
if assignsOnlyAutoTemps(parent) {
par2 := cstb.nstack[len(cstb.nstack)-2]
if par2.Op() == ir.OAS2 {
return par2
}
if par2.Op() == ir.OCONVNOP {
par3 := cstb.nstack[len(cstb.nstack)-3]
if par3.Op() == ir.OAS2 {
return par3
}
}
}
}
return nil
}
// UpdateCallsiteTable handles updating of callerfn's call site table
// after an inlined has been carried out, e.g. the call at 'n' as been
// turned into the inlined call expression 'ic' within function
// callerfn. The chief thing of interest here is to make sure that any
// call nodes within 'ic' are added to the call site table for
// 'callerfn' and scored appropriately.
func UpdateCallsiteTable(callerfn *ir.Func, n *ir.CallExpr, ic *ir.InlinedCallExpr) {
enableDebugTraceIfEnv()
defer disableDebugTrace()
funcInlHeur, ok := fpmap[callerfn]
if !ok {
// This can happen for compiler-generated wrappers.
if debugTrace&debugTraceCalls != 0 {
fmt.Fprintf(os.Stderr, "=-= early exit, no entry for caller fn %v\n", callerfn)
}
return
}
if debugTrace&debugTraceCalls != 0 {
fmt.Fprintf(os.Stderr, "=-= UpdateCallsiteTable(caller=%v, cs=%s)\n",
callerfn, fmtFullPos(n.Pos()))
}
// Mark the call in question as inlined.
oldcs, ok := funcInlHeur.cstab[n]
if !ok {
// This can happen for compiler-generated wrappers.
return
}
oldcs.aux |= csAuxInlined
if debugTrace&debugTraceCalls != 0 {
fmt.Fprintf(os.Stderr, "=-= marked as inlined: callee=%v %s\n",
oldcs.Callee, EncodeCallSiteKey(oldcs))
}
// Walk the inlined call region to collect new callsites.
var icp pstate
if oldcs.Flags&CallSiteOnPanicPath != 0 {
icp = psCallsPanic
}
var loopNestLevel int
if oldcs.Flags&CallSiteInLoop != 0 {
loopNestLevel = 1
}
ptab := map[ir.Node]pstate{ic: icp}
nf := newNameFinder(nil)
icstab := computeCallSiteTable(callerfn, ic.Body, nil, ptab, loopNestLevel, nf)
// Record parent callsite. This is primarily for debug output.
for _, cs := range icstab {
cs.parent = oldcs
}
// Score the calls in the inlined body. Note the setting of
// "doCallResults" to false here: at the moment there isn't any
// easy way to localize or region-ize the work done by
// "rescoreBasedOnCallResultUses", which currently does a walk
// over the entire function to look for uses of a given set of
// results. Similarly we're passing nil to makeCallSiteAnalyzer,
// so as to run name finding without the use of static value &
// friends.
csa := makeCallSiteAnalyzer(nil)
const doCallResults = false
csa.scoreCallsRegion(callerfn, ic.Body, icstab, doCallResults, ic)
}