| // Copyright 2023 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package inlheur |
| |
| import ( |
| "cmd/compile/internal/base" |
| "cmd/compile/internal/ir" |
| "cmd/compile/internal/pgo" |
| "cmd/compile/internal/types" |
| "fmt" |
| "os" |
| "sort" |
| "strconv" |
| "strings" |
| ) |
| |
// scoreAdjustTyp identifies a single kind of callsite score
// adjustment. Each constant below occupies its own bit, so values of
// this type can also be OR'ed together to form a mask of the
// adjustments applied to a callsite.
type scoreAdjustTyp uint

// The constants below name the heuristics that the inliner's scoring
// phase can use to adjust a callsite score. They fall broadly into
// three categories:
//
//  1. adjustments based solely on the callsite context (ex: call
//     appears on panic path)
//
//  2. adjustments that take into account specific interesting values
//     passed at a call site (ex: passing a constant that could result
//     in cprop/deadcode in the caller)
//
//  3. adjustments that take into account values returned from the
//     call at a callsite (ex: call always returns the same inlinable
//     function, and return value flows unmodified into an indirect
//     call)
//
// Within categories 2 and 3 an adjustment may exist in a "must"
// flavor and a "may" flavor; any given constant is one or the other,
// never both. In the "must" flavor the value flow is unconditional:
// if the callsite executes, the condition of interest (ex: param
// feeding call) is guaranteed to happen. In the "may" flavor there
// may be control flow that causes the benefit to be bypassed.
const (
	// Category 1: callsite context.
	panicPathAdj scoreAdjustTyp = 1 << iota
	initFuncAdj
	inLoopAdj

	// Category 2: values passed at the callsite.
	passConstToIfAdj
	passConstToNestedIfAdj
	passConcreteToItfCallAdj
	passConcreteToNestedItfCallAdj
	passFuncToIndCallAdj
	passFuncToNestedIndCallAdj
	passInlinableFuncToIndCallAdj
	passInlinableFuncToNestedIndCallAdj

	// Category 3: values returned from the call.
	returnFeedsConstToIfAdj
	returnFeedsFuncToIndCallAdj
	returnFeedsInlinableFuncToIndCallAdj
	returnFeedsConcreteToInterfaceCallAdj

	// sentinelScoreAdj marks the end of the enumeration; it is not a
	// real adjustment.
	sentinelScoreAdj
)
| |
| // This table records the specific values we use to adjust call |
| // site scores in a given scenario. |
| // NOTE: these numbers are chosen very arbitrarily; ideally |
| // we will go through some sort of turning process to decide |
| // what value for each one produces the best performance. |
| |
| var adjValues = map[scoreAdjustTyp]int{ |
| panicPathAdj: 40, |
| initFuncAdj: 20, |
| inLoopAdj: -5, |
| passConstToIfAdj: -20, |
| passConstToNestedIfAdj: -15, |
| passConcreteToItfCallAdj: -30, |
| passConcreteToNestedItfCallAdj: -25, |
| passFuncToIndCallAdj: -25, |
| passFuncToNestedIndCallAdj: -20, |
| passInlinableFuncToIndCallAdj: -45, |
| passInlinableFuncToNestedIndCallAdj: -40, |
| returnFeedsConstToIfAdj: -15, |
| returnFeedsFuncToIndCallAdj: -25, |
| returnFeedsInlinableFuncToIndCallAdj: -40, |
| returnFeedsConcreteToInterfaceCallAdj: -25, |
| } |
| |
| // SetupScoreAdjustments interprets the value of the -d=inlscoreadj |
| // debugging option, if set. The value of this flag is expected to be |
| // a series of "/"-separated clauses of the form adj1:value1. Example: |
| // -d=inlscoreadj=inLoopAdj=0/passConstToIfAdj=-99 |
| func SetupScoreAdjustments() { |
| if base.Debug.InlScoreAdj == "" { |
| return |
| } |
| if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil { |
| base.Fatalf("malformed -d=inlscoreadj argument %q: %v", |
| base.Debug.InlScoreAdj, err) |
| } |
| } |
| |
| func adjStringToVal(s string) (scoreAdjustTyp, bool) { |
| for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 { |
| if adj.String() == s { |
| return adj, true |
| } |
| } |
| return 0, false |
| } |
| |
| func parseScoreAdj(val string) error { |
| clauses := strings.Split(val, "/") |
| if len(clauses) == 0 { |
| return fmt.Errorf("no clauses") |
| } |
| for _, clause := range clauses { |
| elems := strings.Split(clause, ":") |
| if len(elems) < 2 { |
| return fmt.Errorf("clause %q: expected colon", clause) |
| } |
| if len(elems) != 2 { |
| return fmt.Errorf("clause %q has %d elements, wanted 2", clause, |
| len(elems)) |
| } |
| adj, ok := adjStringToVal(elems[0]) |
| if !ok { |
| return fmt.Errorf("clause %q: unknown adjustment", clause) |
| } |
| val, err := strconv.Atoi(elems[1]) |
| if err != nil { |
| return fmt.Errorf("clause %q: malformed value: %v", clause, err) |
| } |
| adjValues[adj] = val |
| } |
| return nil |
| } |
| |
| func adjValue(x scoreAdjustTyp) int { |
| if val, ok := adjValues[x]; ok { |
| return val |
| } else { |
| panic("internal error unregistered adjustment type") |
| } |
| } |
| |
| var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{ |
| {may: passConstToNestedIfAdj, must: passConstToIfAdj}, |
| {may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj}, |
| {may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj}, |
| {may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj}, |
| } |
| |
| func isMay(x scoreAdjustTyp) bool { |
| return mayToMust(x) != 0 |
| } |
| |
| func isMust(x scoreAdjustTyp) bool { |
| return mustToMay(x) != 0 |
| } |
| |
| func mayToMust(x scoreAdjustTyp) scoreAdjustTyp { |
| for _, v := range mayMustAdj { |
| if x == v.may { |
| return v.must |
| } |
| } |
| return 0 |
| } |
| |
| func mustToMay(x scoreAdjustTyp) scoreAdjustTyp { |
| for _, v := range mayMustAdj { |
| if x == v.must { |
| return v.may |
| } |
| } |
| return 0 |
| } |
| |
| // computeCallSiteScore takes a given call site whose ir node is |
| // 'call' and callee function is 'callee' and with previously computed |
| // call site properties 'csflags', then computes a score for the |
| // callsite that combines the size cost of the callee with heuristics |
| // based on previously computed argument and function properties, |
| // then stores the score and the adjustment mask in the appropriate |
| // fields in 'cs' |
| func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) { |
| callee := cs.Callee |
| csflags := cs.Flags |
| call := cs.Call |
| |
| // Start with the size-based score for the callee. |
| score := int(callee.Inl.Cost) |
| var tmask scoreAdjustTyp |
| |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n", |
| callee.Sym().Name, fmtFullPos(call.Pos()), score) |
| } |
| |
| // First some score adjustments to discourage inlining in selected cases. |
| if csflags&CallSiteOnPanicPath != 0 { |
| score, tmask = adjustScore(panicPathAdj, score, tmask) |
| } |
| if csflags&CallSiteInInitFunc != 0 { |
| score, tmask = adjustScore(initFuncAdj, score, tmask) |
| } |
| |
| // Then adjustments to encourage inlining in selected cases. |
| if csflags&CallSiteInLoop != 0 { |
| score, tmask = adjustScore(inLoopAdj, score, tmask) |
| } |
| |
| // Stop here if no callee props. |
| if calleeProps == nil { |
| cs.Score, cs.ScoreMask = score, tmask |
| return |
| } |
| |
| // Walk through the actual expressions being passed at the call. |
| calleeRecvrParms := callee.Type().RecvParams() |
| for idx := range call.Args { |
| // ignore blanks |
| if calleeRecvrParms[idx].Sym == nil || |
| calleeRecvrParms[idx].Sym.IsBlank() { |
| continue |
| } |
| arg := call.Args[idx] |
| pflag := calleeProps.ParamFlags[idx] |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n", |
| idx, len(call.Args), arg, pflag.String()) |
| } |
| |
| if len(cs.ArgProps) == 0 { |
| continue |
| } |
| argProps := cs.ArgProps[idx] |
| |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n", |
| idx, argProps.String(), arg) |
| } |
| |
| if argProps&ActualExprConstant != 0 { |
| if pflag&ParamMayFeedIfOrSwitch != 0 { |
| score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask) |
| } |
| if pflag&ParamFeedsIfOrSwitch != 0 { |
| score, tmask = adjustScore(passConstToIfAdj, score, tmask) |
| } |
| } |
| |
| if argProps&ActualExprIsConcreteConvIface != 0 { |
| // FIXME: ideally here it would be nice to make a |
| // distinction between the inlinable case and the |
| // non-inlinable case, but this is hard to do. Example: |
| // |
| // type I interface { Tiny() int; Giant() } |
| // type Conc struct { x int } |
| // func (c *Conc) Tiny() int { return 42 } |
| // func (c *Conc) Giant() { <huge amounts of code> } |
| // |
| // func passConcToItf(c *Conc) { |
| // makesItfMethodCall(c) |
| // } |
| // |
| // In the code above, function properties will only tell |
| // us that 'makesItfMethodCall' invokes a method on its |
| // interface parameter, but we don't know whether it calls |
| // "Tiny" or "Giant". If we knew if called "Tiny", then in |
| // theory in addition to converting the interface call to |
| // a direct call, we could also inline (in which case |
| // we'd want to decrease the score even more). |
| // |
| // One thing we could do (not yet implemented) is iterate |
| // through all of the methods of "*Conc" that allow it to |
| // satisfy I, and if all are inlinable, then exploit that. |
| if pflag&ParamMayFeedInterfaceMethodCall != 0 { |
| score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask) |
| } |
| if pflag&ParamFeedsInterfaceMethodCall != 0 { |
| score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask) |
| } |
| } |
| |
| if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 { |
| mayadj := passFuncToNestedIndCallAdj |
| mustadj := passFuncToIndCallAdj |
| if argProps&ActualExprIsInlinableFunc != 0 { |
| mayadj = passInlinableFuncToNestedIndCallAdj |
| mustadj = passInlinableFuncToIndCallAdj |
| } |
| if pflag&ParamMayFeedIndirectCall != 0 { |
| score, tmask = adjustScore(mayadj, score, tmask) |
| } |
| if pflag&ParamFeedsIndirectCall != 0 { |
| score, tmask = adjustScore(mustadj, score, tmask) |
| } |
| } |
| } |
| |
| cs.Score, cs.ScoreMask = score, tmask |
| } |
| |
| func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) { |
| |
| if isMust(typ) { |
| if mask&typ != 0 { |
| return score, mask |
| } |
| may := mustToMay(typ) |
| if mask&may != 0 { |
| // promote may to must, so undo may |
| score -= adjValue(may) |
| mask &^= may |
| } |
| } else if isMay(typ) { |
| must := mayToMust(typ) |
| if mask&(must|typ) != 0 { |
| return score, mask |
| } |
| } |
| if mask&typ == 0 { |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n", |
| adjValue(typ), typ.String()) |
| } |
| score += adjValue(typ) |
| mask |= typ |
| } |
| return score, mask |
| } |
| |
| var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp |
| var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp |
| |
| func setupFlagToAdjMaps() { |
| resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{ |
| ResultIsAllocatedMem: returnFeedsConcreteToInterfaceCallAdj, |
| ResultAlwaysSameFunc: returnFeedsFuncToIndCallAdj, |
| ResultAlwaysSameConstant: returnFeedsConstToIfAdj, |
| } |
| paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{ |
| ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj, |
| ParamFeedsInterfaceMethodCall: passConcreteToItfCallAdj, |
| ParamMayFeedIndirectCall: passInlinableFuncToNestedIndCallAdj, |
| ParamFeedsIndirectCall: passInlinableFuncToIndCallAdj, |
| } |
| } |
| |
| // LargestNegativeScoreAdjustment tries to estimate the largest possible |
| // negative score adjustment that could be applied to a call of the |
| // function with the specified props. Example: |
| // |
| // func foo() { func bar(x int, p *int) int { |
| // ... if x < 0 { *p = x } |
| // } return 99 |
| // } |
| // |
| // Function 'foo' above on the left has no interesting properties, |
| // thus as a result the most we'll adjust any call to is the value for |
| // "call in loop". If the calculated cost of the function is 150, and |
| // the in-loop adjustment is 5 (for example), then there is not much |
| // point treating it as inlinable. On the other hand "bar" has a param |
| // property (parameter "x" feeds unmodified to an "if" statement") and |
| // a return property (always returns same constant) meaning that a |
| // given call _could_ be rescored down as much as -35 points-- thus if |
| // the size of "bar" is 100 (for example) then there is at least a |
| // chance that scoring will enable inlining. |
| func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int { |
| if resultFlagToPositiveAdj == nil { |
| setupFlagToAdjMaps() |
| } |
| var tmask scoreAdjustTyp |
| score := adjValues[inLoopAdj] // any call can be in a loop |
| for _, pf := range props.ParamFlags { |
| if adj, ok := paramFlagToPositiveAdj[pf]; ok { |
| score, tmask = adjustScore(adj, score, tmask) |
| } |
| } |
| for _, rf := range props.ResultFlags { |
| if adj, ok := resultFlagToPositiveAdj[rf]; ok { |
| score, tmask = adjustScore(adj, score, tmask) |
| } |
| } |
| |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n", |
| fn, score) |
| } |
| |
| return score |
| } |
| |
| // LargestPositiveScoreAdjustment tries to estimate the largest possible |
| // positive score adjustment that could be applied to a given callsite. |
| // At the moment we don't have very many positive score adjustments, so |
| // this is just hard-coded, not table-driven. |
| func LargestPositiveScoreAdjustment(fn *ir.Func) int { |
| return adjValues[panicPathAdj] + adjValues[initFuncAdj] |
| } |
| |
| // callSiteTab contains entries for each call in the function |
| // currently being processed by InlineCalls; this variable will either |
| // be set to 'cstabCache' below (for non-inlinable routines) or to the |
| // local 'cstab' entry in the fnInlHeur object for inlinable routines. |
| // |
| // NOTE: this assumes that inlining operations are happening in a serial, |
| // single-threaded fashion,f which is true today but probably won't hold |
| // in the future (for example, we might want to score the callsites |
| // in multiple functions in parallel); if the inliner evolves in this |
| // direction we'll need to come up with a different approach here. |
| var callSiteTab CallSiteTab |
| |
| // scoreCallsCache caches a call site table and call site list between |
| // invocations of ScoreCalls so that we can reuse previously allocated |
| // storage. |
| var scoreCallsCache scoreCallsCacheType |
| |
| type scoreCallsCacheType struct { |
| tab CallSiteTab |
| csl []*CallSite |
| } |
| |
| // ScoreCalls assigns numeric scores to each of the callsites in |
| // function 'fn'; the lower the score, the more helpful we think it |
| // will be to inline. |
| // |
| // Unlike a lot of the other inline heuristics machinery, callsite |
| // scoring can't be done as part of the CanInline call for a function, |
| // due to fact that we may be working on a non-trivial SCC. So for |
| // example with this SCC: |
| // |
| // func foo(x int) { func bar(x int, f func()) { |
| // if x != 0 { f() |
| // bar(x, func(){}) foo(x-1) |
| // } } |
| // } |
| // |
| // We don't want to perform scoring for the 'foo' call in "bar" until |
| // after foo has been analyzed, but it's conceivable that CanInline |
| // might visit bar before foo for this SCC. |
| func ScoreCalls(fn *ir.Func) { |
| if len(fn.Body) == 0 { |
| return |
| } |
| enableDebugTraceIfEnv() |
| |
| nameFinder := newNameFinder(fn) |
| |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn)) |
| } |
| |
| // If this is an inlinable function, use the precomputed |
| // call site table for it. If the function wasn't an inline |
| // candidate, collect a callsite table for it now. |
| var cstab CallSiteTab |
| if funcInlHeur, ok := fpmap[fn]; ok { |
| cstab = funcInlHeur.cstab |
| } else { |
| if len(scoreCallsCache.tab) != 0 { |
| panic("missing call to ScoreCallsCleanup") |
| } |
| if scoreCallsCache.tab == nil { |
| scoreCallsCache.tab = make(CallSiteTab) |
| } |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n", |
| ir.FuncName(fn)) |
| } |
| cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0, |
| nameFinder) |
| } |
| |
| csa := makeCallSiteAnalyzer(fn) |
| const doCallResults = true |
| csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil) |
| |
| disableDebugTrace() |
| } |
| |
| // scoreCallsRegion assigns numeric scores to each of the callsites in |
| // region 'region' within function 'fn'. This can be called on |
| // an entire function, or with 'region' set to a chunk of |
| // code corresponding to an inlined call. |
| func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) { |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n", |
| ir.FuncName(fn), region[0].Op().String(), len(cstab)) |
| } |
| |
| // Sort callsites to avoid any surprises with non deterministic |
| // map iteration order (this is probably not needed, but here just |
| // in case). |
| csl := scoreCallsCache.csl[:0] |
| for _, cs := range cstab { |
| csl = append(csl, cs) |
| } |
| scoreCallsCache.csl = csl[:0] |
| sort.Slice(csl, func(i, j int) bool { |
| return csl[i].ID < csl[j].ID |
| }) |
| |
| // Score each call site. |
| var resultNameTab map[*ir.Name]resultPropAndCS |
| for _, cs := range csl { |
| var cprops *FuncProps |
| fihcprops := false |
| desercprops := false |
| if funcInlHeur, ok := fpmap[cs.Callee]; ok { |
| cprops = funcInlHeur.props |
| fihcprops = true |
| } else if cs.Callee.Inl != nil { |
| cprops = DeserializeFromString(cs.Callee.Inl.Properties) |
| desercprops = true |
| } else { |
| if base.Debug.DumpInlFuncProps != "" { |
| fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos())) |
| panic("should never happen") |
| } else { |
| continue |
| } |
| } |
| cs.computeCallSiteScore(csa, cprops) |
| |
| if doCallResults { |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) |
| } |
| resultNameTab = csa.examineCallResults(cs, resultNameTab) |
| } |
| |
| if debugTrace&debugTraceScoring != 0 { |
| fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) |
| } |
| } |
| |
| if resultNameTab != nil { |
| csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab) |
| } |
| |
| disableDebugTrace() |
| |
| if ic != nil && callSiteTab != nil { |
| // Integrate the calls from this cstab into the table for the caller. |
| if err := callSiteTab.merge(cstab); err != nil { |
| base.FatalfAt(ic.Pos(), "%v", err) |
| } |
| } else { |
| callSiteTab = cstab |
| } |
| } |
| |
| // ScoreCallsCleanup resets the state of the callsite cache |
| // once ScoreCalls is done with a function. |
| func ScoreCallsCleanup() { |
| if base.Debug.DumpInlCallSiteScores != 0 { |
| if allCallSites == nil { |
| allCallSites = make(CallSiteTab) |
| } |
| for call, cs := range callSiteTab { |
| allCallSites[call] = cs |
| } |
| } |
| for k := range scoreCallsCache.tab { |
| delete(scoreCallsCache.tab, k) |
| } |
| } |
| |
| // GetCallSiteScore returns the previously calculated score for call |
| // within fn. |
| func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) { |
| if funcInlHeur, ok := fpmap[fn]; ok { |
| if cs, ok := funcInlHeur.cstab[call]; ok { |
| return cs.Score, true |
| } |
| } |
| if cs, ok := callSiteTab[call]; ok { |
| return cs.Score, true |
| } |
| return 0, false |
| } |
| |
| // BudgetExpansion returns the amount to relax/expand the base |
| // inlining budget when the new inliner is turned on; the inliner |
| // will add the returned value to the hairyness budget. |
| // |
| // Background: with the new inliner, the score for a given callsite |
| // can be adjusted down by some amount due to heuristics, however we |
| // won't know whether this is going to happen until much later after |
| // the CanInline call. This function returns the amount to relax the |
| // budget initially (to allow for a large score adjustment); later on |
| // in RevisitInlinability we'll look at each individual function to |
| // demote it if needed. |
| func BudgetExpansion(maxBudget int32) int32 { |
| if base.Debug.InlBudgetSlack != 0 { |
| return int32(base.Debug.InlBudgetSlack) |
| } |
| // In the default case, return maxBudget, which will effectively |
| // double the budget from 80 to 160; this should be good enough |
| // for most cases. |
| return maxBudget |
| } |
| |
| var allCallSites CallSiteTab |
| |
| // DumpInlCallSiteScores is invoked by the inliner if the debug flag |
| // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable |
| // summary of all (potentially) inlinable callsites in the package, |
| // along with info on call site scoring and the adjustments made to a |
| // given score. Here profile is the PGO profile in use (may be |
| // nil), budgetCallback is a callback that can be invoked to find out |
| // the original pre-adjustment hairyness limit for the function, and |
| // inlineHotMaxBudget is the constant of the same name used in the |
| // inliner. Sample output lines: |
| // |
| // Score Adjustment Status Callee CallerPos ScoreFlags |
| // 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj |
| // 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj |
| // 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1 |
| // 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj |
| // |
| // In the dump above, "Score" is the final score calculated for the |
| // callsite, "Adjustment" is the amount added to or subtracted from |
| // the original hairyness estimate to form the score. "Status" shows |
| // whether anything changed with the site -- did the adjustment bump |
| // it down just below the threshold ("PROMOTED") or instead bump it |
| // above the threshold ("DEMOTED"); this will be blank ("---") if no |
| // threshold was crossed as a result of the heuristics. Note that |
| // "Status" also shows whether PGO was involved. "Callee" is the name |
| // of the function called, "CallerPos" is the position of the |
| // callsite, and "ScoreFlags" is a digest of the specific properties |
| // we used to make adjustments to callsite score via heuristics. |
| func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) { |
| |
| var indirectlyDueToPromotion func(cs *CallSite) bool |
| indirectlyDueToPromotion = func(cs *CallSite) bool { |
| bud, _ := budgetCallback(cs.Callee, profile) |
| hairyval := cs.Callee.Inl.Cost |
| score := int32(cs.Score) |
| if hairyval > bud && score <= bud { |
| return true |
| } |
| if cs.parent != nil { |
| return indirectlyDueToPromotion(cs.parent) |
| } |
| return false |
| } |
| |
| genstatus := func(cs *CallSite) string { |
| hairyval := cs.Callee.Inl.Cost |
| bud, isPGO := budgetCallback(cs.Callee, profile) |
| score := int32(cs.Score) |
| st := "---" |
| expinl := false |
| switch { |
| case hairyval <= bud && score <= bud: |
| // "Normal" inlined case: hairy val sufficiently low that |
| // it would have been inlined anyway without heuristics. |
| expinl = true |
| case hairyval > bud && score > bud: |
| // "Normal" not inlined case: hairy val sufficiently high |
| // and scoring didn't lower it. |
| case hairyval > bud && score <= bud: |
| // Promoted: we would not have inlined it before, but |
| // after score adjustment we decided to inline. |
| st = "PROMOTED" |
| expinl = true |
| case hairyval <= bud && score > bud: |
| // Demoted: we would have inlined it before, but after |
| // score adjustment we decided not to inline. |
| st = "DEMOTED" |
| } |
| inlined := cs.aux&csAuxInlined != 0 |
| indprom := false |
| if cs.parent != nil { |
| indprom = indirectlyDueToPromotion(cs.parent) |
| } |
| if inlined && indprom { |
| st += "|INDPROM" |
| } |
| if inlined && !expinl { |
| st += "|[NI?]" |
| } else if !inlined && expinl { |
| st += "|[IN?]" |
| } |
| if isPGO { |
| st += "|PGO" |
| } |
| return st |
| } |
| |
| if base.Debug.DumpInlCallSiteScores != 0 { |
| var sl []*CallSite |
| for _, cs := range allCallSites { |
| sl = append(sl, cs) |
| } |
| sort.Slice(sl, func(i, j int) bool { |
| if sl[i].Score != sl[j].Score { |
| return sl[i].Score < sl[j].Score |
| } |
| fni := ir.PkgFuncName(sl[i].Callee) |
| fnj := ir.PkgFuncName(sl[j].Callee) |
| if fni != fnj { |
| return fni < fnj |
| } |
| ecsi := EncodeCallSiteKey(sl[i]) |
| ecsj := EncodeCallSiteKey(sl[j]) |
| return ecsi < ecsj |
| }) |
| |
| mkname := func(fn *ir.Func) string { |
| var n string |
| if fn == nil || fn.Nname == nil { |
| return "<nil>" |
| } |
| if fn.Sym().Pkg == types.LocalPkg { |
| n = "ยท" + fn.Sym().Name |
| } else { |
| n = ir.PkgFuncName(fn) |
| } |
| // don't try to print super-long names |
| if len(n) <= 64 { |
| return n |
| } |
| return n[:32] + "..." + n[len(n)-32:len(n)] |
| } |
| |
| if len(sl) != 0 { |
| fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path) |
| fmt.Fprintf(os.Stdout, "# Score Adjustment Status Callee CallerPos Flags ScoreFlags\n") |
| } |
| for _, cs := range sl { |
| hairyval := cs.Callee.Inl.Cost |
| adj := int32(cs.Score) - hairyval |
| nm := mkname(cs.Callee) |
| ecc := EncodeCallSiteKey(cs) |
| fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n", |
| cs.Score, adj, genstatus(cs), |
| nm, ecc, |
| cs.ScoreMask.String()) |
| } |
| } |
| } |