| // Copyright 2023 syzkaller project authors. All rights reserved. |
| // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
| package main |
| |
| // Given information on how commits flow from one kernel source tree to another, assign |
| // bugs labels of two kinds: |
| // a) LabelIntroduced -- reproducer does not work in any other kernel tree, FROM which commits flow. |
| // b) LabelReached -- reproducer does not work in any other kernel tree, TO which commits flow. |
| |
| import ( |
| "context" |
| "fmt" |
| "sort" |
| "sync" |
| "time" |
| |
| "github.com/google/syzkaller/dashboard/dashapi" |
| "golang.org/x/sync/errgroup" |
| db "google.golang.org/appengine/v2/datastore" |
| "google.golang.org/appengine/v2/log" |
| ) |
| |
| // generateTreeOriginJobs generates new jobs for bug origin tree determination. |
| func generateTreeOriginJobs(cGlobal context.Context, bugKey *db.Key, |
| managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { |
| var job *Job |
| var jobKey *db.Key |
| tx := func(c context.Context) error { |
| bug := new(Bug) |
| if err := db.Get(c, bugKey, bug); err != nil { |
| return fmt.Errorf("failed to get bug: %w", err) |
| } |
| ctx := &bugTreeContext{ |
| c: c, |
| cGlobal: cGlobal, |
| bug: bug, |
| bugKey: bug.key(c), |
| } |
| ret := ctx.pollBugTreeJobs(managers) |
| switch ret.(type) { |
| case pollResultError: |
| return ret.(error) |
| case pollResultWait: |
| newTime, ok := ret.(time.Time) |
| if ok && newTime.After(bug.TreeTests.NextPoll) { |
| bug.TreeTests.NextPoll = newTime |
| } |
| } |
| bug.TreeTests.NeedPoll = false |
| if _, err := db.Put(c, bugKey, bug); err != nil { |
| return fmt.Errorf("failed to put bug: %w", err) |
| } |
| job, jobKey = ctx.job, ctx.jobKey |
| return nil |
| } |
| if err := db.RunInTransaction(cGlobal, tx, |
| &db.TransactionOptions{XG: true, Attempts: 10}); err != nil { |
| return nil, nil, err |
| } |
| return job, jobKey, nil |
| } |
| |
// treeOriginJobDone is supposed to be called when tree origin job is done.
// It keeps the cached info in Bug up to date and assigns bug tree origin labels.
func treeOriginJobDone(cGlobal context.Context, jobKey *db.Key, job *Job) error {
	bugKey := jobKey.Parent()
	tx := func(c context.Context) error {
		bug := new(Bug)
		if err := db.Get(c, bugKey, bug); err != nil {
			return fmt.Errorf("failed to get bug: %w", err)
		}
		// Re-run the polling logic only for the finished job's manager, but
		// with noNewJobs set -- we just want to pick up the result and,
		// possibly, set labels, not start more jobs from here.
		ctx := &bugTreeContext{
			c:         c,
			cGlobal:   cGlobal,
			bug:       bug,
			bugKey:    bug.key(c),
			noNewJobs: true,
		}
		ret := ctx.pollBugTreeJobs(
			map[string]dashapi.ManagerJobs{job.Manager: {TestPatches: true}},
		)
		switch ret.(type) {
		case pollResultError:
			return ret.(error)
		case pollResultPending:
			// More work is needed; request an immediate re-poll.
			bug.TreeTests.NextPoll = time.Time{}
			bug.TreeTests.NeedPoll = true
		}
		if _, err := db.Put(c, bugKey, bug); err != nil {
			return fmt.Errorf("failed to put bug: %w", err)
		}
		return nil
	}
	return db.RunInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true, Attempts: 10})
}
| |
// pollTreeJobResult is a marker interface for the outcome of one tree test
// polling step; see the pollResult* types below for the possible values.
type pollTreeJobResult interface{}

// pollResultPending is returned when we wait some job to finish.
type pollResultPending struct{}

// pollResultWait is returned when we know the next time the process could be repeated.
type pollResultWait time.Time

// pollResultSkip means that there are no poll jobs we could run at the moment.
// It's impossible to say when it changes, so it's better not to repeat polling soon.
type pollResultSkip struct{}

// pollResultError wraps an error that aborted the polling process.
type pollResultError error

// pollResultDone is returned when a relevant job has already finished.
type pollResultDone struct {
	// Crashed is true if the job ended with a crash (non-empty CrashTitle).
	Crashed bool
	// Finished is the job completion time.
	Finished time.Time
}
| |
// bugTreeContext carries the state shared by the tree origin polling steps
// for a single bug.
type bugTreeContext struct {
	c context.Context
	// Datastore puts limits on how often a single entity can be accessed by transactions.
	// And we actually don't always need a consistent view of the DB, we just want to query
	// a single entity. So, when possible, let's make queries outside of a transaction.
	cGlobal context.Context
	// The crash (with its key and build) selected by loadCrashInfo().
	crash    *Crash
	crashKey *db.Key
	bugKey   *db.Key
	bug      *Bug
	build    *Build
	// The commit flow graph node corresponding to the crash's kernel repo/branch.
	repoNode *repoNode
	// If set, only consult existing results; never create new test jobs.
	noNewJobs bool

	// If any jobs were created, here'll be one of them.
	job    *Job
	jobKey *db.Key
}
| |
// pollBugTreeJobs is the entry point of the tree origin logic: it selects the
// crash to work with, refreshes the cached per-tree job results and then runs
// the label assignment and missing backport detection steps.
func (ctx *bugTreeContext) pollBugTreeJobs(managers map[string]dashapi.ManagerJobs) pollTreeJobResult {
	// Determine the crash we'd stick to.
	err := ctx.loadCrashInfo()
	if err != nil {
		log.Errorf(ctx.c, "bug %q: failed to load crash info: %s", ctx.bug.displayTitle(), err)
		return pollResultError(err)
	}
	if ctx.crash == nil {
		// There are no crashes we could further work with.
		// TODO: consider looking at the recent repro retest results.
		log.Infof(ctx.c, "bug %q: no suitable crash", ctx.bug.displayTitle())
		return pollResultSkip{}
	}
	if ctx.repoNode == nil {
		// We have no information about the tree on which the bug happened.
		log.Errorf(ctx.c, "bug %q: no information about the tree", ctx.bug.displayTitle())
		return pollResultSkip{}
	}
	// Only act if patch testing is enabled for the crash's manager.
	if !managers[ctx.crash.Manager].TestPatches {
		return pollResultSkip{}
	}
	if len(ctx.bug.TreeTests.List) > 0 && ctx.crashKey.IntID() != ctx.bug.TreeTests.List[0].CrashID {
		// Clean up old job records, they are no longer relevant.
		ctx.bug.TreeTests.List = nil
	}
	// Fold the results of any finished pending jobs into the cached records.
	for i := range ctx.bug.TreeTests.List {
		err := ctx.bug.TreeTests.List[i].applyPending(ctx.c)
		if err != nil {
			return pollResultError(err)
		}
	}
	return ctx.groupResults([]pollTreeJobResult{
		ctx.setOriginLabels(),
		ctx.missingBackports(),
	})
}
| |
// setOriginLabels runs the reproducer on the trees connected to the bug's one
// and, once enough results are collected, assigns OriginLabel values (the
// repos' LabelIntroduced/LabelReached) to the bug.
func (ctx *bugTreeContext) setOriginLabels() pollTreeJobResult {
	// Don't touch labels if none are configured or the user set one manually.
	if !ctx.labelsCanBeSet() || ctx.bug.HasUserLabel(OriginLabel) {
		return pollResultSkip{}
	}
	// Previously assigned values are recalculated from scratch below.
	ctx.bug.UnsetLabels(OriginLabel)

	var results []pollTreeJobResult
	perNode := map[*repoNode]pollTreeJobResult{}
	for node, merge := range ctx.repoNode.allReachable() {
		var result pollTreeJobResult
		if merge {
			// Merge base gives a much better result quality, so use it whenever possible.
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnHEAD{})
		}
		perNode[node] = result
		results = append(results, result)
	}
	result := ctx.groupResults(results)
	if _, ok := result.(pollResultPending); ok {
		// At least wait until all started jobs have finished (successfully or not).
		return result
	}
	lastDone := ctx.lastDone(results)
	if lastDone.IsZero() {
		// Demand that at least one of the finished jobs has finished successfully.
		return pollResultSkip{}
	}
	// Since we have a repro for it, it definitely crashed at some point.
	perNode[ctx.repoNode] = pollResultDone{Crashed: true}
	allLabels := append(ctx.selectRepoLabels(true, perNode), ctx.selectRepoLabels(false, perNode)...)
	for _, label := range allLabels {
		if label == ctx.repoNode.repo.LabelIntroduced || label == ctx.repoNode.repo.LabelReached {
			// It looks like our reproducer does not work on other trees.
			// Just in case verify that it still works on the original one.
			result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(lastDone), runOnHEAD{})
			resultDone, ok := result.(pollResultDone)
			if !ok {
				return result
			}
			if !resultDone.Crashed {
				// Unfortunately the repro no longer works. Don't assign labels.
				return pollResultSkip{}
			}
		}
	}
	var labels []BugLabel
	for _, label := range allLabels {
		labels = append(labels, BugLabel{Label: OriginLabel, Value: label})
	}
	ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), labels)
	return pollResultSkip{}
}
| |
// selectRepoLabels attributes bugs to trees depending on the patch testing results.
// With in=true it collects the LabelIntroduced values of candidate origin trees,
// with in=false -- the LabelReached values of the trees where the bug stopped spreading.
func (ctx *bugTreeContext) selectRepoLabels(in bool, results map[*repoNode]pollTreeJobResult) []string {
	// Record which trees the repro crashed; only finished jobs count.
	crashed := map[*repoNode]bool{}
	for node, result := range results {
		done, ok := result.(pollResultDone)
		if ok {
			crashed[node] = done.Crashed
		}
	}
	for node := range crashed {
		if !crashed[node] {
			continue
		}
		// (1) The in = true case:
		// If, for a tree X, there's a tree Y from which commits flow to X and the reproducer crashed
		// on Y, X cannot be among bug origin trees.
		// (2) The in = false case:
		// If, for a tree X, there's a tree Y to which commits flow from X and the reproducer crashed
		// on Y, X cannot be the last tree to which the bug has spread.
		for otherNode := range node.reachable(!in) {
			crashed[otherNode] = false
		}
	}
	ret := []string{}
	for node, set := range crashed {
		if !set {
			continue
		}
		if in && node.repo.LabelIntroduced != "" {
			ret = append(ret, node.repo.LabelIntroduced)
		} else if !in && node.repo.LabelReached != "" {
			ret = append(ret, node.repo.LabelReached)
		}
	}
	return ret
}
| |
| // Test if there's any sense in testing other trees. |
| // For example, if we hit a bug on a mainline, there's no sense to test linux-next to check |
| // if it's a linux-next bug. |
| func (ctx *bugTreeContext) labelsCanBeSet() bool { |
| for node := range ctx.repoNode.reachable(true) { |
| if node.repo.LabelIntroduced != "" { |
| return true |
| } |
| } |
| for node := range ctx.repoNode.reachable(false) { |
| if node.repo.LabelReached != "" { |
| return true |
| } |
| } |
| return ctx.repoNode.repo.LabelIntroduced != "" || |
| ctx.repoNode.repo.LabelReached != "" |
| } |
| |
// missingBackports checks whether the bug looks fixed in one of the trees from
// which commits flow into the bug's tree while the fix has not reached the
// bug's tree itself; if so, it assigns the MissingBackportLabel.
func (ctx *bugTreeContext) missingBackports() pollTreeJobResult {
	if !ctx.repoNode.repo.DetectMissingBackports || ctx.bug.HasUserLabel(MissingBackportLabel) {
		return pollResultSkip{}
	}
	var okDate time.Time
	results := []pollTreeJobResult{}
	// Look at the upstream trees (those from which commits flow in).
	for node, merge := range ctx.repoNode.reachable(true) {
		resultOK := ctx.runRepro(node.repo, wantFirstOK{}, runOnHEAD{})
		doneOK, ok := resultOK.(pollResultDone)
		if !ok {
			results = append(results, resultOK)
			continue
		}
		var resultCrash pollTreeJobResult
		if merge {
			resultCrash = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			// We already know that the reproducer doesn't crash the tree.
			// There'd be no sense to call runRepro in the hope of getting a crash,
			// so let's just look into the past tree testing results.
			resultCrash, _ = ctx.bug.findResult(ctx.c, node.repo, wantFirstCrash{}, runOnAny{})
		}
		doneCrash, ok := resultCrash.(pollResultDone)
		if !ok {
			results = append(results, resultCrash)
			continue
		} else if merge && doneCrash.Crashed || doneOK.Finished.After(doneCrash.Finished) {
			// That's what we want: earlier it crashed and then stopped.
			okDate = doneOK.Finished
			break
		}
	}
	if okDate.IsZero() {
		return ctx.groupResults(results)
	}
	// We are about to assign the "missing backport" label.
	// To reduce the number of backports, just in case run once more on HEAD.
	// The bug fix could have already reached the repository.
	result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(okDate), runOnHEAD{})
	resultDone, ok := result.(pollResultDone)
	if !ok {
		return result
	}
	ctx.bug.UnsetLabels(MissingBackportLabel)
	if resultDone.Crashed {
		ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), []BugLabel{
			{Label: MissingBackportLabel},
		})
	}
	return pollResultSkip{}
}
| |
| func (ctx *bugTreeContext) lastDone(results []pollTreeJobResult) time.Time { |
| var maxTime time.Time |
| for _, item := range results { |
| done, ok := item.(pollResultDone) |
| if !ok { |
| continue |
| } |
| if done.Finished.After(maxTime) { |
| maxTime = done.Finished |
| } |
| } |
| return maxTime |
| } |
| |
| func (ctx *bugTreeContext) groupResults(results []pollTreeJobResult) pollTreeJobResult { |
| var minWait time.Time |
| for _, result := range results { |
| switch v := result.(type) { |
| case pollResultPending, pollResultError: |
| // Wait for the job result to continue. |
| return result |
| case pollResultWait: |
| t := time.Time(v) |
| if minWait.IsZero() || minWait.After(t) { |
| minWait = t |
| } |
| } |
| } |
| if !minWait.IsZero() { |
| return pollResultWait(minWait) |
| } |
| return pollResultSkip{} |
| } |
| |
// expectedResult describes which past test result a caller is interested in.
type expectedResult interface{}

// resultFreshness subtypes.
type wantFirstOK struct{}    // the first job that did not crash
type wantFirstCrash struct{} // the first job that did crash
type wantFirstAny struct{}   // the first finished job, crashed or not
type wantNewAny time.Time    // the last job, if finished at or after the given time

// runReproOn describes on which kernel revision to run the reproducer.
type runReproOn interface{}

// runReproOn subtypes.
type runOnAny struct{} // attempts to find any result, if unsuccessful, runs on HEAD
type runOnHEAD struct{}
type runOnMergeBase struct {
	Repo   string
	Branch string
}
| |
| func (ctx *bugTreeContext) runRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult { |
| ret := ctx.doRunRepro(repo, result, runOn) |
| log.Infof(ctx.c, "runRepro on %s, %T, %T: %#v", repo.Alias, result, runOn, ret) |
| return ret |
| } |
| |
// doRunRepro returns an already known repro run result when one matches the
// query; otherwise (unless ctx.noNewJobs is set) it creates a new patch
// testing job and records it in the bug's TreeTests list.
// A created job is also stored in ctx.job / ctx.jobKey.
func (ctx *bugTreeContext) doRunRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult {
	existingResult, _ := ctx.bug.findResult(ctx.c, repo, result, runOn)
	if _, ok := existingResult.(pollResultSkip); !ok {
		return existingResult
	}
	// Okay, nothing suitable was found. We need to set up a new job.
	if ctx.noNewJobs {
		return pollResultPending{}
	}
	// First check if there's existing BugTreeTest object.
	// New jobs are always run on HEAD, so map runOnAny to runOnHEAD here.
	if _, ok := runOn.(runOnAny); ok {
		runOn = runOnHEAD{}
	}
	candidates := ctx.bug.matchingTreeTests(repo, runOn)
	var bugTreeTest *BugTreeTest
	if len(candidates) > 0 {
		bugTreeTest = &ctx.bug.TreeTests.List[candidates[0]]
	} else {
		// No matching record so far -- create a fresh one.
		item := BugTreeTest{
			CrashID: ctx.crashKey.IntID(),
			Repo:    repo.URL,
			Branch:  repo.Branch,
		}
		if v, ok := runOn.(runOnMergeBase); ok {
			item.MergeBaseRepo = v.Repo
			item.MergeBaseBranch = v.Branch
		}
		ctx.bug.TreeTests.List = append(ctx.bug.TreeTests.List, item)
		bugTreeTest = &ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1]
	}

	if bugTreeTest.Error != "" {
		// The previous attempt failed; don't retry too often.
		const errorRetryTime = 24 * time.Hour * 14
		result := ctx.ensureRepeatPeriod(bugTreeTest.Error, errorRetryTime)
		if _, ok := result.(pollResultSkip); !ok {
			return result
		}
		bugTreeTest.Error = ""
	}
	if bugTreeTest.Last != "" {
		// Rate-limit re-runs after a successful job.
		const fixRetryTime = 24 * time.Hour * 45
		result := ctx.ensureRepeatPeriod(bugTreeTest.Last, fixRetryTime)
		if _, ok := result.(pollResultSkip); !ok {
			return result
		}
	}
	var err error
	ctx.job, ctx.jobKey, err = addTestJob(ctx.c, &testJobArgs{
		crash:         ctx.crash,
		crashKey:      ctx.crashKey,
		configRef:     ctx.build.KernelConfig,
		configAppend:  repo.AppendConfig,
		inTransaction: true,
		treeOrigin:    true,
		testReqArgs: testReqArgs{
			bug:             ctx.bug,
			bugKey:          ctx.bugKey,
			repo:            bugTreeTest.Repo,
			branch:          bugTreeTest.Branch,
			mergeBaseRepo:   bugTreeTest.MergeBaseRepo,
			mergeBaseBranch: bugTreeTest.MergeBaseBranch,
		},
	})
	if err != nil {
		return pollResultError(err)
	}
	// Remember the pending job key so applyPending() can pick up the result later.
	bugTreeTest.Pending = ctx.jobKey.Encode()
	return pollResultPending{}
}
| |
| func (ctx *bugTreeContext) ensureRepeatPeriod(jobKey string, period time.Duration) pollTreeJobResult { |
| job, _, err := fetchJob(ctx.c, jobKey) |
| if err != nil { |
| return pollResultError(err) |
| } |
| timePassed := timeNow(ctx.c).Sub(job.Finished) |
| if timePassed < period { |
| return pollResultWait(job.Finished.Add(period)) |
| } |
| return pollResultSkip{} |
| } |
| |
// findResult searches the bug's cached tree test records for a finished job
// matching the (repo, result, runOn) query.
// It returns pollResultDone (plus the Job) on a match, pollResultPending if a
// relevant job is still in flight, and pollResultSkip otherwise.
func (bug *Bug) findResult(c context.Context,
	repo KernelRepo, result expectedResult, runOn runReproOn) (pollTreeJobResult, *Job) {
	anyPending := false
	for _, i := range bug.matchingTreeTests(repo, runOn) {
		info := &bug.TreeTests.List[i]
		anyPending = anyPending || info.Pending != ""
		// Pick the job key that corresponds to the requested result kind.
		key := ""
		switch result.(type) {
		case wantFirstOK:
			key = info.FirstOK
		case wantFirstCrash:
			key = info.FirstCrash
		case wantFirstAny:
			key = info.First
		case wantNewAny:
			key = info.Last
		default:
			return pollResultError(fmt.Errorf("unexpected expected result: %T", result)), nil
		}
		if key == "" {
			continue
		}
		job, _, err := fetchJob(c, key)
		if err != nil {
			return pollResultError(err), nil
		}
		if date, ok := result.(wantNewAny); ok {
			// Results older than the requested date are not interesting.
			if job.Finished.Before(time.Time(date)) {
				continue
			}
		}
		return pollResultDone{
			Crashed:  job.CrashTitle != "",
			Finished: job.Finished,
		}, job
	}
	if anyPending {
		return pollResultPending{}, nil
	} else {
		return pollResultSkip{}, nil
	}
}
| |
| func (bug *Bug) matchingTreeTests(repo KernelRepo, runOn runReproOn) []int { |
| ret := []int{} |
| for i, item := range bug.TreeTests.List { |
| if item.Repo != repo.URL { |
| continue |
| } |
| ok := true |
| switch v := runOn.(type) { |
| case runOnHEAD: |
| // TODO: should we check for an empty merge base here? |
| ok = item.Branch == repo.Branch |
| case runOnMergeBase: |
| ok = item.Branch == repo.Branch && |
| item.MergeBaseRepo == v.Repo && |
| item.MergeBaseBranch == v.Branch |
| } |
| if ok { |
| ret = append(ret, i) |
| } |
| } |
| return ret |
| } |
| |
// loadCrashInfo selects the crash on which all tree testing will be based and
// fills in the corresponding build and repo graph node.
// On a nil error return, ctx.crash may still be nil -- no suitable crash.
func (ctx *bugTreeContext) loadCrashInfo() error {
	// First look at the crash from previous tests.
	if len(ctx.bug.TreeTests.List) > 0 {
		crashID := ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1].CrashID
		crashKey := db.NewKey(ctx.c, "Crash", "", crashID, ctx.bugKey)
		crash := new(Crash)
		// We need to also tolerate the case when the crash was just deleted.
		err := db.Get(ctx.cGlobal, crashKey, crash)
		if err != nil && err != db.ErrNoSuchEntity {
			return fmt.Errorf("failed to get crash: %w", err)
		} else if err == nil {
			ok, build, err := ctx.isCrashRelevant(crash)
			if err != nil {
				return err
			}
			if ok {
				ctx.build = build
				ctx.crash = crash
				ctx.crashKey = crashKey
			}
		}
	}

	// Query the most relevant crash with repro.
	crash, crashKey, err := findCrashForBug(ctx.cGlobal, ctx.bug)
	if err != nil {
		return err
	}
	ok, build, err := ctx.isCrashRelevant(crash)
	if err != nil {
		return err
	} else if ok && (ctx.crash == nil || crash.ReportLen > ctx.crash.ReportLen) {
		// Update the crash only if we found a better one.
		ctx.build = build
		ctx.crash = crash
		ctx.crashKey = crashKey
	}
	// Load the rest of the data.
	if ctx.crash != nil {
		var err error
		ns := ctx.bug.Namespace
		repoGraph, err := makeRepoGraph(getNsConfig(ctx.c, ns).Repos)
		if err != nil {
			return err
		}
		// May legitimately leave ctx.repoNode nil if the build's repo/branch
		// is not present in the namespace config.
		ctx.repoNode = repoGraph.nodeByRepo(ctx.build.KernelRepo, ctx.build.KernelBranch)
	}
	return nil
}
| |
// isCrashRelevant reports whether the crash is still suitable for tree origin
// testing and, on success, returns the build on which it happened.
func (ctx *bugTreeContext) isCrashRelevant(crash *Crash) (bool, *Build, error) {
	if crash.ReproIsRevoked {
		// No sense in running the reproducer.
		return false, nil, nil
	} else if crash.ReproC == 0 && crash.ReproSyz == 0 {
		// Let's wait for the repro.
		return false, nil, nil
	}
	newManager, _ := activeManager(ctx.cGlobal, crash.Manager, ctx.bug.Namespace)
	if newManager != crash.Manager {
		// The manager was deprecated since the crash.
		// Let's just ignore such bugs for now.
		return false, nil, nil
	}
	build, err := loadBuild(ctx.cGlobal, ctx.bug.Namespace, crash.BuildID)
	if err != nil {
		return false, nil, err
	}
	mgrBuild, err := lastManagerBuild(ctx.cGlobal, build.Namespace, newManager)
	if err != nil {
		return false, build, err
	}
	// It does happen that we sometimes update the tested tree.
	// It's not frequent at all, but it will make all results very confusing.
	return build.KernelRepo == mgrBuild.KernelRepo &&
		build.KernelBranch == mgrBuild.KernelBranch, build, nil
}
| |
| func (test *BugTreeTest) applyPending(c context.Context) error { |
| if test.Pending == "" { |
| return nil |
| } |
| job, _, err := fetchJob(c, test.Pending) |
| if err != nil { |
| return err |
| } |
| if job.Finished.IsZero() { |
| // Not yet ready. |
| return nil |
| } |
| pendingKey := test.Pending |
| test.Pending = "" |
| if job.Error != 0 { |
| test.Error = pendingKey |
| return nil |
| } |
| test.Last = pendingKey |
| if test.First == "" { |
| test.First = pendingKey |
| } |
| if test.FirstOK == "" && job.CrashTitle == "" { |
| test.FirstOK = pendingKey |
| } else if test.FirstCrash == "" && job.CrashTitle != "" { |
| test.FirstCrash = pendingKey |
| } |
| return nil |
| } |
| |
| // treeTestJobs fetches relevant tree testing results. |
| func treeTestJobs(c context.Context, bug *Bug) ([]*dashapi.JobInfo, error) { |
| g, _ := errgroup.WithContext(context.Background()) |
| jobIDs := make(chan string) |
| |
| var ret []*dashapi.JobInfo |
| var mu sync.Mutex |
| |
| // The underlying code makes a number of queries, so let's do it in parallel to speed up processing. |
| const threads = 3 |
| for i := 0; i < threads; i++ { |
| g.Go(func() error { |
| for id := range jobIDs { |
| job, jobKey, err := fetchJob(c, id) |
| if err != nil { |
| return err |
| } |
| build, err := loadBuild(c, job.Namespace, job.BuildID) |
| if err != nil { |
| return err |
| } |
| crashKey := db.NewKey(c, "Crash", "", job.CrashID, bug.key(c)) |
| crash := new(Crash) |
| if err := db.Get(c, crashKey, crash); err != nil { |
| return fmt.Errorf("failed to get crash: %w", err) |
| } |
| info := makeJobInfo(c, job, jobKey, bug, build, crash) |
| mu.Lock() |
| ret = append(ret, info) |
| mu.Unlock() |
| } |
| return nil |
| }) |
| } |
| for _, info := range bug.TreeTests.List { |
| if info.FirstOK != "" { |
| jobIDs <- info.FirstOK |
| } |
| if info.FirstCrash != "" { |
| jobIDs <- info.FirstCrash |
| } |
| if info.Error != "" { |
| jobIDs <- info.Error |
| } |
| } |
| // Wait until we have all information. |
| close(jobIDs) |
| err := g.Wait() |
| if err != nil { |
| return nil, err |
| } |
| // Sort structures to keep output consistent. |
| sort.Slice(ret, func(i, j int) bool { |
| if ret[i].KernelAlias != ret[j].KernelAlias { |
| return ret[i].KernelAlias < ret[j].KernelAlias |
| } |
| return ret[i].Finished.Before(ret[j].Finished) |
| }) |
| return ret, nil |
| } |
| |
// Create a cross-tree bisection job (if needed).
// Returns:
// a) Job object and its key -- in case of success.
// b) Whether the lookup was expensive (it can help optimize crossTreeBisection calls).
func crossTreeBisection(c context.Context, bug *Bug,
	managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, bool, error) {
	repoGraph, err := makeRepoGraph(getNsConfig(c, bug.Namespace).Repos)
	if err != nil {
		return nil, nil, false, err
	}
	// Lazily loaded list of the bug's previous fix bisection jobs.
	bugJobs := &lazyJobList{
		c:       c,
		bug:     bug,
		jobType: JobBisectFix,
	}
	var job *Job
	var jobKey *db.Key
	expensive := false
	err = repoGraph.forEachEdge(func(from, to *repoNode, info KernelRepoLink) error {
		if jobKey != nil {
			// A job has already been created; skip the remaining edges.
			return nil
		}
		if !info.BisectFixes {
			return nil
		}
		expensive = true
		log.Infof(c, "%s: considering cross-tree bisection %s/%s",
			bug.displayTitle(), from.repo.Alias, to.repo.Alias)
		_, crashJob := bug.findResult(c, to.repo, wantNewAny{}, runOnHEAD{})
		if crashJob == nil {
			// No patch testing was performed yet.
			return nil
		}
		if crashJob.CrashTitle == "" {
			// The bug is already fixed on the target tree.
			return nil
		}
		crashBuild, err := loadBuild(c, bug.Namespace, crashJob.BuildID)
		if err != nil {
			return err
		}
		manager, _ := activeManager(c, crashJob.Manager, crashJob.Namespace)
		if !managers[manager].BisectFix {
			return nil
		}
		_, successJob := bug.findResult(c, from.repo, wantNewAny{}, runOnHEAD{})
		if successJob == nil {
			// The job is not done yet.
			return nil
		}
		if successJob.CrashTitle != "" {
			// The kernel tree is still crashed by the repro.
			return nil
		}
		// The repro still crashes `to` but no longer crashes `from` --
		// set up a fix bisection over that range.
		newJob := &Job{
			Type:            JobBisectFix,
			Created:         timeNow(c),
			Namespace:       bug.Namespace,
			Manager:         crashJob.Manager,
			BisectFrom:      crashBuild.KernelCommit,
			KernelRepo:      from.repo.URL,
			KernelBranch:    from.repo.Branch,
			MergeBaseRepo:   to.repo.URL,
			MergeBaseBranch: to.repo.Branch,
			BugTitle:        bug.displayTitle(),
			CrashID:         crashJob.CrashID,
		}
		// It's expected that crossTreeBisection is not concurrently called with the same
		// manager list.
		prevJob, err := bugJobs.lastMatch(newJob)
		if err != nil {
			return err
		}
		const repeatPeriod = time.Hour * 24 * 30
		if prevJob != nil && (prevJob.Error == 0 ||
			prevJob.Finished.After(timeNow(c).Add(-repeatPeriod))) {
			// The job is already pending or failed recently. Skip.
			return nil
		}
		job = newJob
		jobKey, err = saveJob(c, newJob, bug.key(c))
		return err
	})
	return job, jobKey, expensive, err
}
| |
// lazyJobList queries the bug's jobs of the given type on the first
// lastMatch() call and caches them for the subsequent calls.
type lazyJobList struct {
	c       context.Context
	bug     *Bug
	jobType JobType
	// jobs is populated lazily by lastMatch().
	jobs *bugJobs
}
| |
| func (list *lazyJobList) lastMatch(job *Job) (*Job, error) { |
| if list.jobs == nil { |
| var err error |
| list.jobs, err = queryBugJobs(list.c, list.bug, list.jobType) |
| if err != nil { |
| return nil, err |
| } |
| } |
| var best *Job |
| for _, item := range list.jobs.all() { |
| otherJob := item.job |
| same := otherJob.Manager == job.Manager && |
| otherJob.KernelRepo == job.KernelRepo && |
| otherJob.KernelBranch == job.KernelBranch && |
| otherJob.CrashID == job.CrashID && |
| otherJob.MergeBaseRepo == job.MergeBaseRepo && |
| otherJob.MergeBaseBranch == job.MergeBaseBranch |
| if !same { |
| continue |
| } |
| if best == nil || best.Created.Before(otherJob.Created) { |
| best = otherJob |
| } |
| } |
| return best, nil |
| } |
| |
| func doneCrossTreeBisection(c context.Context, jobKey *db.Key, job *Job) error { |
| if job.Type != JobBisectFix || job.MergeBaseRepo == "" { |
| // Not a cross tree bisection. |
| return nil |
| } |
| if job.Error != 0 || job.isUnreliableBisect() || len(job.Commits) != 1 { |
| // The result is not interesting. |
| return nil |
| } |
| return updateSingleBug(c, jobKey.Parent(), func(bug *Bug) error { |
| bug.FixCandidateJob = jobKey.Encode() |
| return nil |
| }) |
| } |
| |
// repoNode is a vertex of the commit flow graph; one per kernel repo.
type repoNode struct {
	repo  KernelRepo
	edges []repoEdge
}

// repoEdge represents one direction of a commit flow link between two repos.
type repoEdge struct {
	// If in is true, commits flow from `other` into this node;
	// otherwise commits flow from this node to `other`.
	in    bool
	info  KernelRepoLink
	other *repoNode
}

// repoGraph is the commit flow graph of a namespace's repos, keyed by alias.
type repoGraph struct {
	nodes map[string]*repoNode
}
| |
| func makeRepoGraph(repos []KernelRepo) (*repoGraph, error) { |
| g := &repoGraph{ |
| nodes: map[string]*repoNode{}, |
| } |
| for _, repo := range repos { |
| if repo.Alias == "" { |
| return nil, fmt.Errorf("one of the repos has an empty alias") |
| } |
| g.nodes[repo.Alias] = &repoNode{repo: repo} |
| } |
| for _, repo := range repos { |
| for _, link := range repo.CommitInflow { |
| if g.nodes[link.Alias] == nil { |
| return nil, fmt.Errorf("no repo with alias %q", link.Alias) |
| } |
| g.nodes[repo.Alias].addEdge(true, link, g.nodes[link.Alias]) |
| g.nodes[link.Alias].addEdge(false, link, g.nodes[repo.Alias]) |
| } |
| } |
| for alias, node := range g.nodes { |
| reachable := node.reachable(true) |
| if _, ok := reachable[node]; ok { |
| return nil, fmt.Errorf("%q lies on a cycle", alias) |
| } |
| } |
| return g, nil |
| } |
| |
| func (g *repoGraph) nodeByRepo(url, branch string) *repoNode { |
| for _, node := range g.nodes { |
| if node.repo.URL == url && node.repo.Branch == branch { |
| return node |
| } |
| } |
| return nil |
| } |
| |
| func (g *repoGraph) nodeByAlias(alias string) *repoNode { |
| for _, node := range g.nodes { |
| if node.repo.Alias == alias { |
| return node |
| } |
| } |
| return nil |
| } |
| |
| func (g *repoGraph) forEachEdge(cb func(from, to *repoNode, info KernelRepoLink) error) error { |
| for _, node := range g.nodes { |
| for _, e := range node.edges { |
| if !e.in { |
| continue |
| } |
| err := cb(e.other, node, e.info) |
| if err != nil { |
| return err |
| } |
| } |
| } |
| return nil |
| } |
| |
// reachable returns a map *repoNode -> bool (whether commits are merged).
// With in=true the map holds the trees from which commits flow into n,
// with in=false -- the trees to which commits flow from n.
func (n *repoNode) reachable(in bool) map[*repoNode]bool {
	ret := map[*repoNode]bool{}
	// First collect nodes only reachable via merge=true links.
	// The second pass then adds the rest; already visited nodes keep their
	// (stronger) merge=true value because reachableMerged never revisits.
	n.reachableMerged(in, true, ret)
	n.reachableMerged(in, false, ret)
	return ret
}
| |
// reachableMerged runs a DFS over the edges of the given direction and records
// the visited nodes in ret. The stored value is true iff every link on the
// path from n was a merge link. If onlyMerge is set, non-merge links are not
// followed at all. Nodes already present in ret are never revisited or
// overwritten.
func (n *repoNode) reachableMerged(in, onlyMerge bool, ret map[*repoNode]bool) {
	var dfs func(*repoNode, bool)
	dfs = func(node *repoNode, merge bool) {
		for _, edge := range node.edges {
			if edge.in != in || onlyMerge && !edge.info.Merge {
				continue
			}
			if _, ok := ret[edge.other]; ok {
				continue
			}
			ret[edge.other] = merge && edge.info.Merge
			dfs(edge.other, merge && edge.info.Merge)
		}
	}
	dfs(n, true)
}
| |
| func (n *repoNode) allReachable() map[*repoNode]bool { |
| ret := n.reachable(true) |
| for node, merge := range n.reachable(false) { |
| ret[node] = merge |
| } |
| return ret |
| } |
| |
| func (n *repoNode) addEdge(in bool, info KernelRepoLink, other *repoNode) { |
| n.edges = append(n.edges, repoEdge{ |
| in: in, |
| info: info, |
| other: other, |
| }) |
| } |