| // Copyright 2015 syzkaller project authors. All rights reserved. |
| // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
| package main |
| |
| import ( |
| "bytes" |
| "encoding/json" |
| "flag" |
| "fmt" |
| "math/rand" |
| "net" |
| "os" |
| "os/exec" |
| "path/filepath" |
| "strings" |
| "sync" |
| "sync/atomic" |
| "time" |
| |
| "github.com/google/syzkaller/dashboard/dashapi" |
| "github.com/google/syzkaller/pkg/cover" |
| "github.com/google/syzkaller/pkg/csource" |
| "github.com/google/syzkaller/pkg/db" |
| "github.com/google/syzkaller/pkg/gce" |
| "github.com/google/syzkaller/pkg/hash" |
| "github.com/google/syzkaller/pkg/instance" |
| "github.com/google/syzkaller/pkg/log" |
| "github.com/google/syzkaller/pkg/mgrconfig" |
| "github.com/google/syzkaller/pkg/osutil" |
| "github.com/google/syzkaller/pkg/report" |
| "github.com/google/syzkaller/pkg/repro" |
| "github.com/google/syzkaller/pkg/rpctype" |
| "github.com/google/syzkaller/pkg/signal" |
| "github.com/google/syzkaller/prog" |
| "github.com/google/syzkaller/sys" |
| "github.com/google/syzkaller/sys/targets" |
| "github.com/google/syzkaller/vm" |
| ) |
| |
| var ( |
| flagConfig = flag.String("config", "", "configuration file") |
| flagDebug = flag.Bool("debug", false, "dump all VM output to console") |
| flagBench = flag.String("bench", "", "write execution statistics into this file periodically") |
| ) |
| |
| type Manager struct { |
| cfg *mgrconfig.Config |
| vmPool *vm.Pool |
| target *prog.Target |
| sysTarget *targets.Target |
| reporter report.Reporter |
| crashdir string |
| port int |
| corpusDB *db.DB |
| startTime time.Time |
| firstConnect time.Time |
| fuzzingTime time.Duration |
| stats *Stats |
| crashTypes map[string]bool |
| vmStop chan bool |
| checkResult *rpctype.CheckArgs |
| fresh bool |
| numFuzzing uint32 |
| numReproducing uint32 |
| |
| dash *dashapi.Dashboard |
| |
| mu sync.Mutex |
| phase int |
| enabledSyscalls []int |
| |
| candidates []rpctype.RPCCandidate // untriaged inputs from corpus and hub |
| disabledHashes map[string]struct{} |
| corpus map[string]rpctype.RPCInput |
| newRepros [][]byte |
| lastMinCorpus int |
| memoryLeakFrames map[string]bool |
| |
| needMoreRepros chan chan bool |
| hubReproQueue chan *Crash |
| reproRequest chan chan map[string]bool |
| |
	// For checking that the files we use are not modified under us.
	// Maps file name to modification time.
| usedFiles map[string]time.Time |
| } |
| |
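// Manager phases. The manager advances through these strictly in order;
// when no hub is configured, candidateBatch jumps from phaseLoadedCorpus
// directly to phaseTriagedHub.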
| const ( |
| // Just started, nothing done yet. |
| phaseInit = iota |
| // Corpus is loaded and machine is checked. |
| phaseLoadedCorpus |
| // Triaged all inputs from corpus. |
| // This is when we start querying hub and minimizing persistent corpus. |
| phaseTriagedCorpus |
| // Done the first request to hub. |
| phaseQueriedHub |
| // Triaged all new inputs from hub. |
| // This is when we start reproducing crashes. |
| phaseTriagedHub |
| ) |
| |
| const currentDBVersion = 3 |
| |
| type Crash struct { |
| vmIndex int |
| hub bool // this crash was created based on a repro from hub |
| *report.Report |
| } |
| |
| func main() { |
| if sys.GitRevision == "" { |
| log.Fatalf("Bad syz-manager build. Build with make, run bin/syz-manager.") |
| } |
| flag.Parse() |
| log.EnableLogCaching(1000, 1<<20) |
| cfg, err := mgrconfig.LoadFile(*flagConfig) |
| if err != nil { |
| log.Fatalf("%v", err) |
| } |
| target, err := prog.GetTarget(cfg.TargetOS, cfg.TargetArch) |
| if err != nil { |
| log.Fatalf("%v", err) |
| } |
| sysTarget := targets.Get(cfg.TargetOS, cfg.TargetArch) |
| if sysTarget == nil { |
| log.Fatalf("unsupported OS/arch: %v/%v", cfg.TargetOS, cfg.TargetArch) |
| } |
| syscalls, err := mgrconfig.ParseEnabledSyscalls(target, cfg.EnabledSyscalls, cfg.DisabledSyscalls) |
| if err != nil { |
| log.Fatalf("%v", err) |
| } |
| RunManager(cfg, target, sysTarget, syscalls) |
| } |
| |
| func RunManager(cfg *mgrconfig.Config, target *prog.Target, sysTarget *targets.Target, syscalls []int) { |
| var vmPool *vm.Pool |
	// Type "none" is a special case for debugging/development: the manager
	// does not start any VMs, instead you start them manually
	// and run syz-fuzzer there.
| if cfg.Type != "none" { |
| var err error |
| vmPool, err = vm.Create(cfg, *flagDebug) |
| if err != nil { |
| log.Fatalf("%v", err) |
| } |
| } |
| |
| crashdir := filepath.Join(cfg.Workdir, "crashes") |
| osutil.MkdirAll(crashdir) |
| |
| reporter, err := report.NewReporter(cfg) |
| if err != nil { |
| log.Fatalf("%v", err) |
| } |
| |
| mgr := &Manager{ |
| cfg: cfg, |
| vmPool: vmPool, |
| target: target, |
| sysTarget: sysTarget, |
| reporter: reporter, |
| crashdir: crashdir, |
| startTime: time.Now(), |
| stats: new(Stats), |
| crashTypes: make(map[string]bool), |
| enabledSyscalls: syscalls, |
| corpus: make(map[string]rpctype.RPCInput), |
| disabledHashes: make(map[string]struct{}), |
| memoryLeakFrames: make(map[string]bool), |
| fresh: true, |
| vmStop: make(chan bool), |
| hubReproQueue: make(chan *Crash, 10), |
| needMoreRepros: make(chan chan bool), |
| reproRequest: make(chan chan map[string]bool), |
| usedFiles: make(map[string]time.Time), |
| } |
| |
| log.Logf(0, "loading corpus...") |
| mgr.corpusDB, err = db.Open(filepath.Join(cfg.Workdir, "corpus.db")) |
| if err != nil { |
| log.Fatalf("failed to open corpus database: %v", err) |
| } |
| |
| // Create HTTP server. |
| mgr.initHTTP() |
| mgr.collectUsedFiles() |
| |
| // Create RPC server for fuzzers. |
| mgr.port, err = startRPCServer(mgr) |
| if err != nil { |
| log.Fatalf("failed to create rpc server: %v", err) |
| } |
| |
| if cfg.DashboardAddr != "" { |
| mgr.dash = dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey) |
| } |
| |
| go func() { |
| for lastTime := time.Now(); ; { |
| time.Sleep(10 * time.Second) |
| now := time.Now() |
| diff := now.Sub(lastTime) |
| lastTime = now |
| mgr.mu.Lock() |
| if mgr.firstConnect.IsZero() { |
| mgr.mu.Unlock() |
| continue |
| } |
| mgr.fuzzingTime += diff * time.Duration(atomic.LoadUint32(&mgr.numFuzzing)) |
| executed := mgr.stats.execTotal.get() |
| crashes := mgr.stats.crashes.get() |
			corpusSignal := mgr.stats.corpusSignal.get()
| mgr.mu.Unlock() |
| numReproducing := atomic.LoadUint32(&mgr.numReproducing) |
| numFuzzing := atomic.LoadUint32(&mgr.numFuzzing) |
| |
			log.Logf(0, "VMs %v, executed %v, cover %v, crashes %v, repro %v",
				numFuzzing, executed, corpusSignal, crashes, numReproducing)
| } |
| }() |
| |
| if *flagBench != "" { |
| f, err := os.OpenFile(*flagBench, os.O_WRONLY|os.O_CREATE|os.O_EXCL, osutil.DefaultFilePerm) |
| if err != nil { |
| log.Fatalf("failed to open bench file: %v", err) |
| } |
| go func() { |
| for { |
| time.Sleep(time.Minute) |
| vals := mgr.stats.all() |
| mgr.mu.Lock() |
| if mgr.firstConnect.IsZero() { |
| mgr.mu.Unlock() |
| continue |
| } |
| mgr.minimizeCorpus() |
| vals["corpus"] = uint64(len(mgr.corpus)) |
| vals["uptime"] = uint64(time.Since(mgr.firstConnect)) / 1e9 |
| vals["fuzzing"] = uint64(mgr.fuzzingTime) / 1e9 |
| mgr.mu.Unlock() |
| |
| data, err := json.MarshalIndent(vals, "", " ") |
| if err != nil { |
| log.Fatalf("failed to serialize bench data") |
| } |
| if _, err := f.Write(append(data, '\n')); err != nil { |
| log.Fatalf("failed to write bench data") |
| } |
| } |
| }() |
| } |
| |
| if mgr.dash != nil { |
| go mgr.dashboardReporter() |
| } |
| |
| osutil.HandleInterrupts(vm.Shutdown) |
| if mgr.vmPool == nil { |
| log.Logf(0, "no VMs started (type=none)") |
| log.Logf(0, "you are supposed to start syz-fuzzer manually as:") |
| log.Logf(0, "syz-fuzzer -manager=manager.ip:%v [other flags as necessary]", mgr.port) |
| <-vm.Shutdown |
| return |
| } |
| mgr.vmLoop() |
| } |
| |
| type RunResult struct { |
| idx int |
| crash *Crash |
| err error |
| } |
| |
| type ReproResult struct { |
| instances []int |
| title0 string |
| res *repro.Result |
| stats *repro.Stats |
| err error |
| hub bool // repro came from hub |
| } |
| |
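// vmLoop is the main scheduling loop of the manager. It multiplexes the
// fixed pool of VM instances between regular fuzzing runs and crash
// reproduction: once the corpus is fully triaged (phaseTriagedHub), up to
// instancesPerRepro instances at a time are diverted to reproduce queued
// crashes while the rest keep fuzzing. The loop terminates after a shutdown
// request once all instances have been returned.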
| // Manager needs to be refactored (#605). |
| // nolint: gocyclo |
| func (mgr *Manager) vmLoop() { |
| log.Logf(0, "booting test machines...") |
	log.Logf(0, "waiting for the connection from test machines...")
| instancesPerRepro := 4 |
| vmCount := mgr.vmPool.Count() |
| if instancesPerRepro > vmCount { |
| instancesPerRepro = vmCount |
| } |
| instances := make([]int, vmCount) |
| for i := range instances { |
| instances[i] = vmCount - i - 1 |
| } |
| runDone := make(chan *RunResult, 1) |
| pendingRepro := make(map[*Crash]bool) |
| reproducing := make(map[string]bool) |
| reproInstances := 0 |
| var reproQueue []*Crash |
| reproDone := make(chan *ReproResult, 1) |
| stopPending := false |
| shutdown := vm.Shutdown |
| for shutdown != nil || len(instances) != vmCount { |
| mgr.mu.Lock() |
| phase := mgr.phase |
| mgr.mu.Unlock() |
| |
| for crash := range pendingRepro { |
| if reproducing[crash.Title] { |
| continue |
| } |
| delete(pendingRepro, crash) |
| if !mgr.needRepro(crash) { |
| continue |
| } |
| log.Logf(1, "loop: add to repro queue '%v'", crash.Title) |
| reproducing[crash.Title] = true |
| reproQueue = append(reproQueue, crash) |
| } |
| |
| log.Logf(1, "loop: phase=%v shutdown=%v instances=%v/%v %+v repro: pending=%v reproducing=%v queued=%v", |
| phase, shutdown == nil, len(instances), vmCount, instances, |
| len(pendingRepro), len(reproducing), len(reproQueue)) |
| |
| canRepro := func() bool { |
| return phase >= phaseTriagedHub && |
| len(reproQueue) != 0 && reproInstances+instancesPerRepro <= vmCount |
| } |
| |
| if shutdown != nil { |
| for canRepro() && len(instances) >= instancesPerRepro { |
| last := len(reproQueue) - 1 |
| crash := reproQueue[last] |
| reproQueue[last] = nil |
| reproQueue = reproQueue[:last] |
| vmIndexes := append([]int{}, instances[len(instances)-instancesPerRepro:]...) |
| instances = instances[:len(instances)-instancesPerRepro] |
| reproInstances += instancesPerRepro |
| atomic.AddUint32(&mgr.numReproducing, 1) |
| log.Logf(1, "loop: starting repro of '%v' on instances %+v", crash.Title, vmIndexes) |
| go func() { |
| res, stats, err := repro.Run(crash.Output, mgr.cfg, mgr.reporter, mgr.vmPool, vmIndexes) |
| reproDone <- &ReproResult{vmIndexes, crash.Title, res, stats, err, crash.hub} |
| }() |
| } |
| for !canRepro() && len(instances) != 0 { |
| last := len(instances) - 1 |
| idx := instances[last] |
| instances = instances[:last] |
| log.Logf(1, "loop: starting instance %v", idx) |
| go func() { |
| crash, err := mgr.runInstance(idx) |
| runDone <- &RunResult{idx, crash, err} |
| }() |
| } |
| } |
| |
| var stopRequest chan bool |
| if !stopPending && canRepro() { |
| stopRequest = mgr.vmStop |
| } |
| |
| wait: |
| select { |
| case stopRequest <- true: |
| log.Logf(1, "loop: issued stop request") |
| stopPending = true |
| case res := <-runDone: |
| log.Logf(1, "loop: instance %v finished, crash=%v", res.idx, res.crash != nil) |
| if res.err != nil && shutdown != nil { |
| log.Logf(0, "%v", res.err) |
| } |
| stopPending = false |
| instances = append(instances, res.idx) |
			// On shutdown qemu exits with "qemu: terminating on signal 2",
			// which we detect as "lost connection". Don't save that as a crash.
| if shutdown != nil && res.crash != nil { |
| needRepro := mgr.saveCrash(res.crash) |
| if needRepro { |
| log.Logf(1, "loop: add pending repro for '%v'", res.crash.Title) |
| pendingRepro[res.crash] = true |
| } |
| } |
| case res := <-reproDone: |
| atomic.AddUint32(&mgr.numReproducing, ^uint32(0)) |
| crepro := false |
| title := "" |
| if res.res != nil { |
| crepro = res.res.CRepro |
| title = res.res.Report.Title |
| } |
| log.Logf(1, "loop: repro on %+v finished '%v', repro=%v crepro=%v desc='%v'", |
| res.instances, res.title0, res.res != nil, crepro, title) |
| if res.err != nil { |
| log.Logf(0, "repro failed: %v", res.err) |
| } |
| delete(reproducing, res.title0) |
| instances = append(instances, res.instances...) |
| reproInstances -= instancesPerRepro |
| if res.res == nil { |
| if !res.hub { |
| mgr.saveFailedRepro(res.title0, res.stats) |
| } |
| } else { |
| mgr.saveRepro(res.res, res.stats, res.hub) |
| } |
| case <-shutdown: |
| log.Logf(1, "loop: shutting down...") |
| shutdown = nil |
| case crash := <-mgr.hubReproQueue: |
| log.Logf(1, "loop: get repro from hub") |
| pendingRepro[crash] = true |
| case reply := <-mgr.needMoreRepros: |
| reply <- phase >= phaseTriagedHub && |
| len(reproQueue)+len(pendingRepro)+len(reproducing) == 0 |
| goto wait |
| case reply := <-mgr.reproRequest: |
| repros := make(map[string]bool) |
| for title := range reproducing { |
| repros[title] = true |
| } |
| reply <- repros |
| goto wait |
| } |
| } |
| } |
| |
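// loadCorpus turns the records of the persistent corpus database into
// triage candidates for the fuzzers: broken programs are deleted from the
// database, programs containing disabled syscalls are kept but not executed,
// and the rest are queued as candidates (duplicated and shuffled, see below).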
| func (mgr *Manager) loadCorpus() { |
	// By default we don't re-minimize/re-smash programs from the corpus:
	// it takes a lot of time on start and is usually unnecessary.
	// However, on version bumps we can selectively re-minimize/re-smash.
| minimized, smashed := true, true |
| switch mgr.corpusDB.Version { |
| case 0: |
| // Version 0 had broken minimization, so we need to re-minimize. |
| minimized = false |
| fallthrough |
| case 1: |
| // Version 1->2: memory is preallocated so lots of mmaps become unnecessary. |
| minimized = false |
| fallthrough |
| case 2: |
| // Version 2->3: big-endian hints. |
| smashed = false |
| fallthrough |
| case currentDBVersion: |
| } |
| syscalls := make(map[int]bool) |
| for _, id := range mgr.checkResult.EnabledCalls[mgr.cfg.Sandbox] { |
| syscalls[id] = true |
| } |
| deleted := 0 |
| for key, rec := range mgr.corpusDB.Records { |
| p, err := mgr.target.Deserialize(rec.Val, prog.NonStrict) |
| if err != nil { |
| if deleted < 10 { |
| log.Logf(0, "deleting broken program: %v\n%s", err, rec.Val) |
| } |
| mgr.corpusDB.Delete(key) |
| deleted++ |
| continue |
| } |
| disabled := false |
| for _, c := range p.Calls { |
| if !syscalls[c.Meta.ID] { |
| disabled = true |
| break |
| } |
| } |
| if disabled { |
| // This program contains a disabled syscall. |
| // We won't execute it, but remember its hash so |
| // it is not deleted during minimization. |
| mgr.disabledHashes[hash.String(rec.Val)] = struct{}{} |
| continue |
| } |
| mgr.candidates = append(mgr.candidates, rpctype.RPCCandidate{ |
| Prog: rec.Val, |
| Minimized: minimized, |
| Smashed: smashed, |
| }) |
| } |
| mgr.fresh = len(mgr.corpusDB.Records) == 0 |
| log.Logf(0, "%-24v: %v (%v deleted)", "corpus", len(mgr.candidates), deleted) |
| |
	// Now this is ugly.
	// We duplicate all inputs in the corpus and shuffle the second half.
	// This solves the following problem. A fuzzer can crash while triaging candidates,
	// in which case it also loses all cached candidates. Also, an input can be somewhat
	// flaky and produce no coverage on the first try. So we give each input a second chance.
	// Shuffling should prevent deterministically losing the same inputs when a fuzzer crashes.
| mgr.candidates = append(mgr.candidates, mgr.candidates...) |
| shuffle := mgr.candidates[len(mgr.candidates)/2:] |
| for i := range shuffle { |
| j := i + rand.Intn(len(shuffle)-i) |
| shuffle[i], shuffle[j] = shuffle[j], shuffle[i] |
| } |
| if mgr.phase != phaseInit { |
| panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase)) |
| } |
| mgr.phase = phaseLoadedCorpus |
| } |
| |
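// runInstance creates and boots one VM instance, copies the fuzzer and
// executor binaries into it, starts syz-fuzzer there, and monitors its
// execution for up to an hour. It returns (nil, nil) if the instance ran
// without crashing (the only "OK" outcome), and the detected crash otherwise.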
| func (mgr *Manager) runInstance(index int) (*Crash, error) { |
| mgr.checkUsedFiles() |
| inst, err := mgr.vmPool.Create(index) |
| if err != nil { |
| return nil, fmt.Errorf("failed to create instance: %v", err) |
| } |
| defer inst.Close() |
| |
| fwdAddr, err := inst.Forward(mgr.port) |
| if err != nil { |
| return nil, fmt.Errorf("failed to setup port forwarding: %v", err) |
| } |
| fuzzerBin, err := inst.Copy(mgr.cfg.SyzFuzzerBin) |
| if err != nil { |
| return nil, fmt.Errorf("failed to copy binary: %v", err) |
| } |
| executorBin, err := inst.Copy(mgr.cfg.SyzExecutorBin) |
| if err != nil { |
| return nil, fmt.Errorf("failed to copy binary: %v", err) |
| } |
| |
| fuzzerV := 0 |
| procs := mgr.cfg.Procs |
| if *flagDebug { |
| fuzzerV = 100 |
| procs = 1 |
| } |
| |
| // Run the fuzzer binary. |
| start := time.Now() |
| atomic.AddUint32(&mgr.numFuzzing, 1) |
| defer atomic.AddUint32(&mgr.numFuzzing, ^uint32(0)) |
| cmd := instance.FuzzerCmd(fuzzerBin, executorBin, fmt.Sprintf("vm-%v", index), |
| mgr.cfg.TargetOS, mgr.cfg.TargetArch, fwdAddr, mgr.cfg.Sandbox, procs, fuzzerV, |
| mgr.cfg.Cover, *flagDebug, false, false) |
| outc, errc, err := inst.Run(time.Hour, mgr.vmStop, cmd) |
| if err != nil { |
| return nil, fmt.Errorf("failed to run fuzzer: %v", err) |
| } |
| |
| rep := inst.MonitorExecution(outc, errc, mgr.reporter, vm.ExitTimeout) |
| if rep == nil { |
| // This is the only "OK" outcome. |
| log.Logf(0, "vm-%v: running for %v, restarting", index, time.Since(start)) |
| return nil, nil |
| } |
| crash := &Crash{ |
| vmIndex: index, |
| hub: false, |
| Report: rep, |
| } |
| return crash, nil |
| } |
| |
| func (mgr *Manager) emailCrash(crash *Crash) { |
| if len(mgr.cfg.EmailAddrs) == 0 { |
| return |
| } |
| args := []string{"-s", "syzkaller: " + crash.Title} |
| args = append(args, mgr.cfg.EmailAddrs...) |
| log.Logf(0, "sending email to %v", mgr.cfg.EmailAddrs) |
| |
| cmd := exec.Command("mailx", args...) |
| cmd.Stdin = bytes.NewReader(crash.Report.Report) |
| if _, err := osutil.Run(10*time.Minute, cmd); err != nil { |
| log.Logf(0, "failed to send email: %v", err) |
| } |
| } |
| |
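// saveCrash logs the crash, reports it to the dashboard (if configured) or
// stores it in the local crashes dir, and returns whether the crash should
// be reproduced.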
| func (mgr *Manager) saveCrash(crash *Crash) bool { |
| isMemoryLeak := strings.HasPrefix(crash.Title, report.MemoryLeakPrefix) |
| if isMemoryLeak { |
| frame := crash.Title[len(report.MemoryLeakPrefix):] |
| mgr.mu.Lock() |
| mgr.memoryLeakFrames[frame] = true |
| mgr.mu.Unlock() |
| } |
| if crash.Suppressed { |
| log.Logf(0, "vm-%v: suppressed crash %v", crash.vmIndex, crash.Title) |
| mgr.stats.crashSuppressed.inc() |
| return false |
| } |
| corrupted := "" |
| if crash.Corrupted { |
| corrupted = " [corrupted]" |
| } |
| log.Logf(0, "vm-%v: crash: %v%v", crash.vmIndex, crash.Title, corrupted) |
| if err := mgr.reporter.Symbolize(crash.Report); err != nil { |
| log.Logf(0, "failed to symbolize report: %v", err) |
| } |
| |
| mgr.stats.crashes.inc() |
| mgr.mu.Lock() |
| if !mgr.crashTypes[crash.Title] { |
| mgr.crashTypes[crash.Title] = true |
| mgr.stats.crashTypes.inc() |
| } |
| mgr.mu.Unlock() |
| |
| if mgr.dash != nil { |
| if isMemoryLeak { |
| return true |
| } |
| dc := &dashapi.Crash{ |
| BuildID: mgr.cfg.Tag, |
| Title: crash.Title, |
| Corrupted: crash.Corrupted, |
| Maintainers: crash.Maintainers, |
| Log: crash.Output, |
| Report: crash.Report.Report, |
| } |
| resp, err := mgr.dash.ReportCrash(dc) |
| if err != nil { |
| log.Logf(0, "failed to report crash to dashboard: %v", err) |
| } else { |
			// Don't store the crash locally if we've successfully
			// uploaded it to the dashboard; it would just eat disk space.
| return resp.NeedRepro |
| } |
| } |
| |
| sig := hash.Hash([]byte(crash.Title)) |
| id := sig.String() |
| dir := filepath.Join(mgr.crashdir, id) |
| osutil.MkdirAll(dir) |
| if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(crash.Title+"\n")); err != nil { |
| log.Logf(0, "failed to write crash: %v", err) |
| } |
	// Save up to 100 reports. If we already have 100, overwrite the oldest one.
	// Newer reports are generally more useful. Overwriting is also needed
	// to tell whether a particular bug still happens or has already been fixed.
| oldestI := 0 |
| var oldestTime time.Time |
| for i := 0; i < 100; i++ { |
| info, err := os.Stat(filepath.Join(dir, fmt.Sprintf("log%v", i))) |
| if err != nil { |
| oldestI = i |
| if i == 0 { |
| go mgr.emailCrash(crash) |
| } |
| break |
| } |
| if oldestTime.IsZero() || info.ModTime().Before(oldestTime) { |
| oldestI = i |
| oldestTime = info.ModTime() |
| } |
| } |
| osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("log%v", oldestI)), crash.Output) |
| if len(mgr.cfg.Tag) > 0 { |
| osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("tag%v", oldestI)), []byte(mgr.cfg.Tag)) |
| } |
| if len(crash.Report.Report) > 0 { |
| osutil.WriteFile(filepath.Join(dir, fmt.Sprintf("report%v", oldestI)), crash.Report.Report) |
| } |
| |
| return mgr.needLocalRepro(crash) |
| } |
| |
| const maxReproAttempts = 3 |
| |
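// needLocalRepro reports whether a local repro attempt is warranted:
// reproduction must be enabled in the config, the report must not be
// corrupted, there must be no repro for this crash title yet, and fewer
// than maxReproAttempts attempts must have been made so far.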
| func (mgr *Manager) needLocalRepro(crash *Crash) bool { |
| if !mgr.cfg.Reproduce || crash.Corrupted { |
| return false |
| } |
| sig := hash.Hash([]byte(crash.Title)) |
| dir := filepath.Join(mgr.crashdir, sig.String()) |
| if osutil.IsExist(filepath.Join(dir, "repro.prog")) { |
| return false |
| } |
| for i := 0; i < maxReproAttempts; i++ { |
| if !osutil.IsExist(filepath.Join(dir, fmt.Sprintf("repro%v", i))) { |
| return true |
| } |
| } |
| return false |
| } |
| |
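// needRepro decides whether to reproduce the crash. Repros from the hub are
// always retried; without a dashboard the decision is purely local; with a
// dashboard, memory leaks are always reproduced and everything else is
// delegated to dashboard.NeedRepro.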
| func (mgr *Manager) needRepro(crash *Crash) bool { |
| if crash.hub { |
| return true |
| } |
| if mgr.dash == nil { |
| return mgr.needLocalRepro(crash) |
| } |
| if strings.HasPrefix(crash.Title, report.MemoryLeakPrefix) { |
| return true |
| } |
| cid := &dashapi.CrashID{ |
| BuildID: mgr.cfg.Tag, |
| Title: crash.Title, |
| Corrupted: crash.Corrupted, |
| } |
| needRepro, err := mgr.dash.NeedRepro(cid) |
| if err != nil { |
| log.Logf(0, "dashboard.NeedRepro failed: %v", err) |
| } |
| return needRepro |
| } |
| |
| func (mgr *Manager) saveFailedRepro(title string, stats *repro.Stats) { |
| if strings.HasPrefix(title, report.MemoryLeakPrefix) { |
		// Don't send failed leak repro attempts to the dashboard,
		// as we did not send the crash itself.
| return |
| } |
| if mgr.dash != nil { |
| cid := &dashapi.CrashID{ |
| BuildID: mgr.cfg.Tag, |
| Title: title, |
| } |
| if err := mgr.dash.ReportFailedRepro(cid); err != nil { |
| log.Logf(0, "failed to report failed repro to dashboard: %v", err) |
| } else { |
| return |
| } |
| } |
| dir := filepath.Join(mgr.crashdir, hash.String([]byte(title))) |
| osutil.MkdirAll(dir) |
| for i := 0; i < maxReproAttempts; i++ { |
| name := filepath.Join(dir, fmt.Sprintf("repro%v", i)) |
| if !osutil.IsExist(name) { |
| saveReproStats(name, stats) |
| break |
| } |
| } |
| } |
| |
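// saveRepro persists a successful repro: it queues the repro for the hub
// (unless it came from the hub in the first place), generates a C
// reproducer if possible, and either uploads the result to the dashboard or
// stores it in the local crashes dir.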
| func (mgr *Manager) saveRepro(res *repro.Result, stats *repro.Stats, hub bool) { |
| rep := res.Report |
| if err := mgr.reporter.Symbolize(rep); err != nil { |
| log.Logf(0, "failed to symbolize repro: %v", err) |
| } |
| opts := fmt.Sprintf("# %+v\n", res.Opts) |
| prog := res.Prog.Serialize() |
| |
	// Append this repro to the list of repros to send to the hub,
	// unless it originally came from the hub.
| if !hub { |
| progForHub := []byte(fmt.Sprintf("# %+v\n# %v\n# %v\n%s", |
| res.Opts, res.Report.Title, mgr.cfg.Tag, prog)) |
| mgr.mu.Lock() |
| mgr.newRepros = append(mgr.newRepros, progForHub) |
| mgr.mu.Unlock() |
| } |
| |
| var cprogText []byte |
| if res.CRepro { |
| cprog, err := csource.Write(res.Prog, res.Opts) |
| if err == nil { |
| formatted, err := csource.Format(cprog) |
| if err == nil { |
| cprog = formatted |
| } |
| cprogText = cprog |
| } else { |
| log.Logf(0, "failed to write C source: %v", err) |
| } |
| } |
| |
| if mgr.dash != nil { |
		// Note: we intentionally don't set Corrupted for reproducers:
		// 1. The crash is reproducible, so it can be debugged even with a corrupted report.
		// 2. The repro was re-tried 3 times and still ended with a corrupted report,
		//    so maybe corrupted-report detection is broken.
		// 3. Reproduction is expensive, so it's worth persisting the result.
| dc := &dashapi.Crash{ |
| BuildID: mgr.cfg.Tag, |
| Title: res.Report.Title, |
| Maintainers: res.Report.Maintainers, |
| Log: res.Report.Output, |
| Report: res.Report.Report, |
| ReproOpts: res.Opts.Serialize(), |
| ReproSyz: res.Prog.Serialize(), |
| ReproC: cprogText, |
| } |
| if _, err := mgr.dash.ReportCrash(dc); err != nil { |
| log.Logf(0, "failed to report repro to dashboard: %v", err) |
| } else { |
			// Don't store the crash locally if we've successfully
			// uploaded it to the dashboard; it would just eat disk space.
| return |
| } |
| } |
| |
| dir := filepath.Join(mgr.crashdir, hash.String([]byte(rep.Title))) |
| osutil.MkdirAll(dir) |
| |
| if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(rep.Title+"\n")); err != nil { |
| log.Logf(0, "failed to write crash: %v", err) |
| } |
| osutil.WriteFile(filepath.Join(dir, "repro.prog"), append([]byte(opts), prog...)) |
| if len(mgr.cfg.Tag) > 0 { |
| osutil.WriteFile(filepath.Join(dir, "repro.tag"), []byte(mgr.cfg.Tag)) |
| } |
| if len(rep.Output) > 0 { |
| osutil.WriteFile(filepath.Join(dir, "repro.log"), rep.Output) |
| } |
| if len(rep.Report) > 0 { |
| osutil.WriteFile(filepath.Join(dir, "repro.report"), rep.Report) |
| } |
| if len(cprogText) > 0 { |
| osutil.WriteFile(filepath.Join(dir, "repro.cprog"), cprogText) |
| } |
| saveReproStats(filepath.Join(dir, "repro.stats"), stats) |
| } |
| |
| func saveReproStats(filename string, stats *repro.Stats) { |
| text := "" |
| if stats != nil { |
| text = fmt.Sprintf("Extracting prog: %v\nMinimizing prog: %v\n"+ |
| "Simplifying prog options: %v\nExtracting C: %v\nSimplifying C: %v\n\n\n%s", |
| stats.ExtractProgTime, stats.MinimizeProgTime, |
| stats.SimplifyProgTime, stats.ExtractCTime, stats.SimplifyCTime, stats.Log) |
| } |
| osutil.WriteFile(filename, []byte(text)) |
| } |
| |
| func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) { |
| mgr.mu.Lock() |
| defer mgr.mu.Unlock() |
| mgr.minimizeCorpus() |
| corpus = make([][]byte, 0, len(mgr.corpus)) |
| for _, inp := range mgr.corpus { |
| corpus = append(corpus, inp.Prog) |
| } |
| repros = mgr.newRepros |
| mgr.newRepros = nil |
| return |
| } |
| |
| func (mgr *Manager) addNewCandidates(progs [][]byte) { |
| candidates := make([]rpctype.RPCCandidate, len(progs)) |
| for i, inp := range progs { |
| candidates[i] = rpctype.RPCCandidate{ |
| Prog: inp, |
| Minimized: false, // don't trust programs from hub |
| Smashed: false, |
| } |
| } |
| mgr.mu.Lock() |
| defer mgr.mu.Unlock() |
| mgr.candidates = append(mgr.candidates, candidates...) |
| if mgr.phase == phaseTriagedCorpus { |
| mgr.phase = phaseQueriedHub |
| } |
| } |
| |
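// minimizeCorpus drops corpus inputs whose signal is subsumed by other
// inputs. To amortize the cost, it only runs once the corpus has grown by
// more than 1% since the last run (e.g. with lastMinCorpus == 1000 the next
// run happens only when the corpus exceeds 1010 inputs). After the corpus
// is triaged, the result is also synced to the on-disk corpus database.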
| func (mgr *Manager) minimizeCorpus() { |
| if mgr.phase < phaseLoadedCorpus || len(mgr.corpus) <= mgr.lastMinCorpus*101/100 { |
| return |
| } |
| inputs := make([]signal.Context, 0, len(mgr.corpus)) |
| for _, inp := range mgr.corpus { |
| inputs = append(inputs, signal.Context{ |
| Signal: inp.Signal.Deserialize(), |
| Context: inp, |
| }) |
| } |
| newCorpus := make(map[string]rpctype.RPCInput) |
| for _, ctx := range signal.Minimize(inputs) { |
| inp := ctx.(rpctype.RPCInput) |
| newCorpus[hash.String(inp.Prog)] = inp |
| } |
| log.Logf(1, "minimized corpus: %v -> %v", len(mgr.corpus), len(newCorpus)) |
| mgr.corpus = newCorpus |
| mgr.lastMinCorpus = len(newCorpus) |
| |
| // Don't minimize persistent corpus until fuzzers have triaged all inputs from it. |
| if mgr.phase < phaseTriagedCorpus { |
| return |
| } |
| for key := range mgr.corpusDB.Records { |
| _, ok1 := mgr.corpus[key] |
| _, ok2 := mgr.disabledHashes[key] |
| if !ok1 && !ok2 { |
| mgr.corpusDB.Delete(key) |
| } |
| } |
| mgr.corpusDB.BumpVersion(currentDBVersion) |
| } |
| |
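// fuzzerConnect is called on each (re)connect of a fuzzer process; it hands
// out the current minimized corpus and the known memory-leak frames.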
| func (mgr *Manager) fuzzerConnect() ([]rpctype.RPCInput, [][]byte) { |
| mgr.mu.Lock() |
| defer mgr.mu.Unlock() |
| |
| mgr.minimizeCorpus() |
| corpus := make([]rpctype.RPCInput, 0, len(mgr.corpus)) |
| for _, inp := range mgr.corpus { |
| corpus = append(corpus, inp) |
| } |
| memoryLeakFrames := make([][]byte, 0, len(mgr.memoryLeakFrames)) |
| for frame := range mgr.memoryLeakFrames { |
| memoryLeakFrames = append(memoryLeakFrames, []byte(frame)) |
| } |
| return corpus, memoryLeakFrames |
| } |
| |
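// machineChecked processes the result of the machine check performed by a
// fuzzer: it logs disabled syscalls and detected features, stores the check
// result, loads the corpus, and records the first-connect time.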
| func (mgr *Manager) machineChecked(a *rpctype.CheckArgs) { |
| mgr.mu.Lock() |
| defer mgr.mu.Unlock() |
| if len(mgr.cfg.EnabledSyscalls) != 0 && len(a.DisabledCalls[mgr.cfg.Sandbox]) != 0 { |
| disabled := make(map[string]string) |
| for _, dc := range a.DisabledCalls[mgr.cfg.Sandbox] { |
| disabled[mgr.target.Syscalls[dc.ID].Name] = dc.Reason |
| } |
| for _, id := range mgr.enabledSyscalls { |
| name := mgr.target.Syscalls[id].Name |
| if reason := disabled[name]; reason != "" { |
| log.Logf(0, "disabling %v: %v", name, reason) |
| } |
| } |
| } |
| if a.Error != "" { |
| log.Fatalf("machine check: %v", a.Error) |
| } |
| log.Logf(0, "machine check:") |
| log.Logf(0, "%-24v: %v/%v", "syscalls", |
| len(a.EnabledCalls[mgr.cfg.Sandbox]), len(mgr.target.Syscalls)) |
| for _, feat := range a.Features { |
| log.Logf(0, "%-24v: %v", feat.Name, feat.Reason) |
| } |
| mgr.checkResult = a |
| mgr.loadCorpus() |
| mgr.firstConnect = time.Now() |
| } |
| |
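// newInput adds a fuzzer-reported input to the corpus. If the same program
// is already present, its signal and coverage are merged into the existing
// entry; otherwise the program is also saved to the persistent corpus
// database.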
| func (mgr *Manager) newInput(inp rpctype.RPCInput, sign signal.Signal) { |
| mgr.mu.Lock() |
| defer mgr.mu.Unlock() |
| sig := hash.String(inp.Prog) |
| if old, ok := mgr.corpus[sig]; ok { |
		// The input is already present, but possibly with different signal/coverage/call.
| sign.Merge(old.Signal.Deserialize()) |
| old.Signal = sign.Serialize() |
| var cov cover.Cover |
| cov.Merge(old.Cover) |
| cov.Merge(inp.Cover) |
| old.Cover = cov.Serialize() |
| mgr.corpus[sig] = old |
| } else { |
| mgr.corpus[sig] = inp |
| mgr.corpusDB.Save(sig, inp.Prog, 0) |
| if err := mgr.corpusDB.Flush(); err != nil { |
| log.Logf(0, "failed to save corpus database: %v", err) |
| } |
| } |
| } |
| |
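// candidateBatch pops up to size untriaged candidates for a fuzzer. When
// the queue drains, it advances the manager phase: phaseLoadedCorpus moves
// to phaseTriagedCorpus (kicking off hub sync), or straight to
// phaseTriagedHub when no hub is configured; phaseQueriedHub moves to
// phaseTriagedHub.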
| func (mgr *Manager) candidateBatch(size int) []rpctype.RPCCandidate { |
| mgr.mu.Lock() |
| defer mgr.mu.Unlock() |
| var res []rpctype.RPCCandidate |
| for i := 0; i < size && len(mgr.candidates) > 0; i++ { |
| last := len(mgr.candidates) - 1 |
| res = append(res, mgr.candidates[last]) |
| mgr.candidates[last] = rpctype.RPCCandidate{} |
| mgr.candidates = mgr.candidates[:last] |
| } |
| if len(mgr.candidates) == 0 { |
| mgr.candidates = nil |
| if mgr.phase == phaseLoadedCorpus { |
| if mgr.cfg.HubClient != "" { |
| mgr.phase = phaseTriagedCorpus |
| go mgr.hubSyncLoop() |
| } else { |
| mgr.phase = phaseTriagedHub |
| } |
| } else if mgr.phase == phaseQueriedHub { |
| mgr.phase = phaseTriagedHub |
| } |
| } |
| return res |
| } |
| |
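// collectUsedFiles records the modification times of the files the manager
// depends on (binaries, SSH key, kernel object, disk image), so that
// checkUsedFiles can later verify they have not changed under us.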
| func (mgr *Manager) collectUsedFiles() { |
| if mgr.vmPool == nil { |
| return |
| } |
| addUsedFile := func(f string) { |
| if f == "" { |
| return |
| } |
| stat, err := os.Stat(f) |
| if err != nil { |
| log.Fatalf("failed to stat %v: %v", f, err) |
| } |
| mgr.usedFiles[f] = stat.ModTime() |
| } |
| cfg := mgr.cfg |
| addUsedFile(cfg.SyzFuzzerBin) |
| addUsedFile(cfg.SyzExecprogBin) |
| addUsedFile(cfg.SyzExecutorBin) |
| addUsedFile(cfg.SSHKey) |
| if vmlinux := filepath.Join(cfg.KernelObj, mgr.sysTarget.KernelObject); osutil.IsExist(vmlinux) { |
| addUsedFile(vmlinux) |
| } |
| if cfg.Image != "9p" { |
| addUsedFile(cfg.Image) |
| } |
| } |
| |
| func (mgr *Manager) checkUsedFiles() { |
| for f, mod := range mgr.usedFiles { |
| stat, err := os.Stat(f) |
| if err != nil { |
| log.Fatalf("failed to stat %v: %v", f, err) |
| } |
| if mod != stat.ModTime() { |
| log.Fatalf("file %v that syz-manager uses has been modified by an external program\n"+ |
| "this can lead to arbitrary syz-manager misbehavior\n"+ |
| "modification time has changed: %v -> %v\n"+ |
| "don't modify files that syz-manager uses. exiting to prevent harm", |
| f, mod, stat.ModTime()) |
| } |
| } |
| } |
| |
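// dashboardReporter periodically uploads manager stats to the dashboard.
// Stats are sent as deltas relative to the previous successful upload, so
// the last* counters advance only after UploadManagerStats succeeds.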
| func (mgr *Manager) dashboardReporter() { |
| webAddr := publicWebAddr(mgr.cfg.HTTP) |
| var lastFuzzingTime time.Duration |
| var lastCrashes, lastExecs uint64 |
| for { |
| time.Sleep(time.Minute) |
| mgr.mu.Lock() |
| if mgr.firstConnect.IsZero() { |
| mgr.mu.Unlock() |
| continue |
| } |
| crashes := mgr.stats.crashes.get() |
| execs := mgr.stats.execTotal.get() |
| req := &dashapi.ManagerStatsReq{ |
| Name: mgr.cfg.Name, |
| Addr: webAddr, |
| UpTime: time.Since(mgr.firstConnect), |
| Corpus: uint64(len(mgr.corpus)), |
| Cover: mgr.stats.corpusSignal.get(), |
| FuzzingTime: mgr.fuzzingTime - lastFuzzingTime, |
| Crashes: crashes - lastCrashes, |
| Execs: execs - lastExecs, |
| } |
| mgr.mu.Unlock() |
| |
| if err := mgr.dash.UploadManagerStats(req); err != nil { |
| log.Logf(0, "failed to upload dashboard stats: %v", err) |
| continue |
| } |
| mgr.mu.Lock() |
| lastFuzzingTime += req.FuzzingTime |
| lastCrashes += req.Crashes |
| lastExecs += req.Execs |
| mgr.mu.Unlock() |
| } |
| } |
| |
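// publicWebAddr converts the manager's HTTP listen address into an
// externally reachable URL: the host part is replaced with the machine
// hostname, or with the external IP when running on GCE.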
| func publicWebAddr(addr string) string { |
| _, port, err := net.SplitHostPort(addr) |
| if err == nil && port != "" { |
| if host, err := os.Hostname(); err == nil { |
| addr = net.JoinHostPort(host, port) |
| } |
| if GCE, err := gce.NewContext(); err == nil { |
| addr = net.JoinHostPort(GCE.ExternalIP, port) |
| } |
| } |
| return "http://" + addr |
| } |