| // Copyright 2016 syzkaller project authors. All rights reserved. |
| // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
// Package gce allows using Google Compute Engine (GCE) virtual machines as VMs.
// It is assumed that syz-manager also runs on GCE, as VMs are created in the current project/zone.
//
// See https://cloud.google.com/compute/docs for details.
// In particular, how to build GCE-compatible images:
// https://cloud.google.com/compute/docs/tutorials/building-images
// Working with serial console:
// https://cloud.google.com/compute/docs/instances/interacting-with-serial-console
| package gce |
| |
| import ( |
| "archive/tar" |
| "bytes" |
| "compress/gzip" |
| "fmt" |
| "io" |
| "io/ioutil" |
| "os" |
| "path/filepath" |
| "time" |
| |
| "github.com/google/syzkaller/pkg/config" |
| "github.com/google/syzkaller/pkg/gce" |
| "github.com/google/syzkaller/pkg/gcs" |
| "github.com/google/syzkaller/pkg/kd" |
| "github.com/google/syzkaller/pkg/log" |
| "github.com/google/syzkaller/pkg/osutil" |
| "github.com/google/syzkaller/vm/vmimpl" |
| ) |
| |
| func init() { |
| vmimpl.Register("gce", ctor) |
| } |
| |
// Config holds the GCE-specific VM parameters. ctor decodes it from the
// vmimpl.Env.Config data.
type Config struct {
	// Count is the number of VMs to use.
	Count int `json:"count"`

	// MachineType is the GCE machine type (e.g. "n1-highcpu-2").
	MachineType string `json:"machine_type"`

	// GCSPath is the GCS path to upload the image to.
	GCSPath string `json:"gcs_path"`

	// GCEImage is a pre-created GCE image to use.
	GCEImage string `json:"gce_image"`
}
| |
| type Pool struct { |
| env *vmimpl.Env |
| cfg *Config |
| GCE *gce.Context |
| } |
| |
| type instance struct { |
| env *vmimpl.Env |
| cfg *Config |
| GCE *gce.Context |
| debug bool |
| name string |
| ip string |
| gceKey string // per-instance private ssh key associated with the instance |
| sshKey string // ssh key |
| sshUser string |
| closed chan bool |
| } |
| |
| func ctor(env *vmimpl.Env) (vmimpl.Pool, error) { |
| if env.Name == "" { |
| return nil, fmt.Errorf("config param name is empty (required for GCE)") |
| } |
| cfg := &Config{ |
| Count: 1, |
| } |
| if err := config.LoadData(env.Config, cfg); err != nil { |
| return nil, fmt.Errorf("failed to parse gce vm config: %v", err) |
| } |
| if cfg.Count < 1 || cfg.Count > 1000 { |
| return nil, fmt.Errorf("invalid config param count: %v, want [1, 1000]", cfg.Count) |
| } |
| if env.Debug { |
| cfg.Count = 1 |
| } |
| if cfg.MachineType == "" { |
| return nil, fmt.Errorf("machine_type parameter is empty") |
| } |
| if cfg.GCEImage == "" && cfg.GCSPath == "" { |
| return nil, fmt.Errorf("gcs_path parameter is empty") |
| } |
| if cfg.GCEImage == "" && env.Image == "" { |
| return nil, fmt.Errorf("config param image is empty (required for GCE)") |
| } |
| if cfg.GCEImage != "" && env.Image != "" { |
| return nil, fmt.Errorf("both image and gce_image are specified") |
| } |
| |
| GCE, err := gce.NewContext() |
| if err != nil { |
| return nil, fmt.Errorf("failed to init gce: %v", err) |
| } |
| log.Logf(0, "GCE initialized: running on %v, internal IP %v, project %v, zone %v, net %v/%v", |
| GCE.Instance, GCE.InternalIP, GCE.ProjectID, GCE.ZoneID, GCE.Network, GCE.Subnetwork) |
| |
| if cfg.GCEImage == "" { |
| cfg.GCEImage = env.Name |
| gcsImage := filepath.Join(cfg.GCSPath, env.Name+"-image.tar.gz") |
| log.Logf(0, "uploading image to %v...", gcsImage) |
| if err := uploadImageToGCS(env.Image, gcsImage); err != nil { |
| return nil, err |
| } |
| log.Logf(0, "creating GCE image %v...", cfg.GCEImage) |
| if err := GCE.DeleteImage(cfg.GCEImage); err != nil { |
| return nil, fmt.Errorf("failed to delete GCE image: %v", err) |
| } |
| if err := GCE.CreateImage(cfg.GCEImage, gcsImage); err != nil { |
| return nil, fmt.Errorf("failed to create GCE image: %v", err) |
| } |
| } |
| pool := &Pool{ |
| cfg: cfg, |
| env: env, |
| GCE: GCE, |
| } |
| return pool, nil |
| } |
| |
| func (pool *Pool) Count() int { |
| return pool.cfg.Count |
| } |
| |
| func (pool *Pool) Create(workdir string, index int) (vmimpl.Instance, error) { |
| name := fmt.Sprintf("%v-%v", pool.env.Name, index) |
| // Create SSH key for the instance. |
| gceKey := filepath.Join(workdir, "key") |
| keygen := osutil.Command("ssh-keygen", "-t", "rsa", "-b", "2048", "-N", "", "-C", "syzkaller", "-f", gceKey) |
| if out, err := keygen.CombinedOutput(); err != nil { |
| return nil, fmt.Errorf("failed to execute ssh-keygen: %v\n%s", err, out) |
| } |
| gceKeyPub, err := ioutil.ReadFile(gceKey + ".pub") |
| if err != nil { |
| return nil, fmt.Errorf("failed to read file: %v", err) |
| } |
| |
| log.Logf(0, "deleting instance: %v", name) |
| if err := pool.GCE.DeleteInstance(name, true); err != nil { |
| return nil, err |
| } |
| log.Logf(0, "creating instance: %v", name) |
| ip, err := pool.GCE.CreateInstance(name, pool.cfg.MachineType, pool.cfg.GCEImage, string(gceKeyPub)) |
| if err != nil { |
| return nil, err |
| } |
| |
| ok := false |
| defer func() { |
| if !ok { |
| pool.GCE.DeleteInstance(name, true) |
| } |
| }() |
| sshKey := pool.env.SSHKey |
| sshUser := pool.env.SSHUser |
| if sshKey == "" { |
| // Assuming image supports GCE ssh fanciness. |
| sshKey = gceKey |
| sshUser = "syzkaller" |
| } |
| log.Logf(0, "wait instance to boot: %v (%v)", name, ip) |
| if err := vmimpl.WaitForSSH(pool.env.Debug, 5*time.Minute, ip, |
| sshKey, sshUser, pool.env.OS, 22); err != nil { |
| output, outputErr := pool.getSerialPortOutput(name, gceKey) |
| if outputErr != nil { |
| output = []byte(fmt.Sprintf("failed to get boot output: %v", outputErr)) |
| } |
| return nil, vmimpl.BootError{Title: err.Error(), Output: output} |
| } |
| ok = true |
| inst := &instance{ |
| env: pool.env, |
| cfg: pool.cfg, |
| debug: pool.env.Debug, |
| GCE: pool.GCE, |
| name: name, |
| ip: ip, |
| gceKey: gceKey, |
| sshKey: sshKey, |
| sshUser: sshUser, |
| closed: make(chan bool), |
| } |
| return inst, nil |
| } |
| |
| func (inst *instance) Close() { |
| close(inst.closed) |
| inst.GCE.DeleteInstance(inst.name, false) |
| } |
| |
| func (inst *instance) Forward(port int) (string, error) { |
| return fmt.Sprintf("%v:%v", inst.GCE.InternalIP, port), nil |
| } |
| |
| func (inst *instance) Copy(hostSrc string) (string, error) { |
| vmDst := "./" + filepath.Base(hostSrc) |
| args := append(vmimpl.SCPArgs(inst.debug, inst.sshKey, 22), hostSrc, inst.sshUser+"@"+inst.ip+":"+vmDst) |
| if err := runCmd(inst.debug, "scp", args...); err != nil { |
| return "", err |
| } |
| return vmDst, nil |
| } |
| |
| func (inst *instance) Run(timeout time.Duration, stop <-chan bool, command string) ( |
| <-chan []byte, <-chan error, error) { |
| conRpipe, conWpipe, err := osutil.LongPipe() |
| if err != nil { |
| return nil, nil, err |
| } |
| |
| conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1@ssh-serialport.googleapis.com", |
| inst.GCE.ProjectID, inst.GCE.ZoneID, inst.name) |
| conArgs := append(vmimpl.SSHArgs(inst.debug, inst.gceKey, 9600), conAddr) |
| con := osutil.Command("ssh", conArgs...) |
| con.Env = []string{} |
| con.Stdout = conWpipe |
| con.Stderr = conWpipe |
| if _, err := con.StdinPipe(); err != nil { // SSH would close connection on stdin EOF |
| conRpipe.Close() |
| conWpipe.Close() |
| return nil, nil, err |
| } |
| if err := con.Start(); err != nil { |
| conRpipe.Close() |
| conWpipe.Close() |
| return nil, nil, fmt.Errorf("failed to connect to console server: %v", err) |
| |
| } |
| conWpipe.Close() |
| |
| var tee io.Writer |
| if inst.debug { |
| tee = os.Stdout |
| } |
| merger := vmimpl.NewOutputMerger(tee) |
| var decoder func(data []byte) (int, int, []byte) |
| if inst.env.OS == "windows" { |
| decoder = kd.Decode |
| } |
| merger.AddDecoder("console", conRpipe, decoder) |
| if err := waitForConsoleConnect(merger); err != nil { |
| con.Process.Kill() |
| merger.Wait() |
| return nil, nil, err |
| } |
| sshRpipe, sshWpipe, err := osutil.LongPipe() |
| if err != nil { |
| con.Process.Kill() |
| merger.Wait() |
| sshRpipe.Close() |
| return nil, nil, err |
| } |
| if inst.env.OS == "linux" { |
| if inst.sshUser != "root" { |
| command = fmt.Sprintf("sudo bash -c '%v'", command) |
| } |
| } |
| args := append(vmimpl.SSHArgs(inst.debug, inst.sshKey, 22), inst.sshUser+"@"+inst.ip, command) |
| ssh := osutil.Command("ssh", args...) |
| ssh.Stdout = sshWpipe |
| ssh.Stderr = sshWpipe |
| if err := ssh.Start(); err != nil { |
| con.Process.Kill() |
| merger.Wait() |
| sshRpipe.Close() |
| sshWpipe.Close() |
| return nil, nil, fmt.Errorf("failed to connect to instance: %v", err) |
| } |
| sshWpipe.Close() |
| merger.Add("ssh", sshRpipe) |
| |
| errc := make(chan error, 1) |
| signal := func(err error) { |
| select { |
| case errc <- err: |
| default: |
| } |
| } |
| |
| go func() { |
| select { |
| case <-time.After(timeout): |
| signal(vmimpl.ErrTimeout) |
| case <-stop: |
| signal(vmimpl.ErrTimeout) |
| case <-inst.closed: |
| signal(fmt.Errorf("instance closed")) |
| case err := <-merger.Err: |
| con.Process.Kill() |
| ssh.Process.Kill() |
| merger.Wait() |
| con.Wait() |
| if cmdErr := ssh.Wait(); cmdErr == nil { |
| // If the command exited successfully, we got EOF error from merger. |
| // But in this case no error has happened and the EOF is expected. |
| err = nil |
| } else if merr, ok := err.(vmimpl.MergerError); ok && merr.R == conRpipe { |
| // Console connection must never fail. If it does, it's either |
| // instance preemption or a GCE bug. In either case, not a kernel bug. |
| log.Logf(1, "%v: gce console connection failed with %v", inst.name, merr.Err) |
| err = vmimpl.ErrTimeout |
| } else { |
| // Check if the instance was terminated due to preemption or host maintenance. |
| time.Sleep(5 * time.Second) // just to avoid any GCE races |
| if !inst.GCE.IsInstanceRunning(inst.name) { |
| log.Logf(1, "%v: ssh exited but instance is not running", inst.name) |
| err = vmimpl.ErrTimeout |
| } |
| } |
| signal(err) |
| return |
| } |
| con.Process.Kill() |
| ssh.Process.Kill() |
| merger.Wait() |
| con.Wait() |
| ssh.Wait() |
| }() |
| return merger.Output, errc, nil |
| } |
| |
| func waitForConsoleConnect(merger *vmimpl.OutputMerger) error { |
| // We've started the console reading ssh command, but it has not necessary connected yet. |
| // If we proceed to running the target command right away, we can miss part |
| // of console output. During repro we can crash machines very quickly and |
| // would miss beginning of a crash. Before ssh starts piping console output, |
| // it usually prints: |
| // "serialport: Connected to ... port 1 (session ID: ..., active connections: 1)" |
| // So we wait for this line, or at least a minute and at least some output. |
| timeout := time.NewTimer(time.Minute) |
| defer timeout.Stop() |
| connectedMsg := []byte("serialport: Connected") |
| permissionDeniedMsg := []byte("Permission denied (publickey)") |
| var output []byte |
| for { |
| select { |
| case out := <-merger.Output: |
| output = append(output, out...) |
| if bytes.Contains(output, connectedMsg) { |
| // Just to make sure (otherwise we still see trimmed reports). |
| time.Sleep(5 * time.Second) |
| return nil |
| } |
| if bytes.Contains(output, permissionDeniedMsg) { |
| // This is a GCE bug. |
| return fmt.Errorf("broken console: %s", permissionDeniedMsg) |
| } |
| case <-timeout.C: |
| if len(output) == 0 { |
| return fmt.Errorf("broken console: no output") |
| } |
| return nil |
| } |
| } |
| } |
| |
| func (inst *instance) Diagnose() bool { |
| return false |
| } |
| |
| func (pool *Pool) getSerialPortOutput(name, gceKey string) ([]byte, error) { |
| conRpipe, conWpipe, err := osutil.LongPipe() |
| if err != nil { |
| return nil, err |
| } |
| defer conRpipe.Close() |
| defer conWpipe.Close() |
| conAddr := fmt.Sprintf("%v.%v.%v.syzkaller.port=1.replay-lines=10000@ssh-serialport.googleapis.com", |
| pool.GCE.ProjectID, pool.GCE.ZoneID, name) |
| conArgs := append(vmimpl.SSHArgs(pool.env.Debug, gceKey, 9600), conAddr) |
| con := osutil.Command("ssh", conArgs...) |
| con.Env = []string{} |
| con.Stdout = conWpipe |
| con.Stderr = conWpipe |
| if _, err := con.StdinPipe(); err != nil { // SSH would close connection on stdin EOF |
| return nil, err |
| } |
| if err := con.Start(); err != nil { |
| return nil, fmt.Errorf("failed to connect to console server: %v", err) |
| } |
| conWpipe.Close() |
| done := make(chan bool) |
| go func() { |
| timeout := time.NewTimer(time.Minute) |
| defer timeout.Stop() |
| select { |
| case <-done: |
| case <-timeout.C: |
| } |
| con.Process.Kill() |
| }() |
| var output []byte |
| buf := make([]byte, 64<<10) |
| for { |
| n, err := conRpipe.Read(buf) |
| if err != nil || n == 0 { |
| break |
| } |
| output = append(output, buf[:n]...) |
| } |
| close(done) |
| con.Wait() |
| return output, nil |
| } |
| |
| func uploadImageToGCS(localImage, gcsImage string) error { |
| GCS, err := gcs.NewClient() |
| if err != nil { |
| return fmt.Errorf("failed to create GCS client: %v", err) |
| } |
| defer GCS.Close() |
| |
| localReader, err := os.Open(localImage) |
| if err != nil { |
| return fmt.Errorf("failed to open image file: %v", err) |
| } |
| defer localReader.Close() |
| localStat, err := localReader.Stat() |
| if err != nil { |
| return fmt.Errorf("failed to stat image file: %v", err) |
| } |
| |
| gcsWriter, err := GCS.FileWriter(gcsImage) |
| if err != nil { |
| return fmt.Errorf("failed to upload image: %v", err) |
| } |
| defer gcsWriter.Close() |
| |
| gzipWriter := gzip.NewWriter(gcsWriter) |
| tarWriter := tar.NewWriter(gzipWriter) |
| tarHeader := &tar.Header{ |
| Name: "disk.raw", |
| Typeflag: tar.TypeReg, |
| Mode: 0640, |
| Size: localStat.Size(), |
| ModTime: time.Now(), |
| // This is hacky but we actually need these large uids. |
| // GCE understands only the old GNU tar format and |
| // there is no direct way to force tar package to use GNU format. |
| // But these large numbers force tar to switch to GNU format. |
| Uid: 100000000, |
| Gid: 100000000, |
| Uname: "syzkaller", |
| Gname: "syzkaller", |
| } |
| if err := tarWriter.WriteHeader(tarHeader); err != nil { |
| return fmt.Errorf("failed to write image tar header: %v", err) |
| } |
| if _, err := io.Copy(tarWriter, localReader); err != nil { |
| return fmt.Errorf("failed to write image file: %v", err) |
| } |
| if err := tarWriter.Close(); err != nil { |
| return fmt.Errorf("failed to write image file: %v", err) |
| } |
| if err := gzipWriter.Close(); err != nil { |
| return fmt.Errorf("failed to write image file: %v", err) |
| } |
| if err := gcsWriter.Close(); err != nil { |
| return fmt.Errorf("failed to write image file: %v", err) |
| } |
| return nil |
| } |
| |
| func runCmd(debug bool, bin string, args ...string) error { |
| if debug { |
| log.Logf(0, "running command: %v %#v", bin, args) |
| } |
| output, err := osutil.RunCmd(time.Minute, "", bin, args...) |
| if debug { |
| log.Logf(0, "result: %v\n%s", err, output) |
| } |
| return err |
| } |