blob: 9c171fd7b2934c5f95b42102188c738314616f49 [file] [log] [blame]
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
mod process;
mod vcpu;
use std::fmt::{self, Display};
use std::fs::File;
use std::io;
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::os::unix::net::UnixDatagram;
use std::path::Path;
use std::result;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Barrier};
use std::thread;
use std::time::{Duration, Instant};
use libc::{
c_int, c_ulong, fcntl, ioctl, socketpair, AF_UNIX, EAGAIN, EBADF, EDEADLK, EEXIST, EINTR,
EINVAL, ENOENT, EOVERFLOW, EPERM, FIOCLEX, F_SETPIPE_SZ, MS_NODEV, MS_NOEXEC, MS_NOSUID,
MS_RDONLY, SIGCHLD, SOCK_SEQPACKET,
};
use protobuf::ProtobufError;
use remain::sorted;
use base::{
block_signal, clear_signal, drop_capabilities, error, getegid, geteuid, info, pipe,
register_rt_signal_handler, validate_raw_fd, warn, Error as SysError, Event, Killable,
MmapError, PollContext, PollToken, Result as SysResult, SignalFd, SignalFdError, SIGRTMIN,
};
use kvm::{Cap, Datamatch, IoeventAddress, Kvm, Vcpu, VcpuExit, Vm};
use minijail::{self, Minijail};
use net_util::{Error as TapError, Tap, TapT};
use vm_memory::GuestMemory;
use self::process::*;
use self::vcpu::*;
use crate::{Config, Executable};
// Largest datagram accepted on the plugin request sockets — presumably
// enforced by the process/vcpu submodules; TODO confirm against their readers.
const MAX_DATAGRAM_SIZE: usize = 4096;
// Requested capacity (via F_SETPIPE_SZ in `new_pipe_pair`) for the per-vcpu
// pipes, so that messages up to this size can cross atomically.
const MAX_VCPU_DATAGRAM_SIZE: usize = 0x40000;
/// An error that occurs during the lifetime of a plugin process.
//
// Variants are kept in alphabetical order, enforced at compile time by the
// `#[sorted]` attribute. Human-readable messages for every variant live in
// this type's `Display` impl, so variants here carry only their payloads.
#[sorted]
pub enum Error {
    CloneEvent(SysError),
    CloneVcpuPipe(io::Error),
    CreateEvent(SysError),
    CreateIrqChip(SysError),
    CreateJail(minijail::Error),
    CreateKvm(SysError),
    CreateMainSocket(SysError),
    CreatePIT(SysError),
    CreatePollContext(SysError),
    CreateSignalFd(SignalFdError),
    CreateSocketPair(io::Error),
    CreateTapFd(TapError),
    CreateVcpu(SysError),
    CreateVcpuSocket(SysError),
    CreateVm(SysError),
    DecodeRequest(ProtobufError),
    DropCapabilities(SysError),
    EncodeResponse(ProtobufError),
    Mount(minijail::Error),
    MountDev(minijail::Error),
    MountLib(minijail::Error),
    MountLib64(minijail::Error),
    MountPlugin(minijail::Error),
    MountPluginLib(minijail::Error),
    MountProc(minijail::Error),
    MountRoot(minijail::Error),
    NoRootDir,
    ParsePivotRoot(minijail::Error),
    ParseSeccomp(minijail::Error),
    PluginFailed(i32),
    PluginKill(SysError),
    PluginKilled(i32),
    PluginRunJail(minijail::Error),
    PluginSocketHup,
    PluginSocketPoll(SysError),
    PluginSocketRecv(SysError),
    PluginSocketSend(SysError),
    PluginSpawn(io::Error),
    PluginTimeout,
    PluginWait(SysError),
    Poll(SysError),
    PollContextAdd(SysError),
    RootNotAbsolute,
    RootNotDir,
    SetGidMap(minijail::Error),
    SetUidMap(minijail::Error),
    // Raw fields from the signalfd's siginfo for an unexpected SIGCHLD.
    SigChild {
        pid: u32,
        signo: u32,
        status: i32,
        code: i32,
    },
    SignalFd(SignalFdError),
    SpawnVcpu(io::Error),
    TapEnable(TapError),
    TapOpen(TapError),
    TapSetIp(TapError),
    TapSetMacAddress(TapError),
    TapSetNetmask(TapError),
    ValidateTapFd(SysError),
}
impl Display for Error {
    #[remain::check]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::Error::*;

        // Arm order is alphabetical, enforced at compile time by `#[sorted]`
        // together with `#[remain::check]` on the function above, matching the
        // declaration order of the `Error` variants.
        #[sorted]
        match self {
            CloneEvent(e) => write!(f, "failed to clone event: {}", e),
            CloneVcpuPipe(e) => write!(f, "failed to clone vcpu pipe: {}", e),
            CreateEvent(e) => write!(f, "failed to create event: {}", e),
            CreateIrqChip(e) => write!(f, "failed to create kvm irqchip: {}", e),
            CreateJail(e) => write!(f, "failed to create jail: {}", e),
            CreateKvm(e) => write!(f, "error creating Kvm: {}", e),
            CreateMainSocket(e) => write!(f, "error creating main request socket: {}", e),
            CreatePIT(e) => write!(f, "failed to create kvm PIT: {}", e),
            CreatePollContext(e) => write!(f, "failed to create poll context: {}", e),
            CreateSignalFd(e) => write!(f, "failed to create signalfd: {}", e),
            CreateSocketPair(e) => write!(f, "failed to create socket pair: {}", e),
            CreateTapFd(e) => write!(f, "failed to create tap device from raw fd: {}", e),
            CreateVcpu(e) => write!(f, "error creating vcpu: {}", e),
            CreateVcpuSocket(e) => write!(f, "error creating vcpu request socket: {}", e),
            CreateVm(e) => write!(f, "error creating vm: {}", e),
            DecodeRequest(e) => write!(f, "failed to decode plugin request: {}", e),
            DropCapabilities(e) => write!(f, "failed to drop process capabilities: {}", e),
            EncodeResponse(e) => write!(f, "failed to encode plugin response: {}", e),
            // All mount failures share one message; the variant still records
            // which mount failed for anyone matching on the error.
            Mount(e) | MountDev(e) | MountLib(e) | MountLib64(e) | MountPlugin(e)
            | MountPluginLib(e) | MountProc(e) | MountRoot(e) => {
                write!(f, "failed to mount: {}", e)
            }
            NoRootDir => write!(f, "no root directory for jailed process to pivot root into"),
            ParsePivotRoot(e) => write!(f, "failed to set jail pivot root: {}", e),
            ParseSeccomp(e) => write!(f, "failed to parse jail seccomp filter: {}", e),
            PluginFailed(e) => write!(f, "plugin exited with error: {}", e),
            PluginKill(e) => write!(f, "error sending kill signal to plugin: {}", e),
            PluginKilled(e) => write!(f, "plugin exited with signal {}", e),
            PluginRunJail(e) => write!(f, "failed to run jail: {}", e),
            PluginSocketHup => write!(f, "plugin request socket has been hung up"),
            PluginSocketPoll(e) => write!(f, "failed to poll plugin request sockets: {}", e),
            PluginSocketRecv(e) => write!(f, "failed to recv from plugin request socket: {}", e),
            PluginSocketSend(e) => write!(f, "failed to send to plugin request socket: {}", e),
            PluginSpawn(e) => write!(f, "failed to spawn plugin: {}", e),
            PluginTimeout => write!(f, "plugin did not exit within timeout"),
            PluginWait(e) => write!(f, "error waiting for plugin to exit: {}", e),
            Poll(e) => write!(f, "failed to poll all FDs: {}", e),
            PollContextAdd(e) => write!(f, "failed to add fd to poll context: {}", e),
            RootNotAbsolute => write!(f, "path to the root directory must be absolute"),
            RootNotDir => write!(f, "specified root directory is not a directory"),
            SetGidMap(e) => write!(f, "failed to set gidmap for jail: {}", e),
            SetUidMap(e) => write!(f, "failed to set uidmap for jail: {}", e),
            SigChild {
                pid,
                signo,
                status,
                code,
            } => write!(
                f,
                "process {} died with signal {}, status {}, and code {}",
                pid, signo, status, code
            ),
            SignalFd(e) => write!(f, "failed to read signal fd: {}", e),
            SpawnVcpu(e) => write!(f, "error spawning vcpu thread: {}", e),
            TapEnable(e) => write!(f, "error enabling tap device: {}", e),
            TapOpen(e) => write!(f, "error opening tap device: {}", e),
            TapSetIp(e) => write!(f, "error setting tap ip: {}", e),
            TapSetMacAddress(e) => write!(f, "error setting tap mac address: {}", e),
            TapSetNetmask(e) => write!(f, "error setting tap netmask: {}", e),
            ValidateTapFd(e) => write!(f, "failed to validate raw tap fd: {}", e),
        }
    }
}
/// Shorthand result type using this module's `Error`.
type Result<T> = result::Result<T, Error>;
/// Creates a pair of connected `SOCK_SEQPACKET` unix domain sockets.
///
/// NOTE(review): only the first socket of the pair is marked close-on-exec
/// (`FIOCLEX`); the second is presumably left inheritable so it can be handed
/// to the exec'd plugin child process — confirm against the callers in the
/// `process` module.
fn new_seqpacket_pair() -> SysResult<(UnixDatagram, UnixDatagram)> {
    let mut fds = [0, 0];
    // SAFETY: `fds` is a valid two-element out-parameter for socketpair(2),
    // and the raw fds are wrapped into owning `UnixDatagram`s only on the
    // success path, so ownership is transferred exactly once.
    unsafe {
        let ret = socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds.as_mut_ptr());
        if ret == 0 {
            // Mark only fds[0] close-on-exec; see the note above.
            ioctl(fds[0], FIOCLEX);
            Ok((
                UnixDatagram::from_raw_fd(fds[0]),
                UnixDatagram::from_raw_fd(fds[1]),
            ))
        } else {
            Err(SysError::last())
        }
    }
}
/// The four ends of the two unidirectional pipes connecting crosvm and the
/// plugin for vcpu traffic (see `new_pipe_pair` for how they are paired).
struct VcpuPipe {
    // crosvm's read end of the to-crosvm pipe; paired with `plugin_write`.
    crosvm_read: File,
    // Plugin's write end of the to-crosvm pipe.
    plugin_write: File,
    // Plugin's read end of the to-plugin pipe; paired with `crosvm_write`.
    plugin_read: File,
    // crosvm's write end of the to-plugin pipe.
    crosvm_write: File,
}
/// Creates the two pipes that carry vcpu traffic between crosvm and the
/// plugin, attempting to grow each to `MAX_VCPU_DATAGRAM_SIZE` bytes.
///
/// A failed or unexpected resize is only logged, never fatal.
fn new_pipe_pair() -> SysResult<VcpuPipe> {
    let to_crosvm = pipe(true)?;
    let to_plugin = pipe(true)?;
    // Increasing the pipe size can be a nice-to-have to make sure that
    // messages get across atomically (and made sure that writes don't block),
    // though it's not necessarily a hard requirement for things to work.
    // SAFETY: fcntl(F_SETPIPE_SZ) is invoked on a valid, owned pipe fd with
    // an integer third argument, as the fcntl contract requires.
    let flags = unsafe {
        fcntl(
            to_crosvm.0.as_raw_fd(),
            F_SETPIPE_SZ,
            MAX_VCPU_DATAGRAM_SIZE as c_int,
        )
    };
    // On success fcntl returns the pipe's new capacity; warn on error or on
    // any capacity other than the exact size requested.
    if flags < 0 || flags != MAX_VCPU_DATAGRAM_SIZE as i32 {
        warn!(
            "Failed to adjust size of crosvm pipe (result {}): {}",
            flags,
            SysError::last()
        );
    }
    // SAFETY: same argument as above, for the to-plugin pipe's read end.
    let flags = unsafe {
        fcntl(
            to_plugin.0.as_raw_fd(),
            F_SETPIPE_SZ,
            MAX_VCPU_DATAGRAM_SIZE as c_int,
        )
    };
    if flags < 0 || flags != MAX_VCPU_DATAGRAM_SIZE as i32 {
        warn!(
            "Failed to adjust size of plugin pipe (result {}): {}",
            flags,
            SysError::last()
        );
    }
    Ok(VcpuPipe {
        crosvm_read: to_crosvm.0,
        plugin_write: to_crosvm.1,
        plugin_read: to_plugin.0,
        crosvm_write: to_plugin.1,
    })
}
/// Maps a protobuf error onto an errno-based `SysError`.
///
/// An underlying I/O error keeps its raw OS errno when one is available;
/// every other case collapses to `EINVAL`.
fn proto_to_sys_err(e: ProtobufError) -> SysError {
    if let ProtobufError::IoError(io_err) = e {
        SysError::new(io_err.raw_os_error().unwrap_or(EINVAL))
    } else {
        SysError::new(EINVAL)
    }
}
/// Converts a `std::io::Error` to a `SysError`, defaulting to `EINVAL` when
/// the error carries no raw OS errno.
fn io_to_sys_err(e: io::Error) -> SysError {
    match e.raw_os_error() {
        Some(errno) => SysError::new(errno),
        None => SysError::new(EINVAL),
    }
}
/// Maps an mmap error onto a `SysError`, preserving the original errno for
/// failed system calls and using `EINVAL` for everything else.
fn mmap_to_sys_err(e: MmapError) -> SysError {
    if let MmapError::SystemCallFailed(errno_err) = e {
        errno_err
    } else {
        SysError::new(EINVAL)
    }
}
/// Builds the minijail sandbox the plugin process will run inside.
///
/// The jail gets fresh PID/user/mount/net namespaces, maps the caller's
/// euid/egid to root inside the user namespace, drops all capabilities,
/// pivots into `root` (a 64MB tmpfs is mounted over `/`), mounts `/proc`
/// read-only, and installs a seccomp filter derived from `seccomp_policy`
/// (preferring a pre-compiled `.bpf` unless `log_failures` forces the
/// `.policy` text form).
fn create_plugin_jail(root: &Path, log_failures: bool, seccomp_policy: &Path) -> Result<Minijail> {
    // All child jails run in a new user namespace without any users mapped,
    // they run as nobody unless otherwise configured.
    let mut j = Minijail::new().map_err(Error::CreateJail)?;
    j.namespace_pids();
    j.namespace_user();
    // Map the current effective uid/gid to root inside the user namespace.
    j.uidmap(&format!("0 {0} 1", geteuid()))
        .map_err(Error::SetUidMap)?;
    j.gidmap(&format!("0 {0} 1", getegid()))
        .map_err(Error::SetGidMap)?;
    j.namespace_user_disable_setgroups();
    // Don't need any capabilities.
    j.use_caps(0);
    // Create a new mount namespace with an empty root FS.
    j.namespace_vfs();
    j.enter_pivot_root(root).map_err(Error::ParsePivotRoot)?;
    // Run in an empty network namespace.
    j.namespace_net();
    j.no_new_privs();
    // By default we'll prioritize using the pre-compiled .bpf over the .policy
    // file (the .bpf is expected to be compiled using "trap" as the failure
    // behavior instead of the default "kill" behavior).
    // Refer to the code comment for the "seccomp-log-failures"
    // command-line parameter for an explanation about why the |log_failures|
    // flag forces the use of .policy files (and the build-time alternative to
    // this run-time flag).
    let bpf_policy_file = seccomp_policy.with_extension("bpf");
    if bpf_policy_file.exists() && !log_failures {
        j.parse_seccomp_program(&bpf_policy_file)
            .map_err(Error::ParseSeccomp)?;
    } else {
        // Use TSYNC only for the side effect of it using SECCOMP_RET_TRAP,
        // which will correctly kill the entire device process if a worker
        // thread commits a seccomp violation.
        j.set_seccomp_filter_tsync();
        if log_failures {
            j.log_seccomp_filter_failures();
        }
        j.parse_seccomp_filters(&seccomp_policy.with_extension("policy"))
            .map_err(Error::ParseSeccomp)?;
    }
    j.use_seccomp_filter();
    // Don't do init setup.
    j.run_as_init();
    // Create a tmpfs in the plugin's root directory so that we can bind mount it's executable
    // file into it. The size=67108864 is size=64*1024*1024 or size=64MB.
    j.mount_with_data(
        Path::new("none"),
        Path::new("/"),
        "tmpfs",
        (MS_NOSUID | MS_NODEV | MS_NOEXEC) as usize,
        "size=67108864",
    )
    .map_err(Error::MountRoot)?;
    // Because we requested to "run as init", minijail will not mount /proc for us even though
    // plugin will be running in its own PID namespace, so we have to mount it ourselves.
    j.mount(
        Path::new("proc"),
        Path::new("/proc"),
        "proc",
        (MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RDONLY) as usize,
    )
    .map_err(Error::MountProc)?;
    Ok(j)
}
/// Each `PluginObject` represents one object that was instantiated by the guest using the `Create`
/// request.
///
/// Each such object has an ID associated with it that exists in an ID space shared by every variant
/// of `PluginObject`. This allows all the objects to be indexed in a single map, and allows for a
/// common destroy method.
///
/// In addition to the destroy method, each object may have methods specific to its variant type.
/// These variant methods must be done by matching the variant to the expected type for that method.
/// For example, getting the dirty log from a `Memory` object starting with an ID:
///
/// ```ignore
/// match objects.get(&request_id) {
///     Some(&PluginObject::Memory { slot, length }) => vm.get_dirty_log(slot, &mut dirty_log[..]),
///     _ => return Err(SysError::new(ENOENT)),
/// }
/// ```
enum PluginObject {
    // An ioeventfd registered with KVM at `addr`, matching guest writes of
    // `length` bytes equal to `datamatch` (any length/value when `length`
    // is 0 — see `destroy`).
    IoEvent {
        evt: Event,
        addr: IoeventAddress,
        length: u32,
        datamatch: u64,
    },
    // A guest memory region occupying KVM memory slot `slot`.
    Memory {
        slot: u32,
        length: usize,
    },
    // An irqfd routed to guest IRQ `irq_id`.
    IrqEvent {
        irq_id: u32,
        evt: Event,
    },
}
impl PluginObject {
    /// Unregisters this object from `vm`, consuming it.
    ///
    /// Returns `EINVAL` for an `IoEvent` whose recorded `length` is not one
    /// of the sizes KVM datamatching supports (0, 1, 2, 4, or 8 bytes).
    fn destroy(self, vm: &mut Vm) -> SysResult<()> {
        match self {
            PluginObject::IoEvent {
                evt,
                addr,
                length,
                datamatch,
            } => {
                // Translate the stored length/value into the equivalent
                // `Datamatch`, then unregister with a single call.
                let matching = match length {
                    0 => Datamatch::AnyLength,
                    1 => Datamatch::U8(Some(datamatch as u8)),
                    2 => Datamatch::U16(Some(datamatch as u16)),
                    4 => Datamatch::U32(Some(datamatch as u32)),
                    8 => Datamatch::U64(Some(datamatch)),
                    _ => return Err(SysError::new(EINVAL)),
                };
                vm.unregister_ioevent(&evt, addr, matching)
            }
            PluginObject::Memory { slot, .. } => vm.remove_memory_region(slot).and(Ok(())),
            PluginObject::IrqEvent { irq_id, evt } => vm.unregister_irqfd(&evt, irq_id),
        }
    }
}
/// Creates and starts one thread per vcpu, each running the KVM dispatch loop for that vcpu.
///
/// Spawned `JoinHandle`s are pushed onto `vcpu_handles` so the caller can later kick the threads
/// (with `SIGRTMIN`) and join them. Threads rendezvous on a barrier after per-vcpu plugin
/// initialization, then loop on `Vcpu::run`, forwarding IO/MMIO/Hyper-V exits to `plugin` and
/// checking `kill_signaled` between runs. Each thread signals `exit_evt` when its loop ends.
pub fn run_vcpus(
    kvm: &Kvm,
    vm: &Vm,
    plugin: &Process,
    vcpu_count: u32,
    kill_signaled: &Arc<AtomicBool>,
    exit_evt: &Event,
    vcpu_handles: &mut Vec<thread::JoinHandle<()>>,
) -> Result<()> {
    let vcpu_thread_barrier = Arc::new(Barrier::new((vcpu_count) as usize));
    let use_kvm_signals = !kvm.check_extension(Cap::ImmediateExit);

    // If we need to force a vcpu to exit from a VM then a SIGRTMIN signal is sent
    // to that vcpu's thread. If KVM is running the VM then it'll return -EINTR.
    // An issue is what to do when KVM isn't running the VM (where we could be
    // in the kernel or in the app).
    //
    // If KVM supports "immediate exit" then we set a signal handler that will
    // set the |immediate_exit| flag that tells KVM to return -EINTR before running
    // the VM.
    //
    // If KVM doesn't support immediate exit then we'll block SIGRTMIN in the app
    // and tell KVM to unblock SIGRTMIN before running the VM (at which point a blocked
    // signal might get asserted). There's overhead to have KVM unblock and re-block
    // SIGRTMIN each time it runs the VM, so this mode should be avoided.
    if use_kvm_signals {
        // SAFETY: the installed handler is an empty function, which is
        // trivially async-signal-safe.
        unsafe {
            extern "C" fn handle_signal() {}
            // Our signal handler does nothing and is trivially async signal safe.
            // We need to install this signal handler even though we do block
            // the signal below, to ensure that this signal will interrupt
            // execution of KVM_RUN (this is an implementation issue).
            register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
                .expect("failed to register vcpu signal handler");
        }
        // We do not really want the signal handler to run...
        block_signal(SIGRTMIN() + 0).expect("failed to block signal");
    } else {
        // SAFETY: the handler only calls `Vcpu::set_local_immediate_exit`,
        // which is presumably async-signal-safe since it exists precisely to
        // be called from this handler — confirm against the kvm crate.
        unsafe {
            extern "C" fn handle_signal() {
                Vcpu::set_local_immediate_exit(true);
            }
            register_rt_signal_handler(SIGRTMIN() + 0, handle_signal)
                .expect("failed to register vcpu signal handler");
        }
    }

    for cpu_id in 0..vcpu_count {
        // Clone/create the per-thread state the closure will move into itself.
        let kill_signaled = kill_signaled.clone();
        let vcpu_thread_barrier = vcpu_thread_barrier.clone();
        let vcpu_exit_evt = exit_evt.try_clone().map_err(Error::CloneEvent)?;
        let vcpu_plugin = plugin.create_vcpu(cpu_id)?;
        let vcpu = Vcpu::new(cpu_id as c_ulong, kvm, vm).map_err(Error::CreateVcpu)?;

        vcpu_handles.push(
            thread::Builder::new()
                .name(format!("crosvm_vcpu{}", cpu_id))
                .spawn(move || {
                    if use_kvm_signals {
                        // Tell KVM to not block anything when entering kvm run
                        // because we will be using first RT signal to kick the VCPU.
                        vcpu.set_signal_mask(&[])
                            .expect("failed to set up KVM VCPU signal mask");
                    }

                    #[cfg(feature = "chromeos")]
                    if let Err(e) = base::sched::enable_core_scheduling() {
                        error!("Failed to enable core scheduling: {}", e);
                    }

                    let vcpu = vcpu
                        .to_runnable(Some(SIGRTMIN() + 0))
                        .expect("Failed to set thread id");

                    let res = vcpu_plugin.init(&vcpu);
                    // Every vcpu waits here until all have finished plugin-side init.
                    vcpu_thread_barrier.wait();
                    if let Err(e) = res {
                        error!("failed to initialize vcpu {}: {}", cpu_id, e);
                    } else {
                        loop {
                            let mut interrupted_by_signal = false;
                            let run_res = vcpu.run();
                            match run_res {
                                Ok(run) => match run {
                                    VcpuExit::IoIn { port, mut size } => {
                                        let mut data = [0; 256];
                                        // Clamp oversized accesses to the local buffer.
                                        if size > data.len() {
                                            error!("unsupported IoIn size of {} bytes", size);
                                            size = data.len();
                                        }
                                        vcpu_plugin.io_read(port as u64, &mut data[..size], &vcpu);
                                        if let Err(e) = vcpu.set_data(&data[..size]) {
                                            error!("failed to set return data for IoIn: {}", e);
                                        }
                                    }
                                    VcpuExit::IoOut {
                                        port,
                                        mut size,
                                        data,
                                    } => {
                                        if size > data.len() {
                                            error!("unsupported IoOut size of {} bytes", size);
                                            size = data.len();
                                        }
                                        vcpu_plugin.io_write(port as u64, &data[..size], &vcpu);
                                    }
                                    VcpuExit::MmioRead { address, size } => {
                                        let mut data = [0; 8];
                                        vcpu_plugin.mmio_read(
                                            address as u64,
                                            &mut data[..size],
                                            &vcpu,
                                        );
                                        // Setting data for mmio can not fail.
                                        let _ = vcpu.set_data(&data[..size]);
                                    }
                                    VcpuExit::MmioWrite {
                                        address,
                                        size,
                                        data,
                                    } => {
                                        vcpu_plugin.mmio_write(
                                            address as u64,
                                            &data[..size],
                                            &vcpu,
                                        );
                                    }
                                    VcpuExit::HypervHcall { input, params } => {
                                        let mut data = [0; 8];
                                        vcpu_plugin.hyperv_call(input, params, &mut data, &vcpu);
                                        // Setting data for hyperv call can not fail.
                                        let _ = vcpu.set_data(&data);
                                    }
                                    VcpuExit::HypervSynic {
                                        msr,
                                        control,
                                        evt_page,
                                        msg_page,
                                    } => {
                                        vcpu_plugin
                                            .hyperv_synic(msr, control, evt_page, msg_page, &vcpu);
                                    }
                                    VcpuExit::Hlt => break,
                                    VcpuExit::Shutdown => break,
                                    VcpuExit::InternalError => {
                                        error!("vcpu {} has internal error", cpu_id);
                                        break;
                                    }
                                    r => warn!("unexpected vcpu exit: {:?}", r),
                                },
                                Err(e) => match e.errno() {
                                    // -EINTR: kicked by SIGRTMIN or the
                                    // immediate-exit flag; pause handling below.
                                    EINTR => interrupted_by_signal = true,
                                    EAGAIN => {}
                                    _ => {
                                        error!("vcpu hit unknown error: {}", e);
                                        break;
                                    }
                                },
                            }
                            if kill_signaled.load(Ordering::SeqCst) {
                                break;
                            }

                            // Only handle the pause request if kvm reported that it was
                            // interrupted by a signal. This helps to ensure that KVM has had a
                            // chance to finish emulating any IO that may have immediately happened.
                            // If we eagerly check pre_run() then any IO that we
                            // just reported to the plugin won't have been processed yet by KVM.
                            // Not eagerly calling pre_run() also helps to reduce
                            // any overhead from checking if a pause request is pending.
                            // The assumption is that pause requests aren't common
                            // or frequent so it's better to optimize for the non-pause execution paths.
                            if interrupted_by_signal {
                                if use_kvm_signals {
                                    clear_signal(SIGRTMIN() + 0)
                                        .expect("failed to clear pending signal");
                                } else {
                                    vcpu.set_immediate_exit(false);
                                }

                                if let Err(e) = vcpu_plugin.pre_run(&vcpu) {
                                    error!("failed to process pause on vcpu {}: {}", cpu_id, e);
                                    break;
                                }
                            }
                        }
                    }
                    // Let the main poll loop know this vcpu thread has finished.
                    vcpu_exit_evt
                        .write(1)
                        .expect("failed to signal vcpu exit event");
                })
                .map_err(Error::SpawnVcpu)?,
        );
    }
    Ok(())
}
/// Poll tokens for `run_config`'s main event loop.
#[derive(PollToken)]
enum Token {
    // `exit_evt` was signaled by a finished vcpu thread.
    Exit,
    // SIGCHLD arrived on the signalfd (plugin process state change).
    ChildSignal,
    // The plugin request socket at `index` is readable.
    Plugin { index: usize },
}
/// Run a VM with a plugin process specified by `cfg`.
///
/// Not every field of `cfg` will be used. In particular, most field that pertain to a specific
/// device are ignored because the plugin is responsible for emulating hardware.
pub fn run_config(cfg: Config) -> Result<()> {
    info!("crosvm starting plugin process");

    // Masking signals is inherently dangerous, since this can persist across clones/execs. Do this
    // before any jailed devices have been spawned, so that we can catch any of them that fail very
    // quickly.
    let sigchld_fd = SignalFd::new(SIGCHLD).map_err(Error::CreateSignalFd)?;

    // Build the sandbox jail for the plugin, if sandboxing is enabled.
    let jail = if cfg.sandbox {
        // An empty directory for jailed plugin pivot root.
        let root_path = match &cfg.plugin_root {
            Some(dir) => dir,
            None => Path::new(option_env!("DEFAULT_PIVOT_ROOT").unwrap_or("/var/empty")),
        };

        if root_path.is_relative() {
            return Err(Error::RootNotAbsolute);
        }

        if !root_path.exists() {
            return Err(Error::NoRootDir);
        }

        if !root_path.is_dir() {
            return Err(Error::RootNotDir);
        }

        let policy_path = cfg.seccomp_policy_dir.join("plugin");
        let mut jail = create_plugin_jail(root_path, cfg.seccomp_log_failures, &policy_path)?;

        // Update gid map of the jail if caller provided supplemental groups.
        if !cfg.plugin_gid_maps.is_empty() {
            let map = format!("0 {} 1", getegid())
                + &cfg
                    .plugin_gid_maps
                    .into_iter()
                    .map(|m| format!(",{} {} {}", m.inner, m.outer, m.count))
                    .collect::<String>();
            jail.gidmap(&map).map_err(Error::SetGidMap)?;
        }

        // Mount minimal set of devices (full, zero, urandom, etc). We can not use
        // jail.mount_dev() here because crosvm may not be running with CAP_SYS_ADMIN.
        let device_names = ["full", "null", "urandom", "zero"];
        for name in &device_names {
            let device = Path::new("/dev").join(&name);
            jail.mount_bind(&device, &device, true)
                .map_err(Error::MountDev)?;
        }

        // Caller-requested bind mounts into the jail.
        for bind_mount in &cfg.plugin_mounts {
            jail.mount_bind(&bind_mount.src, &bind_mount.dst, bind_mount.writable)
                .map_err(Error::Mount)?;
        }

        Some(jail)
    } else {
        None
    };

    // Configure a host tap device only when ip, netmask, and mac are all given.
    let mut tap_interfaces: Vec<Tap> = Vec::new();
    if let Some(host_ip) = cfg.host_ip {
        if let Some(netmask) = cfg.netmask {
            if let Some(mac_address) = cfg.mac_address {
                let tap = Tap::new(false, false).map_err(Error::TapOpen)?;
                tap.set_ip_addr(host_ip).map_err(Error::TapSetIp)?;
                tap.set_netmask(netmask).map_err(Error::TapSetNetmask)?;
                tap.set_mac_address(mac_address)
                    .map_err(Error::TapSetMacAddress)?;

                tap.enable().map_err(Error::TapEnable)?;
                tap_interfaces.push(tap);
            }
        }
    }
    // Adopt any pre-opened tap fds handed to us by the caller.
    for tap_fd in cfg.tap_fd {
        // Safe because we ensure that we get a unique handle to the fd.
        let tap = unsafe {
            Tap::from_raw_fd(validate_raw_fd(tap_fd).map_err(Error::ValidateTapFd)?)
                .map_err(Error::CreateTapFd)?
        };
        tap_interfaces.push(tap);
    }

    let plugin_args: Vec<&str> = cfg.params.iter().map(|s| &s[..]).collect();

    let plugin_path = match cfg.executable_path {
        Some(Executable::Plugin(ref plugin_path)) => plugin_path.as_path(),
        _ => panic!("Executable was not a plugin"),
    };
    let vcpu_count = cfg.vcpu_count.unwrap_or(1) as u32;
    // Start with empty guest memory; the plugin supplies memory regions later.
    let mem = GuestMemory::new(&[]).unwrap();
    let kvm = Kvm::new().map_err(Error::CreateKvm)?;
    let mut vm = Vm::new(&kvm, mem).map_err(Error::CreateVm)?;
    vm.create_irq_chip().map_err(Error::CreateIrqChip)?;
    vm.create_pit().map_err(Error::CreatePIT)?;

    let mut plugin = Process::new(vcpu_count, plugin_path, &plugin_args, jail)?;
    // Now that the jail for the plugin has been created and we had a chance to adjust gids there,
    // we can drop all our capabilities in case we had any.
    drop_capabilities().map_err(Error::DropCapabilities)?;

    let mut res = Ok(());
    // If Some, we will exit after enough time is passed to shutdown cleanly.
    let mut dying_instant: Option<Instant> = None;
    let duration_to_die = Duration::from_millis(1000);

    let exit_evt = Event::new().map_err(Error::CreateEvent)?;
    let kill_signaled = Arc::new(AtomicBool::new(false));
    let mut vcpu_handles = Vec::with_capacity(vcpu_count as usize);

    let poll_ctx =
        PollContext::build_with(&[(&exit_evt, Token::Exit), (&sigchld_fd, Token::ChildSignal)])
            .map_err(Error::PollContextAdd)?;

    let mut sockets_to_drop = Vec::new();
    let mut redo_poll_ctx_sockets = true;
    // In this loop, make every attempt to not return early. If an error is encountered, set `res`
    // to the error, set `dying_instant` to now, and signal the plugin that it will be killed soon.
    // If the plugin cannot be signaled because it is dead or `signal_kill` failed, simply break
    // from the poll loop so that the VCPU threads can be cleaned up.
    'poll: loop {
        // After we have waited long enough, it's time to give up and exit.
        if dying_instant
            .map(|i| i.elapsed() >= duration_to_die)
            .unwrap_or(false)
        {
            break;
        }

        // (Re-)register plugin sockets whenever the socket set changed last iteration.
        if redo_poll_ctx_sockets {
            for (index, socket) in plugin.sockets().iter().enumerate() {
                poll_ctx
                    .add(socket, Token::Plugin { index })
                    .map_err(Error::PollContextAdd)?;
            }
        }

        let plugin_socket_count = plugin.sockets().len();
        let events = {
            // While dying, only wait out the remainder of the grace period.
            let poll_res = match dying_instant {
                Some(inst) => poll_ctx.wait_timeout(duration_to_die - inst.elapsed()),
                None => poll_ctx.wait(),
            };
            match poll_res {
                Ok(v) => v,
                Err(e) => {
                    // Polling no longer works, time to break and cleanup.
                    if res.is_ok() {
                        res = Err(Error::Poll(e));
                    }
                    break;
                }
            }
        };
        for event in events.iter_readable() {
            match event.token() {
                Token::Exit => {
                    // No need to check the exit event if we are already doing cleanup.
                    let _ = poll_ctx.delete(&exit_evt);
                    dying_instant.get_or_insert(Instant::now());
                    let sig_res = plugin.signal_kill();
                    if res.is_ok() && sig_res.is_err() {
                        res = sig_res.map_err(Error::PluginKill);
                    }
                }
                Token::ChildSignal => {
                    // Print all available siginfo structs, then exit the loop.
                    loop {
                        match sigchld_fd.read() {
                            Ok(Some(siginfo)) => {
                                // If the plugin process has ended, there is no need to continue
                                // processing plugin connections, so we break early.
                                if siginfo.ssi_pid == plugin.pid() as u32 {
                                    break 'poll;
                                }
                                // Because SIGCHLD is not expected from anything other than the
                                // plugin process, report it as an error.
                                if res.is_ok() {
                                    res = Err(Error::SigChild {
                                        pid: siginfo.ssi_pid,
                                        signo: siginfo.ssi_signo,
                                        status: siginfo.ssi_status,
                                        code: siginfo.ssi_code,
                                    })
                                }
                            }
                            Ok(None) => break, // No more signals to read.
                            Err(e) => {
                                // Something really must be messed up for this to happen, continue
                                // processing connections for a limited time.
                                if res.is_ok() {
                                    res = Err(Error::SignalFd(e));
                                }
                                break;
                            }
                        }
                    }
                    // As we only spawn the plugin process, getting a SIGCHLD can only mean
                    // something went wrong.
                    dying_instant.get_or_insert(Instant::now());
                    let sig_res = plugin.signal_kill();
                    if res.is_ok() && sig_res.is_err() {
                        res = sig_res.map_err(Error::PluginKill);
                    }
                }
                Token::Plugin { index } => {
                    match plugin.handle_socket(index, &kvm, &mut vm, &vcpu_handles, &tap_interfaces)
                    {
                        Ok(_) => {}
                        // A HUP is an expected event for a socket, so don't bother warning about
                        // it.
                        Err(Error::PluginSocketHup) => sockets_to_drop.push(index),
                        // Only one connection out of potentially many is broken. Drop it, but don't
                        // start cleaning up. Because the error isn't returned, we will warn about
                        // it here.
                        Err(e) => {
                            warn!("error handling plugin socket: {}", e);
                            sockets_to_drop.push(index);
                        }
                    }
                }
            }
        }

        // Once the plugin reports it has started (and we are not dying), spin
        // up the vcpu threads exactly once.
        if vcpu_handles.is_empty() && dying_instant.is_none() && plugin.is_started() {
            let res = run_vcpus(
                &kvm,
                &vm,
                &plugin,
                vcpu_count,
                &kill_signaled,
                &exit_evt,
                &mut vcpu_handles,
            );
            if let Err(e) = res {
                dying_instant.get_or_insert(Instant::now());
                error!("failed to start vcpus: {}", e);
            }
        }

        redo_poll_ctx_sockets =
            !sockets_to_drop.is_empty() || plugin.sockets().len() != plugin_socket_count;

        // Cleanup all of the sockets that we have determined were disconnected or suffered some
        // other error.
        plugin.drop_sockets(&mut sockets_to_drop);
        sockets_to_drop.clear();

        // Indices shift after dropping sockets, so deregister everything; the
        // top of the loop re-adds the surviving sockets.
        if redo_poll_ctx_sockets {
            for socket in plugin.sockets() {
                let _ = poll_ctx.delete(socket);
            }
        }
    }

    // vcpu threads MUST see the kill signaled flag, otherwise they may re-enter the VM.
    kill_signaled.store(true, Ordering::SeqCst);
    // Depending on how we ended up here, the plugin process, or a VCPU thread waiting for requests
    // might be stuck. The `signal_kill` call will unstick all the VCPU threads by closing their
    // blocked connections.
    plugin.signal_kill().map_err(Error::PluginKill)?;
    for handle in vcpu_handles {
        match handle.kill(SIGRTMIN() + 0) {
            Ok(_) => {
                if let Err(e) = handle.join() {
                    error!("failed to join vcpu thread: {:?}", e);
                }
            }
            Err(e) => error!("failed to kill vcpu thread: {}", e),
        }
    }

    match plugin.try_wait() {
        // The plugin has run out of time by now
        Ok(ProcessStatus::Running) => Err(Error::PluginTimeout),
        // Return an error discovered earlier in this function.
        Ok(ProcessStatus::Success) => res,
        Ok(ProcessStatus::Fail(code)) => Err(Error::PluginFailed(code)),
        Ok(ProcessStatus::Signal(code)) => Err(Error::PluginKilled(code)),
        Err(e) => Err(Error::PluginWait(e)),
    }
}