// blob: a4a533470cc6007903cd022b88ce62c975276ef9 [file] [log] [blame]
// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! A crate for abstracting the underlying kernel hypervisor used in crosvm.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub mod aarch64;
pub mod caps;
#[cfg(all(windows, feature = "haxm"))]
pub mod haxm;
#[cfg(unix)]
pub mod kvm;
#[cfg(all(windows, feature = "whpx"))]
pub mod whpx;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod x86_64;
use std::os::raw::c_int;
use base::AsRawDescriptor;
use base::Event;
use base::MappedRegion;
use base::Protection;
use base::Result;
use base::SafeDescriptor;
use serde::Deserialize;
use serde::Serialize;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
pub use crate::aarch64::*;
pub use crate::caps::*;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub use crate::x86_64::*;
/// An index in the list of guest-mapped memory regions.
///
/// Slots are returned by `Vm::add_memory_region` and accepted by `Vm::remove_memory_region`,
/// `Vm::msync_memory_region`, and `Vm::get_dirty_log`.
pub type MemSlot = u32;
/// A trait for checking hypervisor capabilities.
///
/// Implementors must be `Send`.
pub trait Hypervisor: Send {
/// Makes a shallow clone of this `Hypervisor`.
///
/// Only available on concrete (`Sized`) implementors, so it cannot be called through a
/// `dyn Hypervisor` trait object.
fn try_clone(&self) -> Result<Self>
where
Self: Sized;
/// Checks if a particular `HypervisorCap` is available.
///
/// Returns `true` when `cap` is available.
fn check_capability(&self, cap: HypervisorCap) -> bool;
}
/// A wrapper for using a VM and getting/setting its state.
///
/// Implementors must be `Send`.
pub trait Vm: Send {
/// Makes a shallow clone of this `Vm`.
///
/// Only available on concrete (`Sized`) implementors, so it cannot be called through a
/// `dyn Vm` trait object.
fn try_clone(&self) -> Result<Self>
where
Self: Sized;
/// Checks if a particular `VmCap` is available.
///
/// This is distinct from the `Hypervisor` version of this method because some extensions depend
/// on the particular `Vm` instance. This method is encouraged because it more accurately
/// reflects the usable capabilities.
fn check_capability(&self, c: VmCap) -> bool;
/// Gets the guest physical address size in bits.
fn get_guest_phys_addr_bits(&self) -> u8;
/// Gets the guest-mapped memory for the Vm.
fn get_memory(&self) -> &GuestMemory;
/// Inserts the given `MappedRegion` into the VM's address space at `guest_addr`.
///
/// The slot that was assigned the memory mapping is returned on success. The slot can be given
/// to `Vm::remove_memory_region` to remove the memory from the VM's address space and take back
/// ownership of `mem_region`.
///
/// Note that memory inserted into the VM's address space must not overlap with any other memory
/// slot's region.
///
/// If `read_only` is true, the guest will be able to read the memory as normal, but attempts to
/// write will trigger a mmio VM exit, leaving the memory untouched.
///
/// If `log_dirty_pages` is true, the slot number can be used to retrieve the pages written to
/// by the guest with `get_dirty_log`.
fn add_memory_region(
&mut self,
guest_addr: GuestAddress,
mem_region: Box<dyn MappedRegion>,
read_only: bool,
log_dirty_pages: bool,
) -> Result<MemSlot>;
/// Does a synchronous msync of the memory mapped at `slot`, syncing `size` bytes starting at
/// `offset` from the start of the region. `offset` must be page aligned.
fn msync_memory_region(&mut self, slot: MemSlot, offset: usize, size: usize) -> Result<()>;
/// Removes the memory region that was previously added at the given slot from the VM's address
/// space and returns ownership of the `MappedRegion` that backed it.
fn remove_memory_region(&mut self, slot: MemSlot) -> Result<Box<dyn MappedRegion>>;
/// Creates an emulated device of the given `kind`.
///
/// On success, returns a `SafeDescriptor` for the created device.
fn create_device(&self, kind: DeviceKind) -> Result<SafeDescriptor>;
/// Gets the bitmap of dirty pages since the last call to `get_dirty_log` for the memory at
/// `slot`. Only works on VMs that support `VmCap::DirtyLog`.
///
/// The size of `dirty_log` must be at least as many bits as there are pages in the memory
/// region `slot` represents. For example, if the size of `slot` is 16 pages, `dirty_log` must
/// be 2 bytes or greater.
fn get_dirty_log(&self, slot: MemSlot, dirty_log: &mut [u8]) -> Result<()>;
/// Registers an event to be signaled whenever a certain address is written to.
///
/// The `datamatch` parameter can be used to limit signaling `evt` to only the cases where the
/// value being written is equal to `datamatch`. Note that the size of `datamatch` is important
/// and must match the expected size of the guest's write.
///
/// In all cases where `evt` is signaled, the ordinary vmexit to userspace that would be
/// triggered is prevented.
fn register_ioevent(
&mut self,
evt: &Event,
addr: IoEventAddress,
datamatch: Datamatch,
) -> Result<()>;
/// Unregisters an event previously registered with `register_ioevent`.
///
/// The `evt`, `addr`, and `datamatch` set must be the same as the ones passed into
/// `register_ioevent`.
fn unregister_ioevent(
&mut self,
evt: &Event,
addr: IoEventAddress,
datamatch: Datamatch,
) -> Result<()>;
/// Trigger any matching registered io events based on an MMIO or PIO write at `addr`. The
/// `data` slice represents the contents and length of the write, which is used to compare with
/// the registered io events' Datamatch values. If the hypervisor does in-kernel IO event
/// delivery, this is a no-op.
fn handle_io_events(&self, addr: IoEventAddress, data: &[u8]) -> Result<()>;
/// Retrieves the current timestamp of the paravirtual clock as seen by the current guest.
/// Only works on VMs that support `VmCap::PvClock`.
fn get_pvclock(&self) -> Result<ClockState>;
/// Sets the current timestamp of the paravirtual clock as seen by the current guest.
/// Only works on VMs that support `VmCap::PvClock`.
fn set_pvclock(&self, state: &ClockState) -> Result<()>;
/// Maps `size` bytes starting at `fd_offset` bytes from within the given `fd`
/// at `offset` bytes from the start of the arena with `prot` protections.
/// `offset` must be page aligned.
///
/// # Arguments
/// * `slot` - Memory slot selecting the arena to map into (presumably a slot returned by
///   `add_memory_region` -- TODO confirm against the implementations).
/// * `offset` - Page aligned offset into the arena in bytes.
/// * `size` - Size of memory region in bytes.
/// * `fd` - File descriptor to mmap from.
/// * `fd_offset` - Offset in bytes from the beginning of `fd` to start the mmap.
/// * `prot` - Protection (e.g. readable/writable) of the memory region.
fn add_fd_mapping(
&mut self,
slot: u32,
offset: usize,
size: usize,
fd: &dyn AsRawDescriptor,
fd_offset: u64,
prot: Protection,
) -> Result<()>;
/// Removes the `size`-byte mapping starting at `offset`.
///
/// NOTE(review): `slot` presumably selects the arena, mirroring `add_fd_mapping` -- confirm.
fn remove_mapping(&mut self, slot: u32, offset: usize, size: usize) -> Result<()>;
/// Frees the given segment of guest memory to be reclaimed by the host OS.
/// This is intended for use with virtio-balloon, where a guest driver determines
/// unused ranges and requests they be freed. Use without the guest's knowledge is sure
/// to break something. As per virtio-balloon spec, the given address and size
/// are intended to be page-aligned.
///
/// # Arguments
/// * `guest_address` - Address in the guest's "physical" memory to begin the unmapping
/// * `size` - The size of the region to unmap, in bytes
fn handle_inflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()>;
/// Reallocates memory and maps it to provide to the guest. This is intended to be used
/// exclusively in tandem with `handle_inflate`, and will return an `Err` Result otherwise.
///
/// # Arguments
/// * `guest_address` - Address in the guest's "physical" memory to begin the mapping
/// * `size` - The size of the region to map, in bytes
fn handle_deflate(&mut self, guest_address: GuestAddress, size: u64) -> Result<()>;
}
/// A unique fingerprint for a particular `VcpuRunHandle`, used in `Vcpu` impls to ensure the
/// `VcpuRunHandle` they receive is the same one that was returned from `take_run_handle`.
///
/// The fingerprint is a plain `u64` wrapper, so it is `Copy` and can be duplicated and compared
/// freely; `Debug` is provided so mismatches can be reported in diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VcpuRunHandleFingerprint(u64);

impl VcpuRunHandleFingerprint {
    /// Returns the raw 64-bit value of this fingerprint.
    pub fn as_u64(&self) -> u64 {
        self.0
    }
}
/// A handle returned by a `Vcpu` to be used with `Vcpu::run` to execute a virtual machine's VCPU.
///
/// This is used to ensure that the caller has bound the `Vcpu` to a thread with
/// `Vcpu::take_run_handle` and to execute hypervisor specific cleanup routines when dropped.
pub struct VcpuRunHandle {
// Cleanup routine invoked exactly once, from `Drop`.
drop_fn: fn(),
// Identifies this handle so `Vcpu` impls can check they were given the right one.
fingerprint: VcpuRunHandleFingerprint,
// Prevents Send+Sync for this type (raw pointers are neither `Send` nor `Sync`).
phantom: std::marker::PhantomData<*mut ()>,
}
impl VcpuRunHandle {
    /// Builds a run handle on behalf of a `Vcpu` impl.
    ///
    /// `drop_fn` is stored and invoked when the returned handle is dropped. The handle is tagged
    /// with a probably-unique fingerprint derived from the creation instant and the id of the
    /// calling thread.
    pub fn new(drop_fn: fn()) -> Self {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::Hash;
        use std::hash::Hasher;
        use std::time::Instant;

        let mut state = DefaultHasher::new();
        // Feed the instant first, then the thread id, into a single hash state.
        let created_at = Instant::now();
        created_at.hash(&mut state);
        let thread_id = std::thread::current().id();
        thread_id.hash(&mut state);
        let fingerprint = VcpuRunHandleFingerprint(state.finish());

        Self {
            drop_fn,
            fingerprint,
            phantom: std::marker::PhantomData,
        }
    }

    /// Borrows this handle's fingerprint, which may be cloned and compared freely.
    pub fn fingerprint(&self) -> &VcpuRunHandleFingerprint {
        &self.fingerprint
    }
}
impl Drop for VcpuRunHandle {
    /// Runs the cleanup routine that was supplied to `VcpuRunHandle::new`.
    fn drop(&mut self) {
        // `fn()` is `Copy`, so the pointer can be read out of `self` and then called.
        let cleanup = self.drop_fn;
        cleanup();
    }
}
/// Operation for Io and Mmio
///
/// Carried in `IoParams::operation` to tell a handler whether the guest access is a read or a
/// write.
#[derive(Copy, Clone, Debug)]
pub enum IoOperation {
/// The access is a read; no payload accompanies it.
Read,
/// The access is a write carrying `data`.
Write {
/// Data to be written.
///
/// For 64 bit architecture, Mmio and Io only work with at most 8 bytes of data.
// NOTE(review): presumably only the first `IoParams::size` bytes are meaningful -- confirm.
data: [u8; 8],
},
}
/// Parameters describing an MMIO or PIO from the guest.
///
/// Passed to the callbacks given to `Vcpu::handle_mmio` and `Vcpu::handle_io`.
#[derive(Copy, Clone, Debug)]
pub struct IoParams {
/// Address of the access.
pub address: u64,
/// Size of the access (presumably in bytes -- TODO confirm).
pub size: usize,
/// Whether the access is a read or a write (and, for writes, the data).
pub operation: IoOperation,
}
/// A virtual CPU holding a virtualized hardware thread's state, such as registers and interrupt
/// state, which may be used to execute virtual machines.
///
/// To run, `take_run_handle` must be called to lock the vcpu to a thread. Then the returned
/// `VcpuRunHandle` can be used for running.
pub trait Vcpu: downcast_rs::DowncastSync {
/// Makes a shallow clone of this `Vcpu`.
///
/// Only available on concrete (`Sized`) implementors, so it cannot be called through a
/// `dyn Vcpu` trait object.
fn try_clone(&self) -> Result<Self>
where
Self: Sized;
/// Casts this architecture specific trait object to the base trait object `Vcpu`.
fn as_vcpu(&self) -> &dyn Vcpu;
/// Returns a unique `VcpuRunHandle`. A `VcpuRunHandle` is required to run the guest.
///
/// Assigns a vcpu to the current thread so that signal handlers can call
/// set_local_immediate_exit(). An optional signal number will be temporarily blocked while
/// assigning the vcpu to the thread and later blocked when `VcpuRunHandle` is destroyed.
///
/// Returns an error, `EBUSY`, if the current thread already contains a Vcpu.
fn take_run_handle(&self, signal_num: Option<c_int>) -> Result<VcpuRunHandle>;
/// Runs the VCPU until it exits, returning the reason for the exit.
///
/// Note that the state of the VCPU and associated VM must be setup first for this to do
/// anything useful. The given `run_handle` must be the same as the one returned by
/// `take_run_handle` for this `Vcpu`.
fn run(&mut self, run_handle: &VcpuRunHandle) -> Result<VcpuExit>;
/// Returns the vcpu id.
fn id(&self) -> usize;
/// Sets the bit that requests an immediate exit.
fn set_immediate_exit(&self, exit: bool);
/// Sets/clears the bit for immediate exit for the vcpu on the current thread.
///
/// This is an associated function (no `self`), so it is only available on concrete (`Sized`)
/// implementors.
fn set_local_immediate_exit(exit: bool)
where
Self: Sized;
/// Returns a function pointer that invokes `set_local_immediate_exit` in a
/// signal-safe way when called.
fn set_local_immediate_exit_fn(&self) -> extern "C" fn();
/// Handles an incoming MMIO request from the guest.
///
/// This function should be called after `Vcpu::run` returns `VcpuExit::Mmio`, and in the same
/// thread as run().
///
/// Once called, it will determine whether a MMIO read or MMIO write was the reason for the MMIO
/// exit, call `handle_fn` with the respective IoParams to perform the MMIO read or write, and
/// set the return data in the vcpu so that the vcpu can resume running.
fn handle_mmio(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()>;
/// Handles an incoming PIO from the guest.
///
/// This function should be called after `Vcpu::run` returns `VcpuExit::Io`, and in the same
/// thread as run().
///
/// Once called, it will determine whether an input or output was the reason for the Io exit,
/// call `handle_fn` with the respective IoParams to perform the input/output operation, and set
/// the return data in the vcpu so that the vcpu can resume running.
fn handle_io(&self, handle_fn: &mut dyn FnMut(IoParams) -> Option<[u8; 8]>) -> Result<()>;
/// Handles the HYPERV_HYPERCALL exit from a vcpu.
///
/// This function should be called after `Vcpu::run` returns `VcpuExit::HypervHypercall`, and in
/// the same thread as run.
///
/// Once called, it will parse the appropriate input parameters to the provided function to
/// handle the hyperv call, and then set the return data into the vcpu so it can resume running.
fn handle_hyperv_hypercall(&self, func: &mut dyn FnMut(HypervHypercall) -> u64) -> Result<()>;
/// Handles a RDMSR exit from the guest.
///
/// This function should be called after `Vcpu::run` returns `VcpuExit::RdMsr`,
/// and in the same thread as run.
///
/// It will put `data` into the guest buffer and return.
fn handle_rdmsr(&self, data: u64) -> Result<()>;
/// Handles a WRMSR exit from the guest by removing any error indication for the operation.
///
/// This function should be called after `Vcpu::run` returns `VcpuExit::WrMsr`,
/// and in the same thread as run.
fn handle_wrmsr(&self);
/// Signals to the hypervisor that this guest is being paused by userspace. Only works on Vms
/// that support `VmCap::PvClockSuspend`.
fn pvclock_ctrl(&self) -> Result<()>;
/// Specifies set of signals that are blocked during execution of `Vcpu::run`. Signals
/// that are not blocked will cause run to return with `VcpuExit::Intr`. Only works on Vms that
/// support `VmCap::SignalMask`.
fn set_signal_mask(&self, signals: &[c_int]) -> Result<()>;
/// Enables a hypervisor-specific extension on this Vcpu. `cap` is a constant defined by the
/// hypervisor API (e.g., kvm.h). `args` are the arguments for enabling the feature, if any.
///
/// # Safety
/// This function is marked as unsafe because `args` may be interpreted as pointers for some
/// capabilities. The caller must ensure that any pointers passed in the `args` array are
/// allocated as the kernel expects, and that mutable pointers are owned.
unsafe fn enable_raw_capability(&self, cap: u32, args: &[u64; 4]) -> Result<()>;
}
// Generates the `downcast_rs` downcasting helpers for `dyn Vcpu` (the `sync` form matches the
// `DowncastSync` supertrait above).
downcast_rs::impl_downcast!(sync Vcpu);
/// An address either in programmable I/O space or in memory mapped I/O space.
///
/// Used by `Vm::register_ioevent`, `Vm::unregister_ioevent`, and `Vm::handle_io_events`.
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, std::hash::Hash)]
pub enum IoEventAddress {
/// An address in programmable I/O space.
Pio(u64),
/// An address in memory mapped I/O space.
Mmio(u64),
}
/// Used in `Vm::register_ioevent` to indicate a size and optionally value to match.
///
/// The variant fixes the width of the guest write that is matched; per `Vm::register_ioevent`,
/// that width must agree with the size of the guest's write.
///
/// NOTE(review): a sized variant holding `None` presumably matches any value of that width, and
/// `AnyLength` presumably matches writes of any width -- confirm against the implementations.
///
/// Like `IoEventAddress`, which it is always passed alongside, this is a small plain-data type
/// and is therefore `Clone`, `Copy`, and `Debug`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum Datamatch {
    /// No width (and no value) constraint.
    AnyLength,
    /// Match an 8-bit write, optionally with a specific value.
    U8(Option<u8>),
    /// Match a 16-bit write, optionally with a specific value.
    U16(Option<u16>),
    /// Match a 32-bit write, optionally with a specific value.
    U32(Option<u32>),
    /// Match a 64-bit write, optionally with a specific value.
    U64(Option<u64>),
}
/// A reason why a VCPU exited. One of these returns every time `Vcpu::run` is called.
// NOTE(review): many of the undocumented variants below look like they mirror hypervisor
// (e.g. KVM) exit reasons one-to-one -- confirm against the backend implementations before
// documenting them individually.
#[derive(Debug, Clone, Copy)]
pub enum VcpuExit {
/// An io instruction needs to be emulated.
/// vcpu handle_io should be called to handle the io operation
Io,
/// A mmio instruction needs to be emulated.
/// vcpu handle_mmio should be called to handle the mmio operation
Mmio,
IoapicEoi {
vector: u8,
},
/// A Hyper-V hypercall needs to be handled.
/// vcpu handle_hyperv_hypercall should be called to handle it
HypervHypercall,
Unknown,
Exception,
Hypercall,
Debug,
Hlt,
IrqWindowOpen,
Shutdown,
FailEntry {
hardware_entry_failure_reason: u64,
},
/// Per `Vcpu::set_signal_mask`, run returns this when a signal that is not blocked arrives.
Intr,
SetTpr,
TprAccess,
S390Sieic,
S390Reset,
Dcr,
Nmi,
InternalError,
Osi,
PaprHcall,
S390Ucontrol,
Watchdog,
S390Tsch,
Epr,
SystemEventShutdown,
SystemEventReset,
SystemEventCrash,
SystemEventS2Idle,
/// A RDMSR from the guest needs to be handled.
/// vcpu handle_rdmsr should be called to supply the data
RdMsr {
// Presumably the index of the MSR being read -- TODO confirm.
index: u32,
},
/// A WRMSR from the guest needs to be handled.
/// vcpu handle_wrmsr should be called to handle it
WrMsr {
// Presumably the index of the MSR being written and the value written -- TODO confirm.
index: u32,
data: u64,
},
/// An invalid vcpu register was set while running.
InvalidVpRegister,
/// incorrect setup for vcpu requiring an unsupported feature
UnsupportedFeature,
/// vcpu run was user cancelled
Canceled,
/// an unrecoverable exception was encountered (different from Exception)
UnrecoverableException,
/// vcpu stopped due to an msr access.
MsrAccess,
/// vcpu stopped due to a cpuid request.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Cpuid {
entry: CpuIdEntry,
},
/// vcpu stopped due to calling rdtsc
RdTsc,
/// vcpu stopped for an apic smi trap
ApicSmiTrap,
/// vcpu stopped due to an apic trap
ApicInitSipiTrap,
}
/// A hypercall with parameters being made from the guest.
///
/// Passed to the callback given to `Vcpu::handle_hyperv_hypercall`, which returns a `u64`
/// result for the call.
// NOTE(review): field meanings are not documented here; they presumably follow the Hyper-V
// exit data reported by the hypervisor -- confirm against the backend implementation.
#[derive(Debug)]
pub enum HypervHypercall {
HypervSynic {
msr: u32,
control: u64,
evt_page: u64,
msg_page: u64,
},
HypervHcall {
input: u64,
params: [u64; 2],
},
}
/// A device type to create with `Vm::create_device`.
///
/// Equality on this field-less enum is total, so it derives `Eq` alongside `PartialEq`, matching
/// the other plain enums in this file. The ARM variants only exist on `arm`/`aarch64` targets.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DeviceKind {
    /// VFIO device for direct access to devices from userspace
    Vfio,
    /// ARM virtual general interrupt controller v2
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    ArmVgicV2,
    /// ARM virtual general interrupt controller v3
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    ArmVgicV3,
}
/// The source chip of an `IrqSource`
///
/// Used as the `chip` field of `IrqSource::Irqchip`. The layout is `#[repr(C)]`.
// NOTE(review): variant names suggest the primary/secondary PIC, the IOAPIC, and the ARM GIC --
// confirm against the irqchip implementations.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IrqSourceChip {
PicPrimary,
PicSecondary,
Ioapic,
Gic,
}
/// A source of IRQs in an `IrqRoute`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum IrqSource {
/// An IRQ coming from pin `pin` of the interrupt controller `chip`.
Irqchip { chip: IrqSourceChip, pin: u32 },
/// An IRQ described by an `address`/`data` pair (presumably a message-signaled interrupt --
/// TODO confirm).
Msi { address: u64, data: u32 },
}
/// A single route for an IRQ.
///
/// Pairs a `gsi` number with the `IrqSource` it is routed to/from.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct IrqRoute {
// Presumably the global system interrupt number -- TODO confirm.
pub gsi: u32,
/// The source of the IRQ for this route.
pub source: IrqSource,
}
/// The state of the paravirtual clock.
///
/// Returned by `Vm::get_pvclock` and accepted by `Vm::set_pvclock`.
#[derive(Debug, Default, Copy, Clone)]
pub struct ClockState {
/// Current pv clock timestamp, as seen by the guest
pub clock: u64,
/// Hypervisor-specific feature flags for the pv clock
pub flags: u32,
}
/// The MPState represents the state of a processor.
///
/// Each variant notes in parentheses the architectures it applies to.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MPState {
/// the vcpu is currently running (x86/x86_64,arm/arm64)
Runnable,
/// the vcpu is an application processor (AP) which has not yet received an INIT signal
/// (x86/x86_64)
Uninitialized,
/// the vcpu has received an INIT signal, and is now ready for a SIPI (x86/x86_64)
InitReceived,
/// the vcpu has executed a HLT instruction and is waiting for an interrupt (x86/x86_64)
Halted,
/// the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) (x86/x86_64)
SipiReceived,
/// the vcpu is stopped (arm/arm64)
Stopped,
}
/// Whether the VM should be run in protected mode or not.
///
/// Stored in `Config::protection_type`; `Config::default()` selects `Unprotected`.
#[derive(Copy, Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub enum ProtectionType {
/// The VM should be run in the unprotected mode, where the host has access to its memory.
Unprotected,
/// The VM should be run in protected mode, so the host cannot access its memory directly. It
/// should be booted via the protected VM firmware, so that it can access its secrets.
Protected,
/// The VM should be run in protected mode, but booted directly without pVM firmware. The host
/// will still be unable to access the VM memory, but it won't be given any secrets.
ProtectedWithoutFirmware,
/// The VM should be run in unprotected mode, but with the same memory layout as protected mode,
/// protected VM firmware loaded, and simulating protected mode as much as possible. This is
/// useful for debugging the protected VM firmware and other protected mode issues.
UnprotectedWithFirmware,
}
/// Configuration carrying the `ProtectionType` a VM should be run with.
///
/// The `Default` value selects `ProtectionType::Unprotected`.
#[derive(Clone, Copy, Debug)]
pub struct Config {
    /// Whether (and how) the VM should be run in protected mode.
    pub protection_type: ProtectionType,
}
impl Default for Config {
    /// Returns a configuration for a VM run in unprotected mode.
    fn default() -> Self {
        let protection_type = ProtectionType::Unprotected;
        Self { protection_type }
    }
}