// Copyright 2022 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::arch::x86_64::__cpuid;
use std::arch::x86_64::__cpuid_count;
use std::convert::TryInto;
use std::fmt;
use std::fmt::Display;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::sync::Barrier;
use std::thread;
use std::thread::JoinHandle;
use std::time::Duration;
use std::time::Instant;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
use aarch64::AArch64 as Arch;
use anyhow::anyhow;
use anyhow::Result;
use arch::CpuConfigArch;
use arch::CpuSet;
use arch::IrqChipArch;
use arch::LinuxArch;
use arch::RunnableLinuxVm;
use arch::VcpuAffinity;
use arch::VcpuArch;
use arch::VmArch;
use base::error;
use base::info;
use base::set_audio_thread_priority;
use base::set_cpu_affinity;
use base::warn;
use base::Event;
use base::Result as BaseResult;
use base::SafeMultimediaHandle;
use base::SendTube;
use base::Timer;
use base::Tube;
use base::VmEventType;
use cros_async::select2;
use cros_async::EventAsync;
use cros_async::Executor;
use cros_async::SelectResult;
use cros_async::TimerAsync;
use cros_tracing::trace_event;
use crosvm_cli::bail_exit_code;
use crosvm_cli::sys::windows::exit::Exit;
use crosvm_cli::sys::windows::exit::ExitContext;
use crosvm_cli::sys::windows::exit::ExitContextAnyhow;
use devices::tsc::TscSyncMitigations;
use devices::Bus;
use devices::VcpuRunState;
use futures::pin_mut;
#[cfg(feature = "whpx")]
use hypervisor::whpx::WhpxVcpu;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use hypervisor::CpuConfigX86_64;
use hypervisor::HypervisorCap;
use hypervisor::IoEventAddress;
use hypervisor::IoOperation;
use hypervisor::IoParams;
use hypervisor::VcpuExit;
use hypervisor::VcpuInitX86_64;
use hypervisor::VcpuRunHandle;
use sync::Condvar;
use sync::Mutex;
use vm_control::VmRunMode;
use winapi::shared::winerror::ERROR_RETRY;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use x86_64::cpuid::adjust_cpuid;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use x86_64::cpuid::CpuIdContext;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use x86_64::X8664arch as Arch;
#[cfg(feature = "stats")]
use crate::crosvm::sys::windows::stats::StatisticsCollector;
#[cfg(feature = "stats")]
use crate::crosvm::sys::windows::stats::VmExitStatistics;
use crate::sys::windows::save_vcpu_tsc_offset;
use crate::sys::windows::ExitState;

const ERROR_RETRY_I32: i32 = ERROR_RETRY as i32;
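
/// The current VM run mode paired with a condvar so vcpu threads (and the
/// stall monitor) can block until the main thread changes the mode, rather
/// than polling it.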
#[derive(Default)]
pub struct VcpuRunMode {
mtx: Mutex<VmRunMode>,
cvar: Condvar,
}
impl VcpuRunMode {
pub fn set_and_notify(&self, new_mode: VmRunMode) {
*self.mtx.lock() = new_mode;
self.cvar.notify_all();
}
}
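
// What runnable_vcpu hands back to the vcpu thread: the configured vcpu, an
// optional handle that keeps the thread at multimedia (RT-like) priority while
// held, and the run handle that binds the vcpu to this thread.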
struct RunnableVcpuInfo<V> {
vcpu: V,
thread_priority_handle: Option<SafeMultimediaHandle>,
vcpu_run_handle: VcpuRunHandle,
}
#[derive(Clone, Debug)]
struct VcpuMonitoringMetadata {
pub start_instant: Instant,
// Milliseconds since the baseline start_instant
pub last_run_time: Arc<AtomicU64>,
pub last_exit_snapshot: Arc<Mutex<Option<VcpuExitData>>>,
}
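
// Identifies a vcpu run thread and carries the monitoring metadata it shares
// with the VcpuStallMonitor (None when monitoring is disabled).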
#[derive(Clone, Debug)]
struct VcpuRunThread {
pub cpu_id: usize,
pub monitoring_metadata: Option<VcpuMonitoringMetadata>,
}
impl VcpuRunThread {
pub fn new(cpu_id: usize, enable_vcpu_monitoring: bool) -> VcpuRunThread {
VcpuRunThread {
cpu_id,
monitoring_metadata: enable_vcpu_monitoring.then(|| VcpuMonitoringMetadata {
start_instant: Instant::now(),
last_run_time: Arc::new(AtomicU64::new(0)),
last_exit_snapshot: Arc::new(Mutex::new(None)),
}),
}
}
/// Perform WHPX-specific vcpu configurations
#[cfg(feature = "whpx")]
fn whpx_configure_vcpu(vcpu: &mut dyn VcpuArch, irq_chip: &mut dyn IrqChipArch) {
// This only applies to actual WhpxVcpu instances.
if let Some(whpx_vcpu) = vcpu.downcast_mut::<WhpxVcpu>() {
// WhpxVcpu instances need to know the TSC and Lapic frequencies to handle Hyper-V MSR reads
// and writes.
let tsc_freq = devices::tsc::tsc_frequency()
.map_err(|e| {
error!(
"Could not determine TSC frequency, WHPX vcpu will not be configured with \
a TSC Frequency: {e}"
);
e
})
.ok();
whpx_vcpu.set_frequencies(tsc_freq, irq_chip.lapic_frequency());
}
}
// Sets up a vcpu and converts it into a runnable vcpu.
fn runnable_vcpu<V>(
cpu_id: usize,
vcpu: Option<V>,
vcpu_init: VcpuInitX86_64,
vm: &impl VmArch,
irq_chip: &mut dyn IrqChipArch,
vcpu_count: usize,
run_rt: bool,
vcpu_affinity: Option<CpuSet>,
no_smt: bool,
has_bios: bool,
host_cpu_topology: bool,
force_calibrated_tsc_leaf: bool,
) -> Result<RunnableVcpuInfo<V>>
where
V: VcpuArch,
{
let mut vcpu = match vcpu {
Some(v) => v,
None => {
// If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
// the vcpu thread.
match vm
.create_vcpu(cpu_id)
.exit_context(Exit::CreateVcpu, "failed to create vcpu")?
.downcast::<V>()
{
Ok(v) => *v,
Err(_) => panic!("VM created wrong type of VCPU"),
}
}
};
irq_chip
.add_vcpu(cpu_id, &vcpu)
.exit_context(Exit::AddIrqChipVcpu, "failed to add vcpu to irq chip")?;
if let Some(affinity) = vcpu_affinity {
if let Err(e) = set_cpu_affinity(affinity) {
error!("Failed to set CPU affinity: {}", e);
}
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let cpu_config = Some(CpuConfigX86_64::new(
force_calibrated_tsc_leaf,
host_cpu_topology,
false, /* enable_hwp */
false, /* enable_pnp_data */
no_smt,
false, /* itmt */
None, /* hybrid_type */
));
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
let cpu_config = None;
Arch::configure_vcpu(
vm,
vm.get_hypervisor(),
irq_chip,
&mut vcpu,
vcpu_init,
cpu_id,
vcpu_count,
has_bios,
cpu_config,
)
.exit_context(Exit::ConfigureVcpu, "failed to configure vcpu")?;
#[cfg(feature = "whpx")]
Self::whpx_configure_vcpu(&mut vcpu, irq_chip);
let mut thread_priority_handle = None;
if run_rt {
// Until we are multi-process on Windows, we can't use the normal thread
// priority APIs; instead, we use a trick from the audio device which can set
// a thread to RT priority even though the process itself is not RT.
thread_priority_handle = match set_audio_thread_priority() {
Ok(hndl) => Some(hndl),
Err(e) => {
warn!("Failed to set vcpu thread to real time priority: {}", e);
None
}
};
}
let vcpu_run_handle = vcpu
.take_run_handle(None)
.exit_context(Exit::RunnableVcpu, "failed to set thread id for vcpu")?;
Ok(RunnableVcpuInfo {
vcpu,
thread_priority_handle,
vcpu_run_handle,
})
}
pub fn run<V>(
&self,
vcpu: Option<V>,
vcpu_init: VcpuInitX86_64,
vcpus: Arc<Mutex<Vec<Box<dyn VcpuArch>>>>,
vm: impl VmArch + 'static,
mut irq_chip: Box<dyn IrqChipArch + 'static>,
vcpu_count: usize,
run_rt: bool,
vcpu_affinity: Option<CpuSet>,
delay_rt: bool,
no_smt: bool,
start_barrier: Arc<Barrier>,
vcpu_create_barrier: Arc<Barrier>,
has_bios: bool,
mut io_bus: devices::Bus,
mut mmio_bus: devices::Bus,
vm_evt_wrtube: SendTube,
requires_pvclock_ctrl: bool,
run_mode_arc: Arc<VcpuRunMode>,
#[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>,
host_cpu_topology: bool,
tsc_offset: Option<u64>,
force_calibrated_tsc_leaf: bool,
) -> Result<JoinHandle<Result<()>>>
where
V: VcpuArch + 'static,
{
let context = self.clone();
thread::Builder::new()
.name(format!("crosvm_vcpu{}", self.cpu_id))
.spawn(move || {
// Having a closure returning ExitState guarantees that we
// send a VmEventType on all code paths after the closure
// returns.
let vcpu_fn = || -> Result<ExitState> {
let runnable_vcpu = Self::runnable_vcpu(
context.cpu_id,
vcpu,
vcpu_init,
&vm,
irq_chip.as_mut(),
vcpu_count,
run_rt && !delay_rt,
vcpu_affinity,
no_smt,
has_bios,
host_cpu_topology,
force_calibrated_tsc_leaf,
);
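// The creation Result is intentionally held here and only propagated with ?
// after the two barrier waits below; returning early would leave the main
// thread and sibling vcpu threads blocked on barriers this thread never
// reaches.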
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let cpu_config = CpuConfigX86_64::new(
force_calibrated_tsc_leaf,
host_cpu_topology,
false, /* enable_hwp */
false, /* enable_pnp_data */
no_smt,
false, /* itmt */
None, /* hybrid_type */
);
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let cpuid_context = CpuIdContext::new(
context.cpu_id,
vcpu_count,
Some(irq_chip.as_ref()),
cpu_config,
vm.get_hypervisor()
.check_capability(HypervisorCap::CalibratedTscLeafRequired),
__cpuid_count,
__cpuid,
);
// The vcpu_create_barrier is supplied from the main thread in order for it to
// wait until this thread is done creating its vcpu.
vcpu_create_barrier.wait();
// Wait on the start barrier alongside the main thread and the other vcpu
// threads before running.
start_barrier.wait();
let RunnableVcpuInfo {
vcpu,
thread_priority_handle: _thread_priority_handle,
vcpu_run_handle,
} = runnable_vcpu?;
if let Some(offset) = tsc_offset {
vcpu.set_tsc_offset(offset).unwrap_or_else(|e| {
error!(
"Failed to set tsc_offset of {} on vcpu {}: {}",
offset, context.cpu_id, e
)
});
}
// Clone vcpu so it can be used by the main thread to force a vcpu run to exit
vcpus
.lock()
.push(Box::new(vcpu.try_clone().expect("Could not clone vcpu!")));
mmio_bus.set_access_id(context.cpu_id);
io_bus.set_access_id(context.cpu_id);
vcpu_loop(
&context,
vcpu,
vm,
vcpu_run_handle,
irq_chip,
io_bus,
mmio_bus,
requires_pvclock_ctrl,
run_mode_arc,
#[cfg(feature = "stats")]
stats,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
cpuid_context,
)
};
let final_event_data = match vcpu_fn().unwrap_or_else(|e| {
error!(
"vcpu {} run loop exited with error: {:#}",
context.cpu_id, e
);
ExitState::Stop
}) {
ExitState::Stop => VmEventType::Exit,
_ => unreachable!(),
};
vm_evt_wrtube
.send::<VmEventType>(&final_event_data)
.unwrap_or_else(|e| {
error!(
"failed to send final event {:?} on vcpu {}: {}",
final_event_data, context.cpu_id, e
)
});
Ok(())
})
.exit_context(Exit::SpawnVcpu, "failed to spawn VCPU thread")
}
}
#[derive(Clone, Debug)]
struct VcpuExitData {
// Represented by duration since baseline start_instant
exit_time: Duration,
exit_result: BaseResult<VcpuExit>,
}
impl Display for VcpuExitData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "exit result: {:?}", self.exit_result)
}
}
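
// Watches all vcpu run threads from a dedicated thread, waking every
// VCPU_CHECKUP_INTERVAL to report vcpus that have sat in a host-side exit
// longer than HOST_STALL_TIMEOUT.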
struct VcpuStallMonitor {
vcpu_run_threads: Vec<VcpuRunThread>,
run_mode: Arc<VcpuRunMode>,
}
impl VcpuStallMonitor {
const HOST_STALL_TIMEOUT: Duration = Duration::from_secs(2);
const VCPU_CHECKUP_INTERVAL: Duration = Duration::from_secs(1);
const STALL_REPORTING_LIMITER: Duration = Duration::from_secs(10);
pub fn init(run_mode: Arc<VcpuRunMode>) -> VcpuStallMonitor {
VcpuStallMonitor {
vcpu_run_threads: vec![],
run_mode,
}
}
pub fn add_vcpu_thread(&mut self, thread: VcpuRunThread) {
self.vcpu_run_threads.push(thread);
}
pub fn run(self, exit_event: &Event) -> Result<JoinHandle<Result<()>>> {
let cloned_exit_event = exit_event
.try_clone()
.exit_context(Exit::CloneEvent, "failed to clone event")?;
thread::Builder::new()
.name("crosvm_vcpu_stall_monitor".to_string())
.spawn(move || {
let ex = Executor::new()?;
let mut timer = TimerAsync::new(Timer::new()?, &ex)?;
let mut reset_timer = true;
let exit_evt_async = EventAsync::new(cloned_exit_event, &ex)?;
let exit_future = exit_evt_async.next_val();
pin_mut!(exit_future);
'main: loop {
if reset_timer {
timer.reset(
Self::VCPU_CHECKUP_INTERVAL,
Some(Self::VCPU_CHECKUP_INTERVAL),
)?;
reset_timer = false;
}
let timer_future = timer.wait();
pin_mut!(timer_future);
match ex.run_until(select2(timer_future, exit_future)) {
Ok((timer_result, exit_result)) => {
match exit_result {
SelectResult::Finished(_) => {
info!("vcpu monitor got exit event");
break 'main;
}
SelectResult::Pending(future) => exit_future = future,
}
match timer_result {
SelectResult::Finished(Err(e)) => {
error!(
"vcpu monitor aborting due to error awaiting future: {}",
e
);
break 'main;
}
SelectResult::Finished(_) => self.report_any_stalls(),
_ => (),
}
}
Err(e) => {
error!("vcpu monitor failed to wait on future set: {:?}", e);
break 'main;
}
}
// Always ensure the vcpus aren't suspended before continuing to monitor.
let mut run_mode_lock = self.run_mode.mtx.lock();
loop {
match *run_mode_lock {
VmRunMode::Running => break,
VmRunMode::Suspending | VmRunMode::Breakpoint => {
info!("vcpu monitor pausing until end of suspension");
run_mode_lock = self.run_mode.cvar.wait(run_mode_lock);
reset_timer = true;
}
VmRunMode::Exiting => {
info!("vcpu monitor detected vm exit");
break 'main;
}
}
}
}
Ok(())
})
.exit_context(
Exit::SpawnVcpuMonitor,
"failed to spawn VCPU stall monitor thread",
)
}
fn report_any_stalls(&self) {
// TODO(b/208267651): Add and fire Clearcut events for stalls (and add tests)
// TODO(b/208267651): Also test guest stalls (vcpu.run() goes too long without exiting)
let now = Instant::now();
for vcpu_thread in self.vcpu_run_threads.iter() {
let monitoring_metadata = vcpu_thread.monitoring_metadata.as_ref().unwrap();
if let Some(ref exit_snapshot) = monitoring_metadata.last_exit_snapshot.lock().clone() {
let last_run =
Duration::from_millis(monitoring_metadata.last_run_time.load(Ordering::SeqCst));
if last_run < exit_snapshot.exit_time {
// VCPU is between runs
let time_since_exit = now.saturating_duration_since(
monitoring_metadata.start_instant + exit_snapshot.exit_time,
);
if time_since_exit > Self::HOST_STALL_TIMEOUT {
self.report_stall(vcpu_thread.cpu_id, exit_snapshot, time_since_exit);
}
}
};
}
}
fn report_stall(&self, cpu_id: usize, exit_data: &VcpuExitData, stall_time: Duration) {
// Rate-limit reporting: once a stall has persisted past this limit, stop
// logging it on every checkup.
if stall_time > Self::STALL_REPORTING_LIMITER {
return;
}
// Double check the VM is running. We don't care about stalls during suspension/exit.
if *self.run_mode.mtx.lock() != VmRunMode::Running {
return;
}
let duration_string = format!("{:.1}sec", stall_time.as_secs_f32());
error!(
"Host stall for {} on VCPU {} exit while handling: {}",
duration_string, cpu_id, exit_data,
);
}
}
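
// On Windows, vcpu threads are interrupted through the hypervisor (see the
// VcpuExit::Intr/Canceled handling in vcpu_loop) rather than signals, so this
// is a no-op that mirrors the unix implementation's entry point.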
fn setup_vcpu_signal_handler() -> Result<()> {
Ok(())
}
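
/// Spawns one run loop thread per vcpu (plus a stall monitor thread when vcpu
/// monitoring is enabled) and returns their join handles. Vcpus are created
/// one at a time via vcpu_create_barrier, then all released together by
/// start_barrier.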
pub fn run_all_vcpus<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
vcpus: Vec<Option<Vcpu>>,
vcpu_boxes: Arc<Mutex<Vec<Box<dyn VcpuArch>>>>,
guest_os: &RunnableLinuxVm<V, Vcpu>,
exit_evt: &Event,
vm_evt_wrtube: &SendTube,
pvclock_host_tube: &Option<Tube>,
#[cfg(feature = "stats")] stats: &Option<Arc<Mutex<StatisticsCollector>>>,
host_cpu_topology: bool,
run_mode_arc: Arc<VcpuRunMode>,
tsc_sync_mitigations: TscSyncMitigations,
force_calibrated_tsc_leaf: bool,
) -> Result<Vec<JoinHandle<Result<()>>>> {
let mut vcpu_threads = Vec::with_capacity(guest_os.vcpu_count + 1);
let start_barrier = Arc::new(Barrier::new(guest_os.vcpu_count + 1));
let enable_vcpu_monitoring = anti_tamper::enable_vcpu_monitoring();
setup_vcpu_signal_handler()?;
let mut stall_monitor =
enable_vcpu_monitoring.then(|| VcpuStallMonitor::init(run_mode_arc.clone()));
for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
let vcpu_affinity = match guest_os.vcpu_affinity.clone() {
Some(VcpuAffinity::Global(v)) => Some(v),
Some(VcpuAffinity::PerVcpu(mut m)) => Some(m.remove(&cpu_id).unwrap_or_default()),
None => None,
};
// TSC sync mitigations may set vcpu affinity and set a TSC offset
let (vcpu_affinity, tsc_offset): (Option<CpuSet>, Option<u64>) =
if let Some(mitigation_affinity) = tsc_sync_mitigations.get_vcpu_affinity(cpu_id) {
if vcpu_affinity.is_none() {
(
Some(CpuSet::new(mitigation_affinity)),
tsc_sync_mitigations.get_vcpu_tsc_offset(cpu_id),
)
} else {
error!(
"Core affinity {:?} specified via the command line conflicts with and \
overrides the affinity needed for TSC sync mitigation: {:?}.",
vcpu_affinity, mitigation_affinity
);
(vcpu_affinity, None)
}
} else {
(vcpu_affinity, None)
};
let vcpu_init = &guest_os.vcpu_init[cpu_id];
// The vcpu_create_barrier allows the main thread to delay the spawning of
// additional vcpu threads until the thread it just spawned has finished
// creating its vcpu. We currently use it to create one vcpu at a time for
// all hypervisors, because parallel creation causes problems on several of
// them:
// - Windows 11 has a regression which causes a BSOD when multiple vcpus are
// created in parallel. See http://b/229635845 for more details.
// - GHAXM/HAXM cannot create vcpu0 in parallel with other vcpus.
let vcpu_create_barrier = Arc::new(Barrier::new(2));
let vcpu_run_thread = VcpuRunThread::new(cpu_id, enable_vcpu_monitoring);
let join_handle = vcpu_run_thread.run(
vcpu,
vcpu_init.clone(),
vcpu_boxes.clone(),
guest_os
.vm
.try_clone()
.exit_context(Exit::CloneEvent, "failed to clone vm")?,
guest_os
.irq_chip
.try_box_clone()
.exit_context(Exit::CloneEvent, "failed to clone irq chip")?,
guest_os.vcpu_count,
guest_os.rt_cpus.contains(&cpu_id),
vcpu_affinity,
guest_os.delay_rt,
guest_os.no_smt,
start_barrier.clone(),
vcpu_create_barrier.clone(),
guest_os.has_bios,
(*guest_os.io_bus).clone(),
(*guest_os.mmio_bus).clone(),
vm_evt_wrtube
.try_clone()
.exit_context(Exit::CloneTube, "failed to clone tube")?,
pvclock_host_tube.is_none(), /* requires_pvclock_ctrl */
run_mode_arc.clone(),
#[cfg(feature = "stats")]
stats.clone(),
host_cpu_topology,
tsc_offset,
force_calibrated_tsc_leaf,
)?;
if let Some(ref mut monitor) = stall_monitor {
monitor.add_vcpu_thread(vcpu_run_thread);
}
// Wait until the vcpu is created before we start a new vcpu thread
vcpu_create_barrier.wait();
vcpu_threads.push(join_handle);
}
if let Some(monitor) = stall_monitor {
vcpu_threads.push(monitor.run(exit_evt)?);
}
// Now wait on the start barrier to start all threads at the same time.
start_barrier.wait();
Ok(vcpu_threads)
}
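
// The run loop of a single vcpu thread: wait until the vcpu is runnable, run
// it, dispatch the resulting VcpuExit, and on interruption consult VmRunMode
// to decide whether to suspend, keep running, or exit.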
fn vcpu_loop<V>(
context: &VcpuRunThread,
mut vcpu: V,
vm: impl VmArch + 'static,
vcpu_run_handle: VcpuRunHandle,
irq_chip: Box<dyn IrqChipArch + 'static>,
io_bus: Bus,
mmio_bus: Bus,
requires_pvclock_ctrl: bool,
run_mode_arc: Arc<VcpuRunMode>,
#[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] cpuid_context: CpuIdContext,
) -> Result<ExitState>
where
V: VcpuArch + 'static,
{
#[cfg(feature = "stats")]
let mut exit_stats = VmExitStatistics::new();
#[cfg(feature = "stats")]
{
mmio_bus.stats.lock().set_enabled(stats.is_some());
io_bus.stats.lock().set_enabled(stats.is_some());
exit_stats.set_enabled(stats.is_some());
}
let mut save_tsc_offset = true;
loop {
let _trace_event = trace_event!(crosvm, "vcpu loop");
let mut check_vm_shutdown = false;
match irq_chip.wait_until_runnable(&vcpu).with_exit_context(
Exit::WaitUntilRunnable,
|| {
format!(
"error waiting for vcpu {} to become runnable",
context.cpu_id
)
},
)? {
VcpuRunState::Runnable => {}
VcpuRunState::Interrupted => check_vm_shutdown = true,
}
if !check_vm_shutdown {
let exit = {
let _trace_event = trace_event!(crosvm, "vcpu::run");
if let Some(ref monitoring_metadata) = context.monitoring_metadata {
monitoring_metadata.last_run_time.store(
// Safe conversion because millis will always be < u64::MAX
monitoring_metadata
.start_instant
.elapsed()
.as_millis()
.try_into()
.unwrap(),
Ordering::SeqCst,
);
}
vcpu.run(&vcpu_run_handle)
};
if let Some(ref monitoring_metadata) = context.monitoring_metadata {
*monitoring_metadata.last_exit_snapshot.lock() = Some(VcpuExitData {
exit_time: monitoring_metadata.start_instant.elapsed(),
exit_result: exit,
});
}
// Save the TSC offset once, on this vcpu's first run.
if save_tsc_offset {
if let Ok(offset) = vcpu.get_tsc_offset() {
save_vcpu_tsc_offset(offset, context.cpu_id);
} else {
error!("Unable to determine TSC offset");
}
save_tsc_offset = false;
}
#[cfg(feature = "stats")]
let start = exit_stats.start_stat();
match exit {
Ok(VcpuExit::Io) => {
let _trace_event = trace_event!(crosvm, "VcpuExit::Io");
vcpu.handle_io(&mut |IoParams { address, mut size, operation }| {
match operation {
IoOperation::Read => {
let mut data = [0u8; 8];
if size > data.len() {
error!("unsupported IoIn size of {} bytes", size);
size = data.len();
}
io_bus.read(address, &mut data[..size]);
Some(data)
}
IoOperation::Write { data } => {
if size > data.len() {
error!("unsupported IoOut size of {} bytes", size);
size = data.len()
}
vm.handle_io_events(IoEventAddress::Pio(address), &data[..size])
.unwrap_or_else(|e| error!(
"failed to handle ioevent for pio write to {} on vcpu {}: {}",
address, context.cpu_id, e
));
io_bus.write(address, &data[..size]);
None
}
}
}).unwrap_or_else(|e| error!("failed to handle io: {}", e));
}
Ok(VcpuExit::Mmio) => {
let _trace_event = trace_event!(crosvm, "VcpuExit::Mmio");
vcpu.handle_mmio(&mut |IoParams { address, mut size, operation }| {
match operation {
IoOperation::Read => {
let mut data = [0u8; 8];
if size > data.len() {
error!("unsupported MmioRead size of {} bytes", size);
size = data.len();
}
{
let data = &mut data[..size];
if !mmio_bus.read(address, data) {
info!(
"mmio read failed: {:x}; trying memory read..",
address
);
vm.get_memory()
.read_exact_at_addr(
data,
vm_memory::GuestAddress(address),
)
.unwrap_or_else(|e| {
error!(
"guest memory read failed at {:x}: {}",
address, e
)
});
}
}
Some(data)
}
IoOperation::Write { data } => {
if size > data.len() {
error!("unsupported MmioWrite size of {} bytes", size);
size = data.len()
}
let data = &data[..size];
vm.handle_io_events(IoEventAddress::Mmio(address), data)
.unwrap_or_else(|e| error!(
"failed to handle ioevent for mmio write to {} on vcpu {}: {}",
address, context.cpu_id, e
));
if !mmio_bus.write(address, data) {
info!(
"mmio write failed: {:x}; trying memory write..",
address
);
vm.get_memory()
.write_all_at_addr(data, vm_memory::GuestAddress(address))
.unwrap_or_else(|e| error!(
"guest memory write failed at {:x}: {}",
address, e
));
}
None
}
}
}).unwrap_or_else(|e| error!("failed to handle mmio: {}", e));
}
Ok(VcpuExit::IoapicEoi { vector }) => {
irq_chip.broadcast_eoi(vector).unwrap_or_else(|e| {
error!(
"failed to broadcast eoi {} on vcpu {}: {}",
vector, context.cpu_id, e
)
});
}
Ok(VcpuExit::IrqWindowOpen) => {}
Ok(VcpuExit::Hlt) => irq_chip.halted(context.cpu_id),
// VcpuExit::Shutdown is always an error on Windows. HAXM exits with
// Shutdown only for triple faults and other vcpu panics. WHPX never exits
// with Shutdown. Normal reboots and shutdowns, like window close, use
// the vm event tube and VmRunMode::Exiting instead of VcpuExit::Shutdown.
Ok(VcpuExit::Shutdown) => bail_exit_code!(Exit::VcpuShutdown, "vcpu shutdown"),
Ok(VcpuExit::FailEntry {
hardware_entry_failure_reason,
}) => bail_exit_code!(
Exit::VcpuFailEntry,
"vcpu hw run failure: {:#x}",
hardware_entry_failure_reason,
),
Ok(VcpuExit::SystemEventShutdown) => {
bail_exit_code!(Exit::VcpuSystemEvent, "vcpu SystemEventShutdown")
}
Ok(VcpuExit::SystemEventReset) => {
bail_exit_code!(Exit::VcpuSystemEvent, "vcpu SystemEventReset")
}
Ok(VcpuExit::SystemEventCrash) => {
bail_exit_code!(Exit::VcpuSystemEvent, "vcpu SystemEventCrash")
}
// When we're shutting down (e.g., emulator window gets closed), GVM vmexits
// with KVM_EXIT_INTR, which vcpu.run maps to VcpuExit::Intr. But KVM_EXIT_INTR
// can happen during normal operation too, when GVM's timer finds requests
// pending from the host. So we set check_vm_shutdown, then below check the
// VmRunMode state to see if we should exit the run loop.
Ok(VcpuExit::Intr) => check_vm_shutdown = true,
Ok(VcpuExit::Canceled) => check_vm_shutdown = true,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Ok(VcpuExit::Cpuid { mut entry }) => {
let _trace_event = trace_event!(crosvm, "VcpuExit::Cpuid");
// Adjust the hypervisor's cpuid results with crosvm-specific logic.
adjust_cpuid(&mut entry, &cpuid_context);
// let the vcpu finish handling the exit
vcpu.handle_cpuid(&entry).unwrap_or_else(|e| {
error!(
"failed to handle setting cpuid results on cpu {}: {}",
context.cpu_id, e
)
});
}
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Ok(VcpuExit::MsrAccess) => {} // MsrAccess handled by hypervisor impl
Ok(r) => {
error!("unexpected vcpu.run return value: {:?}", r);
check_vm_shutdown = true;
}
Err(e) => match e.errno() {
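// Transient failure: nothing to do, run the vcpu again on the next
// loop iteration.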
ERROR_RETRY_I32 => {}
_ => {
run_mode_arc.set_and_notify(VmRunMode::Exiting);
Err(e).exit_context(Exit::VcpuRunError, "vcpu run error")?;
}
},
}
#[cfg(feature = "stats")]
exit_stats.end_stat(&exit, start);
}
if check_vm_shutdown {
let mut run_mode_lock = run_mode_arc.mtx.lock();
loop {
match *run_mode_lock {
VmRunMode::Running => break,
VmRunMode::Suspending => {
// On KVM implementations that use a paravirtualized clock (e.g.
// x86), a flag must be set to indicate to the guest kernel that
// a VCPU was suspended. The guest kernel will use this flag to
// prevent the soft lockup detection from triggering when this
// VCPU resumes, which could happen days later in realtime.
if requires_pvclock_ctrl {
vcpu.pvclock_ctrl().unwrap_or_else(|e| error!(
"failed to signal to hypervisor that vcpu {} is being suspended: {}",
context.cpu_id, e
));
}
}
VmRunMode::Breakpoint => {}
VmRunMode::Exiting => {
#[cfg(feature = "stats")]
if let Some(stats) = stats {
let mut collector = stats.lock();
collector.pio_bus_stats.push(io_bus.stats);
collector.mmio_bus_stats.push(mmio_bus.stats);
collector.vm_exit_stats.push(exit_stats);
}
return Ok(ExitState::Stop);
}
}
// Give ownership of our exclusive lock to the condition variable that
// will block. When the condition variable is notified, `wait` will
// unblock and return a new exclusive lock.
run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
}
}
irq_chip.inject_interrupts(&vcpu).unwrap_or_else(|e| {
error!(
"failed to inject interrupts for vcpu {}: {}",
context.cpu_id, e
)
});
}
}
#[cfg(test)]
mod tests {
use super::*;
struct SetupData {
pub monitor: VcpuStallMonitor,
pub exit_evt: Event,
}
fn set_up_stall_monitor(vcpu_count: usize) -> Result<SetupData> {
let run_mode = Arc::new(VcpuRunMode::default());
let mut monitor = VcpuStallMonitor::init(run_mode);
for id in 0..vcpu_count {
let new_vcpu = VcpuRunThread::new(id, true /* enable_vcpu_monitoring */);
monitor.add_vcpu_thread(new_vcpu);
}
Ok(SetupData {
monitor,
exit_evt: Event::new().expect("Failed to create event"),
})
}
#[test]
fn stall_monitor_closes_on_exit_evt() -> Result<()> {
let SetupData { monitor, exit_evt } = set_up_stall_monitor(1)?;
exit_evt.signal()?;
let _ = monitor
.run(&exit_evt)?
.join()
.unwrap_or_else(|e| panic!("Thread join failed: {:?}", e));
Ok(())
}
}