| // Copyright 2022 The ChromiumOS Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| use std::arch::x86_64::__cpuid; |
| use std::arch::x86_64::__cpuid_count; |
| use std::convert::TryInto; |
| use std::fmt; |
| use std::fmt::Display; |
| use std::sync::atomic::AtomicU64; |
| use std::sync::atomic::Ordering; |
| use std::sync::Arc; |
| use std::sync::Barrier; |
| use std::thread; |
| use std::thread::JoinHandle; |
| use std::time::Duration; |
| use std::time::Instant; |
| |
| #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] |
| use aarch64::AArch64 as Arch; |
| use anyhow::anyhow; |
| use anyhow::Result; |
| use arch::CpuConfigArch; |
| use arch::CpuSet; |
| use arch::IrqChipArch; |
| use arch::LinuxArch; |
| use arch::RunnableLinuxVm; |
| use arch::VcpuAffinity; |
| use arch::VcpuArch; |
| use arch::VmArch; |
| use base::error; |
| use base::info; |
| use base::set_audio_thread_priority; |
| use base::set_cpu_affinity; |
| use base::warn; |
| use base::Event; |
| use base::Result as BaseResult; |
| use base::SafeMultimediaHandle; |
| use base::SendTube; |
| use base::Timer; |
| use base::Tube; |
| use base::VmEventType; |
| use cros_async::select2; |
| use cros_async::EventAsync; |
| use cros_async::Executor; |
| use cros_async::SelectResult; |
| use cros_async::TimerAsync; |
| use cros_tracing::trace_event; |
| use crosvm_cli::bail_exit_code; |
| use crosvm_cli::sys::windows::exit::Exit; |
| use crosvm_cli::sys::windows::exit::ExitContext; |
| use crosvm_cli::sys::windows::exit::ExitContextAnyhow; |
| use devices::tsc::TscSyncMitigations; |
| use devices::Bus; |
| use devices::VcpuRunState; |
| use futures::pin_mut; |
| #[cfg(feature = "whpx")] |
| use hypervisor::whpx::WhpxVcpu; |
| #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| use hypervisor::CpuConfigX86_64; |
| use hypervisor::HypervisorCap; |
| use hypervisor::IoEventAddress; |
| use hypervisor::IoOperation; |
| use hypervisor::IoParams; |
| use hypervisor::VcpuExit; |
| use hypervisor::VcpuInitX86_64; |
| use hypervisor::VcpuRunHandle; |
| use sync::Condvar; |
| use sync::Mutex; |
| use vm_control::VmRunMode; |
| use winapi::shared::winerror::ERROR_RETRY; |
| #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| use x86_64::cpuid::adjust_cpuid; |
| #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| use x86_64::cpuid::CpuIdContext; |
| #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| use x86_64::X8664arch as Arch; |
| |
| #[cfg(feature = "stats")] |
| use crate::crosvm::sys::windows::stats::StatisticsCollector; |
| #[cfg(feature = "stats")] |
| use crate::crosvm::sys::windows::stats::VmExitStatistics; |
| use crate::sys::windows::save_vcpu_tsc_offset; |
| use crate::sys::windows::ExitState; |
| |
// `winapi` exposes `ERROR_RETRY` as a `u32`; convert it once here so it can be
// matched directly against the `i32` errno returned by `vcpu.run()` errors.
const ERROR_RETRY_I32: i32 = ERROR_RETRY as i32;
| |
/// Shared run-mode state used to coordinate vcpu threads: the current
/// `VmRunMode` behind a mutex, plus a condvar that vcpu loops and the stall
/// monitor block on while the VM is suspended.
#[derive(Default)]
pub struct VcpuRunMode {
    // Current run mode; written via `set_and_notify`.
    mtx: Mutex<VmRunMode>,
    // Notified whenever `mtx` changes so waiters can re-check the mode.
    cvar: Condvar,
}
| |
| impl VcpuRunMode { |
| pub fn set_and_notify(&self, new_mode: VmRunMode) { |
| *self.mtx.lock() = new_mode; |
| self.cvar.notify_all(); |
| } |
| } |
| |
/// Bundle produced by `VcpuRunThread::runnable_vcpu` once a vcpu is fully
/// configured and ready to enter its run loop.
struct RunnableVcpuInfo<V> {
    // The configured vcpu.
    vcpu: V,
    // Keeps the thread's elevated (multimedia) priority alive for the lifetime
    // of the vcpu thread; `None` when RT was not requested or could not be set.
    thread_priority_handle: Option<SafeMultimediaHandle>,
    // Handle obtained from `take_run_handle`, required by `vcpu.run()`.
    vcpu_run_handle: VcpuRunHandle,
}
| |
/// Per-vcpu timing data written by the vcpu run loop and sampled by the
/// `VcpuStallMonitor` thread to detect host-side stalls.
#[derive(Clone, Debug)]
struct VcpuMonitoringMetadata {
    // Baseline instant; all other times are measured relative to this.
    pub start_instant: Instant,
    // Milliseconds since the baseline start_instant
    pub last_run_time: Arc<AtomicU64>,
    // Snapshot of the most recent `vcpu.run()` exit; `None` until the vcpu
    // has exited at least once.
    pub last_exit_snapshot: Arc<Mutex<Option<VcpuExitData>>>,
}
| |
/// Identity and optional monitoring state for a single vcpu thread.
#[derive(Clone, Debug)]
struct VcpuRunThread {
    // Index of the vcpu this thread runs.
    pub cpu_id: usize,
    // Present only when vcpu monitoring is enabled (see `new`); consumed by
    // `VcpuStallMonitor`.
    pub monitoring_metadata: Option<VcpuMonitoringMetadata>,
}
| |
impl VcpuRunThread {
    /// Builds the descriptor for vcpu `cpu_id`. Monitoring metadata (the
    /// timestamps sampled by `VcpuStallMonitor`) is allocated only when
    /// `enable_vcpu_monitoring` is true.
    pub fn new(cpu_id: usize, enable_vcpu_monitoring: bool) -> VcpuRunThread {
        VcpuRunThread {
            cpu_id,
            monitoring_metadata: enable_vcpu_monitoring.then(|| VcpuMonitoringMetadata {
                start_instant: Instant::now(),
                last_run_time: Arc::new(AtomicU64::new(0)),
                last_exit_snapshot: Arc::new(Mutex::new(Option::None)),
            }),
        }
    }

    /// Perform WHPX-specific vcpu configurations
    #[cfg(feature = "whpx")]
    fn whpx_configure_vcpu(vcpu: &mut dyn VcpuArch, irq_chip: &mut dyn IrqChipArch) {
        // only apply to actual WhpxVcpu instances
        if let Some(whpx_vcpu) = vcpu.downcast_mut::<WhpxVcpu>() {
            // WhpxVcpu instances need to know the TSC and Lapic frequencies to handle Hyper-V MSR reads
            // and writes.
            let tsc_freq = devices::tsc::tsc_frequency()
                .map_err(|e| {
                    error!(
                        "Could not determine TSC frequency, WHPX vcpu will not be configured with \
                        a TSC Frequency: {e}"
                    );
                    e
                })
                .ok();
            whpx_vcpu.set_frequencies(tsc_freq, irq_chip.lapic_frequency());
        }
    }

    // Sets up a vcpu and converts it into a runnable vcpu.
    //
    // If `vcpu` is None the vcpu is created here, on the vcpu thread itself
    // (some hypervisors require that). The vcpu is then registered with the
    // irq chip, pinned to `vcpu_affinity` if given, configured for the
    // architecture, optionally raised to RT priority, and bound to this
    // thread via its run handle.
    fn runnable_vcpu<V>(
        cpu_id: usize,
        vcpu: Option<V>,
        vcpu_init: VcpuInitX86_64,
        vm: &impl VmArch,
        irq_chip: &mut dyn IrqChipArch,
        vcpu_count: usize,
        run_rt: bool,
        vcpu_affinity: Option<CpuSet>,
        no_smt: bool,
        has_bios: bool,
        host_cpu_topology: bool,
        force_calibrated_tsc_leaf: bool,
    ) -> Result<RunnableVcpuInfo<V>>
    where
        V: VcpuArch,
    {
        let mut vcpu = match vcpu {
            Some(v) => v,
            None => {
                // If vcpu is None, it means this arch/hypervisor requires create_vcpu to be called from
                // the vcpu thread.
                match vm
                    .create_vcpu(cpu_id)
                    .exit_context(Exit::CreateVcpu, "failed to create vcpu")?
                    .downcast::<V>()
                {
                    Ok(v) => *v,
                    Err(_) => panic!("VM created wrong type of VCPU"),
                }
            }
        };

        irq_chip
            .add_vcpu(cpu_id, &vcpu)
            .exit_context(Exit::AddIrqChipVcpu, "failed to add vcpu to irq chip")?;

        // Affinity failures are logged but not fatal: the vcpu still runs,
        // just without the requested pinning.
        if let Some(affinity) = vcpu_affinity {
            if let Err(e) = set_cpu_affinity(affinity) {
                error!("Failed to set CPU affinity: {}", e);
            }
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        let cpu_config = Some(CpuConfigX86_64::new(
            force_calibrated_tsc_leaf,
            host_cpu_topology,
            false, /* enable_hwp */
            false, /* enable_pnp_data */
            no_smt,
            false, /* itmt */
            None,  /* hybrid_type */
        ));

        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
        let cpu_config = None;

        Arch::configure_vcpu(
            vm,
            vm.get_hypervisor(),
            irq_chip,
            &mut vcpu,
            vcpu_init,
            cpu_id,
            vcpu_count,
            has_bios,
            cpu_config,
        )
        .exit_context(Exit::ConfigureVcpu, "failed to configure vcpu")?;

        #[cfg(feature = "whpx")]
        Self::whpx_configure_vcpu(&mut vcpu, irq_chip);

        let mut thread_priority_handle = None;
        if run_rt {
            // Until we are multi process on Windows, we can't use the normal thread priority APIs;
            // instead, we use a trick from the audio device which is able to set a thread RT even
            // though the process itself is not RT.
            thread_priority_handle = match set_audio_thread_priority() {
                Ok(hndl) => Some(hndl),
                Err(e) => {
                    warn!("Failed to set vcpu thread to real time priority: {}", e);
                    None
                }
            };
        }

        let vcpu_run_handle = vcpu
            .take_run_handle(None)
            .exit_context(Exit::RunnableVcpu, "failed to set thread id for vcpu")?;

        Ok(RunnableVcpuInfo {
            vcpu,
            thread_priority_handle,
            vcpu_run_handle,
        })
    }

    /// Spawns the thread that runs this vcpu and returns its join handle.
    ///
    /// The spawned thread signals `vcpu_create_barrier` once its vcpu is
    /// created (the main thread serializes vcpu creation on it), then waits
    /// on `start_barrier` before entering the run loop. On every exit path a
    /// final `VmEventType` is sent over `vm_evt_wrtube`.
    pub fn run<V>(
        &self,
        vcpu: Option<V>,
        vcpu_init: VcpuInitX86_64,
        vcpus: Arc<Mutex<Vec<Box<dyn VcpuArch>>>>,
        vm: impl VmArch + 'static,
        mut irq_chip: Box<dyn IrqChipArch + 'static>,
        vcpu_count: usize,
        run_rt: bool,
        vcpu_affinity: Option<CpuSet>,
        delay_rt: bool,
        no_smt: bool,
        start_barrier: Arc<Barrier>,
        vcpu_create_barrier: Arc<Barrier>,
        has_bios: bool,
        mut io_bus: devices::Bus,
        mut mmio_bus: devices::Bus,
        vm_evt_wrtube: SendTube,
        requires_pvclock_ctrl: bool,
        run_mode_arc: Arc<VcpuRunMode>,
        #[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>,
        host_cpu_topology: bool,
        tsc_offset: Option<u64>,
        force_calibrated_tsc_leaf: bool,
    ) -> Result<JoinHandle<Result<()>>>
    where
        V: VcpuArch + 'static,
    {
        let context = self.clone();
        thread::Builder::new()
            .name(format!("crosvm_vcpu{}", self.cpu_id))
            .spawn(move || {
                // Having a closure returning ExitState guarantees that we
                // send a VmEventType on all code paths after the closure
                // returns.
                let vcpu_fn = || -> Result<ExitState> {
                    let runnable_vcpu = Self::runnable_vcpu(
                        context.cpu_id,
                        vcpu,
                        vcpu_init,
                        &vm,
                        irq_chip.as_mut(),
                        vcpu_count,
                        run_rt && !delay_rt,
                        vcpu_affinity,
                        no_smt,
                        has_bios,
                        host_cpu_topology,
                        force_calibrated_tsc_leaf,
                    );

                    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                    let cpu_config = CpuConfigX86_64::new(
                        force_calibrated_tsc_leaf,
                        host_cpu_topology,
                        false, /* enable_hwp */
                        false, /* enable_pnp_data */
                        no_smt,
                        false, /* itmt */
                        None,  /* hybrid_type */
                    );

                    // Pre-computed context used to adjust guest CPUID queries
                    // in the run loop.
                    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                    let cpuid_context = CpuIdContext::new(
                        context.cpu_id,
                        vcpu_count,
                        Some(irq_chip.as_ref()),
                        cpu_config,
                        vm.get_hypervisor()
                            .check_capability(HypervisorCap::CalibratedTscLeafRequired),
                        __cpuid_count,
                        __cpuid,
                    );

                    // The vcpu_create_barrier is supplied from the main thread in order for it to
                    // wait until this thread is done creating its vcpu.
                    vcpu_create_barrier.wait();

                    // Wait for this barrier before continuing forward.
                    start_barrier.wait();

                    // Note: any vcpu creation error is surfaced only now, after
                    // both barriers, so the main thread is never left waiting.
                    let RunnableVcpuInfo {
                        vcpu,
                        thread_priority_handle: _thread_priority_handle,
                        vcpu_run_handle,
                    } = runnable_vcpu?;

                    if let Some(offset) = tsc_offset {
                        vcpu.set_tsc_offset(offset).unwrap_or_else(|e| {
                            error!(
                                "Failed to set tsc_offset of {} on vcpu {}: {}",
                                offset, context.cpu_id, e
                            )
                        });
                    }

                    // Clone vcpu so it can be used by the main thread to force a vcpu run to exit
                    vcpus
                        .lock()
                        .push(Box::new(vcpu.try_clone().expect("Could not clone vcpu!")));

                    mmio_bus.set_access_id(context.cpu_id);
                    io_bus.set_access_id(context.cpu_id);

                    vcpu_loop(
                        &context,
                        vcpu,
                        vm,
                        vcpu_run_handle,
                        irq_chip,
                        io_bus,
                        mmio_bus,
                        requires_pvclock_ctrl,
                        run_mode_arc,
                        #[cfg(feature = "stats")]
                        stats,
                        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                        cpuid_context,
                    )
                };

                let final_event_data = match vcpu_fn().unwrap_or_else(|e| {
                    error!(
                        "vcpu {} run loop exited with error: {:#}",
                        context.cpu_id, e
                    );
                    ExitState::Stop
                }) {
                    ExitState::Stop => VmEventType::Exit,
                    _ => unreachable!(),
                };
                vm_evt_wrtube
                    .send::<VmEventType>(&final_event_data)
                    .unwrap_or_else(|e| {
                        error!(
                            "failed to send final event {:?} on vcpu {}: {}",
                            final_event_data, context.cpu_id, e
                        )
                    });
                Ok(())
            })
            .exit_context(Exit::SpawnVcpu, "failed to spawn VCPU thread")
    }
}
| |
/// Data recorded on every `vcpu.run()` exit; read by the stall monitor to
/// determine how long a vcpu has been handling its latest exit.
#[derive(Clone, Debug)]
struct VcpuExitData {
    // Represented by duration since baseline start_instant
    exit_time: Duration,
    // The result of the `vcpu.run()` call that produced this exit.
    exit_result: BaseResult<VcpuExit>,
}
| |
| impl Display for VcpuExitData { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!(f, "exit result: {:?}", self.exit_result) |
| } |
| } |
| |
/// Periodically samples every registered vcpu thread's monitoring metadata
/// and reports vcpus that appear stalled on the host (i.e. stuck handling an
/// exit without re-entering the guest).
struct VcpuStallMonitor {
    // Threads to check; each must carry `monitoring_metadata`.
    vcpu_run_threads: Vec<VcpuRunThread>,
    // Shared run mode, used to pause monitoring during suspension and to
    // detect VM exit.
    run_mode: Arc<VcpuRunMode>,
}
| |
| impl VcpuStallMonitor { |
| const HOST_STALL_TIMEOUT: Duration = Duration::from_secs(2); |
| const VCPU_CHECKUP_INTERVAL: Duration = Duration::from_secs(1); |
| const STALL_REPORTING_LIMITER: Duration = Duration::from_secs(10); |
| |
| pub fn init(run_mode: Arc<VcpuRunMode>) -> VcpuStallMonitor { |
| VcpuStallMonitor { |
| vcpu_run_threads: vec![], |
| run_mode, |
| } |
| } |
| |
| pub fn add_vcpu_thread(&mut self, thread: VcpuRunThread) { |
| self.vcpu_run_threads.push(thread); |
| } |
| |
| pub fn run(self, exit_event: &Event) -> Result<JoinHandle<Result<()>>> { |
| let cloned_exit_event = exit_event |
| .try_clone() |
| .exit_context(Exit::CloneEvent, "failed to clone event")?; |
| thread::Builder::new() |
| .name("crosvm_vcpu_stall_monitor".to_string()) |
| .spawn(move || { |
| let ex = Executor::new()?; |
| |
| let mut timer = TimerAsync::new(Timer::new()?, &ex)?; |
| let mut reset_timer = true; |
| |
| let exit_evt_async = EventAsync::new(cloned_exit_event, &ex)?; |
| let exit_future = exit_evt_async.next_val(); |
| pin_mut!(exit_future); |
| 'main: loop { |
| if reset_timer { |
| timer.reset( |
| Self::VCPU_CHECKUP_INTERVAL, |
| Some(Self::VCPU_CHECKUP_INTERVAL), |
| )?; |
| reset_timer = false; |
| } |
| let timer_future = timer.wait(); |
| pin_mut!(timer_future); |
| match ex.run_until(select2(timer_future, exit_future)) { |
| Ok((timer_result, exit_result)) => { |
| match exit_result { |
| SelectResult::Finished(_) => { |
| info!("vcpu monitor got exit event"); |
| break 'main; |
| } |
| SelectResult::Pending(future) => exit_future = future, |
| } |
| |
| match timer_result { |
| SelectResult::Finished(Err(e)) => { |
| error!( |
| "vcpu monitor aborting due to error awaiting future: {}", |
| e |
| ); |
| break 'main; |
| } |
| SelectResult::Finished(_) => self.report_any_stalls(), |
| _ => (), |
| } |
| } |
| Err(e) => { |
| error!("vcpu monitor failed to wait on future set: {:?}", e); |
| break 'main; |
| } |
| } |
| |
| // Always ensure the vcpus aren't suspended before continuing to montior. |
| let mut run_mode_lock = self.run_mode.mtx.lock(); |
| loop { |
| match *run_mode_lock { |
| VmRunMode::Running => break, |
| VmRunMode::Suspending | VmRunMode::Breakpoint => { |
| info!("vcpu monitor pausing until end of suspension"); |
| run_mode_lock = self.run_mode.cvar.wait(run_mode_lock); |
| reset_timer = true; |
| } |
| VmRunMode::Exiting => { |
| info!("vcpu monitor detected vm exit"); |
| break 'main; |
| } |
| } |
| } |
| } |
| |
| Ok(()) |
| }) |
| .exit_context( |
| Exit::SpawnVcpuMonitor, |
| "failed to spawn VCPU stall monitor thread", |
| ) |
| } |
| |
| fn report_any_stalls(&self) { |
| // TODO(b/208267651): Add and fire Clearcut events for stalls (and add tests) |
| // TODO(b/208267651): Also test guest stalls (vcpu.run() goes too long without exiting) |
| let now = Instant::now(); |
| for vcpu_thread in self.vcpu_run_threads.iter() { |
| let monitoring_metadata = vcpu_thread.monitoring_metadata.as_ref().unwrap(); |
| if let Some(ref exit_snapshot) = monitoring_metadata.last_exit_snapshot.lock().clone() { |
| let last_run = |
| Duration::from_millis(monitoring_metadata.last_run_time.load(Ordering::SeqCst)); |
| if last_run < exit_snapshot.exit_time { |
| // VCPU is between runs |
| let time_since_exit = now.saturating_duration_since( |
| monitoring_metadata.start_instant + exit_snapshot.exit_time, |
| ); |
| if time_since_exit > Self::HOST_STALL_TIMEOUT { |
| self.report_stall(vcpu_thread.cpu_id, exit_snapshot, time_since_exit); |
| } |
| } |
| }; |
| } |
| } |
| |
| fn report_stall(&self, cpu_id: usize, exit_data: &VcpuExitData, stall_time: Duration) { |
| if stall_time > Self::STALL_REPORTING_LIMITER { |
| return; |
| } |
| // Double check the Vm is running. We don't care about stalls during suspension/exit |
| if *self.run_mode.mtx.lock() != VmRunMode::Running { |
| let duration_string = format!("{:.1}sec", stall_time.as_secs_f32()); |
| error!( |
| "Host stall for {} on VCPU {} exit while handling: {}", |
| duration_string, cpu_id, exit_data, |
| ); |
| } |
| } |
| } |
| |
/// No-op on Windows: there is no per-vcpu signal handler to install. The stub
/// keeps the call site in `run_all_vcpus` platform-independent.
fn setup_vcpu_signal_handler() -> Result<()> {
    Ok(())
}
| |
/// Spawns one run-loop thread per vcpu (plus, when vcpu monitoring is
/// enabled, a `VcpuStallMonitor` thread), then releases all vcpu threads
/// simultaneously via a shared start barrier. Returns the join handles; the
/// monitor's handle, if present, is last.
pub fn run_all_vcpus<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
    vcpus: Vec<Option<Vcpu>>,
    vcpu_boxes: Arc<Mutex<Vec<Box<dyn VcpuArch>>>>,
    guest_os: &RunnableLinuxVm<V, Vcpu>,
    exit_evt: &Event,
    vm_evt_wrtube: &SendTube,
    pvclock_host_tube: &Option<Tube>,
    #[cfg(feature = "stats")] stats: &Option<Arc<Mutex<StatisticsCollector>>>,
    host_cpu_topology: bool,
    run_mode_arc: Arc<VcpuRunMode>,
    tsc_sync_mitigations: TscSyncMitigations,
    force_calibrated_tsc_leaf: bool,
) -> Result<Vec<JoinHandle<Result<()>>>> {
    // +1 slots: one per vcpu, plus room for the stall monitor handle.
    let mut vcpu_threads = Vec::with_capacity(guest_os.vcpu_count + 1);
    // +1 party: this (main) thread also waits, so all vcpus start together.
    let start_barrier = Arc::new(Barrier::new(guest_os.vcpu_count + 1));
    let enable_vcpu_monitoring = anti_tamper::enable_vcpu_monitoring();
    setup_vcpu_signal_handler()?;

    let mut stall_monitor =
        enable_vcpu_monitoring.then(|| VcpuStallMonitor::init(run_mode_arc.clone()));
    for (cpu_id, vcpu) in vcpus.into_iter().enumerate() {
        // Resolve the command-line affinity for this vcpu, if any.
        let vcpu_affinity = match guest_os.vcpu_affinity.clone() {
            Some(VcpuAffinity::Global(v)) => Some(v),
            Some(VcpuAffinity::PerVcpu(mut m)) => Some(m.remove(&cpu_id).unwrap_or_default()),
            None => None,
        };

        // TSC sync mitigations may set vcpu affinity and set a TSC offset;
        // an explicit command-line affinity takes precedence over them.
        let (vcpu_affinity, tsc_offset): (Option<CpuSet>, Option<u64>) =
            if let Some(mitigation_affinity) = tsc_sync_mitigations.get_vcpu_affinity(cpu_id) {
                if vcpu_affinity.is_none() {
                    (
                        Some(CpuSet::new(mitigation_affinity)),
                        tsc_sync_mitigations.get_vcpu_tsc_offset(cpu_id),
                    )
                } else {
                    error!(
                        "Core affinity {:?} specified via commandline conflicts and overrides \
                        affinity needed for TSC sync mitigation: {:?}.",
                        vcpu_affinity, mitigation_affinity
                    );
                    (vcpu_affinity, None)
                }
            } else {
                (vcpu_affinity, None)
            };

        let vcpu_init = &guest_os.vcpu_init[cpu_id];
        // The vcpu_create_barrier allows the main thread to delay the spawning of additional
        // vcpu threads until a single vcpu thread spawned has finished creating its vcpu.
        // We currently use this to allow creation of 1 vcpu at a time for all hypervisors.
        // There are issues with multiple hypervisors with this approach:
        // - Windows 11 has a regression which causes a BSOD with creation of multiple vcpu
        //   in parallel. http://b/229635845 for more details.
        // - GHAXM/HAXM cannot create vcpu0 in parallel with other Vcpus.
        let vcpu_create_barrier = Arc::new(Barrier::new(2));
        let vcpu_run_thread = VcpuRunThread::new(cpu_id, enable_vcpu_monitoring);
        let join_handle = vcpu_run_thread.run(
            vcpu,
            vcpu_init.clone(),
            vcpu_boxes.clone(),
            guest_os
                .vm
                .try_clone()
                .exit_context(Exit::CloneEvent, "failed to clone vm")?,
            guest_os
                .irq_chip
                .try_box_clone()
                .exit_context(Exit::CloneEvent, "failed to clone event")?,
            guest_os.vcpu_count,
            guest_os.rt_cpus.contains(&cpu_id),
            vcpu_affinity,
            guest_os.delay_rt,
            guest_os.no_smt,
            start_barrier.clone(),
            vcpu_create_barrier.clone(),
            guest_os.has_bios,
            (*guest_os.io_bus).clone(),
            (*guest_os.mmio_bus).clone(),
            vm_evt_wrtube
                .try_clone()
                .exit_context(Exit::CloneTube, "failed to clone tube")?,
            // pvclock_ctrl is only required when there is no pvclock host tube.
            pvclock_host_tube.is_none(),
            run_mode_arc.clone(),
            #[cfg(feature = "stats")]
            stats.clone(),
            host_cpu_topology,
            tsc_offset,
            force_calibrated_tsc_leaf,
        )?;
        if let Some(ref mut monitor) = stall_monitor {
            monitor.add_vcpu_thread(vcpu_run_thread);
        }

        // Wait until the vcpu is created before we start a new vcpu thread
        vcpu_create_barrier.wait();

        vcpu_threads.push(join_handle);
    }
    if let Some(monitor) = stall_monitor {
        vcpu_threads.push(monitor.run(exit_evt)?);
    }
    // Now wait on the start barrier to start all threads at the same time.
    start_barrier.wait();
    Ok(vcpu_threads)
}
| |
/// The main execution loop for a single vcpu: waits until the vcpu is
/// runnable, calls `vcpu.run()`, dispatches on the resulting exit, and honors
/// `run_mode_arc` transitions (suspend/breakpoint/exit). Returns only when
/// the VM enters `VmRunMode::Exiting` (or an unrecoverable error occurs).
fn vcpu_loop<V>(
    context: &VcpuRunThread,
    mut vcpu: V,
    vm: impl VmArch + 'static,
    vcpu_run_handle: VcpuRunHandle,
    irq_chip: Box<dyn IrqChipArch + 'static>,
    io_bus: Bus,
    mmio_bus: Bus,
    requires_pvclock_ctrl: bool,
    run_mode_arc: Arc<VcpuRunMode>,
    #[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>,
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] cpuid_context: CpuIdContext,
) -> Result<ExitState>
where
    V: VcpuArch + 'static,
{
    #[cfg(feature = "stats")]
    let mut exit_stats = VmExitStatistics::new();

    #[cfg(feature = "stats")]
    {
        mmio_bus.stats.lock().set_enabled(stats.is_some());
        io_bus.stats.lock().set_enabled(stats.is_some());
        exit_stats.set_enabled(stats.is_some());
    }

    // The TSC offset is saved once, on the first successful run iteration.
    let mut save_tsc_offset = true;

    loop {
        let _trace_event = trace_event!(crosvm, "vcpu loop");
        let mut check_vm_shutdown = false;

        match irq_chip.wait_until_runnable(&vcpu).with_exit_context(
            Exit::WaitUntilRunnable,
            || {
                format!(
                    "error waiting for vcpu {} to become runnable",
                    context.cpu_id
                )
            },
        )? {
            VcpuRunState::Runnable => {}
            // An interrupted wait may mean the VM is shutting down; check
            // the run mode below instead of entering the guest.
            VcpuRunState::Interrupted => check_vm_shutdown = true,
        }

        if !check_vm_shutdown {
            let exit = {
                let _trace_event = trace_event!(crosvm, "vcpu::run");
                // Record the run start so the stall monitor can tell whether
                // this vcpu is inside the guest or handling an exit.
                if let Some(ref monitoring_metadata) = context.monitoring_metadata {
                    monitoring_metadata.last_run_time.store(
                        // Safe conversion because millis will always be < u32::MAX
                        monitoring_metadata
                            .start_instant
                            .elapsed()
                            .as_millis()
                            .try_into()
                            .unwrap(),
                        Ordering::SeqCst,
                    );
                }
                vcpu.run(&vcpu_run_handle)
            };
            // Record the exit snapshot for the stall monitor.
            if let Some(ref monitoring_metadata) = context.monitoring_metadata {
                *monitoring_metadata.last_exit_snapshot.lock() = Some(VcpuExitData {
                    exit_time: monitoring_metadata.start_instant.elapsed(),
                    exit_result: exit,
                });
            }

            // save the tsc offset if we need to
            if save_tsc_offset {
                if let Ok(offset) = vcpu.get_tsc_offset() {
                    save_vcpu_tsc_offset(offset, context.cpu_id);
                } else {
                    error!("Unable to determine TSC offset");
                }
                save_tsc_offset = false;
            }

            #[cfg(feature = "stats")]
            let start = exit_stats.start_stat();

            match exit {
                Ok(VcpuExit::Io) => {
                    let _trace_event = trace_event!(crosvm, "VcpuExit::Io");
                    vcpu.handle_io(&mut |IoParams { address, mut size, operation}| {
                        match operation {
                            IoOperation::Read => {
                                let mut data = [0u8; 8];
                                // Clamp oversized accesses to the 8-byte buffer.
                                if size > data.len() {
                                    error!("unsupported IoIn size of {} bytes", size);
                                    size = data.len();
                                }
                                io_bus.read(address, &mut data[..size]);
                                Some(data)
                            }
                            IoOperation::Write { data } => {
                                if size > data.len() {
                                    error!("unsupported IoOut size of {} bytes", size);
                                    size = data.len()
                                }
                                vm.handle_io_events(IoEventAddress::Pio(address), &data[..size])
                                    .unwrap_or_else(|e| error!(
                                        "failed to handle ioevent for pio write to {} on vcpu {}: {}",
                                        address, context.cpu_id, e
                                    ));
                                io_bus.write(address, &data[..size]);
                                None
                            }
                        }
                    }).unwrap_or_else(|e| error!("failed to handle io: {}", e));
                }
                Ok(VcpuExit::Mmio) => {
                    let _trace_event = trace_event!(crosvm, "VcpuExit::Mmio");
                    vcpu.handle_mmio(&mut |IoParams { address, mut size, operation }| {
                        match operation {
                            IoOperation::Read => {
                                let mut data = [0u8; 8];
                                if size > data.len() {
                                    error!("unsupported MmioRead size of {} bytes", size);
                                    size = data.len();
                                }
                                {
                                    let data = &mut data[..size];
                                    // Fall back to raw guest memory when no
                                    // device on the bus claims the address.
                                    if !mmio_bus.read(address, data) {
                                        info!(
                                            "mmio read failed: {:x}; trying memory read..",
                                            address
                                        );
                                        vm.get_memory()
                                            .read_exact_at_addr(
                                                data,
                                                vm_memory::GuestAddress(address),
                                            )
                                            .unwrap_or_else(|e| {
                                                error!(
                                                    "guest memory read failed at {:x}: {}",
                                                    address, e
                                                )
                                            });
                                    }
                                }
                                Some(data)
                            }
                            IoOperation::Write { data } => {
                                if size > data.len() {
                                    error!("unsupported MmioWrite size of {} bytes", size);
                                    size = data.len()
                                }
                                let data = &data[..size];
                                vm.handle_io_events(IoEventAddress::Mmio(address), data)
                                    .unwrap_or_else(|e| error!(
                                        "failed to handle ioevent for mmio write to {} on vcpu {}: {}",
                                        address, context.cpu_id, e
                                    ));
                                // Same fallback as reads: unclaimed writes go
                                // straight to guest memory.
                                if !mmio_bus.write(address, data) {
                                    info!(
                                        "mmio write failed: {:x}; trying memory write..",
                                        address
                                    );
                                    vm.get_memory()
                                        .write_all_at_addr(data, vm_memory::GuestAddress(address))
                                        .unwrap_or_else(|e| error!(
                                            "guest memory write failed at {:x}: {}",
                                            address, e
                                        ));
                                }
                                None
                            }
                        }
                    }).unwrap_or_else(|e| error!("failed to handle mmio: {}", e));
                }
                Ok(VcpuExit::IoapicEoi { vector }) => {
                    irq_chip.broadcast_eoi(vector).unwrap_or_else(|e| {
                        error!(
                            "failed to broadcast eoi {} on vcpu {}: {}",
                            vector, context.cpu_id, e
                        )
                    });
                }
                Ok(VcpuExit::IrqWindowOpen) => {}
                Ok(VcpuExit::Hlt) => irq_chip.halted(context.cpu_id),

                // VcpuExit::Shutdown is always an error on Windows. HAXM exits with
                // Shutdown only for triple faults and other vcpu panics. WHPX never exits
                // with Shutdown. Normal reboots and shutdowns, like window close, use
                // the vm event tube and VmRunMode::Exiting instead of VcpuExit::Shutdown.
                Ok(VcpuExit::Shutdown) => bail_exit_code!(Exit::VcpuShutdown, "vcpu shutdown"),
                Ok(VcpuExit::FailEntry {
                    hardware_entry_failure_reason,
                }) => bail_exit_code!(
                    Exit::VcpuFailEntry,
                    "vcpu hw run failure: {:#x}",
                    hardware_entry_failure_reason,
                ),
                Ok(VcpuExit::SystemEventShutdown) => {
                    bail_exit_code!(Exit::VcpuSystemEvent, "vcpu SystemEventShutdown")
                }
                Ok(VcpuExit::SystemEventReset) => {
                    bail_exit_code!(Exit::VcpuSystemEvent, "vcpu SystemEventReset")
                }
                Ok(VcpuExit::SystemEventCrash) => {
                    bail_exit_code!(Exit::VcpuSystemEvent, "vcpu SystemEventCrash")
                }

                // When we're shutting down (e.g., emulator window gets closed), GVM vmexits
                // with KVM_EXIT_INTR, which vcpu.run maps to VcpuExit::Intr. But KVM_EXIT_INTR
                // can happen during normal operation too, when GVM's timer finds requests
                // pending from the host. So we set check_vm_shutdown, then below check the
                // VmRunMode state to see if we should exit the run loop.
                Ok(VcpuExit::Intr) => check_vm_shutdown = true,
                Ok(VcpuExit::Canceled) => check_vm_shutdown = true,
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                Ok(VcpuExit::Cpuid { mut entry }) => {
                    let _trace_event = trace_event!(crosvm, "VcpuExit::Cpuid");
                    // adjust the results based on crosvm logic
                    adjust_cpuid(&mut entry, &cpuid_context);

                    // let the vcpu finish handling the exit
                    vcpu.handle_cpuid(&entry).unwrap_or_else(|e| {
                        error!(
                            "failed to handle setting cpuid results on cpu {}: {}",
                            context.cpu_id, e
                        )
                    });
                }
                #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
                Ok(VcpuExit::MsrAccess) => {} // MsrAccess handled by hypervisor impl
                Ok(r) => {
                    error!("unexpected vcpu.run return value: {:?}", r);
                    check_vm_shutdown = true;
                }
                Err(e) => match e.errno() {
                    // Transient failure; simply retry on the next iteration.
                    ERROR_RETRY_I32 => {}
                    _ => {
                        run_mode_arc.set_and_notify(VmRunMode::Exiting);
                        Err(e).exit_context(Exit::VcpuRunError, "vcpu run error")?;
                    }
                },
            }

            #[cfg(feature = "stats")]
            exit_stats.end_stat(&exit, start);
        }

        if check_vm_shutdown {
            let mut run_mode_lock = run_mode_arc.mtx.lock();
            loop {
                match *run_mode_lock {
                    VmRunMode::Running => break,
                    VmRunMode::Suspending => {
                        // On KVM implementations that use a paravirtualized clock (e.g.
                        // x86), a flag must be set to indicate to the guest kernel that
                        // a VCPU was suspended. The guest kernel will use this flag to
                        // prevent the soft lockup detection from triggering when this
                        // VCPU resumes, which could happen days later in realtime.
                        if requires_pvclock_ctrl {
                            vcpu.pvclock_ctrl().unwrap_or_else(|e| error!(
                                "failed to signal to hypervisor that vcpu {} is being suspended: {}",
                                context.cpu_id, e
                            ));
                        }
                    }
                    VmRunMode::Breakpoint => {}
                    VmRunMode::Exiting => {
                        #[cfg(feature = "stats")]
                        if let Some(stats) = stats {
                            let mut collector = stats.lock();
                            collector.pio_bus_stats.push(io_bus.stats);
                            collector.mmio_bus_stats.push(mmio_bus.stats);
                            collector.vm_exit_stats.push(exit_stats);
                        }
                        return Ok(ExitState::Stop);
                    }
                }
                // Give ownership of our exclusive lock to the condition variable that
                // will block. When the condition variable is notified, `wait` will
                // unblock and return a new exclusive lock.
                run_mode_lock = run_mode_arc.cvar.wait(run_mode_lock);
            }
        }

        irq_chip.inject_interrupts(&vcpu).unwrap_or_else(|e| {
            error!(
                "failed to inject interrupts for vcpu {}: {}",
                context.cpu_id, e
            )
        });
    }
}
| |
#[cfg(test)]
mod tests {
    use super::*;

    // Fixture holding a monitor with registered vcpu threads and the exit
    // event used to stop it.
    struct SetupData {
        pub monitor: VcpuStallMonitor,
        pub exit_evt: Event,
    }

    // Builds a stall monitor with `vcpu_count` monitored vcpu threads.
    fn set_up_stall_monitor(vcpu_count: usize) -> Result<SetupData> {
        let run_mode = Arc::new(VcpuRunMode::default());
        let mut monitor = VcpuStallMonitor::init(run_mode);

        for id in 0..vcpu_count {
            let new_vcpu = VcpuRunThread::new(id, true /* enable_vcpu_monitoring */);
            monitor.add_vcpu_thread(new_vcpu);
        }

        Ok(SetupData {
            monitor,
            exit_evt: Event::new().expect("Failed to create event"),
        })
    }

    // Signaling the exit event before starting the monitor should make its
    // thread observe the event and terminate promptly.
    #[test]
    fn stall_monitor_closes_on_exit_evt() -> Result<()> {
        let SetupData { monitor, exit_evt } = set_up_stall_monitor(1)?;

        exit_evt.signal()?;
        let _ = monitor
            .run(&exit_evt)?
            .join()
            .unwrap_or_else(|e| panic!("Thread join failed: {:?}", e));
        Ok(())
    }
}