| // Copyright 2022 The ChromiumOS Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
// TODO(b:240716507): There is a huge chunk of code that depends on haxm, whpx, or gvm being
// enabled but isn't marked so. Remove these allows once that code is properly cfg-gated.
| #![allow(dead_code, unused_imports, unused_variables, unreachable_code)] |
| |
| pub(crate) mod control_server; |
| pub(crate) mod irq_wait; |
| pub(crate) mod main; |
| #[cfg(not(feature = "crash-report"))] |
| mod panic_hook; |
| |
| mod generic; |
| use generic as product; |
| pub(crate) mod run_vcpu; |
| |
| #[cfg(feature = "whpx")] |
| use std::arch::x86_64::__cpuid; |
| #[cfg(feature = "whpx")] |
| use std::arch::x86_64::__cpuid_count; |
| use std::cmp::Reverse; |
| use std::collections::BTreeMap; |
| use std::collections::HashMap; |
| use std::fs::File; |
| use std::fs::OpenOptions; |
| use std::io::stdin; |
| use std::iter; |
| use std::mem; |
| use std::os::windows::fs::OpenOptionsExt; |
| use std::path::PathBuf; |
| use std::sync::mpsc; |
| use std::sync::Arc; |
| |
| #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] |
| use aarch64::AArch64 as Arch; |
| use acpi_tables::sdt::SDT; |
| use anyhow::anyhow; |
| use anyhow::bail; |
| use anyhow::Context; |
| use anyhow::Result; |
| use arch::CpuConfigArch; |
| use arch::DtbOverlay; |
| use arch::IrqChipArch; |
| use arch::LinuxArch; |
| use arch::RunnableLinuxVm; |
| use arch::VcpuArch; |
| use arch::VirtioDeviceStub; |
| use arch::VmArch; |
| use arch::VmComponents; |
| use arch::VmImage; |
| use base::enable_high_res_timers; |
| use base::error; |
| use base::info; |
| use base::open_file_or_duplicate; |
| use base::warn; |
| use base::AsRawDescriptor; |
| #[cfg(feature = "gpu")] |
| use base::BlockingMode; |
| use base::CloseNotifier; |
| use base::Event; |
| use base::EventToken; |
| use base::EventType; |
| use base::FlushOnDropTube; |
| #[cfg(feature = "gpu")] |
| use base::FramingMode; |
| use base::FromRawDescriptor; |
| use base::ProtoTube; |
| use base::RawDescriptor; |
| use base::ReadNotifier; |
| use base::RecvTube; |
| use base::SendTube; |
| #[cfg(feature = "gpu")] |
| use base::StreamChannel; |
| use base::Terminal; |
| use base::TriggeredEvent; |
| use base::Tube; |
| use base::TubeError; |
| use base::VmEventType; |
| use base::WaitContext; |
| use broker_ipc::common_child_setup; |
| use broker_ipc::CommonChildStartupArgs; |
| use control_server::ControlServer; |
| use crosvm_cli::sys::windows::exit::Exit; |
| use crosvm_cli::sys::windows::exit::ExitContext; |
| use crosvm_cli::sys::windows::exit::ExitContextAnyhow; |
| use crosvm_cli::sys::windows::exit::ExitContextOption; |
| use devices::create_devices_worker_thread; |
| use devices::serial_device::SerialHardware; |
| use devices::serial_device::SerialParameters; |
| use devices::tsc::get_tsc_sync_mitigations; |
| use devices::tsc::standard_deviation; |
| use devices::tsc::TscSyncMitigations; |
| use devices::virtio; |
| use devices::virtio::block::DiskOption; |
| #[cfg(feature = "audio")] |
| use devices::virtio::snd::common_backend::VirtioSnd; |
| #[cfg(feature = "audio")] |
| use devices::virtio::snd::parameters::Parameters as SndParameters; |
| #[cfg(feature = "gpu")] |
| use devices::virtio::vhost::user::device::gpu::sys::windows::GpuVmmConfig; |
| #[cfg(feature = "gpu")] |
| use devices::virtio::vhost::user::device::gpu::sys::windows::InputEventSplitConfig; |
| #[cfg(feature = "gpu")] |
| use devices::virtio::vhost::user::device::gpu::sys::windows::InputEventVmmConfig; |
| #[cfg(feature = "gpu")] |
| use devices::virtio::vhost::user::gpu::sys::windows::product::GpuBackendConfig as GpuBackendConfigProduct; |
| #[cfg(feature = "gpu")] |
| use devices::virtio::vhost::user::gpu::sys::windows::run_gpu_device_worker; |
| #[cfg(feature = "audio")] |
| use devices::virtio::vhost::user::snd::sys::windows::product::SndBackendConfig as SndBackendConfigProduct; |
| #[cfg(feature = "balloon")] |
| use devices::virtio::BalloonFeatures; |
| #[cfg(feature = "balloon")] |
| use devices::virtio::BalloonMode; |
| use devices::virtio::Console; |
| #[cfg(feature = "gpu")] |
| use devices::virtio::GpuParameters; |
| use devices::BusDeviceObj; |
| #[cfg(feature = "gvm")] |
| use devices::GvmIrqChip; |
| #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] |
| use devices::IrqChip; |
| use devices::UserspaceIrqChip; |
| use devices::VcpuRunState; |
| use devices::VirtioPciDevice; |
| #[cfg(feature = "whpx")] |
| use devices::WhpxSplitIrqChip; |
| #[cfg(feature = "gpu")] |
| use gpu_display::EventDevice; |
| #[cfg(feature = "gpu")] |
| use gpu_display::WindowProcedureThread; |
| #[cfg(feature = "gpu")] |
| use gpu_display::WindowProcedureThreadBuilder; |
| #[cfg(feature = "gvm")] |
| use hypervisor::gvm::Gvm; |
| #[cfg(feature = "gvm")] |
| use hypervisor::gvm::GvmVcpu; |
| #[cfg(feature = "gvm")] |
| use hypervisor::gvm::GvmVersion; |
| #[cfg(feature = "gvm")] |
| use hypervisor::gvm::GvmVm; |
| #[cfg(feature = "haxm")] |
| use hypervisor::haxm::get_use_ghaxm; |
| #[cfg(feature = "haxm")] |
| use hypervisor::haxm::set_use_ghaxm; |
| #[cfg(feature = "haxm")] |
| use hypervisor::haxm::Haxm; |
| #[cfg(feature = "haxm")] |
| use hypervisor::haxm::HaxmVcpu; |
| #[cfg(feature = "haxm")] |
| use hypervisor::haxm::HaxmVm; |
| #[cfg(feature = "whpx")] |
| use hypervisor::whpx::Whpx; |
| #[cfg(feature = "whpx")] |
| use hypervisor::whpx::WhpxFeature; |
| #[cfg(feature = "whpx")] |
| use hypervisor::whpx::WhpxVcpu; |
| #[cfg(feature = "whpx")] |
| use hypervisor::whpx::WhpxVm; |
| use hypervisor::Hypervisor; |
| #[cfg(feature = "whpx")] |
| use hypervisor::HypervisorCap; |
| #[cfg(feature = "whpx")] |
| use hypervisor::HypervisorX86_64; |
| use hypervisor::ProtectionType; |
| use hypervisor::Vm; |
| use irq_wait::IrqWaitWorker; |
| use jail::FakeMinijailStub as Minijail; |
| #[cfg(not(feature = "crash-report"))] |
| pub(crate) use panic_hook::set_panic_hook; |
| use product::create_snd_mute_tube_pair; |
| #[cfg(any(feature = "haxm", feature = "gvm", feature = "whpx"))] |
| use product::create_snd_state_tube; |
| use product::handle_pvclock_request; |
| use product::merge_session_invariants; |
| use product::run_ime_thread; |
| use product::set_package_name; |
| pub(crate) use product::setup_metrics_reporting; |
| use product::start_service_ipc_listener; |
| use product::RunControlArgs; |
| use product::ServiceVmState; |
| use product::Token; |
| use resources::SystemAllocator; |
| use run_vcpu::run_all_vcpus; |
| use run_vcpu::VcpuRunMode; |
| use rutabaga_gfx::RutabagaGralloc; |
| use smallvec::SmallVec; |
| use sync::Mutex; |
| use tube_transporter::TubeToken; |
| use tube_transporter::TubeTransporterReader; |
| use vm_control::api::VmMemoryClient; |
| #[cfg(feature = "balloon")] |
| use vm_control::BalloonControlCommand; |
| #[cfg(feature = "balloon")] |
| use vm_control::BalloonTube; |
| use vm_control::DeviceControlCommand; |
| use vm_control::IrqHandlerRequest; |
| use vm_control::PvClockCommand; |
| use vm_control::VcpuControl; |
| use vm_control::VmMemoryRegionState; |
| use vm_control::VmMemoryRequest; |
| use vm_control::VmRequest; |
| use vm_control::VmResponse; |
| use vm_control::VmRunMode; |
| use vm_memory::GuestAddress; |
| use vm_memory::GuestMemory; |
| use win_util::ProcessType; |
| #[cfg(feature = "whpx")] |
| use x86_64::cpuid::adjust_cpuid; |
| #[cfg(feature = "whpx")] |
| use x86_64::cpuid::CpuIdContext; |
| #[cfg(all(target_arch = "x86_64", feature = "haxm"))] |
| use x86_64::get_cpu_manufacturer; |
| #[cfg(all(target_arch = "x86_64", feature = "haxm"))] |
| use x86_64::CpuManufacturer; |
| #[cfg(target_arch = "x86_64")] |
| use x86_64::X8664arch as Arch; |
| |
| use crate::crosvm::config::Config; |
| use crate::crosvm::config::Executable; |
| use crate::crosvm::config::InputDeviceOption; |
| #[cfg(any(feature = "gvm", feature = "whpx"))] |
| use crate::crosvm::config::IrqChipKind; |
| #[cfg(feature = "gpu")] |
| use crate::crosvm::config::TouchDeviceOption; |
| use crate::crosvm::config::DEFAULT_TOUCH_DEVICE_HEIGHT; |
| use crate::crosvm::config::DEFAULT_TOUCH_DEVICE_WIDTH; |
| use crate::crosvm::sys::config::HypervisorKind; |
| use crate::crosvm::sys::windows::broker::BrokerTubes; |
| #[cfg(feature = "stats")] |
| use crate::crosvm::sys::windows::stats::StatisticsCollector; |
| #[cfg(feature = "gpu")] |
| pub(crate) use crate::sys::windows::product::get_gpu_product_configs; |
| #[cfg(feature = "audio")] |
| pub(crate) use crate::sys::windows::product::get_snd_product_configs; |
| #[cfg(feature = "gpu")] |
| pub(crate) use crate::sys::windows::product::get_window_procedure_thread_product_configs; |
| use crate::sys::windows::product::log_descriptor; |
| #[cfg(feature = "audio")] |
| pub(crate) use crate::sys::windows::product::num_input_sound_devices; |
| #[cfg(feature = "audio")] |
| pub(crate) use crate::sys::windows::product::num_input_sound_streams; |
| use crate::sys::windows::product::spawn_anti_tamper_thread; |
| use crate::sys::windows::product::MetricEventType; |
| |
/// Default vsock context ID assigned to the guest when the config does not
/// provide one; 3 is the lowest non-reserved CID (see `create_vsock_device`).
const DEFAULT_GUEST_CID: u64 = 3;

// by default, if enabled, the balloon WS features will use 4 bins.
const VIRTIO_BALLOON_WS_DEFAULT_NUM_BINS: u8 = 4;
| |
/// A control tube monitored by the main run loop, tagged by origin so that
/// incoming messages can be routed to the appropriate handler.
enum TaggedControlTube {
    /// Tube carrying `VmRequest` messages; wrapped in `FlushOnDropTube` so
    /// pending responses are flushed when the tube is dropped.
    Vm(FlushOnDropTube),
    /// Product-specific control tube, handled by the `product` module.
    Product(product::TaggedControlTube),
}
| |
impl ReadNotifier for TaggedControlTube {
    /// Returns the descriptor to wait on for readability, delegating to the
    /// wrapped tube of either variant.
    fn get_read_notifier(&self) -> &dyn AsRawDescriptor {
        match self {
            Self::Vm(tube) => tube.0.get_read_notifier(),
            Self::Product(tube) => tube.get_read_notifier(),
        }
    }
}
| |
impl CloseNotifier for TaggedControlTube {
    /// Returns the descriptor that signals when the peer end of the tube has
    /// been closed, delegating to the wrapped tube of either variant.
    fn get_close_notifier(&self) -> &dyn AsRawDescriptor {
        match self {
            Self::Vm(tube) => tube.0.get_close_notifier(),
            Self::Product(tube) => tube.get_close_notifier(),
        }
    }
}
| |
/// Reason the VM run loop terminated (see `handle_readable_event`, which maps
/// `VmEventType` values onto these states).
pub enum ExitState {
    /// The guest requested a reset (`VmEventType::Reset`).
    Reset,
    /// Clean shutdown: guest-requested exit or broker shutdown.
    Stop,
    /// A vCPU crashed (`VmEventType::Crash`).
    Crash,
    /// Guest kernel panic; not produced on Windows per the event handler.
    #[allow(dead_code)]
    GuestPanic,
    /// A vCPU stall was detected by the watchdog (`VmEventType::WatchdogReset`).
    WatchdogReset,
}
| |
| type DeviceResult<T = VirtioDeviceStub> = Result<T>; |
| |
| fn create_vhost_user_block_device(cfg: &Config, disk_device_tube: Tube) -> DeviceResult { |
| let dev = virtio::VhostUserFrontend::new( |
| virtio::DeviceType::Block, |
| virtio::base_features(cfg.protection_type), |
| disk_device_tube, |
| None, |
| None, |
| ) |
| .exit_context( |
| Exit::VhostUserBlockDeviceNew, |
| "failed to set up vhost-user block device", |
| )?; |
| |
| Ok(VirtioDeviceStub { |
| dev: Box::new(dev), |
| jail: None, |
| }) |
| } |
| |
| fn create_block_device(cfg: &Config, disk: &DiskOption, disk_device_tube: Tube) -> DeviceResult { |
| let features = virtio::base_features(cfg.protection_type); |
| let dev = virtio::BlockAsync::new( |
| features, |
| disk.open()?, |
| disk, |
| Some(disk_device_tube), |
| None, |
| None, |
| ) |
| .exit_context(Exit::BlockDeviceNew, "failed to create block device")?; |
| |
| Ok(VirtioDeviceStub { |
| dev: Box::new(dev), |
| jail: None, |
| }) |
| } |
| |
/// Builds a vhost-user GPU frontend that communicates with its backend over
/// `vhost_user_tube`. The returned stub carries no jail.
#[cfg(feature = "gpu")]
fn create_vhost_user_gpu_device(base_features: u64, vhost_user_tube: Tube) -> DeviceResult {
    let frontend = virtio::VhostUserFrontend::new(
        virtio::DeviceType::Gpu,
        base_features,
        vhost_user_tube,
        None,
        None,
    )
    .exit_context(
        Exit::VhostUserGpuDeviceNew,
        "failed to set up vhost-user gpu device",
    )?;

    Ok(VirtioDeviceStub {
        dev: Box::new(frontend),
        jail: None,
    })
}
| |
/// Builds an in-process virtio-snd device from the given parameters.
/// `_product_args` is accepted for signature parity but currently unused here.
#[cfg(feature = "audio")]
fn create_snd_device(
    cfg: &Config,
    parameters: SndParameters,
    _product_args: SndBackendConfigProduct,
) -> DeviceResult {
    let snd = VirtioSnd::new(virtio::base_features(cfg.protection_type), parameters)
        .exit_context(Exit::VirtioSoundDeviceNew, "failed to create snd device")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(snd),
        jail: None,
    })
}
| |
/// Builds a vhost-user sound frontend that communicates with its backend over
/// `vhost_user_tube`. The returned stub carries no jail.
#[cfg(feature = "audio")]
fn create_vhost_user_snd_device(base_features: u64, vhost_user_tube: Tube) -> DeviceResult {
    let frontend = virtio::VhostUserFrontend::new(
        virtio::DeviceType::Sound,
        base_features,
        vhost_user_tube,
        None,
        None,
    )
    .exit_context(
        Exit::VhostUserSndDeviceNew,
        "failed to set up vhost-user snd device",
    )?;

    Ok(VirtioDeviceStub {
        dev: Box::new(frontend),
        jail: None,
    })
}
| |
/// Builds a virtio-input multi-touch device with the given dimensions,
/// reading input events from `event_pipe`. `idx` distinguishes multiple
/// touch devices; `name` is an optional device label.
#[cfg(feature = "gpu")]
fn create_multi_touch_device(
    cfg: &Config,
    event_pipe: StreamChannel,
    width: u32,
    height: u32,
    name: Option<&str>,
    idx: u32,
) -> DeviceResult {
    let base_features = virtio::base_features(cfg.protection_type);
    let touch = virtio::input::new_multi_touch(idx, event_pipe, width, height, name, base_features)
        .exit_context(Exit::InputDeviceNew, "failed to set up input device")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(touch),
        jail: None,
    })
}
| |
/// Builds a virtio-input mouse device reading events from `event_pipe`.
#[cfg(feature = "gpu")]
fn create_mouse_device(cfg: &Config, event_pipe: StreamChannel, idx: u32) -> DeviceResult {
    let base_features = virtio::base_features(cfg.protection_type);
    let mouse = virtio::input::new_mouse(idx, event_pipe, base_features)
        .exit_context(Exit::InputDeviceNew, "failed to set up input device")?;
    Ok(VirtioDeviceStub {
        dev: Box::new(mouse),
        jail: None,
    })
}
| |
/// Builds a vhost-user net frontend that communicates with its backend over
/// `net_device_tube`. The returned stub carries no jail.
#[cfg(feature = "slirp")]
fn create_vhost_user_net_device(cfg: &Config, net_device_tube: Tube) -> DeviceResult {
    let base_features = virtio::base_features(cfg.protection_type);
    let frontend = virtio::VhostUserFrontend::new(
        virtio::DeviceType::Net,
        base_features,
        net_device_tube,
        None,
        None,
    )
    .exit_context(
        Exit::VhostUserNetDeviceNew,
        "failed to set up vhost-user net device",
    )?;

    Ok(VirtioDeviceStub {
        dev: Box::new(frontend),
        jail: None,
    })
}
| |
| fn create_rng_device(cfg: &Config) -> DeviceResult { |
| let dev = virtio::Rng::new(virtio::base_features(cfg.protection_type)) |
| .exit_context(Exit::RngDeviceNew, "failed to set up rng")?; |
| |
| Ok(VirtioDeviceStub { |
| dev: Box::new(dev), |
| jail: None, |
| }) |
| } |
| |
| fn create_console_device(cfg: &Config, param: &SerialParameters) -> DeviceResult { |
| let mut keep_rds = Vec::new(); |
| let evt = Event::new().exit_context(Exit::CreateEvent, "failed to create event")?; |
| let dev = param |
| .create_serial_device::<Console>(cfg.protection_type, &evt, &mut keep_rds) |
| .exit_context(Exit::CreateConsole, "failed to create console device")?; |
| |
| Ok(VirtioDeviceStub { |
| dev: Box::new(dev), |
| jail: None, |
| }) |
| } |
| |
/// Builds a virtio-balloon device.
///
/// `balloon_device_tube` carries balloon control commands;
/// `dynamic_mapping_device_tube` is wrapped in a `VmMemoryClient` for guest
/// memory mapping operations; `inflate_tube`, if present, is passed through to
/// the balloon; `init_balloon_size` is the starting balloon size in bytes.
/// The feature bits and strict/relaxed mode are derived from `cfg`.
#[cfg(feature = "balloon")]
fn create_balloon_device(
    cfg: &Config,
    balloon_device_tube: Tube,
    dynamic_mapping_device_tube: Tube,
    inflate_tube: Option<Tube>,
    init_balloon_size: u64,
) -> DeviceResult {
    // Set the PageReporting feature bit only when page reporting is enabled.
    let balloon_features =
        (cfg.balloon_page_reporting as u64) << BalloonFeatures::PageReporting as u64;
    let dev = virtio::Balloon::new(
        virtio::base_features(cfg.protection_type),
        balloon_device_tube,
        VmMemoryClient::new(dynamic_mapping_device_tube),
        inflate_tube,
        init_balloon_size,
        if cfg.strict_balloon {
            BalloonMode::Strict
        } else {
            BalloonMode::Relaxed
        },
        balloon_features,
        // registered-events tube; not wired up on this platform.
        #[cfg(feature = "registered_events")]
        None,
        VIRTIO_BALLOON_WS_DEFAULT_NUM_BINS,
    )
    .exit_context(Exit::BalloonDeviceNew, "failed to create balloon")?;

    Ok(VirtioDeviceStub {
        dev: Box::new(dev),
        jail: None,
    })
}
| |
| fn create_vsock_device(cfg: &Config) -> DeviceResult { |
| // We only support a single guest, so we can confidently assign a default |
| // CID if one isn't provided. We choose the lowest non-reserved value. |
| let dev = virtio::vsock::Vsock::new( |
| cfg.vsock |
| .as_ref() |
| .map(|cfg| cfg.cid) |
| .unwrap_or(DEFAULT_GUEST_CID), |
| cfg.host_guid.clone(), |
| virtio::base_features(cfg.protection_type), |
| ) |
| .exit_context( |
| Exit::UserspaceVsockDeviceNew, |
| "failed to create userspace vsock device", |
| )?; |
| |
| Ok(VirtioDeviceStub { |
| dev: Box::new(dev), |
| jail: None, |
| }) |
| } |
| |
/// Creates all virtio device stubs for the VM according to `cfg`.
///
/// Devices are appended in a deliberate order (disks must come before
/// virtio-console — see the TODO below). Tubes consumed here are taken out of
/// `cfg` or the passed-in options; `control_tubes` accumulates main-loop
/// control endpoints registered by product hooks.
///
/// NOTE(review): `vm_evt_wrtube`, `virtio_snd_state_device_tube`, and
/// `virtio_snd_control_device_tube` are not used in this body; they appear to
/// be kept for signature stability while dependent code is feature-gated
/// (see the file-level TODO) — confirm before removing.
fn create_virtio_devices(
    cfg: &mut Config,
    vm_evt_wrtube: &SendTube,
    #[allow(clippy::ptr_arg)] control_tubes: &mut Vec<TaggedControlTube>,
    disk_device_tubes: &mut Vec<Tube>,
    balloon_device_tube: Option<Tube>,
    pvclock_device_tube: Option<Tube>,
    dynamic_mapping_device_tube: Option<Tube>,
    inflate_tube: Option<Tube>,
    init_balloon_size: u64,
    tsc_frequency: u64,
    virtio_snd_state_device_tube: Option<Tube>,
    virtio_snd_control_device_tube: Option<Tube>,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    if cfg.block_vhost_user_tube.is_empty() {
        // Disk devices must precede virtio-console devices or the kernel does not boot.
        // TODO(b/171215421): figure out why this ordering is required and fix it.
        for disk in &cfg.disks {
            let disk_device_tube = disk_device_tubes.remove(0);
            devs.push(create_block_device(cfg, disk, disk_device_tube)?);
        }
    } else {
        info!("Starting up vhost user block backends...");
        for _disk in &cfg.disks {
            let disk_device_tube = cfg.block_vhost_user_tube.remove(0);
            devs.push(create_vhost_user_block_device(cfg, disk_device_tube)?);
        }
    }

    // One console device per serial parameter configured as virtio-console.
    for (_, param) in cfg
        .serial_parameters
        .iter()
        .filter(|(_k, v)| v.hardware == SerialHardware::VirtioConsole)
    {
        let dev = create_console_device(cfg, param)?;
        devs.push(dev);
    }

    #[cfg(feature = "audio")]
    if product::virtio_sound_enabled() {
        let snd_split_config = cfg
            .snd_split_config
            .as_mut()
            .expect("snd_split_config must exist");
        let snd_vmm_config = snd_split_config
            .vmm_config
            .as_mut()
            .expect("snd_vmm_config must exist");
        product::push_snd_control_tubes(control_tubes, snd_vmm_config);

        match snd_split_config.backend_config.take() {
            None => {
                // No backend config present means the backend is running in another process.
                devs.push(create_vhost_user_snd_device(
                    virtio::base_features(cfg.protection_type),
                    snd_vmm_config
                        .main_vhost_user_tube
                        .take()
                        .expect("Snd VMM vhost-user tube should be set"),
                )?);
            }
            Some(backend_config) => {
                // Backend config present, so initialize Snd in this process.
                devs.push(create_snd_device(
                    cfg,
                    backend_config.parameters,
                    backend_config.product_config,
                )?);
            }
        }
    }

    // pvclock is product-gated; the product hook decides whether to add it.
    if let Some(tube) = pvclock_device_tube {
        product::push_pvclock_device(cfg, &mut devs, tsc_frequency, tube);
    }

    devs.push(create_rng_device(cfg)?);

    #[cfg(feature = "slirp")]
    if let Some(net_vhost_user_tube) = cfg.net_vhost_user_tube.take() {
        devs.push(create_vhost_user_net_device(cfg, net_vhost_user_tube)?);
    }

    // Balloon requires both its command tube and the dynamic mapping tube.
    #[cfg(feature = "balloon")]
    if let (Some(balloon_device_tube), Some(dynamic_mapping_device_tube)) =
        (balloon_device_tube, dynamic_mapping_device_tube)
    {
        devs.push(create_balloon_device(
            cfg,
            balloon_device_tube,
            dynamic_mapping_device_tube,
            inflate_tube,
            init_balloon_size,
        )?);
    }

    devs.push(create_vsock_device(cfg)?);

    // Create input event devices first; the backend half (if present) yields
    // the event devices the in-process GPU worker will consume below.
    #[cfg(feature = "gpu")]
    let event_devices = if let Some(InputEventSplitConfig {
        backend_config,
        vmm_config,
    }) = cfg.input_event_split_config.take()
    {
        devs.extend(
            create_virtio_input_event_devices(cfg, vmm_config)
                .context("create input event devices")?,
        );
        backend_config.map(|cfg| cfg.event_devices)
    } else {
        None
    };

    #[cfg(feature = "gpu")]
    if let Some(wndproc_thread_vmm_config) = cfg
        .window_procedure_thread_split_config
        .as_mut()
        .map(|split_cfg| &mut split_cfg.vmm_config)
    {
        product::push_window_procedure_thread_control_tubes(
            control_tubes,
            wndproc_thread_vmm_config,
        );
    }

    // Start the window procedure thread (if configured) before the GPU device
    // that needs it.
    #[cfg(feature = "gpu")]
    let mut wndproc_thread = cfg
        .window_procedure_thread_split_config
        .as_mut()
        .and_then(|cfg| cfg.wndproc_thread_builder.take())
        .map(WindowProcedureThreadBuilder::start_thread)
        .transpose()
        .context("Failed to start the window procedure thread.")?;

    #[cfg(feature = "gpu")]
    if let Some(gpu_vmm_config) = cfg.gpu_vmm_config.take() {
        devs.push(create_virtio_gpu_device(
            cfg,
            gpu_vmm_config,
            event_devices,
            &mut wndproc_thread,
            control_tubes,
        )?);
    }

    Ok(devs)
}
| |
/// Creates the VMM halves of the virtio input devices (multi-touch, mouse,
/// keyboard) from the pipes supplied in `input_event_vmm_config`.
///
/// Multi-touch pipes are paired positionally with the configured
/// `--multi-touch` options; the first touch device falls back to the display
/// input dimensions when no explicit size was given. Exactly one keyboard is
/// created from the last keyboard pipe, which must be present.
#[cfg(feature = "gpu")]
fn create_virtio_input_event_devices(
    cfg: &Config,
    mut input_event_vmm_config: InputEventVmmConfig,
) -> DeviceResult<Vec<VirtioDeviceStub>> {
    let mut devs = Vec::new();

    // Iterate event devices, create the VMM end.
    let mut multi_touch_pipes = input_event_vmm_config
        .multi_touch_pipes
        .drain(..)
        .enumerate();
    for input in &cfg.virtio_input {
        match input {
            InputDeviceOption::SingleTouch { .. } => {
                unimplemented!("--single-touch is no longer supported. Use --multi-touch instead.");
            }
            InputDeviceOption::MultiTouch {
                width,
                height,
                name,
                ..
            } => {
                // Stop once we run out of pipes; extra configured devices are
                // silently ignored.
                let Some((idx, pipe)) = multi_touch_pipes.next() else {
                    break;
                };
                let mut width = *width;
                let mut height = *height;
                // Only the primary touch device inherits the display's input
                // dimensions as defaults.
                if idx == 0 {
                    if width.is_none() {
                        width = cfg.display_input_width;
                    }
                    if height.is_none() {
                        height = cfg.display_input_height;
                    }
                }
                devs.push(create_multi_touch_device(
                    cfg,
                    pipe,
                    width.unwrap_or(DEFAULT_TOUCH_DEVICE_WIDTH),
                    height.unwrap_or(DEFAULT_TOUCH_DEVICE_HEIGHT),
                    name.as_deref(),
                    idx as u32,
                )?);
            }
            _ => {}
        }
    }
    drop(multi_touch_pipes);

    // Product hook may add its own mouse device(s) before the generic ones.
    product::push_mouse_device(cfg, &mut input_event_vmm_config, &mut devs)?;

    for (idx, pipe) in input_event_vmm_config.mouse_pipes.drain(..).enumerate() {
        devs.push(create_mouse_device(cfg, pipe, idx as u32)?);
    }

    // Exactly one keyboard; its pipe is required to be present in the config.
    let keyboard_pipe = input_event_vmm_config
        .keyboard_pipes
        .pop()
        .expect("at least one keyboard should be in GPU VMM config");
    let dev = virtio::input::new_keyboard(
        /* idx= */ 0,
        keyboard_pipe,
        virtio::base_features(cfg.protection_type),
    )
    .exit_context(Exit::InputDeviceNew, "failed to set up input device")?;

    devs.push(VirtioDeviceStub {
        dev: Box::new(dev),
        jail: None,
    });

    Ok(devs)
}
| |
/// Creates the virtio-gpu device for the VM.
///
/// The GPU frontend is always vhost-user. When `cfg.gpu_backend_config` is
/// present the backend worker is spawned on a thread in this process;
/// otherwise the backend is assumed to be running in another process and only
/// the frontend is created here.
#[cfg(feature = "gpu")]
fn create_virtio_gpu_device(
    cfg: &mut Config,
    mut gpu_vmm_config: GpuVmmConfig,
    event_devices: Option<Vec<EventDevice>>,
    wndproc_thread: &mut Option<WindowProcedureThread>,
    #[allow(clippy::ptr_arg)] control_tubes: &mut Vec<TaggedControlTube>,
) -> DeviceResult<VirtioDeviceStub> {
    // NOTE: removed an unused `resource_bridges` local that was never passed
    // anywhere.
    product::push_gpu_control_tubes(control_tubes, &mut gpu_vmm_config);

    // If the GPU backend is passed, start up the vhost-user worker in the main process.
    if let Some(backend_config) = cfg.gpu_backend_config.take() {
        let event_devices = event_devices.ok_or_else(|| {
            anyhow!("event devices are missing when creating virtio-gpu in the current process.")
        })?;
        let wndproc_thread = wndproc_thread
            .take()
            .ok_or_else(|| anyhow!("Window procedure thread is missing."))?;

        // NOTE(review): the JoinHandle is dropped, detaching the worker
        // thread — confirm that nothing needs to join it on shutdown.
        std::thread::spawn(move || {
            run_gpu_device_worker(backend_config, event_devices, wndproc_thread)
        });
    }

    // The GPU is always vhost-user, even if running in the main process.
    create_vhost_user_gpu_device(
        virtio::base_features(cfg.protection_type),
        gpu_vmm_config
            .main_vhost_user_tube
            .take()
            .expect("GPU VMM vhost-user tube should be set"),
    )
    .context("create vhost-user GPU device")
}
| |
/// Creates all virtio devices and wraps each one in a `VirtioPciDevice`,
/// wiring up the per-device MSI, shared-memory, ioevent, and VM-control
/// tubes. The host ends of those tubes are pushed into the corresponding
/// `*_control_tubes` vectors for the main loop / worker threads to service.
///
/// Returns the PCI device list paired with each stub's (always `None` here)
/// jail.
fn create_devices(
    cfg: &mut Config,
    mem: &GuestMemory,
    exit_evt_wrtube: &SendTube,
    irq_control_tubes: &mut Vec<Tube>,
    vm_memory_control_tubes: &mut Vec<Tube>,
    control_tubes: &mut Vec<TaggedControlTube>,
    disk_device_tubes: &mut Vec<Tube>,
    balloon_device_tube: Option<Tube>,
    pvclock_device_tube: Option<Tube>,
    dynamic_mapping_device_tube: Option<Tube>,
    inflate_tube: Option<Tube>,
    init_balloon_size: u64,
    tsc_frequency: u64,
    virtio_snd_state_device_tube: Option<Tube>,
    virtio_snd_control_device_tube: Option<Tube>,
) -> DeviceResult<Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>> {
    let stubs = create_virtio_devices(
        cfg,
        exit_evt_wrtube,
        control_tubes,
        disk_device_tubes,
        balloon_device_tube,
        pvclock_device_tube,
        dynamic_mapping_device_tube,
        inflate_tube,
        init_balloon_size,
        tsc_frequency,
        virtio_snd_state_device_tube,
        virtio_snd_control_device_tube,
    )?;

    let mut pci_devices = Vec::new();

    for stub in stubs {
        // MSI routing: host end goes to the IRQ handler thread.
        let (msi_host_tube, msi_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        irq_control_tubes.push(msi_host_tube);

        // Only devices exposing a shared memory region get a memory tube.
        let shared_memory_tube = if stub.dev.get_shared_memory_region().is_some() {
            let (host_tube, device_tube) =
                Tube::pair().context("failed to create VVU proxy tube")?;
            vm_memory_control_tubes.push(host_tube);
            Some(device_tube)
        } else {
            None
        };

        let (ioevent_host_tube, ioevent_device_tube) =
            Tube::pair().context("failed to create ioevent tube")?;
        vm_memory_control_tubes.push(ioevent_host_tube);

        // Per-device VM control channel, serviced by the main loop.
        let (vm_control_host_tube, vm_control_device_tube) =
            Tube::pair().context("failed to create vm_control tube")?;
        control_tubes.push(TaggedControlTube::Vm(FlushOnDropTube::from(
            vm_control_host_tube,
        )));

        let dev = Box::new(
            VirtioPciDevice::new(
                mem.clone(),
                stub.dev,
                msi_device_tube,
                cfg.disable_virtio_intx,
                shared_memory_tube.map(VmMemoryClient::new),
                VmMemoryClient::new(ioevent_device_tube),
                vm_control_device_tube,
            )
            .exit_context(Exit::VirtioPciDev, "failed to create virtio pci dev")?,
        ) as Box<dyn BusDeviceObj>;
        pci_devices.push((dev, stub.jail));
    }

    Ok(pci_devices)
}
| |
// Simple string-backed error type; presumably used by pvclock request
// handling (`handle_pvclock_request`) — usage is outside this chunk, confirm.
#[derive(Debug)]
struct PvClockError(String);
| |
/// Dispatches a single triggered wait-context event for the main VM run loop.
///
/// Returns `Ok(Some(exit_state))` when the event requires the run loop to
/// terminate (guest exit/reset/crash/watchdog, broker shutdown, or a control
/// request that changed the run mode to exiting), and `Ok(None)` otherwise.
/// Disconnected control tubes are recorded in `vm_control_ids_to_remove` for
/// the caller to unregister; newly accepted control clients are registered in
/// `wait_ctx` and `control_tubes` under `next_control_id`.
fn handle_readable_event<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
    event: &TriggeredEvent<Token>,
    vm_control_ids_to_remove: &mut Vec<usize>,
    next_control_id: &mut usize,
    service_vm_state: &mut ServiceVmState,
    disk_host_tubes: &[Tube],
    ipc_main_loop_tube: Option<&Tube>,
    #[cfg(feature = "gpu")] gpu_control_tube: Option<&Tube>,
    vm_evt_rdtube: &RecvTube,
    control_tubes: &mut BTreeMap<usize, TaggedControlTube>,
    guest_os: &mut RunnableLinuxVm<V, Vcpu>,
    sys_allocator_mutex: &Arc<Mutex<SystemAllocator>>,
    virtio_snd_host_mute_tube: &mut Option<Tube>,
    proto_main_loop_tube: Option<&ProtoTube>,
    anti_tamper_main_thread_tube: &Option<ProtoTube>,
    #[cfg(feature = "balloon")] mut balloon_tube: Option<&mut BalloonTube>,
    memory_size_mb: u64,
    vcpu_boxes: &Mutex<Vec<Box<dyn VcpuArch>>>,
    pvclock_host_tube: &Option<Tube>,
    run_mode_arc: &VcpuRunMode,
    region_state: &mut VmMemoryRegionState,
    vm_control_server: Option<&mut ControlServer>,
    irq_handler_control: &Tube,
    device_ctrl_tube: &Tube,
    wait_ctx: &WaitContext<Token>,
    force_s2idle: bool,
    vcpu_control_channels: &[mpsc::Sender<VcpuControl>],
) -> Result<Option<ExitState>> {
    // Shared helper: executes a `VmRequest` against the guest and returns the
    // response plus any run-mode change the request proposed. Also used by
    // the product token handler in the fallback arm below.
    let execute_vm_request = |request: VmRequest, guest_os: &mut RunnableLinuxVm<V, Vcpu>| {
        let mut run_mode_opt = None;
        let vcpu_size = vcpu_boxes.lock().len();
        let resp = request.execute(
            &guest_os.vm,
            &mut run_mode_opt,
            disk_host_tubes,
            &mut guest_os.pm,
            #[cfg(feature = "gpu")]
            gpu_control_tube,
            #[cfg(not(feature = "gpu"))]
            None,
            None,
            &mut None,
            // Callback used by requests that must broadcast to all vCPUs.
            |msg| {
                kick_all_vcpus(
                    run_mode_arc,
                    vcpu_control_channels,
                    vcpu_boxes,
                    guest_os.irq_chip.as_ref(),
                    pvclock_host_tube,
                    msg,
                );
            },
            force_s2idle,
            #[cfg(feature = "swap")]
            None,
            device_ctrl_tube,
            vcpu_size,
            irq_handler_control,
            || guest_os.irq_chip.as_ref().snapshot(vcpu_size),
        );
        (resp, run_mode_opt)
    };

    match event.token {
        // Guest lifecycle events sent by vCPU threads/devices; each maps to
        // an `ExitState` that terminates the run loop (except Panic).
        Token::VmEvent => match vm_evt_rdtube.recv::<VmEventType>() {
            Ok(vm_event) => {
                let exit_state = match vm_event {
                    VmEventType::Exit => {
                        info!("vcpu requested shutdown");
                        Some(ExitState::Stop)
                    }
                    VmEventType::Reset => {
                        info!("vcpu requested reset");
                        Some(ExitState::Reset)
                    }
                    VmEventType::Crash => {
                        info!("vcpu crashed");
                        Some(ExitState::Crash)
                    }
                    VmEventType::Panic(_) => {
                        error!("got pvpanic event. this event is not expected on Windows.");
                        None
                    }
                    VmEventType::WatchdogReset => {
                        info!("vcpu stall detected");
                        Some(ExitState::WatchdogReset)
                    }
                };
                return Ok(exit_state);
            }
            Err(e) => {
                warn!("failed to recv VmEvent: {}", e);
            }
        },
        Token::BrokerShutdown => {
            info!("main loop got broker shutdown event");
            return Ok(Some(ExitState::Stop));
        }
        // New client connecting to the external control server: register its
        // read and close notifiers under a fresh id.
        Token::VmControlServer => {
            let server =
                vm_control_server.expect("control server must exist if this event triggers");
            let client = server.accept();
            let id = *next_control_id;
            *next_control_id += 1;
            wait_ctx
                .add(client.0.get_read_notifier(), Token::VmControl { id })
                .exit_context(
                    Exit::WaitContextAdd,
                    "failed to add trigger to wait context",
                )?;
            wait_ctx
                .add(client.0.get_close_notifier(), Token::VmControl { id })
                .exit_context(
                    Exit::WaitContextAdd,
                    "failed to add trigger to wait context",
                )?;
            control_tubes.insert(id, TaggedControlTube::Vm(client));
        }
        // A registered control tube is readable (or was closed).
        #[allow(clippy::collapsible_match)]
        Token::VmControl { id } => {
            if let Some(tube) = control_tubes.get(&id) {
                #[allow(clippy::single_match)]
                match tube {
                    TaggedControlTube::Product(product_tube) => {
                        product::handle_tagged_control_tube_event(
                            product_tube,
                            virtio_snd_host_mute_tube,
                            service_vm_state,
                            ipc_main_loop_tube,
                        )
                    }
                    TaggedControlTube::Vm(tube) => match tube.0.recv::<VmRequest>() {
                        Ok(request) => {
                            let mut run_mode_opt = None;
                            let response = match request {
                                VmRequest::HotPlugVfioCommand { device, add } => {
                                    // Suppress warnings.
                                    let _ = (device, add);
                                    unimplemented!("not implemented on Windows");
                                }
                                #[cfg(feature = "registered_events")]
                                VmRequest::RegisterListener { socket_addr, event } => {
                                    unimplemented!("not implemented on Windows");
                                }
                                #[cfg(feature = "registered_events")]
                                VmRequest::UnregisterListener { socket_addr, event } => {
                                    unimplemented!("not implemented on Windows");
                                }
                                #[cfg(feature = "registered_events")]
                                VmRequest::Unregister { socket_addr } => {
                                    unimplemented!("not implemented on Windows");
                                }
                                #[cfg(feature = "balloon")]
                                VmRequest::BalloonCommand(cmd) => {
                                    if let Some(balloon_tube) = balloon_tube {
                                        if let Some((r, key)) = balloon_tube.send_cmd(cmd, Some(id))
                                        {
                                            // Responses for other clients come
                                            // back via Token::BalloonTube.
                                            if key != id {
                                                unimplemented!("not implemented on Windows");
                                            }
                                            Some(r)
                                        } else {
                                            None
                                        }
                                    } else {
                                        error!("balloon not enabled");
                                        None
                                    }
                                }
                                // All other requests go through the common
                                // executor defined above.
                                _ => {
                                    let (resp, run_mode_ret) =
                                        execute_vm_request(request, guest_os);
                                    run_mode_opt = run_mode_ret;
                                    Some(resp)
                                }
                            };

                            if let Some(response) = response {
                                if let Err(e) = tube.0.send(&response) {
                                    error!("failed to send VmResponse: {}", e);
                                }
                            }
                            if let Some(exit_state) =
                                handle_run_mode_change_for_vm_request(&run_mode_opt, guest_os)
                            {
                                return Ok(Some(exit_state));
                            }
                        }
                        Err(e) => {
                            // Disconnect means the client went away; queue the
                            // id for removal rather than erroring out.
                            if let TubeError::Disconnected = e {
                                vm_control_ids_to_remove.push(id);
                            } else {
                                error!("failed to recv VmRequest: {}", e);
                            }
                        }
                    },
                }
            }
        }
        // Asynchronous balloon responses; routed back to the originating
        // control tube by index.
        #[cfg(feature = "balloon")]
        Token::BalloonTube => match balloon_tube.as_mut().expect("missing balloon tube").recv() {
            Ok(resp) => {
                for (resp, idx) in resp {
                    if let Some(TaggedControlTube::Vm(tube)) = control_tubes.get(&idx) {
                        if let Err(e) = tube.0.send(&resp) {
                            error!("failed to send VmResponse: {}", e);
                        }
                    } else {
                        error!("Bad tube index {}", idx);
                    }
                }
            }
            Err(err) => {
                error!("Error processing balloon tube {:?}", err)
            }
        },
        #[cfg(not(feature = "balloon"))]
        Token::BalloonTube => unreachable!("balloon tube not registered"),
        // Any remaining token variants are product-specific.
        #[allow(unreachable_patterns)]
        _ => {
            let run_mode_opt = product::handle_received_token(
                &event.token,
                anti_tamper_main_thread_tube,
                #[cfg(feature = "balloon")]
                balloon_tube,
                control_tubes,
                guest_os,
                ipc_main_loop_tube,
                memory_size_mb,
                proto_main_loop_tube,
                pvclock_host_tube,
                run_mode_arc,
                service_vm_state,
                vcpu_boxes,
                virtio_snd_host_mute_tube,
                execute_vm_request,
            );
            if let Some(exit_state) = handle_run_mode_change_for_vm_request(&run_mode_opt, guest_os)
            {
                return Ok(Some(exit_state));
            }
        }
    };
    Ok(None)
}
| |
| /// Handles a run mode change (if one occurred) if one is pending as a |
| /// result a VmRequest. The parameter, run_mode_opt, is the run mode change |
| /// proposed by the VmRequest's execution. |
| /// |
| /// Returns the exit state, if it changed due to a run mode change. |
| /// None otherwise. |
| fn handle_run_mode_change_for_vm_request<V: VmArch + 'static, Vcpu: VcpuArch + 'static>( |
| run_mode_opt: &Option<VmRunMode>, |
| guest_os: &mut RunnableLinuxVm<V, Vcpu>, |
| ) -> Option<ExitState> { |
| if let Some(run_mode) = run_mode_opt { |
| info!("control socket changed run mode to {}", run_mode); |
| match run_mode { |
| VmRunMode::Exiting => return Some(ExitState::Stop), |
| other => { |
| if other == &VmRunMode::Running { |
| for dev in &guest_os.resume_notify_devices { |
| dev.lock().resume_imminent(); |
| } |
| } |
| } |
| } |
| } |
| // No exit state change. |
| None |
| } |
| |
/// Commands to control the VM Memory handler thread.
#[derive(serde::Serialize, serde::Deserialize)]
pub enum VmMemoryHandlerRequest {
    /// Ask the handler thread to break out of its wait loop and return.
    /// No response is sent for this command.
    Exit,
}
| |
/// Worker loop that services `VmMemoryRequest`s arriving over `control_tubes`.
///
/// Runs until a `VmMemoryHandlerRequest::Exit` arrives on `handler_control`
/// (or that tube hangs up). Each request is executed against `vm` and its
/// response is sent back on the same tube; tubes that disconnect are removed
/// from the wait context.
fn vm_memory_handler_thread(
    control_tubes: Vec<Tube>,
    mut vm: impl Vm,
    sys_allocator_mutex: Arc<Mutex<SystemAllocator>>,
    mut gralloc: RutabagaGralloc,
    handler_control: Tube,
) -> anyhow::Result<()> {
    #[derive(EventToken)]
    enum Token {
        VmControl { id: usize },
        HandlerControl,
    }

    let wait_ctx =
        WaitContext::build_with(&[(handler_control.get_read_notifier(), Token::HandlerControl)])
            .context("failed to build wait context")?;
    // Key each control tube by its index; the id is embedded in the wait token.
    let mut control_tubes = BTreeMap::from_iter(control_tubes.into_iter().enumerate());
    for (id, socket) in control_tubes.iter() {
        wait_ctx
            .add(socket.get_read_notifier(), Token::VmControl { id: *id })
            .context("failed to add descriptor to wait context")?;
    }

    let mut region_state = VmMemoryRegionState::new();

    'wait: loop {
        let events = {
            match wait_ctx.wait() {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to poll: {}", e);
                    break;
                }
            }
        };

        let mut vm_control_ids_to_remove = Vec::new();
        for event in events.iter().filter(|e| e.is_readable) {
            match event.token {
                Token::HandlerControl => match handler_control.recv::<VmMemoryHandlerRequest>() {
                    Ok(request) => match request {
                        VmMemoryHandlerRequest::Exit => break 'wait,
                    },
                    Err(e) => {
                        if let TubeError::Disconnected = e {
                            panic!("vm memory control tube disconnected.");
                        } else {
                            error!("failed to recv VmMemoryHandlerRequest: {}", e);
                        }
                    }
                },

                Token::VmControl { id } => {
                    if let Some(tube) = control_tubes.get(&id) {
                        match tube.recv::<VmMemoryRequest>() {
                            Ok(request) => {
                                let response = request.execute(
                                    &mut vm,
                                    &mut sys_allocator_mutex.lock(),
                                    &mut gralloc,
                                    None,
                                    &mut region_state,
                                );
                                if let Err(e) = tube.send(&response) {
                                    error!("failed to send VmMemoryControlResponse: {}", e);
                                }
                            }
                            Err(e) => {
                                // A disconnected client is expected; defer removal
                                // until after event iteration.
                                if let TubeError::Disconnected = e {
                                    vm_control_ids_to_remove.push(id);
                                } else {
                                    error!("failed to recv VmMemoryControlRequest: {}", e);
                                }
                            }
                        }
                    }
                }
            }
        }

        remove_closed_tubes(&wait_ctx, &mut control_tubes, vm_control_ids_to_remove)?;
        // If the handler control tube hung up without sending Exit, bail out
        // rather than spinning on a dead tube.
        if events
            .iter()
            .any(|e| e.is_hungup && !e.is_readable && matches!(e.token, Token::HandlerControl))
        {
            error!("vm memory handler control hung up but did not request an exit.");
            break 'wait;
        }
    }
    Ok(())
}
| |
| fn create_control_server( |
| control_server_path: Option<PathBuf>, |
| wait_ctx: &WaitContext<Token>, |
| ) -> Result<Option<ControlServer>> { |
| #[cfg(not(feature = "prod-build"))] |
| { |
| if let Some(path) = control_server_path { |
| let server = |
| ControlServer::new(path.to_str().expect("control socket path must be a string")) |
| .exit_context( |
| Exit::FailedToCreateControlServer, |
| "failed to create control server", |
| )?; |
| wait_ctx |
| .add(server.client_waiting(), Token::VmControlServer) |
| .exit_context( |
| Exit::WaitContextAdd, |
| "failed to add control server to wait context", |
| )?; |
| return Ok(Some(server)); |
| } |
| } |
| Ok::<Option<ControlServer>, anyhow::Error>(None) |
| } |
| |
/// The main VM control loop on Windows.
///
/// Spawns the IRQ wait worker, the VM memory handler thread, the devices
/// thread, and all vCPU threads; optionally restores the VM from a snapshot
/// at `restore_path`; then services control & service-IPC events until a
/// handler produces an exit state. On the way out it shuts the worker
/// threads down in order and returns the final `ExitState`.
fn run_control<V: VmArch + 'static, Vcpu: VcpuArch + 'static>(
    mut guest_os: RunnableLinuxVm<V, Vcpu>,
    sys_allocator: SystemAllocator,
    control_tubes: Vec<TaggedControlTube>,
    irq_control_tubes: Vec<Tube>,
    vm_memory_control_tubes: Vec<Tube>,
    vm_evt_rdtube: RecvTube,
    vm_evt_wrtube: SendTube,
    #[cfg(feature = "gpu")] gpu_control_tube: Option<Tube>,
    broker_shutdown_evt: Option<Event>,
    balloon_host_tube: Option<Tube>,
    pvclock_host_tube: Option<Tube>,
    disk_host_tubes: Vec<Tube>,
    gralloc: RutabagaGralloc,
    #[cfg(feature = "stats")] stats: Option<Arc<Mutex<StatisticsCollector>>>,
    service_pipe_name: Option<String>,
    memory_size_mb: u64,
    host_cpu_topology: bool,
    tsc_sync_mitigations: TscSyncMitigations,
    force_calibrated_tsc_leaf: bool,
    mut product_args: RunControlArgs,
    mut virtio_snd_host_mute_tube: Option<Tube>,
    restore_path: Option<PathBuf>,
    control_server_path: Option<PathBuf>,
    force_s2idle: bool,
    suspended: bool,
) -> Result<ExitState> {
    let (ipc_main_loop_tube, proto_main_loop_tube, _service_ipc) =
        start_service_ipc_listener(service_pipe_name)?;

    let mut service_vm_state = product::create_service_vm_state(memory_size_mb);

    let sys_allocator_mutex = Arc::new(Mutex::new(sys_allocator));

    let exit_evt = Event::new().exit_context(Exit::CreateEvent, "failed to create event")?;
    let (irq_handler_control, irq_handler_control_for_worker) = Tube::pair().exit_context(
        Exit::CreateTube,
        "failed to create IRQ handler control Tube",
    )?;

    // Create a separate thread to wait on IRQ events. This is a natural division
    // because IRQ interrupts have no dependencies on other events, and this lets
    // us avoid approaching the Windows WaitForMultipleObjects 64-object limit.
    let irq_join_handle = IrqWaitWorker::start(
        irq_handler_control_for_worker,
        guest_os
            .irq_chip
            .try_box_clone()
            .exit_context(Exit::CloneEvent, "failed to clone irq chip")?,
        irq_control_tubes,
        sys_allocator_mutex.clone(),
    );

    // Build the main wait context from the VM event tube plus any
    // product-specific triggers (e.g. the service IPC tubes).
    let mut triggers = vec![(vm_evt_rdtube.get_read_notifier(), Token::VmEvent)];
    product::push_triggers(&mut triggers, &ipc_main_loop_tube, &proto_main_loop_tube);
    let wait_ctx = WaitContext::build_with(&triggers).exit_context(
        Exit::WaitContextAdd,
        "failed to add trigger to wait context",
    )?;

    #[cfg(feature = "balloon")]
    let mut balloon_tube = balloon_host_tube
        .map(|tube| -> Result<BalloonTube> {
            wait_ctx
                .add(tube.get_read_notifier(), Token::BalloonTube)
                .context("failed to add trigger to wait context")?;
            Ok(BalloonTube::new(tube))
        })
        .transpose()
        .context("failed to create balloon tube")?;

    // VM memory requests are serviced on their own thread.
    let (vm_memory_handler_control, vm_memory_handler_control_for_thread) = Tube::pair()?;
    let vm_memory_handler_thread_join_handle = std::thread::Builder::new()
        .name("vm_memory_handler_thread".into())
        .spawn({
            let vm = guest_os.vm.try_clone().context("failed to clone Vm")?;
            let sys_allocator_mutex = sys_allocator_mutex.clone();
            move || {
                vm_memory_handler_thread(
                    vm_memory_control_tubes,
                    vm,
                    sys_allocator_mutex,
                    gralloc,
                    vm_memory_handler_control_for_thread,
                )
            }
        })
        .unwrap();

    if let Some(evt) = broker_shutdown_evt.as_ref() {
        wait_ctx.add(evt, Token::BrokerShutdown).exit_context(
            Exit::WaitContextAdd,
            "failed to add trigger to wait context",
        )?;
    }

    // Register the product control tubes; the map key doubles as the wait token id.
    let mut control_tubes = BTreeMap::from_iter(control_tubes.into_iter().enumerate());
    let mut next_control_id = control_tubes.len();
    for (id, control_tube) in control_tubes.iter() {
        #[allow(clippy::single_match)]
        match control_tube {
            TaggedControlTube::Product(product_tube) => wait_ctx
                .add(
                    product_tube.get_read_notifier(),
                    Token::VmControl { id: *id },
                )
                .exit_context(
                    Exit::WaitContextAdd,
                    "failed to add trigger to wait context",
                )?,
            _ => (),
        }
    }

    let (device_ctrl_tube, device_ctrl_resp) = Tube::pair().context("failed to create tube")?;
    guest_os.devices_thread = match create_devices_worker_thread(
        guest_os.vm.get_memory().clone(),
        guest_os.io_bus.clone(),
        guest_os.mmio_bus.clone(),
        device_ctrl_resp,
    ) {
        Ok(join_handle) => Some(join_handle),
        Err(e) => {
            return Err(anyhow!("Failed to start devices thread: {}", e));
        }
    };

    let vcpus: Vec<Option<_>> = match guest_os.vcpus.take() {
        Some(vec) => vec.into_iter().map(|vcpu| Some(vcpu)).collect(),
        None => iter::repeat_with(|| None)
            .take(guest_os.vcpu_count)
            .collect(),
    };

    let anti_tamper_main_thread_tube = spawn_anti_tamper_thread(&wait_ctx);

    let mut vm_control_server = create_control_server(control_server_path, &wait_ctx)?;

    let ime_thread = run_ime_thread(&mut product_args, &exit_evt)?;

    // Saved so the terminal can be restored on the way out.
    let original_terminal_mode = stdin().set_raw_mode().ok();

    let vcpu_boxes: Arc<Mutex<Vec<Box<dyn VcpuArch>>>> = Arc::new(Mutex::new(Vec::new()));
    let run_mode_arc = Arc::new(VcpuRunMode::default());

    let run_mode_state = if suspended {
        // Sleep devices before creating vcpus.
        device_ctrl_tube
            .send(&DeviceControlCommand::SleepDevices)
            .context("send command to devices control socket")?;
        match device_ctrl_tube
            .recv()
            .context("receive from devices control socket")?
        {
            VmResponse::Ok => (),
            resp => bail!("device sleep failed: {}", resp),
        }
        run_mode_arc.set_and_notify(VmRunMode::Suspending);
        VmRunMode::Suspending
    } else {
        VmRunMode::Running
    };

    // If we are restoring from a snapshot, then start suspended.
    if restore_path.is_some() {
        run_mode_arc.set_and_notify(VmRunMode::Suspending);
    }

    let (vcpu_threads, vcpu_control_channels) = run_all_vcpus(
        vcpus,
        vcpu_boxes.clone(),
        &guest_os,
        &exit_evt,
        &vm_evt_wrtube,
        #[cfg(feature = "stats")]
        &stats,
        host_cpu_topology,
        run_mode_arc.clone(),
        tsc_sync_mitigations,
        force_calibrated_tsc_leaf,
    )?;

    // Restore VM (if applicable).
    if let Some(path) = restore_path {
        vm_control::do_restore(
            path,
            &guest_os.vm,
            |msg| {
                kick_all_vcpus(
                    run_mode_arc.as_ref(),
                    &vcpu_control_channels,
                    vcpu_boxes.as_ref(),
                    guest_os.irq_chip.as_ref(),
                    &pvclock_host_tube,
                    msg,
                )
            },
            |msg, index| {
                kick_vcpu(
                    run_mode_arc.as_ref(),
                    &vcpu_control_channels,
                    vcpu_boxes.as_ref(),
                    guest_os.irq_chip.as_ref(),
                    &pvclock_host_tube,
                    index,
                    msg,
                )
            },
            &irq_handler_control,
            &device_ctrl_tube,
            guest_os.vcpu_count,
            |image| {
                guest_os
                    .irq_chip
                    .try_box_clone()?
                    .restore(image, guest_os.vcpu_count)
            },
            /* require_encrypted= */ false,
        )?;
        // Allow the vCPUs to start for real.
        kick_all_vcpus(
            run_mode_arc.as_ref(),
            &vcpu_control_channels,
            vcpu_boxes.as_ref(),
            guest_os.irq_chip.as_ref(),
            &pvclock_host_tube,
            // Other platforms (unix) have multiple modes they could start in (e.g. starting for
            // guest kernel debugging, etc). If/when we support those modes on Windows, we'll need
            // to enter that mode here rather than VmRunMode::Running.
            VcpuControl::RunState(run_mode_state),
        );
    }

    let mut exit_state = ExitState::Stop;
    let mut region_state = VmMemoryRegionState::new();

    // Main event loop: dispatch every readable event until a handler produces
    // an exit state.
    'poll: loop {
        let events = {
            match wait_ctx.wait() {
                Ok(v) => v,
                Err(e) => {
                    error!("failed to wait: {}", e);
                    break;
                }
            }
        };

        let mut vm_control_ids_to_remove = Vec::new();
        for event in events.iter().filter(|e| e.is_readable) {
            let state = handle_readable_event(
                event,
                &mut vm_control_ids_to_remove,
                &mut next_control_id,
                &mut service_vm_state,
                disk_host_tubes.as_slice(),
                ipc_main_loop_tube.as_ref(),
                #[cfg(feature = "gpu")]
                gpu_control_tube.as_ref(),
                &vm_evt_rdtube,
                &mut control_tubes,
                &mut guest_os,
                &sys_allocator_mutex,
                &mut virtio_snd_host_mute_tube,
                proto_main_loop_tube.as_ref(),
                &anti_tamper_main_thread_tube,
                #[cfg(feature = "balloon")]
                balloon_tube.as_mut(),
                memory_size_mb,
                vcpu_boxes.as_ref(),
                &pvclock_host_tube,
                run_mode_arc.as_ref(),
                &mut region_state,
                vm_control_server.as_mut(),
                &irq_handler_control,
                &device_ctrl_tube,
                &wait_ctx,
                force_s2idle,
                &vcpu_control_channels,
            )?;
            if let Some(state) = state {
                exit_state = state;
                break 'poll;
            }
        }

        remove_closed_tubes(&wait_ctx, &mut control_tubes, vm_control_ids_to_remove)?;
    }

    info!("run_control poll loop completed, forcing vCPUs to exit...");

    // VCPU threads MUST see the VmRunMode flag, otherwise they may re-enter the VM.
    run_mode_arc.set_and_notify(VmRunMode::Exiting);

    // Force all vcpus to exit from the hypervisor
    for vcpu in vcpu_boxes.lock().iter() {
        vcpu.set_immediate_exit(true);
    }

    let mut res = Ok(exit_state);
    guest_os.irq_chip.kick_halted_vcpus();
    let _ = exit_evt.signal();

    // Stop the devices thread before its control tube goes away.
    if guest_os.devices_thread.is_some() {
        if let Err(e) = device_ctrl_tube.send(&DeviceControlCommand::Exit) {
            error!("failed to stop device control loop: {}", e);
        };
        if let Some(thread) = guest_os.devices_thread.take() {
            if let Err(e) = thread.join() {
                error!("failed to exit devices thread: {:?}", e);
            }
        }
    }

    // Shut down the VM memory handler thread.
    if let Err(e) = vm_memory_handler_control.send(&VmMemoryHandlerRequest::Exit) {
        error!(
            "failed to request exit from VM memory handler thread: {}",
            e
        );
    }
    if let Err(e) = vm_memory_handler_thread_join_handle.join() {
        error!("failed to exit VM Memory handler thread: {:?}", e);
    }

    // Shut down the IRQ handler thread.
    if let Err(e) = irq_handler_control.send(&IrqHandlerRequest::Exit) {
        error!("failed to request exit from IRQ handler thread: {}", e);
    }

    // Ensure any child threads have ended by sending the Exit vm event (possibly again) to ensure
    // their run loops are aborted.
    let _ = vm_evt_wrtube.send::<VmEventType>(&VmEventType::Exit);
    for (i, thread) in vcpu_threads.into_iter().enumerate() {
        // wait till all the threads exit, so that guest_os.vm arc memory count is down to 1.
        // otherwise, we will hit a memory leak if we force kill the thread with terminate.
        match thread.join() {
            Ok(Err(e)) => {
                error!("vcpu thread {} exited with an error: {}", i, e);
                res = Err(e);
            }
            Ok(_) => {}
            Err(e) => error!("vcpu thread {} panicked: {:?}", i, e),
        }
    }

    info!("vCPU threads have exited.");

    if let Some(ime) = ime_thread {
        match ime.join() {
            Ok(Err(e)) => {
                error!("ime thread exited with an error: {}", e);
                if res.is_ok() {
                    // Prioritize past errors, but return this error if it is unique, otherwise just
                    // log it.
                    res = Err(e)
                }
            }
            Ok(_) => {}
            Err(e) => error!("ime thread panicked: {:?}", e),
        }
    }
    info!("IME thread has exited.");

    // This cancels all the outstanding and any future blocking operations.
    // TODO(b/196911556): Shutdown executor for cleaner shutdown. Given we are using global, for a
    // cleaner shutdown we have to call disarm so that all the incoming requests are run and are
    // cancelled. If we call shutdown all blocking threads will go away and incoming operations
    // won't be scheduled to run and will be dropped leading to panic. I think ideal place to call
    // shutdown is when we drop non-global executor.
    cros_async::unblock_disarm();
    info!("blocking async pool has shut down.");

    let _ = irq_join_handle.join();
    info!("IrqWaitWorker has shut down.");

    #[cfg(feature = "stats")]
    if let Some(stats) = stats {
        println!("Statistics Collected:\n{}", stats.lock());
        println!("Statistics JSON:\n{}", stats.lock().json());
    }

    if let Some(mode) = original_terminal_mode {
        if let Err(e) = stdin().restore_mode(mode) {
            warn!("failed to restore terminal mode: {}", e);
        }
    }

    // Explicitly drop the VM structure here to allow the devices to clean up before the
    // control tubes are closed when this function exits.
    mem::drop(guest_os);

    info!("guest_os dropped, run_control is done.");

    res
}
| |
| /// Remove Tubes that have been closed from the WaitContext. |
| fn remove_closed_tubes<T, U>( |
| wait_ctx: &WaitContext<T>, |
| tubes: &mut BTreeMap<usize, U>, |
| mut tube_ids_to_remove: Vec<usize>, |
| ) -> anyhow::Result<()> |
| where |
| T: EventToken, |
| U: ReadNotifier + CloseNotifier, |
| { |
| tube_ids_to_remove.dedup(); |
| for id in tube_ids_to_remove { |
| if let Some(socket) = tubes.remove(&id) { |
| wait_ctx |
| .delete(socket.get_read_notifier()) |
| .context("failed to remove descriptor from wait context")?; |
| |
| // There may be a close notifier registered for this Tube. If there isn't one |
| // registered, we just ignore the error. |
| let _ = wait_ctx.delete(socket.get_close_notifier()); |
| } |
| } |
| Ok(()) |
| } |
| |
| /// Sends a message to all VCPUs. |
| fn kick_all_vcpus( |
| run_mode: &VcpuRunMode, |
| vcpu_control_channels: &[mpsc::Sender<VcpuControl>], |
| vcpu_boxes: &Mutex<Vec<Box<dyn VcpuArch>>>, |
| irq_chip: &dyn IrqChipArch, |
| pvclock_host_tube: &Option<Tube>, |
| msg: VcpuControl, |
| ) { |
| // On Windows, we handle run mode switching directly rather than delegating to the VCPU thread |
| // like unix does. |
| match &msg { |
| VcpuControl::RunState(VmRunMode::Suspending) => { |
| suspend_all_vcpus(run_mode, vcpu_boxes, irq_chip, pvclock_host_tube); |
| return; |
| } |
| VcpuControl::RunState(VmRunMode::Running) => { |
| resume_all_vcpus(run_mode, vcpu_boxes, irq_chip, pvclock_host_tube); |
| return; |
| } |
| _ => (), |
| } |
| |
| // For non RunState commands, we dispatch just like unix would. |
| for vcpu in vcpu_control_channels { |
| if let Err(e) = vcpu.send(msg.clone()) { |
| error!("failed to send VcpuControl message: {}", e); |
| } |
| } |
| |
| // Now that we've sent a message, we need VCPUs to exit so they can process it. |
| for vcpu in vcpu_boxes.lock().iter() { |
| vcpu.set_immediate_exit(true); |
| } |
| irq_chip.kick_halted_vcpus(); |
| |
| // If the VCPU isn't running, we have to notify the run_mode condvar to wake it so it processes |
| // the control message. |
| let current_run_mode = run_mode.get_mode(); |
| if current_run_mode != VmRunMode::Running { |
| run_mode.set_and_notify(current_run_mode); |
| } |
| } |
| |
| /// Sends a message to a single VCPU. On Windows, `VcpuControl::RunState` cannot be sent to a single |
| /// VCPU. |
| fn kick_vcpu( |
| run_mode: &VcpuRunMode, |
| vcpu_control_channels: &[mpsc::Sender<VcpuControl>], |
| vcpu_boxes: &Mutex<Vec<Box<dyn VcpuArch>>>, |
| irq_chip: &dyn IrqChipArch, |
| pvclock_host_tube: &Option<Tube>, |
| index: usize, |
| msg: VcpuControl, |
| ) { |
| assert!( |
| !matches!(msg, VcpuControl::RunState(_)), |
| "Windows does not support RunState changes on a per VCPU basis" |
| ); |
| |
| let vcpu = vcpu_control_channels |
| .get(index) |
| .expect("invalid vcpu index specified"); |
| if let Err(e) = vcpu.send(msg) { |
| error!("failed to send VcpuControl message: {}", e); |
| } |
| |
| // Now that we've sent a message, we need the VCPU to exit so it can |
| // process the message. |
| vcpu_boxes |
| .lock() |
| .get(index) |
| .expect("invalid vcpu index specified") |
| .set_immediate_exit(true); |
| irq_chip.kick_halted_vcpus(); |
| |
| // If the VCPU isn't running, we have to notify the run_mode condvar to wake it so it processes |
| // the control message. (Technically this wakes all VCPUs, but those without messages will go |
| // back to sleep.) |
| let current_run_mode = run_mode.get_mode(); |
| if current_run_mode != VmRunMode::Running { |
| run_mode.set_and_notify(current_run_mode); |
| } |
| } |
| |
| /// Suspends all VCPUs. The VM will be effectively frozen in time once this function is called, |
| /// though devices on the host will continue to run. |
| pub(crate) fn suspend_all_vcpus( |
| run_mode: &VcpuRunMode, |
| vcpu_boxes: &Mutex<Vec<Box<dyn VcpuArch>>>, |
| irq_chip: &dyn IrqChipArch, |
| pvclock_host_tube: &Option<Tube>, |
| ) { |
| // VCPU threads MUST see the VmRunMode::Suspending flag first, otherwise |
| // they may re-enter the VM. |
| run_mode.set_and_notify(VmRunMode::Suspending); |
| |
| // Force all vcpus to exit from the hypervisor |
| for vcpu in vcpu_boxes.lock().iter() { |
| vcpu.set_immediate_exit(true); |
| } |
| irq_chip.kick_halted_vcpus(); |
| |
| handle_pvclock_request(pvclock_host_tube, PvClockCommand::Suspend) |
| .unwrap_or_else(|e| error!("Error handling pvclock suspend: {:?}", e)); |
| } |
| |
| /// Resumes all VCPUs. |
| pub(crate) fn resume_all_vcpus( |
| run_mode: &VcpuRunMode, |
| vcpu_boxes: &Mutex<Vec<Box<dyn VcpuArch>>>, |
| irq_chip: &dyn IrqChipArch, |
| pvclock_host_tube: &Option<Tube>, |
| ) { |
| handle_pvclock_request(pvclock_host_tube, PvClockCommand::Resume) |
| .unwrap_or_else(|e| error!("Error handling pvclock resume: {:?}", e)); |
| |
| // Make sure any immediate exit bits are disabled |
| for vcpu in vcpu_boxes.lock().iter() { |
| vcpu.set_immediate_exit(false); |
| } |
| |
| run_mode.set_and_notify(VmRunMode::Running); |
| } |
| |
/// Oldest GVM driver version this build will accept (see `create_gvm_vm`).
#[cfg(feature = "gvm")]
const GVM_MINIMUM_VERSION: GvmVersion = GvmVersion {
    major: 1,
    minor: 4,
    patch: 1,
};
| |
/// Creates a GVM-backed VM, first verifying that the installed GVM driver
/// meets `GVM_MINIMUM_VERSION`. Returns ENXIO when the version cannot be
/// determined or is too old.
#[cfg(feature = "gvm")]
fn create_gvm_vm(gvm: Gvm, mem: GuestMemory) -> Result<GvmVm> {
    // Reject drivers whose version we cannot read at all.
    let version = match gvm.get_full_version() {
        Ok(version) => version,
        Err(e) => {
            error!("unable to determine gvm version: {}", e);
            return Err(base::Error::new(libc::ENXIO).into());
        }
    };

    if version < GVM_MINIMUM_VERSION {
        error!(
            "GVM version {} is below minimum version {}",
            version, GVM_MINIMUM_VERSION
        );
        return Err(base::Error::new(libc::ENXIO).into());
    }
    info!("Using GVM version {}.", version);

    let vm = GvmVm::new(&gvm, mem)?;
    Ok(vm)
}
| |
/// Creates a HAXM-backed VM, optionally registering a kernel log file when
/// `kernel_log_file` is given and the HAXM driver supports it.
#[cfg(feature = "haxm")]
fn create_haxm_vm(
    haxm: Haxm,
    mem: GuestMemory,
    kernel_log_file: &Option<String>,
) -> Result<HaxmVm> {
    let vm = HaxmVm::new(&haxm, mem)?;

    // No log file requested: nothing else to do.
    let path = match kernel_log_file {
        Some(path) => path,
        None => return Ok(vm),
    };

    use hypervisor::haxm::HAX_CAP_VM_LOG;
    if !vm.check_raw_capability(HAX_CAP_VM_LOG) {
        warn!(
            "kernel_log_file specified but this version of HAXM does not support kernel log \
            files"
        );
        return Ok(vm);
    }

    if let Err(e) = vm.register_log_file(path) {
        // An over-long path is tolerated (logged) rather than fatal.
        if e.errno() == libc::E2BIG {
            error!("kernel_log_file path is too long, kernel log file will not be written");
        } else {
            return Err(e.into());
        }
    }
    Ok(vm)
}
| |
/// Creates a WHPX-backed VM, pre-setting CPUID entries adjusted for crosvm.
#[cfg(feature = "whpx")]
#[cfg(target_arch = "x86_64")]
fn create_whpx_vm(
    whpx: Whpx,
    mem: GuestMemory,
    cpu_count: usize,
    no_smt: bool,
    apic_emulation: bool,
    force_calibrated_tsc_leaf: bool,
    vm_evt_wrtube: SendTube,
) -> Result<WhpxVm> {
    let cpu_config = hypervisor::CpuConfigX86_64::new(
        force_calibrated_tsc_leaf,
        false, /* host_cpu_topology */
        false, /* enable_hwp */
        no_smt,
        false, /* itmt */
        None,  /* hybrid_type */
    );

    // Context for non-cpu-specific cpuid results.
    let cpuid_ctx = CpuIdContext::new(
        0,
        cpu_count,
        None,
        cpu_config,
        whpx.check_capability(HypervisorCap::CalibratedTscLeafRequired),
        __cpuid_count,
        __cpuid,
    );

    // Fetch all cpuid entries we should pre-set, then adjust them for crosvm.
    let mut cpuid = whpx.get_supported_cpuid()?;
    cpuid
        .cpu_id_entries
        .iter_mut()
        .for_each(|entry| adjust_cpuid(entry, &cpuid_ctx));

    let vm = WhpxVm::new(
        &whpx,
        cpu_count,
        mem,
        cpuid,
        apic_emulation,
        Some(vm_evt_wrtube),
    )
    .exit_context(Exit::WhpxSetupError, "failed to create WHPX vm")?;

    Ok(vm)
}
| |
/// Creates the GVM irqchip for `vm` with `vcpu_count` vcpus.
#[cfg(feature = "gvm")]
fn create_gvm_irq_chip(vm: &GvmVm, vcpu_count: usize) -> base::Result<GvmIrqChip> {
    info!("Creating GVM irqchip");
    let chip = GvmIrqChip::new(vm.try_clone()?, vcpu_count)?;
    Ok(chip)
}
| |
/// Creates the WHPX split irqchip for `vm`, using `ioapic_device_tube` for
/// IOAPIC communication.
#[cfg(feature = "whpx")]
#[cfg(target_arch = "x86_64")]
fn create_whpx_split_irq_chip(
    vm: &WhpxVm,
    ioapic_device_tube: Tube,
) -> base::Result<WhpxSplitIrqChip> {
    info!("Creating WHPX split irqchip");
    WhpxSplitIrqChip::new(
        vm.try_clone()?,
        ioapic_device_tube,
        /* ioapic_pins: */ None,
    )
}
| |
| fn create_userspace_irq_chip<Vcpu>( |
| vcpu_count: usize, |
| ioapic_device_tube: Tube, |
| ) -> base::Result<UserspaceIrqChip<Vcpu>> |
| where |
| Vcpu: VcpuArch + 'static, |
| { |
| info!("Creating userspace irqchip"); |
| let irq_chip = |
| UserspaceIrqChip::new(vcpu_count, ioapic_device_tube, /* ioapic_pins: */ None)?; |
| Ok(irq_chip) |
| } |
| |
| pub fn get_default_hypervisor() -> Option<HypervisorKind> { |
| // The ordering here matters from most preferable to the least. |
| #[cfg(feature = "whpx")] |
| match hypervisor::whpx::Whpx::is_enabled() { |
| true => return Some(HypervisorKind::Whpx), |
| false => warn!("Whpx not enabled."), |
| }; |
| |
| #[cfg(feature = "haxm")] |
| if get_cpu_manufacturer() == CpuManufacturer::Intel { |
| // Make sure Haxm device can be opened before selecting it. |
| match Haxm::new() { |
| Ok(_) => return Some(HypervisorKind::Ghaxm), |
| Err(e) => warn!("Cannot initialize HAXM: {}", e), |
| }; |
| } |
| |
| #[cfg(feature = "gvm")] |
| // Make sure Gvm device can be opened before selecting it. |
| match Gvm::new() { |
| Ok(_) => return Some(HypervisorKind::Gvm), |
| Err(e) => warn!("Cannot initialize GVM: {}", e), |
| }; |
| |
| None |
| } |
| |
/// Assembles the `VmComponents` used to build the VM from the parsed `Config`.
///
/// Opens the kernel or BIOS image (panics if neither was supplied — upstream
/// argument parsing is expected to enforce that), plus any initrd, pflash,
/// android fstab, and ACPI table files referenced by the config.
fn setup_vm_components(cfg: &Config) -> Result<VmComponents> {
    let initrd_image = if let Some(initrd_path) = &cfg.initrd_path {
        Some(
            File::open(initrd_path).with_exit_context(Exit::OpenInitrd, || {
                format!("failed to open initrd {}", initrd_path.display())
            })?,
        )
    } else {
        None
    };

    let vm_image = match cfg.executable_path {
        Some(Executable::Kernel(ref kernel_path)) => VmImage::Kernel(
            File::open(kernel_path).with_exit_context(Exit::OpenKernel, || {
                format!("failed to open kernel image {}", kernel_path.display(),)
            })?,
        ),
        Some(Executable::Bios(ref bios_path)) => {
            VmImage::Bios(File::open(bios_path).with_exit_context(Exit::OpenBios, || {
                format!("failed to open bios {}", bios_path.display())
            })?)
        }
        _ => panic!("Did not receive a bios or kernel, should be impossible."),
    };

    // swiotlb size in bytes: explicit value (given in MiB) if configured;
    // otherwise protected VMs default to 64 MiB and unprotected VMs get none.
    let swiotlb = if let Some(size) = cfg.swiotlb {
        Some(
            size.checked_mul(1024 * 1024)
                .ok_or_else(|| anyhow!("requested swiotlb size too large"))?,
        )
    } else if matches!(cfg.protection_type, ProtectionType::Unprotected) {
        None
    } else {
        Some(64 * 1024 * 1024)
    };

    let (pflash_image, pflash_block_size) = if let Some(pflash_parameters) = &cfg.pflash_parameters
    {
        (
            Some(
                open_file_or_duplicate(
                    &pflash_parameters.path,
                    OpenOptions::new().read(true).write(true),
                )
                .with_context(|| {
                    format!("failed to open pflash {}", pflash_parameters.path.display())
                })?,
            ),
            pflash_parameters.block_size,
        )
    } else {
        (None, 0)
    };

    Ok(VmComponents {
        // Guest memory size in bytes; the config value is in MiB (default 256).
        memory_size: cfg
            .memory
            .unwrap_or(256)
            .checked_mul(1024 * 1024)
            .ok_or_else(|| anyhow!("requested memory size too large"))?,
        swiotlb,
        vcpu_count: cfg.vcpu_count.unwrap_or(1),
        fw_cfg_enable: false,
        bootorder_fw_cfg_blob: Vec::new(),
        vcpu_affinity: cfg.vcpu_affinity.clone(),
        cpu_clusters: cfg.cpu_clusters.clone(),
        cpu_capacity: cfg.cpu_capacity.clone(),
        no_smt: cfg.no_smt,
        hugepages: cfg.hugepages,
        hv_cfg: hypervisor::Config {
            protection_type: cfg.protection_type,
        },
        vm_image,
        android_fstab: cfg
            .android_fstab
            .as_ref()
            .map(|x| {
                File::open(x).with_exit_context(Exit::OpenAndroidFstab, || {
                    format!("failed to open android fstab file {}", x.display())
                })
            })
            .map_or(Ok(None), |v| v.map(Some))?,
        pstore: cfg.pstore.clone(),
        pflash_block_size,
        pflash_image,
        initrd_image,
        extra_kernel_params: cfg.params.clone(),
        acpi_sdts: cfg
            .acpi_tables
            .iter()
            .map(|path| {
                SDT::from_file(path).with_exit_context(Exit::OpenAcpiTable, || {
                    format!("failed to open ACPI file {}", path.display())
                })
            })
            .collect::<Result<Vec<SDT>>>()?,
        rt_cpus: cfg.rt_cpus.clone(),
        delay_rt: cfg.delay_rt,
        no_i8042: cfg.no_i8042,
        no_rtc: cfg.no_rtc,
        host_cpu_topology: cfg.host_cpu_topology,
        #[cfg(target_arch = "x86_64")]
        force_s2idle: cfg.force_s2idle,
        fw_cfg_parameters: cfg.fw_cfg_parameters.clone(),
        itmt: false,
        pvm_fw: None,
        #[cfg(target_arch = "x86_64")]
        pci_low_start: cfg.pci_low_start,
        #[cfg(target_arch = "x86_64")]
        pcie_ecam: cfg.pcie_ecam,
        #[cfg(target_arch = "x86_64")]
        smbios: cfg.smbios.clone(),
        dynamic_power_coefficient: cfg.dynamic_power_coefficient.clone(),
        #[cfg(target_arch = "x86_64")]
        break_linux_pci_config_io: cfg.break_linux_pci_config_io,
        boot_cpu: cfg.boot_cpu,
    })
}
| |
// Enum that allows us to assign a variable to what is essentially a &dyn IrqChipArch.
// The populated variant depends on which hypervisor backend was selected.
enum WindowsIrqChip<V: VcpuArch> {
    // Irqchip emulated in userspace.
    Userspace(UserspaceIrqChip<V>),
    // GVM's irqchip.
    #[cfg(feature = "gvm")]
    Gvm(GvmIrqChip),
    // WHPX's split irqchip.
    #[cfg(feature = "whpx")]
    WhpxSplit(WhpxSplitIrqChip),
}
| |
impl<V: VcpuArch> WindowsIrqChip<V> {
    // Convert our enum to a &mut dyn IrqChipArch
    fn as_mut(&mut self) -> &mut dyn IrqChipArch {
        // Each variant's payload implements IrqChipArch, so every arm just
        // coerces its payload to the trait object.
        match self {
            WindowsIrqChip::Userspace(i) => i,
            #[cfg(feature = "gvm")]
            WindowsIrqChip::Gvm(i) => i,
            #[cfg(feature = "whpx")]
            WindowsIrqChip::WhpxSplit(i) => i,
        }
    }
}
| |
/// Storage for the VM TSC offset for each vcpu. Stored in a static because the tracing thread will
/// need access to it when tracing is enabled. Indexed by vcpu id; an entry is `None` until that
/// vcpu's offset has been recorded by `save_vcpu_tsc_offset`.
static TSC_OFFSETS: sync::Mutex<Vec<Option<u64>>> = sync::Mutex::new(Vec::new());
| |
| /// Save the TSC offset for a particular vcpu. |
| /// |
| /// After setting the TSC offset for a vcpu, this function checks the standard deviation of offsets |
| /// for all the VCPUs and logs this information. If the TSC offsets differ too much between vcpus |
| /// it can cause clock issues in the guest. |
| pub fn save_vcpu_tsc_offset(offset: u64, vcpu_id: usize) { |
| let offsets_copy = { |
| let mut offsets = TSC_OFFSETS.lock(); |
| // make sure offsets vec is large enough before inserting |
| let newlen = std::cmp::max(offsets.len(), vcpu_id + 1); |
| offsets.resize(newlen, None); |
| offsets[vcpu_id] = Some(offset); |
| |
| offsets.clone() |
| }; |
| |
| // do statistics on a clone of the offsets so we don't hold up other vcpus at this point |
| info!( |
| "TSC offset standard deviation is: {}", |
| standard_deviation( |
| &offsets_copy |
| .iter() |
| .filter(|x| x.is_some()) |
| .map(|x| x.unwrap() as u128) |
| .collect::<Vec<u128>>() |
| ) |
| ); |
| } |
| |
/// Get the TSC offset of any vcpu. It will pick the first non-None offset it finds in TSC_OFFSETS.
///
/// Returns 0 when no vcpu has recorded an offset yet.
#[cfg(feature = "perfetto")]
pub fn get_vcpu_tsc_offset() -> u64 {
    TSC_OFFSETS
        .lock()
        .iter()
        .flatten()
        .next()
        .copied()
        .unwrap_or(0)
}
| |
/// Callback that is registered with tracing crate, and will be called by the tracing thread when
/// tracing is enabled or disabled. Regardless of whether tracing is being enabled or disabled for
/// a given category or instance, we just emit a clock snapshot that maps the guest TSC to the
/// host TSC. Redundant snapshots should not be a problem for perfetto.
#[cfg(feature = "perfetto")]
fn set_tsc_clock_snapshot() {
    let freq = match devices::tsc::tsc_frequency() {
        Err(e) => {
            // Without a TSC frequency the snapshot would be meaningless; log and skip it.
            error!(
                "Could not determine tsc frequency, unable to snapshot tsc offset: {}",
                e
            );
            return;
        }
        Ok(freq) => freq,
    };

    // The offset is host-guest tsc value
    let offset = get_vcpu_tsc_offset();
    // SAFETY: `_rdtsc` takes no arguments and has no memory-safety preconditions.
    let host_tsc = unsafe { std::arch::x86_64::_rdtsc() };
    perfetto::snapshot_clock(perfetto::ClockSnapshot::new(
        // Technically our multiplier should be freq/1_000_000_000, but perfetto doesn't
        // support floating point multipliers yet. So for now we set the freq in Hz and rely
        // on the merge tool to fix it.
        perfetto::Clock::new(
            perfetto::BuiltinClock::Tsc as u32,
            host_tsc.wrapping_add(offset),
        )
        .set_multiplier(freq as u64),
        perfetto::Clock::new(
            // The host builtin clock ids are all offset from the guest ids by
            // HOST_GUEST_CLOCK_ID_OFFSET when the traces are merged. Because this snapshot
            // contains both a guest and host clock, we need to offset it before merge.
            perfetto::BuiltinClock::Tsc as u32 + cros_tracing::HOST_GUEST_CLOCK_ID_OFFSET,
            host_tsc,
        )
        .set_multiplier(freq as u64),
    ));
}
| |
/// Launches run_config for the broker, reading configuration from a TubeTransporter.
///
/// The receive order below is a protocol with the broker: Config, then
/// CommonChildStartupArgs, then the shutdown Event, then (with crash-report) the crash tube
/// map, and finally the BrokerTubes. Do not reorder these recvs.
pub fn run_config_for_broker(raw_tube_transporter: RawDescriptor) -> Result<ExitState> {
    let tube_transporter =
        // SAFETY:
        // Safe because we know that raw_transport_tube is valid (passed by inheritance), and that
        // the blocking & framing modes are accurate because we create them ourselves in the broker.
        unsafe { TubeTransporterReader::from_raw_descriptor(raw_tube_transporter) };

    let mut tube_data_list = tube_transporter
        .read_tubes()
        .exit_context(Exit::TubeTransporterInit, "failed to init tube transporter")?;

    let bootstrap_tube = tube_data_list
        .get_tube(TubeToken::Bootstrap)
        .exit_context(Exit::TubeFailure, "failed to get bootstrap tube")?;

    // First message from the broker is the full VM configuration.
    let mut cfg: Config = bootstrap_tube
        .recv::<Config>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;

    let startup_args: CommonChildStartupArgs = bootstrap_tube
        .recv::<CommonChildStartupArgs>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;
    let _child_cleanup = common_child_setup(startup_args).exit_context(
        Exit::CommonChildSetupError,
        "failed to perform common child setup",
    )?;

    // Shutdown event provided by the broker; stored in the config and later passed to
    // run_control (see run_vm).
    cfg.broker_shutdown_event = Some(
        bootstrap_tube
            .recv::<Event>()
            .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?,
    );
    #[cfg(feature = "crash-report")]
    let crash_tube_map = bootstrap_tube
        .recv::<HashMap<ProcessType, Vec<SendTube>>>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;
    #[cfg(feature = "crash-report")]
    crash_report::set_crash_tube_map(crash_tube_map);

    let BrokerTubes {
        vm_evt_wrtube,
        vm_evt_rdtube,
    } = bootstrap_tube
        .recv::<BrokerTubes>()
        .exit_context(Exit::TubeFailure, "failed to read bootstrap tube")?;

    run_config_inner(cfg, vm_evt_wrtube, vm_evt_rdtube)
}
| |
| pub fn run_config(cfg: Config) -> Result<ExitState> { |
| let _raise_timer_resolution = enable_high_res_timers() |
| .exit_context(Exit::EnableHighResTimer, "failed to enable high res timer")?; |
| |
| // There is no broker when using run_config(), so the vm_evt tubes need to be created. |
| let (vm_evt_wrtube, vm_evt_rdtube) = |
| Tube::directional_pair().context("failed to create vm event tube")?; |
| |
| run_config_inner(cfg, vm_evt_wrtube, vm_evt_rdtube) |
| } |
| |
| fn create_guest_memory( |
| components: &VmComponents, |
| hypervisor: &impl Hypervisor, |
| ) -> Result<GuestMemory> { |
| let guest_mem_layout = Arch::guest_memory_layout(components, hypervisor).exit_context( |
| Exit::GuestMemoryLayout, |
| "failed to create guest memory layout", |
| )?; |
| GuestMemory::new_with_options(&guest_mem_layout) |
| .exit_context(Exit::CreateGuestMemory, "failed to create guest memory") |
| } |
| |
/// Creates the hypervisor, guest memory, VM, and irq chip for the configured hypervisor kind,
/// then hands everything off to `run_vm`.
fn run_config_inner(
    cfg: Config,
    vm_evt_wrtube: SendTube,
    vm_evt_rdtube: RecvTube,
) -> Result<ExitState> {
    product::setup_common_metric_invariants(&cfg);

    // Emit a TSC clock snapshot every time tracing is toggled so traces can map guest TSC to
    // host TSC (see set_tsc_clock_snapshot).
    #[cfg(feature = "perfetto")]
    cros_tracing::add_per_trace_callback(set_tsc_clock_snapshot);

    let components: VmComponents = setup_vm_components(&cfg)?;

    #[allow(unused_mut)]
    let mut hypervisor = cfg
        .hypervisor
        .or_else(get_default_hypervisor)
        .exit_context(Exit::NoDefaultHypervisor, "no enabled hypervisor")?;

    #[cfg(feature = "whpx")]
    if hypervisor::whpx::Whpx::is_enabled() {
        // If WHPX is enabled, no other hypervisor can be used, so just override it
        hypervisor = HypervisorKind::Whpx;
    }

    match hypervisor {
        #[cfg(feature = "haxm")]
        HypervisorKind::Haxm | HypervisorKind::Ghaxm => {
            if hypervisor == HypervisorKind::Haxm {
                // Plain Haxm was explicitly requested: turn the ghaxm variant off.
                set_use_ghaxm(false);
            }
            info!("Creating HAXM ghaxm={}", get_use_ghaxm());
            let haxm = Haxm::new()?;
            let guest_mem = create_guest_memory(&components, &haxm)?;
            let vm = create_haxm_vm(haxm, guest_mem, &cfg.kernel_log_file)?;
            let (ioapic_host_tube, ioapic_device_tube) =
                Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
            // HAXM always uses the fully userspace irq chip.
            let irq_chip =
                create_userspace_irq_chip::<HaxmVcpu>(components.vcpu_count, ioapic_device_tube)?;
            run_vm::<HaxmVcpu, HaxmVm>(
                cfg,
                components,
                vm,
                WindowsIrqChip::Userspace(irq_chip).as_mut(),
                Some(ioapic_host_tube),
                vm_evt_wrtube,
                vm_evt_rdtube,
            )
        }
        #[cfg(feature = "whpx")]
        HypervisorKind::Whpx => {
            let apic_emulation_supported =
                Whpx::check_whpx_feature(WhpxFeature::LocalApicEmulation)
                    .exit_context(Exit::WhpxSetupError, "failed to set up whpx")?;

            let no_smt = cfg.no_smt;

            // Default to WhpxSplitIrqChip if it's supported because it's more performant
            let irq_chip = cfg.irq_chip.unwrap_or(if apic_emulation_supported {
                IrqChipKind::Split
            } else {
                IrqChipKind::Userspace
            });

            // Both WHPX irq chips use a userspace IOAPIC
            let (ioapic_host_tube, ioapic_device_tube) =
                Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;

            info!("Creating Whpx");
            let whpx = Whpx::new()?;
            let guest_mem = create_guest_memory(&components, &whpx)?;
            let vm = create_whpx_vm(
                whpx,
                guest_mem,
                components.vcpu_count,
                no_smt,
                apic_emulation_supported && irq_chip == IrqChipKind::Split,
                cfg.force_calibrated_tsc_leaf,
                vm_evt_wrtube
                    .try_clone()
                    .expect("could not clone vm_evt_wrtube"),
            )?;

            let mut irq_chip = match irq_chip {
                IrqChipKind::Kernel => unimplemented!("Kernel irqchip mode not supported by WHPX"),
                IrqChipKind::Split => {
                    if !apic_emulation_supported {
                        panic!(
                            "split irqchip specified but your WHPX version does not support \
                                local apic emulation"
                        );
                    }
                    WindowsIrqChip::WhpxSplit(create_whpx_split_irq_chip(&vm, ioapic_device_tube)?)
                }
                IrqChipKind::Userspace => {
                    WindowsIrqChip::Userspace(create_userspace_irq_chip::<WhpxVcpu>(
                        components.vcpu_count,
                        ioapic_device_tube,
                    )?)
                }
            };
            run_vm::<WhpxVcpu, WhpxVm>(
                cfg,
                components,
                vm,
                irq_chip.as_mut(),
                Some(ioapic_host_tube),
                vm_evt_wrtube,
                vm_evt_rdtube,
            )
        }
        #[cfg(feature = "gvm")]
        HypervisorKind::Gvm => {
            info!("Creating GVM");
            let gvm = Gvm::new()?;
            let guest_mem = create_guest_memory(&components, &gvm)?;
            let vm = create_gvm_vm(gvm, guest_mem)?;
            // Only the userspace irq chip needs an ioapic host tube; the kernel chip has no
            // userspace IOAPIC.
            let ioapic_host_tube;
            let mut irq_chip = match cfg.irq_chip.unwrap_or(IrqChipKind::Kernel) {
                IrqChipKind::Split => unimplemented!("Split irqchip mode not supported by GVM"),
                IrqChipKind::Kernel => {
                    ioapic_host_tube = None;
                    WindowsIrqChip::Gvm(create_gvm_irq_chip(&vm, components.vcpu_count)?)
                }
                IrqChipKind::Userspace => {
                    let (host_tube, ioapic_device_tube) =
                        Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
                    ioapic_host_tube = Some(host_tube);
                    WindowsIrqChip::Userspace(create_userspace_irq_chip::<GvmVcpu>(
                        components.vcpu_count,
                        ioapic_device_tube,
                    )?)
                }
            };
            run_vm::<GvmVcpu, GvmVm>(
                cfg,
                components,
                vm,
                irq_chip.as_mut(),
                ioapic_host_tube,
                vm_evt_wrtube,
                vm_evt_rdtube,
            )
        }
    }
}
| |
/// Creates the control tubes, devices, and memory regions for the VM, builds it via the arch
/// crate, lowers the sandbox token (when sandboxing is enabled), and then runs the control loop
/// until the VM exits.
#[cfg(any(feature = "haxm", feature = "gvm", feature = "whpx"))]
fn run_vm<Vcpu, V>(
    #[allow(unused_mut)] mut cfg: Config,
    #[allow(unused_mut)] mut components: VmComponents,
    mut vm: V,
    irq_chip: &mut dyn IrqChipArch,
    ioapic_host_tube: Option<Tube>,
    vm_evt_wrtube: SendTube,
    vm_evt_rdtube: RecvTube,
) -> Result<ExitState>
where
    Vcpu: VcpuArch + 'static,
    V: VmArch + 'static,
{
    let vm_memory_size_mb = components.memory_size / (1024 * 1024);
    let mut control_tubes = Vec::new();
    let mut irq_control_tubes = Vec::new();
    let mut vm_memory_control_tubes = Vec::new();
    // Create one control tube per disk.
    let mut disk_device_tubes = Vec::new();
    let mut disk_host_tubes = Vec::new();
    let disk_count = cfg.disks.len();
    for _ in 0..disk_count {
        let (disk_host_tube, disk_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        disk_host_tubes.push(disk_host_tube);
        disk_device_tubes.push(disk_device_tube);
    }

    if let Some(ioapic_host_tube) = ioapic_host_tube {
        irq_control_tubes.push(ioapic_host_tube);
    }

    // Balloon gets a special socket so balloon requests can be forwarded from the main process.
    let (balloon_host_tube, balloon_device_tube) = if cfg.balloon {
        let (balloon_host_tube, balloon_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        (Some(balloon_host_tube), Some(balloon_device_tube))
    } else {
        (None, None)
    };
    // The balloon device also needs a tube to communicate back to the main process to
    // handle remapping memory dynamically.
    let dynamic_mapping_device_tube = if cfg.balloon {
        let (dynamic_mapping_host_tube, dynamic_mapping_device_tube) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        vm_memory_control_tubes.push(dynamic_mapping_host_tube);
        Some(dynamic_mapping_device_tube)
    } else {
        None
    };

    // PvClock gets a tube for handling suspend/resume requests from the main thread.
    let (pvclock_host_tube, pvclock_device_tube) = if cfg.pvclock {
        let (host, device) =
            Tube::pair().exit_context(Exit::CreateTube, "failed to create tube")?;
        (Some(host), Some(device))
    } else {
        (None, None)
    };

    let gralloc =
        RutabagaGralloc::new().exit_context(Exit::CreateGralloc, "failed to create gralloc")?;

    let pstore_size = components.pstore.as_ref().map(|pstore| pstore.size as u64);
    let mut sys_allocator = SystemAllocator::new(
        Arch::get_system_allocator_config(&vm),
        pstore_size,
        &cfg.mmio_address_ranges,
    )
    .context("failed to create system allocator")?;

    // Allocate the ramoops region first.
    let ramoops_region = match &components.pstore {
        Some(pstore) => Some(
            arch::pstore::create_memory_region(
                &mut vm,
                sys_allocator.reserved_region().unwrap(),
                pstore,
            )
            .exit_context(
                Exit::Pstore,
                format!("failed to allocate pstore region {:?}", &components.pstore),
            )?,
        ),
        None => None,
    };

    // Initial balloon size = total memory minus configured init memory (both in bytes);
    // errors if init memory exceeds total memory.
    let init_balloon_size = components
        .memory_size
        .checked_sub(cfg.init_memory.map_or(components.memory_size, |m| {
            m.checked_mul(1024 * 1024).unwrap_or(u64::MAX)
        }))
        .context("failed to calculate init balloon size")?;

    let tsc_state = devices::tsc::tsc_state().exit_code(Exit::TscCalibrationFailed)?;
    let tsc_sync_mitigations = get_tsc_sync_mitigations(&tsc_state, components.vcpu_count);

    if tsc_state.core_grouping.size() > 1 {
        // Host TSCs are not in sync, log a metric about it.
        warn!(
            "Host TSCs are not in sync, applying the following mitigations: {:?}",
            tsc_sync_mitigations
        );
        log_descriptor(
            MetricEventType::TscCoresOutOfSync,
            // casting u64 as i64 is a no-op, so we don't lose any part of the bitmask
            tsc_state.core_grouping.core_grouping_bitmask() as i64,
        );
    }

    #[cfg(feature = "gpu")]
    let gpu_control_tube = cfg
        .gpu_vmm_config
        .as_mut()
        .and_then(|config| config.gpu_control_host_tube.take());
    let product_args = product::get_run_control_args(&mut cfg);

    // We open these files before lowering the token, as in the future a stricter policy may
    // prevent it.
    let dt_overlays = cfg
        .device_tree_overlay
        .iter()
        .map(|o| {
            Ok(DtbOverlay {
                file: open_file_or_duplicate(o.path.as_path(), OpenOptions::new().read(true))
                    .with_context(|| {
                        format!("failed to open device tree overlay {}", o.path.display())
                    })?,
            })
        })
        .collect::<Result<Vec<DtbOverlay>>>()?;

    // Lower the token, locking the main process down to a stricter security policy.
    //
    // WARNING:
    //
    // Windows system calls can behave in unusual ways if they happen concurrently to the token
    // lowering. For example, access denied can happen if Tube pairs are created in another thread
    // (b/281108137), and lower_token happens right before the client pipe is connected. Tubes are
    // not privileged resources, but can be broken due to the token changing unexpectedly.
    //
    // We explicitly lower the token here and *then* call run_control to make it clear that any
    // resources that require a privileged token should be created on the main thread & passed into
    // run_control, to follow the correct order:
    // - Privileged resources are created.
    // - Token is lowered.
    // - Threads are spawned & may create more non-privileged resources (without fear of the token
    //   changing at an undefined time).
    //
    // Recommendation: If you find your code doesnt work in run_control because of the sandbox, you
    // should split any resource creation to before this token lowering & pass the resources into
    // run_control. Don't move the token lowering somewhere else without considering multi-threaded
    // effects.
    #[cfg(feature = "sandbox")]
    if sandbox::is_sandbox_target() {
        sandbox::TargetServices::get()
            .exit_code_from_err("failed to create sandbox")?
            .expect("Could not create sandbox!")
            .lower_token();
    }

    let virtio_snd_state_device_tube = create_snd_state_tube(&mut control_tubes)?;

    let (virtio_snd_host_mute_tube, virtio_snd_device_mute_tube) = create_snd_mute_tube_pair()?;

    let pci_devices = create_devices(
        &mut cfg,
        vm.get_memory(),
        &vm_evt_wrtube,
        &mut irq_control_tubes,
        &mut vm_memory_control_tubes,
        &mut control_tubes,
        &mut disk_device_tubes,
        balloon_device_tube,
        pvclock_device_tube,
        dynamic_mapping_device_tube,
        /* inflate_tube= */ None,
        init_balloon_size,
        tsc_state.frequency,
        virtio_snd_state_device_tube,
        virtio_snd_device_mute_tube,
    )?;

    let mut vcpu_ids = Vec::new();

    let windows = Arch::build_vm::<V, Vcpu>(
        components,
        &vm_evt_wrtube,
        &mut sys_allocator,
        &cfg.serial_parameters,
        None,
        (cfg.battery_config.as_ref().map(|t| t.type_), None),
        vm,
        ramoops_region,
        pci_devices,
        irq_chip,
        &mut vcpu_ids,
        cfg.dump_device_tree_blob.clone(),
        /* debugcon_jail= */ None,
        None,
        None,
        dt_overlays,
    )
    .exit_context(Exit::BuildVm, "the architecture failed to build the vm")?;

    #[cfg(feature = "stats")]
    let stats = if cfg.exit_stats {
        Some(Arc::new(Mutex::new(StatisticsCollector::new())))
    } else {
        None
    };

    run_control(
        windows,
        sys_allocator,
        control_tubes,
        irq_control_tubes,
        vm_memory_control_tubes,
        vm_evt_rdtube,
        vm_evt_wrtube,
        #[cfg(feature = "gpu")]
        gpu_control_tube,
        cfg.broker_shutdown_event.take(),
        balloon_host_tube,
        pvclock_host_tube,
        disk_host_tubes,
        gralloc,
        #[cfg(feature = "stats")]
        stats,
        cfg.service_pipe_name,
        vm_memory_size_mb,
        cfg.host_cpu_topology,
        tsc_sync_mitigations,
        cfg.force_calibrated_tsc_leaf,
        product_args,
        virtio_snd_host_mute_tube,
        cfg.restore_path,
        cfg.socket_path,
        cfg.force_s2idle,
        cfg.suspended,
    )
}
| |
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;

    /// Builds a minimal `Config` whose kernel path points at an empty file inside `test_dir`,
    /// so `setup_vm_components` has a real file to open.
    fn create_config(test_dir: &TempDir) -> Config {
        let mut config = Config::default();

        let dummy_kernel_path = test_dir.path().join("dummy_kernel.txt");
        OpenOptions::new()
            .create(true)
            .write(true)
            .open(&dummy_kernel_path)
            .expect("Could not open file!");
        config.executable_path = Some(Executable::Kernel(dummy_kernel_path));

        config
    }

    #[test]
    #[should_panic(expected = "Did not receive a bios or kernel")]
    fn setup_vm_components_panics_when_no_kernel_provided() {
        let mut config =
            create_config(&TempDir::new().expect("Could not create temporary directory!"));
        config.executable_path = None;
        let _ = setup_vm_components(&config);
    }

    #[test]
    fn setup_vm_components_stores_memory_in_bytes() {
        let tempdir = TempDir::new().expect("Could not create temporary directory!");
        let mut config = create_config(&tempdir);
        config.memory = Some(1);
        let vm_components = setup_vm_components(&config).expect("failed to setup vm components");
        assert_eq!(vm_components.memory_size, 1024 * 1024);
    }

    #[test]
    fn setup_vm_components_fails_when_memory_too_large() {
        let tempdir = TempDir::new().expect("Could not create temporary directory!");
        let mut config = create_config(&tempdir);
        // One mb more than a u64 can hold in bytes, so the MiB -> bytes conversion
        // must overflow and be reported as an error.
        config.memory = Some((u64::MAX / 1024 / 1024) + 1);
        // `is_err` is the idiomatic check and does not require `Debug` on the Ok type,
        // unlike the previous `.err().expect(...)` formulation.
        assert!(setup_vm_components(&config).is_err());
    }
}