blob: 9bf32115bd45ea2938257da2940a1cc419a18ced [file] [log] [blame]
// Copyright 2018 The ChromiumOS Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//! ARM 64-bit architecture support.
#![cfg(any(target_arch = "arm", target_arch = "aarch64"))]
use std::collections::BTreeMap;
use std::fs::File;
use std::io;
use std::path::PathBuf;
use std::sync::mpsc;
use std::sync::Arc;
use arch::get_serial_cmdline;
use arch::CpuSet;
use arch::DtbOverlay;
use arch::GetSerialCmdlineError;
use arch::RunnableLinuxVm;
use arch::VcpuAffinity;
use arch::VmComponents;
use arch::VmImage;
use base::Event;
use base::MemoryMappingBuilder;
use base::SendTube;
use devices::serial_device::SerialHardware;
use devices::serial_device::SerialParameters;
use devices::vmwdt::VMWDT_DEFAULT_CLOCK_HZ;
use devices::vmwdt::VMWDT_DEFAULT_TIMEOUT_SEC;
use devices::Bus;
use devices::BusDeviceObj;
use devices::BusError;
use devices::BusType;
use devices::IrqChip;
use devices::IrqChipAArch64;
use devices::IrqEventSource;
use devices::PciAddress;
use devices::PciConfigMmio;
use devices::PciDevice;
use devices::PciRootCommand;
use devices::Serial;
#[cfg(any(target_os = "android", target_os = "linux"))]
use devices::VirtCpufreq;
#[cfg(feature = "gdb")]
use gdbstub::arch::Arch;
#[cfg(feature = "gdb")]
use gdbstub_arch::aarch64::AArch64 as GdbArch;
use hypervisor::CpuConfigAArch64;
use hypervisor::DeviceKind;
use hypervisor::Hypervisor;
use hypervisor::HypervisorCap;
use hypervisor::MemCacheType;
use hypervisor::ProtectionType;
use hypervisor::VcpuAArch64;
use hypervisor::VcpuFeature;
use hypervisor::VcpuInitAArch64;
use hypervisor::VcpuRegAArch64;
use hypervisor::Vm;
use hypervisor::VmAArch64;
#[cfg(windows)]
use jail::FakeMinijailStub as Minijail;
use kernel_loader::LoadedKernel;
#[cfg(any(target_os = "android", target_os = "linux"))]
use minijail::Minijail;
use remain::sorted;
use resources::AddressRange;
use resources::SystemAllocator;
use resources::SystemAllocatorConfig;
#[cfg(any(target_os = "android", target_os = "linux"))]
use sync::Condvar;
use sync::Mutex;
use thiserror::Error;
use vm_control::BatControl;
use vm_control::BatteryType;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use vm_memory::GuestMemoryError;
use vm_memory::MemoryRegionOptions;
use vm_memory::MemoryRegionPurpose;
mod fdt;
// We place the kernel at the very beginning of physical memory.
const AARCH64_KERNEL_OFFSET: u64 = 0;
const AARCH64_FDT_MAX_SIZE: u64 = 0x200000;
const AARCH64_INITRD_ALIGN: u64 = 0x1000000;
// These constants indicate the address space used by the ARM vGIC.
const AARCH64_GIC_DIST_SIZE: u64 = 0x10000;
const AARCH64_GIC_CPUI_SIZE: u64 = 0x20000;
// This indicates the start of DRAM inside the physical address space.
const AARCH64_PHYS_MEM_START: u64 = 0x80000000;
const AARCH64_AXI_BASE: u64 = 0x40000000;
const AARCH64_PLATFORM_MMIO_SIZE: u64 = 0x800000;
// FDT is placed at the front of RAM when booting in BIOS mode.
const AARCH64_FDT_OFFSET_IN_BIOS_MODE: u64 = 0x0;
// Therefore, the BIOS is placed after the FDT in memory.
const AARCH64_BIOS_OFFSET: u64 = AARCH64_FDT_MAX_SIZE;
const AARCH64_BIOS_MAX_LEN: u64 = 1 << 20;
const AARCH64_PROTECTED_VM_FW_MAX_SIZE: u64 = 0x400000;
const AARCH64_PROTECTED_VM_FW_START: u64 =
AARCH64_PHYS_MEM_START - AARCH64_PROTECTED_VM_FW_MAX_SIZE;
const AARCH64_PVTIME_IPA_MAX_SIZE: u64 = 0x10000;
const AARCH64_PVTIME_IPA_START: u64 = AARCH64_MMIO_BASE - AARCH64_PVTIME_IPA_MAX_SIZE;
const AARCH64_PVTIME_SIZE: u64 = 64;
// These constants indicate the placement of the GIC registers in the physical
// address space.
const AARCH64_GIC_DIST_BASE: u64 = AARCH64_AXI_BASE - AARCH64_GIC_DIST_SIZE;
const AARCH64_GIC_CPUI_BASE: u64 = AARCH64_GIC_DIST_BASE - AARCH64_GIC_CPUI_SIZE;
const AARCH64_GIC_REDIST_SIZE: u64 = 0x20000;
// PSR (Processor State Register) bits
const PSR_MODE_EL1H: u64 = 0x00000005;
const PSR_F_BIT: u64 = 0x00000040;
const PSR_I_BIT: u64 = 0x00000080;
const PSR_A_BIT: u64 = 0x00000100;
const PSR_D_BIT: u64 = 0x00000200;
enum PayloadType {
Bios {
entry: GuestAddress,
image_size: u64,
},
Kernel(LoadedKernel),
}
impl PayloadType {
fn entry(&self) -> GuestAddress {
match self {
Self::Bios {
entry,
image_size: _,
} => *entry,
Self::Kernel(k) => k.entry,
}
}
fn size(&self) -> u64 {
match self {
Self::Bios {
entry: _,
image_size,
} => *image_size,
Self::Kernel(k) => k.size,
}
}
}
fn get_kernel_addr() -> GuestAddress {
GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_KERNEL_OFFSET)
}
fn get_bios_addr() -> GuestAddress {
GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_BIOS_OFFSET)
}
// When static swiotlb allocation is required, returns the address it should be allocated at.
// Otherwise, returns None.
fn get_swiotlb_addr(
memory_size: u64,
hypervisor: &(impl Hypervisor + ?Sized),
) -> Option<GuestAddress> {
if hypervisor.check_capability(HypervisorCap::StaticSwiotlbAllocationRequired) {
Some(GuestAddress(AARCH64_PHYS_MEM_START + memory_size))
} else {
None
}
}
// This was the speed kvmtool used, not sure if it matters.
const AARCH64_SERIAL_SPEED: u32 = 1843200;
// The serial device gets the first interrupt line
// Which gets mapped to the first SPI interrupt (physical 32).
const AARCH64_SERIAL_1_3_IRQ: u32 = 0;
const AARCH64_SERIAL_2_4_IRQ: u32 = 2;
// Place the RTC device at page 2
const AARCH64_RTC_ADDR: u64 = 0x2000;
// The RTC device gets one 4k page
const AARCH64_RTC_SIZE: u64 = 0x1000;
// The RTC device gets the second interrupt line
const AARCH64_RTC_IRQ: u32 = 1;
// The Goldfish battery device gets the 3rd interrupt line
const AARCH64_BAT_IRQ: u32 = 3;
// Place the virtual watchdog device at page 3
const AARCH64_VMWDT_ADDR: u64 = 0x3000;
// The virtual watchdog device gets one 4k page
const AARCH64_VMWDT_SIZE: u64 = 0x1000;
// PCI MMIO configuration region base address.
const AARCH64_PCI_CFG_BASE: u64 = 0x10000;
// PCI MMIO configuration region size.
const AARCH64_PCI_CFG_SIZE: u64 = 0x1000000;
// This is the base address of MMIO devices.
const AARCH64_MMIO_BASE: u64 = 0x2000000;
// Size of the whole MMIO region.
const AARCH64_MMIO_SIZE: u64 = 0x2000000;
// Virtio devices start at SPI interrupt number 4
const AARCH64_IRQ_BASE: u32 = 4;
// Virtual CPU Frequency Device.
const AARCH64_VIRTFREQ_BASE: u64 = 0x1040000;
const AARCH64_VIRTFREQ_SIZE: u64 = 0x8;
const AARCH64_VIRTFREQ_MAXSIZE: u64 = 0x10000;
// PMU PPI interrupt, same as qemu
const AARCH64_PMU_IRQ: u32 = 7;
#[sorted]
#[derive(Error, Debug)]
pub enum Error {
#[error("failed to allocate IRQ number")]
AllocateIrq,
#[error("bios could not be loaded: {0}")]
BiosLoadFailure(arch::LoadImageError),
#[error("failed to build arm pvtime memory: {0}")]
BuildPvtimeError(base::MmapError),
#[error("unable to clone an Event: {0}")]
CloneEvent(base::Error),
#[error("failed to clone IRQ chip: {0}")]
CloneIrqChip(base::Error),
#[error("the given kernel command line was invalid: {0}")]
Cmdline(kernel_cmdline::Error),
#[error("failed to configure CPU Frequencies: {0}")]
CpuFrequencies(base::Error),
#[error("failed to configure CPU topology: {0}")]
CpuTopology(base::Error),
#[error("unable to create battery devices: {0}")]
CreateBatDevices(arch::DeviceRegistrationError),
#[error("unable to make an Event: {0}")]
CreateEvent(base::Error),
#[error("FDT could not be created: {0}")]
CreateFdt(cros_fdt::Error),
#[error("failed to create GIC: {0}")]
CreateGICFailure(base::Error),
#[error("failed to create a PCI root hub: {0}")]
CreatePciRoot(arch::DeviceRegistrationError),
#[error("failed to create platform bus: {0}")]
CreatePlatformBus(arch::DeviceRegistrationError),
#[error("unable to create serial devices: {0}")]
CreateSerialDevices(arch::DeviceRegistrationError),
#[error("failed to create socket: {0}")]
CreateSocket(io::Error),
#[error("failed to create VCPU: {0}")]
CreateVcpu(base::Error),
#[error("custom pVM firmware could not be loaded: {0}")]
CustomPvmFwLoadFailure(arch::LoadImageError),
#[error("vm created wrong kind of vcpu")]
DowncastVcpu,
#[error("failed to enable singlestep execution: {0}")]
EnableSinglestep(base::Error),
#[error("failed to finalize IRQ chip: {0}")]
FinalizeIrqChip(base::Error),
#[error("failed to get HW breakpoint count: {0}")]
GetMaxHwBreakPoint(base::Error),
#[error("failed to get PSCI version: {0}")]
GetPsciVersion(base::Error),
#[error("failed to get serial cmdline: {0}")]
GetSerialCmdline(GetSerialCmdlineError),
#[error("failed to initialize arm pvtime: {0}")]
InitPvtimeError(base::Error),
#[error("initrd could not be loaded: {0}")]
InitrdLoadFailure(arch::LoadImageError),
#[error("failed to initialize virtual machine {0}")]
InitVmError(base::Error),
#[error("kernel could not be loaded: {0}")]
KernelLoadFailure(kernel_loader::Error),
#[error("error loading Kernel from Elf image: {0}")]
LoadElfKernel(kernel_loader::Error),
#[error("failed to map arm pvtime memory: {0}")]
MapPvtimeError(base::Error),
#[error("pVM firmware could not be loaded: {0}")]
PvmFwLoadFailure(base::Error),
#[error("ramoops address is different from high_mmio_base: {0} vs {1}")]
RamoopsAddress(u64, u64),
#[error("error reading guest memory: {0}")]
ReadGuestMemory(vm_memory::GuestMemoryError),
#[error("error reading CPU register: {0}")]
ReadReg(base::Error),
#[error("error reading CPU registers: {0}")]
ReadRegs(base::Error),
#[error("failed to register irq fd: {0}")]
RegisterIrqfd(base::Error),
#[error("error registering PCI bus: {0}")]
RegisterPci(BusError),
#[error("error registering virtual cpufreq device: {0}")]
RegisterVirtCpufreq(BusError),
#[error("error registering virtual socket device: {0}")]
RegisterVsock(arch::DeviceRegistrationError),
#[error("failed to set device attr: {0}")]
SetDeviceAttr(base::Error),
#[error("failed to set a hardware breakpoint: {0}")]
SetHwBreakpoint(base::Error),
#[error("failed to set register: {0}")]
SetReg(base::Error),
#[error("failed to set up guest memory: {0}")]
SetupGuestMemory(GuestMemoryError),
#[error("this function isn't supported")]
Unsupported,
#[error("failed to initialize VCPU: {0}")]
VcpuInit(base::Error),
#[error("error writing guest memory: {0}")]
WriteGuestMemory(GuestMemoryError),
#[error("error writing CPU register: {0}")]
WriteReg(base::Error),
#[error("error writing CPU registers: {0}")]
WriteRegs(base::Error),
}
pub type Result<T> = std::result::Result<T, Error>;
/// Returns the address in guest memory at which the FDT should be located.
fn fdt_address(memory_end: GuestAddress, has_bios: bool) -> GuestAddress {
// TODO(rammuthiah) make kernel and BIOS startup use FDT from the same location. ARCVM startup
// currently expects the kernel at 0x80080000 and the FDT at the end of RAM for unknown reasons.
// Root cause and figure out how to fold these code paths together.
if has_bios {
GuestAddress(AARCH64_PHYS_MEM_START + AARCH64_FDT_OFFSET_IN_BIOS_MODE)
} else {
// Put fdt up near the top of memory
// TODO(sonnyrao): will have to handle this differently if there's
// > 4GB memory
memory_end
.checked_sub(AARCH64_FDT_MAX_SIZE)
.expect("Not enough memory for FDT")
.checked_sub(0x10000)
.expect("Not enough memory for FDT")
}
}
fn load_kernel(
guest_mem: &GuestMemory,
kernel_start: GuestAddress,
mut kernel_image: &mut File,
) -> Result<LoadedKernel> {
if let Ok(elf_kernel) = kernel_loader::load_elf(
guest_mem,
kernel_start,
&mut kernel_image,
AARCH64_PHYS_MEM_START,
) {
return Ok(elf_kernel);
}
if let Ok(lz4_kernel) =
kernel_loader::load_arm64_kernel_lz4(guest_mem, kernel_start, &mut kernel_image)
{
return Ok(lz4_kernel);
}
kernel_loader::load_arm64_kernel(guest_mem, kernel_start, kernel_image)
.map_err(Error::KernelLoadFailure)
}
pub struct AArch64;
fn get_block_size() -> u64 {
let page_size = base::pagesize();
// Each PTE entry being 8 bytes long, we can fit in one page (page_size / 8)
// entries.
let ptes_per_page = page_size / 8;
let block_size = page_size * ptes_per_page;
block_size as u64
}
fn get_vcpu_mpidr_aff<Vcpu: VcpuAArch64>(vcpus: &[Vcpu], index: usize) -> Option<u64> {
const MPIDR_AFF_MASK: u64 = 0xff_00ff_ffff;
Some(vcpus.get(index)?.get_mpidr().ok()? & MPIDR_AFF_MASK)
}
impl arch::LinuxArch for AArch64 {
type Error = Error;
/// Returns a Vec of the valid memory addresses.
/// These should be used to configure the GuestMemory structure for the platform.
fn guest_memory_layout(
components: &VmComponents,
hypervisor: &impl Hypervisor,
) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
let mut memory_regions = vec![(
GuestAddress(AARCH64_PHYS_MEM_START),
components.memory_size,
MemoryRegionOptions::new().align(get_block_size()),
)];
// Allocate memory for the pVM firmware.
if components.hv_cfg.protection_type.runs_firmware() {
memory_regions.push((
GuestAddress(AARCH64_PROTECTED_VM_FW_START),
AARCH64_PROTECTED_VM_FW_MAX_SIZE,
MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
));
}
if let Some(size) = components.swiotlb {
if let Some(addr) = get_swiotlb_addr(components.memory_size, hypervisor) {
memory_regions.push((
addr,
size,
MemoryRegionOptions::new().purpose(MemoryRegionPurpose::StaticSwiotlbRegion),
));
}
}
Ok(memory_regions)
}
fn get_system_allocator_config<V: Vm>(vm: &V) -> SystemAllocatorConfig {
Self::get_resource_allocator_config(
vm.get_memory().end_addr(),
vm.get_guest_phys_addr_bits(),
)
}
fn build_vm<V, Vcpu>(
mut components: VmComponents,
_vm_evt_wrtube: &SendTube,
system_allocator: &mut SystemAllocator,
serial_parameters: &BTreeMap<(SerialHardware, u8), SerialParameters>,
serial_jail: Option<Minijail>,
(bat_type, bat_jail): (Option<BatteryType>, Option<Minijail>),
mut vm: V,
ramoops_region: Option<arch::pstore::RamoopsRegion>,
devs: Vec<(Box<dyn BusDeviceObj>, Option<Minijail>)>,
irq_chip: &mut dyn IrqChipAArch64,
vcpu_ids: &mut Vec<usize>,
dump_device_tree_blob: Option<PathBuf>,
_debugcon_jail: Option<Minijail>,
#[cfg(feature = "swap")] swap_controller: &mut Option<swap::SwapController>,
#[cfg(any(target_os = "android", target_os = "linux"))] _guest_suspended_cvar: Option<
Arc<(Mutex<bool>, Condvar)>,
>,
device_tree_overlays: Vec<DtbOverlay>,
) -> std::result::Result<RunnableLinuxVm<V, Vcpu>, Self::Error>
where
V: VmAArch64,
Vcpu: VcpuAArch64,
{
let has_bios = matches!(components.vm_image, VmImage::Bios(_));
let mem = vm.get_memory().clone();
// separate out image loading from other setup to get a specific error for
// image loading
let mut initrd = None;
let payload = match components.vm_image {
VmImage::Bios(ref mut bios) => {
let image_size =
arch::load_image(&mem, bios, get_bios_addr(), AARCH64_BIOS_MAX_LEN)
.map_err(Error::BiosLoadFailure)?;
PayloadType::Bios {
entry: get_bios_addr(),
image_size: image_size as u64,
}
}
VmImage::Kernel(ref mut kernel_image) => {
let loaded_kernel = load_kernel(&mem, get_kernel_addr(), kernel_image)?;
let kernel_end = loaded_kernel.address_range.end;
initrd = match components.initrd_image {
Some(initrd_file) => {
let mut initrd_file = initrd_file;
let initrd_addr =
(kernel_end + (AARCH64_INITRD_ALIGN - 1)) & !(AARCH64_INITRD_ALIGN - 1);
let initrd_max_size =
components.memory_size - (initrd_addr - AARCH64_PHYS_MEM_START);
let initrd_addr = GuestAddress(initrd_addr);
let initrd_size =
arch::load_image(&mem, &mut initrd_file, initrd_addr, initrd_max_size)
.map_err(Error::InitrdLoadFailure)?;
Some((initrd_addr, initrd_size))
}
None => None,
};
PayloadType::Kernel(loaded_kernel)
}
};
let memory_end = GuestAddress(AARCH64_PHYS_MEM_START + components.memory_size);
let fdt_offset = fdt_address(memory_end, has_bios);
let mut use_pmu = vm
.get_hypervisor()
.check_capability(HypervisorCap::ArmPmuV3);
let vcpu_count = components.vcpu_count;
let mut has_pvtime = true;
let mut vcpus = Vec::with_capacity(vcpu_count);
let mut vcpu_init = Vec::with_capacity(vcpu_count);
for vcpu_id in 0..vcpu_count {
let vcpu: Vcpu = *vm
.create_vcpu(vcpu_id)
.map_err(Error::CreateVcpu)?
.downcast::<Vcpu>()
.map_err(|_| Error::DowncastVcpu)?;
let per_vcpu_init = if vm
.get_hypervisor()
.check_capability(HypervisorCap::HypervisorInitializedBootContext)
{
// No registers are initialized: VcpuInitAArch64.regs is an empty BTreeMap
Default::default()
} else {
Self::vcpu_init(
vcpu_id,
&payload,
fdt_offset,
components.hv_cfg.protection_type,
components.boot_cpu,
)
};
has_pvtime &= vcpu.has_pvtime_support();
vcpus.push(vcpu);
vcpu_ids.push(vcpu_id);
vcpu_init.push(per_vcpu_init);
}
// Initialize Vcpus after all Vcpu objects have been created.
for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
vcpu.init(&Self::vcpu_features(vcpu_id, use_pmu, components.boot_cpu))
.map_err(Error::VcpuInit)?;
}
irq_chip.finalize().map_err(Error::FinalizeIrqChip)?;
if has_pvtime {
let pvtime_mem = MemoryMappingBuilder::new(AARCH64_PVTIME_IPA_MAX_SIZE as usize)
.build()
.map_err(Error::BuildPvtimeError)?;
vm.add_memory_region(
GuestAddress(AARCH64_PVTIME_IPA_START),
Box::new(pvtime_mem),
false,
false,
MemCacheType::CacheCoherent,
)
.map_err(Error::MapPvtimeError)?;
}
if components.hv_cfg.protection_type.loads_firmware() {
arch::load_image(
&mem,
&mut components
.pvm_fw
.expect("pvmfw must be available if ProtectionType loads it"),
GuestAddress(AARCH64_PROTECTED_VM_FW_START),
AARCH64_PROTECTED_VM_FW_MAX_SIZE,
)
.map_err(Error::CustomPvmFwLoadFailure)?;
} else if components.hv_cfg.protection_type.runs_firmware() {
// Tell the hypervisor to load the pVM firmware.
vm.load_protected_vm_firmware(
GuestAddress(AARCH64_PROTECTED_VM_FW_START),
AARCH64_PROTECTED_VM_FW_MAX_SIZE,
)
.map_err(Error::PvmFwLoadFailure)?;
}
for (vcpu_id, vcpu) in vcpus.iter().enumerate() {
use_pmu &= vcpu.init_pmu(AARCH64_PMU_IRQ as u64 + 16).is_ok();
if has_pvtime {
vcpu.init_pvtime(AARCH64_PVTIME_IPA_START + (vcpu_id as u64 * AARCH64_PVTIME_SIZE))
.map_err(Error::InitPvtimeError)?;
}
}
let mmio_bus = Arc::new(devices::Bus::new(BusType::Mmio));
// ARM doesn't really use the io bus like x86, so just create an empty bus.
let io_bus = Arc::new(devices::Bus::new(BusType::Io));
// Event used by PMDevice to notify crosvm that
// guest OS is trying to suspend.
let suspend_evt = Event::new().map_err(Error::CreateEvent)?;
let (pci_devices, others): (Vec<_>, Vec<_>) = devs
.into_iter()
.partition(|(dev, _)| dev.as_pci_device().is_some());
let pci_devices = pci_devices
.into_iter()
.map(|(dev, jail_orig)| (dev.into_pci_device().unwrap(), jail_orig))
.collect();
let (pci, pci_irqs, mut pid_debug_label_map, _amls, _gpe_scope_amls) =
arch::generate_pci_root(
pci_devices,
irq_chip.as_irq_chip_mut(),
mmio_bus.clone(),
GuestAddress(AARCH64_PCI_CFG_BASE),
8,
io_bus.clone(),
system_allocator,
&mut vm,
(devices::AARCH64_GIC_NR_SPIS - AARCH64_IRQ_BASE) as usize,
None,
#[cfg(feature = "swap")]
swap_controller,
)
.map_err(Error::CreatePciRoot)?;
let pci_root = Arc::new(Mutex::new(pci));
let pci_bus = Arc::new(Mutex::new(PciConfigMmio::new(pci_root.clone(), 8)));
let (platform_devices, _others): (Vec<_>, Vec<_>) = others
.into_iter()
.partition(|(dev, _)| dev.as_platform_device().is_some());
let platform_devices = platform_devices
.into_iter()
.map(|(dev, jail_orig)| (*(dev.into_platform_device().unwrap()), jail_orig))
.collect();
let (platform_devices, mut platform_pid_debug_label_map, dev_resources) =
arch::sys::linux::generate_platform_bus(
platform_devices,
irq_chip.as_irq_chip_mut(),
&mmio_bus,
system_allocator,
&mut vm,
#[cfg(feature = "swap")]
swap_controller,
components.hv_cfg.protection_type,
)
.map_err(Error::CreatePlatformBus)?;
pid_debug_label_map.append(&mut platform_pid_debug_label_map);
Self::add_arch_devs(
irq_chip.as_irq_chip_mut(),
&mmio_bus,
vcpu_count,
_vm_evt_wrtube,
)?;
let com_evt_1_3 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
let com_evt_2_4 = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
let serial_devices = arch::add_serial_devices(
components.hv_cfg.protection_type,
&mmio_bus,
(AARCH64_SERIAL_1_3_IRQ, com_evt_1_3.get_trigger()),
(AARCH64_SERIAL_2_4_IRQ, com_evt_2_4.get_trigger()),
serial_parameters,
serial_jail,
#[cfg(feature = "swap")]
swap_controller,
)
.map_err(Error::CreateSerialDevices)?;
let source = IrqEventSource {
device_id: Serial::device_id(),
queue_id: 0,
device_name: Serial::debug_label(),
};
irq_chip
.register_edge_irq_event(AARCH64_SERIAL_1_3_IRQ, &com_evt_1_3, source.clone())
.map_err(Error::RegisterIrqfd)?;
irq_chip
.register_edge_irq_event(AARCH64_SERIAL_2_4_IRQ, &com_evt_2_4, source)
.map_err(Error::RegisterIrqfd)?;
mmio_bus
.insert(pci_bus, AARCH64_PCI_CFG_BASE, AARCH64_PCI_CFG_SIZE)
.map_err(Error::RegisterPci)?;
#[cfg(any(target_os = "android", target_os = "linux"))]
if !components.cpu_frequencies.is_empty() {
// TODO: Revisit and optimization after benchmarking
let socket = components
.virt_cpufreq_socket
.map(|s| Arc::new(Mutex::new(s)));
for vcpu in 0..vcpu_count {
let vcpu_affinity = match components.vcpu_affinity.clone() {
Some(VcpuAffinity::Global(v)) => v,
Some(VcpuAffinity::PerVcpu(mut m)) => m.remove(&vcpu).unwrap_or_default(),
None => panic!("vcpu_affinity needs to be set for VirtCpufreq"),
};
let virt_cpufreq = Arc::new(Mutex::new(VirtCpufreq::new(
vcpu_affinity[0].try_into().unwrap(),
socket.clone(),
)));
if vcpu as u64 * AARCH64_VIRTFREQ_SIZE + AARCH64_VIRTFREQ_SIZE
> AARCH64_VIRTFREQ_MAXSIZE
{
panic!("Exceeded maximum number of virt cpufreq devices");
}
mmio_bus
.insert(
virt_cpufreq,
AARCH64_VIRTFREQ_BASE + (vcpu as u64 * AARCH64_VIRTFREQ_SIZE),
AARCH64_VIRTFREQ_SIZE,
)
.map_err(Error::RegisterVirtCpufreq)?;
}
}
let mut cmdline = Self::get_base_linux_cmdline();
get_serial_cmdline(&mut cmdline, serial_parameters, "mmio", &serial_devices)
.map_err(Error::GetSerialCmdline)?;
for param in components.extra_kernel_params {
cmdline.insert_str(&param).map_err(Error::Cmdline)?;
}
if let Some(ramoops_region) = ramoops_region {
arch::pstore::add_ramoops_kernel_cmdline(&mut cmdline, &ramoops_region)
.map_err(Error::Cmdline)?;
}
let psci_version = vcpus[0].get_psci_version().map_err(Error::GetPsciVersion)?;
let pci_cfg = fdt::PciConfigRegion {
base: AARCH64_PCI_CFG_BASE,
size: AARCH64_PCI_CFG_SIZE,
};
let pci_ranges: Vec<fdt::PciRange> = system_allocator
.mmio_pools()
.iter()
.map(|range| fdt::PciRange {
space: fdt::PciAddressSpace::Memory64,
bus_address: range.start,
cpu_physical_address: range.start,
size: range.len().unwrap(),
prefetchable: false,
})
.collect();
let (bat_control, bat_mmio_base_and_irq) = match bat_type {
Some(BatteryType::Goldfish) => {
let bat_irq = AARCH64_BAT_IRQ;
// a dummy AML buffer. Aarch64 crosvm doesn't use ACPI.
let mut amls = Vec::new();
let (control_tube, mmio_base) = arch::sys::linux::add_goldfish_battery(
&mut amls,
bat_jail,
&mmio_bus,
irq_chip.as_irq_chip_mut(),
bat_irq,
system_allocator,
#[cfg(feature = "swap")]
swap_controller,
)
.map_err(Error::CreateBatDevices)?;
(
Some(BatControl {
type_: BatteryType::Goldfish,
control_tube,
}),
Some((mmio_base, bat_irq)),
)
}
None => (None, None),
};
let vmwdt_cfg = fdt::VmWdtConfig {
base: AARCH64_VMWDT_ADDR,
size: AARCH64_VMWDT_SIZE,
clock_hz: VMWDT_DEFAULT_CLOCK_HZ,
timeout_sec: VMWDT_DEFAULT_TIMEOUT_SEC,
};
fdt::create_fdt(
AARCH64_FDT_MAX_SIZE as usize,
&mem,
pci_irqs,
pci_cfg,
&pci_ranges,
dev_resources,
vcpu_count as u32,
&|n| get_vcpu_mpidr_aff(&vcpus, n),
components.cpu_clusters,
components.cpu_capacity,
components.cpu_frequencies,
fdt_offset,
cmdline.as_str(),
(payload.entry(), payload.size() as usize),
initrd,
components.android_fstab,
irq_chip.get_vgic_version() == DeviceKind::ArmVgicV3,
use_pmu,
psci_version,
components.swiotlb.map(|size| {
(
get_swiotlb_addr(components.memory_size, vm.get_hypervisor()),
size,
)
}),
bat_mmio_base_and_irq,
vmwdt_cfg,
dump_device_tree_blob,
&|writer, phandles| vm.create_fdt(writer, phandles),
components.dynamic_power_coefficient,
device_tree_overlays,
&serial_devices,
)
.map_err(Error::CreateFdt)?;
vm.init_arch(
payload.entry(),
fdt_offset,
AARCH64_FDT_MAX_SIZE.try_into().unwrap(),
)
.map_err(Error::InitVmError)?;
Ok(RunnableLinuxVm {
vm,
vcpu_count,
vcpus: Some(vcpus),
vcpu_init,
vcpu_affinity: components.vcpu_affinity,
no_smt: components.no_smt,
irq_chip: irq_chip.try_box_clone().map_err(Error::CloneIrqChip)?,
io_bus,
mmio_bus,
pid_debug_label_map,
suspend_evt,
rt_cpus: components.rt_cpus,
delay_rt: components.delay_rt,
bat_control,
#[cfg(feature = "gdb")]
gdb: components.gdb,
pm: None,
resume_notify_devices: Vec::new(),
root_config: pci_root,
platform_devices,
hotplug_bus: BTreeMap::new(),
devices_thread: None,
vm_request_tube: None,
})
}
fn configure_vcpu<V: Vm>(
_vm: &V,
_hypervisor: &dyn Hypervisor,
_irq_chip: &mut dyn IrqChipAArch64,
vcpu: &mut dyn VcpuAArch64,
vcpu_init: VcpuInitAArch64,
_vcpu_id: usize,
_num_cpus: usize,
_cpu_config: Option<CpuConfigAArch64>,
) -> std::result::Result<(), Self::Error> {
for (reg, value) in vcpu_init.regs.iter() {
vcpu.set_one_reg(*reg, *value).map_err(Error::SetReg)?;
}
Ok(())
}
fn register_pci_device<V: VmAArch64, Vcpu: VcpuAArch64>(
_linux: &mut RunnableLinuxVm<V, Vcpu>,
_device: Box<dyn PciDevice>,
_minijail: Option<Minijail>,
_resources: &mut SystemAllocator,
_tube: &mpsc::Sender<PciRootCommand>,
#[cfg(feature = "swap")] _swap_controller: &mut Option<swap::SwapController>,
) -> std::result::Result<PciAddress, Self::Error> {
// hotplug function isn't verified on AArch64, so set it unsupported here.
Err(Error::Unsupported)
}
fn get_host_cpu_frequencies_khz() -> std::result::Result<BTreeMap<usize, Vec<u32>>, Self::Error>
{
Ok(
Self::collect_for_each_cpu(base::logical_core_frequencies_khz)
.map_err(Error::CpuFrequencies)?
.into_iter()
.enumerate()
.collect(),
)
}
// Returns a (cpu_id -> value) map of the DMIPS/MHz capacities of logical cores
// in the host system.
fn get_host_cpu_capacity() -> std::result::Result<BTreeMap<usize, u32>, Self::Error> {
Ok(Self::collect_for_each_cpu(base::logical_core_capacity)
.map_err(Error::CpuTopology)?
.into_iter()
.enumerate()
.collect())
}
// Creates CPU cluster mask for each CPU in the host system.
fn get_host_cpu_clusters() -> std::result::Result<Vec<CpuSet>, Self::Error> {
let cpu_capacities =
Self::collect_for_each_cpu(base::logical_core_capacity).map_err(Error::CpuTopology)?;
let mut unique_caps = Vec::new();
let mut cluster_ids = Vec::new();
for capacity in cpu_capacities {
if !unique_caps.contains(&capacity) {
unique_caps.push(capacity);
}
let idx = unique_caps.iter().position(|&r| r == capacity).unwrap();
cluster_ids.push(idx);
}
let mut unique_clusters: Vec<CpuSet> = cluster_ids
.iter()
.map(|&vcpu_cluster_id| {
cluster_ids
.iter()
.enumerate()
.filter(|(_, &cpu_cluster_id)| vcpu_cluster_id == cpu_cluster_id)
.map(|(cpu_id, _)| cpu_id)
.collect()
})
.collect();
unique_clusters.sort_unstable();
unique_clusters.dedup();
Ok(unique_clusters)
}
}
#[cfg(feature = "gdb")]
impl<T: VcpuAArch64> arch::GdbOps<T> for AArch64 {
type Error = Error;
fn read_memory(
_vcpu: &T,
guest_mem: &GuestMemory,
vaddr: GuestAddress,
len: usize,
) -> Result<Vec<u8>> {
let mut buf = vec![0; len];
guest_mem
.read_exact_at_addr(&mut buf, vaddr)
.map_err(Error::ReadGuestMemory)?;
Ok(buf)
}
fn write_memory(
_vcpu: &T,
guest_mem: &GuestMemory,
vaddr: GuestAddress,
buf: &[u8],
) -> Result<()> {
guest_mem
.write_all_at_addr(buf, vaddr)
.map_err(Error::WriteGuestMemory)
}
fn read_registers(vcpu: &T) -> Result<<GdbArch as Arch>::Registers> {
let mut regs: <GdbArch as Arch>::Registers = Default::default();
vcpu.get_gdb_registers(&mut regs).map_err(Error::ReadRegs)?;
Ok(regs)
}
fn write_registers(vcpu: &T, regs: &<GdbArch as Arch>::Registers) -> Result<()> {
vcpu.set_gdb_registers(regs).map_err(Error::WriteRegs)
}
fn read_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId) -> Result<Vec<u8>> {
let mut reg = vec![0; std::mem::size_of::<u128>()];
let size = vcpu
.get_gdb_register(reg_id, reg.as_mut_slice())
.map_err(Error::ReadReg)?;
reg.truncate(size);
Ok(reg)
}
fn write_register(vcpu: &T, reg_id: <GdbArch as Arch>::RegId, data: &[u8]) -> Result<()> {
vcpu.set_gdb_register(reg_id, data).map_err(Error::WriteReg)
}
fn enable_singlestep(vcpu: &T) -> Result<()> {
const SINGLE_STEP: bool = true;
vcpu.set_guest_debug(&[], SINGLE_STEP)
.map_err(Error::EnableSinglestep)
}
fn get_max_hw_breakpoints(vcpu: &T) -> Result<usize> {
vcpu.get_max_hw_bps().map_err(Error::GetMaxHwBreakPoint)
}
fn set_hw_breakpoints(vcpu: &T, breakpoints: &[GuestAddress]) -> Result<()> {
const SINGLE_STEP: bool = false;
vcpu.set_guest_debug(breakpoints, SINGLE_STEP)
.map_err(Error::SetHwBreakpoint)
}
}
impl AArch64 {
/// This returns a base part of the kernel command for this architecture
fn get_base_linux_cmdline() -> kernel_cmdline::Cmdline {
let mut cmdline = kernel_cmdline::Cmdline::new(base::pagesize());
cmdline.insert_str("panic=-1").unwrap();
cmdline
}
/// Returns a system resource allocator configuration.
///
/// # Arguments
///
/// * `memory_end` - The first address beyond the end of guest memory.
/// * `guest_phys_addr_bits` - Size of guest physical addresses (IPA) in bits.
fn get_resource_allocator_config(
memory_end: GuestAddress,
guest_phys_addr_bits: u8,
) -> SystemAllocatorConfig {
let guest_phys_end = 1u64 << guest_phys_addr_bits;
// The platform MMIO region is immediately past the end of RAM.
let plat_mmio_base = memory_end.offset();
let plat_mmio_size = AARCH64_PLATFORM_MMIO_SIZE;
// The high MMIO region is the rest of the address space after the platform MMIO region.
let high_mmio_base = plat_mmio_base + plat_mmio_size;
let high_mmio_size = guest_phys_end
.checked_sub(high_mmio_base)
.unwrap_or_else(|| {
panic!(
"guest_phys_end {:#x} < high_mmio_base {:#x}",
guest_phys_end, high_mmio_base,
);
});
SystemAllocatorConfig {
io: None,
low_mmio: AddressRange::from_start_and_size(AARCH64_MMIO_BASE, AARCH64_MMIO_SIZE)
.expect("invalid mmio region"),
high_mmio: AddressRange::from_start_and_size(high_mmio_base, high_mmio_size)
.expect("invalid high mmio region"),
platform_mmio: Some(
AddressRange::from_start_and_size(plat_mmio_base, plat_mmio_size)
.expect("invalid platform mmio region"),
),
first_irq: AARCH64_IRQ_BASE,
}
}
/// This adds any early platform devices for this architecture.
///
/// # Arguments
///
/// * `irq_chip` - The IRQ chip to add irqs to.
/// * `bus` - The bus to add devices to.
/// * `vcpu_count` - The number of virtual CPUs for this guest VM
/// * `vm_evt_wrtube` - The notification channel
fn add_arch_devs(
irq_chip: &mut dyn IrqChip,
bus: &Bus,
vcpu_count: usize,
vm_evt_wrtube: &SendTube,
) -> Result<()> {
let rtc_evt = devices::IrqEdgeEvent::new().map_err(Error::CreateEvent)?;
let rtc = devices::pl030::Pl030::new(rtc_evt.try_clone().map_err(Error::CloneEvent)?);
irq_chip
.register_edge_irq_event(AARCH64_RTC_IRQ, &rtc_evt, IrqEventSource::from_device(&rtc))
.map_err(Error::RegisterIrqfd)?;
bus.insert(
Arc::new(Mutex::new(rtc)),
AARCH64_RTC_ADDR,
AARCH64_RTC_SIZE,
)
.expect("failed to add rtc device");
let vm_wdt = Arc::new(Mutex::new(
devices::vmwdt::Vmwdt::new(vcpu_count, vm_evt_wrtube.try_clone().unwrap()).unwrap(),
));
bus.insert(vm_wdt, AARCH64_VMWDT_ADDR, AARCH64_VMWDT_SIZE)
.expect("failed to add vmwdt device");
Ok(())
}
/// Get ARM-specific features for vcpu with index `vcpu_id`.
///
/// # Arguments
///
/// * `vcpu_id` - The VM's index for `vcpu`.
/// * `use_pmu` - Should `vcpu` be configured to use the Performance Monitor Unit.
fn vcpu_features(vcpu_id: usize, use_pmu: bool, boot_cpu: usize) -> Vec<VcpuFeature> {
let mut features = vec![VcpuFeature::PsciV0_2];
if use_pmu {
features.push(VcpuFeature::PmuV3);
}
// Non-boot cpus are powered off initially
if vcpu_id != boot_cpu {
features.push(VcpuFeature::PowerOff);
}
features
}
/// Get initial register state for vcpu with index `vcpu_id`.
///
/// # Arguments
///
/// * `vcpu_id` - The VM's index for `vcpu`.
fn vcpu_init(
vcpu_id: usize,
payload: &PayloadType,
fdt_address: GuestAddress,
protection_type: ProtectionType,
boot_cpu: usize,
) -> VcpuInitAArch64 {
let mut regs: BTreeMap<VcpuRegAArch64, u64> = Default::default();
// All interrupts masked
let pstate = PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1H;
regs.insert(VcpuRegAArch64::Pstate, pstate);
// Other cpus are powered off initially
if vcpu_id == boot_cpu {
let entry_addr = if protection_type.loads_firmware() {
Some(AARCH64_PROTECTED_VM_FW_START)
} else if protection_type.runs_firmware() {
None // Initial PC value is set by the hypervisor
} else {
Some(payload.entry().offset())
};
/* PC -- entry point */
if let Some(entry) = entry_addr {
regs.insert(VcpuRegAArch64::Pc, entry);
}
/* X0 -- fdt address */
regs.insert(VcpuRegAArch64::X(0), fdt_address.offset());
if protection_type.runs_firmware() {
/* X1 -- payload entry point */
regs.insert(VcpuRegAArch64::X(1), payload.entry().offset());
/* X2 -- image size */
regs.insert(VcpuRegAArch64::X(2), payload.size());
}
}
VcpuInitAArch64 { regs }
}
fn collect_for_each_cpu<F, T>(func: F) -> std::result::Result<Vec<T>, base::Error>
where
F: Fn(usize) -> std::result::Result<T, base::Error>,
{
(0..base::number_of_logical_cores()?).map(func).collect()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn vcpu_init_unprotected_kernel() {
let payload = PayloadType::Kernel(LoadedKernel {
address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
size: 0x1000,
entry: GuestAddress(0x8080_0000),
});
let fdt_address = GuestAddress(0x1234);
let prot = ProtectionType::Unprotected;
let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
// PC: kernel image entry point
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8080_0000));
// X0: fdt_offset
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
}
#[test]
fn vcpu_init_unprotected_bios() {
let payload = PayloadType::Bios {
entry: GuestAddress(0x8020_0000),
image_size: 0x1000,
};
let fdt_address = GuestAddress(0x1234);
let prot = ProtectionType::Unprotected;
let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
// PC: bios image entry point
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), Some(&0x8020_0000));
// X0: fdt_offset
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
}
#[test]
fn vcpu_init_protected_kernel() {
let payload = PayloadType::Kernel(LoadedKernel {
address_range: AddressRange::from_start_and_size(0x8080_0000, 0x1000).unwrap(),
size: 0x1000,
entry: GuestAddress(0x8080_0000),
});
let fdt_address = GuestAddress(0x1234);
let prot = ProtectionType::Protected;
let vcpu_init = AArch64::vcpu_init(0, &payload, fdt_address, prot, 0);
// The hypervisor provides the initial value of PC, so PC should not be present in the
// vcpu_init register map.
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::Pc), None);
// X0: fdt_offset
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(0)), Some(&0x1234));
// X1: kernel image entry point
assert_eq!(
vcpu_init.regs.get(&VcpuRegAArch64::X(1)),
Some(&0x8080_0000)
);
// X2: image size
assert_eq!(vcpu_init.regs.get(&VcpuRegAArch64::X(2)), Some(&0x1000));
}
}