// Copyright 2021 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use std::os::unix::fs::OpenOptionsExt;
use std::{
convert::{self, TryFrom, TryInto},
fs::{File, OpenOptions},
mem::size_of,
num::Wrapping,
os::unix::net::UnixListener,
path::Path,
str,
sync::{Arc, Mutex as StdMutex},
};
use anyhow::{bail, Context};
use argh::FromArgs;
use base::{
clear_fd_flags, error, info, AsRawDescriptor, Event, FromRawDescriptor, IntoRawDescriptor,
SafeDescriptor, UnlinkUnixListener,
};
use cros_async::{AsyncWrapper, EventAsync, Executor};
use data_model::{DataInit, Le64};
use hypervisor::ProtectionType;
use sync::Mutex;
use vhost::{self, Vhost, Vsock};
use vm_memory::GuestMemory;
use vmm_vhost::{
connection::vfio::{Endpoint as VfioEndpoint, Listener as VfioListener},
message::{
VhostUserConfigFlags, VhostUserInflight, VhostUserMemoryRegion, VhostUserProtocolFeatures,
VhostUserSingleMemoryRegion, VhostUserVirtioFeatures, VhostUserVringAddrFlags,
VhostUserVringState,
},
Error, Result, SlaveReqHandler, VhostUserSlaveReqHandlerMut,
};
use vmm_vhost::{Protocol, SlaveListener};
use crate::{
vfio::VfioRegionAddr,
virtio::{
base_features,
vhost::{
user::device::{
handler::{
create_guest_memory, create_vvu_guest_memory, vmm_va_to_gpa, MappingInfo,
},
vvu::{doorbell::DoorbellRegion, pci::VvuPciDevice, VvuDevice},
},
vsock,
},
Queue, SignalableInterrupt,
},
};
const MAX_VRING_LEN: u16 = vsock::QUEUE_SIZE;
const NUM_QUEUES: usize = vsock::QUEUE_SIZES.len();
const EVENT_QUEUE: usize = NUM_QUEUES - 1;
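
/// A vhost-user vsock backend. Control-plane requests are handled in this process, while
/// data-path virtqueue processing is delegated to the in-kernel vhost-vsock driver.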
struct VsockBackend {
ex: Executor,
handle: Vsock,
cid: u64,
features: u64,
vvu_device: Option<Arc<Mutex<VvuPciDevice>>>,
protocol_features: VhostUserProtocolFeatures,
mem: Option<GuestMemory>,
vmm_maps: Option<Vec<MappingInfo>>,
queues: [Queue; NUM_QUEUES],
// Only used for vvu device mode.
call_evts: [Option<Arc<Mutex<DoorbellRegion>>>; NUM_QUEUES],
}
impl VsockBackend {
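    /// Creates a new `VsockBackend` for guest context id `cid`, backed by the kernel vhost
    /// driver opened from `vhost_socket`. `vvu_device` is `Some` when the device runs in
    /// virtio-vhost-user (vvu) mode and `None` when it uses a regular vhost-user socket.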
fn new<P: AsRef<Path>>(
ex: &Executor,
cid: u64,
vhost_socket: P,
vvu_device: Option<Arc<Mutex<VvuPciDevice>>>,
) -> anyhow::Result<VsockBackend> {
let handle = Vsock::new(
OpenOptions::new()
.read(true)
.write(true)
.custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
.open(vhost_socket)
.context("failed to open `Vsock` socket")?,
);
let features = handle.get_features().context("failed to get features")?;
let protocol_features = VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::CONFIG;
Ok(VsockBackend {
ex: ex.clone(),
handle,
cid,
features,
vvu_device,
protocol_features,
mem: None,
vmm_maps: None,
queues: [
Queue::new(MAX_VRING_LEN),
Queue::new(MAX_VRING_LEN),
Queue::new(MAX_VRING_LEN),
],
call_evts: Default::default(),
})
}
}
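
/// Maps a `vhost` ioctl error to the closest vhost-user protocol error.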
fn convert_vhost_error(err: vhost::Error) -> Error {
use vhost::Error::*;
match err {
IoctlError(e) => Error::ReqHandlerError(e),
_ => Error::SlaveInternalError,
}
}
impl VhostUserSlaveReqHandlerMut for VsockBackend {
fn protocol(&self) -> Protocol {
if self.vvu_device.is_some() {
Protocol::Virtio
} else {
Protocol::Regular
}
}
fn set_owner(&mut self) -> Result<()> {
self.handle.set_owner().map_err(convert_vhost_error)
}
fn reset_owner(&mut self) -> Result<()> {
self.handle.reset_owner().map_err(convert_vhost_error)
}
fn get_features(&mut self) -> Result<u64> {
let features = base_features(ProtectionType::Unprotected)
| self.features
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
Ok(features)
}
fn set_features(&mut self, features: u64) -> Result<()> {
self.handle
.set_features(features & self.features)
.map_err(convert_vhost_error)
}
fn get_protocol_features(&mut self) -> Result<VhostUserProtocolFeatures> {
Ok(self.protocol_features)
}
fn set_protocol_features(&mut self, features: u64) -> Result<()> {
let unrequested_features = features & !self.protocol_features.bits();
if unrequested_features != 0 {
Err(Error::InvalidParam)
} else {
Ok(())
}
}
fn set_mem_table(
&mut self,
contexts: &[VhostUserMemoryRegion],
files: Vec<File>,
) -> Result<()> {
let (guest_mem, vmm_maps) = match self.vvu_device.as_ref() {
None => create_guest_memory(contexts, files)?,
Some(dev) => {
// virtio-vhost-user doesn't pass FDs.
if !files.is_empty() {
return Err(Error::InvalidParam);
}
let device = dev.lock();
create_vvu_guest_memory(
&device.vfio_dev,
device.caps.shared_mem_cfg_addr(),
contexts,
)?
}
};
self.handle
.set_mem_table(&guest_mem)
.map_err(convert_vhost_error)?;
self.mem = Some(guest_mem);
self.vmm_maps = Some(vmm_maps);
Ok(())
}
fn get_queue_num(&mut self) -> Result<u64> {
Ok(NUM_QUEUES as u64)
}
fn set_vring_num(&mut self, index: u32, num: u32) -> Result<()> {
if index >= NUM_QUEUES as u32 || num == 0 || num > vsock::QUEUE_SIZE.into() {
return Err(Error::InvalidParam);
}
// We checked these values already.
let index = index as usize;
let num = num as u16;
self.queues[index].size = num;
// The last vq is an event-only vq that is not handled by the kernel.
if index == EVENT_QUEUE {
return Ok(());
}
self.handle
.set_vring_num(index, num)
.map_err(convert_vhost_error)
}
fn set_vring_addr(
&mut self,
index: u32,
flags: VhostUserVringAddrFlags,
descriptor: u64,
used: u64,
available: u64,
log: u64,
) -> Result<()> {
if index >= NUM_QUEUES as u32 {
return Err(Error::InvalidParam);
}
let index = index as usize;
let mem = self.mem.as_ref().ok_or(Error::InvalidParam)?;
let maps = self.vmm_maps.as_ref().ok_or(Error::InvalidParam)?;
        let queue = &mut self.queues[index];
queue.desc_table = vmm_va_to_gpa(maps, descriptor)?;
queue.avail_ring = vmm_va_to_gpa(maps, available)?;
queue.used_ring = vmm_va_to_gpa(maps, used)?;
let log_addr = if flags.contains(VhostUserVringAddrFlags::VHOST_VRING_F_LOG) {
vmm_va_to_gpa(maps, log).map(Some)?
} else {
None
};
if index == EVENT_QUEUE {
return Ok(());
}
self.handle
.set_vring_addr(
mem,
queue.max_size,
queue.actual_size(),
index,
flags.bits(),
queue.desc_table,
queue.used_ring,
queue.avail_ring,
log_addr,
)
.map_err(convert_vhost_error)
}
fn set_vring_base(&mut self, index: u32, base: u32) -> Result<()> {
if index >= NUM_QUEUES as u32 || base >= vsock::QUEUE_SIZE.into() {
return Err(Error::InvalidParam);
}
let index = index as usize;
let base = base as u16;
        let queue = &mut self.queues[index];
queue.next_avail = Wrapping(base);
queue.next_used = Wrapping(base);
if index == EVENT_QUEUE {
return Ok(());
}
self.handle
.set_vring_base(index, base)
.map_err(convert_vhost_error)
}
fn get_vring_base(&mut self, index: u32) -> Result<VhostUserVringState> {
if index >= NUM_QUEUES as u32 {
return Err(Error::InvalidParam);
}
let index = index as usize;
let next_avail = if index == EVENT_QUEUE {
self.queues[index].next_avail.0
} else {
self.handle
.get_vring_base(index)
.map_err(convert_vhost_error)?
};
Ok(VhostUserVringState::new(index as u32, next_avail.into()))
}
fn set_vring_kick(&mut self, index: u8, fd: Option<File>) -> Result<()> {
if index >= NUM_QUEUES as u8 {
return Err(Error::InvalidParam);
}
let index = usize::from(index);
let event = match self.vvu_device.as_ref() {
Some(dev) => {
if fd.is_some() {
return Err(Error::InvalidParam);
}
let queue = &mut self.queues[index];
if queue.ready {
error!("kick fd cannot replaced after queue is started");
return Err(Error::InvalidOperation);
}
let kick_evt = dev.lock().notification_evts[index]
.try_clone()
.map_err(|e| {
error!("failed to clone notification_evts[{}]: {}", index, e);
Error::InvalidOperation
})?;
kick_evt
}
None => {
let file = fd.ok_or(Error::InvalidParam)?;
// Safe because the descriptor is uniquely owned by `file`.
let event = unsafe { Event::from_raw_descriptor(file.into_raw_descriptor()) };
// Remove O_NONBLOCK from the kick fd.
if let Err(e) = clear_fd_flags(event.as_raw_descriptor(), libc::O_NONBLOCK) {
error!("failed to remove O_NONBLOCK for kick fd: {}", e);
return Err(Error::InvalidParam);
}
event
}
};
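        // The event queue is not handled by the kernel, so its kick event is not registered
        // with the vhost driver.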
if index != EVENT_QUEUE {
self.handle
.set_vring_kick(index, &event)
.map_err(convert_vhost_error)?;
}
Ok(())
}
fn set_vring_call(&mut self, index: u8, fd: Option<File>) -> Result<()> {
if index >= NUM_QUEUES as u8 {
return Err(Error::InvalidParam);
}
let index = usize::from(index);
let event = match self.vvu_device.as_ref() {
Some(dev) => {
let dev = dev.lock();
let vfio = Arc::clone(&dev.vfio_dev);
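                // Compute this queue's doorbell address from the device's doorbell base
                // address and per-queue offset multiplier.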
let base = dev.caps.doorbell_base_addr();
let addr = VfioRegionAddr {
index: base.index,
addr: base.addr + (index as u64 * dev.caps.doorbell_off_multiplier() as u64),
};
let doorbell = DoorbellRegion {
vfio,
index: index as u8,
addr,
};
let call_evt = match self.call_evts[index].as_ref() {
None => {
let evt = Arc::new(Mutex::new(doorbell));
self.call_evts[index] = Some(evt.clone());
evt
}
Some(evt) => {
*evt.lock() = doorbell;
evt.clone()
}
};
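                // The vhost driver can only signal an event, so create one and spawn a task
                // that forwards each signal to the vvu doorbell region.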
let kernel_evt = Event::new().map_err(|_| Error::SlaveInternalError)?;
let task_evt = EventAsync::new(
kernel_evt.try_clone().expect("failed to clone event").0,
&self.ex,
)
.map_err(|_| Error::SlaveInternalError)?;
self.ex
.spawn_local(async move {
loop {
let _ = task_evt
.next_val()
.await
.expect("failed to wait for event fd");
call_evt.signal_used_queue(index as u16);
}
})
.detach();
kernel_evt
}
None => {
let file = fd.ok_or(Error::InvalidParam)?;
// Safe because the descriptor is uniquely owned by `file`.
unsafe { Event::from_raw_descriptor(file.into_raw_descriptor()) }
}
};
if index != EVENT_QUEUE {
self.handle
.set_vring_call(index, &event)
.map_err(convert_vhost_error)?;
}
Ok(())
}
fn set_vring_err(&mut self, index: u8, fd: Option<File>) -> Result<()> {
if index >= NUM_QUEUES as u8 {
return Err(Error::InvalidParam);
}
let index = usize::from(index);
let file = fd.ok_or(Error::InvalidParam)?;
// Safe because the descriptor is uniquely owned by `file`.
let event = unsafe { Event::from_raw_descriptor(file.into_raw_descriptor()) };
if index == EVENT_QUEUE {
return Ok(());
}
self.handle
.set_vring_err(index, &event)
.map_err(convert_vhost_error)
}
fn set_vring_enable(&mut self, index: u32, enable: bool) -> Result<()> {
if index >= NUM_QUEUES as u32 {
return Err(Error::InvalidParam);
}
self.queues[index as usize].ready = enable;
        if index == EVENT_QUEUE as u32 {
return Ok(());
}
if self.queues[..EVENT_QUEUE].iter().all(|q| q.ready) {
// All queues are ready. Start the device.
self.handle.set_cid(self.cid).map_err(convert_vhost_error)?;
self.handle.start().map_err(convert_vhost_error)
} else if !enable {
// If we just disabled a vring then stop the device.
self.handle.stop().map_err(convert_vhost_error)
} else {
Ok(())
}
}
fn get_config(
&mut self,
offset: u32,
size: u32,
_flags: VhostUserConfigFlags,
) -> Result<Vec<u8>> {
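        // The virtio-vsock config space contains only the 64-bit guest CID.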
let start: usize = offset.try_into().map_err(|_| Error::InvalidParam)?;
let end: usize = offset
.checked_add(size)
.and_then(|e| e.try_into().ok())
.ok_or(Error::InvalidParam)?;
if start >= size_of::<Le64>() || end > size_of::<Le64>() {
return Err(Error::InvalidParam);
}
Ok(Le64::from(self.cid).as_slice()[start..end].to_vec())
}
fn set_config(
&mut self,
_offset: u32,
_buf: &[u8],
_flags: VhostUserConfigFlags,
) -> Result<()> {
Err(Error::InvalidOperation)
}
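    // This device does not use the backend-to-frontend (slave request) channel, so the fd is
    // simply dropped.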
fn set_slave_req_fd(&mut self, _vu_req: File) {}
fn get_inflight_fd(
&mut self,
_inflight: &VhostUserInflight,
) -> Result<(VhostUserInflight, File)> {
Err(Error::InvalidOperation)
}
fn set_inflight_fd(&mut self, _inflight: &VhostUserInflight, _file: File) -> Result<()> {
Err(Error::InvalidOperation)
}
fn get_max_mem_slots(&mut self) -> Result<u64> {
Err(Error::InvalidOperation)
}
fn add_mem_region(&mut self, _region: &VhostUserSingleMemoryRegion, _fd: File) -> Result<()> {
Err(Error::InvalidOperation)
}
fn remove_mem_region(&mut self, _region: &VhostUserSingleMemoryRegion) -> Result<()> {
Err(Error::InvalidOperation)
}
}
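
/// Binds a vhost-user listener at `socket`, accepts a single frontend connection, and services
/// its requests with `backend` until the frontend disconnects.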
async fn run_device<P: AsRef<Path>>(
ex: &Executor,
socket: P,
backend: Arc<StdMutex<VsockBackend>>,
) -> anyhow::Result<()> {
let listener = UnixListener::bind(socket)
.map(UnlinkUnixListener)
.context("failed to bind socket")?;
let (socket, _) = ex
.spawn_blocking(move || listener.accept())
.await
.context("failed to accept socket connection")?;
let mut req_handler = SlaveReqHandler::from_stream(socket, backend);
let h = SafeDescriptor::try_from(&req_handler as &dyn AsRawDescriptor)
.map(AsyncWrapper::new)
.expect("failed to get safe descriptor for handler");
let handler_source = ex.async_from(h).context("failed to create async handler")?;
loop {
handler_source
.wait_readable()
.await
.context("failed to wait for vhost socket to become readable")?;
match req_handler.handle_request() {
Ok(()) => (),
Err(Error::Disconnect) => {
info!("vhost-user connection closed");
// Exit as the client closed the connection.
return Ok(());
}
Err(e) => {
bail!("failed to handle a vhost-user request: {}", e);
}
};
}
}
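
// Command-line options for the vhost-user vsock device.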
#[derive(FromArgs)]
#[argh(description = "")]
struct Options {
#[argh(
option,
description = "path to bind a listening vhost-user socket",
arg_name = "PATH"
)]
socket: Option<String>,
#[argh(option, description = "name of vfio pci device", arg_name = "STRING")]
vfio: Option<String>,
#[argh(
option,
description = "the vsock context id for this device",
arg_name = "INT"
)]
cid: u64,
#[argh(
option,
description = "path to the vhost-vsock control socket",
default = "String::from(\"/dev/vhost-vsock\")",
arg_name = "PATH"
)]
vhost_socket: String,
}
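
/// Runs the vsock device in virtio-vhost-user (vvu) mode, using the VFIO PCI device named
/// `device_name` as the vhost-user transport instead of a Unix domain socket.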
fn run_vvu_device<P: AsRef<Path>>(
ex: &Executor,
cid: u64,
vhost_socket: P,
device_name: &str,
) -> anyhow::Result<()> {
let device = VvuPciDevice::new(device_name, NUM_QUEUES)
.map(Mutex::new)
.map(Arc::new)
.context("failed to create `VvuPciDevice`")?;
let driver = VvuDevice::new(device.clone());
let backend = VsockBackend::new(ex, cid, vhost_socket, Some(device))
.map(StdMutex::new)
.map(Arc::new)
.context("failed to create `VsockBackend`")?;
let mut listener = VfioListener::new(driver)
.context("failed to create `VfioListener`")
.and_then(|l| {
SlaveListener::<VfioEndpoint<_, _>, _>::new(l, backend)
.context("failed to create `SlaveListener`")
})?;
let mut req_handler = listener
.accept()
.context("failed to accept vfio connection")?
.expect("no incoming connection detected");
let h = SafeDescriptor::try_from(&req_handler as &dyn AsRawDescriptor)
.map(AsyncWrapper::new)
.expect("failed to get safe descriptor for handler");
let handler_source = ex
.async_from(h)
.context("failed to create async handler source")?;
let done = async move {
loop {
let count = handler_source
.read_u64()
.await
.context("failed to wait for handler source")?;
for _ in 0..count {
req_handler
.handle_request()
.context("failed to handle request")?;
}
}
};
match ex.run_until(done) {
Ok(Ok(())) => Ok(()),
Ok(Err(e)) => Err(e),
Err(e) => Err(e).context("executor error"),
}
}
/// Returns an error if the given `args` is invalid or the device fails to run.
pub fn run_vsock_device(program_name: &str, args: &[&str]) -> anyhow::Result<()> {
let opts = match Options::from_args(&[program_name], args) {
Ok(opts) => opts,
Err(e) => {
if e.status.is_err() {
bail!(e.output);
} else {
println!("{}", e.output);
}
return Ok(());
}
};
let ex = Executor::new().context("failed to create executor")?;
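    // `--socket` selects regular vhost-user mode; `--vfio` selects virtio-vhost-user (vvu)
    // mode. Exactly one of the two must be given.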
match (opts.socket, opts.vfio) {
(Some(socket), None) => {
let backend = VsockBackend::new(&ex, opts.cid, opts.vhost_socket, None)
.map(StdMutex::new)
.map(Arc::new)?;
// TODO: Replace the `and_then` with `Result::flatten` once it is stabilized.
ex.run_until(run_device(&ex, socket, backend))
.context("failed to run vsock device")
.and_then(convert::identity)
}
(None, Some(device_name)) => run_vvu_device(&ex, opts.cid, opts.vhost_socket, &device_name),
_ => bail!("Exactly one of `--socket` or `--vfio` is required"),
}
}