blob: 919a31776ddde17390541da50be3a970744d6b60 [file] [log] [blame]
// Copyright (C) 2019 Alibaba Cloud. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 or BSD-3-Clause
//! Virtio Vhost Backend Drivers
//!
//! Virtio devices use virtqueues to transport data efficiently. The first generation of virtqueue
//! is a set of three different single-producer, single-consumer ring structures designed to store
//! generic scatter-gather I/O. The virtio specification 1.1 introduces an alternative compact
//! virtqueue layout named "Packed Virtqueue", which is more friendly to memory cache system and
//! hardware implemented virtio devices. The packed virtqueue uses read-write memory, that means
//! the memory will be both read and written by both host and guest. The new Packed Virtqueue is
//! preferred for performance.
//!
//! Vhost is a mechanism to improve performance of Virtio devices by delegating data plane
//! operations to dedicated IO service processes. Only the configuration, I/O submission
//! notification, and I/O completion interruption are piped through the hypervisor.
//! It uses the same virtqueue layout as Virtio to allow Vhost devices to be mapped directly to
//! Virtio devices. This allows a Vhost device to be accessed directly by a guest OS inside a
//! hypervisor process with an existing Virtio (PCI) driver.
//!
//! The initial vhost implementation is a part of the Linux kernel and uses ioctl interface to
//! communicate with userspace applications. Dedicated kernel worker threads are created to handle
//! IO requests from the guest.
//!
//! Later, the Vhost-user protocol was introduced to complement the ioctl interface used to
//! control the vhost implementation in the Linux kernel. It implements the control plane needed
//! to establish virtqueue sharing with a user space process on the same host. It uses
//! communication over a Unix domain socket to share file descriptors in the ancillary data of the
//! message.
//! The protocol defines 2 sides of the communication, master and slave. Master is the application
//! that shares its virtqueues. Slave is the consumer of the virtqueues. Master and slave can be
//! either a client (i.e. connecting) or server (listening) in the socket communication.
#![deny(missing_docs)]
#[cfg(any(feature = "vmm", feature = "device"))]
use std::fs::File;
use std::io::Error as IOError;
use remain::sorted;
use thiserror::Error as ThisError;
mod backend;
pub use backend::*;
pub mod message;
pub mod connection;
#[cfg(feature = "vmm")]
mod master;
#[cfg(feature = "vmm")]
pub use self::master::{Master, VhostUserMaster};
#[cfg(feature = "vmm")]
mod master_req_handler;
#[cfg(feature = "vmm")]
pub use self::master_req_handler::{
MasterReqHandler, VhostUserMasterReqHandler, VhostUserMasterReqHandlerMut,
};
#[cfg(feature = "device")]
mod slave;
#[cfg(feature = "device")]
pub use self::slave::SlaveListener;
#[cfg(feature = "device")]
mod slave_req_handler;
#[cfg(feature = "device")]
pub use self::slave_req_handler::{
Protocol, SlaveReqHandler, SlaveReqHelper, VhostUserSlaveReqHandler,
VhostUserSlaveReqHandlerMut,
};
#[cfg(feature = "device")]
mod slave_fs_cache;
#[cfg(feature = "device")]
pub use self::slave_fs_cache::SlaveFsCacheReq;
/// Errors that vhost-user operations can report.
///
/// Variants are kept in alphabetical order (enforced by `#[sorted]`).
#[sorted]
#[derive(Debug, ThisError)]
pub enum Error {
    /// The virtio or protocol features do not match.
    #[error("virtio features mismatch")]
    FeatureMismatch,
    /// The attached fd array holds too many or too few entries.
    #[error("wrong number of attached fds")]
    IncorrectFds,
    /// The message has an invalid format, flag or content.
    #[error("invalid message")]
    InvalidMessage,
    /// The operation is unsupported because the required protocol feature was not negotiated.
    #[error("invalid operation")]
    InvalidOperation,
    /// One or more parameters are invalid.
    #[error("invalid parameters")]
    InvalidParam,
    /// The master side reported a failure.
    #[error("master Internal error")]
    MasterInternalError,
    /// The message exceeds the allowed size.
    #[error("oversized message")]
    OversizedMsg,
    /// Only part of a message was sent or received successfully.
    #[error("partial message")]
    PartialMessage,
    /// The request handler returned an error.
    #[error("handler failed to handle request: {0}")]
    ReqHandlerError(IOError),
    /// The slave side reported a failure.
    #[error("slave internal error")]
    SlaveInternalError,
    /// The socket is broken or has been closed.
    #[error("socket is broken: {0}")]
    SocketBroken(IOError),
    /// Connecting to the peer failed.
    #[error("can't connect to peer: {0}")]
    SocketConnect(IOError),
    /// A generic socket error occurred.
    #[error("socket error: {0}")]
    SocketError(IOError),
    /// The socket operation failed temporarily and should be retried.
    #[error("temporary socket error: {0}")]
    SocketRetry(IOError),
    /// The VFIO device reported an error.
    #[error("error occurred in VFIO device: {0}")]
    VfioDeviceError(anyhow::Error),
}
impl Error {
/// Determine whether to rebuild the underline communication channel.
pub fn should_reconnect(&self) -> bool {
match *self {
// Should reconnect because it may be caused by temporary network errors.
Error::PartialMessage => true,
// Should reconnect because the underline socket is broken.
Error::SocketBroken(_) => true,
// Slave internal error, hope it recovers on reconnect.
Error::SlaveInternalError => true,
// Master internal error, hope it recovers on reconnect.
Error::MasterInternalError => true,
// Should just retry the IO operation instead of rebuilding the underline connection.
Error::SocketRetry(_) => false,
Error::InvalidParam | Error::InvalidOperation => false,
Error::InvalidMessage | Error::IncorrectFds | Error::OversizedMsg => false,
Error::SocketError(_) | Error::SocketConnect(_) => false,
Error::FeatureMismatch => false,
Error::ReqHandlerError(_) => false,
Error::VfioDeviceError(_) => false,
}
}
}
impl std::convert::From<sys_util::Error> for Error {
/// Convert raw socket errors into meaningful vhost-user errors.
///
/// The sys_util::Error is a simple wrapper over the raw errno, which doesn't means
/// much to the vhost-user connection manager. So convert it into meaningful errors to simplify
/// the connection manager logic.
///
/// # Return:
/// * - Error::SocketRetry: temporary error caused by signals or short of resources.
/// * - Error::SocketBroken: the underline socket is broken.
/// * - Error::SocketError: other socket related errors.
#[allow(unreachable_patterns)] // EWOULDBLOCK equals to EGAIN on linux
fn from(err: sys_util::Error) -> Self {
match err.errno() {
// Retry:
// * EAGAIN, EWOULDBLOCK: The socket is marked nonblocking and the requested operation
// would block.
// * EINTR: A signal occurred before any data was transmitted
// * ENOBUFS: The output queue for a network interface was full. This generally
// indicates that the interface has stopped sending, but may be caused by transient
// congestion.
// * ENOMEM: No memory available.
libc::EAGAIN | libc::EWOULDBLOCK | libc::EINTR | libc::ENOBUFS | libc::ENOMEM => {
Error::SocketRetry(err.into())
}
// Broken:
// * ECONNRESET: Connection reset by peer.
// * EPIPE: The local end has been shut down on a connection oriented socket. In this
// case the process will also receive a SIGPIPE unless MSG_NOSIGNAL is set.
libc::ECONNRESET | libc::EPIPE => Error::SocketBroken(err.into()),
// Write permission is denied on the destination socket file, or search permission is
// denied for one of the directories the path prefix.
libc::EACCES => Error::SocketConnect(IOError::from_raw_os_error(libc::EACCES)),
// Catch all other errors
e => Error::SocketError(IOError::from_raw_os_error(e)),
}
}
}
/// Result of vhost-user operations.
///
/// Shorthand for `std::result::Result` with this module's [`Error`] as the error type.
pub type Result<T> = std::result::Result<T, Error>;
/// Result of request handler.
///
/// Shorthand for `std::result::Result` with `std::io::Error` (imported in this file as
/// `IOError`) as the error type.
pub type HandlerResult<T> = std::result::Result<T, IOError>;
/// Utility function to take the single file out of an optional vector of files.
/// Returns `None` if the option is `None`, or if the vector contains no file or more than one file.
#[cfg(any(feature = "vmm", feature = "device"))]
pub(crate) fn take_single_file(files: Option<Vec<File>>) -> Option<File> {
    // Keep the vector only when it holds exactly one file; popping then yields that file.
    // A missing vector, an empty one, or one with several files all collapse to `None`.
    files
        .filter(|list| list.len() == 1)
        .and_then(|mut list| list.pop())
}
#[cfg(all(test, feature = "device"))]
mod dummy_slave;
#[cfg(all(test, feature = "vmm", feature = "device"))]
mod tests {
use std::os::unix::io::AsRawFd;
use std::path::Path;
use std::sync::{Arc, Barrier, Mutex};
use std::thread;
use super::connection::socket::{Endpoint, Listener};
use super::dummy_slave::{DummySlaveReqHandler, VIRTIO_FEATURES};
use super::message::*;
use super::*;
use crate::backend::VhostBackend;
use crate::{VhostUserMemoryRegionInfo, VringConfigData};
use base::AsRawDescriptor;
use tempfile::{tempfile, Builder, TempDir};
/// Creates a temporary directory whose name starts with the `/tmp/vhost_test` prefix.
///
/// Panics if the directory cannot be created.
fn temp_dir() -> TempDir {
    let mut builder = Builder::new();
    builder.prefix("/tmp/vhost_test");
    builder.tempdir().unwrap()
}
/// Builds a connected master / slave request handler pair over a socket at `path`.
///
/// The listener is created first, then the master connects to the same path, and finally the
/// pending connection is accepted on the slave side. Panics if any of these steps fails.
fn create_slave<P, S>(
    path: P,
    backend: Arc<S>,
) -> (
    Master<Endpoint<MasterReq>>,
    SlaveReqHandler<S, Endpoint<MasterReq>>,
)
where
    P: AsRef<Path>,
    S: VhostUserSlaveReqHandler,
{
    let socket_listener = Listener::new(&path, true).unwrap();
    let mut acceptor = SlaveListener::new(socket_listener, backend).unwrap();
    // The master has to connect before the slave side can accept the connection.
    let master = Master::connect(&path, 1).unwrap();
    let slave = acceptor.accept().unwrap().unwrap();
    (master, slave)
}
/// Checks that the dummy slave accepts `set_owner()` once and rejects a second call.
#[test]
fn create_dummy_slave() {
    let handler = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
    // The first ownership claim must succeed.
    handler.set_owner().unwrap();
    // A repeated claim must be refused.
    let second_attempt = handler.set_owner();
    assert!(second_attempt.is_err());
}
/// Checks `set_owner()` end to end: the first request marks the backend as owned, a second
/// request makes the slave handler return an error and leaves the backend owned.
#[test]
fn test_set_owner() {
    let backend = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
    let dir = temp_dir();
    let sock_path = dir.path().join("sock");
    let (master, mut slave) = create_slave(&sock_path, backend.clone());

    // Nothing has been requested yet, so the backend has no owner.
    assert!(!backend.lock().unwrap().owned);

    // First request: handled successfully, backend becomes owned.
    master.set_owner().unwrap();
    slave.handle_request().unwrap();
    assert!(backend.lock().unwrap().owned);

    // Second request: the master call still succeeds, but the slave handler reports an error.
    master.set_owner().unwrap();
    let second_result = slave.handle_request();
    assert!(second_result.is_err());
    assert!(backend.lock().unwrap().owned);
}
/// Exercises owner, virtio feature and protocol feature negotiation between a master (running
/// in the test thread) and a slave request handler (running in a spawned thread).
#[test]
fn test_set_features() {
// Two-party barrier: the test thread and the slave thread both wait on it, so the test
// function does not return before the slave thread has run all of its assertions.
let mbar = Arc::new(Barrier::new(2));
let sbar = mbar.clone();
let dir = temp_dir();
let mut path = dir.path().to_owned();
path.push("sock");
let slave_be = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
let (mut master, mut slave) = create_slave(&path, slave_be.clone());
thread::spawn(move || {
// Each `handle_request()` call below is expected to service one request issued by the
// master further down, in the same order.
// set_owner()
slave.handle_request().unwrap();
assert!(slave_be.lock().unwrap().owned);
// get_features() / set_features()
slave.handle_request().unwrap();
slave.handle_request().unwrap();
// The backend must have recorded exactly the feature set the master acked (bit 0 cleared).
assert_eq!(
slave_be.lock().unwrap().acked_features,
VIRTIO_FEATURES & !0x1
);
// get_protocol_features() / set_protocol_features()
slave.handle_request().unwrap();
slave.handle_request().unwrap();
assert_eq!(
slave_be.lock().unwrap().acked_protocol_features,
VhostUserProtocolFeatures::all().bits()
);
// All slave-side checks passed; release the test thread.
sbar.wait();
});
master.set_owner().unwrap();
// set virtio features
let features = master.get_features().unwrap();
assert_eq!(features, VIRTIO_FEATURES);
// Ack everything the slave offered except bit 0.
master.set_features(VIRTIO_FEATURES & !0x1).unwrap();
// set vhost protocol features
let features = master.get_protocol_features().unwrap();
assert_eq!(features.bits(), VhostUserProtocolFeatures::all().bits());
master.set_protocol_features(features).unwrap();
// Wait until the slave thread has finished its assertions.
mbar.wait();
}
/// Drives a full master/slave conversation: the master (test thread) issues a fixed sequence of
/// requests and the slave thread calls `handle_request()` once per request, in the same order.
#[test]
fn test_master_slave_process() {
// Two-party barrier: the test thread and the slave thread both wait on it, so the test
// function does not return before the slave thread has handled every request.
let mbar = Arc::new(Barrier::new(2));
let sbar = mbar.clone();
let dir = temp_dir();
let mut path = dir.path().to_owned();
path.push("sock");
let slave_be = Arc::new(Mutex::new(DummySlaveReqHandler::new()));
let (mut master, mut slave) = create_slave(&path, slave_be.clone());
thread::spawn(move || {
// The order of the calls below must mirror the order of the master requests further down.
// set_owner()
slave.handle_request().unwrap();
assert!(slave_be.lock().unwrap().owned);
// get/set_features()
slave.handle_request().unwrap();
slave.handle_request().unwrap();
// The backend must have recorded exactly the feature set the master acked (bit 0 cleared).
assert_eq!(
slave_be.lock().unwrap().acked_features,
VIRTIO_FEATURES & !0x1
);
// get/set_protocol_features()
slave.handle_request().unwrap();
slave.handle_request().unwrap();
assert_eq!(
slave_be.lock().unwrap().acked_protocol_features,
VhostUserProtocolFeatures::all().bits()
);
// get_inflight_fd()
slave.handle_request().unwrap();
// set_inflight_fd()
slave.handle_request().unwrap();
// get_queue_num()
slave.handle_request().unwrap();
// set_mem_table()
slave.handle_request().unwrap();
// set_config() / get_config() (the master issues set_config first)
slave.handle_request().unwrap();
slave.handle_request().unwrap();
// set_slave_request_fd
slave.handle_request().unwrap();
// set_vring_enable
slave.handle_request().unwrap();
// set_log_base,set_log_fd(): both are expected to fail on the slave side (the master side
// marks them as unimplemented yet).
slave.handle_request().unwrap_err();
slave.handle_request().unwrap_err();
// set_vring_xxx: num, base, addr, call, kick, err (six requests)
slave.handle_request().unwrap();
slave.handle_request().unwrap();
slave.handle_request().unwrap();
slave.handle_request().unwrap();
slave.handle_request().unwrap();
slave.handle_request().unwrap();
// get_max_mem_slots()
slave.handle_request().unwrap();
// add_mem_region()
slave.handle_request().unwrap();
// remove_mem_region()
slave.handle_request().unwrap();
// Every request has been handled; release the test thread.
sbar.wait();
});
master.set_owner().unwrap();
// set virtio features
let features = master.get_features().unwrap();
assert_eq!(features, VIRTIO_FEATURES);
// Ack everything the slave offered except bit 0.
master.set_features(VIRTIO_FEATURES & !0x1).unwrap();
// set vhost protocol features
let features = master.get_protocol_features().unwrap();
assert_eq!(features.bits(), VhostUserProtocolFeatures::all().bits());
master.set_protocol_features(features).unwrap();
// Retrieve inflight I/O tracking information
let (inflight_info, inflight_file) = master
.get_inflight_fd(&VhostUserInflight {
num_queues: 2,
queue_size: 256,
..Default::default()
})
.unwrap();
// Set the buffer back to the backend
master
.set_inflight_fd(&inflight_info, inflight_file.as_raw_fd())
.unwrap();
let num = master.get_queue_num().unwrap();
assert_eq!(num, 2);
// A single event object is reused below wherever a descriptor or event argument is needed.
let eventfd = base::Event::new().unwrap();
let mem = [VhostUserMemoryRegionInfo {
guest_phys_addr: 0,
memory_size: 0x10_0000,
userspace_addr: 0,
mmap_offset: 0,
mmap_handle: eventfd.as_raw_descriptor(),
}];
master.set_mem_table(&mem).unwrap();
// Write one byte (0xa5) at config offset 0x100, then read four bytes back from the same
// offset and check that the first one is the byte just written.
master
.set_config(0x100, VhostUserConfigFlags::WRITABLE, &[0xa5u8])
.unwrap();
let buf = [0x0u8; 4];
let (reply_body, reply_payload) = master
.get_config(0x100, 4, VhostUserConfigFlags::empty(), &buf)
.unwrap();
let offset = reply_body.offset;
assert_eq!(offset, 0x100);
assert_eq!(reply_payload[0], 0xa5);
master.set_slave_request_fd(&eventfd).unwrap();
master.set_vring_enable(0, true).unwrap();
// unimplemented yet: the master-side calls still return Ok here, while the slave thread
// above expects its handler to fail for both requests (`unwrap_err`).
master
.set_log_base(0, Some(eventfd.as_raw_descriptor()))
.unwrap();
master.set_log_fd(eventfd.as_raw_descriptor()).unwrap();
// Configure vring 0: size, base index, addresses and the call/kick/err events.
master.set_vring_num(0, 256).unwrap();
master.set_vring_base(0, 0).unwrap();
let config = VringConfigData {
queue_max_size: 256,
queue_size: 128,
flags: VhostUserVringAddrFlags::VHOST_VRING_F_LOG.bits(),
desc_table_addr: 0x1000,
used_ring_addr: 0x2000,
avail_ring_addr: 0x3000,
log_addr: Some(0x4000),
};
master.set_vring_addr(0, &config).unwrap();
master.set_vring_call(0, &eventfd).unwrap();
master.set_vring_kick(0, &eventfd).unwrap();
master.set_vring_err(0, &eventfd).unwrap();
// 32 is the slot count this test expects the slave to report.
let max_mem_slots = master.get_max_mem_slots().unwrap();
assert_eq!(max_mem_slots, 32);
// Add and then remove one extra memory region whose handle is a temporary file.
let region_file = tempfile().unwrap();
let region = VhostUserMemoryRegionInfo {
guest_phys_addr: 0x10_0000,
memory_size: 0x10_0000,
userspace_addr: 0,
mmap_offset: 0,
mmap_handle: region_file.as_raw_fd(),
};
master.add_mem_region(&region).unwrap();
master.remove_mem_region(&region).unwrap();
// Wait until the slave thread has handled every request.
mbar.wait();
}
/// Checks the `Display` text of two `Error` variants.
#[test]
fn test_error_display() {
    let invalid_param = Error::InvalidParam.to_string();
    assert_eq!(invalid_param, "invalid parameters");

    let invalid_operation = Error::InvalidOperation.to_string();
    assert_eq!(invalid_operation, "invalid operation");
}
/// Checks that an `EAGAIN` `sys_util::Error` converts into `Error::SocketRetry` carrying the
/// same errno.
#[test]
fn test_error_from_sys_util_error() {
    let converted = Error::from(sys_util::Error::new(libc::EAGAIN));
    match converted {
        Error::SocketRetry(io_err) => {
            assert_eq!(io_err.raw_os_error().unwrap(), libc::EAGAIN);
        }
        _ => panic!("invalid error code conversion!"),
    }
}
}