Snap for 12901447 from 0f92d9ec81adb42a62d257886b5b29937cb3d5d3 to simpleperf-release
Change-Id: I1eba1f7de3f6a8f67b6b50c7273e26dfdb4c83f9
diff --git a/Cargo.lock b/Cargo.lock
index 3c99ccd..d5a9ad3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -53,6 +53,19 @@
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
+name = "ahash"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a"
+dependencies = [
+ "cfg-if",
+ "getrandom",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1476,7 +1489,7 @@
"sync",
"thiserror",
"vm_control",
- "vulkano",
+ "vulkano 0.31.1",
"which",
"win_util",
"winapi",
@@ -1489,6 +1502,7 @@
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad6a9459c9c30b177b925162351f97e7d967c7ea8bab3b8352805327daf45554"
dependencies = [
+ "bytemuck",
"crunchy",
]
@@ -2197,9 +2211,9 @@
[[package]]
name = "p9"
-version = "0.2.3"
+version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4838a2d89bdcbcad051f18347ed6cbe3e5b9b09fb0019e1a6ec4bb2bb1d29481"
+checksum = "0dc5b2b13cb6a9a5fcf7c668ebf2aef67e0d83d4451c1db95feb9fb0775874f0"
dependencies = [
"libc",
"p9_wire_format_derive",
@@ -2208,13 +2222,13 @@
[[package]]
name = "p9_wire_format_derive"
-version = "0.2.3"
+version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6085210d8ec9bcbdf38b5c8e97bccef1877f3f291eae48b65388ca979f5314e"
+checksum = "9317f09e751274d3cb2a2678a785c456133a3d1f956f9f79bd460aec84acb600"
dependencies = [
"proc-macro2",
"quote 1.0.36",
- "syn 1.0.103",
+ "syn 2.0.77",
]
[[package]]
@@ -2342,6 +2356,16 @@
]
[[package]]
+name = "proc-macro-crate"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
+dependencies = [
+ "once_cell",
+ "toml_edit",
+]
+
+[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2572,6 +2596,7 @@
"libc",
"remain",
"serde",
+ "serde_json",
"thiserror",
]
@@ -2638,6 +2663,7 @@
"serde",
"serde_json",
"thiserror",
+ "vulkano 0.33.0",
"winapi",
"zerocopy",
]
@@ -2932,6 +2958,16 @@
]
[[package]]
+name = "thread_local"
+version = "1.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+]
+
+[[package]]
name = "threadpool"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2993,6 +3029,23 @@
]
[[package]]
+name = "toml_datetime"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
+
+[[package]]
+name = "toml_edit"
+version = "0.19.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
+dependencies = [
+ "indexmap 2.6.0",
+ "toml_datetime",
+ "winnow",
+]
+
+[[package]]
name = "tube_transporter"
version = "0.1.0"
dependencies = [
@@ -3295,6 +3348,47 @@
]
[[package]]
+name = "vulkano"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e1f15eeb9d93a05eb3c237332a10806eac1eb82444e54485bfcc1859c483c23"
+dependencies = [
+ "ahash",
+ "ash",
+ "bytemuck",
+ "core-graphics-types",
+ "crossbeam-queue",
+ "half",
+ "heck",
+ "indexmap 1.9.1",
+ "libloading",
+ "objc",
+ "once_cell",
+ "parking_lot",
+ "proc-macro2",
+ "quote 1.0.36",
+ "regex",
+ "serde",
+ "serde_json",
+ "smallvec",
+ "thread_local",
+ "vk-parse",
+ "vulkano-macros",
+]
+
+[[package]]
+name = "vulkano-macros"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "895b8a2cac1e7650d2d0552f2392da0970a358515ac11a34adaf19bfdc771b98"
+dependencies = [
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote 1.0.36",
+ "syn 1.0.103",
+]
+
+[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -3584,6 +3678,15 @@
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
+name = "winnow"
+version = "0.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
name = "wio"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index f94461d..61087bf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -555,7 +555,7 @@
[target.'cfg(any(target_os = "android", target_os = "linux"))'.dependencies]
minijail = "*" # provided by ebuild
-p9 = "0.2.3"
+p9 = "0.3.1"
vhost = { path = "vhost" }
android_audio = { path = "android_audio"}
diff --git a/base/src/sys/linux/net.rs b/base/src/sys/linux/net.rs
index e10f381..cc28162 100644
--- a/base/src/sys/linux/net.rs
+++ b/base/src/sys/linux/net.rs
@@ -37,7 +37,6 @@
use crate::unix::net::TcpSocket;
use crate::SafeDescriptor;
use crate::ScmSocket;
-use crate::StreamChannel;
use crate::UnixSeqpacket;
use crate::UnixSeqpacketListener;
@@ -177,7 +176,6 @@
};
}
-ScmSocketTryFrom!(StreamChannel);
ScmSocketTryFrom!(UnixDatagram);
ScmSocketTryFrom!(UnixListener);
ScmSocketTryFrom!(UnixSeqpacket);
diff --git a/base/src/sys/macos/net.rs b/base/src/sys/macos/net.rs
index 10141fe..98ab65c 100644
--- a/base/src/sys/macos/net.rs
+++ b/base/src/sys/macos/net.rs
@@ -41,7 +41,6 @@
use crate::FromRawDescriptor;
use crate::SafeDescriptor;
use crate::ScmSocket;
-use crate::StreamChannel;
use crate::UnixSeqpacket;
use crate::UnixSeqpacketListener;
@@ -75,7 +74,6 @@
};
}
-ScmSocketTryFrom!(StreamChannel);
ScmSocketTryFrom!(UnixDatagram);
ScmSocketTryFrom!(UnixListener);
ScmSocketTryFrom!(UnixSeqpacket);
diff --git a/base/src/sys/unix/stream_channel.rs b/base/src/sys/unix/stream_channel.rs
index 9ac7a55..e8a9f3e 100644
--- a/base/src/sys/unix/stream_channel.rs
+++ b/base/src/sys/unix/stream_channel.rs
@@ -59,6 +59,9 @@
/// An abstraction over named pipes and unix socketpairs. This abstraction can be used in a blocking
/// and non blocking mode.
+///
+/// WARNING: partial reads of messages behave differently depending on the platform.
+/// See sys::unix::StreamChannel::inner_read for details.
#[derive(Debug, Deserialize, Serialize)]
pub struct StreamChannel {
stream: SocketType,
@@ -84,12 +87,18 @@
SocketType::Byte(sock) => (&mut &*sock).read(buf),
// On Windows, reading from SOCK_SEQPACKET with a buffer that is too small is an error,
- // but on Linux will silently truncate unless MSG_TRUNC is passed. Here, we emulate
- // Windows behavior on POSIX.
+ // and the extra data will be preserved inside the named pipe.
//
- // Note that Rust translates ERROR_MORE_DATA into io::ErrorKind::Other
- // (see sys::decode_error_kind) on Windows, so we preserve this behavior on POSIX even
- // though one could argue ErrorKind::UnexpectedEof is a closer match to the true error.
+ // Linux though, will silently truncate unless MSG_TRUNC is passed. So we pass it, but
+ // even in that case, Linux will still throw away the extra data. This means there is a
+ // slight behavior difference between platforms from the consumer's perspective.
+ // In practice on Linux, intentional partial reads of messages are usually accomplished
+ // by also passing MSG_PEEK. While we could do this, and hide this rough edge from
+ // consumers, it would add complexity & turn every read into two read syscalls.
+ //
+ // So the compromise is this:
+ // * On Linux: a partial read of a message is an Err and loses data.
+ // * On Windows: a partial read of a message is Ok and does not lose data.
SocketType::Message(sock) => {
// SAFETY:
// Safe because buf is valid, we pass buf's size to recv to bound the return
@@ -149,22 +158,6 @@
Ok((stream_a, stream_b))
}
- pub fn from_unix_seqpacket(sock: UnixSeqpacket) -> StreamChannel {
- StreamChannel {
- stream: SocketType::Message(sock),
- }
- }
-
- pub fn peek_size(&self) -> io::Result<usize> {
- match &self.stream {
- SocketType::Byte(_) => Err(std::io::Error::new(
- std::io::ErrorKind::Other,
- "Cannot check the size of streamed data",
- )),
- SocketType::Message(sock) => Ok(sock.next_packet_size()?),
- }
- }
-
pub fn set_read_timeout(&self, timeout: Option<Duration>) -> io::Result<()> {
match &self.stream {
SocketType::Byte(sock) => sock.set_read_timeout(timeout),
@@ -385,40 +378,4 @@
// non blocking pipe.
assert!(receiver.read(&mut recv_buffer).is_err());
}
-
- #[test]
- fn test_from_unix_seqpacket() {
- let (sock_sender, sock_receiver) = UnixSeqpacket::pair().unwrap();
- let mut sender = StreamChannel::from_unix_seqpacket(sock_sender);
- let mut receiver = StreamChannel::from_unix_seqpacket(sock_receiver);
-
- sender.write_all(&[75, 77, 54, 82, 76, 65]).unwrap();
-
- // Wait for the data to arrive.
- let event_ctx: EventContext<Token> =
- EventContext::build_with(&[(receiver.get_read_notifier(), Token::ReceivedData)])
- .unwrap();
- let events = event_ctx.wait().unwrap();
- let tokens: Vec<Token> = events
- .iter()
- .filter(|e| e.is_readable)
- .map(|e| e.token)
- .collect();
- assert_eq!(tokens, vec! {Token::ReceivedData});
-
- let mut recv_buffer: [u8; 6] = [0; 6];
-
- let size = receiver.read(&mut recv_buffer).unwrap();
- assert_eq!(size, 6);
- assert_eq!(recv_buffer, [75, 77, 54, 82, 76, 65]);
-
- // Now that we've polled for & received all data, polling again should show no events.
- assert_eq!(
- event_ctx
- .wait_timeout(std::time::Duration::new(0, 0))
- .unwrap()
- .len(),
- 0
- );
- }
}
diff --git a/base/src/sys/unix/tube.rs b/base/src/sys/unix/tube.rs
index 6c8e655..cb47137 100644
--- a/base/src/sys/unix/tube.rs
+++ b/base/src/sys/unix/tube.rs
@@ -18,12 +18,9 @@
use crate::tube::RecvTube;
use crate::tube::Result;
use crate::tube::SendTube;
-use crate::BlockingMode;
-use crate::FramingMode;
use crate::RawDescriptor;
use crate::ReadNotifier;
use crate::ScmSocket;
-use crate::StreamChannel;
use crate::UnixSeqpacket;
use crate::SCM_SOCKET_MAX_FD_COUNT;
@@ -33,42 +30,19 @@
/// Bidirectional tube that support both send and recv.
#[derive(Serialize, Deserialize)]
pub struct Tube {
- socket: ScmSocket<StreamChannel>,
+ socket: ScmSocket<UnixSeqpacket>,
}
impl Tube {
/// Create a pair of connected tubes. Request is sent in one direction while response is in the
/// other direction.
pub fn pair() -> Result<(Tube, Tube)> {
- let (socket1, socket2) = StreamChannel::pair(BlockingMode::Blocking, FramingMode::Message)
- .map_err(|errno| Error::Pair(std::io::Error::from(errno)))?;
- let tube1 = Tube::new(socket1)?;
- let tube2 = Tube::new(socket2)?;
+ let (socket1, socket2) = UnixSeqpacket::pair().map_err(Error::Pair)?;
+ let tube1 = Tube::try_from(socket1)?;
+ let tube2 = Tube::try_from(socket2)?;
Ok((tube1, tube2))
}
- /// Create a new `Tube` from a `StreamChannel`.
- /// The StreamChannel must use FramingMode::Message (meaning, must use a SOCK_SEQPACKET as the
- /// underlying socket type), otherwise, this method returns an error.
- pub fn new(socket: StreamChannel) -> Result<Tube> {
- match socket.get_framing_mode() {
- FramingMode::Message => Ok(Tube {
- socket: socket.try_into().map_err(Error::DupDescriptor)?,
- }),
- FramingMode::Byte => Err(Error::InvalidFramingMode),
- }
- }
-
- /// Create a new `Tube` from a UnixSeqpacket. The StreamChannel is implicitly constructed to
- /// have the right FramingMode by being constructed from a UnixSeqpacket.
- pub fn new_from_unix_seqpacket(sock: UnixSeqpacket) -> Result<Tube> {
- Ok(Tube {
- socket: StreamChannel::from_unix_seqpacket(sock)
- .try_into()
- .map_err(Error::DupDescriptor)?,
- })
- }
-
/// DO NOT USE this method directly as it will become private soon (b/221484449). Use a
/// directional Tube pair instead.
#[deprecated]
@@ -76,8 +50,8 @@
self.socket
.inner()
.try_clone()
- .map(Tube::new)
.map_err(Error::Clone)?
+ .try_into()
}
/// Sends a message via a Tube.
@@ -123,9 +97,10 @@
// is readable, then a call to `Tube::recv` will not block (which ought to be true since we
// use SOCK_SEQPACKET and a single recvmsg call currently).
- let msg_size = handle_eintr!(self.socket.inner().peek_size()).map_err(Error::Recv)?;
- // This buffer is the right size, as the size received in peek_size() represents the size
- // of only the message itself and not the file descriptors. The descriptors are stored
+ let msg_size =
+ handle_eintr!(self.socket.inner().next_packet_size()).map_err(Error::Recv)?;
+ // This buffer is the right size, as the size received in next_packet_size() represents the
+ // size of only the message itself and not the file descriptors. The descriptors are stored
// separately in msghdr::msg_control.
let mut msg_json = vec![0u8; msg_size];
@@ -170,7 +145,8 @@
#[cfg(feature = "proto_tube")]
fn recv_proto<M: protobuf::Message>(&self) -> Result<M> {
- let msg_size = handle_eintr!(self.socket.inner().peek_size()).map_err(Error::Recv)?;
+ let msg_size =
+ handle_eintr!(self.socket.inner().next_packet_size()).map_err(Error::Recv)?;
let mut msg_bytes = vec![0u8; msg_size];
let (msg_bytes_size, _) =
@@ -185,6 +161,16 @@
}
}
+impl TryFrom<UnixSeqpacket> for Tube {
+ type Error = Error;
+
+ fn try_from(socket: UnixSeqpacket) -> Result<Self> {
+ Ok(Tube {
+ socket: socket.try_into().map_err(Error::ScmSocket)?,
+ })
+ }
+}
+
impl AsRawDescriptor for Tube {
fn as_raw_descriptor(&self) -> RawDescriptor {
self.socket.as_raw_descriptor()
@@ -193,7 +179,7 @@
impl AsRawFd for Tube {
fn as_raw_fd(&self) -> RawFd {
- self.socket.inner().as_raw_fd()
+ self.socket.inner().as_raw_descriptor()
}
}
@@ -234,9 +220,12 @@
pub fn recv_proto<M: protobuf::Message>(&self) -> Result<M> {
self.0.recv_proto()
}
+}
- pub fn new_from_unix_seqpacket(sock: UnixSeqpacket) -> Result<ProtoTube> {
- Ok(ProtoTube(Tube::new_from_unix_seqpacket(sock)?))
+#[cfg(feature = "proto_tube")]
+impl From<Tube> for ProtoTube {
+ fn from(tube: Tube) -> Self {
+ ProtoTube(tube)
}
}
diff --git a/base/src/sys/windows/named_pipes.rs b/base/src/sys/windows/named_pipes.rs
index a18b3a9..0d4ac21 100644
--- a/base/src/sys/windows/named_pipes.rs
+++ b/base/src/sys/windows/named_pipes.rs
@@ -82,7 +82,19 @@
static NEXT_PIPE_INDEX: AtomicUsize = AtomicUsize::new(1);
+#[remain::sorted]
+#[derive(Debug, thiserror::Error)]
+pub enum PipeError {
+ #[error("read zero bytes, but this is not an EOF")]
+ ZeroByteReadNoEof,
+}
+
/// Represents one end of a named pipe
+///
+/// NOTE: implementations of Read & Write are trait compliant for EOF/broken pipe handling
+/// (returning a successful zero byte read), but overlapped read/write versions are NOT (they will
+/// return broken pipe directly due to API limitations; see PipeConnection::read for
+/// details).
#[derive(Serialize, Deserialize, Debug)]
pub struct PipeConnection {
handle: SafeDescriptor,
@@ -530,7 +542,38 @@
/// If buf's type is file descriptors, this is only safe when those file descriptors are valid
/// for the process where this function was called.
pub unsafe fn read<T: PipeSendable>(&self, buf: &mut [T]) -> Result<usize> {
- PipeConnection::read_internal(&self.handle, self.blocking_mode, buf, None)
+ match PipeConnection::read_internal(&self.handle, self.blocking_mode, buf, None) {
+ // Windows allows for zero byte writes on one end of a pipe to be read by the other as
+ // zero byte reads. These zero byte reads DO NOT signify EOF, so from the perspective
+ // of std::io::Read, they cannot be reported as Ok(0). We translate them to errors.
+ //
+ // Within CrosVM, this behavior is not used, but it has been implemented to avoid UB
+ // either in the future, or when talking to non CrosVM named pipes. If we need to
+ // actually use/understand this error from other parts of CrosVM (e.g. PipeConnection
+ // consumers), we could use ErrorKind::Interrupted (which as of 24/11/26 is not used by
+ // Rust for other purposes).
+ Ok(len) if len == 0 && !buf.is_empty() => Err(io::Error::new(
+ io::ErrorKind::Other,
+ PipeError::ZeroByteReadNoEof,
+ )),
+
+ // Read at least 1 byte, or 0 bytes if a zero byte buffer was provided.
+ Ok(len) => Ok(len),
+
+ // Treat a closed pipe like an EOF, because that is consistent with the Read trait.
+ //
+ // NOTE: this is explicitly NOT done for overlapped operations for a few reasons:
+ // 1. Overlapped operations do not follow the Read trait, so there is no strong reason
+ // *to* do it.
+ // 2. Ok(0) also means "overlapped operation started successfully." This is a real
+ // problem because the general pattern is to start an overlapped operation and then
+ // wait for it. So if we did that and the Ok(0) meant the pipe is closed, we would
+ // enter an infinite wait. (The kernel already told us when we started the operation
+ // that the pipe was closed. It won't tell us again.)
+ Err(e) if e.raw_os_error() == Some(ERROR_BROKEN_PIPE as i32) => Ok(0),
+
+ Err(e) => Err(e),
+ }
}
/// Similar to `PipeConnection::read` except it also allows:
@@ -589,9 +632,12 @@
);
match res {
Ok(bytes_read) => Ok(bytes_read),
- // Treat a closed pipe like an EOF.
- // We check the raw error because `ErrorKind::BrokenPipe` is ambiguous on Windows.
- Err(e) if e.raw_os_error() == Some(ERROR_BROKEN_PIPE as i32) => Ok(0),
+ // For message mode pipes, if the buffer is too small for the entire message, the kernel
+ // will return ERROR_MORE_DATA. This isn't strictly an "error" because the operation
+ // succeeds. Making it an error also means it's hard to handle this cleanly from the
+ // perspective of an io::Read consumer. So we discard the non-error, and return the
+ // successful result of filling the entire buffer.
+ Err(e) if e.raw_os_error() == Some(ERROR_MORE_DATA as i32) => Ok(buf.len()),
Err(e)
if blocking_mode == BlockingMode::NoWait
&& e.raw_os_error() == Some(ERROR_NO_DATA as i32) =>
@@ -619,9 +665,6 @@
// Safe because we are providing a valid buffer slice and also providing a valid
// overlapped struct.
match unsafe { self.read_overlapped(buf, overlapped_wrapper) } {
- // More data isn't necessarily an error as long as we've filled the provided buffer,
- // as is checked later in this function.
- Err(e) if e.raw_os_error() == Some(ERROR_MORE_DATA as i32) => Ok(()),
Err(e) => Err(e),
Ok(()) => Ok(()),
}?;
@@ -930,20 +973,30 @@
"Overlapped struct is not in use",
));
}
+
let mut size_transferred = 0;
// SAFETY:
// Safe as long as `overlapped_struct` isn't copied and also contains a valid event.
// Also the named pipe handle must created with `FILE_FLAG_OVERLAPPED`.
- fail_if_zero!(unsafe {
+ if (unsafe {
GetOverlappedResult(
self.handle.as_raw_descriptor(),
&mut *overlapped_wrapper.overlapped.0,
&mut size_transferred,
if wait { TRUE } else { FALSE },
)
- });
-
- Ok(size_transferred)
+ }) != 0
+ {
+ Ok(size_transferred)
+ } else {
+ let e = io::Error::last_os_error();
+ match e.raw_os_error() {
+ // More data => partial read of a message on a message pipe. This isn't really an
+ // error (see PipeConnection::read_internal) since we filled the provided buffer.
+ Some(error_code) if error_code as u32 == ERROR_MORE_DATA => Ok(size_transferred),
+ _ => Err(e),
+ }
+ }
}
/// Cancels I/O Operations in the current process. Since `lpOverlapped` is null, this will
diff --git a/base/src/sys/windows/stream_channel.rs b/base/src/sys/windows/stream_channel.rs
index 3587578..12f6266 100644
--- a/base/src/sys/windows/stream_channel.rs
+++ b/base/src/sys/windows/stream_channel.rs
@@ -57,6 +57,9 @@
/// An abstraction over named pipes and unix socketpairs.
///
+/// WARNING: partial reads of messages behave differently depending on the platform.
+/// See sys::unix::StreamChannel::inner_read for details.
+///
/// The ReadNotifier will return an event handle that is set when data is in the channel.
///
/// In message mode, single writes larger than
diff --git a/base/src/sys/windows/tube.rs b/base/src/sys/windows/tube.rs
index 8e9c20b..6133702 100644
--- a/base/src/sys/windows/tube.rs
+++ b/base/src/sys/windows/tube.rs
@@ -16,7 +16,6 @@
use serde::Deserialize;
use serde::Serialize;
use serde::Serializer;
-use winapi::shared::winerror::ERROR_MORE_DATA;
use zerocopy::AsBytes;
use zerocopy::FromBytes;
use zerocopy::FromZeroes;
@@ -281,25 +280,13 @@
}
}
-/// Reads a part of a Tube packet asserting that it was correctly read. This means:
-/// * Treats partial "message" (transport framing) reads are Ok, as long as we filled our buffer. We
-/// use this to ignore errors when reading the message header, which has the lengths we need to
-/// allocate our buffers for the remainder of the message.
-/// * We filled the supplied buffer.
+/// Reads a part of a Tube packet asserting that it was correctly read. In other words, we've
+/// filled the supplied buffer.
fn perform_read<F: FnMut(&mut [u8]) -> io::Result<usize>>(
read_fn: &mut F,
buf: &mut [u8],
) -> io::Result<usize> {
- let bytes_read = match read_fn(buf) {
- Ok(s) => Ok(s),
- Err(e)
- if e.raw_os_error()
- .map_or(false, |errno| errno == ERROR_MORE_DATA as i32) =>
- {
- Ok(buf.len())
- }
- Err(e) => Err(e),
- }?;
+ let bytes_read = read_fn(buf)?;
if bytes_read != buf.len() {
Err(io::Error::new(
diff --git a/base/src/tube.rs b/base/src/tube.rs
index 9bcd83b..c3cff40 100644
--- a/base/src/tube.rs
+++ b/base/src/tube.rs
@@ -138,6 +138,9 @@
RecvTooManyFds,
#[error("Received a message with a zero sized body. This should not happen.")]
RecvUnexpectedEmptyBody,
+ #[cfg(unix)]
+ #[error("failed to construct ScmSocket: {0}")]
+ ScmSocket(io::Error),
#[error("failed to send packet: {0}")]
Send(io::Error),
#[error("failed to write packet to intermediate buffer: {0}")]
diff --git a/base/tests/linux/tube.rs b/base/tests/linux/tube.rs
index 1decdb1..64d36f5 100644
--- a/base/tests/linux/tube.rs
+++ b/base/tests/linux/tube.rs
@@ -5,15 +5,12 @@
use std::time;
use base::deserialize_with_descriptors;
-use base::BlockingMode;
use base::EventContext;
use base::EventToken;
-use base::FramingMode;
use base::FromRawDescriptor;
use base::ReadNotifier;
use base::SafeDescriptor;
use base::SerializeDescriptors;
-use base::StreamChannel;
use base::Tube;
use base::UnixSeqpacket;
@@ -25,11 +22,8 @@
const EVENT_WAIT_TIME: time::Duration = time::Duration::from_secs(10);
#[test]
-fn test_serialize_tube_new() {
- let (sock_send, sock_recv) =
- StreamChannel::pair(BlockingMode::Nonblocking, FramingMode::Message).unwrap();
- let tube_send = Tube::new(sock_send).unwrap();
- let tube_recv = Tube::new(sock_recv).unwrap();
+fn test_serialize_tube_pair() {
+ let (tube_send, tube_recv) = Tube::pair().unwrap();
// Serialize the Tube
let msg_serialize = SerializeDescriptors::new(&tube_send);
@@ -62,10 +56,10 @@
}
#[test]
-fn test_send_recv_new_from_seqpacket() {
+fn test_send_recv_from_seqpacket() {
let (sock_send, sock_recv) = UnixSeqpacket::pair().unwrap();
- let tube_send = Tube::new_from_unix_seqpacket(sock_send).unwrap();
- let tube_recv = Tube::new_from_unix_seqpacket(sock_recv).unwrap();
+ let tube_send = Tube::try_from(sock_send).unwrap();
+ let tube_recv = Tube::try_from(sock_recv).unwrap();
tube_send.send(&"hi".to_string()).unwrap();
@@ -82,12 +76,3 @@
assert_eq!(tube_recv.recv::<String>().unwrap(), "hi");
}
-
-#[test]
-fn test_tube_new_byte_mode_error() {
- let (sock_byte_mode, _) =
- StreamChannel::pair(BlockingMode::Nonblocking, FramingMode::Byte).unwrap();
- let tube_error = Tube::new(sock_byte_mode);
-
- assert!(tube_error.is_err());
-}
diff --git a/devices/Android.bp b/devices/Android.bp
index a1349c1..96d6cf1 100644
--- a/devices/Android.bp
+++ b/devices/Android.bp
@@ -275,6 +275,7 @@
"libsmallvec",
"libswap",
"libsync_rust",
+ "libtempfile",
"libthiserror",
"libusb_util",
"libvfio_sys",
diff --git a/devices/Cargo.toml b/devices/Cargo.toml
index eeb33a7..f8c2b3f 100644
--- a/devices/Cargo.toml
+++ b/devices/Cargo.toml
@@ -90,6 +90,7 @@
smallvec = "1.6.1"
sync = { path = "../common/sync" }
system_api = { path = "../system_api", optional = true }
+tempfile = "3"
thiserror = "1.0.20"
cros_tracing = { path = "../cros_tracing" }
swap = { path = "../swap" }
@@ -107,7 +108,7 @@
libcras = { version = "*", optional = true }
minijail = "*"
net_sys = { path = "../net_sys" }
-p9 = "0.2"
+p9 = "0.3.1"
usb_util = { path = "../usb_util" }
vfio_sys = { path = "../vfio_sys" }
vhost = { path = "../vhost" }
diff --git a/devices/src/bat.rs b/devices/src/bat.rs
index a855013..47d9892 100644
--- a/devices/src/bat.rs
+++ b/devices/src/bat.rs
@@ -25,7 +25,6 @@
use serde::Serialize;
use sync::Mutex;
use thiserror::Error;
-use vm_control::BatConfig;
use vm_control::BatControlCommand;
use vm_control::BatControlResult;
@@ -49,6 +48,21 @@
/// the GoldFish Battery MMIO length.
pub const GOLDFISHBAT_MMIO_LEN: u64 = 0x1000;
+/// Configuration of fake battery status information.
+#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq, Copy)]
+pub enum BatConfig {
+ /// Propagates host's battery status
+ #[default]
+ Real,
+ /// Fake on battery status. Simulates a disconnected AC adapter.
+ /// This forces ac_online to false and sets the battery status
+ /// to DISCHARGING
+ Fake {
+ // Sets the maximum battery capacity reported to the guest
+ max_capacity: u32,
+ },
+}
+
#[derive(Clone, Serialize, Deserialize)]
struct GoldfishBatteryState {
// interrupt state
@@ -66,13 +80,16 @@
charge_counter: u32,
charge_full: u32,
initialized: bool,
+ // bat_config is used for goldfish battery to report fake battery to the guest.
+ bat_config: BatConfig,
}
macro_rules! create_battery_func {
// $property: the battery property which is going to be modified.
+ // $ty: the type of the `value` argument.
// $int: the interrupt status which is going to be set to notify the guest.
- ($fn:ident, $property:ident, $int:ident) => {
- pub(crate) fn $fn(&mut self, value: u32) -> bool {
+ ($fn:ident, $property:ident, $ty:ty, $int:ident) => {
+ pub(crate) fn $fn(&mut self, value: $ty) -> bool {
let old = std::mem::replace(&mut self.$property, value);
old != self.$property && self.set_int_status($int)
}
@@ -81,34 +98,39 @@
impl GoldfishBatteryState {
fn set_int_status(&mut self, mask: u32) -> bool {
- if ((self.int_enable & mask) != 0) && ((self.int_status & mask) == 0) {
- self.int_status |= mask;
- return true;
- }
- false
+ let old = self.int_status;
+ self.int_status |= self.int_enable & mask;
+ old != self.int_status
}
fn int_status(&self) -> u32 {
self.int_status
}
- create_battery_func!(set_ac_online, ac_online, AC_STATUS_CHANGED);
+ create_battery_func!(set_ac_online, ac_online, u32, AC_STATUS_CHANGED);
- create_battery_func!(set_status, status, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_status, status, u32, BATTERY_STATUS_CHANGED);
- create_battery_func!(set_health, health, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_health, health, u32, BATTERY_STATUS_CHANGED);
- create_battery_func!(set_present, present, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_present, present, u32, BATTERY_STATUS_CHANGED);
- create_battery_func!(set_capacity, capacity, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_capacity, capacity, u32, BATTERY_STATUS_CHANGED);
- create_battery_func!(set_voltage, voltage, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_voltage, voltage, u32, BATTERY_STATUS_CHANGED);
- create_battery_func!(set_current, current, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_current, current, u32, BATTERY_STATUS_CHANGED);
- create_battery_func!(set_charge_counter, charge_counter, BATTERY_STATUS_CHANGED);
+ create_battery_func!(
+ set_charge_counter,
+ charge_counter,
+ u32,
+ BATTERY_STATUS_CHANGED
+ );
- create_battery_func!(set_charge_full, charge_full, BATTERY_STATUS_CHANGED);
+ create_battery_func!(set_charge_full, charge_full, u32, BATTERY_STATUS_CHANGED);
+
+ create_battery_func!(set_bat_config, bat_config, BatConfig, BATTERY_INT_MASK);
}
/// GoldFish Battery state
@@ -122,8 +144,6 @@
tube: Option<Tube>,
create_power_monitor: Option<Box<dyn CreatePowerMonitorFn>>,
create_powerd_client: Option<Box<dyn CreatePowerClientFn>>,
- // battery_config is used for goldfish battery to report fake battery to the guest.
- battery_config: Arc<Mutex<BatConfig>>,
}
#[derive(Serialize, Deserialize)]
@@ -183,7 +203,6 @@
kill_evt: Event,
state: Arc<Mutex<GoldfishBatteryState>>,
create_power_monitor: Option<Box<dyn CreatePowerMonitorFn>>,
- battery_config: Arc<Mutex<BatConfig>>,
) {
let wait_ctx: WaitContext<Token> = match WaitContext::build_with(&[
(&tube, Token::Commands),
@@ -234,7 +253,6 @@
}
};
- let mut bat_config = battery_config.lock();
let mut bat_state = state.lock();
let inject_irq = match req {
BatControlCommand::SetStatus(status) => bat_state.set_status(status.into()),
@@ -253,12 +271,10 @@
}
BatControlCommand::SetFakeBatConfig(max_capacity) => {
let max_capacity = std::cmp::min(max_capacity, 100);
- *bat_config = BatConfig::Fake { max_capacity };
- true
+ bat_state.set_bat_config(BatConfig::Fake { max_capacity })
}
BatControlCommand::CancelFakeConfig => {
- *bat_config = BatConfig::Real;
- true
+ bat_state.set_bat_config(BatConfig::Real)
}
};
@@ -362,10 +378,9 @@
charge_counter: 0,
charge_full: 0,
initialized: false,
+ bat_config: BatConfig::Real,
}));
- let battery_config = Arc::new(Mutex::new(BatConfig::default()));
-
Ok(GoldfishBattery {
state,
mmio_base: mmio_base as u32,
@@ -376,7 +391,6 @@
tube: Some(tube),
create_power_monitor,
create_powerd_client,
- battery_config,
})
}
@@ -404,16 +418,8 @@
let irq_evt = self.irq_evt.try_clone().unwrap();
let bat_state = self.state.clone();
let create_monitor_fn = self.create_power_monitor.take();
- let battery_config = self.battery_config.clone();
self.monitor_thread = Some(WorkerThread::start(self.debug_label(), move |kill_evt| {
- command_monitor(
- tube,
- irq_evt,
- kill_evt,
- bat_state,
- create_monitor_fn,
- battery_config,
- )
+ command_monitor(tube, irq_evt, kill_evt, bat_state, create_monitor_fn)
}));
self.activated = true;
}
@@ -507,18 +513,24 @@
std::mem::replace(&mut self.state.lock().int_status, 0)
}
BATTERY_INT_ENABLE => self.state.lock().int_enable,
- BATTERY_AC_ONLINE => match *self.battery_config.lock() {
- BatConfig::Real => self.state.lock().ac_online,
- BatConfig::Fake { max_capacity: _ } => AC_ONLINE_VAL_OFFLINE,
- },
- BATTERY_STATUS => match *self.battery_config.lock() {
- BatConfig::Real => self.state.lock().status,
- BatConfig::Fake { max_capacity: _ } => BATTERY_STATUS_VAL_DISCHARGING,
- },
+ BATTERY_AC_ONLINE => {
+ let bat_config = self.state.lock().bat_config;
+ match bat_config {
+ BatConfig::Real => self.state.lock().ac_online,
+ BatConfig::Fake { max_capacity: _ } => AC_ONLINE_VAL_OFFLINE,
+ }
+ }
+ BATTERY_STATUS => {
+ let bat_config = self.state.lock().bat_config;
+ match bat_config {
+ BatConfig::Real => self.state.lock().status,
+ BatConfig::Fake { max_capacity: _ } => BATTERY_STATUS_VAL_DISCHARGING,
+ }
+ }
BATTERY_HEALTH => self.state.lock().health,
BATTERY_PRESENT => self.state.lock().present,
BATTERY_CAPACITY => {
- let max_capacity = match *self.battery_config.lock() {
+ let max_capacity = match self.state.lock().bat_config {
BatConfig::Real => 100,
BatConfig::Fake { max_capacity } => max_capacity,
};
diff --git a/devices/src/irqchip/gunyah.rs b/devices/src/irqchip/gunyah.rs
index e3070de..21d4740 100644
--- a/devices/src/irqchip/gunyah.rs
+++ b/devices/src/irqchip/gunyah.rs
@@ -26,6 +26,9 @@
impl GunyahIrqChip {
pub fn new(vm: GunyahVm) -> Result<GunyahIrqChip> {
+ // NOTE: Unlike the other hypervisors supported by crosvm, the Gunyah IRQ chip is not
+ // explicitly configured here. Instead, Gunyah uses the information in the FDT generated by
+ // crosvm to determine where and how to set up the IRQ chip.
Ok(GunyahIrqChip { vm })
}
}
diff --git a/devices/src/pci/coiommu.rs b/devices/src/pci/coiommu.rs
index ff59cef..a2ba2e5 100644
--- a/devices/src/pci/coiommu.rs
+++ b/devices/src/pci/coiommu.rs
@@ -1442,15 +1442,7 @@
fn allocate_address(&mut self, resources: &mut SystemAllocator) -> PciResult<PciAddress> {
if self.pci_address.is_none() {
- self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
- Some(Alloc::PciBar {
- bus,
- dev,
- func,
- bar: _,
- }) => Some(PciAddress { bus, dev, func }),
- _ => None,
- }
+ self.pci_address = resources.allocate_pci(0, self.debug_label());
}
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
}
diff --git a/devices/src/pci/mod.rs b/devices/src/pci/mod.rs
index 9aed043..8f51c4f 100644
--- a/devices/src/pci/mod.rs
+++ b/devices/src/pci/mod.rs
@@ -9,7 +9,6 @@
mod coiommu;
mod msi;
mod msix;
-mod pci_address;
mod pci_configuration;
mod pci_device;
#[cfg(feature = "pci-hotplug")]
@@ -24,6 +23,8 @@
mod vfio_pci;
use libc::EINVAL;
+pub use resources::PciAddress;
+pub use resources::PciAddressError;
use serde::Deserialize;
use serde::Serialize;
@@ -38,8 +39,6 @@
pub use self::msix::MsixCap;
pub use self::msix::MsixConfig;
pub use self::msix::MsixStatus;
-pub use self::pci_address::Error as PciAddressError;
-pub use self::pci_address::PciAddress;
pub use self::pci_configuration::PciBarConfiguration;
pub use self::pci_configuration::PciBarIndex;
pub use self::pci_configuration::PciBarPrefetchable;
diff --git a/devices/src/pci/pci_hotplug.rs b/devices/src/pci/pci_hotplug.rs
index 3346399..568b62a 100644
--- a/devices/src/pci/pci_hotplug.rs
+++ b/devices/src/pci/pci_hotplug.rs
@@ -10,7 +10,6 @@
use base::AsRawDescriptors;
use base::RawDescriptor;
use base::Tube;
-use resources::Alloc;
use serde::Deserialize;
use serde::Serialize;
use vm_control::api::VmMemoryClient;
@@ -155,15 +154,7 @@
) -> Result<()> {
match self.pci_address {
None => {
- if resources.reserve_pci(
- Alloc::PciBar {
- bus: preferred_address.bus,
- dev: preferred_address.dev,
- func: preferred_address.func,
- bar: 0,
- },
- self.debug_label(),
- ) {
+ if resources.reserve_pci(preferred_address, self.debug_label()) {
self.pci_address = Some(preferred_address);
} else {
return Err(PciDeviceError::PciAllocationFailed);
diff --git a/devices/src/pci/pcie/pcie_port.rs b/devices/src/pci/pcie/pcie_port.rs
index 5fd8070..3e35252 100644
--- a/devices/src/pci/pcie/pcie_port.rs
+++ b/devices/src/pci/pcie/pcie_port.rs
@@ -8,7 +8,6 @@
use base::error;
use base::warn;
use base::Event;
-use resources::Alloc;
use resources::SystemAllocator;
use sync::Mutex;
use zerocopy::FromBytes;
@@ -231,29 +230,14 @@
) -> std::result::Result<PciAddress, PciDeviceError> {
if self.pci_address.is_none() {
if let Some(address) = self.preferred_address {
- if resources.reserve_pci(
- Alloc::PciBar {
- bus: address.bus,
- dev: address.dev,
- func: address.func,
- bar: 0,
- },
- self.debug_label(),
- ) {
+ if resources.reserve_pci(address, self.debug_label()) {
self.pci_address = Some(address);
} else {
self.pci_address = None;
}
} else {
- match resources.allocate_pci(self.bus_range.primary, self.debug_label()) {
- Some(Alloc::PciBar {
- bus,
- dev,
- func,
- bar: _,
- }) => self.pci_address = Some(PciAddress { bus, dev, func }),
- _ => self.pci_address = None,
- }
+ self.pci_address =
+ resources.allocate_pci(self.bus_range.primary, self.debug_label());
}
}
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
diff --git a/devices/src/pci/pvpanic.rs b/devices/src/pci/pvpanic.rs
index acc0719..4dcf0e8 100644
--- a/devices/src/pci/pvpanic.rs
+++ b/devices/src/pci/pvpanic.rs
@@ -117,15 +117,7 @@
fn allocate_address(&mut self, resources: &mut SystemAllocator) -> Result<PciAddress> {
if self.pci_address.is_none() {
- self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
- Some(Alloc::PciBar {
- bus,
- dev,
- func,
- bar: _,
- }) => Some(PciAddress { bus, dev, func }),
- _ => None,
- }
+ self.pci_address = resources.allocate_pci(0, self.debug_label());
}
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
}
diff --git a/devices/src/pci/stub.rs b/devices/src/pci/stub.rs
index 6a3c5ee..5a202c9 100644
--- a/devices/src/pci/stub.rs
+++ b/devices/src/pci/stub.rs
@@ -13,7 +13,6 @@
//! something to the guest on function 0.
use base::RawDescriptor;
-use resources::Alloc;
use resources::SystemAllocator;
use serde::Deserialize;
use serde::Deserializer;
@@ -164,15 +163,7 @@
fn allocate_address(&mut self, resources: &mut SystemAllocator) -> Result<PciAddress> {
if self.assigned_address.is_none() {
- if resources.reserve_pci(
- Alloc::PciBar {
- bus: self.requested_address.bus,
- dev: self.requested_address.dev,
- func: self.requested_address.func,
- bar: 0,
- },
- self.debug_label(),
- ) {
+ if resources.reserve_pci(self.requested_address, self.debug_label()) {
self.assigned_address = Some(self.requested_address);
}
}
@@ -289,7 +280,7 @@
let mut device = StubPciDevice::new(&CONFIG);
assert!(device.allocate_address(&mut allocator).is_ok());
- assert!(allocator.release_pci(0xa, 0xb, 1));
+ assert!(allocator.release_pci(PciAddress::new(0, 0xa, 0xb, 1).unwrap()));
}
#[test]
diff --git a/devices/src/pci/vfio_pci.rs b/devices/src/pci/vfio_pci.rs
index 132286d..2790cfe 100644
--- a/devices/src/pci/vfio_pci.rs
+++ b/devices/src/pci/vfio_pci.rs
@@ -1629,15 +1629,7 @@
if self.pci_address.is_none() {
let mut address = self.preferred_address;
while address.func < 8 {
- if resources.reserve_pci(
- Alloc::PciBar {
- bus: address.bus,
- dev: address.dev,
- func: address.func,
- bar: 0,
- },
- self.debug_label(),
- ) {
+ if resources.reserve_pci(address, self.debug_label()) {
self.pci_address = Some(address);
break;
} else if self.hotplug_bus_number.is_none() {
diff --git a/devices/src/proxy.rs b/devices/src/proxy.rs
index c4cbfc8..9f24c84 100644
--- a/devices/src/proxy.rs
+++ b/devices/src/proxy.rs
@@ -5,10 +5,17 @@
//! Runs hardware devices in child processes.
use std::fs;
+use std::fs::File;
+use std::io::BufReader;
+use std::io::BufWriter;
+use std::io::Seek;
+use std::io::Write;
use anyhow::anyhow;
+use anyhow::Context;
use base::error;
use base::info;
+use base::with_as_descriptor;
use base::AsRawDescriptor;
#[cfg(feature = "swap")]
use base::AsRawDescriptors;
@@ -22,6 +29,7 @@
use remain::sorted;
use serde::Deserialize;
use serde::Serialize;
+use tempfile::tempfile;
use thiserror::Error;
use crate::bus::ConfigWriteResult;
@@ -50,6 +58,63 @@
pub type Result<T> = std::result::Result<T, Error>;
+/// Wrapper for sending snapshots to and receiving snapshots from proxied devices using a file
+/// to handle the case of snapshot being potentially too large to send across a Tube in a single
+/// message.
+#[derive(Debug, Serialize, Deserialize)]
+struct SnapshotFile {
+ #[serde(with = "with_as_descriptor")]
+ file: File,
+}
+
+impl SnapshotFile {
+ fn new() -> anyhow::Result<SnapshotFile> {
+ Ok(SnapshotFile {
+ file: tempfile().context("failed to create snasphot wrapper tempfile")?,
+ })
+ }
+
+ fn from_data(data: serde_json::Value) -> anyhow::Result<SnapshotFile> {
+ let mut snapshot = SnapshotFile::new()?;
+ snapshot.write(data)?;
+ Ok(snapshot)
+ }
+
+ fn read(&mut self) -> anyhow::Result<serde_json::Value> {
+ let data: serde_json::Value = {
+ let mut reader = BufReader::new(&self.file);
+
+ serde_json::from_reader(&mut reader)
+ .context("failed to read snapshot data from snapshot temp file")?
+ };
+
+ self.file
+ .rewind()
+ .context("failed to rewind snapshot temp file after read")?;
+
+ Ok(data)
+ }
+
+ fn write(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
+ {
+ let mut writer = BufWriter::new(&self.file);
+
+ serde_json::to_writer(&mut writer, &data)
+ .context("failed to write data to snasphot temp file")?;
+
+ writer
+ .flush()
+ .context("failed to flush data to snapshot temp file")?;
+ }
+
+ self.file
+ .rewind()
+ .context("failed to rewind snapshot temp file after write")?;
+
+ Ok(())
+ }
+}
+
#[derive(Debug, Serialize, Deserialize)]
enum Command {
Activate,
@@ -82,13 +147,18 @@
DestroyDevice,
Shutdown,
GetRanges,
- Snapshot,
+ Snapshot {
+ // NOTE: the SnapshotFile is created by the parent and sent to the child proxied device
+ // as the jailed child may not have permission to create a temp file.
+ snapshot: SnapshotFile,
+ },
Restore {
- data: serde_json::Value,
+ snapshot: SnapshotFile,
},
Sleep,
Wake,
}
+
#[derive(Debug, Serialize, Deserialize)]
enum CommandResult {
Ok,
@@ -104,7 +174,7 @@
InitPciConfigMappingResult(bool),
ReadVirtualConfigResult(u32),
GetRangesResult(Vec<(BusRange, BusType)>),
- SnapshotResult(std::result::Result<serde_json::Value, String>),
+ SnapshotResult(std::result::Result<SnapshotFile, String>),
RestoreResult(std::result::Result<(), String>),
SleepResult(std::result::Result<(), String>),
WakeResult(std::result::Result<(), String>),
@@ -215,14 +285,17 @@
let ranges = device.get_ranges();
tube.send(&CommandResult::GetRangesResult(ranges))
}
- Command::Snapshot => {
- let res = device.snapshot();
+ Command::Snapshot { mut snapshot } => {
+ let res = device.snapshot().and_then(|data| {
+ snapshot.write(data)?;
+ Ok(snapshot)
+ });
tube.send(&CommandResult::SnapshotResult(
res.map_err(|e| e.to_string()),
))
}
- Command::Restore { data } => {
- let res = device.restore(data);
+ Command::Restore { mut snapshot } => {
+ let res = snapshot.read().and_then(|data| device.restore(data));
tube.send(&CommandResult::RestoreResult(
res.map_err(|e| e.to_string()),
))
@@ -532,9 +605,11 @@
impl Suspendable for ProxyDevice {
fn snapshot(&mut self) -> anyhow::Result<serde_json::Value> {
- let res = self.sync_send(&Command::Snapshot);
+ let res = self.sync_send(&Command::Snapshot {
+ snapshot: SnapshotFile::new()?,
+ });
match res {
- Some(CommandResult::SnapshotResult(Ok(snap))) => Ok(snap),
+ Some(CommandResult::SnapshotResult(Ok(mut snapshot))) => snapshot.read(),
Some(CommandResult::SnapshotResult(Err(e))) => Err(anyhow!(
"failed to snapshot {}: {:#}",
self.debug_label(),
@@ -545,7 +620,9 @@
}
fn restore(&mut self, data: serde_json::Value) -> anyhow::Result<()> {
- let res = self.sync_send(&Command::Restore { data });
+ let res = self.sync_send(&Command::Restore {
+ snapshot: SnapshotFile::from_data(data)?,
+ });
match res {
Some(CommandResult::RestoreResult(Ok(()))) => Ok(()),
Some(CommandResult::RestoreResult(Err(e))) => {
diff --git a/devices/src/sys/windows/serial_device.rs b/devices/src/sys/windows/serial_device.rs
index 061a564..90f8268 100644
--- a/devices/src/sys/windows/serial_device.rs
+++ b/devices/src/sys/windows/serial_device.rs
@@ -53,20 +53,18 @@
match ¶m.path {
None => Err(Error::PathRequired),
Some(path) => {
- // We must create this pipe in non-blocking mode because a blocking
- // read in one thread will block a write in another thread having a
- // handle to the same end of the pipe, which will hang the
- // emulator. This does mean that the event loop writing to the
- // pipe's output will need to swallow errors caused by writing to
- // the pipe when it's not ready; but in practice this does not seem
- // to cause a problem.
+ // Note that when this pipe is not connected, the serial device will
+ // discard output. If the pipe's buffer is allowed to fill, writes
+ // will block, which will stall the output queue. This generally
+ // points to a bug in the named pipe consumer, and if desired we
+ // could address it in CrosVM by adding a write timeout.
let pipe_in = named_pipes::create_server_pipe(
path.to_str().unwrap(),
&FramingMode::Byte,
- &BlockingMode::NoWait,
- 0, // default timeout
+ &BlockingMode::Wait,
+ /* timeout= */ 0,
named_pipes::DEFAULT_BUFFER_SIZE,
- false,
+ /* overlapped= */ true,
)
.map_err(Error::SystemTypeError)?;
diff --git a/devices/src/usb/xhci/xhci_controller.rs b/devices/src/usb/xhci/xhci_controller.rs
index 4ce2f3c..426e764 100644
--- a/devices/src/usb/xhci/xhci_controller.rs
+++ b/devices/src/usb/xhci/xhci_controller.rs
@@ -189,15 +189,7 @@
resources: &mut SystemAllocator,
) -> Result<PciAddress, PciDeviceError> {
if self.pci_address.is_none() {
- self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
- Some(Alloc::PciBar {
- bus,
- dev,
- func,
- bar: _,
- }) => Some(PciAddress { bus, dev, func }),
- _ => None,
- }
+ self.pci_address = resources.allocate_pci(0, self.debug_label());
}
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
}
diff --git a/devices/src/virtio/block/asynchronous.rs b/devices/src/virtio/block/asynchronous.rs
index 511de98..158d601 100644
--- a/devices/src/virtio/block/asynchronous.rs
+++ b/devices/src/virtio/block/asynchronous.rs
@@ -278,7 +278,6 @@
flush_timer: &RefCell<TimerAsync<Timer>>,
flush_timer_armed: &RefCell<bool>,
) {
- let _trace = cros_tracing::trace_event!(VirtioBlk, "process_one_chain");
let len = match process_one_request(&mut avail_desc, disk_state, flush_timer, flush_timer_armed)
.await
{
@@ -822,7 +821,6 @@
let offset = sector
.checked_shl(u32::from(SECTOR_SHIFT))
.ok_or(ExecuteError::OutOfRange)?;
- let _trace = cros_tracing::trace_event!(VirtioBlk, "in", offset, data_len);
check_range(offset, data_len as u64, disk_size)?;
let disk_image = &disk_state.disk_image;
writer
@@ -842,7 +840,6 @@
let offset = sector
.checked_shl(u32::from(SECTOR_SHIFT))
.ok_or(ExecuteError::OutOfRange)?;
- let _trace = cros_tracing::trace_event!(VirtioBlk, "out", offset, data_len);
check_range(offset, data_len as u64, disk_size)?;
let disk_image = &disk_state.disk_image;
reader
@@ -865,12 +862,6 @@
}
}
VIRTIO_BLK_T_DISCARD | VIRTIO_BLK_T_WRITE_ZEROES => {
- #[allow(clippy::if_same_then_else)]
- let _trace = if req_type == VIRTIO_BLK_T_DISCARD {
- cros_tracing::trace_event!(VirtioBlk, "discard")
- } else {
- cros_tracing::trace_event!(VirtioBlk, "write_zeroes")
- };
if req_type == VIRTIO_BLK_T_DISCARD && !disk_state.sparse {
// Discard is a hint; if this is a non-sparse disk, just ignore it.
return Ok(());
@@ -926,7 +917,6 @@
}
}
VIRTIO_BLK_T_FLUSH => {
- let _trace = cros_tracing::trace_event!(VirtioBlk, "flush");
disk_state
.disk_image
.fdatasync()
@@ -942,7 +932,6 @@
}
}
VIRTIO_BLK_T_GET_ID => {
- let _trace = cros_tracing::trace_event!(VirtioBlk, "get_id");
if let Some(id) = disk_state.id {
writer.write_all(&id).map_err(ExecuteError::CopyId)?;
} else {
diff --git a/devices/src/virtio/console.rs b/devices/src/virtio/console.rs
index 52d48c8..4aac9c0 100644
--- a/devices/src/virtio/console.rs
+++ b/devices/src/virtio/console.rs
@@ -153,7 +153,6 @@
#[cfg(windows)]
use base::windows::named_pipes;
use tempfile::tempfile;
- use vm_memory::GuestAddress;
use super::*;
use crate::suspendable_virtio_tests;
diff --git a/devices/src/virtio/fs/mod.rs b/devices/src/virtio/fs/mod.rs
index 3886672..8a32b49 100644
--- a/devices/src/virtio/fs/mod.rs
+++ b/devices/src/virtio/fs/mod.rs
@@ -122,6 +122,7 @@
queue_sizes: Box<[u16]>,
avail_features: u64,
acked_features: u64,
+ use_dax: bool,
pci_bar: Option<Alloc>,
tube: Option<Tube>,
workers: Vec<WorkerThread<Result<()>>>,
@@ -152,6 +153,9 @@
// There is always a high priority queue in addition to the request queues.
let num_queues = num_workers + 1;
+ // TODO(b/176129399): Remove cfg! once DAX is supported on ARM.
+ let use_dax = cfg!(target_arch = "x86_64") && fs.cfg().use_dax;
+
Ok(Fs {
cfg,
tag: tag.to_string(),
@@ -159,6 +163,7 @@
queue_sizes: vec![QUEUE_SIZE; num_queues].into_boxed_slice(),
avail_features: base_features,
acked_features: 0,
+ use_dax,
pci_bar: None,
tube: Some(tube),
workers: Vec::with_capacity(num_workers + 1),
@@ -223,19 +228,15 @@
}
let fs = self.fs.take().expect("missing file system implementation");
- let use_dax = fs.cfg().use_dax;
let server = Arc::new(Server::new(fs));
let socket = self.tube.take().expect("missing mapping socket");
let mut slot = 0;
// Set up shared memory for DAX.
- // TODO(b/176129399): Remove cfg! once DAX is supported on ARM.
- if cfg!(target_arch = "x86_64") && use_dax {
+ if let Some(pci_bar) = self.pci_bar {
// Create the shared memory region now before we start processing requests.
- let request = FsMappingRequest::AllocateSharedMemoryRegion(
- self.pci_bar.as_ref().cloned().expect("No pci_bar"),
- );
+ let request = FsMappingRequest::AllocateSharedMemoryRegion(pci_bar);
socket
.send(&request)
.expect("failed to send allocation message");
@@ -276,7 +277,7 @@
}
fn get_device_bars(&mut self, address: PciAddress) -> Vec<PciBarConfiguration> {
- if self.fs.as_ref().map_or(false, |fs| !fs.cfg().use_dax) {
+ if !self.use_dax {
return vec![];
}
@@ -296,7 +297,7 @@
}
fn get_device_caps(&self) -> Vec<Box<dyn PciCapability>> {
- if self.fs.as_ref().map_or(false, |fs| !fs.cfg().use_dax) {
+ if !self.use_dax {
return vec![];
}
diff --git a/devices/src/virtio/fs/passthrough.rs b/devices/src/virtio/fs/passthrough.rs
index a077ec1..e2c6bef 100644
--- a/devices/src/virtio/fs/passthrough.rs
+++ b/devices/src/virtio/fs/passthrough.rs
@@ -1506,9 +1506,12 @@
#[cfg(feature = "arc_quota")]
let st = stat(&*data)?;
+ #[cfg(feature = "arc_quota")]
+ let ctx_uid = self.lookup_host_uid(&ctx, inode);
+
// Only privleged uid can perform FS_IOC_SETFLAGS through cryptohome.
#[cfg(feature = "arc_quota")]
- if ctx.uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx.uid) {
+ if ctx_uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx_uid) {
// Get the current flag.
let mut buf = MaybeUninit::<c_int>::zeroed();
// SAFETY: the kernel will only write to `buf` and we check the return value.
@@ -2042,6 +2045,24 @@
};
Ok(res)
}
+
+ /// Looks up the host uid according to the path of the file that the inode refers to.
+ fn lookup_host_uid(&self, ctx: &Context, inode: Inode) -> u32 {
+ if let Ok(inode_data) = self.find_inode(inode) {
+ let path = &inode_data.path;
+ for perm_data in self
+ .permission_paths
+ .read()
+ .expect("acquire permission_paths read lock")
+ .iter()
+ {
+ if perm_data.need_set_permission(path) {
+ return perm_data.host_uid;
+ }
+ }
+ }
+ ctx.uid
+ }
}
/// Decrements the refcount of the inode.
diff --git a/devices/src/virtio/gpu/parameters.rs b/devices/src/virtio/gpu/parameters.rs
index 783437f..6035738 100644
--- a/devices/src/virtio/gpu/parameters.rs
+++ b/devices/src/virtio/gpu/parameters.rs
@@ -6,6 +6,7 @@
#[cfg(windows)]
use std::marker::PhantomData;
+use std::path::PathBuf;
use serde::Deserialize;
use serde::Deserializer;
@@ -89,6 +90,9 @@
pub allow_implicit_render_server_exec: bool,
// Passthrough parameters sent to the underlying renderer in a renderer-specific format.
pub renderer_features: Option<String>,
+ // When running with device sandboxing, the path of a directory available for
+ // scratch space.
+ pub snapshot_scratch_path: Option<PathBuf>,
}
impl Default for GpuParameters {
@@ -120,6 +124,7 @@
fixed_blob_mapping: cfg!(target_os = "linux") && !cfg!(feature = "gfxstream"),
allow_implicit_render_server_exec: false,
renderer_features: None,
+ snapshot_scratch_path: None,
}
}
}
diff --git a/devices/src/virtio/input/mod.rs b/devices/src/virtio/input/mod.rs
index edaa88b..56233c3 100644
--- a/devices/src/virtio/input/mod.rs
+++ b/devices/src/virtio/input/mod.rs
@@ -954,6 +954,7 @@
#[cfg(test)]
mod tests {
+ use defaults::new_keyboard_config;
use tempfile::TempDir;
use super::*;
@@ -1027,4 +1028,20 @@
let expected_ev_led_bitmap = &virtio_input_bitmap::from_bits(&[0, 1, 2]);
assert_eq!(ev_led_bitmap, expected_ev_led_bitmap);
}
+
+ // Test that the example custom device config file
+ // (tests/data/input/example_custom_input_config.json) provides the same supported events as
+ // the default keyboard's supported events.
+ #[test]
+ fn example_custom_config_file_events_eq_default_keyboard_events() {
+ let temp_file = TempDir::new().unwrap();
+ let path = temp_file.path().join("test.json");
+ let test_json = include_str!("../../../tests/data/input/example_custom_input_config.json");
+ fs::write(&path, test_json).expect("Unable to write test file");
+
+ let keyboard_supported_events = new_keyboard_config(0).supported_events;
+ let custom_supported_events = parse_input_config_file(&path, 0).unwrap().supported_events;
+
+ assert_eq!(keyboard_supported_events, custom_supported_events);
+ }
}
diff --git a/devices/src/virtio/queue/split_queue.rs b/devices/src/virtio/queue/split_queue.rs
index 187b360..fe53ceb 100644
--- a/devices/src/virtio/queue/split_queue.rs
+++ b/devices/src/virtio/queue/split_queue.rs
@@ -129,7 +129,14 @@
features: config.acked_features(),
next_avail: config.next_avail(),
next_used: config.next_used(),
- last_used: config.next_used(),
+
+ // WARNING: last_used controls interrupt suppression
+ // (VIRTIO_RING_F_EVENT_IDX). The only safe initial value is
+ // zero (unless restoring a snapshot and the value that was stored
+ // on the device is known; however we do not bother with that in our
+ // snapshot system since it is much simpler to just use the zero
+ // value and send a potentially spurious interrupt on restore).
+ last_used: Wrapping(0),
})
}
@@ -147,9 +154,30 @@
.mem
.read_obj_from_addr_volatile(used_index_addr)
.unwrap();
- // We assume the vhost-user backend sent interrupts for any descriptors it marked used
- // before it stopped processing the queue, so `last_used == next_used`.
- self.last_used = self.next_used;
+
+ // Since the backend has not told us what its actual last_used value
+ // was, we have to assume that an interrupt must be sent when the next
+ // available descriptor is used, so we set this to zero.
+ //
+ // But wait, one might ask, why can't we just assume the vhost-user
+ // backend has already sent interrupts for any descriptors it marked
+ // used before it stopped processing the queue? Then we could just
+ // initialize last_used as `last_used == next_used`, which would skip
+ // spurious interrupts and be more efficient. Right?
+ //
+ // If VIRTIO_RING_F_EVENT_IDX is enabled, then no. The reason is the
+ // device could be in an interrupt suppressed state and so it may indeed
+ // have marked some descriptors used, but not yet sent an interrupt for
+ // them. Once we set last_used = next_used, no interrupts will be sent
+ // to the driver until the driver updates next_used (see
+ // queue_wants_interrupt for details), but the driver will
+ // never wake up because the device isn't sending any interrupts. Thus, the
+ // device stalls.
+ //
+ // NOTE: this value is not used by the snapshot/restore process, but we
+ // still want to pick a reasonable value here in case it is used in the
+ // future.
+ self.last_used = Wrapping(0);
}
pub fn next_avail_to_process(&self) -> u16 {
diff --git a/devices/src/virtio/vhost/user/device/fs/sys/linux.rs b/devices/src/virtio/vhost/user/device/fs/sys/linux.rs
index c9989b7..d51e4ae 100644
--- a/devices/src/virtio/vhost/user/device/fs/sys/linux.rs
+++ b/devices/src/virtio/vhost/user/device/fs/sys/linux.rs
@@ -14,7 +14,6 @@
use cros_async::Executor;
use jail::create_base_minijail;
use jail::create_base_minijail_without_pivot_root;
-use jail::set_embedded_bpf_program;
use minijail::Minijail;
use crate::virtio::vhost::user::device::fs::FsBackend;
@@ -85,7 +84,7 @@
// vvu locks around 512k memory. Just give 1M.
j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;
#[cfg(not(feature = "seccomp_trace"))]
- set_embedded_bpf_program(&mut j, "fs_device_vhost_user")?;
+ jail::set_embedded_bpf_program(&mut j, "fs_device_vhost_user")?;
j.use_seccomp_filter();
j
};
diff --git a/devices/src/virtio/vhost/user/device/gpu/sys/linux.rs b/devices/src/virtio/vhost/user/device/gpu/sys/linux.rs
index f86d207..83a1148 100644
--- a/devices/src/virtio/vhost/user/device/gpu/sys/linux.rs
+++ b/devices/src/virtio/vhost/user/device/gpu/sys/linux.rs
@@ -199,7 +199,7 @@
ex.spawn_blocking(move || match listener.accept() {
Ok(stream) => resource_bridges
.lock()
- .push(Tube::new_from_unix_seqpacket(stream).unwrap()),
+ .push(Tube::try_from(stream).unwrap()),
Err(e) => {
let path = listener
.path()
diff --git a/devices/src/virtio/vhost/user/device/handler.rs b/devices/src/virtio/vhost/user/device/handler.rs
index 3161e35..0cc253a 100644
--- a/devices/src/virtio/vhost/user/device/handler.rs
+++ b/devices/src/virtio/vhost/user/device/handler.rs
@@ -501,7 +501,7 @@
}
fn set_vring_base(&mut self, index: u32, base: u32) -> VhostResult<()> {
- if index as usize >= self.vrings.len() || base >= Queue::MAX_SIZE.into() {
+ if index as usize >= self.vrings.len() {
return Err(VhostError::InvalidParam);
}
diff --git a/devices/src/virtio/vhost/user/device/wl.rs b/devices/src/virtio/vhost/user/device/wl.rs
index a2468c9..207f90b 100644
--- a/devices/src/virtio/vhost/user/device/wl.rs
+++ b/devices/src/virtio/vhost/user/device/wl.rs
@@ -360,7 +360,7 @@
let deadline = Instant::now() + Duration::from_secs(5);
loop {
match UnixSeqpacket::connect(&p) {
- Ok(s) => return Ok(Tube::new_from_unix_seqpacket(s).unwrap()),
+ Ok(s) => return Ok(Tube::try_from(s).unwrap()),
Err(e) => {
if Instant::now() < deadline {
thread::sleep(Duration::from_millis(50));
diff --git a/devices/src/virtio/virtio_device.rs b/devices/src/virtio/virtio_device.rs
index 4449b15..9c481b9 100644
--- a/devices/src/virtio/virtio_device.rs
+++ b/devices/src/virtio/virtio_device.rs
@@ -11,15 +11,14 @@
use base::Protection;
use base::RawDescriptor;
use hypervisor::MemCacheType;
+use resources::AddressRange;
use vm_control::VmMemorySource;
-use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use super::*;
use crate::pci::MsixStatus;
use crate::pci::PciAddress;
use crate::pci::PciBarConfiguration;
-use crate::pci::PciBarIndex;
use crate::pci::PciCapability;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -175,16 +174,6 @@
Some(sdts)
}
- /// Reads from a BAR region mapped in to the device.
- /// * `addr` - The guest address inside the BAR.
- /// * `data` - Filled with the data from `addr`.
- fn read_bar(&mut self, _bar_index: PciBarIndex, _offset: u64, _data: &mut [u8]) {}
-
- /// Writes to a BAR region mapped in to the device.
- /// * `addr` - The guest address inside the BAR.
- /// * `data` - The data to write.
- fn write_bar(&mut self, _bar_index: PciBarIndex, _offset: u64, _data: &[u8]) {}
-
/// Returns the PCI address where the device will be allocated.
/// Returns `None` if any address is good for the device.
fn pci_address(&self) -> Option<PciAddress> {
@@ -218,13 +207,11 @@
/// before `activate`.
fn set_shared_memory_mapper(&mut self, _mapper: Box<dyn SharedMemoryMapper>) {}
- /// Provides the base address of the shared memory region, if one is present. Will
+ /// Provides the guest address range of the shared memory region, if one is present. Will
/// be called before `activate`.
- ///
- /// NOTE: Mappings in shared memory regions should be accessed via offset, rather
- /// than via raw guest physical address. This function is only provided so
- /// devices can remain backwards compatible with older drivers.
- fn set_shared_memory_region_base(&mut self, _addr: GuestAddress) {}
+ fn set_shared_memory_region(&mut self, shmem_region: AddressRange) {
+ let _ = shmem_region;
+ }
/// Queries the implementation whether a single prepared hypervisor memory mapping with explicit
/// caching type should be setup lazily on first mapping request, or whether to dynamically
@@ -304,6 +291,7 @@
use super::*;
fn memory() -> GuestMemory {
+ use vm_memory::GuestAddress;
GuestMemory::new(&[(GuestAddress(0u64), 4 * 1024 * 1024)])
.expect("Creating guest memory failed.")
}
diff --git a/devices/src/virtio/virtio_pci_device.rs b/devices/src/virtio/virtio_pci_device.rs
index ce87b49..52fe200 100644
--- a/devices/src/virtio/virtio_pci_device.rs
+++ b/devices/src/virtio/virtio_pci_device.rs
@@ -26,6 +26,7 @@
use libc::ERANGE;
#[cfg(target_arch = "x86_64")]
use metrics::MetricEventType;
+use resources::AddressRange;
use resources::Alloc;
use resources::AllocOptions;
use resources::SystemAllocator;
@@ -43,7 +44,6 @@
use vm_control::VmMemoryDestination;
use vm_control::VmMemoryRegionId;
use vm_control::VmMemorySource;
-use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use zerocopy::AsBytes;
use zerocopy::FromBytes;
@@ -725,28 +725,12 @@
) -> std::result::Result<PciAddress, PciDeviceError> {
if self.pci_address.is_none() {
if let Some(address) = self.preferred_address {
- if !resources.reserve_pci(
- Alloc::PciBar {
- bus: address.bus,
- dev: address.dev,
- func: address.func,
- bar: 0,
- },
- self.debug_label(),
- ) {
+ if !resources.reserve_pci(address, self.debug_label()) {
return Err(PciDeviceError::PciAllocationFailed);
}
self.pci_address = Some(address);
} else {
- self.pci_address = match resources.allocate_pci(0, self.debug_label()) {
- Some(Alloc::PciBar {
- bus,
- dev,
- func,
- bar: _,
- }) => Some(PciAddress { bus, dev, func }),
- _ => None,
- }
+ self.pci_address = resources.allocate_pci(0, self.debug_label());
}
}
self.pci_address.ok_or(PciDeviceError::PciAllocationFailed)
@@ -914,8 +898,6 @@
}
_ => (),
}
- } else {
- self.device.read_bar(bar_index, offset, data);
}
}
@@ -969,8 +951,6 @@
}
_ => (),
}
- } else {
- self.device.write_bar(bar_index, offset, data);
}
if !self.device_activated && self.is_driver_ready() {
@@ -1151,9 +1131,11 @@
.get_shared_memory_region()
.is_some()
{
+ let shmem_region = AddressRange::from_start_and_size(ranges[0].addr, ranges[0].size)
+ .expect("invalid shmem region");
virtio_pci_device
.device
- .set_shared_memory_region_base(GuestAddress(ranges[0].addr));
+ .set_shared_memory_region(shmem_region);
}
Ok(ranges)
diff --git a/devices/src/virtio/wl.rs b/devices/src/virtio/wl.rs
index ecf4f4e..5c7405c 100644
--- a/devices/src/virtio/wl.rs
+++ b/devices/src/virtio/wl.rs
@@ -126,7 +126,6 @@
use rutabaga_gfx::RUTABAGA_MAP_CACHE_MASK;
use thiserror::Error as ThisError;
use vm_control::VmMemorySource;
-use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use vm_memory::GuestMemoryError;
use zerocopy::AsBytes;
@@ -2096,8 +2095,8 @@
})
}
- fn set_shared_memory_region_base(&mut self, shmem_base: GuestAddress) {
- self.address_offset = Some(shmem_base.0);
+ fn set_shared_memory_region(&mut self, shmem_region: AddressRange) {
+ self.address_offset = Some(shmem_region.start);
}
fn set_shared_memory_mapper(&mut self, mapper: Box<dyn SharedMemoryMapper>) {
diff --git a/devices/tests/data/input/example_custom_input_config.json b/devices/tests/data/input/example_custom_input_config.json
new file mode 100644
index 0000000..da685ea
--- /dev/null
+++ b/devices/tests/data/input/example_custom_input_config.json
@@ -0,0 +1,138 @@
+{
+ "name": "Virtio Custom Test",
+ "serial_name": "virtio-custom-test",
+ "events": [
+ {
+ "event_type": "EV_KEY",
+ "event_type_code": 1,
+ "supported_events": {
+ "KEY_ESC": 1,
+ "KEY_1": 2,
+ "KEY_2": 3,
+ "KEY_3": 4,
+ "KEY_4": 5,
+ "KEY_5": 6,
+ "KEY_6": 7,
+ "KEY_7": 8,
+ "KEY_8": 9,
+ "KEY_9": 10,
+ "KEY_0": 11,
+ "KEY_MINUS": 12,
+ "KEY_EQUAL": 13,
+ "KEY_BACKSPACE": 14,
+ "KEY_TAB": 15,
+ "KEY_Q": 16,
+ "KEY_W": 17,
+ "KEY_E": 18,
+ "KEY_R": 19,
+ "KEY_T": 20,
+ "KEY_Y": 21,
+ "KEY_U": 22,
+ "KEY_I": 23,
+ "KEY_O": 24,
+ "KEY_P": 25,
+ "KEY_LEFTBRACE": 26,
+ "KEY_RIGHTBRACE": 27,
+ "KEY_ENTER": 28,
+ "KEY_LEFTCTRL": 29,
+ "KEY_A": 30,
+ "KEY_S": 31,
+ "KEY_D": 32,
+ "KEY_F": 33,
+ "KEY_G": 34,
+ "KEY_H": 35,
+ "KEY_J": 36,
+ "KEY_K": 37,
+ "KEY_L": 38,
+ "KEY_SEMICOLON": 39,
+ "KEY_APOSTROPHE": 40,
+ "KEY_GRAVE": 41,
+ "KEY_LEFTSHIFT": 42,
+ "KEY_BACKSLASH": 43,
+ "KEY_Z": 44,
+ "KEY_X": 45,
+ "KEY_C": 46,
+ "KEY_V": 47,
+ "KEY_B": 48,
+ "KEY_N": 49,
+ "KEY_M": 50,
+ "KEY_COMMA": 51,
+ "KEY_DOT": 52,
+ "KEY_SLASH": 53,
+ "KEY_RIGHTSHIFT": 54,
+ "KEY_KPASTERISK": 55,
+ "KEY_LEFTALT": 56,
+ "KEY_SPACE": 57,
+ "KEY_CAPSLOCK": 58,
+ "KEY_F1": 59,
+ "KEY_F2": 60,
+ "KEY_F3": 61,
+ "KEY_F4": 62,
+ "KEY_F5": 63,
+ "KEY_F6": 64,
+ "KEY_F7": 65,
+ "KEY_F8": 66,
+ "KEY_F9": 67,
+ "KEY_F10": 68,
+ "KEY_NUMLOCK": 69,
+ "KEY_SCROLLLOCK": 70,
+ "KEY_KP7": 71,
+ "KEY_KP8": 72,
+ "KEY_KP9": 73,
+ "KEY_KPMINUS": 74,
+ "KEY_KP4": 75,
+ "KEY_KP5": 76,
+ "KEY_KP6": 77,
+ "KEY_KPPLUS": 78,
+ "KEY_KP1": 79,
+ "KEY_KP2": 80,
+ "KEY_KP3": 81,
+ "KEY_KP0": 82,
+ "KEY_KPDOT": 83,
+ "KEY_F11": 87,
+ "KEY_F12": 88,
+ "KEY_KPENTER": 96,
+ "KEY_RIGHTCTRL": 97,
+ "KEY_KPSLASH": 98,
+ "KEY_SYSRQ": 99,
+ "KEY_RIGHTALT": 100,
+ "KEY_HOME": 102,
+ "KEY_UP": 103,
+ "KEY_PAGEUP": 104,
+ "KEY_LEFT": 105,
+ "KEY_RIGHT": 106,
+ "KEY_END": 107,
+ "KEY_DOWN": 108,
+ "KEY_PAGEDOWN": 109,
+ "KEY_INSERT": 110,
+ "KEY_DELETE": 111,
+ "KEY_MUTE": 113,
+ "KEY_VOLUMEDOWN": 114,
+ "KEY_VOLUMEUP": 115,
+ "KEY_POWER": 116,
+ "KEY_PAUSE": 119,
+ "KEY_MENU": 139,
+ "KEY_BACK": 158,
+ "KEY_HOMEPAGE": 172,
+ "KEY_PRINT": 210
+ }
+ },
+ {
+ "event_type": "EV_REP",
+ "event_type_code": 20,
+ "supported_events": {
+ "REP_DELAY": 0,
+ "REP_PERIOD": 1
+ }
+ },
+ {
+ "event_type": "EV_LED",
+ "event_type_code": 17,
+ "supported_events": {
+ "LED_NUML": 0,
+ "LED_CAPSL": 1,
+ "LED_SCROLLL": 2
+ }
+ }
+ ]
+}
diff --git a/docs/book/book.toml b/docs/book/book.toml
index ec46cac..39fbf54 100644
--- a/docs/book/book.toml
+++ b/docs/book/book.toml
@@ -20,3 +20,4 @@
"testing.html" = "testing/index.html"
[output.linkcheck]
+warning-policy = "error"
diff --git a/docs/book/src/SUMMARY.md b/docs/book/src/SUMMARY.md
index 03612a2..7221f60 100644
--- a/docs/book/src/SUMMARY.md
+++ b/docs/book/src/SUMMARY.md
@@ -22,7 +22,9 @@
- [SCSI (experimental)](./devices/scsi.md)
- [Fs](./devices/fs.md)
- [Vsock](./devices/vsock.md)
- - [Pmem](./devices/pmem.md)
+ - [Pmem](./devices/pmem/README.md)
+ - [VirtIO Pmem](./devices/pmem/basic.md)
+ - [Sharing host directory with virtio-pmem (experimental)](./devices/pmem/pmem_ext2.md)
- [USB](./devices/usb.md)
- [Wayland](./devices/wayland.md)
- [Video (experimental)](./devices/video.md)
diff --git a/docs/book/src/appendix/memory_layout.md b/docs/book/src/appendix/memory_layout.md
index 33b82f0..92ea05f 100644
--- a/docs/book/src/appendix/memory_layout.md
+++ b/docs/book/src/appendix/memory_layout.md
@@ -65,7 +65,7 @@
| [`AARCH64_PCI_CAM_BASE_DEFAULT`] | `1_0000` | `101_0000` | 16 MiB | PCI configuration (CAM) |
| [`AARCH64_VIRTFREQ_BASE`] | `104_0000` | `105_0000` | 64 KiB | Virtual cpufreq device |
| [`AARCH64_PVTIME_IPA_START`] | `1ff_0000` | `200_0000` | 64 KiB | Paravirtualized time |
-| [`AARCH64_PCI_CAM_BASE_DEFAULT`] | `200_0000` | `400_0000` | 32 MiB | Low MMIO allocation area |
+| [`AARCH64_PCI_MEM_BASE_DEFAULT`] | `200_0000` | `400_0000` | 32 MiB | Low MMIO allocation area |
| [`AARCH64_GIC_CPUI_BASE`] | `3ffd_0000` | `3fff_0000` | 128 KiB | vGIC |
| [`AARCH64_GIC_DIST_BASE`] | `3fff_0000` | `4000_0000` | 64 KiB | vGIC |
| [`AARCH64_PROTECTED_VM_FW_START`] | `7fc0_0000` | `8000_0000` | 4 MiB | pVM firmware (if running a protected VM) |
@@ -105,9 +105,9 @@
[serial_addr]: https://crsrc.org/o/src/platform/crosvm/arch/src/serial.rs;l=78?q=SERIAL_ADDR
[`aarch64_rtc_addr`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=177?q=AARCH64_RTC_ADDR
[`aarch64_vmwdt_addr`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=187?q=AARCH64_VMWDT_ADDR
-[`aarch64_pci_cfg_base`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=192?q=AARCH64_PCI_CAM_BASE_DEFAULT
[`aarch64_virtfreq_base`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=207?q=AARCH64_VIRTFREQ_BASE
-[`aarch64_mmio_base`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=196?q=AARCH64_PCI_CAM_BASE_DEFAULT
+[`aarch64_pci_cam_base_default`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=154?q=AARCH64_PCI_CAM_BASE_DEFAULT
+[`aarch64_pci_mem_base_default`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=154?q=AARCH64_PCI_MEM_BASE_DEFAULT
[`aarch64_gic_cpui_base`]: https://crsrc.org/o/src/platform/crosvm/devices/src/irqchip/kvm/aarch64.rs;l=106?q=AARCH64_GIC_CPUI_BASE
[`aarch64_gic_dist_base`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=105?q=AARCH64_GIC_DIST_BASE
[`aarch64_pvtime_ipa_start`]: https://crsrc.org/o/src/platform/crosvm/aarch64/src/lib.rs;l=100?q=AARCH64_PVTIME_IPA_START
diff --git a/docs/book/src/devices/index.md b/docs/book/src/devices/index.md
index 80caf45..0ae602b 100644
--- a/docs/book/src/devices/index.md
+++ b/docs/book/src/devices/index.md
@@ -77,7 +77,7 @@
[`iommu`]: https://chromium.googlesource.com/crosvm/crosvm/+/refs/heads/main/devices/src/virtio/iommu.rs
[`net`]: net.md
[`p9`]: https://chromium.googlesource.com/crosvm/crosvm/+/refs/heads/main/devices/src/virtio/p9.rs
-[`pmem`]: pmem.md
+[`pmem`]: pmem/README.md
[`rng`]: https://chromium.googlesource.com/crosvm/crosvm/+/refs/heads/main/devices/src/virtio/rng.rs
[`scsi`]: scsi.md
[`serial`]: https://chromium.googlesource.com/crosvm/crosvm/+/refs/heads/main/devices/src/serial.rs
diff --git a/docs/book/src/devices/input.md b/docs/book/src/devices/input.md
index 3631abe..7673833 100644
--- a/docs/book/src/devices/input.md
+++ b/docs/book/src/devices/input.md
@@ -186,7 +186,7 @@
Add a custom virtio-input device.
- `path` (required): path to event source socket
-- `config_path` (required): path to file configuring device
+- `config-path` (required): path to file configuring device
```sh
crosvm run \
@@ -198,42 +198,14 @@
events. "name" defines the customized device name, "serial" defines customized serial name. The
properties and axis info are yet to be supported.
-Here is an example of event config file:
+You can find an example config JSON in
+[`/devices/tests/data/input/example_custom_input_config.json`](https://chromium.googlesource.com/crosvm/crosvm/+/refs/heads/main/devices/tests/data/input/example_custom_input_config.json).
+It configures the same supported events as the default keyboard (`default_keyboard_events` in
+[`devices/src/virtio/input/defaults.rs`](https://chromium.googlesource.com/crosvm/crosvm/+/refs/heads/main/devices/src/virtio/input/defaults.rs#320)).
+Here is a portion of the example config file:
```
-{
- "name": "Virtio Custom",
- "serial_name": "virtio-custom",
- "events": [
- {
- "event_type": "EV_KEY",
- "event_type_code": 1,
- "supported_events": {
- "KEY_ESC": 1,
- "KEY_1": 2,
- "KEY_2": 3,
- "KEY_A": 30,
- "KEY_B": 48,
- "KEY_SPACE": 57
- }
- },
- {
- "event_type": "EV_REP",
- "event_type_code": 20,
- "supported_events": {
- "REP_DELAY": 0,
- "REP_PERIOD": 1
- }
- },
- {
- "event_type": "EV_LED",
- "event_type_code": 17,
- "supported_events": {
- "LED_NUML": 0,
- "LED_CAPSL": 1,
- "LED_SCROLLL": 2
- }
- }
- ]
-}
+{{#include ../../../../devices/tests/data/input/example_custom_input_config.json::11}}
+ ...
+{{#include ../../../../devices/tests/data/input/example_custom_input_config.json:115:}}
```
diff --git a/docs/book/src/devices/pmem/README.md b/docs/book/src/devices/pmem/README.md
new file mode 100644
index 0000000..c2b611b
--- /dev/null
+++ b/docs/book/src/devices/pmem/README.md
@@ -0,0 +1,11 @@
+# Pmem
+
+This section contains the following sub pages:
+
+- **[VirtIO Pmem]** describes the basic usage of the virtio-pmem device to provide a disk device to
+ the guest.
+- **[Sharing host directory via virtio-pmem]** describes crosvm's virtual ext2 feature on
+ virtio-pmem, which allows sharing a host directory with the guest as read-only.
+
+[sharing host directory via virtio-pmem]: pmem_ext2.md
+[virtio pmem]: basic.md
diff --git a/docs/book/src/devices/pmem.md b/docs/book/src/devices/pmem/basic.md
similarity index 92%
rename from docs/book/src/devices/pmem.md
rename to docs/book/src/devices/pmem/basic.md
index 279d631..6eb4a35 100644
--- a/docs/book/src/devices/pmem.md
+++ b/docs/book/src/devices/pmem/basic.md
@@ -1,4 +1,4 @@
-# Pmem
+# VirtIO Pmem
crosvm supports `virtio-pmem` to provide a virtual device emulating a byte-addressable persistent
memory device. The disk image is provided to the guest using a memory-mapped view of the image file,
@@ -32,5 +32,5 @@
The file backing a persistent memory device is mapped directly into the guest's address space, which
means that only the raw disk image format is supported; disk images in qcow2 or other formats may
-not be used as a pmem device. See the [`block`](block.md) device for an alternative that supports
+not be used as a pmem device. See the [`block`](../block.md) device for an alternative that supports
more file formats.
diff --git a/docs/book/src/devices/pmem/pmem_ext2.md b/docs/book/src/devices/pmem/pmem_ext2.md
new file mode 100644
index 0000000..19e2ae0
--- /dev/null
+++ b/docs/book/src/devices/pmem/pmem_ext2.md
@@ -0,0 +1,55 @@
+# Sharing host directory with virtio-pmem
+
+crosvm has an experimental feature to share a host directory with the guest as read-only via a
+virtio-pmem device.
+
+## How it works
+
+When this feature is enabled, `crosvm` creates a virtual ext2 filesystem in memory. This filesystem
+contains the contents of the specified host directory. When creating the file system, `crosvm`
+`mmap`s each file instead of copying its data. As a result, the actual file data is read from disk
+only when it's accessed by the guest.
+
+## Usage
+
+To share a host directory with the guest, you'll need to start `crosvm` with the device enabled, and
+mount the device in the guest.
+
+### Host
+
+You can use the `--pmem-ext2` flag to enable the device.
+
+```console
+$ mkdir host_shared_dir
+$ HOST_SHARED_DIR=$(pwd)/host_shared_dir
+$ echo "Hello!" > $HOST_SHARED_DIR/test.txt
+$ crosvm run \
+ --pmem-ext2 "$HOST_SHARED_DIR" \
+ # usual crosvm args
+```
+
+You can check a full list of parameters for `--pmem-ext2` with `crosvm run --help`.
+
+### Guest
+
+Then, you can mount the ext2 file system from the guest. With `-o dax`, we can avoid duplicated page
+caches between the guest and the host.
+
+```console
+$ mkdir /tmp/shared
+$ mount -t ext2 -o dax /dev/pmem0 /tmp/shared
+$ ls /tmp/shared
+lost+found test.txt
+$ cat /tmp/shared/test.txt
+Hello!
+```
+
+## Comparison with other methods
+
+Since access to files provided by this device is through pmem, it is done as a host OS page fault.
+This can reduce the number of context switches to the host userspace compared to virtio-blk or
+virtio-fs.
+
+This feature is similar to
+[the VVFAT (Virtual FAT filesystem)](https://github.com/qemu/qemu/blob/master/block/vvfat.c) device
+in QEMU, but our pmem-ext2 uses the ext2 filesystem and supports read-only accesses only.
diff --git a/docs/book/src/testing/index.md b/docs/book/src/testing/index.md
index 58822da..37de3ad 100644
--- a/docs/book/src/testing/index.md
+++ b/docs/book/src/testing/index.md
@@ -22,6 +22,43 @@
This allows us to execute unit tests for any platform using emulators such as qemu-user-static or
wine64.
+#### File Access in Unit Tests
+
+Some unit tests may need to access extra data files. Instead of relying on relative paths, which can
+be fragile and break when tests are executed from a different `$PWD` than the root of the tests'
+crate, always utilize the `CARGO_MANIFEST_DIR` environment variable. Cargo sets this variable to the
+absolute path of the directory containing the manifest of your package.
+
+The `CARGO_MANIFEST_DIR` should be accessed at build time using the `env!()` macro, rather than at
+run time with functions like `std::env::var()`. This approach is crucial because certain test
+environments may require running the test binaries directly instead of using `cargo test`.
+Additionally, it ensures the test binary can be run manually within a debugger like GDB.
+
+To enhance test portability, embed extra data files directly into your Rust binary using the
+`include_str!` macro. At runtime, write this data to a temporary file instead of accessing it via
+`CARGO_MANIFEST_DIR`. This avoids hardcoding paths in the binary, ensuring tests function correctly
+regardless of the binary or source tree location.
+
+These approaches ensure that unit tests are able to find the correct paths in various build and
+execution environments.
+
+**Example:**
+
+```rust
+#[test]
+fn test_my_config() {
+ let temp_file = TempDir::new().unwrap();
+ let path = temp_file.path().join("my_config.cfg");
+ let test_config = include_str!(concat!(
+ env!("CARGO_MANIFEST_DIR"),
+ "/config/my_config.cfg",
+ ));
+ fs::write(&path, test_config).expect("Unable to write test file");
+ let config_file = File::open(path).expect("Failed to open config file");
+ // ... rest of your test ...
+}
+```
+
### Documentation tests
Rust's
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 932007b..d8a6d7e 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -22,7 +22,7 @@
tempfile = "3"
usb_util = { path = "../usb_util" }
vm_memory = { path = "../vm_memory" }
-p9 = "0.2"
+p9 = "0.3.1"
rand_core = {version = "0.6", features = ["std"]}
cfg-if = "1.0"
diff --git a/hypervisor/src/geniezone/mod.rs b/hypervisor/src/geniezone/mod.rs
index f0240e5..a22473c 100644
--- a/hypervisor/src/geniezone/mod.rs
+++ b/hypervisor/src/geniezone/mod.rs
@@ -600,6 +600,7 @@
let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
for region in guest_mem.regions() {
let flags = match region.options.purpose {
+ MemoryRegionPurpose::Bios => GZVM_USER_MEM_REGION_GUEST_MEM,
MemoryRegionPurpose::GuestMemoryRegion => GZVM_USER_MEM_REGION_GUEST_MEM,
MemoryRegionPurpose::ProtectedFirmwareRegion => GZVM_USER_MEM_REGION_PROTECT_FW,
MemoryRegionPurpose::StaticSwiotlbRegion => GZVM_USER_MEM_REGION_STATIC_SWIOTLB,
diff --git a/hypervisor/src/gunyah/aarch64.rs b/hypervisor/src/gunyah/aarch64.rs
index c50654f..c7b9c4b 100644
--- a/hypervisor/src/gunyah/aarch64.rs
+++ b/hypervisor/src/gunyah/aarch64.rs
@@ -131,6 +131,7 @@
let mut base_set = false;
for region in self.guest_mem.regions() {
let create_shm_node = match region.options.purpose {
+ MemoryRegionPurpose::Bios => false,
MemoryRegionPurpose::GuestMemoryRegion => {
// Assume first GuestMemoryRegion contains the payload
// This memory region is described by the "base-address" property
diff --git a/hypervisor/src/gunyah/mod.rs b/hypervisor/src/gunyah/mod.rs
index d52b0e1..738a19c 100644
--- a/hypervisor/src/gunyah/mod.rs
+++ b/hypervisor/src/gunyah/mod.rs
@@ -209,6 +209,7 @@
for region in guest_mem.regions() {
let lend = if cfg.protection_type.isolates_memory() {
match region.options.purpose {
+ MemoryRegionPurpose::Bios => true,
MemoryRegionPurpose::GuestMemoryRegion => true,
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
MemoryRegionPurpose::ProtectedFirmwareRegion => true,
diff --git a/hypervisor/src/haxm/vm.rs b/hypervisor/src/haxm/vm.rs
index d82a6d0..8876d18 100644
--- a/hypervisor/src/haxm/vm.rs
+++ b/hypervisor/src/haxm/vm.rs
@@ -485,6 +485,15 @@
fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
Ok(())
}
+
+ fn load_protected_vm_firmware(
+ &mut self,
+ _fw_addr: GuestAddress,
+ _fw_max_size: u64,
+ ) -> Result<()> {
+ // Haxm does not support protected VMs
+ Err(Error::new(libc::ENXIO))
+ }
}
// TODO(b:241252288): Enable tests disabled with dummy feature flag - enable_haxm_tests.
diff --git a/hypervisor/src/kvm/cap.rs b/hypervisor/src/kvm/cap.rs
index f5c59d5..ba099e3 100644
--- a/hypervisor/src/kvm/cap.rs
+++ b/hypervisor/src/kvm/cap.rs
@@ -122,8 +122,10 @@
ImmediateExit = KVM_CAP_IMMEDIATE_EXIT,
ArmPmuV3 = KVM_CAP_ARM_PMU_V3,
ArmProtectedVm = KVM_CAP_ARM_PROTECTED_VM,
+ X86ProtectedVm = KVM_CAP_X86_PROTECTED_VM,
ArmMte = KVM_CAP_ARM_MTE,
#[cfg(target_arch = "x86_64")]
BusLockDetect = KVM_CAP_X86_BUS_LOCK_EXIT,
MemNoncoherentDma = KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA,
+ UserMemory2 = KVM_CAP_USER_MEMORY2,
}
diff --git a/hypervisor/src/kvm/mod.rs b/hypervisor/src/kvm/mod.rs
index fc2788b..580ab64 100644
--- a/hypervisor/src/kvm/mod.rs
+++ b/hypervisor/src/kvm/mod.rs
@@ -95,7 +95,7 @@
// SAFETY:
// Safe when the guest regions are guaranteed not to overlap.
unsafe fn set_user_memory_region(
- descriptor: &SafeDescriptor,
+ kvm: &KvmVm,
slot: MemSlot,
read_only: bool,
log_dirty_pages: bool,
@@ -104,22 +104,42 @@
memory_size: u64,
userspace_addr: *mut u8,
) -> Result<()> {
- let mut flags = if read_only { KVM_MEM_READONLY } else { 0 };
+ let mut use_2_variant = false;
+ let mut flags = 0;
+ if read_only {
+ flags |= KVM_MEM_READONLY;
+ }
if log_dirty_pages {
flags |= KVM_MEM_LOG_DIRTY_PAGES;
}
- if cache == MemCacheType::CacheNonCoherent {
+ if kvm.caps.user_noncoherent_dma && cache == MemCacheType::CacheNonCoherent {
flags |= KVM_MEM_NON_COHERENT_DMA;
+ use_2_variant = kvm.caps.user_memory_region2;
}
- let region = kvm_userspace_memory_region {
- slot,
- flags,
- guest_phys_addr: guest_addr,
- memory_size,
- userspace_addr: userspace_addr as u64,
+
+ let ret = if use_2_variant {
+ let region2 = kvm_userspace_memory_region2 {
+ slot,
+ flags,
+ guest_phys_addr: guest_addr,
+ memory_size,
+ userspace_addr: userspace_addr as u64,
+ guest_memfd_offset: 0,
+ guest_memfd: 0,
+ ..Default::default()
+ };
+ ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION2, ®ion2)
+ } else {
+ let region = kvm_userspace_memory_region {
+ slot,
+ flags,
+ guest_phys_addr: guest_addr,
+ memory_size,
+ userspace_addr: userspace_addr as u64,
+ };
+ ioctl_with_ref(&kvm.vm, KVM_SET_USER_MEMORY_REGION, ®ion)
};
- let ret = ioctl_with_ref(descriptor, KVM_SET_USER_MEMORY_REGION, ®ion);
if ret == 0 {
Ok(())
} else {
@@ -220,6 +240,14 @@
}
}
+/// Storage for constant KVM driver caps
+#[derive(Clone, Copy, Default)]
+struct KvmVmCaps {
+ kvmclock_ctrl: bool,
+ user_noncoherent_dma: bool,
+ user_memory_region2: bool,
+}
+
/// A wrapper around creating and using a KVM VM.
pub struct KvmVm {
kvm: Kvm,
@@ -228,7 +256,7 @@
mem_regions: Arc<Mutex<BTreeMap<MemSlot, Box<dyn MappedRegion>>>>,
/// A min heap of MemSlot numbers that were used and then removed and can now be re-used
mem_slot_gaps: Arc<Mutex<BinaryHeap<Reverse<MemSlot>>>>,
- cap_kvmclock_ctrl: bool,
+ caps: KvmVmCaps,
}
impl KvmVm {
@@ -250,12 +278,26 @@
// SAFETY:
// Safe because we verify that ret is valid and we own the fd.
let vm_descriptor = unsafe { SafeDescriptor::from_raw_descriptor(ret) };
- for region in guest_mem.regions() {
+ let mut vm = KvmVm {
+ kvm: kvm.try_clone()?,
+ vm: vm_descriptor,
+ guest_mem,
+ mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
+ mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
+ caps: Default::default(),
+ };
+ vm.caps.kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
+ vm.caps.user_noncoherent_dma = vm.check_raw_capability(KvmCap::MemNoncoherentDma);
+ vm.caps.user_memory_region2 = vm.check_raw_capability(KvmCap::UserMemory2);
+
+ vm.init_arch(&cfg)?;
+
+ for region in vm.guest_mem.regions() {
// SAFETY:
// Safe because the guest regions are guaranteed not to overlap.
unsafe {
set_user_memory_region(
- &vm_descriptor,
+ &vm,
region.index as MemSlot,
false,
false,
@@ -267,16 +309,6 @@
}?;
}
- let mut vm = KvmVm {
- kvm: kvm.try_clone()?,
- vm: vm_descriptor,
- guest_mem,
- mem_regions: Arc::new(Mutex::new(BTreeMap::new())),
- mem_slot_gaps: Arc::new(Mutex::new(BinaryHeap::new())),
- cap_kvmclock_ctrl: false,
- };
- vm.cap_kvmclock_ctrl = vm.check_raw_capability(KvmCap::KvmclockCtrl);
- vm.init_arch(&cfg)?;
Ok(vm)
}
@@ -307,7 +339,7 @@
vm: self.vm.try_clone()?,
vcpu,
id,
- cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
+ cap_kvmclock_ctrl: self.caps.kvmclock_ctrl,
run_mmap: Arc::new(run_mmap),
})
}
@@ -551,7 +583,7 @@
guest_mem: self.guest_mem.clone(),
mem_regions: self.mem_regions.clone(),
mem_slot_gaps: self.mem_slot_gaps.clone(),
- cap_kvmclock_ctrl: self.cap_kvmclock_ctrl,
+ caps: self.caps,
})
}
@@ -627,12 +659,6 @@
None => (regions.len() + self.guest_mem.num_regions() as usize) as MemSlot,
};
- let cache_type = if self.check_capability(VmCap::MemNoncoherentDma) {
- cache
- } else {
- MemCacheType::CacheCoherent
- };
-
// SAFETY:
// Safe because we check that the given guest address is valid and has no overlaps. We also
// know that the pointer and size are correct because the MemoryMapping interface ensures
@@ -640,11 +666,11 @@
// is removed.
let res = unsafe {
set_user_memory_region(
- &self.vm,
+ self,
slot,
read_only,
log_dirty_pages,
- cache_type,
+ cache,
guest_addr.offset(),
size,
mem.as_ptr(),
@@ -716,7 +742,7 @@
// Safe because the slot is checked against the list of memory slots.
unsafe {
set_user_memory_region(
- &self.vm,
+ self,
slot,
false,
false,
diff --git a/hypervisor/src/kvm/x86_64.rs b/hypervisor/src/kvm/x86_64.rs
index 7d0ce4f..2529b3b 100644
--- a/hypervisor/src/kvm/x86_64.rs
+++ b/hypervisor/src/kvm/x86_64.rs
@@ -23,7 +23,9 @@
use kvm_sys::*;
use libc::E2BIG;
use libc::EAGAIN;
+use libc::EINVAL;
use libc::EIO;
+use libc::ENOMEM;
use libc::ENXIO;
use serde::Deserialize;
use serde::Serialize;
@@ -31,6 +33,7 @@
use super::Config;
use super::Kvm;
+use super::KvmCap;
use super::KvmVcpu;
use super::KvmVm;
use crate::host_phys_addr_bits;
@@ -445,6 +448,48 @@
Ok(())
}
}
+
+ /// Get pKVM hypervisor details, e.g. the firmware size.
+ ///
+ /// Returns `Err` if not running under pKVM.
+ ///
+ /// Uses `KVM_ENABLE_CAP` internally, but it is only a getter; there should be no side effects
+ /// in KVM.
+ fn get_protected_vm_info(&self) -> Result<KvmProtectedVmInfo> {
+ let mut info = KvmProtectedVmInfo {
+ firmware_size: 0,
+ reserved: [0; 7],
+ };
+ // SAFETY:
+ // Safe because we allocated the struct and we know the kernel won't write beyond the end of
+ // the struct or keep a pointer to it.
+ unsafe {
+ self.enable_raw_capability(
+ KvmCap::X86ProtectedVm,
+ KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO,
+ &[&mut info as *mut KvmProtectedVmInfo as u64, 0, 0, 0],
+ )
+ }?;
+ Ok(info)
+ }
+
+ fn set_protected_vm_firmware_gpa(&self, fw_addr: GuestAddress) -> Result<()> {
+ // SAFETY:
+ // Safe because none of the args are pointers.
+ unsafe {
+ self.enable_raw_capability(
+ KvmCap::X86ProtectedVm,
+ KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA,
+ &[fw_addr.0, 0, 0, 0],
+ )
+ }
+ }
+}
+
+#[repr(C)]
+struct KvmProtectedVmInfo {
+ firmware_size: u64,
+ reserved: [u64; 7],
}
impl VmX86_64 for KvmVm {
@@ -452,6 +497,22 @@
&self.kvm
}
+ fn load_protected_vm_firmware(
+ &mut self,
+ fw_addr: GuestAddress,
+ fw_max_size: u64,
+ ) -> Result<()> {
+ let info = self.get_protected_vm_info()?;
+ if info.firmware_size == 0 {
+ Err(Error::new(EINVAL))
+ } else {
+ if info.firmware_size > fw_max_size {
+ return Err(Error::new(ENOMEM));
+ }
+ self.set_protected_vm_firmware_gpa(fw_addr)
+ }
+ }
+
fn create_vcpu(&self, id: usize) -> Result<Box<dyn VcpuX86_64>> {
// create_vcpu is declared separately in VmAArch64 and VmX86, so it can return VcpuAArch64
// or VcpuX86. But both use the same implementation in KvmVm::create_vcpu.
diff --git a/hypervisor/src/whpx/vm.rs b/hypervisor/src/whpx/vm.rs
index 62e477c..2aae1c4 100644
--- a/hypervisor/src/whpx/vm.rs
+++ b/hypervisor/src/whpx/vm.rs
@@ -776,6 +776,15 @@
fn set_identity_map_addr(&self, _addr: GuestAddress) -> Result<()> {
Ok(())
}
+
+ fn load_protected_vm_firmware(
+ &mut self,
+ _fw_addr: GuestAddress,
+ _fw_max_size: u64,
+ ) -> Result<()> {
+ // WHPX does not support protected VMs
+ Err(Error::new(libc::ENXIO))
+ }
}
// NOTE: WHPX Tests need to be run serially as otherwise it barfs unless we map new regions of guest
diff --git a/hypervisor/src/x86_64.rs b/hypervisor/src/x86_64.rs
index 1b9dc3f..fe47e1d 100644
--- a/hypervisor/src/x86_64.rs
+++ b/hypervisor/src/x86_64.rs
@@ -65,6 +65,12 @@
/// Sets the address of a one-page region in the VM's address space.
fn set_identity_map_addr(&self, addr: GuestAddress) -> Result<()>;
+
+ /// Load pVM firmware for the VM, creating a memslot for it as needed.
+ ///
+ /// Only works on protected VMs (i.e. those with vm_type == KVM_X86_PKVM_PROTECTED_VM).
+ fn load_protected_vm_firmware(&mut self, fw_addr: GuestAddress, fw_max_size: u64)
+ -> Result<()>;
}
/// A wrapper around creating and using a VCPU on x86_64.
diff --git a/infra/README.recipes.md b/infra/README.recipes.md
index e296cd7..99f158e 100644
--- a/infra/README.recipes.md
+++ b/infra/README.recipes.md
@@ -181,19 +181,19 @@
— **def [RunSteps](/infra/recipes/update_chromeos_merges.py#14)(api):**
-[depot_tools/recipe_modules/bot_update]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/2515d3513f7b15bf0cf40994599989b5dd7128ac/recipes/README.recipes.md#recipe_modules-bot_update
-[depot_tools/recipe_modules/depot_tools]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/2515d3513f7b15bf0cf40994599989b5dd7128ac/recipes/README.recipes.md#recipe_modules-depot_tools
-[depot_tools/recipe_modules/gclient]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/2515d3513f7b15bf0cf40994599989b5dd7128ac/recipes/README.recipes.md#recipe_modules-gclient
-[depot_tools/recipe_modules/git]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/2515d3513f7b15bf0cf40994599989b5dd7128ac/recipes/README.recipes.md#recipe_modules-git
-[depot_tools/recipe_modules/gsutil]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/2515d3513f7b15bf0cf40994599989b5dd7128ac/recipes/README.recipes.md#recipe_modules-gsutil
-[recipe_engine/recipe_modules/buildbucket]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-buildbucket
-[recipe_engine/recipe_modules/cipd]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-cipd
-[recipe_engine/recipe_modules/context]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-context
-[recipe_engine/recipe_modules/file]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-file
-[recipe_engine/recipe_modules/json]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-json
-[recipe_engine/recipe_modules/path]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-path
-[recipe_engine/recipe_modules/platform]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-platform
-[recipe_engine/recipe_modules/properties]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-properties
-[recipe_engine/recipe_modules/raw_io]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-raw_io
-[recipe_engine/recipe_modules/step]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/README.recipes.md#recipe_modules-step
-[recipe_engine/wkt/RecipeApi]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/3624a48ac89993276cb80e675a88fcd3b39a0f39/recipe_engine/recipe_api.py#433
+[depot_tools/recipe_modules/bot_update]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/44a8a8d49a33aa5673f3e93f42471a3b15a2a07a/recipes/README.recipes.md#recipe_modules-bot_update
+[depot_tools/recipe_modules/depot_tools]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/44a8a8d49a33aa5673f3e93f42471a3b15a2a07a/recipes/README.recipes.md#recipe_modules-depot_tools
+[depot_tools/recipe_modules/gclient]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/44a8a8d49a33aa5673f3e93f42471a3b15a2a07a/recipes/README.recipes.md#recipe_modules-gclient
+[depot_tools/recipe_modules/git]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/44a8a8d49a33aa5673f3e93f42471a3b15a2a07a/recipes/README.recipes.md#recipe_modules-git
+[depot_tools/recipe_modules/gsutil]: https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/44a8a8d49a33aa5673f3e93f42471a3b15a2a07a/recipes/README.recipes.md#recipe_modules-gsutil
+[recipe_engine/recipe_modules/buildbucket]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-buildbucket
+[recipe_engine/recipe_modules/cipd]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-cipd
+[recipe_engine/recipe_modules/context]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-context
+[recipe_engine/recipe_modules/file]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-file
+[recipe_engine/recipe_modules/json]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-json
+[recipe_engine/recipe_modules/path]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-path
+[recipe_engine/recipe_modules/platform]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-platform
+[recipe_engine/recipe_modules/properties]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-properties
+[recipe_engine/recipe_modules/raw_io]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-raw_io
+[recipe_engine/recipe_modules/step]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/README.recipes.md#recipe_modules-step
+[recipe_engine/wkt/RecipeApi]: https://chromium.googlesource.com/infra/luci/recipes-py.git/+/43ded771bb19ba1ce1b62bd9165fc585fb8c3564/recipe_engine/recipe_api.py#433
diff --git a/infra/config/recipes.cfg b/infra/config/recipes.cfg
index 45d9f05..333d71f 100644
--- a/infra/config/recipes.cfg
+++ b/infra/config/recipes.cfg
@@ -20,12 +20,12 @@
"deps": {
"depot_tools": {
"branch": "refs/heads/main",
- "revision": "2515d3513f7b15bf0cf40994599989b5dd7128ac",
+ "revision": "44a8a8d49a33aa5673f3e93f42471a3b15a2a07a",
"url": "https://chromium.googlesource.com/chromium/tools/depot_tools.git"
},
"recipe_engine": {
"branch": "refs/heads/main",
- "revision": "3624a48ac89993276cb80e675a88fcd3b39a0f39",
+ "revision": "43ded771bb19ba1ce1b62bd9165fc585fb8c3564",
"url": "https://chromium.googlesource.com/infra/luci/recipes-py.git"
}
},
diff --git a/jail/seccomp/aarch64/gpu_common.policy b/jail/seccomp/aarch64/gpu_common.policy
index 5b1277f..2067af3 100644
--- a/jail/seccomp/aarch64/gpu_common.policy
+++ b/jail/seccomp/aarch64/gpu_common.policy
@@ -57,6 +57,7 @@
write: 1
writev: 1
uname: 1
+unlinkat: 1
# Required for perfetto tracing
getsockopt: 1
diff --git a/jail/seccomp/arm/gpu_common.policy b/jail/seccomp/arm/gpu_common.policy
index 5390785..d9d95c1 100644
--- a/jail/seccomp/arm/gpu_common.policy
+++ b/jail/seccomp/arm/gpu_common.policy
@@ -65,6 +65,7 @@
write: 1
writev: 1
uname: 1
+unlinkat: 1
# Required for perfetto tracing
getsockopt: 1
diff --git a/jail/seccomp/x86_64/gpu_common.policy b/jail/seccomp/x86_64/gpu_common.policy
index 407705a..c6e2e2f 100644
--- a/jail/seccomp/x86_64/gpu_common.policy
+++ b/jail/seccomp/x86_64/gpu_common.policy
@@ -62,6 +62,7 @@
write: 1
writev: 1
uname: 1
+unlinkat: 1
# Rules specific to gpu
connect: 1
diff --git a/jail/src/helpers.rs b/jail/src/helpers.rs
index 28a49e7..a751a10 100644
--- a/jail/src/helpers.rs
+++ b/jail/src/helpers.rs
@@ -19,10 +19,9 @@
use base::warn;
use libc::c_ulong;
use minijail::Minijail;
-#[cfg(not(feature = "seccomp_trace"))]
use once_cell::sync::Lazy;
#[cfg(feature = "seccomp_trace")]
-use static_assertions::assert_eq_size;
+use static_assertions::const_assert;
#[cfg(feature = "seccomp_trace")]
use zerocopy::AsBytes;
@@ -358,6 +357,7 @@
root: &Path,
config: &SandboxConfig,
render_node_only: bool,
+ snapshot_scratch_directory: Option<&Path>,
) -> Result<Minijail> {
let mut jail = create_sandbox_minijail(root, MAX_OPEN_FILES_FOR_GPU, config)?;
@@ -420,6 +420,17 @@
jail.mount_bind(perfetto_path, perfetto_path, true)?;
}
+ // Provide scratch space for the GPU device to build or unpack snapshots.
+ if let Some(snapshot_scratch_directory) = snapshot_scratch_directory {
+ jail.mount_with_data(
+ Path::new("none"),
+ snapshot_scratch_directory,
+ "tmpfs",
+ (libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC) as usize,
+ "size=4294967296",
+ )?;
+ }
+
Ok(jail)
}
@@ -479,8 +490,9 @@
/// Read minijail internal struct address for uniquely identifying and tracking jail's lifetime
#[cfg(feature = "seccomp_trace")]
pub fn read_jail_addr(jail: &Minijail) -> usize {
- // We can only hope minijail's rust object will always only contain a pointer to C jail struct
- assert_eq_size!(Minijail, usize);
+ // We can only hope minijail's rust object will always contain a pointer to C jail struct as the
+ // first field.
+ const_assert!(std::mem::size_of::<Minijail>() >= std::mem::size_of::<usize>());
// Safe because it's only doing a read within bound checked by static assert
unsafe { *(jail as *const Minijail as *const usize) }
}
diff --git a/kvm/src/cap.rs b/kvm/src/cap.rs
index 032403a..f789c31 100644
--- a/kvm/src/cap.rs
+++ b/kvm/src/cap.rs
@@ -122,6 +122,7 @@
ImmediateExit = KVM_CAP_IMMEDIATE_EXIT,
ArmPmuV3 = KVM_CAP_ARM_PMU_V3,
ArmProtectedVm = KVM_CAP_ARM_PROTECTED_VM,
+ X86ProtectedVm = KVM_CAP_X86_PROTECTED_VM,
ArmMte = KVM_CAP_ARM_MTE,
#[cfg(target_arch = "x86_64")]
BusLockDetect = KVM_CAP_X86_BUS_LOCK_EXIT,
diff --git a/kvm_sys/bindgen.sh b/kvm_sys/bindgen.sh
index 9c6b518..3cc7af3 100755
--- a/kvm_sys/bindgen.sh
+++ b/kvm_sys/bindgen.sh
@@ -15,9 +15,21 @@
use zerocopy::FromBytes;
use zerocopy::FromZeroes;
-// TODO(b/316337317): Update if new memslot flag is accepted in upstream
-pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
-pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
+// TODO(b/369492345): Remove once bindgen generates from newer kernel headers (e.g. 6.12)
+pub const KVM_CAP_USER_MEMORY2: u32 = 231;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct kvm_userspace_memory_region2 {
+ pub slot: u32,
+ pub flags: u32,
+ pub guest_phys_addr: u64,
+ pub memory_size: u64,
+ pub userspace_addr: u64,
+ pub guest_memfd_offset: u64,
+ pub guest_memfd: u32,
+ pub pad1: u32,
+ pub pad2: [u64; 14usize],
+}
// TODO(qwandor): Update this once the pKVM patches are merged upstream with a stable capability ID.
pub const KVM_CAP_ARM_PROTECTED_VM: u32 = 0xffbadab1;
@@ -25,6 +37,9 @@
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
+pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]
diff --git a/kvm_sys/src/aarch64/bindings.rs b/kvm_sys/src/aarch64/bindings.rs
index d181722..e739d2a 100644
--- a/kvm_sys/src/aarch64/bindings.rs
+++ b/kvm_sys/src/aarch64/bindings.rs
@@ -13,9 +13,21 @@
use zerocopy::FromBytes;
use zerocopy::FromZeroes;
-// TODO(b/316337317): Update if new memslot flag is accepted in upstream
-pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
-pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
+// TODO(b/369492345): Remove once bindgen generates from newer kernel headers (e.g. 6.12)
+pub const KVM_CAP_USER_MEMORY2: u32 = 231;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct kvm_userspace_memory_region2 {
+ pub slot: u32,
+ pub flags: u32,
+ pub guest_phys_addr: u64,
+ pub memory_size: u64,
+ pub userspace_addr: u64,
+ pub guest_memfd_offset: u64,
+ pub guest_memfd: u32,
+ pub pad1: u32,
+ pub pad2: [u64; 14usize],
+}
// TODO(qwandor): Update this once the pKVM patches are merged upstream with a stable capability ID.
pub const KVM_CAP_ARM_PROTECTED_VM: u32 = 0xffbadab1;
@@ -23,6 +35,9 @@
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
+pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]
@@ -421,6 +436,7 @@
pub const KVM_TRC_PPC_INSTR: u32 = 131097;
pub const KVM_MEM_LOG_DIRTY_PAGES: u32 = 1;
pub const KVM_MEM_READONLY: u32 = 2;
+pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
pub const KVM_PIT_SPEAKER_DUMMY: u32 = 1;
pub const KVM_S390_CMMA_PEEK: u32 = 1;
pub const KVM_EXIT_HYPERV_SYNIC: u32 = 1;
@@ -773,9 +789,11 @@
pub const KVM_CAP_COUNTER_OFFSET: u32 = 227;
pub const KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: u32 = 228;
pub const KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: u32 = 229;
+pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
pub const KVM_CAP_GET_CUR_CPUFREQ: u32 = 512;
pub const KVM_CAP_UTIL_HINT: u32 = 513;
pub const KVM_CAP_GET_CPUFREQ_TBL: u32 = 514;
+pub const KVM_CAP_PV_SCHED: u32 = 600;
pub const KVM_IRQ_ROUTING_IRQCHIP: u32 = 1;
pub const KVM_IRQ_ROUTING_MSI: u32 = 2;
pub const KVM_IRQ_ROUTING_S390_ADAPTER: u32 = 3;
diff --git a/kvm_sys/src/lib.rs b/kvm_sys/src/lib.rs
index 38c77a6..cb55451 100644
--- a/kvm_sys/src/lib.rs
+++ b/kvm_sys/src/lib.rs
@@ -115,6 +115,12 @@
);
ioctl_io_nr!(KVM_SET_TSS_ADDR, KVMIO, 0x47);
ioctl_iow_nr!(KVM_SET_IDENTITY_MAP_ADDR, KVMIO, 0x48, u64);
+ioctl_iow_nr!(
+ KVM_SET_USER_MEMORY_REGION2,
+ KVMIO,
+ 0x49,
+ kvm_userspace_memory_region2
+);
ioctl_io_nr!(KVM_CREATE_IRQCHIP, KVMIO, 0x60);
ioctl_iow_nr!(KVM_IRQ_LINE, KVMIO, 0x61, kvm_irq_level);
ioctl_iowr_nr!(KVM_GET_IRQCHIP, KVMIO, 0x62, kvm_irqchip);
diff --git a/kvm_sys/src/riscv64/bindings.rs b/kvm_sys/src/riscv64/bindings.rs
index 0dfbbd2..19f80de 100644
--- a/kvm_sys/src/riscv64/bindings.rs
+++ b/kvm_sys/src/riscv64/bindings.rs
@@ -13,9 +13,21 @@
use zerocopy::FromBytes;
use zerocopy::FromZeroes;
-// TODO(b/316337317): Update if new memslot flag is accepted in upstream
-pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
-pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
+// TODO(b/369492345): Remove once bindgen generates from newer kernel headers (e.g. 6.12)
+pub const KVM_CAP_USER_MEMORY2: u32 = 231;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct kvm_userspace_memory_region2 {
+ pub slot: u32,
+ pub flags: u32,
+ pub guest_phys_addr: u64,
+ pub memory_size: u64,
+ pub userspace_addr: u64,
+ pub guest_memfd_offset: u64,
+ pub guest_memfd: u32,
+ pub pad1: u32,
+ pub pad2: [u64; 14usize],
+}
// TODO(qwandor): Update this once the pKVM patches are merged upstream with a stable capability ID.
pub const KVM_CAP_ARM_PROTECTED_VM: u32 = 0xffbadab1;
@@ -23,6 +35,9 @@
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
+pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]
@@ -39,7 +54,6 @@
pub sid_idx: u32,
pub vsid: u32,
}
-pub const KVM_PVIOMMU_SET_CONFIG: i32 = 1;
#[repr(C)]
#[derive(Default)]
@@ -173,6 +187,7 @@
pub const KVM_TRC_PPC_INSTR: u32 = 131097;
pub const KVM_MEM_LOG_DIRTY_PAGES: u32 = 1;
pub const KVM_MEM_READONLY: u32 = 2;
+pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
pub const KVM_PIT_SPEAKER_DUMMY: u32 = 1;
pub const KVM_S390_CMMA_PEEK: u32 = 1;
pub const KVM_EXIT_HYPERV_SYNIC: u32 = 1;
@@ -523,9 +538,11 @@
pub const KVM_CAP_COUNTER_OFFSET: u32 = 227;
pub const KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: u32 = 228;
pub const KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: u32 = 229;
+pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
pub const KVM_CAP_GET_CUR_CPUFREQ: u32 = 512;
pub const KVM_CAP_UTIL_HINT: u32 = 513;
pub const KVM_CAP_GET_CPUFREQ_TBL: u32 = 514;
+pub const KVM_CAP_PV_SCHED: u32 = 600;
pub const KVM_IRQ_ROUTING_IRQCHIP: u32 = 1;
pub const KVM_IRQ_ROUTING_MSI: u32 = 2;
pub const KVM_IRQ_ROUTING_S390_ADAPTER: u32 = 3;
diff --git a/kvm_sys/src/x86/bindings.rs b/kvm_sys/src/x86/bindings.rs
index 6702b3f..5b23934 100644
--- a/kvm_sys/src/x86/bindings.rs
+++ b/kvm_sys/src/x86/bindings.rs
@@ -13,9 +13,21 @@
use zerocopy::FromBytes;
use zerocopy::FromZeroes;
-// TODO(b/316337317): Update if new memslot flag is accepted in upstream
-pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
-pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
+// TODO(b/369492345): Remove once bindgen generates from newer kernel headers (e.g. 6.12)
+pub const KVM_CAP_USER_MEMORY2: u32 = 231;
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct kvm_userspace_memory_region2 {
+ pub slot: u32,
+ pub flags: u32,
+ pub guest_phys_addr: u64,
+ pub memory_size: u64,
+ pub userspace_addr: u64,
+ pub guest_memfd_offset: u64,
+ pub guest_memfd: u32,
+ pub pad1: u32,
+ pub pad2: [u64; 14usize],
+}
// TODO(qwandor): Update this once the pKVM patches are merged upstream with a stable capability ID.
pub const KVM_CAP_ARM_PROTECTED_VM: u32 = 0xffbadab1;
@@ -23,6 +35,9 @@
pub const KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_VM_TYPE_ARM_PROTECTED: u32 = 0x80000000;
pub const KVM_X86_PKVM_PROTECTED_VM: u32 = 28;
+pub const KVM_CAP_X86_PROTECTED_VM: u32 = 0xffbadab2;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_SET_FW_GPA: u32 = 0;
+pub const KVM_CAP_X86_PROTECTED_VM_FLAGS_INFO: u32 = 1;
pub const KVM_DEV_VFIO_PVIOMMU: u32 = 2;
pub const KVM_DEV_VFIO_PVIOMMU_ATTACH: u32 = 1;
#[repr(C)]
@@ -39,7 +54,6 @@
pub sid_idx: u32,
pub vsid: u32,
}
-pub const KVM_PVIOMMU_SET_CONFIG: i32 = 1;
// This is how zerocopy's author deal with bindings for __BindgenBitfieldUnit<Storage>, see:
// https://fuchsia-review.googlesource.com/c/859278/8/src/starnix/lib/linux_uapi/generate.py
@@ -340,6 +354,7 @@
pub const KVM_TRC_PPC_INSTR: u32 = 131097;
pub const KVM_MEM_LOG_DIRTY_PAGES: u32 = 1;
pub const KVM_MEM_READONLY: u32 = 2;
+pub const KVM_MEM_NON_COHERENT_DMA: u32 = 8;
pub const KVM_PIT_SPEAKER_DUMMY: u32 = 1;
pub const KVM_S390_CMMA_PEEK: u32 = 1;
pub const KVM_EXIT_HYPERV_SYNIC: u32 = 1;
@@ -700,9 +715,11 @@
pub const KVM_CAP_COUNTER_OFFSET: u32 = 227;
pub const KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE: u32 = 228;
pub const KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES: u32 = 229;
+pub const KVM_CAP_USER_CONFIGURE_NONCOHERENT_DMA: u32 = 236;
pub const KVM_CAP_GET_CUR_CPUFREQ: u32 = 512;
pub const KVM_CAP_UTIL_HINT: u32 = 513;
pub const KVM_CAP_GET_CPUFREQ_TBL: u32 = 514;
+pub const KVM_CAP_PV_SCHED: u32 = 600;
pub const KVM_IRQ_ROUTING_IRQCHIP: u32 = 1;
pub const KVM_IRQ_ROUTING_MSI: u32 = 2;
pub const KVM_IRQ_ROUTING_S390_ADAPTER: u32 = 3;
diff --git a/resources/Android.bp b/resources/Android.bp
index 78932b8..9c80c8c 100644
--- a/resources/Android.bp
+++ b/resources/Android.bp
@@ -48,6 +48,7 @@
"libbase_rust",
"liblibc",
"libserde",
+ "libserde_json",
"libthiserror",
],
proc_macros: ["libremain"],
diff --git a/resources/Cargo.toml b/resources/Cargo.toml
index 5839809..58201fb 100644
--- a/resources/Cargo.toml
+++ b/resources/Cargo.toml
@@ -10,3 +10,6 @@
serde = { version = "1", features = ["derive"] }
remain = "0.2"
thiserror = "1"
+
+[dev-dependencies]
+serde_json = "1"
diff --git a/resources/src/lib.rs b/resources/src/lib.rs
index 287ee32..099b68a 100644
--- a/resources/src/lib.rs
+++ b/resources/src/lib.rs
@@ -10,6 +10,8 @@
use thiserror::Error;
pub use crate::address_range::AddressRange;
+pub use crate::pci_address::Error as PciAddressError;
+pub use crate::pci_address::PciAddress;
pub use crate::system_allocator::AllocOptions;
pub use crate::system_allocator::MmioType;
pub use crate::system_allocator::SystemAllocator;
@@ -17,6 +19,7 @@
pub mod address_allocator;
mod address_range;
+mod pci_address;
mod system_allocator;
/// Used to tag SystemAllocator allocations.
diff --git a/devices/src/pci/pci_address.rs b/resources/src/pci_address.rs
similarity index 97%
rename from devices/src/pci/pci_address.rs
rename to resources/src/pci_address.rs
index 966fd9f..45cf3af 100644
--- a/devices/src/pci/pci_address.rs
+++ b/resources/src/pci_address.rs
@@ -83,11 +83,11 @@
/// # Example
///
/// ```
-/// use devices::PciAddress;
+/// use resources::PciAddress;
///
/// let pci_address = PciAddress::new(0x0000, 0x03, 0x14, 0x1)?;
/// assert_eq!(pci_address.to_string(), "0000:03:14.1");
-/// # Ok::<(), devices::PciAddressError>(())
+/// # Ok::<(), resources::PciAddressError>(())
/// ```
impl Display for PciAddress {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -107,13 +107,13 @@
///
/// ```
/// use std::str::FromStr;
-/// use devices::PciAddress;
+/// use resources::PciAddress;
///
/// let pci_address = PciAddress::from_str("d7:15.4")?;
/// assert_eq!(pci_address.bus, 0xd7);
/// assert_eq!(pci_address.dev, 0x15);
/// assert_eq!(pci_address.func, 0x4);
-/// # Ok::<(), devices::PciAddressError>(())
+/// # Ok::<(), resources::PciAddressError>(())
/// ```
impl FromStr for PciAddress {
type Err = Error;
@@ -222,7 +222,7 @@
/// # Example
///
/// ```
- /// use devices::PciAddress;
+ /// use resources::PciAddress;
///
/// let (pci_address, register_index) = PciAddress::from_config_address(0x32a354, 8);
/// assert_eq!(pci_address.bus, 0x32);
@@ -270,12 +270,12 @@
/// # Example
///
/// ```
- /// use devices::PciAddress;
+ /// use resources::PciAddress;
///
/// let pci_address = PciAddress::new(0x0000, 0x32, 0x14, 0x3)?;
/// let config_address = pci_address.to_config_address(0x15, 8);
/// assert_eq!(config_address, 0x32a354);
- /// # Ok::<(), devices::PciAddressError>(())
+ /// # Ok::<(), resources::PciAddressError>(())
/// ```
pub fn to_config_address(&self, register: usize, register_bits_num: usize) -> u32 {
let bus_offset = register_bits_num + Self::FUNCTION_BITS_NUM + Self::DEVICE_BITS_NUM;
diff --git a/resources/src/system_allocator.rs b/resources/src/system_allocator.rs
index ac3729f..df3ae24 100644
--- a/resources/src/system_allocator.rs
+++ b/resources/src/system_allocator.rs
@@ -12,6 +12,7 @@
use crate::AddressRange;
use crate::Alloc;
use crate::Error;
+use crate::PciAddress;
use crate::Result;
/// Manages allocating system resources such as address space and interrupt numbers.
@@ -302,7 +303,7 @@
}
/// Allocate PCI slot location.
- pub fn allocate_pci(&mut self, bus: u8, tag: String) -> Option<Alloc> {
+ pub fn allocate_pci(&mut self, bus: u8, tag: String) -> Option<PciAddress> {
let id = self.get_anon_alloc();
let allocator = match self.get_pci_allocator_mut(bus) {
Some(v) => v,
@@ -310,45 +311,35 @@
};
allocator
.allocate(1, id, tag)
- .map(|v| Alloc::PciBar {
+ .map(|v| PciAddress {
bus,
dev: (v >> 3) as u8,
func: (v & 7) as u8,
- bar: 0,
})
.ok()
}
/// Reserve PCI slot location.
- pub fn reserve_pci(&mut self, alloc: Alloc, tag: String) -> bool {
+ pub fn reserve_pci(&mut self, pci_addr: PciAddress, tag: String) -> bool {
let id = self.get_anon_alloc();
- match alloc {
- Alloc::PciBar {
- bus,
- dev,
- func,
- bar: _,
- } => {
- let allocator = match self.get_pci_allocator_mut(bus) {
- Some(v) => v,
- None => return false,
- };
- let df = ((dev as u64) << 3) | (func as u64);
- allocator
- .allocate_at(AddressRange { start: df, end: df }, id, tag)
- .is_ok()
- }
- _ => false,
- }
- }
- /// release PCI slot location.
- pub fn release_pci(&mut self, bus: u8, dev: u8, func: u8) -> bool {
- let allocator = match self.get_pci_allocator_mut(bus) {
+ let allocator = match self.get_pci_allocator_mut(pci_addr.bus) {
Some(v) => v,
None => return false,
};
- let df = ((dev as u64) << 3) | (func as u64);
+ let df = ((pci_addr.dev as u64) << 3) | (pci_addr.func as u64);
+ allocator
+ .allocate_at(AddressRange { start: df, end: df }, id, tag)
+ .is_ok()
+ }
+
+ /// Release PCI slot location.
+ pub fn release_pci(&mut self, pci_addr: PciAddress) -> bool {
+ let allocator = match self.get_pci_allocator_mut(pci_addr.bus) {
+ Some(v) => v,
+ None => return false,
+ };
+ let df = ((pci_addr.dev as u64) << 3) | (pci_addr.func as u64);
allocator.release_containing(df).is_ok()
}
diff --git a/rutabaga_gfx/Cargo.toml b/rutabaga_gfx/Cargo.toml
index 944dfd3..b18cb20 100644
--- a/rutabaga_gfx/Cargo.toml
+++ b/rutabaga_gfx/Cargo.toml
@@ -11,9 +11,8 @@
gfxstream_stub = []
virgl_renderer = []
minigbm = []
-# To try out Vulkano, delete the following line and uncomment the line in "dependencies". Vulkano
-# features are just a prototype and not integrated yet into the ChromeOS build system.
-vulkano = []
+# Vulkano features are just a prototype and not integrated yet into the ChromeOS build system.
+vulkano = ["dep:vulkano"]
x = []
[dependencies]
@@ -28,7 +27,7 @@
log = "0.4"
# To build latest Vulkano, change version to git = "https://github.com/vulkano-rs/vulkano.git"
-# vulkano = { version = "0.33.0", optional = true }
+vulkano = { version = "0.33.0", optional = true }
[target.'cfg(any(target_os = "android", target_os = "linux"))'.dependencies]
nix = { version = "0.28", features = ["event", "feature", "fs", "mman", "socket", "uio", "ioctl"] }
diff --git a/rutabaga_gfx/ffi/src/include/rutabaga_gfx_ffi.h b/rutabaga_gfx/ffi/src/include/rutabaga_gfx_ffi.h
index 59019f6..3355105 100644
--- a/rutabaga_gfx/ffi/src/include/rutabaga_gfx_ffi.h
+++ b/rutabaga_gfx/ffi/src/include/rutabaga_gfx_ffi.h
@@ -107,6 +107,11 @@
#define RUTABAGA_DEBUG_WARN 0x2
#define RUTABAGA_DEBUG_INFO 0x3
+/**
+ * Rutabaga resource import flags
+ */
+#define RUTABAGA_IMPORT_FLAG_3D_INFO (1 << 0)
+
struct rutabaga;
struct rutabaga_create_blob {
@@ -129,6 +134,19 @@
uint32_t flags;
};
+struct rutabaga_import_data {
+ uint32_t flags;
+ struct {
+ uint32_t width;
+ uint32_t height;
+ uint32_t drm_fourcc;
+ uint32_t strides[4];
+ uint32_t offsets[4];
+ uint64_t modifier;
+ bool guest_cpu_mappable;
+ } info_3d;
+};
+
struct rutabaga_transfer {
uint32_t x;
uint32_t y;
@@ -366,6 +384,10 @@
int32_t rutabaga_resource_wait_sync(struct rutabaga *ptr, uint32_t resource_id);
+int32_t rutabaga_resource_import(struct rutabaga *ptr, uint32_t resource_id,
+ const struct rutabaga_handle *import_handle,
+ const struct rutabaga_import_data *import_data);
+
#ifdef __cplusplus
}
#endif
diff --git a/rutabaga_gfx/ffi/src/lib.rs b/rutabaga_gfx/ffi/src/lib.rs
index 0073d4f..cc826a3 100644
--- a/rutabaga_gfx/ffi/src/lib.rs
+++ b/rutabaga_gfx/ffi/src/lib.rs
@@ -169,6 +169,9 @@
}
#[allow(non_camel_case_types)]
+type rutabaga_import_data = RutabagaImportData;
+
+#[allow(non_camel_case_types)]
pub type rutabaga_fence_callback = extern "C" fn(user_data: u64, fence: &rutabaga_fence);
#[allow(non_camel_case_types)]
@@ -430,6 +433,27 @@
.unwrap_or(-ESRCH)
}
+#[no_mangle]
+pub unsafe extern "C" fn rutabaga_resource_import(
+ ptr: &mut rutabaga,
+ resource_id: u32,
+ import_handle: &rutabaga_handle,
+ import_data: &rutabaga_import_data,
+) -> i32 {
+ catch_unwind(AssertUnwindSafe(|| {
+ let internal_handle = RutabagaHandle {
+ os_handle: RutabagaDescriptor::from_raw_descriptor(
+ (*import_handle).os_handle.try_into().unwrap(),
+ ),
+ handle_type: (*import_handle).handle_type,
+ };
+
+ let result = ptr.resource_import(resource_id, internal_handle, *import_data);
+ return_result(result)
+ }))
+ .unwrap_or(-ESRCH)
+}
+
/// # Safety
/// - If `iovecs` is not null, the caller must ensure `(*iovecs).iovecs` points to a valid array of
/// iovecs of size `(*iovecs).num_iovecs`.
diff --git a/rutabaga_gfx/src/gfxstream.rs b/rutabaga_gfx/src/gfxstream.rs
index aba7069..813e058 100644
--- a/rutabaga_gfx/src/gfxstream.rs
+++ b/rutabaga_gfx/src/gfxstream.rs
@@ -44,6 +44,9 @@
const STREAM_RENDERER_PARAM_DEBUG_CALLBACK: u64 = 6;
const STREAM_RENDERER_PARAM_RENDERER_FEATURES: u64 = 11;
+#[cfg(gfxstream_unstable)]
+const STREAM_RENDERER_IMPORT_FLAG_3D_INFO: u32 = 1 << 0;
+
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct stream_renderer_param {
@@ -90,6 +93,25 @@
#[allow(non_camel_case_types)]
pub type stream_renderer_debug = RutabagaDebug;
+#[cfg(gfxstream_unstable)]
+#[repr(C)]
+pub struct stream_renderer_3d_info {
+ pub width: u32,
+ pub height: u32,
+ pub drm_fourcc: u32,
+ pub strides: [u32; 4],
+ pub offsets: [u32; 4],
+ pub modifier: u64,
+}
+
+#[cfg(gfxstream_unstable)]
+#[repr(C)]
+pub struct stream_renderer_import_data {
+ pub flags: u32,
+ pub info_3d: stream_renderer_3d_info,
+ pub info_vulkan: stream_renderer_vulkan_info,
+}
+
extern "C" {
// Entry point for the stream renderer.
fn stream_renderer_init(
@@ -198,6 +220,13 @@
#[cfg(gfxstream_unstable)]
fn stream_renderer_wait_sync_resource(res_handle: u32) -> c_int;
+
+ #[cfg(gfxstream_unstable)]
+ fn stream_renderer_import_resource(
+ res_handle: u32,
+ import_handle: *const stream_renderer_handle,
+ import_data: *const stream_renderer_import_data,
+ ) -> c_int;
}
/// The virtio-gpu backend state tracker which supports accelerated rendering.
@@ -548,6 +577,59 @@
})
}
+ #[cfg(gfxstream_unstable)]
+ fn import(
+ &self,
+ resource_id: u32,
+ import_handle: RutabagaHandle,
+ import_data: RutabagaImportData,
+ ) -> RutabagaResult<RutabagaResource> {
+ let stream_handle = stream_renderer_handle {
+ os_handle: import_handle.os_handle.into_raw_descriptor() as i64,
+ handle_type: import_handle.handle_type,
+ };
+
+ // When importing and creating a new resource, the 3D_INFO flag must be set. It is also the
+ // only flag passed on in the gfxstream call.
+ assert!(0 != import_data.flags & STREAM_RENDERER_IMPORT_FLAG_3D_INFO);
+ let stream_import_data = stream_renderer_import_data {
+ flags: STREAM_RENDERER_IMPORT_FLAG_3D_INFO,
+ info_3d: stream_renderer_3d_info {
+ width: import_data.info_3d.width,
+ height: import_data.info_3d.height,
+ drm_fourcc: import_data.info_3d.drm_fourcc,
+ strides: import_data.info_3d.strides,
+ offsets: import_data.info_3d.offsets,
+ modifier: import_data.info_3d.modifier,
+ },
+ info_vulkan: Default::default(),
+ };
+
+ // SAFETY:
+ // Safe because gfxstream is initialized by now, and the return value is checked before
+ // returning a new resource. The backing buffers are not supplied with this call.
+ let ret = unsafe {
+ stream_renderer_import_resource(resource_id, &stream_handle, &stream_import_data)
+ };
+ ret_to_res(ret)?;
+
+ Ok(RutabagaResource {
+ resource_id,
+ handle: None,
+ blob: false,
+ blob_mem: 0,
+ blob_flags: 0,
+ map_info: None,
+ info_2d: None,
+ info_3d: None,
+ vulkan_info: None,
+ backing_iovecs: None,
+ component_mask: 1 << (RutabagaComponentType::Gfxstream as u8),
+ size: 0,
+ mapping: None,
+ })
+ }
+
fn attach_backing(
&self,
resource_id: u32,
diff --git a/rutabaga_gfx/src/lib.rs b/rutabaga_gfx/src/lib.rs
index 378c186..bd046e4 100644
--- a/rutabaga_gfx/src/lib.rs
+++ b/rutabaga_gfx/src/lib.rs
@@ -20,6 +20,7 @@
mod rutabaga_gralloc;
mod rutabaga_os;
mod rutabaga_utils;
+mod snapshot;
mod virgl_renderer;
pub use crate::rutabaga_core::calculate_capset_mask;
diff --git a/rutabaga_gfx/src/rutabaga_core.rs b/rutabaga_gfx/src/rutabaga_core.rs
index ec27db4..763e820 100644
--- a/rutabaga_gfx/src/rutabaga_core.rs
+++ b/rutabaga_gfx/src/rutabaga_core.rs
@@ -173,6 +173,29 @@
})
}
+ fn import(
+ &self,
+ resource_id: u32,
+ _import_handle: RutabagaHandle,
+ _import_data: RutabagaImportData,
+ ) -> RutabagaResult<RutabagaResource> {
+ Ok(RutabagaResource {
+ resource_id,
+ handle: None,
+ blob: false,
+ blob_mem: 0,
+ blob_flags: 0,
+ map_info: None,
+ info_2d: None,
+ info_3d: None,
+ vulkan_info: None,
+ backing_iovecs: None,
+ component_mask: 0,
+ size: 0,
+ mapping: None,
+ })
+ }
+
/// Implementations must attach `vecs` to the resource.
fn attach_backing(
&self,
@@ -656,6 +679,28 @@
Ok(())
}
+ /// Creates a new resource by importing the external `import_handle`, which is described by the
+ /// `import_data` metadata.
+ pub fn resource_import(
+ &mut self,
+ resource_id: u32,
+ import_handle: RutabagaHandle,
+ import_data: RutabagaImportData,
+ ) -> RutabagaResult<()> {
+ let component = self
+ .components
+ .get_mut(&self.default_component)
+ .ok_or(RutabagaError::InvalidComponent)?;
+
+ if self.resources.contains_key(&resource_id) {
+ return Err(RutabagaError::InvalidResourceId);
+ }
+
+ let resource = component.import(resource_id, import_handle, import_data)?;
+ self.resources.insert(resource_id, resource);
+ Ok(())
+ }
+
/// Attaches `vecs` to the resource.
pub fn attach_backing(
&mut self,
diff --git a/rutabaga_gfx/src/rutabaga_utils.rs b/rutabaga_gfx/src/rutabaga_utils.rs
index 8104ca9..b331a66 100644
--- a/rutabaga_gfx/src/rutabaga_utils.rs
+++ b/rutabaga_gfx/src/rutabaga_utils.rs
@@ -98,6 +98,7 @@
}
/// Metadata associated with a swapchain, video or camera image.
+#[repr(C)]
#[derive(Default, Copy, Clone, Debug)]
pub struct Resource3DInfo {
pub width: u32,
@@ -183,6 +184,14 @@
pub message: *const c_char,
}
+/// Import Data for resource_import
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct RutabagaImportData {
+ pub flags: u32,
+ pub info_3d: Resource3DInfo,
+}
+
// SAFETY:
// This is sketchy, since `message` is a C-string and there's no locking + atomics. However,
// the current use case is to mirror the C-API. If the `RutabagaDebugHandler` is used with
@@ -312,6 +321,9 @@
NixError(NixError),
#[error("Nul Error occured {0}")]
NulError(NulError),
+ /// An error with a snapshot.
+ #[error("a snapshot error occured: {0}")]
+ SnapshotError(String),
/// Violation of the Rutabaga spec occured.
#[error("violation of the rutabaga spec: {0}")]
SpecViolation(&'static str),
diff --git a/rutabaga_gfx/src/snapshot.rs b/rutabaga_gfx/src/snapshot.rs
new file mode 100644
index 0000000..8d10d83
--- /dev/null
+++ b/rutabaga_gfx/src/snapshot.rs
@@ -0,0 +1,93 @@
+// Copyright 2024 The ChromiumOS Authors
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// TODO: remove in next change.
+#![allow(dead_code)]
+
+use std::fs::File;
+use std::io::BufReader;
+use std::io::BufWriter;
+use std::io::Write;
+use std::path::PathBuf;
+
+use crate::RutabagaError;
+use crate::RutabagaResult;
+
+pub struct RutabagaSnapshotWriter {
+ dir: PathBuf,
+}
+
+impl RutabagaSnapshotWriter {
+ pub fn from_existing(directory: PathBuf) -> Self {
+ Self { dir: directory }
+ }
+
+ pub fn get_path(&self) -> PathBuf {
+ self.dir.clone()
+ }
+
+ pub fn add_namespace(&self, name: &str) -> RutabagaResult<Self> {
+ let directory = self.dir.join(name);
+
+ std::fs::create_dir(&directory).map_err(RutabagaError::IoError)?;
+
+ Ok(Self::from_existing(directory))
+ }
+
+ pub fn add_fragment<T: serde::Serialize>(&self, name: &str, t: &T) -> RutabagaResult<()> {
+ let fragment_path = self.dir.join(name);
+ let fragment_file = File::options()
+ .write(true)
+ .create_new(true)
+ .open(fragment_path)
+ .map_err(|e| {
+ RutabagaError::SnapshotError(format!("failed to add fragment {}: {}", name, e))
+ })?;
+ let mut fragment_writer = BufWriter::new(fragment_file);
+ serde_json::to_writer(&mut fragment_writer, t).map_err(|e| {
+ RutabagaError::SnapshotError(format!("failed to write fragment {}: {}", name, e))
+ })?;
+ fragment_writer.flush().map_err(|e| {
+ RutabagaError::SnapshotError(format!("failed to flush fragment {}: {}", name, e))
+ })?;
+ Ok(())
+ }
+}
+
+pub struct RutabagaSnapshotReader {
+ dir: PathBuf,
+}
+
+impl RutabagaSnapshotReader {
+ pub fn new(directory: PathBuf) -> RutabagaResult<Self> {
+ if !directory.as_path().exists() {
+ return Err(RutabagaError::SnapshotError(format!(
+ "{} does not exist",
+ directory.display()
+ )));
+ }
+
+ Ok(Self { dir: directory })
+ }
+
+ pub fn get_path(&self) -> PathBuf {
+ self.dir.clone()
+ }
+
+ pub fn get_namespace(&self, name: &str) -> RutabagaResult<Self> {
+ let directory = self.dir.join(name);
+ Self::new(directory)
+ }
+
+ pub fn get_fragment<T: serde::de::DeserializeOwned>(&self, name: &str) -> RutabagaResult<T> {
+ let fragment_path = self.dir.join(name);
+ let fragment_file = File::open(fragment_path).map_err(|e| {
+ RutabagaError::SnapshotError(format!("failed to get fragment {}: {}", name, e))
+ })?;
+ let mut fragment_reader = BufReader::new(fragment_file);
+ serde_json::from_reader(&mut fragment_reader).map_err(|e| {
+ RutabagaError::SnapshotError(format!("failed to read fragment {}: {}", name, e))
+ })
+ }
+}
diff --git a/src/crosvm/cmdline.rs b/src/crosvm/cmdline.rs
index c99616f..014afdf 100644
--- a/src/crosvm/cmdline.rs
+++ b/src/crosvm/cmdline.rs
@@ -1792,13 +1792,16 @@
/// PCI parameters.
///
/// Possible key values:
- /// mem=[start=INT,size=INT] - region for non-prefetchable PCI device memory below 4G
+ /// mem=[start=INT,size=INT] - region for non-prefetchable
+ /// PCI device memory below 4G
///
/// Possible key values (aarch64 only):
- /// cam=[start=INT,size=INT] - region for PCI Configuration Access Mechanism
+ /// cam=[start=INT,size=INT] - region for PCI Configuration
+ /// Access Mechanism
///
/// Possible key values (x86_64 only):
- /// ecam=[start=INT,size=INT] - region for PCIe Enhanced Configuration Access Mechanism
+ /// ecam=[start=INT,size=INT] - region for PCIe Enhanced
+ /// Configuration Access Mechanism
pub pci: Option<PciConfig>,
#[cfg(any(target_os = "android", target_os = "linux"))]
diff --git a/src/crosvm/sys/linux.rs b/src/crosvm/sys/linux.rs
index 1923397..d002ac3 100644
--- a/src/crosvm/sys/linux.rs
+++ b/src/crosvm/sys/linux.rs
@@ -663,7 +663,7 @@
#[cfg(feature = "balloon")]
if cfg.balloon {
let balloon_device_tube = if let Some(ref path) = cfg.balloon_control {
- Tube::new_from_unix_seqpacket(UnixSeqpacket::connect(path).with_context(|| {
+ Tube::try_from(UnixSeqpacket::connect(path).with_context(|| {
format!(
"failed to connect to balloon control socket {}",
path.display(),
@@ -2675,7 +2675,7 @@
for (bus_num, hp_bus) in linux.hotplug_bus.iter() {
let mut hp_bus_lock = hp_bus.lock();
if let Some(pci_addr) = hp_bus_lock.get_hotplug_device(hotplug_key) {
- sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
+ sys_allocator.release_pci(pci_addr);
hp_bus_lock.hot_unplug(pci_addr)?;
buses_to_remove.push(child_bus);
if hp_bus_lock.is_empty() {
@@ -2765,7 +2765,7 @@
hp_bus_lock.hot_unplug(pci_addr)?;
}
- sys_allocator.release_pci(pci_addr.bus, pci_addr.dev, pci_addr.func);
+ sys_allocator.release_pci(pci_addr);
if empty_simbling || hp_bus_lock.is_empty() {
if let Some(hotplug_key) = hp_bus_lock.get_hotplug_key() {
removed_key = Some(hotplug_key);
@@ -3400,7 +3400,7 @@
let sock = UnixSeqpacket::connect(addr.clone()).with_context(|| {
format!("failed to connect to registered listening socket {}", addr)
})?;
- let tube = ProtoTube::new_from_unix_seqpacket(sock)?;
+ let tube = ProtoTube::from(Tube::try_from(sock)?);
Ok(AddressedProtoTube {
tube: Rc::new(tube),
socket_addr: addr,
@@ -4068,10 +4068,8 @@
wait_ctx
.add(&socket, Token::VmControl { id })
.context("failed to add descriptor to wait context")?;
- control_tubes.insert(
- id,
- TaggedControlTube::Vm(Tube::new_from_unix_seqpacket(socket)?),
- );
+ control_tubes
+ .insert(id, TaggedControlTube::Vm(Tube::try_from(socket)?));
}
Err(e) => error!("failed to accept socket: {}", e),
}
@@ -4829,7 +4827,7 @@
loop {
match control_server_socket.accept() {
Ok(socket) => {
- let tube = match Tube::new_from_unix_seqpacket(socket) {
+ let tube = match Tube::try_from(socket) {
Ok(tube) => tube,
Err(e) => {
error!("failed to open tube: {:#}", e);
diff --git a/src/crosvm/sys/linux/device_helpers.rs b/src/crosvm/sys/linux/device_helpers.rs
index 821b49a..dad6ac8 100644
--- a/src/crosvm/sys/linux/device_helpers.rs
+++ b/src/crosvm/sys/linux/device_helpers.rs
@@ -1011,6 +1011,7 @@
&jail_config.pivot_root,
&config,
/* render_node_only= */ false,
+ /* snapshot_scratch_directory= */ None,
)?;
// Bind mount the wayland socket's directory into jail's root. This is necessary since
// each new wayland context must open() the socket. If the wayland socket is ever
diff --git a/src/crosvm/sys/linux/gpu.rs b/src/crosvm/sys/linux/gpu.rs
index fcad670..7bb3ff7 100644
--- a/src/crosvm/sys/linux/gpu.rs
+++ b/src/crosvm/sys/linux/gpu.rs
@@ -94,6 +94,10 @@
let is_sandboxed = cfg.jail_config.is_some();
let mut gpu_params = cfg.gpu_parameters.clone().unwrap();
+ if is_sandboxed {
+ gpu_params.snapshot_scratch_path = Some(Path::new("/tmpfs-gpu-snapshot").to_path_buf());
+ }
+
if gpu_params.fixed_blob_mapping {
if has_vfio_gfx_device {
// TODO(b/323368701): make fixed_blob_mapping compatible with vfio dma_buf mapping for
@@ -163,6 +167,7 @@
&jail_config.pivot_root,
&config,
/* render_node_only= */ false,
+ gpu_params.snapshot_scratch_path.as_deref(),
)?;
// Prepare GPU shader disk cache directory.
@@ -299,6 +304,7 @@
&jail_config.pivot_root,
&config,
/* render_node_only= */ true,
+ /* snapshot_scratch_directory= */ None,
)?;
let cache_info = get_gpu_cache_info(
diff --git a/src/crosvm/sys/linux/pci_hotplug_manager.rs b/src/crosvm/sys/linux/pci_hotplug_manager.rs
index efd7ee4..b0473dc 100644
--- a/src/crosvm/sys/linux/pci_hotplug_manager.rs
+++ b/src/crosvm/sys/linux/pci_hotplug_manager.rs
@@ -909,11 +909,7 @@
for (downstream_address, recoverable_resource) in port_stub.devices.drain() {
// port_stub.port does not have remove_hotplug_device method, as devices are removed
// when hot_unplug is called.
- resources.release_pci(
- downstream_address.bus,
- downstream_address.dev,
- downstream_address.func,
- );
+ resources.release_pci(downstream_address);
linux.irq_chip.unregister_level_irq_event(
recoverable_resource.irq_num,
&recoverable_resource.irq_evt,
diff --git a/src/sys.rs b/src/sys.rs
index 76d0590..ee8a9ee 100644
--- a/src/sys.rs
+++ b/src/sys.rs
@@ -12,6 +12,8 @@
use windows as platform;
pub(crate) use windows::ExitState;
pub(crate) use windows::run_config;
+ #[cfg(feature = "sandbox")]
+ pub(crate) use windows::main::sandbox_lower_token;
} else {
compile_error!("Unsupported platform");
}
@@ -22,8 +24,6 @@
pub(crate) use platform::main::get_library_watcher;
pub(crate) use platform::main::init_log;
pub(crate) use platform::main::run_command;
-#[cfg(feature = "sandbox")]
-pub(crate) use platform::main::sandbox_lower_token;
pub(crate) use platform::main::start_device;
#[cfg(not(feature = "crash-report"))]
pub(crate) use platform::set_panic_hook;
diff --git a/src/sys/windows/control_server.rs b/src/sys/windows/control_server.rs
index a814aa4..06a885c 100644
--- a/src/sys/windows/control_server.rs
+++ b/src/sys/windows/control_server.rs
@@ -35,7 +35,6 @@
use sync::Mutex;
use vm_control::VmRequest;
use vm_control::VmResponse;
-use winapi::shared::winerror::ERROR_MORE_DATA;
/// Windows named pipes don't fit in well with the control loop (`run_control`) the way sockets do
/// on unix, so this struct provides a compatibility layer (named pipe server) that functions very
@@ -356,8 +355,11 @@
{
println!("server: starting client 1");
control_server.client_waiting().wait().unwrap();
+ println!("server: woke on client 1");
let client1 = control_server.accept();
+ println!("server: accepted client 1");
let req: VmRequest = client1.0.recv().unwrap();
+ println!("server: got req from client 1");
assert!(matches!(req, VmRequest::Powerbtn));
client1.0.send(&VmResponse::Ok).unwrap();
}
@@ -367,8 +369,11 @@
{
println!("server: starting client 2");
control_server.client_waiting().wait().unwrap();
+ println!("server: woke on client 2");
let client2 = control_server.accept();
+ println!("server: accepted client 2");
let req: VmRequest = client2.0.recv().unwrap();
+ println!("server: got req from client 2");
assert!(matches!(req, VmRequest::Exit));
client2
.0
@@ -383,6 +388,7 @@
println!("client: starting client 1");
let client1 = create_client(&pipe_name);
client1.send(&VmRequest::Powerbtn).unwrap();
+ println!("client: sent client 1 request");
assert!(matches!(client1.recv().unwrap(), VmResponse::Ok));
println!("client: finished client 1");
}
@@ -391,6 +397,7 @@
println!("client: starting client 2");
let client2 = create_client(&pipe_name);
client2.send(&VmRequest::Exit).unwrap();
+ println!("client: sent client 2 request");
let resp = VmResponse::ErrString("err".to_owned());
assert!(matches!(client2.recv::<VmResponse>().unwrap(), resp,));
println!("client: finished client 2");
diff --git a/vm_control/src/api.rs b/vm_control/src/api.rs
index 500c9ef..7b550c4 100644
--- a/vm_control/src/api.rs
+++ b/vm_control/src/api.rs
@@ -155,40 +155,6 @@
self.request_unit(&VmMemoryRequest::UnregisterMemory(region))
}
- /// Register an ioeventfd by looking up using Alloc info.
- pub fn register_io_event_with_alloc(
- &self,
- evt: Event,
- allocation: Alloc,
- offset: u64,
- datamatch: Datamatch,
- ) -> Result<()> {
- self.request_unit(&VmMemoryRequest::IoEventWithAlloc {
- evt,
- allocation,
- offset,
- datamatch,
- register: true,
- })
- }
-
- /// Unregister an eventfd by looking up using Alloc info.
- pub fn unregister_io_event_with_alloc(
- &self,
- evt: Event,
- allocation: Alloc,
- offset: u64,
- datamatch: Datamatch,
- ) -> Result<()> {
- self.request_unit(&VmMemoryRequest::IoEventWithAlloc {
- evt,
- allocation,
- offset,
- datamatch,
- register: false,
- })
- }
-
/// Register an eventfd with raw guest memory address.
pub fn register_io_event(&self, event: Event, addr: u64, datamatch: Datamatch) -> Result<()> {
self.request_unit(&VmMemoryRequest::IoEventRaw(IoEventUpdateRequest {
diff --git a/vm_control/src/lib.rs b/vm_control/src/lib.rs
index 5f91cb5..84644fe 100644
--- a/vm_control/src/lib.rs
+++ b/vm_control/src/lib.rs
@@ -609,14 +609,6 @@
BalloonTargetReached { size: u64 },
/// Unregister the given memory slot that was previously registered with `RegisterMemory`.
UnregisterMemory(VmMemoryRegionId),
- /// Register an ioeventfd by looking up using Alloc info.
- IoEventWithAlloc {
- evt: Event,
- allocation: Alloc,
- offset: u64,
- datamatch: Datamatch,
- register: bool,
- },
/// Register an eventfd with raw guest memory address.
IoEventRaw(IoEventUpdateRequest),
}
@@ -995,40 +987,6 @@
Err(e) => VmMemoryResponse::Err(e),
}
}
- IoEventWithAlloc {
- evt,
- allocation,
- offset,
- datamatch,
- register,
- } => {
- let len = match datamatch {
- Datamatch::AnyLength => 1,
- Datamatch::U8(_) => 1,
- Datamatch::U16(_) => 2,
- Datamatch::U32(_) => 4,
- Datamatch::U64(_) => 8,
- };
- let addr = match sys_allocator
- .mmio_allocator_any()
- .address_from_pci_offset(allocation, offset, len)
- {
- Ok(addr) => addr,
- Err(e) => {
- error!("error getting target address: {:#}", e);
- return VmMemoryResponse::Err(SysError::new(EINVAL));
- }
- };
- let res = if register {
- vm.register_ioevent(&evt, IoEventAddress::Mmio(addr), datamatch)
- } else {
- vm.unregister_ioevent(&evt, IoEventAddress::Mmio(addr), datamatch)
- };
- match res {
- Ok(_) => VmMemoryResponse::Ok,
- Err(e) => VmMemoryResponse::Err(e),
- }
- }
IoEventRaw(request) => {
let res = if request.register {
vm.register_ioevent(
@@ -1375,21 +1333,6 @@
}
}
-/// Configuration of fake battery status information.
-#[derive(Serialize, Deserialize, Debug, Default)]
-pub enum BatConfig {
- // Propagates host's battery status
- #[default]
- Real,
- // Fake on battery status. Simulates a disconnected AC adapter.
- // This forces ac_online to false and sets the battery status
- // to DISCHARGING
- Fake {
- // Sets the maximum battery capacity reported to the guest
- max_capacity: u32,
- },
-}
-
#[derive(Serialize, Deserialize, Debug)]
pub enum BatControlCommand {
SetStatus(BatStatus),
diff --git a/vm_control/src/sys/linux.rs b/vm_control/src/sys/linux.rs
index 5472b0c..7eb97ba 100644
--- a/vm_control/src/sys/linux.rs
+++ b/vm_control/src/sys/linux.rs
@@ -49,7 +49,7 @@
) -> HandleRequestResult {
match UnixSeqpacket::connect(&socket_path) {
Ok(s) => {
- let socket = Tube::new_from_unix_seqpacket(s).map_err(|_| ())?;
+ let socket = Tube::try_from(s).map_err(|_| ())?;
if timeout.is_some() {
if let Err(e) = socket.set_recv_timeout(timeout) {
error!(
diff --git a/vm_memory/src/guest_memory.rs b/vm_memory/src/guest_memory.rs
index a6b30e9..634c706 100644
--- a/vm_memory/src/guest_memory.rs
+++ b/vm_memory/src/guest_memory.rs
@@ -115,10 +115,16 @@
#[sorted]
#[derive(Clone, Copy, Debug, Default, PartialOrd, PartialEq, Eq, Ord)]
pub enum MemoryRegionPurpose {
- // General purpose guest memory
+ /// BIOS/firmware ROM
+ Bios,
+
+ /// General purpose guest memory
#[default]
GuestMemoryRegion,
+
+ /// PVMFW
ProtectedFirmwareRegion,
+
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
StaticSwiotlbRegion,
}
diff --git a/x86_64/src/lib.rs b/x86_64/src/lib.rs
index ebd5831..6634361 100644
--- a/x86_64/src/lib.rs
+++ b/x86_64/src/lib.rs
@@ -241,6 +241,8 @@
LoadKernel(kernel_loader::Error),
#[error("error loading pflash: {0}")]
LoadPflash(io::Error),
+ #[error("error loading pVM firmware: {0}")]
+ LoadPvmFw(base::Error),
#[error("error translating address: Page not present")]
PageNotPresent,
#[error("pci mmio overlaps with pVM firmware memory")]
@@ -291,6 +293,8 @@
SetupSmbios(smbios::Error),
#[error("failed to set up sregs: {0}")]
SetupSregs(base::Error),
+ #[error("too many vCPUs")]
+ TooManyVcpus,
#[error("failed to translate virtual address")]
TranslatingVirtAddr,
#[error("protected VMs not supported on x86_64")]
@@ -353,7 +357,6 @@
const GB: u64 = 1 << 30;
pub const BOOT_STACK_POINTER: u64 = 0x8000;
-const START_OF_RAM_32BITS: u64 = 0;
const FIRST_ADDR_PAST_20BITS: u64 = 1 << 20;
const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32;
// Make sure it align to 256MB for MTRR convenient
@@ -488,13 +491,10 @@
})
}
-fn max_ram_end_before_32bit(
- arch_memory_layout: &ArchMemoryLayout,
- has_protected_vm_firmware: bool,
-) -> u64 {
+fn max_ram_end_before_32bit(arch_memory_layout: &ArchMemoryLayout) -> u64 {
let pci_start = arch_memory_layout.pci_mmio_before_32bit.start;
- if has_protected_vm_firmware {
- pci_start.min(PROTECTED_VM_FW_START)
+ if let Some(pvmfw_mem) = arch_memory_layout.pvmfw_mem {
+ pci_start.min(pvmfw_mem.start)
} else {
pci_start
}
@@ -673,7 +673,6 @@
ram_below_1m: AddressRange,
ram_below_4g: AddressRange,
ram_above_4g: AddressRange,
- has_protected_vm_firmware: bool,
) -> Result<Vec<E820Entry>> {
let mut e820_entries = Vec::new();
@@ -683,12 +682,11 @@
add_e820_entry(&mut e820_entries, ram_above_4g, E820Type::Ram)?
}
- if has_protected_vm_firmware {
+ if let Some(pvmfw_mem) = arch_memory_layout.pvmfw_mem {
// After the pVM firmware jumped to the guest, the pVM firmware itself
// is no longer running, so its memory is reusable by the guest OS.
// So add this memory as RAM rather than Reserved.
- let pvmfw_range = arch_memory_layout.pvmfw_mem.unwrap();
- add_e820_entry(&mut e820_entries, pvmfw_range, E820Type::Ram)?;
+ add_e820_entry(&mut e820_entries, pvmfw_mem, E820Type::Ram)?;
}
let pcie_cfg_mmio_range = arch_memory_layout.pcie_cfg_mmio;
@@ -721,15 +719,14 @@
arch_memory_layout: &ArchMemoryLayout,
size: u64,
bios_size: Option<u64>,
- has_protected_vm_firmware: bool,
) -> Vec<(GuestAddress, u64, MemoryRegionOptions)> {
let mut mem_size = size;
let mut regions = Vec::new();
- if has_protected_vm_firmware {
+ if let Some(pvmfw_mem) = arch_memory_layout.pvmfw_mem {
regions.push((
- GuestAddress(PROTECTED_VM_FW_START),
- PROTECTED_VM_FW_MAX_SIZE,
+ GuestAddress(pvmfw_mem.start),
+ pvmfw_mem.len().expect("invalid pvmfw_mem region"),
MemoryRegionOptions::new().purpose(MemoryRegionPurpose::ProtectedFirmwareRegion),
));
@@ -741,36 +738,41 @@
}
}
- let mem_start = START_OF_RAM_32BITS;
- let mem_end = GuestAddress(mem_size + mem_start);
-
- let first_addr_past_32bits = GuestAddress(FIRST_ADDR_PAST_32BITS);
- let max_end_32bits = GuestAddress(max_ram_end_before_32bit(
- arch_memory_layout,
- has_protected_vm_firmware,
+ let mem_below_4g = max_ram_end_before_32bit(arch_memory_layout).min(mem_size);
+ regions.push((
+ GuestAddress(0),
+ mem_below_4g,
+ MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
));
- if mem_end <= max_end_32bits {
- regions.push((GuestAddress(mem_start), mem_size, Default::default()));
- if let Some(bios_size) = bios_size {
- regions.push((bios_start(bios_size), bios_size, Default::default()));
- }
- } else {
+ let mem_above_4g = mem_size.saturating_sub(mem_below_4g);
+ if mem_above_4g > 0 {
regions.push((
- GuestAddress(mem_start),
- max_end_32bits.offset() - mem_start,
- Default::default(),
+ GuestAddress(FIRST_ADDR_PAST_32BITS),
+ mem_above_4g,
+ MemoryRegionOptions::new().purpose(MemoryRegionPurpose::GuestMemoryRegion),
));
- if let Some(bios_size) = bios_size {
- regions.push((bios_start(bios_size), bios_size, Default::default()));
- }
+ }
+
+ if let Some(bios_size) = bios_size {
regions.push((
- first_addr_past_32bits,
- mem_end.offset_from(max_end_32bits),
- Default::default(),
+ bios_start(bios_size),
+ bios_size,
+ MemoryRegionOptions::new().purpose(MemoryRegionPurpose::Bios),
));
}
+ regions.sort_unstable();
+
+ for (addr, size, options) in ®ions {
+ debug!(
+ "{:#018x}-{:#018x} {:?}",
+ addr.offset(),
+ addr.offset() + size - 1,
+ options.purpose,
+ );
+ }
+
regions
}
@@ -792,8 +794,6 @@
arch_memory_layout: &Self::ArchMemoryLayout,
_hypervisor: &impl Hypervisor,
) -> std::result::Result<Vec<(GuestAddress, u64, MemoryRegionOptions)>, Self::Error> {
- let has_protected_vm_firmware = components.hv_cfg.protection_type.runs_firmware();
-
let bios_size = match &components.vm_image {
VmImage::Bios(bios_file) => Some(bios_file.metadata().map_err(Error::LoadBios)?.len()),
VmImage::Kernel(_) => None,
@@ -803,7 +803,6 @@
arch_memory_layout,
components.memory_size,
bios_size,
- has_protected_vm_firmware,
))
}
@@ -992,7 +991,6 @@
irq_chip,
device_tube,
components.memory_size,
- components.hv_cfg.protection_type.runs_firmware(),
)
.map_err(Error::SetupCmos)?;
Some(host_tube)
@@ -1083,6 +1081,12 @@
// If another guest does need a way to pass these tables down to it's BIOS, this approach
// should be rethought.
+ // Make sure the `vcpu_count` casts below and the arithmetic in `setup_mptable` are well
+ // defined.
+ if vcpu_count >= u8::max_value().into() {
+ return Err(Error::TooManyVcpus);
+ }
+
if mptable {
// Note that this puts the mptable at 0x9FC00 in guest physical memory.
mptable::setup_mptable(&mem, vcpu_count as u8, &pci_irqs)
@@ -1167,7 +1171,6 @@
params,
dump_device_tree_blob,
device_tree_overlays,
- protection_type.runs_firmware(),
)?;
if protection_type.needs_firmware_loaded() {
@@ -1180,15 +1183,26 @@
PROTECTED_VM_FW_MAX_SIZE,
)
.map_err(Error::LoadCustomPvmFw)?;
+ } else if protection_type.runs_firmware() {
+ // Tell the hypervisor to load the pVM firmware.
+ vm.load_protected_vm_firmware(
+ GuestAddress(PROTECTED_VM_FW_START),
+ PROTECTED_VM_FW_MAX_SIZE,
+ )
+ .map_err(Error::LoadPvmFw)?;
}
- let entry_addr = if protection_type.runs_firmware() {
- PROTECTED_VM_FW_START
+ let entry_addr = if protection_type.needs_firmware_loaded() {
+ Some(PROTECTED_VM_FW_START)
+ } else if protection_type.runs_firmware() {
+ None // Initial RIP value is set by the hypervisor
} else {
- kernel_entry.offset()
+ Some(kernel_entry.offset())
};
- vcpu_init[0].regs.rip = entry_addr;
+ if let Some(entry) = entry_addr {
+ vcpu_init[0].regs.rip = entry;
+ }
match kernel_type {
KernelType::BzImage | KernelType::Elf => {
@@ -1630,7 +1644,6 @@
params: boot_params,
dump_device_tree_blob: Option<PathBuf>,
device_tree_overlays: Vec<DtbOverlay>,
- has_protected_vm_firmware: bool,
) -> Result<()> {
// Some guest kernels expect a typical PC memory layout where the region between 640 KB and
// 1 MB is reserved for device memory/ROMs and get confused if there is a RAM region
@@ -1638,7 +1651,7 @@
// high memory regions.
let ram_below_1m_end = 640 * 1024;
let ram_below_1m = AddressRange {
- start: START_OF_RAM_32BITS,
+ start: 0,
end: ram_below_1m_end - 1,
};
@@ -1648,8 +1661,7 @@
// Find the end of the part of guest memory below 4G that is not pVM firmware memory.
// This part of guest memory includes just one region, so just find the end of this region.
- let max_ram_end_below_4g =
- max_ram_end_before_32bit(arch_memory_layout, has_protected_vm_firmware) - 1;
+ let max_ram_end_below_4g = max_ram_end_before_32bit(arch_memory_layout) - 1;
let guest_mem_end_below_4g = mem
.regions()
.map(|r| r.guest_addr.offset() + r.size as u64 - 1)
@@ -1671,7 +1683,6 @@
ram_below_1m,
ram_below_4g,
ram_above_4g,
- has_protected_vm_firmware,
)?;
let kernel_max_cmdline_len = if params.hdr.cmdline_size == 0 {
@@ -1883,14 +1894,8 @@
irq_chip: &mut dyn IrqChipX86_64,
vm_control: Tube,
mem_size: u64,
- has_protected_vm_firmware: bool,
) -> anyhow::Result<()> {
- let mem_regions = arch_memory_regions(
- arch_memory_layout,
- mem_size,
- None,
- has_protected_vm_firmware,
- );
+ let mem_regions = arch_memory_regions(arch_memory_layout, mem_size, None);
let mem_below_4g = mem_regions
.iter()
@@ -2388,8 +2393,6 @@
use super::*;
- const TEST_MEMORY_SIZE: u64 = 2 * GB;
-
fn setup() -> ArchMemoryLayout {
let pci_config = PciConfig {
ecam: Some(MemoryRegionConfig {
@@ -2407,87 +2410,128 @@
#[test]
fn regions_lt_4gb_nobios() {
let arch_memory_layout = setup();
- let regions = arch_memory_regions(
- &arch_memory_layout,
- 512 * MB,
- /* bios_size */ None,
- /* has_protected_vm_firmware */ false,
+ let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, /* bios_size */ None);
+ assert_eq!(
+ regions,
+ [(
+ GuestAddress(0),
+ 1u64 << 29,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ )]
);
- assert_eq!(1, regions.len());
- assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
- assert_eq!(1u64 << 29, regions[0].1);
}
#[test]
fn regions_gt_4gb_nobios() {
let arch_memory_layout = setup();
let size = 4 * GB + 0x8000;
- let regions = arch_memory_regions(
- &arch_memory_layout,
- size,
- /* bios_size */ None,
- /* has_protected_vm_firmware */ false,
+ let regions = arch_memory_regions(&arch_memory_layout, size, /* bios_size */ None);
+ assert_eq!(
+ regions,
+ [
+ (
+ GuestAddress(0),
+ 2 * GB,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ ),
+ (
+ GuestAddress(4 * GB),
+ 2 * GB + 0x8000,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ ),
+ ]
);
- assert_eq!(2, regions.len());
- assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
- assert_eq!(GuestAddress(4 * GB), regions[1].0);
- assert_eq!(4 * GB + 0x8000, regions[0].1 + regions[1].1);
}
#[test]
fn regions_lt_4gb_bios() {
let arch_memory_layout = setup();
let bios_len = 1 * MB;
- let regions = arch_memory_regions(
- &arch_memory_layout,
- 512 * MB,
- Some(bios_len),
- /* has_protected_vm_firmware */ false,
- );
- assert_eq!(2, regions.len());
- assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
- assert_eq!(512 * MB, regions[0].1);
+ let regions = arch_memory_regions(&arch_memory_layout, 512 * MB, Some(bios_len));
assert_eq!(
- GuestAddress(FIRST_ADDR_PAST_32BITS - bios_len),
- regions[1].0
+ regions,
+ [
+ (
+ GuestAddress(0),
+ 512 * MB,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ ),
+ (
+ GuestAddress(4 * GB - bios_len),
+ bios_len,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::Bios,
+ },
+ ),
+ ]
);
- assert_eq!(bios_len, regions[1].1);
}
#[test]
fn regions_gt_4gb_bios() {
let arch_memory_layout = setup();
let bios_len = 1 * MB;
- let regions = arch_memory_regions(
- &arch_memory_layout,
- 4 * GB + 0x8000,
- Some(bios_len),
- /* has_protected_vm_firmware */ false,
- );
- assert_eq!(3, regions.len());
- assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
+ let regions = arch_memory_regions(&arch_memory_layout, 4 * GB + 0x8000, Some(bios_len));
assert_eq!(
- GuestAddress(FIRST_ADDR_PAST_32BITS - bios_len),
- regions[1].0
+ regions,
+ [
+ (
+ GuestAddress(0),
+ 2 * GB,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ ),
+ (
+ GuestAddress(4 * GB - bios_len),
+ bios_len,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::Bios,
+ },
+ ),
+ (
+ GuestAddress(4 * GB),
+ 2 * GB + 0x8000,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ ),
+ ]
);
- assert_eq!(bios_len, regions[1].1);
- assert_eq!(GuestAddress(4 * GB), regions[2].0);
}
#[test]
fn regions_eq_4gb_nobios() {
let arch_memory_layout = setup();
// Test with exact size of 4GB - the overhead.
- let regions = arch_memory_regions(
- &arch_memory_layout,
- TEST_MEMORY_SIZE - START_OF_RAM_32BITS,
- /* bios_size */ None,
- /* has_protected_vm_firmware */ false,
+ let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, /* bios_size */ None);
+ assert_eq!(
+ regions,
+ [(
+ GuestAddress(0),
+ 2 * GB,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ )]
);
- dbg!(®ions);
- assert_eq!(1, regions.len());
- assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
- assert_eq!(TEST_MEMORY_SIZE - START_OF_RAM_32BITS, regions[0].1);
}
#[test]
@@ -2495,20 +2539,28 @@
let arch_memory_layout = setup();
// Test with exact size of 4GB - the overhead.
let bios_len = 1 * MB;
- let regions = arch_memory_regions(
- &arch_memory_layout,
- TEST_MEMORY_SIZE - START_OF_RAM_32BITS,
- Some(bios_len),
- /* has_protected_vm_firmware */ false,
- );
- assert_eq!(2, regions.len());
- assert_eq!(GuestAddress(START_OF_RAM_32BITS), regions[0].0);
- assert_eq!(TEST_MEMORY_SIZE - START_OF_RAM_32BITS, regions[0].1);
+ let regions = arch_memory_regions(&arch_memory_layout, 2 * GB, Some(bios_len));
assert_eq!(
- GuestAddress(FIRST_ADDR_PAST_32BITS - bios_len),
- regions[1].0
+ regions,
+ [
+ (
+ GuestAddress(0),
+ 2 * GB,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::GuestMemoryRegion,
+ },
+ ),
+ (
+ GuestAddress(4 * GB - bios_len),
+ bios_len,
+ MemoryRegionOptions {
+ align: 0,
+ purpose: MemoryRegionPurpose::Bios,
+ },
+ ),
+ ]
);
- assert_eq!(bios_len, regions[1].1);
}
#[test]
diff --git a/x86_64/src/regs.rs b/x86_64/src/regs.rs
index bb7c1b5..e9d62b9 100644
--- a/x86_64/src/regs.rs
+++ b/x86_64/src/regs.rs
@@ -186,8 +186,6 @@
const BOOT_GDT_OFFSET: u64 = 0x1500;
const BOOT_IDT_OFFSET: u64 = 0x1528;
-const BOOT_GDT_MAX: usize = 5;
-
fn write_gdt_table(table: &[u64], guest_mem: &GuestMemory) -> Result<()> {
let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET);
for (index, entry) in table.iter().enumerate() {
@@ -215,12 +213,13 @@
/// Configures the GDT, IDT, and segment registers for long mode.
pub fn configure_segments_and_sregs(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
// reference: https://docs.kernel.org/arch/x86/boot.html?highlight=__BOOT_CS#id1
- let gdt_table: [u64; BOOT_GDT_MAX] = [
+ let gdt_table: [u64; 6] = [
gdt::gdt_entry(0, 0, 0), // NULL
gdt::gdt_entry(0, 0, 0), // NULL
gdt::gdt_entry(0xa09b, 0, 0xfffff), // CODE
gdt::gdt_entry(0xc093, 0, 0xfffff), // DATA
gdt::gdt_entry(0x808b, 0, 0xfffff), // TSS
+ 0, // TSS (upper 32 bits of base)
];
let code_seg = gdt::segment_from_gdt(gdt_table[2], 2);
@@ -254,7 +253,7 @@
/// Configures the GDT, IDT, and segment registers for 32-bit protected mode with paging disabled.
pub fn configure_segments_and_sregs_flat32(mem: &GuestMemory, sregs: &mut Sregs) -> Result<()> {
// reference: https://docs.kernel.org/arch/x86/boot.html?highlight=__BOOT_CS#id1
- let gdt_table: [u64; BOOT_GDT_MAX] = [
+ let gdt_table: [u64; 5] = [
gdt::gdt_entry(0, 0, 0), // NULL
gdt::gdt_entry(0, 0, 0), // NULL
gdt::gdt_entry(0xc09b, 0, 0xfffff), // CODE