| // Copyright 2019 The ChromiumOS Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| //! VM disk image file format I/O. |
| |
| use std::cmp::min; |
| use std::fmt::Debug; |
| use std::fs::File; |
| use std::io; |
| use std::io::Seek; |
| use std::io::SeekFrom; |
| use std::path::Path; |
| use std::sync::Arc; |
| |
| use async_trait::async_trait; |
| use base::get_filesystem_type; |
| use base::info; |
| use base::AsRawDescriptors; |
| use base::FileAllocate; |
| use base::FileReadWriteAtVolatile; |
| use base::FileSetLen; |
| use cros_async::BackingMemory; |
| use cros_async::Executor; |
| use cros_async::IoSource; |
| use cros_async::MemRegionIter; |
| use thiserror::Error as ThisError; |
| |
| mod asynchronous; |
| #[allow(unused)] |
| pub(crate) use asynchronous::AsyncDiskFileWrapper; |
| #[cfg(feature = "qcow")] |
| mod qcow; |
| #[cfg(feature = "qcow")] |
| pub use qcow::QcowFile; |
| #[cfg(feature = "qcow")] |
| pub use qcow::QCOW_MAGIC; |
| mod sys; |
| |
| #[cfg(feature = "composite-disk")] |
| mod composite; |
| #[cfg(feature = "composite-disk")] |
| use composite::CompositeDiskFile; |
| #[cfg(feature = "composite-disk")] |
| use composite::CDISK_MAGIC; |
| #[cfg(feature = "composite-disk")] |
| mod gpt; |
| #[cfg(feature = "composite-disk")] |
| pub use composite::create_composite_disk; |
| #[cfg(feature = "composite-disk")] |
| pub use composite::create_zero_filler; |
| #[cfg(feature = "composite-disk")] |
| pub use composite::Error as CompositeError; |
| #[cfg(feature = "composite-disk")] |
| pub use composite::ImagePartitionType; |
| #[cfg(feature = "composite-disk")] |
| pub use composite::PartitionInfo; |
| #[cfg(feature = "composite-disk")] |
| pub use gpt::Error as GptError; |
| |
| #[cfg(feature = "android-sparse")] |
| mod android_sparse; |
| #[cfg(feature = "android-sparse")] |
| use android_sparse::AndroidSparse; |
| #[cfg(feature = "android-sparse")] |
| use android_sparse::SPARSE_HEADER_MAGIC; |
| use sys::read_from_disk; |
| |
| /// Nesting depth limit for disk formats that can open other disk files. |
| pub const MAX_NESTING_DEPTH: u32 = 10; |
| |
| #[derive(ThisError, Debug)] |
| pub enum Error { |
| #[error("failed to create block device: {0}")] |
| BlockDeviceNew(base::Error), |
| #[error("requested file conversion not supported")] |
| ConversionNotSupported, |
| #[cfg(feature = "android-sparse")] |
| #[error("failure in android sparse disk: {0}")] |
| CreateAndroidSparseDisk(android_sparse::Error), |
| #[cfg(feature = "composite-disk")] |
| #[error("failure in composite disk: {0}")] |
| CreateCompositeDisk(composite::Error), |
| #[error("failure creating single file disk: {0}")] |
| CreateSingleFileDisk(cros_async::AsyncError), |
| #[error("failure with fdatasync: {0}")] |
| Fdatasync(cros_async::AsyncError), |
| #[error("failure with fsync: {0}")] |
| Fsync(cros_async::AsyncError), |
| #[error("failure with fdatasync: {0}")] |
| IoFdatasync(io::Error), |
| #[error("failure with flush: {0}")] |
| IoFlush(io::Error), |
| #[error("failure with fsync: {0}")] |
| IoFsync(io::Error), |
| #[error("failure to punch hole: {0}")] |
| IoPunchHole(io::Error), |
| #[error("checking host fs type: {0}")] |
| HostFsType(base::Error), |
| #[error("maximum disk nesting depth exceeded")] |
| MaxNestingDepthExceeded, |
| #[error("failure to punch hole: {0}")] |
| PunchHole(cros_async::AsyncError), |
| #[error("failure to punch hole for block device file: {0}")] |
| PunchHoleBlockDeviceFile(base::Error), |
| #[cfg(feature = "qcow")] |
| #[error("failure in qcow: {0}")] |
| QcowError(qcow::Error), |
| #[error("failed to read data: {0}")] |
| ReadingData(io::Error), |
| #[error("failed to read header: {0}")] |
| ReadingHeader(io::Error), |
| #[error("failed to read to memory: {0}")] |
| ReadToMem(cros_async::AsyncError), |
| #[error("failed to seek file: {0}")] |
| SeekingFile(io::Error), |
| #[error("failed to set file size: {0}")] |
| SettingFileSize(io::Error), |
| #[error("unknown disk type")] |
| UnknownType, |
| #[error("failed to write from memory: {0}")] |
| WriteFromMem(cros_async::AsyncError), |
| #[error("failed to write from vec: {0}")] |
| WriteFromVec(cros_async::AsyncError), |
| #[error("failed to write zeroes: {0}")] |
| WriteZeroes(io::Error), |
| #[error("failed to write data: {0}")] |
| WritingData(io::Error), |
| #[error("failed to convert to async: {0}")] |
| ToAsync(cros_async::AsyncError), |
| #[cfg(windows)] |
| #[error("failed to set disk file sparse: {0}")] |
| SetSparseFailure(io::Error), |
| #[error("failure with guest memory access: {0}")] |
| GuestMemory(cros_async::mem::Error), |
| #[error("unsupported operation")] |
| UnsupportedOperation, |
| } |
| |
| pub type Result<T> = std::result::Result<T, Error>; |
| |
| /// A trait for getting the length of a disk image or raw block device. |
| pub trait DiskGetLen { |
| /// Get the current length of the disk in bytes. |
| fn get_len(&self) -> io::Result<u64>; |
| } |
| |
| impl DiskGetLen for File { |
| fn get_len(&self) -> io::Result<u64> { |
| let mut s = self; |
| let orig_seek = s.stream_position()?; |
| let end = s.seek(SeekFrom::End(0))?; |
| s.seek(SeekFrom::Start(orig_seek))?; |
| Ok(end) |
| } |
| } |
| |
| /// The prerequisites necessary to support a block device. |
| pub trait DiskFile: |
| FileSetLen + DiskGetLen + FileReadWriteAtVolatile + ToAsyncDisk + Send + AsRawDescriptors + Debug |
| { |
| /// Creates a new DiskFile instance that shares the same underlying disk file image. IO |
| /// operations to a DiskFile should affect all DiskFile instances with the same underlying disk |
| /// file image. |
| /// |
| /// `try_clone()` returns [`io::ErrorKind::Unsupported`] Error if a DiskFile does not support |
| /// creating an instance with the same underlying disk file image. |
| fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> { |
| Err(io::Error::new( |
| io::ErrorKind::Unsupported, |
| "unsupported operation", |
| )) |
| } |
| } |
| |
| /// A `DiskFile` that can be converted for asychronous access. |
| pub trait ToAsyncDisk: AsRawDescriptors + DiskGetLen + Send { |
| /// Convert a boxed self in to a box-wrapped implementaiton of AsyncDisk. |
| /// Used to convert a standard disk image to an async disk image. This conversion and the |
| /// inverse are needed so that the `Send` DiskImage can be given to the block thread where it is |
| /// converted to a non-`Send` AsyncDisk. The AsyncDisk can then be converted back and returned |
| /// to the main device thread if the block device is destroyed or reset. |
| fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>>; |
| } |
| |
| impl ToAsyncDisk for File { |
| fn to_async_disk(self: Box<Self>, ex: &Executor) -> Result<Box<dyn AsyncDisk>> { |
| Ok(Box::new(SingleFileDisk::new(*self, ex)?)) |
| } |
| } |
| |
| /// The variants of image files on the host that can be used as virtual disks. |
| #[derive(Debug, PartialEq, Eq)] |
| pub enum ImageType { |
| Raw, |
| Qcow2, |
| CompositeDisk, |
| AndroidSparse, |
| } |
| |
| fn log_host_fs_type(file: &File) -> Result<()> { |
| let fstype = get_filesystem_type(file).map_err(Error::HostFsType)?; |
| info!("Disk image file is hosted on file system type {:x}", fstype); |
| Ok(()) |
| } |
| |
| /// Detect the type of an image file by checking for a valid header of the supported formats. |
| pub fn detect_image_type(file: &File, overlapped_mode: bool) -> Result<ImageType> { |
| let mut f = file; |
| let disk_size = f.get_len().map_err(Error::SeekingFile)?; |
| let orig_seek = f.stream_position().map_err(Error::SeekingFile)?; |
| |
| info!("disk size {}, ", disk_size); |
| log_host_fs_type(f)?; |
| // Try to read the disk in a nicely-aligned block size unless the whole file is smaller. |
| const MAGIC_BLOCK_SIZE: usize = 4096; |
| #[repr(align(4096))] |
| struct BlockAlignedBuffer { |
| data: [u8; MAGIC_BLOCK_SIZE], |
| } |
| let mut magic = BlockAlignedBuffer { |
| data: [0u8; MAGIC_BLOCK_SIZE], |
| }; |
| let magic_read_len = if disk_size > MAGIC_BLOCK_SIZE as u64 { |
| MAGIC_BLOCK_SIZE |
| } else { |
| // This cast is safe since we know disk_size is less than MAGIC_BLOCK_SIZE (4096) and |
| // therefore is representable in usize. |
| disk_size as usize |
| }; |
| |
| read_from_disk(f, 0, &mut magic.data[0..magic_read_len], overlapped_mode)?; |
| f.seek(SeekFrom::Start(orig_seek)) |
| .map_err(Error::SeekingFile)?; |
| |
| #[cfg(feature = "composite-disk")] |
| if let Some(cdisk_magic) = magic.data.get(0..CDISK_MAGIC.len()) { |
| if cdisk_magic == CDISK_MAGIC.as_bytes() { |
| return Ok(ImageType::CompositeDisk); |
| } |
| } |
| |
| #[allow(unused_variables)] // magic4 is only used with the qcow or android-sparse features. |
| if let Some(magic4) = magic.data.get(0..4) { |
| #[cfg(feature = "qcow")] |
| if magic4 == QCOW_MAGIC.to_be_bytes() { |
| return Ok(ImageType::Qcow2); |
| } |
| #[cfg(feature = "android-sparse")] |
| if magic4 == SPARSE_HEADER_MAGIC.to_le_bytes() { |
| return Ok(ImageType::AndroidSparse); |
| } |
| } |
| |
| Ok(ImageType::Raw) |
| } |
| |
| impl DiskFile for File { |
| fn try_clone(&self) -> io::Result<Box<dyn DiskFile>> { |
| Ok(Box::new(self.try_clone()?)) |
| } |
| } |
| |
| /// Inspect the image file type and create an appropriate disk file to match it. |
| pub fn create_disk_file( |
| raw_image: File, |
| is_sparse_file: bool, |
| max_nesting_depth: u32, |
| image_path: &Path, |
| ) -> Result<Box<dyn DiskFile>> { |
| let image_type = detect_image_type(&raw_image, false)?; |
| create_disk_file_of_type( |
| raw_image, |
| is_sparse_file, |
| max_nesting_depth, |
| image_path, |
| image_type, |
| ) |
| } |
| |
| /// create an appropriate disk file to match give image type. |
| pub fn create_disk_file_of_type( |
| raw_image: File, |
| is_sparse_file: bool, |
| // max_nesting_depth is only used if the composite-disk or qcow features are enabled. |
| #[allow(unused_variables)] mut max_nesting_depth: u32, |
| // image_path is only used if the composite-disk feature is enabled. |
| #[allow(unused_variables)] image_path: &Path, |
| image_type: ImageType, |
| ) -> Result<Box<dyn DiskFile>> { |
| if max_nesting_depth == 0 { |
| return Err(Error::MaxNestingDepthExceeded); |
| } |
| #[allow(unused_assignments)] |
| { |
| max_nesting_depth -= 1; |
| } |
| |
| Ok(match image_type { |
| ImageType::Raw => { |
| sys::apply_raw_disk_file_options(&raw_image, is_sparse_file)?; |
| Box::new(raw_image) as Box<dyn DiskFile> |
| } |
| #[cfg(feature = "qcow")] |
| ImageType::Qcow2 => { |
| Box::new(QcowFile::from(raw_image, max_nesting_depth).map_err(Error::QcowError)?) |
| as Box<dyn DiskFile> |
| } |
| #[cfg(feature = "composite-disk")] |
| ImageType::CompositeDisk => { |
| // Valid composite disk header present |
| Box::new( |
| CompositeDiskFile::from_file( |
| raw_image, |
| is_sparse_file, |
| max_nesting_depth, |
| image_path, |
| ) |
| .map_err(Error::CreateCompositeDisk)?, |
| ) as Box<dyn DiskFile> |
| } |
| #[cfg(feature = "android-sparse")] |
| ImageType::AndroidSparse => { |
| Box::new(AndroidSparse::from_file(raw_image).map_err(Error::CreateAndroidSparseDisk)?) |
| as Box<dyn DiskFile> |
| } |
| #[allow(unreachable_patterns)] |
| _ => return Err(Error::UnknownType), |
| }) |
| } |
| |
| /// An asynchronously accessible disk. |
| #[async_trait(?Send)] |
| pub trait AsyncDisk: DiskGetLen + FileSetLen + FileAllocate { |
| /// Returns the inner file consuming self. |
| fn into_inner(self: Box<Self>) -> Box<dyn DiskFile>; |
| |
| /// Flush intermediary buffers and/or dirty state to file. fsync not required. |
| async fn flush(&self) -> Result<()>; |
| |
| /// Asynchronously fsyncs any completed operations to the disk. |
| async fn fsync(&self) -> Result<()>; |
| |
| /// Asynchronously fdatasyncs any completed operations to the disk. |
| /// Note that an implementation may simply call fsync for fdatasync. |
| async fn fdatasync(&self) -> Result<()>; |
| |
| /// Reads from the file at 'file_offset' into memory `mem` at `mem_offsets`. |
| /// `mem_offsets` is similar to an iovec except relative to the start of `mem`. |
| async fn read_to_mem<'a>( |
| &'a self, |
| file_offset: u64, |
| mem: Arc<dyn BackingMemory + Send + Sync>, |
| mem_offsets: cros_async::MemRegionIter<'a>, |
| ) -> Result<usize>; |
| |
| /// Writes to the file at 'file_offset' from memory `mem` at `mem_offsets`. |
| async fn write_from_mem<'a>( |
| &'a self, |
| file_offset: u64, |
| mem: Arc<dyn BackingMemory + Send + Sync>, |
| mem_offsets: cros_async::MemRegionIter<'a>, |
| ) -> Result<usize>; |
| |
| /// Replaces a range of bytes with a hole. |
| async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()>; |
| |
| /// Writes up to `length` bytes of zeroes to the stream, returning how many bytes were written. |
| async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()>; |
| |
| /// Reads from the file at 'file_offset' into `buf`. |
| /// |
| /// Less efficient than `read_to_mem` because of extra copies and allocations. |
| async fn read_double_buffered(&self, file_offset: u64, buf: &mut [u8]) -> Result<usize> { |
| let backing_mem = Arc::new(cros_async::VecIoWrapper::from(vec![0u8; buf.len()])); |
| let region = cros_async::MemRegion { |
| offset: 0, |
| len: buf.len(), |
| }; |
| let n = self |
| .read_to_mem( |
| file_offset, |
| backing_mem.clone(), |
| MemRegionIter::new(&[region]), |
| ) |
| .await?; |
| backing_mem |
| .get_volatile_slice(region) |
| .expect("BUG: the VecIoWrapper shrank?") |
| .sub_slice(0, n) |
| .expect("BUG: read_to_mem return value too large?") |
| .copy_to(buf); |
| Ok(n) |
| } |
| |
| /// Writes to the file at 'file_offset' from `buf`. |
| /// |
| /// Less efficient than `write_from_mem` because of extra copies and allocations. |
| async fn write_double_buffered(&self, file_offset: u64, buf: &[u8]) -> Result<usize> { |
| let backing_mem = Arc::new(cros_async::VecIoWrapper::from(buf.to_vec())); |
| let region = cros_async::MemRegion { |
| offset: 0, |
| len: buf.len(), |
| }; |
| self.write_from_mem( |
| file_offset, |
| backing_mem, |
| cros_async::MemRegionIter::new(&[region]), |
| ) |
| .await |
| } |
| } |
| |
| /// A disk backed by a single file that implements `AsyncDisk` for access. |
| pub struct SingleFileDisk { |
| inner: IoSource<File>, |
| // Whether the backed file is a block device since the punch-hole needs different operation. |
| #[cfg(any(target_os = "android", target_os = "linux"))] |
| is_block_device_file: bool, |
| } |
| |
| impl DiskGetLen for SingleFileDisk { |
| fn get_len(&self) -> io::Result<u64> { |
| self.inner.as_source().get_len() |
| } |
| } |
| |
| impl FileSetLen for SingleFileDisk { |
| fn set_len(&self, len: u64) -> io::Result<()> { |
| self.inner.as_source().set_len(len) |
| } |
| } |
| |
| impl FileAllocate for SingleFileDisk { |
| fn allocate(&mut self, offset: u64, len: u64) -> io::Result<()> { |
| self.inner.as_source_mut().allocate(offset, len) |
| } |
| } |
| |
| #[async_trait(?Send)] |
| impl AsyncDisk for SingleFileDisk { |
| fn into_inner(self: Box<Self>) -> Box<dyn DiskFile> { |
| Box::new(self.inner.into_source()) |
| } |
| |
| async fn flush(&self) -> Result<()> { |
| // Nothing to flush, all file mutations are immediately sent to the OS. |
| Ok(()) |
| } |
| |
| async fn fsync(&self) -> Result<()> { |
| self.inner.fsync().await.map_err(Error::Fsync) |
| } |
| |
| async fn fdatasync(&self) -> Result<()> { |
| self.inner.fdatasync().await.map_err(Error::Fdatasync) |
| } |
| |
| async fn read_to_mem<'a>( |
| &'a self, |
| file_offset: u64, |
| mem: Arc<dyn BackingMemory + Send + Sync>, |
| mem_offsets: cros_async::MemRegionIter<'a>, |
| ) -> Result<usize> { |
| self.inner |
| .read_to_mem(Some(file_offset), mem, mem_offsets) |
| .await |
| .map_err(Error::ReadToMem) |
| } |
| |
| async fn write_from_mem<'a>( |
| &'a self, |
| file_offset: u64, |
| mem: Arc<dyn BackingMemory + Send + Sync>, |
| mem_offsets: cros_async::MemRegionIter<'a>, |
| ) -> Result<usize> { |
| self.inner |
| .write_from_mem(Some(file_offset), mem, mem_offsets) |
| .await |
| .map_err(Error::WriteFromMem) |
| } |
| |
| async fn punch_hole(&self, file_offset: u64, length: u64) -> Result<()> { |
| #[cfg(any(target_os = "android", target_os = "linux"))] |
| if self.is_block_device_file { |
| return base::linux::discard_block(self.inner.as_source(), file_offset, length) |
| .map_err(Error::PunchHoleBlockDeviceFile); |
| } |
| self.inner |
| .punch_hole(file_offset, length) |
| .await |
| .map_err(Error::PunchHole) |
| } |
| |
| async fn write_zeroes_at(&self, file_offset: u64, length: u64) -> Result<()> { |
| if self |
| .inner |
| .write_zeroes_at(file_offset, length) |
| .await |
| .is_ok() |
| { |
| return Ok(()); |
| } |
| |
| // Fall back to filling zeros if more efficient write_zeroes_at doesn't work. |
| let buf_size = min(length, 0x10000); |
| let mut nwritten = 0; |
| while nwritten < length { |
| let remaining = length - nwritten; |
| let write_size = min(remaining, buf_size) as usize; |
| let buf = vec![0u8; write_size]; |
| nwritten += self |
| .inner |
| .write_from_vec(Some(file_offset + nwritten), buf) |
| .await |
| .map(|(n, _)| n as u64) |
| .map_err(Error::WriteFromVec)?; |
| } |
| Ok(()) |
| } |
| } |