blob: beb6b3094400653c2ea5cf424b25a15d8837f6b3 [file] [log] [blame]
/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
mod file;
mod mount;
use anyhow::{anyhow, bail, Result};
use fuse::filesystem::{
Context, DirEntry, DirectoryIterator, Entry, FileSystem, FsOptions, GetxattrReply,
SetattrValid, ZeroCopyReader, ZeroCopyWriter,
};
use fuse::sys::OpenOptions as FuseOpenOptions;
use log::{debug, error, warn};
use std::collections::{btree_map, BTreeMap};
use std::convert::{TryFrom, TryInto};
use std::ffi::{CStr, CString, OsStr};
use std::io;
use std::mem::{zeroed, MaybeUninit};
use std::option::Option;
use std::os::unix::ffi::OsStrExt;
use std::path::{Component, Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, RwLock};
use std::time::Duration;
use crate::common::{divide_roundup, ChunkedSizeIter, CHUNK_SIZE};
use crate::file::{
validate_basename, Attr, InMemoryDir, RandomWrite, ReadByChunk, RemoteDirEditor,
RemoteFileEditor, RemoteFileReader,
};
use crate::fsstat::RemoteFsStatsReader;
use crate::fsverity::VerifiedFileEditor;
pub use self::file::LazyVerifiedReadonlyFile;
pub use self::mount::mount_and_enter_message_loop;
use self::mount::MAX_WRITE_BYTES;
/// Inode number type used throughout AuthFS.
pub type Inode = u64;
/// Handle type handed out to FUSE. In this file, handles are only allocated for opened
/// directories (see `opendir`); regular files are opened without a handle.
type Handle = u64;
/// Maximum time for a file's metadata to be cached by the kernel. Since any file and directory
/// changes (if not read-only) have to go through AuthFS to be trusted, the timeout can be maximum.
const DEFAULT_METADATA_TIMEOUT: Duration = Duration::MAX;
/// Inode number of the filesystem root directory.
const ROOT_INODE: Inode = 1;
/// `AuthFsEntry` defines the filesystem entry types supported by AuthFS.
///
/// Each inode in the filesystem is backed by exactly one of these variants.
pub enum AuthFsEntry {
/// A read-only directory (writable during initialization). Root directory is an example.
ReadonlyDirectory { dir: InMemoryDir },
/// A file type that is verified against fs-verity signature (thus read-only). The file is
/// served from a remote server.
VerifiedReadonly { reader: LazyVerifiedReadonlyFile },
/// A file type that is a read-only passthrough from a file on a remote server. `file_size` is
/// the size reported for the file and the upper bound for reads.
UnverifiedReadonly { reader: RemoteFileReader, file_size: u64 },
/// A file type that is initially empty, and the content is stored on a remote server. File
/// integrity is guaranteed with private Merkle tree.
VerifiedNew { editor: VerifiedFileEditor<RemoteFileEditor>, attr: Attr },
/// A directory type that is initially empty. One can create new files (`VerifiedNew`) and new
/// directories (`VerifiedNewDirectory` itself) with integrity guaranteed within the VM.
VerifiedNewDirectory { dir: RemoteDirEditor, attr: Attr },
}
impl AuthFsEntry {
    /// Checks that this entry is a directory that may be removed right now.
    ///
    /// Succeeds only for a `VerifiedNewDirectory` without any entries. Otherwise the error is
    /// `ENOTEMPTY` for a non-empty `VerifiedNewDirectory`, `EACCES` for a `ReadonlyDirectory`,
    /// and `ENOTDIR` for every non-directory entry.
    fn expect_empty_deletable_directory(&self) -> io::Result<()> {
        let errno = match self {
            AuthFsEntry::VerifiedNewDirectory { dir, .. } if dir.number_of_entries() == 0 => {
                return Ok(());
            }
            AuthFsEntry::VerifiedNewDirectory { .. } => libc::ENOTEMPTY,
            AuthFsEntry::ReadonlyDirectory { .. } => libc::EACCES,
            _ => libc::ENOTDIR,
        };
        Err(io::Error::from_raw_os_error(errno))
    }
}
/// Per-inode bookkeeping stored in the inode table: the entry itself plus the state needed to
/// decide when the inode can be dropped.
struct InodeState {
/// Actual inode entry.
entry: AuthFsEntry,
/// Number of `Handle`s (i.e. file descriptors) that are currently referring to this inode.
///
/// Technically, this does not matter to readonly entries, since they live forever. The
/// reference count is only needed for managing the lifetime of writable entries like
/// `VerifiedNew` and `VerifiedNewDirectory`. That is, when an entry is deleted, the actual entry
/// needs to stay alive until the reference count reaches zero.
///
/// Note: This is not to be confused with hardlinks, which AuthFS doesn't currently implement.
handle_ref_count: AtomicU64,
/// Whether the inode is already unlinked, i.e. should be removed, once `handle_ref_count` is
/// down to zero.
unlinked: bool,
}
impl InodeState {
fn new(entry: AuthFsEntry) -> Self {
InodeState { entry, handle_ref_count: AtomicU64::new(0), unlinked: false }
}
fn new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self {
InodeState { entry, handle_ref_count: AtomicU64::new(handle_ref_count), unlinked: false }
}
}
/// Data type through which a directory implementation presents one of its entries to `AuthFs`.
#[derive(Clone)]
pub struct AuthFsDirEntry {
/// Inode number of the entry.
pub inode: Inode,
/// Name of the entry within its directory.
pub name: CString,
/// Whether the entry is a directory; `readdir` reports it as `DT_DIR` if so, else `DT_REG`.
pub is_dir: bool,
}
/// A snapshot of a directory's entries for supporting the `readdir` operation.
///
/// The `readdir` implementation is required by FUSE to not return any entries that have been
/// returned previously (while it's fine to not return new entries). A snapshot is the easiest way
/// to be compliant. See `fuse::filesystem::readdir` for more details.
///
/// A `DirEntriesSnapshot` is created on `opendir`, and is associated with the returned
/// `Handle`/FD. The snapshot is deleted when the handle is released in `releasedir`.
type DirEntriesSnapshot = Vec<AuthFsDirEntry>;
/// An iterator for reading from a `DirEntriesSnapshot`.
pub struct DirEntriesSnapshotIterator {
/// A shared reference to the `DirEntriesSnapshot` stored in `AuthFs`.
snapshot: Arc<DirEntriesSnapshot>,
/// A value determined by `Self` to identify the last entry. 0 is a reserved value by FUSE to
/// mean reading from the beginning. Any other value is the 1-based index of the entry that
/// was returned last.
prev_offset: usize,
}
impl DirectoryIterator for DirEntriesSnapshotIterator {
    /// Returns the entry that follows the one identified by `prev_offset`, or `None` once the
    /// snapshot is exhausted.
    ///
    /// Offsets handed to FUSE are 1-based indices into the snapshot, because 0 is reserved to
    /// mean "start from the beginning". An offset that lies past the end of the snapshot
    /// (including one so large that the next index is not representable) yields `None` instead
    /// of panicking.
    fn next(&mut self) -> Option<DirEntry> {
        // This iterator should not be the only reference to the snapshot. The snapshot should
        // still be held in `dir_handle_table`, i.e. when the FD is not yet closed.
        //
        // This code is unreachable when `readdir` is called with a closed FD. Only when the FD is
        // not yet closed can a `DirEntriesSnapshotIterator` be created (but it is still
        // short-lived during `readdir`).
        debug_assert!(Arc::strong_count(&self.snapshot) >= 2);

        // With 1-based offsets, the entry after `prev_offset` is simply `prev_offset + 1`; this
        // also covers the reserved value 0, which maps to the first element. The offset
        // originates from the kernel, so guard the addition against overflow.
        let current_offset = self.prev_offset.checked_add(1)?;
        let AuthFsDirEntry { inode, name, is_dir } = self.snapshot.get(current_offset - 1)?;
        let entry = DirEntry {
            offset: current_offset as u64,
            ino: *inode,
            name,
            type_: if *is_dir { libc::DT_DIR.into() } else { libc::DT_REG.into() },
        };
        self.prev_offset = current_offset;
        Some(entry)
    }
}
/// Maps each open directory `Handle` to the snapshot of entries taken when it was opened.
type DirHandleTable = BTreeMap<Handle, Arc<DirEntriesSnapshot>>;
// AuthFS needs to be `Sync` to be used with the `fuse` crate.
/// The AuthFS filesystem state: the inode table, the open directory handles, and the counters
/// used to allocate new inode and handle numbers.
pub struct AuthFs {
/// Table for `Inode` to `InodeState` lookup.
inode_table: RwLock<BTreeMap<Inode, InodeState>>,
/// The next available inode number.
next_inode: AtomicU64,
/// Table for `Handle` to `Arc<DirEntriesSnapshot>` lookup. On `opendir`, a new directory handle
/// is created and the snapshot of the current directory is created. This is not super
/// efficient, but is the simplest way to be compliant to the FUSE contract (see
/// `fuse::filesystem::readdir`).
///
/// Currently, no code locks `dir_handle_table` and `inode_table` at the same time to avoid
/// deadlock.
dir_handle_table: RwLock<DirHandleTable>,
/// The next available handle number.
next_handle: AtomicU64,
/// A reader to access the remote filesystem stats, which is supposed to be of "the" output
/// directory. We assume all outputs are stored in the same partition.
remote_fs_stats_reader: RemoteFsStatsReader,
}
// Implementation for preparing an `AuthFs` instance, before starting to serve.
// TODO(victorhsieh): Consider implementing a builder to separate the mutable initialization from
// the immutable / interiorly mutable serving phase.
impl AuthFs {
    /// Creates a filesystem that contains only an empty, read-only root directory at
    /// `ROOT_INODE`.
    ///
    /// `remote_fs_stats_reader` is stored and queried later by `statfs`.
    pub fn new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs {
        let mut inode_table = BTreeMap::new();
        inode_table.insert(
            ROOT_INODE,
            InodeState::new(AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() }),
        );

        AuthFs {
            inode_table: RwLock::new(inode_table),
            next_inode: AtomicU64::new(ROOT_INODE + 1),
            dir_handle_table: RwLock::new(BTreeMap::new()),
            next_handle: AtomicU64::new(1),
            remote_fs_stats_reader,
        }
    }

    /// Add an `AuthFsEntry` as `basename` to the filesystem root.
    ///
    /// Returns the inode allocated for the new entry, or an error if `basename` does not pass
    /// `validate_basename` or the entry cannot be added.
    pub fn add_entry_at_root_dir(
        &mut self,
        basename: PathBuf,
        entry: AuthFsEntry,
    ) -> Result<Inode> {
        validate_basename(&basename)?;
        self.add_entry_at_ro_dir_by_path(ROOT_INODE, &basename, entry)
    }

    /// Add an `AuthFsEntry` by path from the `ReadonlyDirectory` represented by `dir_inode`. The
    /// path must be a relative path. If some ancestor directories do not exist, they will be
    /// created (also as `ReadonlyDirectory`) automatically.
    ///
    /// Returns the inode allocated for the new entry. Fails (without panicking) if the path is
    /// absolute or not canonical, if `dir_inode` is unknown, or if `dir_inode` or any existing
    /// ancestor on the path is not a `ReadonlyDirectory`.
    pub fn add_entry_at_ro_dir_by_path(
        &mut self,
        dir_inode: Inode,
        path: &Path,
        entry: AuthFsEntry,
    ) -> Result<Inode> {
        // 1. Make sure the parent directories all exist. Derive the entry's parent inode.
        let parent_path =
            path.parent().ok_or_else(|| anyhow!("No parent directory: {:?}", path))?;
        let parent_inode =
            parent_path.components().try_fold(dir_inode, |current_dir_inode, path_component| {
                match path_component {
                    Component::RootDir => bail!("Absolute path is not supported"),
                    Component::Normal(name) => {
                        let inode_table = self.inode_table.get_mut().unwrap();
                        // Locate the internal directory structure.
                        let current_dir_entry = &mut inode_table
                            .get_mut(&current_dir_inode)
                            .ok_or_else(|| {
                                anyhow!("Unknown directory inode {}", current_dir_inode)
                            })?
                            .entry;
                        let dir = match current_dir_entry {
                            AuthFsEntry::ReadonlyDirectory { dir } => dir,
                            // Reachable with caller input, e.g. when an earlier path component
                            // already exists as a file. Report it instead of panicking.
                            _ => bail!(
                                "Inode {} is not a ReadonlyDirectory (path: {:?})",
                                current_dir_inode,
                                path
                            ),
                        };
                        // Return directory inode. Create first if not exists.
                        if let Some(existing_inode) = dir.lookup_inode(name.as_ref()) {
                            Ok(existing_inode)
                        } else {
                            let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
                            let new_dir_entry =
                                AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() };

                            // Actually update the tables.
                            dir.add_dir(name.as_ref(), new_inode)?;
                            if inode_table
                                .insert(new_inode, InodeState::new(new_dir_entry))
                                .is_some()
                            {
                                bail!("Unexpected to find a duplicated inode");
                            }
                            Ok(new_inode)
                        }
                    }
                    _ => Err(anyhow!("Path is not canonical: {:?}", path)),
                }
            })?;

        // 2. Insert the entry to the parent directory, as well as the inode table.
        let inode_table = self.inode_table.get_mut().unwrap();
        let inode_state = inode_table
            .get_mut(&parent_inode)
            .ok_or_else(|| anyhow!("Unknown directory inode {}", parent_inode))?;
        match &mut inode_state.entry {
            AuthFsEntry::ReadonlyDirectory { dir } => {
                let basename =
                    path.file_name().ok_or_else(|| anyhow!("Bad file name: {:?}", path))?;
                let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);

                // Actually update the tables.
                dir.add_file(basename.as_ref(), new_inode)?;
                if inode_table.insert(new_inode, InodeState::new(entry)).is_some() {
                    bail!("Unexpected to find a duplicated inode");
                }
                Ok(new_inode)
            }
            // Reachable with caller input: `dir_inode` itself (for a single-component path) or
            // the last existing ancestor may be a file or a writable directory.
            _ => bail!("Inode {} is not a ReadonlyDirectory (path: {:?})", parent_inode, path),
        }
    }
}
// Implementation for serving requests.
impl AuthFs {
    /// Runs `handle_fn` on the entry of `inode` while holding the inode table's read lock, and
    /// returns whatever `handle_fn` returns. Fails with `ENOENT` if the inode is unknown.
    fn handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R>
    where
        F: FnOnce(&AuthFsEntry) -> io::Result<R>,
    {
        let table = self.inode_table.read().unwrap();
        handle_inode_locked(&table, inode, |state| handle_fn(&state.entry))
    }

    /// Adds a new entry `name` created by `create_fn` at `parent_inode`, with an initial ref count
    /// of one.
    ///
    /// Two updates happen under the inode table's write lock: the name is registered with a
    /// freshly allocated inode in the parent directory (done by `create_fn`), and the entry
    /// returned by `create_fn` is inserted into the global inode table under that inode.
    ///
    /// `create_fn` receives the parent entry, the name as a path, and the new inode number. If
    /// it fails, or `parent_inode` is unknown (`ENOENT`), nothing is inserted.
    fn create_new_entry_with_ref_count<F>(
        &self,
        parent_inode: Inode,
        name: &CStr,
        create_fn: F,
    ) -> io::Result<Inode>
    where
        F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,
    {
        let mut table = self.inode_table.write().unwrap();
        let (allocated_inode, created_entry) =
            handle_inode_mut_locked(&mut table, &parent_inode, |parent_state| {
                let allocated_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
                let created_entry =
                    create_fn(&mut parent_state.entry, cstr_to_path(name), allocated_inode)?;
                Ok((allocated_inode, created_entry))
            })?;

        match table.entry(allocated_inode) {
            btree_map::Entry::Vacant(slot) => {
                slot.insert(InodeState::new_with_ref_count(created_entry, 1));
                Ok(allocated_inode)
            }
            btree_map::Entry::Occupied(_) => {
                unreachable!("Unexpected duplication of inode {}", allocated_inode)
            }
        }
    }

    /// Stores `dir_entries` as the snapshot of a newly allocated directory handle and returns
    /// that handle together with empty open options.
    fn open_dir_store_snapshot(
        &self,
        dir_entries: Vec<AuthFsDirEntry>,
    ) -> io::Result<(Option<Handle>, FuseOpenOptions)> {
        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
        let mut table = self.dir_handle_table.write().unwrap();
        match table.entry(handle) {
            btree_map::Entry::Vacant(slot) => {
                slot.insert(Arc::new(dir_entries));
                Ok((Some(handle), FuseOpenOptions::empty()))
            }
            btree_map::Entry::Occupied(_) => {
                unreachable!("Unexpected to see new handle {} to existing in the table", handle)
            }
        }
    }
}
fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
if (flags & libc::O_ACCMODE as u32) == mode as u32 {
Ok(())
} else {
Err(io::Error::from_raw_os_error(libc::EACCES))
}
}
// `blk_size()` returns `CHUNK_SIZE` in the integer type assigned to `st_blksize` in
// `create_stat`. The return type is `c_int` on 64-bit aarch64 and `c_long` elsewhere
// (presumably mirroring how `libc` declares `stat64::st_blksize` per target — confirm when
// adding a new architecture).
cfg_if::cfg_if! {
if #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] {
fn blk_size() -> libc::c_int { CHUNK_SIZE as libc::c_int }
} else {
fn blk_size() -> libc::c_long { CHUNK_SIZE as libc::c_long }
}
}
/// How the permission bits of a generated stat are chosen.
#[allow(clippy::enum_variant_names)]
enum AccessMode {
/// Use the fixed read-only permission bits built into `create_stat` / `create_dir_stat`.
ReadOnly,
/// Use the given mode bits, OR-ed with the file type bit by the stat helpers.
Variable(u32),
}
/// Builds the `stat64` of a regular file with inode `ino` and size `file_size`.
///
/// The owner and group are always 0 and the link count is 1. With `AccessMode::ReadOnly` only the
/// owner gets read access; with `AccessMode::Variable` the given mode bits are used. Returns
/// `EFBIG` if the size or the block count cannot be represented in the stat fields.
fn create_stat(
    ino: libc::ino_t,
    file_size: u64,
    access_mode: AccessMode,
) -> io::Result<libc::stat64> {
    let size = libc::off64_t::try_from(file_size)
        .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
    // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
    let blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
        .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
    let mode = match access_mode {
        // Until needed, let's just grant the owner access.
        AccessMode::ReadOnly => libc::S_IFREG | libc::S_IRUSR,
        AccessMode::Variable(mode) => libc::S_IFREG | mode,
    };

    // SAFETY: stat64 is a plain C struct without pointers, so all-zero bytes are a valid value.
    let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
    st.st_ino = ino;
    st.st_mode = mode;
    st.st_nlink = 1;
    st.st_uid = 0;
    st.st_gid = 0;
    st.st_size = size;
    st.st_blksize = blk_size();
    st.st_blocks = blocks;
    Ok(st)
}
fn create_dir_stat(
ino: libc::ino_t,
file_number: u16,
access_mode: AccessMode,
) -> io::Result<libc::stat64> {
// SAFETY: stat64 is a plan C struct without pointer.
let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
st.st_ino = ino;
st.st_mode = match access_mode {
AccessMode::ReadOnly => {
// Until needed, let's just grant the owner access and search to group and others.
libc::S_IFDIR | libc::S_IXUSR | libc::S_IRUSR | libc::S_IXGRP | libc::S_IXOTH
}
AccessMode::Variable(mode) => libc::S_IFDIR | mode,
};
// 2 extra for . and ..
st.st_nlink = file_number
.checked_add(2)
.ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))?
.into();
st.st_uid = 0;
st.st_gid = 0;
Ok(st)
}
/// Returns the index of the `CHUNK_SIZE`-sized chunk that contains byte `offset`.
fn offset_to_chunk_index(offset: u64) -> u64 {
offset / CHUNK_SIZE
}
/// Reads up to `size` bytes starting at `offset` from `file` and writes them to `w`, one chunk
/// at a time.
///
/// The read is clamped to `file_size`, so reading at or past the end of the file writes nothing
/// and returns 0. Returns the total number of bytes written.
///
/// # Errors
///
/// * `ENODATA` if `file` returns fewer bytes for a chunk than were planned to be served.
/// * `EIO` if `w` accepts fewer bytes than were handed to it.
/// * Any error from `ReadByChunk::read_chunk` or from the writer.
fn read_chunks<W: io::Write, T: ReadByChunk>(
    mut w: W,
    file: &T,
    file_size: u64,
    offset: u64,
    size: u32,
) -> io::Result<usize> {
    let remaining = file_size.saturating_sub(offset);
    // Clamp in `u64` before narrowing. The result is at most `size` (a `u32`), so the cast to
    // `usize` is lossless; casting `remaining` first would truncate it on 32-bit targets and
    // could shrink the read incorrectly.
    let size_to_read = std::cmp::min(u64::from(size), remaining) as usize;
    let total = ChunkedSizeIter::new(size_to_read, offset, CHUNK_SIZE as usize).try_fold(
        0,
        |total, (current_offset, planned_data_size)| {
            // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
            // instead of accepting a buffer, the writer could expose the final destination buffer
            // for the reader to write to. It might not be generally applicable though, e.g. with
            // virtio transport, the buffer may not be continuous.
            let mut buf = [0u8; CHUNK_SIZE as usize];
            let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
            if read_size < planned_data_size {
                return Err(io::Error::from_raw_os_error(libc::ENODATA));
            }

            // Serve only the requested window inside the chunk.
            let begin = (current_offset % CHUNK_SIZE) as usize;
            let end = begin + planned_data_size;
            let s = w.write(&buf[begin..end])?;
            if s != planned_data_size {
                return Err(io::Error::from_raw_os_error(libc::EIO));
            }
            Ok(total + s)
        },
    )?;

    Ok(total)
}
impl FileSystem for AuthFs {
type Inode = Inode;
type Handle = Handle;
type DirIter = DirEntriesSnapshotIterator;
/// Reports `MAX_WRITE_BYTES` (defined in the `mount` module) as the maximum buffer size.
fn max_buffer_size(&self) -> u32 {
MAX_WRITE_BYTES
}
/// Returns the options this filesystem wants enabled. The options offered in `_capable` are
/// not consulted.
fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
// Enable writeback cache for better performance especially since our bandwidth to the
// backend service is limited.
Ok(FsOptions::WRITEBACK_CACHE)
}
/// Looks up `name` in the directory `parent` and returns the found entry's inode and attributes.
///
/// On success, the reference count of the found inode is incremented by one; `forget` is the
/// operation that decrements it. Both steps run under a single read lock of the inode table.
///
/// Fails with `ENOENT` if `parent` is unknown or, for a `ReadonlyDirectory`, if the name is not
/// found, and with `ENOTDIR` if `parent` is not a directory. For a `VerifiedNewDirectory`, the
/// error comes from `find_inode`.
fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
let inode_table = self.inode_table.read().unwrap();
// Look up the entry's inode number in parent directory.
let inode =
handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
AuthFsEntry::ReadonlyDirectory { dir } => {
let path = cstr_to_path(name);
dir.lookup_inode(path).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
}
AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
let path = cstr_to_path(name);
dir.find_inode(path)
}
_ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
})?;
// Create the entry's stat if found.
let st = handle_inode_locked(
&inode_table,
&inode,
|InodeState { entry, handle_ref_count, .. }| {
let st = match entry {
AuthFsEntry::ReadonlyDirectory { dir } => {
create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
}
AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
create_stat(inode, *file_size, AccessMode::ReadOnly)
}
AuthFsEntry::VerifiedReadonly { reader } => {
create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
}
AuthFsEntry::VerifiedNew { editor, attr, .. } => {
create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
}
AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
inode,
dir.number_of_entries(),
AccessMode::Variable(attr.mode()),
),
}?;
// Only count the reference once the stat was built successfully. The counter is atomic,
// so it can be bumped while holding just the read lock.
if handle_ref_count.fetch_add(1, Ordering::Relaxed) == u64::MAX {
panic!("Handle reference count overflow");
}
Ok(st)
},
)?;
Ok(Entry {
inode,
generation: 0,
attr: st,
entry_timeout: DEFAULT_METADATA_TIMEOUT,
attr_timeout: DEFAULT_METADATA_TIMEOUT,
})
}
/// Drops `count` references from `inode`, and removes the inode from the inode table once it is
/// both marked as unlinked and no longer referenced.
///
/// Panics if `count` is larger than the current reference count. A failure to find the inode is
/// only logged as a warning.
fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) {
let mut inode_table = self.inode_table.write().unwrap();
// Decrement first; the closure reports whether the inode should be deleted right away.
let delete_now = handle_inode_mut_locked(
&mut inode_table,
&inode,
|InodeState { handle_ref_count, unlinked, .. }| {
// The write lock gives exclusive access, so the atomic can be modified directly.
let current = handle_ref_count.get_mut();
if count > *current {
error!(
"Trying to decrease refcount of inode {} by {} (> current {})",
inode, count, *current
);
panic!(); // log to logcat with error!
}
*current -= count;
Ok(*unlinked && *current == 0)
},
);
match delete_now {
Ok(true) => {
let _ = inode_table.remove(&inode).expect("Removed an existing entry");
}
Ok(false) => { /* Let the inode stay */ }
Err(e) => {
warn!(
"Unexpected failure when tries to forget an inode {} by refcount {}: {:?}",
inode, count, e
);
}
}
}
/// Returns the attributes of `inode` together with the metadata cache timeout.
///
/// Read-only entries are reported with the fixed read-only mode; writable entries are reported
/// with the mode stored in their `attr`. Fails with `ENOENT` if the inode is unknown.
fn getattr(
    &self,
    _ctx: Context,
    inode: Inode,
    _handle: Option<Handle>,
) -> io::Result<(libc::stat64, Duration)> {
    self.handle_inode(&inode, |entry| {
        let stat = match entry {
            AuthFsEntry::ReadonlyDirectory { dir } => {
                create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
            }
            AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
                create_stat(inode, *file_size, AccessMode::ReadOnly)
            }
            AuthFsEntry::VerifiedReadonly { reader } => {
                create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
            }
            AuthFsEntry::VerifiedNew { editor, attr, .. } => {
                create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
            }
            AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
                inode,
                dir.number_of_entries(),
                AccessMode::Variable(attr.mode()),
            ),
        };
        stat.map(|st| (st, DEFAULT_METADATA_TIMEOUT))
    })
}
fn open(
&self,
_ctx: Context,
inode: Self::Inode,
flags: u32,
) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
// Since file handle is not really used in later operations (which use Inode directly),
// return None as the handle.
self.handle_inode(&inode, |config| {
match config {
AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
check_access_mode(flags, libc::O_RDONLY)?;
}
AuthFsEntry::VerifiedNew { .. } => {
// TODO(victorhsieh): Imeplement ACL check using the attr and ctx. Always allow
// for now.
}
AuthFsEntry::ReadonlyDirectory { .. }
| AuthFsEntry::VerifiedNewDirectory { .. } => {
// TODO(victorhsieh): implement when needed.
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
}
// Always cache the file content. There is currently no need to support direct I/O or
// avoid the cache buffer. Memory mapping is only possible with cache enabled.
Ok((None, FuseOpenOptions::KEEP_CACHE))
})
}
/// Creates a new, empty regular file `name` in the directory `parent`.
///
/// The file can only be created in a `VerifiedNewDirectory`; any other parent yields `EBADF`,
/// and an existing name yields `EEXIST`. The new inode starts with a reference count of one.
/// No handle is returned (see also `open`).
///
/// The effective mode is `mode & !umask`. The same masked value is both stored with the file
/// and reported in the returned attributes, so the attributes cached by the kernel agree with
/// what a later `getattr` reports.
fn create(
    &self,
    _ctx: Context,
    parent: Self::Inode,
    name: &CStr,
    mode: u32,
    _flags: u32,
    umask: u32,
) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)> {
    // Apply the umask once, up front, so the stored mode and the returned stat cannot diverge.
    let mode = mode & !umask;
    let new_inode = self.create_new_entry_with_ref_count(
        parent,
        name,
        |parent_entry, basename, new_inode| match parent_entry {
            AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                if dir.has_entry(basename) {
                    return Err(io::Error::from_raw_os_error(libc::EEXIST));
                }
                let (new_file, new_attr) = dir.create_file(basename, new_inode, mode)?;
                Ok(AuthFsEntry::VerifiedNew { editor: new_file, attr: new_attr })
            }
            _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
        },
    )?;

    Ok((
        Entry {
            inode: new_inode,
            generation: 0,
            attr: create_stat(new_inode, /* file_size */ 0, AccessMode::Variable(mode))?,
            entry_timeout: DEFAULT_METADATA_TIMEOUT,
            attr_timeout: DEFAULT_METADATA_TIMEOUT,
        },
        // See also `open`.
        /* handle */ None,
        FuseOpenOptions::KEEP_CACHE,
    ))
}
/// Reads up to `size` bytes at `offset` from the file at `inode` into `w`, and returns the
/// number of bytes written to `w`.
///
/// The read is bounded by the entry's current file size (see `read_chunks`). Directories are
/// rejected with `EISDIR`, and an unknown inode with `ENOENT`.
fn read<W: io::Write + ZeroCopyWriter>(
&self,
_ctx: Context,
inode: Inode,
_handle: Handle,
w: W,
size: u32,
offset: u64,
_lock_owner: Option<u64>,
_flags: u32,
) -> io::Result<usize> {
self.handle_inode(&inode, |config| {
match config {
AuthFsEntry::VerifiedReadonly { reader } => {
read_chunks(w, reader, reader.file_size()?, offset, size)
}
AuthFsEntry::UnverifiedReadonly { reader, file_size } => {
read_chunks(w, reader, *file_size, offset, size)
}
AuthFsEntry::VerifiedNew { editor, .. } => {
// Note that with FsOptions::WRITEBACK_CACHE, it's possible for the kernel to
// request a read even if the file is open with O_WRONLY.
read_chunks(w, editor, editor.size(), offset, size)
}
AuthFsEntry::ReadonlyDirectory { .. }
| AuthFsEntry::VerifiedNewDirectory { .. } => {
Err(io::Error::from_raw_os_error(libc::EISDIR))
}
}
})
}
fn write<R: io::Read + ZeroCopyReader>(
&self,
_ctx: Context,
inode: Self::Inode,
_handle: Self::Handle,
mut r: R,
size: u32,
offset: u64,
_lock_owner: Option<u64>,
_delayed_write: bool,
_flags: u32,
) -> io::Result<usize> {
self.handle_inode(&inode, |config| match config {
AuthFsEntry::VerifiedNew { editor, .. } => {
let mut buf = vec![0; size as usize];
r.read_exact(&mut buf)?;
editor.write_at(&buf, offset)
}
AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
Err(io::Error::from_raw_os_error(libc::EPERM))
}
AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
Err(io::Error::from_raw_os_error(libc::EISDIR))
}
})
}
fn setattr(
&self,
_ctx: Context,
inode: Inode,
in_attr: libc::stat64,
_handle: Option<Handle>,
valid: SetattrValid,
) -> io::Result<(libc::stat64, Duration)> {
let mut inode_table = self.inode_table.write().unwrap();
handle_inode_mut_locked(&mut inode_table, &inode, |InodeState { entry, .. }| match entry {
AuthFsEntry::VerifiedNew { editor, attr } => {
check_unsupported_setattr_request(valid)?;
// Initialize the default stat.
let mut new_attr =
create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?;
// `valid` indicates what fields in `attr` are valid. Update to return correctly.
if valid.contains(SetattrValid::SIZE) {
// st_size is i64, but the cast should be safe since kernel should not give a
// negative size.
debug_assert!(in_attr.st_size >= 0);
new_attr.st_size = in_attr.st_size;
editor.resize(in_attr.st_size as u64)?;
}
if valid.contains(SetattrValid::MODE) {
attr.set_mode(in_attr.st_mode)?;
new_attr.st_mode = in_attr.st_mode;
}
Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
}
AuthFsEntry::VerifiedNewDirectory { dir, attr } => {
check_unsupported_setattr_request(valid)?;
if valid.contains(SetattrValid::SIZE) {
return Err(io::Error::from_raw_os_error(libc::EISDIR));
}
// Initialize the default stat.
let mut new_attr = create_dir_stat(
inode,
dir.number_of_entries(),
AccessMode::Variable(attr.mode()),
)?;
if valid.contains(SetattrValid::MODE) {
attr.set_mode(in_attr.st_mode)?;
new_attr.st_mode = in_attr.st_mode;
}
Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
}
_ => Err(io::Error::from_raw_os_error(libc::EPERM)),
})
}
fn getxattr(
&self,
_ctx: Context,
inode: Self::Inode,
name: &CStr,
size: u32,
) -> io::Result<GetxattrReply> {
self.handle_inode(&inode, |config| {
match config {
AuthFsEntry::VerifiedNew { editor, .. } => {
// FUSE ioctl is limited, thus we can't implement fs-verity ioctls without a kernel
// change (see b/196635431). Until it's possible, use xattr to expose what we need
// as an authfs specific API.
if name != CStr::from_bytes_with_nul(b"authfs.fsverity.digest\0").unwrap() {
return Err(io::Error::from_raw_os_error(libc::ENODATA));
}
if size == 0 {
// Per protocol, when size is 0, return the value size.
Ok(GetxattrReply::Count(editor.get_fsverity_digest_size() as u32))
} else {
let digest = editor.calculate_fsverity_digest()?;
if digest.len() > size as usize {
Err(io::Error::from_raw_os_error(libc::ERANGE))
} else {
Ok(GetxattrReply::Value(digest.to_vec()))
}
}
}
_ => Err(io::Error::from_raw_os_error(libc::ENODATA)),
}
})
}
/// Creates a new, empty directory `name` in the directory `parent`.
///
/// The directory can only be created in a `VerifiedNewDirectory`. A `ReadonlyDirectory` parent
/// yields `EACCES`, any other parent yields `EBADF`, and an existing name yields `EEXIST`. The
/// new inode starts with a reference count of one.
///
/// The effective mode is `mode & !umask`. The same masked value is both stored with the
/// directory and reported in the returned attributes, so the attributes cached by the kernel
/// agree with what a later `getattr` reports.
fn mkdir(
    &self,
    _ctx: Context,
    parent: Self::Inode,
    name: &CStr,
    mode: u32,
    umask: u32,
) -> io::Result<Entry> {
    // Apply the umask once, up front, so the stored mode and the returned stat cannot diverge.
    let mode = mode & !umask;
    let new_inode = self.create_new_entry_with_ref_count(
        parent,
        name,
        |parent_entry, basename, new_inode| match parent_entry {
            AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                if dir.has_entry(basename) {
                    return Err(io::Error::from_raw_os_error(libc::EEXIST));
                }
                let (new_dir, new_attr) = dir.mkdir(basename, new_inode, mode)?;
                Ok(AuthFsEntry::VerifiedNewDirectory { dir: new_dir, attr: new_attr })
            }
            AuthFsEntry::ReadonlyDirectory { .. } => {
                Err(io::Error::from_raw_os_error(libc::EACCES))
            }
            _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
        },
    )?;

    Ok(Entry {
        inode: new_inode,
        generation: 0,
        attr: create_dir_stat(new_inode, /* file_number */ 0, AccessMode::Variable(mode))?,
        entry_timeout: DEFAULT_METADATA_TIMEOUT,
        attr_timeout: DEFAULT_METADATA_TIMEOUT,
    })
}
fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
let mut inode_table = self.inode_table.write().unwrap();
handle_inode_mut_locked(
&mut inode_table,
&parent,
|InodeState { entry, unlinked, .. }| match entry {
AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
let basename: &Path = cstr_to_path(name);
// Delete the file from in both the local and remote directories.
let _inode = dir.delete_file(basename)?;
*unlinked = true;
Ok(())
}
AuthFsEntry::ReadonlyDirectory { .. } => {
Err(io::Error::from_raw_os_error(libc::EACCES))
}
AuthFsEntry::VerifiedNew { .. } => {
// Deleting a entry in filesystem root is not currently supported.
Err(io::Error::from_raw_os_error(libc::ENOSYS))
}
AuthFsEntry::UnverifiedReadonly { .. } | AuthFsEntry::VerifiedReadonly { .. } => {
Err(io::Error::from_raw_os_error(libc::ENOTDIR))
}
},
)
}
fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
let mut inode_table = self.inode_table.write().unwrap();
// Check before actual removal, with readonly borrow.
handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
let basename: &Path = cstr_to_path(name);
let existing_inode = dir.find_inode(basename)?;
handle_inode_locked(&inode_table, &existing_inode, |inode_state| {
inode_state.entry.expect_empty_deletable_directory()
})
}
AuthFsEntry::ReadonlyDirectory { .. } => {
Err(io::Error::from_raw_os_error(libc::EACCES))
}
_ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
})?;
// Look up again, this time with mutable borrow. This needs to be done separately because
// the previous lookup needs to borrow multiple entry references in the table.
handle_inode_mut_locked(
&mut inode_table,
&parent,
|InodeState { entry, unlinked, .. }| match entry {
AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
let basename: &Path = cstr_to_path(name);
let _inode = dir.force_delete_directory(basename)?;
*unlinked = true;
Ok(())
}
_ => unreachable!("Mismatched entry type that is just checked"),
},
)
}
/// Opens the directory at `inode` by taking a snapshot of its current entries and binding the
/// snapshot to a newly allocated handle.
///
/// Fails with `ENOTDIR` if the inode is not a directory, and with `ENOENT` if it is unknown.
fn opendir(
    &self,
    _ctx: Context,
    inode: Self::Inode,
    _flags: u32,
) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
    // The inode table lock is released before the handle table is touched.
    let snapshot = self.handle_inode(&inode, |entry| match entry {
        AuthFsEntry::ReadonlyDirectory { dir } => dir.retrieve_entries(),
        AuthFsEntry::VerifiedNewDirectory { dir, .. } => dir.retrieve_entries(),
        _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
    })?;
    self.open_dir_store_snapshot(snapshot)
}
fn readdir(
&self,
_ctx: Context,
_inode: Self::Inode,
handle: Self::Handle,
_size: u32,
offset: u64,
) -> io::Result<Self::DirIter> {
let dir_handle_table = self.dir_handle_table.read().unwrap();
if let Some(entry) = dir_handle_table.get(&handle) {
Ok(DirEntriesSnapshotIterator {
snapshot: entry.clone(),
prev_offset: offset.try_into().unwrap(),
})
} else {
Err(io::Error::from_raw_os_error(libc::EBADF))
}
}
/// Releases the directory `handle`, dropping the snapshot that was bound to it.
///
/// Panics if `handle` is not a known directory handle.
fn releasedir(
    &self,
    _ctx: Context,
    inode: Self::Inode,
    _flags: u32,
    handle: Self::Handle,
) -> io::Result<()> {
    let mut table = self.dir_handle_table.write().unwrap();
    let released = table.remove(&handle);
    if released.is_some() {
        return Ok(());
    }
    unreachable!("Unknown directory handle {}, inode {}", handle, inode);
}
/// Reports filesystem statistics, based on the stats of the remote filesystem.
///
/// Block and inode availability, block size and the maximum filename length are taken from the
/// remote stats; the number of files is the number of inodes currently in the inode table. Fails
/// with whatever error the remote stats reader returns.
fn statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64> {
let remote_stat = self.remote_fs_stats_reader.statfs()?;
// SAFETY: We are zero-initializing a struct with only POD fields. Not all fields
// matter to FUSE. See also:
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/fuse/inode.c?h=v5.15#n460
let mut st: libc::statvfs64 = unsafe { zeroed() };
// Use the remote stat as a template, since it'd matter the most to consider the writable
// files/directories that are written to the remote.
st.f_bsize = remote_stat.block_size;
st.f_frsize = remote_stat.fragment_size;
st.f_blocks = remote_stat.block_numbers;
st.f_bavail = remote_stat.block_available;
st.f_favail = remote_stat.inodes_available;
st.f_namemax = remote_stat.max_filename;
// Assuming we are not privileged to use all free spaces on the remote server, set the free
// blocks/fragment to the same available amount.
st.f_bfree = st.f_bavail;
st.f_ffree = st.f_favail;
// Number of inodes on the filesystem
st.f_files = self.inode_table.read().unwrap().len() as u64;
Ok(st)
}
}
fn handle_inode_locked<F, R>(
inode_table: &BTreeMap<Inode, InodeState>,
inode: &Inode,
handle_fn: F,
) -> io::Result<R>
where
F: FnOnce(&InodeState) -> io::Result<R>,
{
if let Some(inode_state) = inode_table.get(inode) {
handle_fn(inode_state)
} else {
Err(io::Error::from_raw_os_error(libc::ENOENT))
}
}
fn handle_inode_mut_locked<F, R>(
inode_table: &mut BTreeMap<Inode, InodeState>,
inode: &Inode,
handle_fn: F,
) -> io::Result<R>
where
F: FnOnce(&mut InodeState) -> io::Result<R>,
{
if let Some(inode_state) = inode_table.get_mut(inode) {
handle_fn(inode_state)
} else {
Err(io::Error::from_raw_os_error(libc::ENOENT))
}
}
fn check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()> {
if valid.contains(SetattrValid::UID) {
warn!("Changing st_uid is not currently supported");
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
if valid.contains(SetattrValid::GID) {
warn!("Changing st_gid is not currently supported");
return Err(io::Error::from_raw_os_error(libc::ENOSYS));
}
if valid.intersects(
SetattrValid::CTIME
| SetattrValid::ATIME
| SetattrValid::ATIME_NOW
| SetattrValid::MTIME
| SetattrValid::MTIME_NOW,
) {
debug!("Ignoring ctime/atime/mtime change as authfs does not maintain timestamp currently");
}
Ok(())
}
/// Views the bytes of `cstr` (without the trailing NUL) as a `Path`, without copying.
fn cstr_to_path(cstr: &CStr) -> &Path {
    let raw_bytes = cstr.to_bytes();
    Path::new(OsStr::from_bytes(raw_bytes))
}