| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| From: Eugene Zemtsov <ezemtsov@google.com> |
| Date: Mon, 18 Nov 2019 20:21:06 -0800 |
| Subject: ANDROID: Initial commit of Incremental FS |
| |
| Fully working incremental fs filesystem |
| |
| [CPNOTE: 20/07/21] Lee: Asked Paul to open an OoT bug to follow progress |
| |
| Bug: 133435829 |
| Signed-off-by: Eugene Zemtsov <ezemtsov@google.com> |
| Signed-off-by: Paul Lawrence <paullawrence@google.com> |
| [Lee: Squashed all subsequent changes into this initial patch] |
| Signed-off-by: Lee Jones <lee.jones@linaro.org> |
| Change-Id: I02cce0b654d0ef74de0a190d30907410b23ab160 |
| Signed-off-by: Lee Jones <joneslee@google.com> |
| --- |
| Documentation/ABI/testing/sysfs-fs-incfs | 70 + |
| Documentation/filesystems/incfs.rst | 85 + |
| MAINTAINERS | 7 + |
| fs/Kconfig | 1 + |
| fs/Makefile | 1 + |
| fs/incfs/Kconfig | 15 + |
| fs/incfs/Makefile | 13 + |
| fs/incfs/data_mgmt.c | 1889 ++++++++++++++++++++ |
| fs/incfs/data_mgmt.h | 551 ++++++ |
| fs/incfs/format.c | 752 ++++++++ |
| fs/incfs/format.h | 408 +++++ |
| fs/incfs/integrity.c | 235 +++ |
| fs/incfs/integrity.h | 56 + |
| fs/incfs/internal.h | 23 + |
| fs/incfs/main.c | 48 + |
| fs/incfs/pseudo_files.c | 1394 +++++++++++++++ |
| fs/incfs/pseudo_files.h | 20 + |
| fs/incfs/sysfs.c | 205 +++ |
| fs/incfs/sysfs.h | 22 + |
| fs/incfs/verity.c | 821 +++++++++ |
| fs/incfs/verity.h | 49 + |
| fs/incfs/vfs.c | 1994 ++++++++++++++++++++++ |
| fs/incfs/vfs.h | 33 + |
| include/uapi/linux/incrementalfs.h | 590 +++++++ |
| 24 files changed, 9282 insertions(+) |
| create mode 100644 Documentation/ABI/testing/sysfs-fs-incfs |
| create mode 100644 Documentation/filesystems/incfs.rst |
| create mode 100644 fs/incfs/Kconfig |
| create mode 100644 fs/incfs/Makefile |
| create mode 100644 fs/incfs/data_mgmt.c |
| create mode 100644 fs/incfs/data_mgmt.h |
| create mode 100644 fs/incfs/format.c |
| create mode 100644 fs/incfs/format.h |
| create mode 100644 fs/incfs/integrity.c |
| create mode 100644 fs/incfs/integrity.h |
| create mode 100644 fs/incfs/internal.h |
| create mode 100644 fs/incfs/main.c |
| create mode 100644 fs/incfs/pseudo_files.c |
| create mode 100644 fs/incfs/pseudo_files.h |
| create mode 100644 fs/incfs/sysfs.c |
| create mode 100644 fs/incfs/sysfs.h |
| create mode 100644 fs/incfs/verity.c |
| create mode 100644 fs/incfs/verity.h |
| create mode 100644 fs/incfs/vfs.c |
| create mode 100644 fs/incfs/vfs.h |
| create mode 100644 include/uapi/linux/incrementalfs.h |
| |
| diff --git a/Documentation/ABI/testing/sysfs-fs-incfs b/Documentation/ABI/testing/sysfs-fs-incfs |
| new file mode 100644 |
| --- /dev/null |
| +++ b/Documentation/ABI/testing/sysfs-fs-incfs |
| @@ -0,0 +1,70 @@ |
| +What: /sys/fs/incremental-fs/features/corefs |
| +Date: 2019 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Reads 'supported'. Always present. |
| + |
| +What: /sys/fs/incremental-fs/features/v2 |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Reads 'supported'. Present if all v2 features of incfs are |
| + supported. |
| + |
| +What: /sys/fs/incremental-fs/features/zstd |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Reads 'supported'. Present if zstd compression is supported |
| + for data blocks. |
| + |
| +What: /sys/fs/incremental-fs/features/bugfix_throttling |
| +Date: January 2023 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Reads 'supported'. Present if the throttling lock bug is fixed |
| +		https://android-review.googlesource.com/c/kernel/common/+/2381827 |
| + |
| +What: /sys/fs/incremental-fs/instances/[name] |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Folder created when incfs is mounted with the sysfs_name=[name] |
| + option. If this option is used, the following values are created |
| + in this folder. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_min |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns a count of the number of reads that were delayed as a |
| + result of the per UID read timeouts min time setting. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_min_us |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns total delay time for all files since first mount as a |
| + result of the per UID read timeouts min time setting. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_pending |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns a count of the number of reads that were delayed as a |
| + result of waiting for a pending read. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_delayed_pending_us |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns total delay time for all files since first mount as a |
| + result of waiting for a pending read. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_failed_hash_verification |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns number of reads that failed because of hash verification |
| + failures. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_failed_other |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns number of reads that failed for reasons other than |
| + timing out or hash failures. |
| + |
| +What: /sys/fs/incremental-fs/instances/[name]/reads_failed_timed_out |
| +Date: April 2021 |
| +Contact: Paul Lawrence <paullawrence@google.com> |
| +Description: Returns number of reads that timed out. |
| diff --git a/Documentation/filesystems/incfs.rst b/Documentation/filesystems/incfs.rst |
| new file mode 100644 |
| --- /dev/null |
| +++ b/Documentation/filesystems/incfs.rst |
| @@ -0,0 +1,85 @@ |
| +.. SPDX-License-Identifier: GPL-2.0 |
| + |
| +================================================= |
| +incfs: A stacked incremental filesystem for Linux |
| +================================================= |
| + |
| +/sys/fs interface |
| +================= |
| + |
| +Please update Documentation/ABI/testing/sysfs-fs-incfs if you update this |
| +section. |
| + |
| +incfs creates the following files in /sys/fs. |
| + |
| +Features |
| +-------- |
| + |
| +/sys/fs/incremental-fs/features/corefs |
| + Reads 'supported'. Always present. |
| + |
| +/sys/fs/incremental-fs/features/v2 |
| + Reads 'supported'. Present if all v2 features of incfs are supported. These |
| + are: |
| + fs-verity support |
| + inotify support |
| +    ioctls: |
| + INCFS_IOC_SET_READ_TIMEOUTS |
| + INCFS_IOC_GET_READ_TIMEOUTS |
| + INCFS_IOC_GET_BLOCK_COUNT |
| + INCFS_IOC_CREATE_MAPPED_FILE |
| + .incomplete folder |
| + .blocks_written pseudo file |
| + report_uid mount option |
| + |
| +/sys/fs/incremental-fs/features/zstd |
| + Reads 'supported'. Present if zstd compression is supported for data blocks. |
| + |
| +/sys/fs/incremental-fs/features/bugfix_throttling |
| + Reads 'supported'. Present if the throttling lock bug is fixed |
| + |
| +Optional per mount |
| +------------------ |
| + |
| +For each incfs mount, the mount option sysfs_name=[name] creates a /sys/fs |
| +node called: |
| + |
| +/sys/fs/incremental-fs/instances/[name] |
| + |
| +This will contain the following files: |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_delayed_min |
| + Returns a count of the number of reads that were delayed as a result of the |
| + per UID read timeouts min time setting. |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_delayed_min_us |
| + Returns total delay time for all files since first mount as a result of the |
| + per UID read timeouts min time setting. |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending |
| + Returns a count of the number of reads that were delayed as a result of |
| + waiting for a pending read. |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_delayed_pending_us |
| + Returns total delay time for all files since first mount as a result of |
| + waiting for a pending read. |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_failed_hash_verification |
| + Returns number of reads that failed because of hash verification failures. |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_failed_other |
| + Returns number of reads that failed for reasons other than timing out or |
| + hash failures. |
| + |
| +/sys/fs/incremental-fs/instances/[name]/reads_failed_timed_out |
| + Returns number of reads that timed out. |
| + |
| +For reads_delayed_*** settings, note that a file can count for both |
| +reads_delayed_min and reads_delayed_pending if incfs first waits for a pending |
| +read then has to wait further for the min time. In that case, the time spent |
| +waiting is split between reads_delayed_pending_us, which is increased by the |
| +time spent waiting for the pending read, and reads_delayed_min_us, which is |
| +increased by the remainder of the time spent waiting. |
| + |
| +Reads that timed out are not added to the reads_delayed_pending or the |
| +reads_delayed_pending_us counters. |
| diff --git a/MAINTAINERS b/MAINTAINERS |
| --- a/MAINTAINERS |
| +++ b/MAINTAINERS |
| @@ -10373,6 +10373,13 @@ F: Documentation/hwmon/ina2xx.rst |
| F: drivers/hwmon/ina2xx.c |
| F: include/linux/platform_data/ina2xx.h |
| |
| +INCREMENTAL FILE SYSTEM |
| +M: Paul Lawrence <paullawrence@google.com> |
| +L: linux-unionfs@vger.kernel.org |
| +S: Supported |
| +F: fs/incfs/ |
| +F: tools/testing/selftests/filesystems/incfs/ |
| + |
| INDEX OF FURTHER KERNEL DOCUMENTATION |
| M: Carlos Bilbao <carlos.bilbao@amd.com> |
| S: Maintained |
| diff --git a/fs/Kconfig b/fs/Kconfig |
| --- a/fs/Kconfig |
| +++ b/fs/Kconfig |
| @@ -136,6 +136,7 @@ source "fs/quota/Kconfig" |
| source "fs/autofs/Kconfig" |
| source "fs/fuse/Kconfig" |
| source "fs/overlayfs/Kconfig" |
| +source "fs/incfs/Kconfig" |
| |
| menu "Caches" |
| |
| diff --git a/fs/Makefile b/fs/Makefile |
| --- a/fs/Makefile |
| +++ b/fs/Makefile |
| @@ -106,6 +106,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/ |
| obj-$(CONFIG_FUSE_FS) += fuse/ |
| obj-$(CONFIG_OVERLAY_FS) += overlayfs/ |
| obj-$(CONFIG_ORANGEFS_FS) += orangefs/ |
| +obj-$(CONFIG_INCREMENTAL_FS) += incfs/ |
| obj-$(CONFIG_UDF_FS) += udf/ |
| obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ |
| obj-$(CONFIG_OMFS_FS) += omfs/ |
| diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/Kconfig |
| @@ -0,0 +1,15 @@ |
| +config INCREMENTAL_FS |
| +	tristate "Incremental file system support" |
| +	depends on BLOCK |
| +	# incfs does not verify fsverity builtin signatures. |
| +	depends on !FS_VERITY_BUILTIN_SIGNATURES |
| +	select DECOMPRESS_LZ4 |
| +	select DECOMPRESS_ZSTD |
| +	select CRYPTO_SHA256 |
| +	help |
| +	  Incremental FS is a read-only virtual file system that facilitates execution |
| +	  of programs while their binaries are still being lazily downloaded over the |
| +	  network, USB or pigeon post. |
| + |
| +	  To compile this file system support as a module, choose M here: the |
| +	  module will be called incrementalfs. |
| diff --git a/fs/incfs/Makefile b/fs/incfs/Makefile |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/Makefile |
| @@ -0,0 +1,13 @@ |
| +# SPDX-License-Identifier: GPL-2.0 |
| +obj-$(CONFIG_INCREMENTAL_FS) += incrementalfs.o |
| + |
| +incrementalfs-y := \ |
| + data_mgmt.o \ |
| + format.o \ |
| + integrity.o \ |
| + main.o \ |
| + pseudo_files.o \ |
| + sysfs.o \ |
| + vfs.o |
| + |
| +incrementalfs-$(CONFIG_FS_VERITY) += verity.o |
| diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/data_mgmt.c |
| @@ -0,0 +1,1889 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2019 Google LLC |
| + */ |
| +#include <linux/crc32.h> |
| +#include <linux/file.h> |
| +#include <linux/fsverity.h> |
| +#include <linux/gfp.h> |
| +#include <linux/kobject.h> |
| +#include <linux/ktime.h> |
| +#include <linux/lz4.h> |
| +#include <linux/mm.h> |
| +#include <linux/namei.h> |
| +#include <linux/pagemap.h> |
| +#include <linux/slab.h> |
| +#include <linux/types.h> |
| +#include <linux/workqueue.h> |
| + |
| +#include "data_mgmt.h" |
| +#include "format.h" |
| +#include "integrity.h" |
| +#include "sysfs.h" |
| +#include "verity.h" |
| + |
| +static int incfs_scan_metadata_chain(struct data_file *df); |
| + |
| +static void log_wake_up_all(struct work_struct *work) |
| +{ |
| + struct delayed_work *dw = container_of(work, struct delayed_work, work); |
| + struct read_log *rl = container_of(dw, struct read_log, ml_wakeup_work); |
| + wake_up_all(&rl->ml_notif_wq); |
| +} |
| + |
| +static void zstd_free_workspace(struct work_struct *work) |
| +{ |
| + struct delayed_work *dw = container_of(work, struct delayed_work, work); |
| + struct mount_info *mi = |
| + container_of(dw, struct mount_info, mi_zstd_cleanup_work); |
| + |
| + mutex_lock(&mi->mi_zstd_workspace_mutex); |
| + kvfree(mi->mi_zstd_workspace); |
| + mi->mi_zstd_workspace = NULL; |
| + mi->mi_zstd_stream = NULL; |
| + mutex_unlock(&mi->mi_zstd_workspace_mutex); |
| +} |
| + |
| +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, |
| + struct mount_options *options, |
| + struct path *backing_dir_path) |
| +{ |
| + struct mount_info *mi = NULL; |
| + int error = 0; |
| + struct incfs_sysfs_node *node; |
| + |
| + mi = kzalloc(sizeof(*mi), GFP_NOFS); |
| + if (!mi) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + mi->mi_sb = sb; |
| + mi->mi_backing_dir_path = *backing_dir_path; |
| + mi->mi_owner = get_current_cred(); |
| + path_get(&mi->mi_backing_dir_path); |
| + mutex_init(&mi->mi_dir_struct_mutex); |
| + init_waitqueue_head(&mi->mi_pending_reads_notif_wq); |
| + init_waitqueue_head(&mi->mi_log.ml_notif_wq); |
| + init_waitqueue_head(&mi->mi_blocks_written_notif_wq); |
| + atomic_set(&mi->mi_blocks_written, 0); |
| + INIT_DELAYED_WORK(&mi->mi_log.ml_wakeup_work, log_wake_up_all); |
| + spin_lock_init(&mi->mi_log.rl_lock); |
| + spin_lock_init(&mi->pending_read_lock); |
| + INIT_LIST_HEAD(&mi->mi_reads_list_head); |
| + spin_lock_init(&mi->mi_per_uid_read_timeouts_lock); |
| + mutex_init(&mi->mi_zstd_workspace_mutex); |
| + INIT_DELAYED_WORK(&mi->mi_zstd_cleanup_work, zstd_free_workspace); |
| + mutex_init(&mi->mi_le_mutex); |
| + |
| + node = incfs_add_sysfs_node(options->sysfs_name, mi); |
| + if (IS_ERR(node)) { |
| + error = PTR_ERR(node); |
| + goto err; |
| + } |
| + mi->mi_sysfs_node = node; |
| + |
| + error = incfs_realloc_mount_info(mi, options); |
| + if (error) |
| + goto err; |
| + |
| + return mi; |
| + |
| +err: |
| + incfs_free_mount_info(mi); |
| + return ERR_PTR(error); |
| +} |
| + |
| +int incfs_realloc_mount_info(struct mount_info *mi, |
| + struct mount_options *options) |
| +{ |
| + void *new_buffer = NULL; |
| + void *old_buffer; |
| + size_t new_buffer_size = 0; |
| + |
| + if (options->read_log_pages != mi->mi_options.read_log_pages) { |
| + struct read_log_state log_state; |
| + /* |
| + * Even though having two buffers allocated at once isn't |
| + * usually good, allocating a multipage buffer under a spinlock |
| + * is even worse, so let's optimize for the shorter lock |
| + * duration. It's not end of the world if we fail to increase |
| + * the buffer size anyway. |
| + */ |
| + if (options->read_log_pages > 0) { |
| + new_buffer_size = PAGE_SIZE * options->read_log_pages; |
| + new_buffer = kzalloc(new_buffer_size, GFP_NOFS); |
| + if (!new_buffer) |
| + return -ENOMEM; |
| + } |
| + |
| + spin_lock(&mi->mi_log.rl_lock); |
| + old_buffer = mi->mi_log.rl_ring_buf; |
| + mi->mi_log.rl_ring_buf = new_buffer; |
| + mi->mi_log.rl_size = new_buffer_size; |
| + log_state = (struct read_log_state){ |
| + .generation_id = mi->mi_log.rl_head.generation_id + 1, |
| + }; |
| + mi->mi_log.rl_head = log_state; |
| + mi->mi_log.rl_tail = log_state; |
| + spin_unlock(&mi->mi_log.rl_lock); |
| + |
| + kfree(old_buffer); |
| + } |
| + |
| + if (options->sysfs_name && !mi->mi_sysfs_node) |
| + mi->mi_sysfs_node = incfs_add_sysfs_node(options->sysfs_name, |
| + mi); |
| + else if (!options->sysfs_name && mi->mi_sysfs_node) { |
| + incfs_free_sysfs_node(mi->mi_sysfs_node); |
| + mi->mi_sysfs_node = NULL; |
| + } else if (options->sysfs_name && |
| + strcmp(options->sysfs_name, |
| + kobject_name(&mi->mi_sysfs_node->isn_sysfs_node))) { |
| + incfs_free_sysfs_node(mi->mi_sysfs_node); |
| + mi->mi_sysfs_node = incfs_add_sysfs_node(options->sysfs_name, |
| + mi); |
| + } |
| + |
| + if (IS_ERR(mi->mi_sysfs_node)) { |
| + int err = PTR_ERR(mi->mi_sysfs_node); |
| + |
| + mi->mi_sysfs_node = NULL; |
| + return err; |
| + } |
| + |
| + mi->mi_options = *options; |
| + return 0; |
| +} |
| + |
| +void incfs_free_mount_info(struct mount_info *mi) |
| +{ |
| + int i; |
| + if (!mi) |
| + return; |
| + |
| + flush_delayed_work(&mi->mi_log.ml_wakeup_work); |
| + flush_delayed_work(&mi->mi_zstd_cleanup_work); |
| + |
| + dput(mi->mi_index_dir); |
| + dput(mi->mi_incomplete_dir); |
| + path_put(&mi->mi_backing_dir_path); |
| + mutex_destroy(&mi->mi_dir_struct_mutex); |
| + mutex_destroy(&mi->mi_zstd_workspace_mutex); |
| + put_cred(mi->mi_owner); |
| + kfree(mi->mi_log.rl_ring_buf); |
| + for (i = 0; i < ARRAY_SIZE(mi->pseudo_file_xattr); ++i) |
| + kfree(mi->pseudo_file_xattr[i].data); |
| + kfree(mi->mi_per_uid_read_timeouts); |
| + incfs_free_sysfs_node(mi->mi_sysfs_node); |
| + kfree(mi); |
| +} |
| + |
| +static void data_file_segment_init(struct data_file_segment *segment) |
| +{ |
| + init_waitqueue_head(&segment->new_data_arrival_wq); |
| + init_rwsem(&segment->rwsem); |
| + INIT_LIST_HEAD(&segment->reads_list_head); |
| +} |
| + |
| +char *file_id_to_str(incfs_uuid_t id) |
| +{ |
| + char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); |
| + char *end; |
| + |
| + if (!result) |
| + return NULL; |
| + |
| + end = bin2hex(result, id.bytes, sizeof(id.bytes)); |
| + *end = 0; |
| + return result; |
| +} |
| + |
| +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name) |
| +{ |
| + struct inode *inode; |
| + struct dentry *result = NULL; |
| + |
| + if (!parent) |
| + return ERR_PTR(-EFAULT); |
| + |
| + inode = d_inode(parent); |
| + inode_lock_nested(inode, I_MUTEX_PARENT); |
| + result = lookup_one_len(name, parent, strlen(name)); |
| + inode_unlock(inode); |
| + |
| + if (IS_ERR(result)) |
| + pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); |
| + |
| + return result; |
| +} |
| + |
| +static struct data_file *handle_mapped_file(struct mount_info *mi, |
| + struct data_file *df) |
| +{ |
| + char *file_id_str; |
| + struct dentry *index_file_dentry; |
| + struct path path; |
| + struct file *bf; |
| + struct data_file *result = NULL; |
| + const struct cred *old_cred; |
| + |
| + file_id_str = file_id_to_str(df->df_id); |
| + if (!file_id_str) |
| + return ERR_PTR(-ENOENT); |
| + |
| + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, |
| + file_id_str); |
| + kfree(file_id_str); |
| + if (!index_file_dentry) |
| + return ERR_PTR(-ENOENT); |
| + if (IS_ERR(index_file_dentry)) |
| + return ERR_CAST(index_file_dentry); |
| + if (!d_really_is_positive(index_file_dentry)) { |
| + result = ERR_PTR(-ENOENT); |
| + goto out; |
| + } |
| + |
| + path = (struct path) { |
| + .mnt = mi->mi_backing_dir_path.mnt, |
| + .dentry = index_file_dentry |
| + }; |
| + |
| + old_cred = override_creds(mi->mi_owner); |
| + bf = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE, |
| + current_cred()); |
| + revert_creds(old_cred); |
| + |
| + if (IS_ERR(bf)) { |
| + result = ERR_CAST(bf); |
| + goto out; |
| + } |
| + |
| + result = incfs_open_data_file(mi, bf); |
| + fput(bf); |
| + if (IS_ERR(result)) |
| + goto out; |
| + |
| + result->df_mapped_offset = df->df_metadata_off; |
| + |
| +out: |
| + dput(index_file_dentry); |
| + return result; |
| +} |
| + |
| +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) |
| +{ |
| + struct data_file *df = NULL; |
| + struct backing_file_context *bfc = NULL; |
| + int md_records; |
| + u64 size; |
| + int error = 0; |
| + int i; |
| + |
| + if (!bf || !mi) |
| + return ERR_PTR(-EFAULT); |
| + |
| + if (!S_ISREG(bf->f_inode->i_mode)) |
| + return ERR_PTR(-EBADF); |
| + |
| + bfc = incfs_alloc_bfc(mi, bf); |
| + if (IS_ERR(bfc)) |
| + return ERR_CAST(bfc); |
| + |
| + df = kzalloc(sizeof(*df), GFP_NOFS); |
| + if (!df) { |
| + error = -ENOMEM; |
| + goto out; |
| + } |
| + |
| + mutex_init(&df->df_enable_verity); |
| + |
| + df->df_backing_file_context = bfc; |
| + df->df_mount_info = mi; |
| + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) |
| + data_file_segment_init(&df->df_segments[i]); |
| + |
| + error = incfs_read_file_header(bfc, &df->df_metadata_off, &df->df_id, |
| + &size, &df->df_header_flags); |
| + |
| + if (error) |
| + goto out; |
| + |
| + df->df_size = size; |
| + if (size > 0) |
| + df->df_data_block_count = get_blocks_count_for_size(size); |
| + |
| + if (df->df_header_flags & INCFS_FILE_MAPPED) { |
| + struct data_file *mapped_df = handle_mapped_file(mi, df); |
| + |
| + incfs_free_data_file(df); |
| + return mapped_df; |
| + } |
| + |
| + md_records = incfs_scan_metadata_chain(df); |
| + if (md_records < 0) |
| + error = md_records; |
| + |
| +out: |
| + if (error) { |
| + incfs_free_bfc(bfc); |
| + if (df) |
| + df->df_backing_file_context = NULL; |
| + incfs_free_data_file(df); |
| + return ERR_PTR(error); |
| + } |
| + return df; |
| +} |
| + |
| +void incfs_free_data_file(struct data_file *df) |
| +{ |
| + u32 data_blocks_written, hash_blocks_written; |
| + |
| + if (!df) |
| + return; |
| + |
| + data_blocks_written = atomic_read(&df->df_data_blocks_written); |
| + hash_blocks_written = atomic_read(&df->df_hash_blocks_written); |
| + |
| + if (data_blocks_written != df->df_initial_data_blocks_written || |
| + hash_blocks_written != df->df_initial_hash_blocks_written) { |
| + struct backing_file_context *bfc = df->df_backing_file_context; |
| + int error = -1; |
| + |
| + if (bfc && !mutex_lock_interruptible(&bfc->bc_mutex)) { |
| + error = incfs_write_status_to_backing_file( |
| + df->df_backing_file_context, |
| + df->df_status_offset, |
| + data_blocks_written, |
| + hash_blocks_written); |
| + mutex_unlock(&bfc->bc_mutex); |
| + } |
| + |
| + if (error) |
| + /* Nothing can be done, just warn */ |
| + pr_warn("incfs: failed to write status to backing file\n"); |
| + } |
| + |
| + incfs_free_mtree(df->df_hash_tree); |
| + incfs_free_bfc(df->df_backing_file_context); |
| + kfree(df->df_signature); |
| + kfree(df->df_verity_file_digest.data); |
| + kfree(df->df_verity_signature); |
| + mutex_destroy(&df->df_enable_verity); |
| + kfree(df); |
| +} |
| + |
| +int make_inode_ready_for_data_ops(struct mount_info *mi, |
| + struct inode *inode, |
| + struct file *backing_file) |
| +{ |
| + struct inode_info *node = get_incfs_node(inode); |
| + struct data_file *df = NULL; |
| + int err = 0; |
| + |
| + inode_lock(inode); |
| + if (S_ISREG(inode->i_mode)) { |
| + if (!node->n_file) { |
| + df = incfs_open_data_file(mi, backing_file); |
| + |
| + if (IS_ERR(df)) |
| + err = PTR_ERR(df); |
| + else |
| + node->n_file = df; |
| + } |
| + } else |
| + err = -EBADF; |
| + inode_unlock(inode); |
| + return err; |
| +} |
| + |
| +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) |
| +{ |
| + struct dir_file *dir = NULL; |
| + |
| + if (!S_ISDIR(bf->f_inode->i_mode)) |
| + return ERR_PTR(-EBADF); |
| + |
| + dir = kzalloc(sizeof(*dir), GFP_NOFS); |
| + if (!dir) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + dir->backing_dir = get_file(bf); |
| + dir->mount_info = mi; |
| + return dir; |
| +} |
| + |
| +void incfs_free_dir_file(struct dir_file *dir) |
| +{ |
| + if (!dir) |
| + return; |
| + if (dir->backing_dir) |
| + fput(dir->backing_dir); |
| + kfree(dir); |
| +} |
| + |
| +static ssize_t zstd_decompress_safe(struct mount_info *mi, |
| + struct mem_range src, struct mem_range dst) |
| +{ |
| + ssize_t result; |
| + ZSTD_inBuffer inbuf = {.src = src.data, .size = src.len}; |
| + ZSTD_outBuffer outbuf = {.dst = dst.data, .size = dst.len}; |
| + |
| + result = mutex_lock_interruptible(&mi->mi_zstd_workspace_mutex); |
| + if (result) |
| + return result; |
| + |
| + if (!mi->mi_zstd_stream) { |
| + unsigned int workspace_size = zstd_dstream_workspace_bound( |
| + INCFS_DATA_FILE_BLOCK_SIZE); |
| + void *workspace = kvmalloc(workspace_size, GFP_NOFS); |
| + ZSTD_DStream *stream; |
| + |
| + if (!workspace) { |
| + result = -ENOMEM; |
| + goto out; |
| + } |
| + |
| + stream = zstd_init_dstream(INCFS_DATA_FILE_BLOCK_SIZE, workspace, |
| + workspace_size); |
| + if (!stream) { |
| + kvfree(workspace); |
| + result = -EIO; |
| + goto out; |
| + } |
| + |
| + mi->mi_zstd_workspace = workspace; |
| + mi->mi_zstd_stream = stream; |
| + } |
| + |
| + result = zstd_decompress_stream(mi->mi_zstd_stream, &outbuf, &inbuf) ? |
| + -EBADMSG : outbuf.pos; |
| + |
| + mod_delayed_work(system_wq, &mi->mi_zstd_cleanup_work, |
| + msecs_to_jiffies(5000)); |
| + |
| +out: |
| + mutex_unlock(&mi->mi_zstd_workspace_mutex); |
| + return result; |
| +} |
| + |
| +static ssize_t decompress(struct mount_info *mi, |
| + struct mem_range src, struct mem_range dst, int alg) |
| +{ |
| + int result; |
| + |
| + switch (alg) { |
| + case INCFS_BLOCK_COMPRESSED_LZ4: |
| + result = LZ4_decompress_safe(src.data, dst.data, src.len, |
| + dst.len); |
| + if (result < 0) |
| + return -EBADMSG; |
| + return result; |
| + |
| + case INCFS_BLOCK_COMPRESSED_ZSTD: |
| + return zstd_decompress_safe(mi, src, dst); |
| + |
| + default: |
| + WARN_ON(true); |
| + return -EOPNOTSUPP; |
| + } |
| +} |
| + |
| +static void log_read_one_record(struct read_log *rl, struct read_log_state *rs) |
| +{ |
| + union log_record *record = |
| + (union log_record *)((u8 *)rl->rl_ring_buf + rs->next_offset); |
| + size_t record_size; |
| + |
| + switch (record->full_record.type) { |
| + case FULL: |
| + rs->base_record = record->full_record; |
| + record_size = sizeof(record->full_record); |
| + break; |
| + |
| + case SAME_FILE: |
| + rs->base_record.block_index = |
| + record->same_file.block_index; |
| + rs->base_record.absolute_ts_us += |
| + record->same_file.relative_ts_us; |
| + rs->base_record.uid = record->same_file.uid; |
| + record_size = sizeof(record->same_file); |
| + break; |
| + |
| + case SAME_FILE_CLOSE_BLOCK: |
| + rs->base_record.block_index += |
| + record->same_file_close_block.block_index_delta; |
| + rs->base_record.absolute_ts_us += |
| + record->same_file_close_block.relative_ts_us; |
| + record_size = sizeof(record->same_file_close_block); |
| + break; |
| + |
| + case SAME_FILE_CLOSE_BLOCK_SHORT: |
| + rs->base_record.block_index += |
| + record->same_file_close_block_short.block_index_delta; |
| + rs->base_record.absolute_ts_us += |
| + record->same_file_close_block_short.relative_ts_tens_us * 10; |
| + record_size = sizeof(record->same_file_close_block_short); |
| + break; |
| + |
| + case SAME_FILE_NEXT_BLOCK: |
| + ++rs->base_record.block_index; |
| + rs->base_record.absolute_ts_us += |
| + record->same_file_next_block.relative_ts_us; |
| + record_size = sizeof(record->same_file_next_block); |
| + break; |
| + |
| + case SAME_FILE_NEXT_BLOCK_SHORT: |
| + ++rs->base_record.block_index; |
| + rs->base_record.absolute_ts_us += |
| + record->same_file_next_block_short.relative_ts_tens_us * 10; |
| + record_size = sizeof(record->same_file_next_block_short); |
| + break; |
| + } |
| + |
| + rs->next_offset += record_size; |
| + if (rs->next_offset > rl->rl_size - sizeof(*record)) { |
| + rs->next_offset = 0; |
| + ++rs->current_pass_no; |
| + } |
| + ++rs->current_record_no; |
| +} |
| + |
| +static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, |
| + int block_index) |
| +{ |
| + struct read_log *log = &mi->mi_log; |
| + struct read_log_state *head, *tail; |
| + s64 now_us; |
| + s64 relative_us; |
| + union log_record record; |
| + size_t record_size; |
| + uid_t uid = current_uid().val; |
| + int block_delta; |
| + bool same_file, same_uid; |
| + bool next_block, close_block, very_close_block; |
| + bool close_time, very_close_time, very_very_close_time; |
| + |
| + /* |
| + * This may read the old value, but it's OK to delay the logging start |
| + * right after the configuration update. |
| + */ |
| + if (READ_ONCE(log->rl_size) == 0) |
| + return; |
| + |
| + now_us = ktime_to_us(ktime_get()); |
| + |
| + spin_lock(&log->rl_lock); |
| + if (log->rl_size == 0) { |
| + spin_unlock(&log->rl_lock); |
| + return; |
| + } |
| + |
| + head = &log->rl_head; |
| + tail = &log->rl_tail; |
| + relative_us = now_us - head->base_record.absolute_ts_us; |
| + |
| + same_file = !memcmp(id, &head->base_record.file_id, |
| + sizeof(incfs_uuid_t)); |
| + same_uid = uid == head->base_record.uid; |
| + |
| + block_delta = block_index - head->base_record.block_index; |
| + next_block = block_delta == 1; |
| + very_close_block = block_delta >= S8_MIN && block_delta <= S8_MAX; |
| + close_block = block_delta >= S16_MIN && block_delta <= S16_MAX; |
| + |
| + very_very_close_time = relative_us < (1 << 5) * 10; |
| + very_close_time = relative_us < (1 << 13); |
| + close_time = relative_us < (1 << 16); |
| + |
| + if (same_file && same_uid && next_block && very_very_close_time) { |
| + record.same_file_next_block_short = |
| + (struct same_file_next_block_short){ |
| + .type = SAME_FILE_NEXT_BLOCK_SHORT, |
| + .relative_ts_tens_us = div_s64(relative_us, 10), |
| + }; |
| + record_size = sizeof(struct same_file_next_block_short); |
| + } else if (same_file && same_uid && next_block && very_close_time) { |
| + record.same_file_next_block = (struct same_file_next_block){ |
| + .type = SAME_FILE_NEXT_BLOCK, |
| + .relative_ts_us = relative_us, |
| + }; |
| + record_size = sizeof(struct same_file_next_block); |
| + } else if (same_file && same_uid && very_close_block && |
| + very_very_close_time) { |
| + record.same_file_close_block_short = |
| + (struct same_file_close_block_short){ |
| + .type = SAME_FILE_CLOSE_BLOCK_SHORT, |
| + .relative_ts_tens_us = div_s64(relative_us, 10), |
| + .block_index_delta = block_delta, |
| + }; |
| + record_size = sizeof(struct same_file_close_block_short); |
| + } else if (same_file && same_uid && close_block && very_close_time) { |
| + record.same_file_close_block = (struct same_file_close_block){ |
| + .type = SAME_FILE_CLOSE_BLOCK, |
| + .relative_ts_us = relative_us, |
| + .block_index_delta = block_delta, |
| + }; |
| + record_size = sizeof(struct same_file_close_block); |
| + } else if (same_file && close_time) { |
| + record.same_file = (struct same_file){ |
| + .type = SAME_FILE, |
| + .block_index = block_index, |
| + .relative_ts_us = relative_us, |
| + .uid = uid, |
| + }; |
| + record_size = sizeof(struct same_file); |
| + } else { |
| + record.full_record = (struct full_record){ |
| + .type = FULL, |
| + .block_index = block_index, |
| + .file_id = *id, |
| + .absolute_ts_us = now_us, |
| + .uid = uid, |
| + }; |
| + head->base_record.file_id = *id; |
| + record_size = sizeof(struct full_record); |
| + } |
| + |
| + head->base_record.block_index = block_index; |
| + head->base_record.absolute_ts_us = now_us; |
| + |
| + /* Advance tail beyond area we are going to overwrite */ |
| + while (tail->current_pass_no < head->current_pass_no && |
| + tail->next_offset < head->next_offset + record_size) |
| + log_read_one_record(log, tail); |
| + |
| + memcpy(((u8 *)log->rl_ring_buf) + head->next_offset, &record, |
| + record_size); |
| + head->next_offset += record_size; |
| + if (head->next_offset > log->rl_size - sizeof(record)) { |
| + head->next_offset = 0; |
| + ++head->current_pass_no; |
| + } |
| + ++head->current_record_no; |
| + |
| + spin_unlock(&log->rl_lock); |
| + schedule_delayed_work(&log->ml_wakeup_work, msecs_to_jiffies(16)); |
| +} |
| + |
| +/* |
| + * Verify one data block of @f against the file's Merkle hash tree. |
| + * |
| + * Per-level hash blocks are cached in the backing inode's page cache just |
| + * past the data pages; a page marked PageChecked was already verified and |
| + * its stored digest is trusted directly.  Otherwise the hash block is read |
| + * from the backing file into @buf, checked against the parent digest, and |
| + * then inserted into the cache for next time. |
| + * |
| + * Returns 0 if @data matches the leaf digest (or the file has no hash |
| + * tree/signature, which is trivially valid), -EBADMSG on digest mismatch, |
| + * or a negative errno from I/O / digest calculation. |
| + */ |
| +static int validate_hash_tree(struct backing_file_context *bfc, struct file *f, |
| +			       int block_index, struct mem_range data, u8 *buf) |
| +{ |
| +	struct data_file *df = get_incfs_data_file(f); |
| +	u8 stored_digest[INCFS_MAX_HASH_SIZE] = {}; |
| +	u8 calculated_digest[INCFS_MAX_HASH_SIZE] = {}; |
| +	struct mtree *tree = NULL; |
| +	struct incfs_df_signature *sig = NULL; |
| +	int digest_size; |
| +	int hash_block_index = block_index; |
| +	int lvl; |
| +	int res; |
| +	loff_t hash_block_offset[INCFS_MAX_MTREE_LEVELS]; |
| +	size_t hash_offset_in_block[INCFS_MAX_MTREE_LEVELS]; |
| +	int hash_per_block; |
| +	pgoff_t file_pages; |
| + |
| +	/* |
| +	 * Memory barrier to make sure tree is fully present if added via enable |
| +	 * verity |
| +	 */ |
| +	tree = smp_load_acquire(&df->df_hash_tree); |
| +	sig = df->df_signature; |
| +	if (!tree || !sig) |
| +		return 0; |
| + |
| +	/* Pre-compute, for every level, where this block's hash lives. */ |
| +	digest_size = tree->alg->digest_size; |
| +	hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; |
| +	for (lvl = 0; lvl < tree->depth; lvl++) { |
| +		loff_t lvl_off = tree->hash_level_suboffset[lvl]; |
| + |
| +		hash_block_offset[lvl] = |
| +			lvl_off + round_down(hash_block_index * digest_size, |
| +					     INCFS_DATA_FILE_BLOCK_SIZE); |
| +		hash_offset_in_block[lvl] = hash_block_index * digest_size % |
| +			INCFS_DATA_FILE_BLOCK_SIZE; |
| +		hash_block_index /= hash_per_block; |
| +	} |
| + |
| +	/* Start the walk from the trusted root hash. */ |
| +	memcpy(stored_digest, tree->root_hash, digest_size); |
| + |
| +	file_pages = DIV_ROUND_UP(df->df_size, INCFS_DATA_FILE_BLOCK_SIZE); |
| +	for (lvl = tree->depth - 1; lvl >= 0; lvl--) { |
| +		pgoff_t hash_page = |
| +			file_pages + |
| +			hash_block_offset[lvl] / INCFS_DATA_FILE_BLOCK_SIZE; |
| +		struct page *page = find_get_page_flags( |
| +			f->f_inode->i_mapping, hash_page, FGP_ACCESSED); |
| + |
| +		/* Cached and already verified - take the digest directly. */ |
| +		if (page && PageChecked(page)) { |
| +			u8 *addr = kmap_atomic(page); |
| + |
| +			memcpy(stored_digest, addr + hash_offset_in_block[lvl], |
| +			       digest_size); |
| + |
| +			kunmap_atomic(addr); |
| +			put_page(page); |
| +			continue; |
| +		} |
| + |
| +		if (page) |
| +			put_page(page); |
| + |
| +		/* Not cached: read the hash block and verify it. */ |
| +		res = incfs_kread(bfc, buf, INCFS_DATA_FILE_BLOCK_SIZE, |
| +				  hash_block_offset[lvl] + sig->hash_offset); |
| +		if (res < 0) |
| +			return res; |
| +		if (res != INCFS_DATA_FILE_BLOCK_SIZE) |
| +			return -EIO; |
| +		res = incfs_calc_digest(tree->alg, |
| +					range(buf, INCFS_DATA_FILE_BLOCK_SIZE), |
| +					range(calculated_digest, digest_size)); |
| +		if (res) |
| +			return res; |
| + |
| +		if (memcmp(stored_digest, calculated_digest, digest_size)) { |
| +			int i; |
| +			bool zero = true; |
| + |
| +			pr_warn("incfs: Hash mismatch lvl:%d blk:%d\n", |
| +				lvl, block_index); |
| +			/* An all-zero digest usually means hashes not loaded. */ |
| +			for (i = 0; i < digest_size; i++) |
| +				if (stored_digest[i]) { |
| +					zero = false; |
| +					break; |
| +				} |
| + |
| +			if (zero) |
| +				pr_debug("Note saved_digest all zero - did you forget to load the hashes?\n"); |
| +			return -EBADMSG; |
| +		} |
| + |
| +		/* Descend: the child digest becomes the next expected hash. */ |
| +		memcpy(stored_digest, buf + hash_offset_in_block[lvl], |
| +		       digest_size); |
| + |
| +		/* Cache the verified hash block for future reads. */ |
| +		page = grab_cache_page(f->f_inode->i_mapping, hash_page); |
| +		if (page) { |
| +			u8 *addr = kmap_atomic(page); |
| + |
| +			memcpy(addr, buf, INCFS_DATA_FILE_BLOCK_SIZE); |
| +			kunmap_atomic(addr); |
| +			SetPageChecked(page); |
| +			SetPageUptodate(page); |
| +			unlock_page(page); |
| +			put_page(page); |
| +		} |
| +	} |
| + |
| +	/* Finally check the data block itself against the leaf digest. */ |
| +	res = incfs_calc_digest(tree->alg, data, |
| +				range(calculated_digest, digest_size)); |
| +	if (res) |
| +		return res; |
| + |
| +	if (memcmp(stored_digest, calculated_digest, digest_size)) { |
| +		pr_debug("Leaf hash mismatch blk:%d\n", block_index); |
| +		return -EBADMSG; |
| +	} |
| + |
| +	return 0; |
| +} |
| + |
| +/* Map a block index to its segment; blocks are striped over the array. */ |
| +static struct data_file_segment *get_file_segment(struct data_file *df, |
| +						  int block_index) |
| +{ |
| +	return &df->df_segments[block_index % ARRAY_SIZE(df->df_segments)]; |
| +} |
| + |
| +/* A block counts as present only with a real offset and non-zero size. */ |
| +static bool is_data_block_present(struct data_file_block *block) |
| +{ |
| +	if (block->db_backing_file_data_offset == 0) |
| +		return false; |
| + |
| +	return block->db_stored_size != 0; |
| +} |
| + |
| +/* Unpack an on-disk (little-endian) blockmap entry into in-memory form. */ |
| +static void convert_data_file_block(struct incfs_blockmap_entry *bme, |
| +				    struct data_file_block *res_block) |
| +{ |
| +	u64 offset_hi = le16_to_cpu(bme->me_data_offset_hi); |
| +	u64 offset_lo = le32_to_cpu(bme->me_data_offset_lo); |
| + |
| +	res_block->db_backing_file_data_offset = (offset_hi << 32) | offset_lo; |
| +	res_block->db_stored_size = le16_to_cpu(bme->me_data_size); |
| +	res_block->db_comp_alg = le16_to_cpu(bme->me_flags) & |
| +		INCFS_BLOCK_COMPRESSED_MASK; |
| +} |
| + |
| +/* Read and decode the blockmap entry for @index into *res_block. */ |
| +static int get_data_file_block(struct data_file *df, int index, |
| +			       struct data_file_block *res_block) |
| +{ |
| +	struct incfs_blockmap_entry bme = {}; |
| +	int err; |
| + |
| +	if (!df || !res_block) |
| +		return -EFAULT; |
| + |
| +	if (index < 0 || df->df_blockmap_off == 0) |
| +		return -EINVAL; |
| + |
| +	err = incfs_read_blockmap_entry(df->df_backing_file_context, index, |
| +					df->df_blockmap_off, &bme); |
| +	if (err) |
| +		return err; |
| + |
| +	convert_data_file_block(&bme, res_block); |
| +	return 0; |
| +} |
| + |
| +/* Fail with -ERANGE if one more incfs_filled_range would not fit. */ |
| +static int check_room_for_one_range(u32 size, u32 size_out) |
| +{ |
| +	return (size_out + sizeof(struct incfs_filled_range) > size) ? |
| +		-ERANGE : 0; |
| +} |
| + |
| +/* Append one filled-range record to the user buffer, advancing *size_out. */ |
| +static int copy_one_range(struct incfs_filled_range *range, void __user *buffer, |
| +			  u32 size, u32 *size_out) |
| +{ |
| +	int err = check_room_for_one_range(size, *size_out); |
| + |
| +	if (err) |
| +		return err; |
| + |
| +	if (copy_to_user((char __user *)buffer + *size_out, range, |
| +			 sizeof(*range))) |
| +		return -EFAULT; |
| + |
| +	*size_out += sizeof(*range); |
| +	return 0; |
| +} |
| + |
| +#define READ_BLOCKMAP_ENTRIES 512 |
| +/* |
| + * Report ranges of filled blocks of @df into the user buffer described by |
| + * @arg, starting at arg->start_index. |
| + * |
| + * A fully-downloaded file is answered with a single range without touching |
| + * the block map.  Otherwise blockmap entries are scanned in batches of |
| + * READ_BLOCKMAP_ENTRIES and contiguous filled runs are emitted as |
| + * incfs_filled_range records.  Per-fd progress (fd_get_block_pos) lets |
| + * repeated calls accumulate filled-block counts; once the whole file has |
| + * been scanned, the per-file atomic written-block counters are refreshed. |
| + * |
| + * Returns 0 on success, -ERANGE when the output buffer fills up (the |
| + * caller can continue from arg->index_out), or another negative errno. |
| + */ |
| +int incfs_get_filled_blocks(struct data_file *df, |
| +			    struct incfs_file_data *fd, |
| +			    struct incfs_get_filled_blocks_args *arg) |
| +{ |
| +	int error = 0; |
| +	bool in_range = false; |
| +	struct incfs_filled_range range; |
| +	void __user *buffer = u64_to_user_ptr(arg->range_buffer); |
| +	u32 size = arg->range_buffer_size; |
| +	u32 end_index = |
| +		arg->end_index ? arg->end_index : df->df_total_block_count; |
| +	u32 *size_out = &arg->range_buffer_size_out; |
| +	int i = READ_BLOCKMAP_ENTRIES - 1; |
| +	int entries_read = 0; |
| +	struct incfs_blockmap_entry *bme; |
| +	int data_blocks_filled = 0; |
| +	int hash_blocks_filled = 0; |
| + |
| +	*size_out = 0; |
| +	if (end_index > df->df_total_block_count) |
| +		end_index = df->df_total_block_count; |
| +	arg->total_blocks_out = df->df_total_block_count; |
| +	arg->data_blocks_out = df->df_data_block_count; |
| + |
| +	/* Fast path: every data block is written, report one big range. */ |
| +	if (atomic_read(&df->df_data_blocks_written) == |
| +	    df->df_data_block_count) { |
| +		pr_debug("File marked full, fast get_filled_blocks"); |
| +		if (arg->start_index > end_index) { |
| +			arg->index_out = arg->start_index; |
| +			return 0; |
| +		} |
| +		arg->index_out = arg->start_index; |
| + |
| +		error = check_room_for_one_range(size, *size_out); |
| +		if (error) |
| +			return error; |
| + |
| +		range = (struct incfs_filled_range){ |
| +			.begin = arg->start_index, |
| +			.end = end_index, |
| +		}; |
| + |
| +		error = copy_one_range(&range, buffer, size, size_out); |
| +		if (error) |
| +			return error; |
| +		arg->index_out = end_index; |
| +		return 0; |
| +	} |
| + |
| +	bme = kzalloc(sizeof(*bme) * READ_BLOCKMAP_ENTRIES, |
| +		      GFP_NOFS | __GFP_COMP); |
| +	if (!bme) |
| +		return -ENOMEM; |
| + |
| +	/* Scan the block map, batching reads of blockmap entries. */ |
| +	for (arg->index_out = arg->start_index; arg->index_out < end_index; |
| +	     ++arg->index_out) { |
| +		struct data_file_block dfb; |
| + |
| +		if (++i == READ_BLOCKMAP_ENTRIES) { |
| +			entries_read = incfs_read_blockmap_entries( |
| +				df->df_backing_file_context, bme, |
| +				arg->index_out, READ_BLOCKMAP_ENTRIES, |
| +				df->df_blockmap_off); |
| +			if (entries_read < 0) { |
| +				error = entries_read; |
| +				break; |
| +			} |
| + |
| +			i = 0; |
| +		} |
| + |
| +		if (i >= entries_read) { |
| +			error = -EIO; |
| +			break; |
| +		} |
| + |
| +		convert_data_file_block(bme + i, &dfb); |
| + |
| +		/* Indices past the data block count belong to hash blocks. */ |
| +		if (is_data_block_present(&dfb)) { |
| +			if (arg->index_out >= df->df_data_block_count) |
| +				++hash_blocks_filled; |
| +			else |
| +				++data_blocks_filled; |
| +		} |
| + |
| +		/* Still inside the same filled/empty run - keep going. */ |
| +		if (is_data_block_present(&dfb) == in_range) |
| +			continue; |
| + |
| +		if (!in_range) { |
| +			error = check_room_for_one_range(size, *size_out); |
| +			if (error) |
| +				break; |
| +			in_range = true; |
| +			range.begin = arg->index_out; |
| +		} else { |
| +			range.end = arg->index_out; |
| +			error = copy_one_range(&range, buffer, size, size_out); |
| +			if (error) { |
| +				/* there will be another try out of the loop, |
| +				 * it will reset the index_out if it fails too |
| +				 */ |
| +				break; |
| +			} |
| +			in_range = false; |
| +		} |
| +	} |
| + |
| +	/* Flush a range that was still open when the loop ended. */ |
| +	if (in_range) { |
| +		range.end = arg->index_out; |
| +		error = copy_one_range(&range, buffer, size, size_out); |
| +		if (error) |
| +			arg->index_out = range.begin; |
| +	} |
| + |
| +	/* A scan from index 0 restarts the per-fd progress tracking. */ |
| +	if (arg->start_index == 0) { |
| +		fd->fd_get_block_pos = 0; |
| +		fd->fd_filled_data_blocks = 0; |
| +		fd->fd_filled_hash_blocks = 0; |
| +	} |
| + |
| +	/* Only sequential continuation advances the per-fd accumulators. */ |
| +	if (arg->start_index == fd->fd_get_block_pos) { |
| +		fd->fd_get_block_pos = arg->index_out + 1; |
| +		fd->fd_filled_data_blocks += data_blocks_filled; |
| +		fd->fd_filled_hash_blocks += hash_blocks_filled; |
| +	} |
| + |
| +	/* Whole file scanned: refresh the authoritative counters. */ |
| +	if (fd->fd_get_block_pos == df->df_total_block_count + 1) { |
| +		if (fd->fd_filled_data_blocks > |
| +		    atomic_read(&df->df_data_blocks_written)) |
| +			atomic_set(&df->df_data_blocks_written, |
| +				   fd->fd_filled_data_blocks); |
| + |
| +		if (fd->fd_filled_hash_blocks > |
| +		    atomic_read(&df->df_hash_blocks_written)) |
| +			atomic_set(&df->df_hash_blocks_written, |
| +				   fd->fd_filled_hash_blocks); |
| +	} |
| + |
| +	kfree(bme); |
| +	return error; |
| +} |
| + |
| +/* Acquire-load of the done flag; pairs with set_read_done(). */ |
| +static bool is_read_done(struct pending_read *read) |
| +{ |
| +	int done = atomic_read_acquire(&read->done); |
| + |
| +	return done != 0; |
| +} |
| + |
| +/* Release-store of the done flag; pairs with is_read_done(). */ |
| +static void set_read_done(struct pending_read *read) |
| +{ |
| +	atomic_set_release(&read->done, 1); |
| +} |
| + |
| +/* |
| + * Notifies a given data file about pending read from a given block. |
| + * Returns a new pending read entry, or NULL on allocation failure. |
| + * |
| + * The entry is published on both the per-mount and per-segment RCU lists |
| + * under pending_read_lock, then waiters on the mount are woken. |
| + */ |
| +static struct pending_read *add_pending_read(struct data_file *df, |
| +					     int block_index) |
| +{ |
| +	struct pending_read *result = NULL; |
| +	struct data_file_segment *segment = NULL; |
| +	struct mount_info *mi = NULL; |
| + |
| +	segment = get_file_segment(df, block_index); |
| +	mi = df->df_mount_info; |
| + |
| +	result = kzalloc(sizeof(*result), GFP_NOFS); |
| +	if (!result) |
| +		return NULL; |
| + |
| +	result->file_id = df->df_id; |
| +	result->block_index = block_index; |
| +	result->timestamp_us = ktime_to_us(ktime_get()); |
| +	result->uid = current_uid().val; |
| + |
| +	spin_lock(&mi->pending_read_lock); |
| + |
| +	/* Serial numbers are mount-global and monotonically increasing. */ |
| +	result->serial_number = ++mi->mi_last_pending_read_number; |
| +	mi->mi_pending_reads_count++; |
| + |
| +	list_add_rcu(&result->mi_reads_list, &mi->mi_reads_list_head); |
| +	list_add_rcu(&result->segment_reads_list, &segment->reads_list_head); |
| + |
| +	spin_unlock(&mi->pending_read_lock); |
| + |
| +	wake_up_all(&mi->mi_pending_reads_notif_wq); |
| +	return result; |
| +} |
| + |
| +/* RCU callback: actually free a pending read after the grace period. */ |
| +static void free_pending_read_entry(struct rcu_head *entry) |
| +{ |
| +	kfree(container_of(entry, struct pending_read, rcu)); |
| +} |
| + |
| +/* |
| + * Notifies a given data file that pending read is completed. |
| + * |
| + * Unlinks @read from both RCU lists under pending_read_lock and defers |
| + * the actual free past the RCU grace period so lock-free readers that |
| + * are still traversing the lists stay safe. |
| + */ |
| +static void remove_pending_read(struct data_file *df, struct pending_read *read) |
| +{ |
| +	struct mount_info *mi = NULL; |
| + |
| +	if (!df || !read) { |
| +		WARN_ON(!df); |
| +		WARN_ON(!read); |
| +		return; |
| +	} |
| + |
| +	mi = df->df_mount_info; |
| + |
| +	spin_lock(&mi->pending_read_lock); |
| + |
| +	list_del_rcu(&read->mi_reads_list); |
| +	list_del_rcu(&read->segment_reads_list); |
| + |
| +	mi->mi_pending_reads_count--; |
| + |
| +	spin_unlock(&mi->pending_read_lock); |
| + |
| +	/* Don't free. Wait for readers */ |
| +	call_rcu(&read->rcu, free_pending_read_entry); |
| +} |
| + |
| +/* |
| + * Mark every pending read waiting on block @index of @segment as done and |
| + * wake the sleepers, then bump the mount-wide written-block counter and |
| + * wake anyone watching it. |
| + */ |
| +static void notify_pending_reads(struct mount_info *mi, |
| +				 struct data_file_segment *segment, |
| +				 int index) |
| +{ |
| +	struct pending_read *entry = NULL; |
| + |
| +	/* Notify pending reads waiting for this block. */ |
| +	rcu_read_lock(); |
| +	list_for_each_entry_rcu(entry, &segment->reads_list_head, |
| +						segment_reads_list) { |
| +		if (entry->block_index == index) |
| +			set_read_done(entry); |
| +	} |
| +	rcu_read_unlock(); |
| +	wake_up_all(&segment->new_data_arrival_wq); |
| + |
| +	atomic_inc(&mi->mi_blocks_written); |
| +	wake_up_all(&mi->mi_blocks_written_notif_wq); |
| +} |
| + |
| +/* |
| + * Look up block @block_index of @df, optionally waiting for it to arrive. |
| + * |
| + * If the block is already present it is returned immediately (scheduling |
| + * an artificial minimum delay via *delayed_min_us when timeouts request |
| + * one).  Otherwise, when @timeouts allows waiting, a pending-read entry |
| + * is published and the caller sleeps until the block arrives or the wait |
| + * times out; an un-serviced read is only logged. |
| + * |
| + * Returns 0 with *res_block filled on success, -ETIME on timeout, |
| + * -ERESTARTSYS if interrupted, or another negative errno. |
| + * |
| + * NOTE(review): *delayed_min_us is dereferenced unconditionally in two |
| + * places although the accounting at the end NULL-checks it - confirm all |
| + * callers pass a non-NULL pointer whenever timeouts are supplied. |
| + */ |
| +static int wait_for_data_block(struct data_file *df, int block_index, |
| +			       struct data_file_block *res_block, |
| +			       struct incfs_read_data_file_timeouts *timeouts, |
| +			       unsigned int *delayed_min_us) |
| +{ |
| +	struct data_file_block block = {}; |
| +	struct data_file_segment *segment = NULL; |
| +	struct pending_read *read = NULL; |
| +	struct mount_info *mi = NULL; |
| +	int error; |
| +	int wait_res = 0; |
| +	unsigned int delayed_pending_us = 0; |
| +	bool delayed_pending = false; |
| + |
| +	if (!df || !res_block) |
| +		return -EFAULT; |
| + |
| +	if (block_index < 0 || block_index >= df->df_data_block_count) |
| +		return -EINVAL; |
| + |
| +	if (df->df_blockmap_off <= 0 || !df->df_mount_info) |
| +		return -ENODATA; |
| + |
| +	mi = df->df_mount_info; |
| +	segment = get_file_segment(df, block_index); |
| + |
| +	error = down_read_killable(&segment->rwsem); |
| +	if (error) |
| +		return error; |
| + |
| +	/* Look up the given block */ |
| +	error = get_data_file_block(df, block_index, &block); |
| + |
| +	up_read(&segment->rwsem); |
| + |
| +	if (error) |
| +		return error; |
| + |
| +	/* If the block was found, just return it. No need to wait. */ |
| +	if (is_data_block_present(&block)) { |
| +		*res_block = block; |
| +		if (timeouts && timeouts->min_time_us) { |
| +			*delayed_min_us = timeouts->min_time_us; |
| +			goto out; |
| +		} |
| +		return 0; |
| +	} else { |
| +		/* If it's not found, create a pending read */ |
| +		if (timeouts && timeouts->max_pending_time_us) { |
| +			read = add_pending_read(df, block_index); |
| +			if (!read) |
| +				return -ENOMEM; |
| +		} else { |
| +			log_block_read(mi, &df->df_id, block_index); |
| +			return -ETIME; |
| +		} |
| +	} |
| + |
| +	/* Rest of function only applies if timeouts != NULL */ |
| +	if (!timeouts) { |
| +		pr_warn("incfs: timeouts unexpectedly NULL\n"); |
| +		return -EFSCORRUPTED; |
| +	} |
| + |
| +	/* Wait for notifications about block's arrival */ |
| +	wait_res = |
| +		wait_event_interruptible_timeout(segment->new_data_arrival_wq, |
| +			(is_read_done(read)), |
| +			usecs_to_jiffies(timeouts->max_pending_time_us)); |
| + |
| +	/* Woke up, the pending read is no longer needed. */ |
| +	remove_pending_read(df, read); |
| + |
| +	if (wait_res == 0) { |
| +		/* Wait has timed out */ |
| +		log_block_read(mi, &df->df_id, block_index); |
| +		return -ETIME; |
| +	} |
| +	if (wait_res < 0) { |
| +		/* |
| +		 * Only ERESTARTSYS is really expected here when a signal |
| +		 * comes while we wait. |
| +		 */ |
| +		return wait_res; |
| +	} |
| + |
| +	/* Top up the observed wait to the configured minimum delay. */ |
| +	delayed_pending = true; |
| +	delayed_pending_us = timeouts->max_pending_time_us - |
| +				jiffies_to_usecs(wait_res); |
| +	if (timeouts->min_pending_time_us > delayed_pending_us) |
| +		*delayed_min_us = timeouts->min_pending_time_us - |
| +					delayed_pending_us; |
| + |
| +	error = down_read_killable(&segment->rwsem); |
| +	if (error) |
| +		return error; |
| + |
| +	/* |
| +	 * Re-read blocks info now, it has just arrived and |
| +	 * should be available. |
| +	 */ |
| +	error = get_data_file_block(df, block_index, &block); |
| +	if (!error) { |
| +		if (is_data_block_present(&block)) |
| +			*res_block = block; |
| +		else { |
| +			/* |
| +			 * Somehow wait finished successfully but block still |
| +			 * can't be found. It's not normal. |
| +			 */ |
| +			pr_warn("incfs: Wait succeeded but block not found.\n"); |
| +			error = -ENODATA; |
| +		} |
| +	} |
| +	up_read(&segment->rwsem); |
| + |
| +out: |
| +	if (error) |
| +		return error; |
| + |
| +	/* Book-keeping for the sysfs delay statistics. */ |
| +	if (delayed_pending) { |
| +		mi->mi_reads_delayed_pending++; |
| +		mi->mi_reads_delayed_pending_us += |
| +			delayed_pending_us; |
| +	} |
| + |
| +	if (delayed_min_us && *delayed_min_us) { |
| +		mi->mi_reads_delayed_min++; |
| +		mi->mi_reads_delayed_min_us += *delayed_min_us; |
| +	} |
| + |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Record the most recent read failure in the mount's "last error" state |
| + * (exported through sysfs).  Successful reads are ignored. |
| + */ |
| +static int incfs_update_sysfs_error(struct file *file, int index, int result, |
| +				    struct mount_info *mi, struct data_file *df) |
| +{ |
| +	int err; |
| + |
| +	if (result >= 0) |
| +		return 0; |
| + |
| +	err = mutex_lock_interruptible(&mi->mi_le_mutex); |
| +	if (err) |
| +		return err; |
| + |
| +	mi->mi_le_file_id = df->df_id; |
| +	mi->mi_le_time_us = ktime_to_us(ktime_get()); |
| +	mi->mi_le_page = index; |
| +	mi->mi_le_errno = result; |
| +	mi->mi_le_uid = current_uid().val; |
| +	mutex_unlock(&mi->mi_le_mutex); |
| + |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Read one data block of an incfs file into @dst. |
| + * |
| + * Waits for the block if necessary (see wait_for_data_block), reads it |
| + * from the backing file, decompresses into @dst when the block is stored |
| + * compressed (using @tmp as scratch), validates it against the hash tree |
| + * when one is present, and logs the read.  Mount error counters and the |
| + * sysfs last-error record are updated on failure. |
| + * |
| + * Returns the number of bytes read, or a negative errno. |
| + */ |
| +ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, |
| +			int index, struct mem_range tmp, |
| +			struct incfs_read_data_file_timeouts *timeouts, |
| +			unsigned int *delayed_min_us) |
| +{ |
| +	loff_t pos; |
| +	ssize_t result; |
| +	size_t bytes_to_read; |
| +	struct mount_info *mi = NULL; |
| +	struct backing_file_context *bfc = NULL; |
| +	struct data_file_block block = {}; |
| +	struct data_file *df = get_incfs_data_file(f); |
| + |
| +	if (!dst.data || !df || !tmp.data) |
| +		return -EFAULT; |
| + |
| +	/* Scratch must hold a compressed block plus a hash block. */ |
| +	if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE) |
| +		return -ERANGE; |
| + |
| +	mi = df->df_mount_info; |
| +	bfc = df->df_backing_file_context; |
| + |
| +	result = wait_for_data_block(df, index, &block, timeouts, |
| +				     delayed_min_us); |
| +	if (result < 0) |
| +		goto out; |
| + |
| +	pos = block.db_backing_file_data_offset; |
| +	if (block.db_comp_alg == COMPRESSION_NONE) { |
| +		/* Uncompressed: read straight into the destination. */ |
| +		bytes_to_read = min(dst.len, block.db_stored_size); |
| +		result = incfs_kread(bfc, dst.data, bytes_to_read, pos); |
| + |
| +		/* Some data was read, but not enough */ |
| +		if (result >= 0 && result != bytes_to_read) |
| +			result = -EIO; |
| +	} else { |
| +		/* Compressed: read into scratch, then decompress to dst. */ |
| +		bytes_to_read = min(tmp.len, block.db_stored_size); |
| +		result = incfs_kread(bfc, tmp.data, bytes_to_read, pos); |
| +		if (result == bytes_to_read) { |
| +			result = |
| +			  decompress(mi, range(tmp.data, bytes_to_read), |
| +				     dst, block.db_comp_alg); |
| +			if (result < 0) { |
| +				const char *name = |
| +				    bfc->bc_file->f_path.dentry->d_name.name; |
| + |
| +				pr_warn_once("incfs: Decompression error. %s", |
| +					     name); |
| +			} |
| +		} else if (result >= 0) { |
| +			/* Some data was read, but not enough */ |
| +			result = -EIO; |
| +		} |
| +	} |
| + |
| +	/* Verify the block contents against the Merkle tree, if any. */ |
| +	if (result > 0) { |
| +		int err = validate_hash_tree(bfc, f, index, dst, tmp.data); |
| + |
| +		if (err < 0) |
| +			result = err; |
| +	} |
| + |
| +	if (result >= 0) |
| +		log_block_read(mi, &df->df_id, index); |
| + |
| +out: |
| +	if (result == -ETIME) |
| +		mi->mi_reads_failed_timed_out++; |
| +	else if (result == -EBADMSG) |
| +		mi->mi_reads_failed_hash_verification++; |
| +	else if (result < 0) |
| +		mi->mi_reads_failed_other++; |
| + |
| +	incfs_update_sysfs_error(f, index, result, mi, df); |
| + |
| +	return result; |
| +} |
| + |
| +/* |
| + * Read part of the file's Merkle tree area into @dst, starting at |
| + * @offset within the hash area; reads are clamped to the area's size. |
| + * Returns bytes read or a negative errno. |
| + * |
| + * NOTE(review): df->df_signature is dereferenced without a NULL check - |
| + * presumably callers only invoke this for verity-enabled files; confirm. |
| + */ |
| +ssize_t incfs_read_merkle_tree_blocks(struct mem_range dst, |
| +				      struct data_file *df, size_t offset) |
| +{ |
| +	struct backing_file_context *bfc = NULL; |
| +	struct incfs_df_signature *sig = NULL; |
| +	size_t to_read = dst.len; |
| + |
| +	if (!dst.data || !df) |
| +		return -EFAULT; |
| + |
| +	sig = df->df_signature; |
| +	bfc = df->df_backing_file_context; |
| + |
| +	if (offset > sig->hash_size) |
| +		return -ERANGE; |
| + |
| +	if (offset + to_read > sig->hash_size) |
| +		to_read = sig->hash_size - offset; |
| + |
| +	return incfs_kread(bfc, dst.data, to_read, sig->hash_offset + offset); |
| +} |
| + |
| +/* |
| + * Store one data block delivered by the data loader. |
| + * |
| + * Already-present blocks are silently accepted (idempotent fill).  The |
| + * block is written under the segment write-lock plus the backing-file |
| + * mutex, then any pending reads on this block are woken.  *complete is |
| + * set once the written-block count reaches the file's data block count. |
| + * |
| + * Returns 0 on success or a negative errno. |
| + */ |
| +int incfs_process_new_data_block(struct data_file *df, |
| +				 struct incfs_fill_block *block, u8 *data, |
| +				 bool *complete) |
| +{ |
| +	struct mount_info *mi = NULL; |
| +	struct backing_file_context *bfc = NULL; |
| +	struct data_file_segment *segment = NULL; |
| +	struct data_file_block existing_block = {}; |
| +	u16 flags = 0; |
| +	int error = 0; |
| + |
| +	if (!df || !block) |
| +		return -EFAULT; |
| + |
| +	bfc = df->df_backing_file_context; |
| +	mi = df->df_mount_info; |
| + |
| +	if (block->block_index >= df->df_data_block_count) |
| +		return -ERANGE; |
| + |
| +	segment = get_file_segment(df, block->block_index); |
| +	if (!segment) |
| +		return -EFAULT; |
| + |
| +	if (block->compression == COMPRESSION_LZ4) |
| +		flags |= INCFS_BLOCK_COMPRESSED_LZ4; |
| +	else if (block->compression == COMPRESSION_ZSTD) |
| +		flags |= INCFS_BLOCK_COMPRESSED_ZSTD; |
| +	else if (block->compression) |
| +		return -EINVAL; |
| + |
| +	/* Cheap check under the read lock first. */ |
| +	error = down_read_killable(&segment->rwsem); |
| +	if (error) |
| +		return error; |
| + |
| +	error = get_data_file_block(df, block->block_index, &existing_block); |
| + |
| +	up_read(&segment->rwsem); |
| + |
| +	if (error) |
| +		return error; |
| +	if (is_data_block_present(&existing_block)) |
| +		/* Block is already present, nothing to do here */ |
| +		return 0; |
| + |
| +	error = down_write_killable(&segment->rwsem); |
| +	if (error) |
| +		return error; |
| + |
| +	/* Recheck inside write lock */ |
| +	error = get_data_file_block(df, block->block_index, &existing_block); |
| +	if (error) |
| +		goto out_up_write; |
| + |
| +	if (is_data_block_present(&existing_block)) |
| +		goto out_up_write; |
| + |
| +	error = mutex_lock_interruptible(&bfc->bc_mutex); |
| +	if (error) |
| +		goto out_up_write; |
| + |
| +	error = incfs_write_data_block_to_backing_file(bfc, |
| +			range(data, block->data_len), block->block_index, |
| +			df->df_blockmap_off, flags); |
| +	if (error) |
| +		goto out_mutex_unlock; |
| + |
| +	if (atomic_inc_return(&df->df_data_blocks_written) |
| +			>= df->df_data_block_count) |
| +		*complete = true; |
| + |
| +out_mutex_unlock: |
| +	mutex_unlock(&bfc->bc_mutex); |
| +	if (!error) |
| +		notify_pending_reads(mi, segment, block->block_index); |
| + |
| +out_up_write: |
| +	up_write(&segment->rwsem); |
| + |
| +	if (error) |
| +		pr_debug("%d error: %d\n", block->block_index, error); |
| +	return error; |
| +} |
| + |
| +/* |
| + * Copy the file's raw signature blob into @dst. |
| + * Returns the signature size, 0 when the file has no signature, |
| + * -E2BIG when @dst is too small, or a negative errno on read failure. |
| + */ |
| +int incfs_read_file_signature(struct data_file *df, struct mem_range dst) |
| +{ |
| +	struct incfs_df_signature *sig = df->df_signature; |
| +	ssize_t res; |
| + |
| +	if (!dst.data) |
| +		return -EFAULT; |
| +	if (!sig) |
| +		return 0; |
| +	if (dst.len < sig->sig_size) |
| +		return -E2BIG; |
| + |
| +	res = incfs_kread(df->df_backing_file_context, dst.data, |
| +			  sig->sig_size, sig->sig_offset); |
| +	if (res < 0) |
| +		return res; |
| + |
| +	return (res == sig->sig_size) ? res : -EIO; |
| +} |
| + |
| +/* |
| + * Write one hash (Merkle tree) block into the file's dedicated hash area |
| + * in the backing file. |
| + * |
| + * @block must carry INCFS_BLOCK_FLAGS_HASH and stay within the hash area |
| + * recorded in the file's signature.  On success the per-file hash-block |
| + * counter is incremented. |
| + * |
| + * Returns 0 on success or a negative errno. |
| + */ |
| +int incfs_process_new_hash_block(struct data_file *df, |
| +				 struct incfs_fill_block *block, u8 *data) |
| +{ |
| +	struct backing_file_context *bfc = NULL; |
| +	struct mtree *hash_tree = NULL; |
| +	struct incfs_df_signature *sig = NULL; |
| +	loff_t hash_area_base = 0; |
| +	loff_t hash_area_size = 0; |
| +	int error = 0; |
| + |
| +	if (!df || !block) |
| +		return -EFAULT; |
| + |
| +	if (!(block->flags & INCFS_BLOCK_FLAGS_HASH)) |
| +		return -EINVAL; |
| + |
| +	bfc = df->df_backing_file_context; |
| + |
| +	/* Hash blocks only make sense for files with a signed hash tree. */ |
| +	hash_tree = df->df_hash_tree; |
| +	sig = df->df_signature; |
| +	if (!hash_tree || !sig || sig->hash_offset == 0) |
| +		return -ENOTSUPP; |
| + |
| +	hash_area_base = sig->hash_offset; |
| +	hash_area_size = sig->hash_size; |
| +	if (hash_area_size < block->block_index * INCFS_DATA_FILE_BLOCK_SIZE |
| +				+ block->data_len) { |
| +		/* Hash block goes beyond dedicated hash area of this file. */ |
| +		return -ERANGE; |
| +	} |
| + |
| +	error = mutex_lock_interruptible(&bfc->bc_mutex); |
| +	if (!error) { |
| +		error = incfs_write_hash_block_to_backing_file( |
| +			bfc, range(data, block->data_len), block->block_index, |
| +			hash_area_base, df->df_blockmap_off, df->df_size); |
| +		mutex_unlock(&bfc->bc_mutex); |
| +	} |
| +	if (!error) |
| +		atomic_inc(&df->df_hash_blocks_written); |
| + |
| +	return error; |
| +} |
| + |
| +/* Metadata handler: record the blockmap location and total block count. */ |
| +static int process_blockmap_md(struct incfs_blockmap *bm, |
| +			       struct metadata_handler *handler) |
| +{ |
| +	struct data_file *df = handler->context; |
| +	u32 block_count = le32_to_cpu(bm->m_block_count); |
| + |
| +	if (!df) |
| +		return -EFAULT; |
| + |
| +	/* The blockmap must cover at least every data block. */ |
| +	if (df->df_data_block_count > block_count) |
| +		return -EBADMSG; |
| + |
| +	df->df_total_block_count = block_count; |
| +	df->df_blockmap_off = le64_to_cpu(bm->m_base_offset); |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Metadata handler: load the file-signature record, read the raw |
| + * signature blob from the backing file, build the in-memory Merkle tree |
| + * descriptor from it, and sanity-check the recorded hash-area geometry. |
| + * On success, ownership of both the tree and the signature moves to @df. |
| + */ |
| +static int process_file_signature_md(struct incfs_file_signature *sg, |
| +				     struct metadata_handler *handler) |
| +{ |
| +	struct data_file *df = handler->context; |
| +	struct mtree *hash_tree = NULL; |
| +	int error = 0; |
| +	struct incfs_df_signature *signature = |
| +		kzalloc(sizeof(*signature), GFP_NOFS); |
| +	void *buf = NULL; |
| +	ssize_t read; |
| + |
| +	if (!signature) |
| +		return -ENOMEM; |
| + |
| +	if (!df || !df->df_backing_file_context || |
| +	    !df->df_backing_file_context->bc_file) { |
| +		error = -ENOENT; |
| +		goto out; |
| +	} |
| + |
| +	signature->hash_offset = le64_to_cpu(sg->sg_hash_tree_offset); |
| +	signature->hash_size = le32_to_cpu(sg->sg_hash_tree_size); |
| +	signature->sig_offset = le64_to_cpu(sg->sg_sig_offset); |
| +	signature->sig_size = le32_to_cpu(sg->sg_sig_size); |
| + |
| +	buf = kzalloc(signature->sig_size, GFP_NOFS); |
| +	if (!buf) { |
| +		error = -ENOMEM; |
| +		goto out; |
| +	} |
| + |
| +	read = incfs_kread(df->df_backing_file_context, buf, |
| +			   signature->sig_size, signature->sig_offset); |
| +	if (read < 0) { |
| +		error = read; |
| +		goto out; |
| +	} |
| + |
| +	if (read != signature->sig_size) { |
| +		error = -EINVAL; |
| +		goto out; |
| +	} |
| + |
| +	hash_tree = incfs_alloc_mtree(range(buf, signature->sig_size), |
| +				      df->df_data_block_count); |
| +	if (IS_ERR(hash_tree)) { |
| +		error = PTR_ERR(hash_tree); |
| +		hash_tree = NULL; |
| +		goto out; |
| +	} |
| +	/* The recorded hash area must match the tree built from the blob. */ |
| +	if (hash_tree->hash_tree_area_size != signature->hash_size) { |
| +		error = -EINVAL; |
| +		goto out; |
| +	} |
| +	/* Hash and signature areas must precede this metadata record. */ |
| +	if (signature->hash_size > 0 && |
| +	    handler->md_record_offset <= signature->hash_offset) { |
| +		error = -EINVAL; |
| +		goto out; |
| +	} |
| +	if (handler->md_record_offset <= signature->sig_offset) { |
| +		error = -EINVAL; |
| +		goto out; |
| +	} |
| +	/* Transfer ownership to the data file; clear locals so out: keeps them. */ |
| +	df->df_hash_tree = hash_tree; |
| +	hash_tree = NULL; |
| +	df->df_signature = signature; |
| +	signature = NULL; |
| +out: |
| +	incfs_free_mtree(hash_tree); |
| +	kfree(signature); |
| +	kfree(buf); |
| + |
| +	return error; |
| +} |
| + |
| +/* Metadata handler: restore persisted written-block counters for @df. */ |
| +static int process_status_md(struct incfs_status *is, |
| +			     struct metadata_handler *handler) |
| +{ |
| +	struct data_file *df = handler->context; |
| +	u32 data_written = le32_to_cpu(is->is_data_blocks_written); |
| +	u32 hash_written = le32_to_cpu(is->is_hash_blocks_written); |
| + |
| +	df->df_initial_data_blocks_written = data_written; |
| +	atomic_set(&df->df_data_blocks_written, data_written); |
| + |
| +	df->df_initial_hash_blocks_written = hash_written; |
| +	atomic_set(&df->df_hash_blocks_written, hash_written); |
| + |
| +	df->df_status_offset = handler->md_record_offset; |
| +	return 0; |
| +} |
| + |
| +/* Metadata handler: record the location/size of the fs-verity signature. */ |
| +static int process_file_verity_signature_md( |
| +		struct incfs_file_verity_signature *vs, |
| +		struct metadata_handler *handler) |
| +{ |
| +	struct data_file *df = handler->context; |
| +	struct incfs_df_verity_signature *vsig; |
| + |
| +	if (!df) |
| +		return -EFAULT; |
| + |
| +	vsig = kzalloc(sizeof(*vsig), GFP_NOFS); |
| +	if (!vsig) |
| +		return -ENOMEM; |
| + |
| +	vsig->offset = le64_to_cpu(vs->vs_offset); |
| +	vsig->size = le32_to_cpu(vs->vs_size); |
| +	/* Reject signatures larger than fs-verity allows. */ |
| +	if (vsig->size > FS_VERITY_MAX_SIGNATURE_SIZE) { |
| +		kfree(vsig); |
| +		return -EFAULT; |
| +	} |
| + |
| +	df->df_verity_signature = vsig; |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Walk the backing file's metadata record chain, dispatching each record |
| + * to the process_*_md handlers above, then cross-check the block counts |
| + * against the hash tree (if any). |
| + * |
| + * Returns the number of records processed, or a negative errno. |
| + */ |
| +static int incfs_scan_metadata_chain(struct data_file *df) |
| +{ |
| +	struct metadata_handler *handler = NULL; |
| +	int result = 0; |
| +	int records_count = 0; |
| +	int error = 0; |
| +	struct backing_file_context *bfc = NULL; |
| +	int nondata_block_count; |
| + |
| +	if (!df || !df->df_backing_file_context) |
| +		return -EFAULT; |
| + |
| +	bfc = df->df_backing_file_context; |
| + |
| +	handler = kzalloc(sizeof(*handler), GFP_NOFS); |
| +	if (!handler) |
| +		return -ENOMEM; |
| + |
| +	handler->md_record_offset = df->df_metadata_off; |
| +	handler->context = df; |
| +	handler->handle_blockmap = process_blockmap_md; |
| +	handler->handle_signature = process_file_signature_md; |
| +	handler->handle_status = process_status_md; |
| +	handler->handle_verity_signature = process_file_verity_signature_md; |
| + |
| +	/* Records link backwards; offset 0 terminates the chain. */ |
| +	while (handler->md_record_offset > 0) { |
| +		error = incfs_read_next_metadata_record(bfc, handler); |
| +		if (error) { |
| +			pr_warn("incfs: Error during reading incfs-metadata record. Offset: %lld Record #%d Error code: %d\n", |
| +				handler->md_record_offset, records_count + 1, |
| +				-error); |
| +			break; |
| +		} |
| +		records_count++; |
| +	} |
| +	if (error) { |
| +		pr_warn("incfs: Error %d after reading %d incfs-metadata records.\n", |
| +			 -error, records_count); |
| +		result = error; |
| +	} else |
| +		result = records_count; |
| + |
| +	nondata_block_count = df->df_total_block_count - |
| +		df->df_data_block_count; |
| +	if (df->df_hash_tree) { |
| +		int hash_block_count = get_blocks_count_for_size( |
| +			df->df_hash_tree->hash_tree_area_size); |
| + |
| +		/* |
| +		 * Files that were created with a hash tree have the hash tree |
| +		 * included in the block map, i.e. nondata_block_count == |
| +		 * hash_block_count. Files whose hash tree was added by |
| +		 * FS_IOC_ENABLE_VERITY will still have the original block |
| +		 * count, i.e. nondata_block_count == 0. |
| +		 */ |
| +		if (nondata_block_count != hash_block_count && |
| +		    nondata_block_count != 0) |
| +			result = -EINVAL; |
| +	} else if (nondata_block_count != 0) { |
| +		result = -EINVAL; |
| +	} |
| + |
| +	kfree(handler); |
| +	return result; |
| +} |
| + |
| +/* |
| + * Quickly checks if there are pending reads with a serial number larger |
| + * than a given one. |
| + */ |
| +bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) |
| +{ |
| +	bool fresh; |
| + |
| +	spin_lock(&mi->pending_read_lock); |
| +	fresh = mi->mi_pending_reads_count > 0 && |
| +		mi->mi_last_pending_read_number > last_number; |
| +	spin_unlock(&mi->pending_read_lock); |
| +	return fresh; |
| +} |
| + |
| +/* |
| + * Copy pending reads with serial numbers above @sn_lowerbound into |
| + * @reads and/or @reads2 (either may be NULL), at most @reads_size |
| + * entries.  *new_max_sn is raised to the largest serial number seen. |
| + * The RCU-protected list is walked without taking the spinlock. |
| + * |
| + * Returns the number of reads reported, or -EFAULT if @mi is NULL. |
| + */ |
| +int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, |
| +				struct incfs_pending_read_info *reads, |
| +				struct incfs_pending_read_info2 *reads2, |
| +				int reads_size, int *new_max_sn) |
| +{ |
| +	int reported_reads = 0; |
| +	struct pending_read *entry = NULL; |
| + |
| +	if (!mi) |
| +		return -EFAULT; |
| + |
| +	if (reads_size <= 0) |
| +		return 0; |
| + |
| +	/* Cheap early-out before touching the list. */ |
| +	if (!incfs_fresh_pending_reads_exist(mi, sn_lowerbound)) |
| +		return 0; |
| + |
| +	rcu_read_lock(); |
| + |
| +	list_for_each_entry_rcu(entry, &mi->mi_reads_list_head, mi_reads_list) { |
| +		if (entry->serial_number <= sn_lowerbound) |
| +			continue; |
| + |
| +		/* v1 report: no uid field. */ |
| +		if (reads) { |
| +			reads[reported_reads].file_id = entry->file_id; |
| +			reads[reported_reads].block_index = entry->block_index; |
| +			reads[reported_reads].serial_number = |
| +				entry->serial_number; |
| +			reads[reported_reads].timestamp_us = |
| +				entry->timestamp_us; |
| +		} |
| + |
| +		/* v2 report: includes the requesting uid. */ |
| +		if (reads2) { |
| +			reads2[reported_reads].file_id = entry->file_id; |
| +			reads2[reported_reads].block_index = entry->block_index; |
| +			reads2[reported_reads].serial_number = |
| +				entry->serial_number; |
| +			reads2[reported_reads].timestamp_us = |
| +				entry->timestamp_us; |
| +			reads2[reported_reads].uid = entry->uid; |
| +		} |
| + |
| +		if (entry->serial_number > *new_max_sn) |
| +			*new_max_sn = entry->serial_number; |
| + |
| +		reported_reads++; |
| +		if (reported_reads >= reads_size) |
| +			break; |
| +	} |
| + |
| +	rcu_read_unlock(); |
| + |
| +	return reported_reads; |
| +} |
| + |
| +struct read_log_state incfs_get_log_state(struct mount_info *mi) |
| +{ |
| +	struct read_log *log = &mi->mi_log; |
| +	struct read_log_state head_copy; |
| + |
| +	/* Take a consistent snapshot of the log head under rl_lock. */ |
| +	spin_lock(&log->rl_lock); |
| +	head_copy = log->rl_head; |
| +	spin_unlock(&log->rl_lock); |
| + |
| +	return head_copy; |
| +} |
| + |
| +/* |
| + * Number of log records the reader at *state has not consumed yet. |
| + * If the log generation changed since *state was captured, the reader's |
| + * position is meaningless and everything currently buffered (head - tail) |
| + * counts as uncollected. |
| + */ |
| +int incfs_get_uncollected_logs_count(struct mount_info *mi, |
| +				     const struct read_log_state *state) |
| +{ |
| +	struct read_log *log = &mi->mi_log; |
| +	u32 generation; |
| +	u64 head_no, tail_no; |
| + |
| +	spin_lock(&log->rl_lock); |
| +	tail_no = log->rl_tail.current_record_no; |
| +	head_no = log->rl_head.current_record_no; |
| +	generation = log->rl_head.generation_id; |
| +	spin_unlock(&log->rl_lock); |
| + |
| +	if (generation != state->generation_id) |
| +		return head_no - tail_no; |
| +	else |
| +		/* The reader may be ahead of tail; never report negative. */ |
| +		return head_no - max_t(u64, tail_no, state->current_record_no); |
| +} |
| + |
| +/* |
| + * Drain up to reads_size records from the read log, starting at *state, |
| + * into the caller's array(s) (either of reads/reads2 may be NULL). |
| + * *state is resynchronized if its generation is stale or it has fallen |
| + * behind the tail.  Returns the number of records copied. |
| + */ |
| +int incfs_collect_logged_reads(struct mount_info *mi, |
| +			       struct read_log_state *state, |
| +			       struct incfs_pending_read_info *reads, |
| +			       struct incfs_pending_read_info2 *reads2, |
| +			       int reads_size) |
| +{ |
| +	int dst_idx; |
| +	struct read_log *log = &mi->mi_log; |
| +	struct read_log_state *head, *tail; |
| + |
| +	spin_lock(&log->rl_lock); |
| +	head = &log->rl_head; |
| +	tail = &log->rl_tail; |
| + |
| +	/* Log was reconfigured since *state was taken; restart from scratch. */ |
| +	if (state->generation_id != head->generation_id) { |
| +		pr_debug("read ptr is wrong generation: %u/%u\n", |
| +			 state->generation_id, head->generation_id); |
| + |
| +		*state = (struct read_log_state){ |
| +			.generation_id = head->generation_id, |
| +		}; |
| +	} |
| + |
| +	/* The writer lapped the reader; jump forward to the oldest record. */ |
| +	if (state->current_record_no < tail->current_record_no) { |
| +		pr_debug("read ptr is behind, moving: %u/%u -> %u/%u\n", |
| +			 (u32)state->next_offset, |
| +			 (u32)state->current_pass_no, |
| +			 (u32)tail->next_offset, (u32)tail->current_pass_no); |
| + |
| +		*state = *tail; |
| +	} |
| + |
| +	for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { |
| +		if (state->current_record_no == head->current_record_no) |
| +			break; |
| + |
| +		log_read_one_record(log, state); |
| + |
| +		if (reads) |
| +			reads[dst_idx] = (struct incfs_pending_read_info) { |
| +				.file_id = state->base_record.file_id, |
| +				.block_index = state->base_record.block_index, |
| +				.serial_number = state->current_record_no, |
| +				.timestamp_us = |
| +					state->base_record.absolute_ts_us, |
| +			}; |
| + |
| +		if (reads2) |
| +			reads2[dst_idx] = (struct incfs_pending_read_info2) { |
| +				.file_id = state->base_record.file_id, |
| +				.block_index = state->base_record.block_index, |
| +				.serial_number = state->current_record_no, |
| +				.timestamp_us = |
| +					state->base_record.absolute_ts_us, |
| +				.uid = state->base_record.uid, |
| +			}; |
| +	} |
| + |
| +	spin_unlock(&log->rl_lock); |
| +	return dst_idx; |
| +} |
| + |
| diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/data_mgmt.h |
| @@ -0,0 +1,551 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright 2019 Google LLC |
| + */ |
| +#ifndef _INCFS_DATA_MGMT_H |
| +#define _INCFS_DATA_MGMT_H |
| + |
| +#include <linux/cred.h> |
| +#include <linux/fs.h> |
| +#include <linux/types.h> |
| +#include <linux/mutex.h> |
| +#include <linux/spinlock.h> |
| +#include <linux/rcupdate.h> |
| +#include <linux/completion.h> |
| +#include <linux/wait.h> |
| +#include <linux/zstd.h> |
| +#include <crypto/hash.h> |
| +#include <linux/rwsem.h> |
| + |
| +#include <uapi/linux/incrementalfs.h> |
| + |
| +#include "internal.h" |
| +#include "pseudo_files.h" |
| + |
| +#define SEGMENTS_PER_FILE 3 |
| + |
| +enum LOG_RECORD_TYPE { |
| + FULL, |
| + SAME_FILE, |
| + SAME_FILE_CLOSE_BLOCK, |
| + SAME_FILE_CLOSE_BLOCK_SHORT, |
| + SAME_FILE_NEXT_BLOCK, |
| + SAME_FILE_NEXT_BLOCK_SHORT, |
| +}; |
| + |
| +struct full_record { |
| + enum LOG_RECORD_TYPE type : 3; /* FULL */ |
| + u32 block_index : 29; |
| + incfs_uuid_t file_id; |
| + u64 absolute_ts_us; |
| + uid_t uid; |
| +} __packed; /* 32 bytes */ |
| + |
| +struct same_file { |
| + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE */ |
| + u32 block_index : 29; |
| + uid_t uid; |
| + u16 relative_ts_us; /* max 2^16 us ~= 64 ms */ |
| +} __packed; /* 10 bytes */ |
| + |
| +struct same_file_close_block { |
| + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK */ |
| + u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */ |
| + s16 block_index_delta; |
| +} __packed; /* 4 bytes */ |
| + |
| +struct same_file_close_block_short { |
| + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_CLOSE_BLOCK_SHORT */ |
| + u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */ |
| + s8 block_index_delta; |
| +} __packed; /* 2 bytes */ |
| + |
| +struct same_file_next_block { |
| + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK */ |
| + u16 relative_ts_us : 13; /* max 2^13 us ~= 8 ms */ |
| +} __packed; /* 2 bytes */ |
| + |
| +struct same_file_next_block_short { |
| + enum LOG_RECORD_TYPE type : 3; /* SAME_FILE_NEXT_BLOCK_SHORT */ |
| + u8 relative_ts_tens_us : 5; /* max 2^5*10 us ~= 320 us */ |
| +} __packed; /* 1 byte */ |
| + |
| +union log_record { |
| + struct full_record full_record; |
| + struct same_file same_file; |
| + struct same_file_close_block same_file_close_block; |
| + struct same_file_close_block_short same_file_close_block_short; |
| + struct same_file_next_block same_file_next_block; |
| + struct same_file_next_block_short same_file_next_block_short; |
| +}; |
| + |
| +struct read_log_state { |
| + /* Log buffer generation id, incremented on configuration changes */ |
| + u32 generation_id; |
| + |
| + /* Offset in rl_ring_buf to write into. */ |
| + u32 next_offset; |
| + |
| + /* Current number of writer passes over rl_ring_buf */ |
| + u32 current_pass_no; |
| + |
| + /* Current full_record to diff against */ |
| + struct full_record base_record; |
| + |
| + /* Current record number counting from configuration change */ |
| + u64 current_record_no; |
| +}; |
| + |
| +/* A ring buffer to save records about data blocks which were recently read. */ |
| +struct read_log { |
| + void *rl_ring_buf; |
| + |
| + int rl_size; |
| + |
| + struct read_log_state rl_head; |
| + |
| + struct read_log_state rl_tail; |
| + |
| + /* A lock to protect the above fields */ |
| + spinlock_t rl_lock; |
| + |
| + /* A queue of waiters who want to be notified about reads */ |
| + wait_queue_head_t ml_notif_wq; |
| + |
| + /* A work item to wake up those waiters without slowing down readers */ |
| + struct delayed_work ml_wakeup_work; |
| +}; |
| + |
| +struct mount_options { |
| + unsigned int read_timeout_ms; |
| + unsigned int readahead_pages; |
| + unsigned int read_log_pages; |
| + unsigned int read_log_wakeup_count; |
| + bool report_uid; |
| + char *sysfs_name; |
| +}; |
| + |
| +struct mount_info { |
| + struct super_block *mi_sb; |
| + |
| + struct path mi_backing_dir_path; |
| + |
| + struct dentry *mi_index_dir; |
| + /* For stacking mounts, if true, this indicates if the index dir needs |
| + * to be freed for this SB otherwise it was created by lower level SB */ |
| + bool mi_index_free; |
| + |
| + struct dentry *mi_incomplete_dir; |
| + /* For stacking mounts, if true, this indicates if the incomplete dir |
| + * needs to be freed for this SB. Similar to mi_index_free */ |
| + bool mi_incomplete_free; |
| + |
| + const struct cred *mi_owner; |
| + |
| + struct mount_options mi_options; |
| + |
| + /* This mutex is to be taken before create, rename, delete */ |
| + struct mutex mi_dir_struct_mutex; |
| + |
| + /* |
| + * A queue of waiters who want to be notified about new pending reads. |
| + */ |
| + wait_queue_head_t mi_pending_reads_notif_wq; |
| + |
| + /* |
| + * Protects - RCU safe: |
| + * - reads_list_head |
| + * - mi_pending_reads_count |
| + * - mi_last_pending_read_number |
| + * - data_file_segment.reads_list_head |
| + */ |
| + spinlock_t pending_read_lock; |
| + |
| + /* List of active pending_read objects */ |
| + struct list_head mi_reads_list_head; |
| + |
| + /* Total number of items in reads_list_head */ |
| + int mi_pending_reads_count; |
| + |
| + /* |
| + * Last serial number that was assigned to a pending read. |
| + * 0 means no pending reads have been seen yet. |
| + */ |
| + int mi_last_pending_read_number; |
| + |
| + /* Temporary buffer for read logger. */ |
| + struct read_log mi_log; |
| + |
| + /* SELinux needs special xattrs on our pseudo files */ |
| + struct mem_range pseudo_file_xattr[PSEUDO_FILE_COUNT]; |
| + |
| + /* A queue of waiters who want to be notified about blocks_written */ |
| + wait_queue_head_t mi_blocks_written_notif_wq; |
| + |
| + /* Number of blocks written since mount */ |
| + atomic_t mi_blocks_written; |
| + |
| + /* Per UID read timeouts */ |
| + spinlock_t mi_per_uid_read_timeouts_lock; |
| + struct incfs_per_uid_read_timeouts *mi_per_uid_read_timeouts; |
| + int mi_per_uid_read_timeouts_size; |
| + |
| + /* zstd workspace */ |
| + struct mutex mi_zstd_workspace_mutex; |
| + void *mi_zstd_workspace; |
| + ZSTD_DStream *mi_zstd_stream; |
| + struct delayed_work mi_zstd_cleanup_work; |
| + |
| + /* sysfs node */ |
| + struct incfs_sysfs_node *mi_sysfs_node; |
| + |
| + /* Last error information */ |
| + struct mutex mi_le_mutex; |
| + incfs_uuid_t mi_le_file_id; |
| + u64 mi_le_time_us; |
| + u32 mi_le_page; |
| + u32 mi_le_errno; |
| + uid_t mi_le_uid; |
| + |
| + /* Number of reads timed out */ |
| + u32 mi_reads_failed_timed_out; |
| + |
| + /* Number of reads failed because hash verification failed */ |
| + u32 mi_reads_failed_hash_verification; |
| + |
| + /* Number of reads failed for another reason */ |
| + u32 mi_reads_failed_other; |
| + |
| + /* Number of reads delayed because page had to be fetched */ |
| + u32 mi_reads_delayed_pending; |
| + |
| + /* Total time waiting for pages to be fetched */ |
| + u64 mi_reads_delayed_pending_us; |
| + |
| + /* |
| + * Number of reads delayed because of per-uid min_time_us or |
| + * min_pending_time_us settings |
| + */ |
| + u32 mi_reads_delayed_min; |
| + |
| + /* Total time waiting because of per-uid min_time_us or |
| + * min_pending_time_us settings. |
| + * |
| + * Note that if a read is initially delayed because we have to wait for |
| + * the page, then further delayed because of min_pending_time_us |
| + * setting, this counter gets incremented by only the further delay |
| + * time. |
| + */ |
| + u64 mi_reads_delayed_min_us; |
| +}; |
| + |
| +struct data_file_block { |
| + loff_t db_backing_file_data_offset; |
| + |
| + size_t db_stored_size; |
| + |
| + enum incfs_compression_alg db_comp_alg; |
| +}; |
| + |
| +struct pending_read { |
| + incfs_uuid_t file_id; |
| + |
| + s64 timestamp_us; |
| + |
| + atomic_t done; |
| + |
| + int block_index; |
| + |
| + int serial_number; |
| + |
| + uid_t uid; |
| + |
| + struct list_head mi_reads_list; |
| + |
| + struct list_head segment_reads_list; |
| + |
| + struct rcu_head rcu; |
| +}; |
| + |
| +struct data_file_segment { |
| +	/* Woken when new data arrives for blocks in this segment */ |
| +	wait_queue_head_t new_data_arrival_wq; |
| + |
| +	/* Protects reads from and writes to the block map */ |
| +	struct rw_semaphore rwsem; |
| + |
| +	/* List of active pending_read objects belonging to this segment */ |
| +	/* Protected by mount_info.pending_read_lock */ |
| +	struct list_head reads_list_head; |
| +}; |
| + |
| +/* |
| + * Extra info associated with a file. Just a few bytes set by a user. |
| + */ |
| +struct file_attr { |
| + loff_t fa_value_offset; |
| + |
| + size_t fa_value_size; |
| + |
| + u32 fa_crc; |
| +}; |
| + |
| + |
| +struct data_file { |
| + struct backing_file_context *df_backing_file_context; |
| + |
| + struct mount_info *df_mount_info; |
| + |
| + incfs_uuid_t df_id; |
| + |
| + /* |
| + * Array of segments used to reduce lock contention for the file. |
| +	 * The segment for a block is chosen based on the block's index. |
| + */ |
| + struct data_file_segment df_segments[SEGMENTS_PER_FILE]; |
| + |
| + /* Base offset of the first metadata record. */ |
| + loff_t df_metadata_off; |
| + |
| + /* Base offset of the block map. */ |
| + loff_t df_blockmap_off; |
| + |
| + /* File size in bytes */ |
| + loff_t df_size; |
| + |
| + /* File header flags */ |
| + u32 df_header_flags; |
| + |
| + /* File size in DATA_FILE_BLOCK_SIZE blocks */ |
| + int df_data_block_count; |
| + |
| + /* Total number of blocks, data + hash */ |
| + int df_total_block_count; |
| + |
| + /* For mapped files, the offset into the actual file */ |
| + loff_t df_mapped_offset; |
| + |
| + /* Number of data blocks written to file */ |
| + atomic_t df_data_blocks_written; |
| + |
| + /* Number of data blocks in the status block */ |
| + u32 df_initial_data_blocks_written; |
| + |
| + /* Number of hash blocks written to file */ |
| + atomic_t df_hash_blocks_written; |
| + |
| + /* Number of hash blocks in the status block */ |
| + u32 df_initial_hash_blocks_written; |
| + |
| + /* Offset to status metadata header */ |
| + loff_t df_status_offset; |
| + |
| + /* |
| + * Mutex acquired while enabling verity. Note that df_hash_tree is set |
| + * by enable verity. |
| + * |
| + * The backing file mutex bc_mutex may be taken while this mutex is |
| + * held. |
| + */ |
| + struct mutex df_enable_verity; |
| + |
| + /* |
| + * Set either at construction time or during enabling verity. In the |
| + * latter case, set via smp_store_release, so use smp_load_acquire to |
| + * read it. |
| + */ |
| + struct mtree *df_hash_tree; |
| + |
| + /* Guaranteed set if df_hash_tree is set. */ |
| + struct incfs_df_signature *df_signature; |
| + |
| + /* |
| + * The verity file digest, set when verity is enabled and the file has |
| + * been opened |
| + */ |
| + struct mem_range df_verity_file_digest; |
| + |
| + struct incfs_df_verity_signature *df_verity_signature; |
| +}; |
| + |
| +struct dir_file { |
| + struct mount_info *mount_info; |
| + |
| + struct file *backing_dir; |
| +}; |
| + |
| +struct inode_info { |
| + struct mount_info *n_mount_info; /* A mount, this file belongs to */ |
| + |
| + struct inode *n_backing_inode; |
| + |
| + struct data_file *n_file; |
| + |
| + struct inode n_vfs_inode; |
| +}; |
| + |
| +struct dentry_info { |
| + struct path backing_path; |
| +}; |
| + |
| +enum FILL_PERMISSION { |
| + CANT_FILL = 0, |
| + CAN_FILL = 1, |
| +}; |
| + |
| +struct incfs_file_data { |
| + /* Does this file handle have INCFS_IOC_FILL_BLOCKS permission */ |
| + enum FILL_PERMISSION fd_fill_permission; |
| + |
| + /* If INCFS_IOC_GET_FILLED_BLOCKS has been called, where are we */ |
| + int fd_get_block_pos; |
| + |
| + /* And how many filled blocks are there up to that point */ |
| + int fd_filled_data_blocks; |
| + int fd_filled_hash_blocks; |
| +}; |
| + |
| +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, |
| + struct mount_options *options, |
| + struct path *backing_dir_path); |
| + |
| +int incfs_realloc_mount_info(struct mount_info *mi, |
| + struct mount_options *options); |
| + |
| +void incfs_free_mount_info(struct mount_info *mi); |
| + |
| +char *file_id_to_str(incfs_uuid_t id); |
| +struct dentry *incfs_lookup_dentry(struct dentry *parent, const char *name); |
| +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); |
| +void incfs_free_data_file(struct data_file *df); |
| + |
| +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); |
| +void incfs_free_dir_file(struct dir_file *dir); |
| + |
| +struct incfs_read_data_file_timeouts { |
| + u32 min_time_us; |
| + u32 min_pending_time_us; |
| + u32 max_pending_time_us; |
| +}; |
| + |
| +ssize_t incfs_read_data_file_block(struct mem_range dst, struct file *f, |
| + int index, struct mem_range tmp, |
| + struct incfs_read_data_file_timeouts *timeouts, |
| + unsigned int *delayed_min_us); |
| + |
| +ssize_t incfs_read_merkle_tree_blocks(struct mem_range dst, |
| + struct data_file *df, size_t offset); |
| + |
| +int incfs_get_filled_blocks(struct data_file *df, |
| + struct incfs_file_data *fd, |
| + struct incfs_get_filled_blocks_args *arg); |
| + |
| +int incfs_read_file_signature(struct data_file *df, struct mem_range dst); |
| + |
| +int incfs_process_new_data_block(struct data_file *df, |
| + struct incfs_fill_block *block, u8 *data, |
| + bool *complete); |
| + |
| +int incfs_process_new_hash_block(struct data_file *df, |
| + struct incfs_fill_block *block, u8 *data); |
| + |
| +bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); |
| + |
| +/* |
| + * Collects pending reads and saves them into the array (reads/reads_size). |
| + * Only reads with serial_number > sn_lowerbound are reported. |
| + * Returns how many reads were saved into the array. |
| + */ |
| +int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, |
| + struct incfs_pending_read_info *reads, |
| + struct incfs_pending_read_info2 *reads2, |
| + int reads_size, int *new_max_sn); |
| + |
| +int incfs_collect_logged_reads(struct mount_info *mi, |
| + struct read_log_state *start_state, |
| + struct incfs_pending_read_info *reads, |
| + struct incfs_pending_read_info2 *reads2, |
| + int reads_size); |
| +struct read_log_state incfs_get_log_state(struct mount_info *mi); |
| +int incfs_get_uncollected_logs_count(struct mount_info *mi, |
| + const struct read_log_state *state); |
| + |
| +/* |
| + * Return the incfs inode_info for @inode, or NULL if @inode is NULL or |
| + * belongs to another filesystem (detected via the superblock magic). |
| + */ |
| +static inline struct inode_info *get_incfs_node(struct inode *inode) |
| +{ |
| +	if (!inode) |
| +		return NULL; |
| + |
| +	if (inode->i_sb->s_magic != INCFS_MAGIC_NUMBER) { |
| +		/* This inode doesn't belong to us. */ |
| +		pr_warn_once("incfs: %s on an alien inode.\n", __func__); |
| +		return NULL; |
| +	} |
| + |
| +	return container_of(inode, struct inode_info, n_vfs_inode); |
| +} |
| + |
| +static inline struct data_file *get_incfs_data_file(struct file *f) |
| +{ |
| + struct inode_info *node = NULL; |
| + |
| + if (!f) |
| + return NULL; |
| + |
| + if (!S_ISREG(f->f_inode->i_mode)) |
| + return NULL; |
| + |
| + node = get_incfs_node(f->f_inode); |
| + if (!node) |
| + return NULL; |
| + |
| + return node->n_file; |
| +} |
| + |
| +static inline struct dir_file *get_incfs_dir_file(struct file *f) |
| +{ |
| + if (!f) |
| + return NULL; |
| + |
| + if (!S_ISDIR(f->f_inode->i_mode)) |
| + return NULL; |
| + |
| + return (struct dir_file *)f->private_data; |
| +} |
| + |
| +/* |
| + * Make sure that inode_info.n_file is initialized and inode can be used |
| + * for reading and writing data from/to the backing file. |
| + */ |
| +int make_inode_ready_for_data_ops(struct mount_info *mi, |
| + struct inode *inode, |
| + struct file *backing_file); |
| + |
| +static inline struct dentry_info *get_incfs_dentry(const struct dentry *d) |
| +{ |
| + if (!d) |
| + return NULL; |
| + |
| + return (struct dentry_info *)d->d_fsdata; |
| +} |
| + |
| +static inline void get_incfs_backing_path(const struct dentry *d, |
| + struct path *path) |
| +{ |
| + struct dentry_info *di = get_incfs_dentry(d); |
| + |
| + if (!di) { |
| + *path = (struct path) {}; |
| + return; |
| + } |
| + |
| + *path = di->backing_path; |
| + path_get(path); |
| +} |
| + |
| +static inline int get_blocks_count_for_size(u64 size) |
| +{ |
| + if (size == 0) |
| + return 0; |
| + return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; |
| +} |
| + |
| +#endif /* _INCFS_DATA_MGMT_H */ |
| diff --git a/fs/incfs/format.c b/fs/incfs/format.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/format.c |
| @@ -0,0 +1,752 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2018 Google LLC |
| + */ |
| +#include <linux/fs.h> |
| +#include <linux/file.h> |
| +#include <linux/types.h> |
| +#include <linux/mutex.h> |
| +#include <linux/mm.h> |
| +#include <linux/falloc.h> |
| +#include <linux/slab.h> |
| +#include <linux/crc32.h> |
| +#include <linux/kernel.h> |
| + |
| +#include "format.h" |
| +#include "data_mgmt.h" |
| + |
| +/* Allocate a backing_file_context holding a reference on @backing_file. */ |
| +struct backing_file_context *incfs_alloc_bfc(struct mount_info *mi, |
| +					     struct file *backing_file) |
| +{ |
| +	struct backing_file_context *bfc; |
| + |
| +	bfc = kzalloc(sizeof(*bfc), GFP_NOFS); |
| +	if (!bfc) |
| +		return ERR_PTR(-ENOMEM); |
| + |
| +	/* Pin the backing file for the lifetime of this context. */ |
| +	bfc->bc_file = get_file(backing_file); |
| +	bfc->bc_cred = mi->mi_owner; |
| +	mutex_init(&bfc->bc_mutex); |
| + |
| +	return bfc; |
| +} |
| + |
| +void incfs_free_bfc(struct backing_file_context *bfc) |
| +{ |
| + if (!bfc) |
| + return; |
| + |
| + if (bfc->bc_file) |
| + fput(bfc->bc_file); |
| + |
| + mutex_destroy(&bfc->bc_mutex); |
| + kfree(bfc); |
| +} |
| + |
| +static loff_t incfs_get_end_offset(struct file *f) |
| +{ |
| + /* |
| + * This function assumes that file size and the end-offset |
| + * are the same. This is not always true. |
| + */ |
| + return i_size_read(file_inode(f)); |
| +} |
| + |
| +/* |
| + * Truncate the tail of the file to the given length. |
| + * Used to rollback partially successful multistep writes. |
| + */ |
| +static int truncate_backing_file(struct backing_file_context *bfc, |
| + loff_t new_end) |
| +{ |
| + struct inode *inode = NULL; |
| + struct dentry *dentry = NULL; |
| + loff_t old_end = 0; |
| + struct iattr attr; |
| + int result = 0; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + if (!bfc->bc_file) |
| + return -EFAULT; |
| + |
| + old_end = incfs_get_end_offset(bfc->bc_file); |
| + if (old_end == new_end) |
| + return 0; |
| + if (old_end < new_end) |
| + return -EINVAL; |
| + |
| + inode = bfc->bc_file->f_inode; |
| + dentry = bfc->bc_file->f_path.dentry; |
| + |
| + attr.ia_size = new_end; |
| + attr.ia_valid = ATTR_SIZE; |
| + |
| + inode_lock(inode); |
| + result = notify_change(&nop_mnt_idmap, dentry, &attr, NULL); |
| + inode_unlock(inode); |
| + |
| + return result; |
| +} |
| + |
| +static int write_to_bf(struct backing_file_context *bfc, const void *buf, |
| + size_t count, loff_t pos) |
| +{ |
| + ssize_t res = incfs_kwrite(bfc, buf, count, pos); |
| + |
| + if (res < 0) |
| + return res; |
| + if (res != count) |
| + return -EIO; |
| + return 0; |
| +} |
| + |
| +static int append_zeros_no_fallocate(struct backing_file_context *bfc, |
| + size_t file_size, size_t len) |
| +{ |
| + u8 buffer[256] = {}; |
| + size_t i; |
| + |
| + for (i = 0; i < len; i += sizeof(buffer)) { |
| + int to_write = len - i > sizeof(buffer) |
| + ? sizeof(buffer) : len - i; |
| + int err = write_to_bf(bfc, buffer, to_write, file_size + i); |
| + |
| + if (err) |
| + return err; |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +/* Append a given number of zero bytes to the end of the backing file. */ |
| +static int append_zeros(struct backing_file_context *bfc, size_t len) |
| +{ |
| + loff_t file_size = 0; |
| + loff_t new_last_byte_offset = 0; |
| + int result; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + if (len == 0) |
| + return 0; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + /* |
| + * Allocate only one byte at the new desired end of the file. |
| + * It will increase file size and create a zeroed area of |
| + * a given size. |
| + */ |
| + file_size = incfs_get_end_offset(bfc->bc_file); |
| + new_last_byte_offset = file_size + len - 1; |
| + result = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); |
| + if (result != -EOPNOTSUPP) |
| + return result; |
| + |
| + return append_zeros_no_fallocate(bfc, file_size, len); |
| +} |
| + |
| +/* |
| + * Append a given metadata record to the backing file and update a previous |
| + * record to add the new record to the metadata list. |
| + */ |
| +static int append_md_to_backing_file(struct backing_file_context *bfc, |
| +				     struct incfs_md_header *record) |
| +{ |
| +	int result = 0; |
| +	loff_t record_offset; |
| +	loff_t file_pos; |
| +	__le64 new_md_offset; |
| +	size_t record_size; |
| + |
| +	if (!bfc || !record) |
| +		return -EFAULT; |
| + |
| +	if (bfc->bc_last_md_record_offset < 0) |
| +		return -EINVAL; |
| + |
| +	LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| +	record_size = le16_to_cpu(record->h_record_size); |
| +	file_pos = incfs_get_end_offset(bfc->bc_file); |
| +	/* The new record becomes the list tail; it points nowhere yet. */ |
| +	record->h_next_md_offset = 0; |
| + |
| +	/* Write the metadata record to the end of the backing file */ |
| +	record_offset = file_pos; |
| +	new_md_offset = cpu_to_le64(record_offset); |
| +	result = write_to_bf(bfc, record, record_size, file_pos); |
| +	if (result) |
| +		return result; |
| + |
| +	/* Update next metadata offset in a previous record or a superblock. */ |
| +	if (bfc->bc_last_md_record_offset) { |
| +		/* |
| +		 * Find a place in the previous md record where new record's |
| +		 * offset needs to be saved. |
| +		 */ |
| +		file_pos = bfc->bc_last_md_record_offset + |
| +			offsetof(struct incfs_md_header, h_next_md_offset); |
| +	} else { |
| +		/* |
| +		 * No metadata yet, find a place to update in the |
| +		 * file_header. |
| +		 */ |
| +		file_pos = offsetof(struct incfs_file_header, |
| +				    fh_first_md_offset); |
| +	} |
| +	result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset), |
| +			     file_pos); |
| +	if (result) |
| +		return result; |
| + |
| +	bfc->bc_last_md_record_offset = record_offset; |
| +	return result; |
| +} |
| + |
| +/* |
| + * Reserve 0-filled space for the blockmap body, and append |
| + * incfs_blockmap metadata record pointing to it. |
| + */ |
| +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, |
| + u32 block_count) |
| +{ |
| + struct incfs_blockmap blockmap = {}; |
| + int result = 0; |
| + loff_t file_end = 0; |
| + size_t map_size = block_count * sizeof(struct incfs_blockmap_entry); |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + blockmap.m_header.h_md_entry_type = INCFS_MD_BLOCK_MAP; |
| + blockmap.m_header.h_record_size = cpu_to_le16(sizeof(blockmap)); |
| + blockmap.m_header.h_next_md_offset = cpu_to_le64(0); |
| + blockmap.m_block_count = cpu_to_le32(block_count); |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + /* Reserve 0-filled space for the blockmap body in the backing file. */ |
| + file_end = incfs_get_end_offset(bfc->bc_file); |
| + result = append_zeros(bfc, map_size); |
| + if (result) |
| + return result; |
| + |
| + /* Write blockmap metadata record pointing to the body written above. */ |
| + blockmap.m_base_offset = cpu_to_le64(file_end); |
| + result = append_md_to_backing_file(bfc, &blockmap.m_header); |
| + if (result) |
| + /* Error, rollback file changes */ |
| + truncate_backing_file(bfc, file_end); |
| + |
| + return result; |
| +} |
| + |
| +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, |
| + struct mem_range sig, u32 tree_size, |
| + loff_t *tree_offset, loff_t *sig_offset) |
| +{ |
| + struct incfs_file_signature sg = {}; |
| + int result = 0; |
| + loff_t rollback_pos = 0; |
| + loff_t tree_area_pos = 0; |
| + size_t alignment = 0; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + rollback_pos = incfs_get_end_offset(bfc->bc_file); |
| + |
| + sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE; |
| + sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg)); |
| + sg.sg_header.h_next_md_offset = cpu_to_le64(0); |
| + if (sig.data != NULL && sig.len > 0) { |
| + sg.sg_sig_size = cpu_to_le32(sig.len); |
| + sg.sg_sig_offset = cpu_to_le64(rollback_pos); |
| + |
| + result = write_to_bf(bfc, sig.data, sig.len, rollback_pos); |
| + if (result) |
| + goto err; |
| + } |
| + |
| + tree_area_pos = incfs_get_end_offset(bfc->bc_file); |
| + if (tree_size > 0) { |
| + if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) { |
| + /* |
| + * If hash tree is big enough, it makes sense to |
| + * align in the backing file for faster access. |
| + */ |
| + loff_t offset = round_up(tree_area_pos, PAGE_SIZE); |
| + |
| + alignment = offset - tree_area_pos; |
| + tree_area_pos = offset; |
| + } |
| + |
| + /* |
| +	 * If the root hash is not the only hash in the tree, |
| + * reserve 0-filled space for the tree. |
| + */ |
| + result = append_zeros(bfc, tree_size + alignment); |
| + if (result) |
| + goto err; |
| + |
| + sg.sg_hash_tree_size = cpu_to_le32(tree_size); |
| + sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos); |
| + } |
| + |
| + /* Write a hash tree metadata record pointing to the hash tree above. */ |
| + result = append_md_to_backing_file(bfc, &sg.sg_header); |
| +err: |
| + if (result) |
| + /* Error, rollback file changes */ |
| + truncate_backing_file(bfc, rollback_pos); |
| + else { |
| + if (tree_offset) |
| + *tree_offset = tree_area_pos; |
| + if (sig_offset) |
| + *sig_offset = rollback_pos; |
| + } |
| + |
| + return result; |
| +} |
| + |
| +static int write_new_status_to_backing_file(struct backing_file_context *bfc, |
| + u32 data_blocks_written, |
| + u32 hash_blocks_written) |
| +{ |
| + int result; |
| + loff_t rollback_pos; |
| + struct incfs_status is = { |
| + .is_header = { |
| + .h_md_entry_type = INCFS_MD_STATUS, |
| + .h_record_size = cpu_to_le16(sizeof(is)), |
| + }, |
| + .is_data_blocks_written = cpu_to_le32(data_blocks_written), |
| + .is_hash_blocks_written = cpu_to_le32(hash_blocks_written), |
| + }; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + rollback_pos = incfs_get_end_offset(bfc->bc_file); |
| + result = append_md_to_backing_file(bfc, &is.is_header); |
| + if (result) |
| + truncate_backing_file(bfc, rollback_pos); |
| + |
| + return result; |
| +} |
| + |
| +int incfs_write_status_to_backing_file(struct backing_file_context *bfc, |
| + loff_t status_offset, |
| + u32 data_blocks_written, |
| + u32 hash_blocks_written) |
| +{ |
| + struct incfs_status is; |
| + int result; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + if (status_offset == 0) |
| + return write_new_status_to_backing_file(bfc, |
| + data_blocks_written, hash_blocks_written); |
| + |
| + result = incfs_kread(bfc, &is, sizeof(is), status_offset); |
| + if (result != sizeof(is)) |
| + return -EIO; |
| + |
| + is.is_data_blocks_written = cpu_to_le32(data_blocks_written); |
| + is.is_hash_blocks_written = cpu_to_le32(hash_blocks_written); |
| + result = incfs_kwrite(bfc, &is, sizeof(is), status_offset); |
| + if (result != sizeof(is)) |
| + return -EIO; |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Append the verity signature blob and an INCFS_MD_VERITY_SIGNATURE metadata |
| + * record pointing at it.  On success, *offset is set to the blob's position. |
| + * A NULL/empty signature is treated as an empty section: returns 0 and leaves |
| + * *offset untouched.  On failure, the backing file is truncated back. |
| + */ |
| +int incfs_write_verity_signature_to_backing_file( |
| +		struct backing_file_context *bfc, struct mem_range signature, |
| +		loff_t *offset) |
| +{ |
| +	struct incfs_file_verity_signature vs = {}; |
| +	int result; |
| +	loff_t pos; |
| + |
| +	if (!bfc) |
| +		return -EFAULT; |
| + |
| +	/* No verity signature section is equivalent to an empty section */ |
| +	if (signature.data == NULL || signature.len == 0) |
| +		return 0; |
| + |
| +	/* Writers of the backing file must hold bc_mutex, like the other |
| +	 * incfs_write_*_to_backing_file() helpers. |
| +	 */ |
| +	LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| +	pos = incfs_get_end_offset(bfc->bc_file); |
| + |
| +	vs = (struct incfs_file_verity_signature) { |
| +		.vs_header = (struct incfs_md_header) { |
| +			.h_md_entry_type = INCFS_MD_VERITY_SIGNATURE, |
| +			.h_record_size = cpu_to_le16(sizeof(vs)), |
| +			.h_next_md_offset = cpu_to_le64(0), |
| +		}, |
| +		.vs_size = cpu_to_le32(signature.len), |
| +		.vs_offset = cpu_to_le64(pos), |
| +	}; |
| + |
| +	result = write_to_bf(bfc, signature.data, signature.len, pos); |
| +	if (result) |
| +		goto err; |
| + |
| +	result = append_md_to_backing_file(bfc, &vs.vs_header); |
| +	if (result) |
| +		goto err; |
| + |
| +	*offset = pos; |
| +err: |
| +	if (result) |
| +		/* Error, rollback file changes */ |
| +		truncate_backing_file(bfc, pos); |
| +	return result; |
| +} |
| + |
| +/* |
| + * Write a backing file header |
| + * It should always be called only on empty file. |
| + * fh.fh_first_md_offset is 0 for now, but will be updated |
| + * once first metadata record is added. |
| + */ |
| +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, |
| + incfs_uuid_t *uuid, u64 file_size) |
| +{ |
| + struct incfs_file_header fh = {}; |
| + loff_t file_pos = 0; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER); |
| + fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER); |
| + fh.fh_header_size = cpu_to_le16(sizeof(fh)); |
| + fh.fh_first_md_offset = cpu_to_le64(0); |
| + fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); |
| + |
| + fh.fh_file_size = cpu_to_le64(file_size); |
| + fh.fh_uuid = *uuid; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + file_pos = incfs_get_end_offset(bfc->bc_file); |
| + if (file_pos != 0) |
| + return -EEXIST; |
| + |
| + return write_to_bf(bfc, &fh, sizeof(fh), file_pos); |
| +} |
| + |
| +/* |
| + * Write a backing file header for a mapping file |
| + * It should always be called only on empty file. |
| + */ |
| +int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, |
| + incfs_uuid_t *uuid, u64 file_size, u64 offset) |
| +{ |
| + struct incfs_file_header fh = {}; |
| + loff_t file_pos = 0; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER); |
| + fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER); |
| + fh.fh_header_size = cpu_to_le16(sizeof(fh)); |
| + fh.fh_original_offset = cpu_to_le64(offset); |
| + fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); |
| + |
| + fh.fh_mapped_file_size = cpu_to_le64(file_size); |
| + fh.fh_original_uuid = *uuid; |
| + fh.fh_flags = cpu_to_le32(INCFS_FILE_MAPPED); |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + file_pos = incfs_get_end_offset(bfc->bc_file); |
| + if (file_pos != 0) |
| + return -EEXIST; |
| + |
| + return write_to_bf(bfc, &fh, sizeof(fh), file_pos); |
| +} |
| + |
| +/* Write a given data block and update file's blockmap to point it. */ |
| +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, |
| + struct mem_range block, int block_index, |
| + loff_t bm_base_off, u16 flags) |
| +{ |
| + struct incfs_blockmap_entry bm_entry = {}; |
| + int result = 0; |
| + loff_t data_offset = 0; |
| + loff_t bm_entry_off = |
| + bm_base_off + sizeof(struct incfs_blockmap_entry) * block_index; |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + if (block.len >= (1 << 16) || block_index < 0) |
| + return -EINVAL; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + data_offset = incfs_get_end_offset(bfc->bc_file); |
| + if (data_offset <= bm_entry_off) { |
| + /* Blockmap entry is beyond the file's end. It is not normal. */ |
| + return -EINVAL; |
| + } |
| + |
| + /* Write the block data at the end of the backing file. */ |
| + result = write_to_bf(bfc, block.data, block.len, data_offset); |
| + if (result) |
| + return result; |
| + |
| + /* Update the blockmap to point to the newly written data. */ |
| + bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); |
| + bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); |
| + bm_entry.me_data_size = cpu_to_le16((u16)block.len); |
| + bm_entry.me_flags = cpu_to_le16(flags); |
| + |
| + return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), |
| + bm_entry_off); |
| +} |
| + |
| +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, |
| + struct mem_range block, |
| + int block_index, |
| + loff_t hash_area_off, |
| + loff_t bm_base_off, |
| + loff_t file_size) |
| +{ |
| + struct incfs_blockmap_entry bm_entry = {}; |
| + int result; |
| + loff_t data_offset = 0; |
| + loff_t file_end = 0; |
| + loff_t bm_entry_off = |
| + bm_base_off + |
| + sizeof(struct incfs_blockmap_entry) * |
| + (block_index + get_blocks_count_for_size(file_size)); |
| + |
| + if (!bfc) |
| + return -EFAULT; |
| + |
| + LOCK_REQUIRED(bfc->bc_mutex); |
| + |
| + data_offset = hash_area_off + block_index * INCFS_DATA_FILE_BLOCK_SIZE; |
| + file_end = incfs_get_end_offset(bfc->bc_file); |
| + if (data_offset + block.len > file_end) { |
| + /* Block is located beyond the file's end. It is not normal. */ |
| + return -EINVAL; |
| + } |
| + |
| + result = write_to_bf(bfc, block.data, block.len, data_offset); |
| + if (result) |
| + return result; |
| + |
| + bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); |
| + bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); |
| + bm_entry.me_data_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); |
| + |
| + return write_to_bf(bfc, &bm_entry, sizeof(bm_entry), bm_entry_off); |
| +} |
| + |
| +int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, |
| + loff_t bm_base_off, |
| + struct incfs_blockmap_entry *bm_entry) |
| +{ |
| + int error = incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, |
| + bm_base_off); |
| + |
| + if (error < 0) |
| + return error; |
| + |
| + if (error == 0) |
| + return -EIO; |
| + |
| + if (error != 1) |
| + return -EFAULT; |
| + |
| + return 0; |
| +} |
| + |
| +int incfs_read_blockmap_entries(struct backing_file_context *bfc, |
| + struct incfs_blockmap_entry *entries, |
| + int start_index, int blocks_number, |
| + loff_t bm_base_off) |
| +{ |
| + loff_t bm_entry_off = |
| + bm_base_off + sizeof(struct incfs_blockmap_entry) * start_index; |
| + const size_t bytes_to_read = sizeof(struct incfs_blockmap_entry) |
| + * blocks_number; |
| + int result = 0; |
| + |
| + if (!bfc || !entries) |
| + return -EFAULT; |
| + |
| + if (start_index < 0 || bm_base_off <= 0) |
| + return -ENODATA; |
| + |
| + result = incfs_kread(bfc, entries, bytes_to_read, bm_entry_off); |
| + if (result < 0) |
| + return result; |
| + return result / sizeof(*entries); |
| +} |
| + |
| +int incfs_read_file_header(struct backing_file_context *bfc, |
| + loff_t *first_md_off, incfs_uuid_t *uuid, |
| + u64 *file_size, u32 *flags) |
| +{ |
| + ssize_t bytes_read = 0; |
| + struct incfs_file_header fh = {}; |
| + |
| + if (!bfc || !first_md_off) |
| + return -EFAULT; |
| + |
| + bytes_read = incfs_kread(bfc, &fh, sizeof(fh), 0); |
| + if (bytes_read < 0) |
| + return bytes_read; |
| + |
| + if (bytes_read < sizeof(fh)) |
| + return -EBADMSG; |
| + |
| + if (le64_to_cpu(fh.fh_magic) != INCFS_MAGIC_NUMBER) |
| + return -EILSEQ; |
| + |
| + if (le64_to_cpu(fh.fh_version) > INCFS_FORMAT_CURRENT_VER) |
| + return -EILSEQ; |
| + |
| + if (le16_to_cpu(fh.fh_data_block_size) != INCFS_DATA_FILE_BLOCK_SIZE) |
| + return -EILSEQ; |
| + |
| + if (le16_to_cpu(fh.fh_header_size) != sizeof(fh)) |
| + return -EILSEQ; |
| + |
| + if (first_md_off) |
| + *first_md_off = le64_to_cpu(fh.fh_first_md_offset); |
| + if (uuid) |
| + *uuid = fh.fh_uuid; |
| + if (file_size) |
| + *file_size = le64_to_cpu(fh.fh_file_size); |
| + if (flags) |
| + *flags = le32_to_cpu(fh.fh_flags); |
| + return 0; |
| +} |
| + |
| +/* |
| + * Read through metadata records from the backing file one by one |
| + * and call provided metadata handlers. |
| + */ |
| +int incfs_read_next_metadata_record(struct backing_file_context *bfc, |
| + struct metadata_handler *handler) |
| +{ |
| + const ssize_t max_md_size = INCFS_MAX_METADATA_RECORD_SIZE; |
| + ssize_t bytes_read = 0; |
| + size_t md_record_size = 0; |
| + loff_t next_record = 0; |
| + int res = 0; |
| + struct incfs_md_header *md_hdr = NULL; |
| + |
| + if (!bfc || !handler) |
| + return -EFAULT; |
| + |
| + if (handler->md_record_offset == 0) |
| + return -EPERM; |
| + |
| + memset(&handler->md_buffer, 0, max_md_size); |
| + bytes_read = incfs_kread(bfc, &handler->md_buffer, max_md_size, |
| + handler->md_record_offset); |
| + if (bytes_read < 0) |
| + return bytes_read; |
| + if (bytes_read < sizeof(*md_hdr)) |
| + return -EBADMSG; |
| + |
| + md_hdr = &handler->md_buffer.md_header; |
| + next_record = le64_to_cpu(md_hdr->h_next_md_offset); |
| + md_record_size = le16_to_cpu(md_hdr->h_record_size); |
| + |
| + if (md_record_size > max_md_size) { |
| + pr_warn("incfs: The record is too large. Size: %zu", |
| + md_record_size); |
| + return -EBADMSG; |
| + } |
| + |
| + if (bytes_read < md_record_size) { |
| + pr_warn("incfs: The record hasn't been fully read."); |
| + return -EBADMSG; |
| + } |
| + |
| + if (next_record <= handler->md_record_offset && next_record != 0) { |
| + pr_warn("incfs: Next record (%lld) points back in file.", |
| + next_record); |
| + return -EBADMSG; |
| + } |
| + |
| + switch (md_hdr->h_md_entry_type) { |
| + case INCFS_MD_NONE: |
| + break; |
| + case INCFS_MD_BLOCK_MAP: |
| + if (handler->handle_blockmap) |
| + res = handler->handle_blockmap( |
| + &handler->md_buffer.blockmap, handler); |
| + break; |
| + case INCFS_MD_FILE_ATTR: |
| + /* |
| + * File attrs no longer supported, ignore section for |
| + * compatibility |
| + */ |
| + break; |
| + case INCFS_MD_SIGNATURE: |
| + if (handler->handle_signature) |
| + res = handler->handle_signature( |
| + &handler->md_buffer.signature, handler); |
| + break; |
| + case INCFS_MD_STATUS: |
| + if (handler->handle_status) |
| + res = handler->handle_status( |
| + &handler->md_buffer.status, handler); |
| + break; |
| + case INCFS_MD_VERITY_SIGNATURE: |
| + if (handler->handle_verity_signature) |
| + res = handler->handle_verity_signature( |
| + &handler->md_buffer.verity_signature, handler); |
| + break; |
| + default: |
| + res = -ENOTSUPP; |
| + break; |
| + } |
| + |
| + if (!res) { |
| + if (next_record == 0) { |
| + /* |
| + * Zero offset for the next record means that the last |
| + * metadata record has just been processed. |
| + */ |
| + bfc->bc_last_md_record_offset = |
| + handler->md_record_offset; |
| + } |
| + handler->md_prev_record_offset = handler->md_record_offset; |
| + handler->md_record_offset = next_record; |
| + } |
| + return res; |
| +} |
| + |
| +ssize_t incfs_kread(struct backing_file_context *bfc, void *buf, size_t size, |
| + loff_t pos) |
| +{ |
| + const struct cred *old_cred = override_creds(bfc->bc_cred); |
| + int ret = kernel_read(bfc->bc_file, buf, size, &pos); |
| + |
| + revert_creds(old_cred); |
| + return ret; |
| +} |
| + |
| +ssize_t incfs_kwrite(struct backing_file_context *bfc, const void *buf, |
| + size_t size, loff_t pos) |
| +{ |
| + const struct cred *old_cred = override_creds(bfc->bc_cred); |
| + int ret = kernel_write(bfc->bc_file, buf, size, &pos); |
| + |
| + revert_creds(old_cred); |
| + return ret; |
| +} |
| diff --git a/fs/incfs/format.h b/fs/incfs/format.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/format.h |
| @@ -0,0 +1,408 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright 2018 Google LLC |
| + */ |
| + |
| +/* |
| + * Overview |
| + * -------- |
| + * The backbone of the incremental-fs on-disk format is an append-only linked |
| + * list of metadata blocks. Each metadata block contains an offset of the next |
| + * one. These blocks describe files and directories on the |
| + * file system. They also represent actions of adding and removing file names |
| + * (hard links). |
| + * |
| + * Every time an incremental-fs instance is mounted, it reads through this list |
| + * to recreate filesystem's state in memory. An offset of the first record in |
| + * the metadata list is stored in the superblock at the beginning of the backing |
| + * file. |
| + * |
| + * Most of the backing file is taken by data areas and blockmaps. |
| + * Since data blocks can be compressed and have different sizes, |
| + * single per-file data area can't be pre-allocated. That's why blockmaps are |
| + * needed in order to find a location and size of each data block in |
| + * the backing file. Each time a file is created, a corresponding block map is |
| + * allocated to store future offsets of data blocks. |
| + * |
| + * Whenever a data block is given by data loader to incremental-fs: |
| + * - A data area with the given block is appended to the end of |
| + * the backing file. |
| + * - A record in the blockmap for the given block index is updated to reflect |
| + * its location, size, and compression algorithm. |
| + |
| + * Metadata records |
| + * ---------------- |
| + * incfs_blockmap - metadata record that specifies size and location |
| + * of a blockmap area for a given file. This area |
| + * contains an array of incfs_blockmap_entry-s. |
| + * incfs_file_signature - metadata record that specifies where file signature |
| + * and its hash tree can be found in the backing file. |
| + * |
| + * incfs_file_attr - metadata record that specifies where additional file |
| + * attributes blob can be found. |
| + * |
| + * Metadata header |
| + * --------------- |
| + * incfs_md_header - header of a metadata record. It's always a part |
| + * of other structures and serves the purpose of metadata |
| + * bookkeeping. |
| + * |
| + * +-----------------------------------------------+ ^ |
| + * | incfs_md_header | | |
| + * | 1. type of body(BLOCKMAP, FILE_ATTR..) | | |
| + * | 2. size of the whole record header + body | | |
| + * | 3. CRC the whole record header + body | | |
| + * | 4. offset of the previous md record |]------+ |
| + * | 5. offset of the next md record (md link) |]---+ |
| + * +-----------------------------------------------+ | |
| + * | Metadata record body with useful data | | |
| + * +-----------------------------------------------+ | |
| + * +---> |
| + * |
| + * Other ondisk structures |
| + * ----------------------- |
| + * incfs_file_header - backing file header |
| + * incfs_blockmap_entry - a record in a blockmap area that describes size |
| + * and location of a data block. |
| + * Data blocks don't have any particular structure; they are written to the |
| + * backing file in a raw form as they come from a data loader. |
| + * |
| + * Backing file layout |
| + * ------------------- |
| + * |
| + * |
| + * +-------------------------------------------+ |
| + * | incfs_file_header |]---+ |
| + * +-------------------------------------------+ | |
| + * | metadata |<---+ |
| + * | incfs_file_signature |]---+ |
| + * +-------------------------------------------+ | |
| + * ......................... | |
| + * +-------------------------------------------+ | metadata |
| + * +------->| blockmap area | | list links |
| + * | | [incfs_blockmap_entry] | | |
| + * | | [incfs_blockmap_entry] | | |
| + * | | [incfs_blockmap_entry] | | |
| + * | +--[| [incfs_blockmap_entry] | | |
| + * | | | [incfs_blockmap_entry] | | |
| + * | | | [incfs_blockmap_entry] | | |
| + * | | +-------------------------------------------+ | |
| + * | | ......................... | |
| + * | | +-------------------------------------------+ | |
| + * | | | metadata |<---+ |
| + * +----|--[| incfs_blockmap |]---+ |
| + * | +-------------------------------------------+ | |
| + * | ......................... | |
| + * | +-------------------------------------------+ | |
| + * +-->| data block | | |
| + * +-------------------------------------------+ | |
| + * ......................... | |
| + * +-------------------------------------------+ | |
| + * | metadata |<---+ |
| + * | incfs_file_attr | |
| + * +-------------------------------------------+ |
| + */ |
| +#ifndef _INCFS_FORMAT_H |
| +#define _INCFS_FORMAT_H |
| +#include <linux/types.h> |
| +#include <linux/kernel.h> |
| +#include <uapi/linux/incrementalfs.h> |
| + |
| +#include "internal.h" |
| + |
| +#define INCFS_MAX_NAME_LEN 255 |
| +#define INCFS_FORMAT_V1 1 |
| +#define INCFS_FORMAT_CURRENT_VER INCFS_FORMAT_V1 |
| + |
| +enum incfs_metadata_type { |
| + INCFS_MD_NONE = 0, |
| + INCFS_MD_BLOCK_MAP = 1, |
| + INCFS_MD_FILE_ATTR = 2, |
| + INCFS_MD_SIGNATURE = 3, |
| + INCFS_MD_STATUS = 4, |
| + INCFS_MD_VERITY_SIGNATURE = 5, |
| +}; |
| + |
| +enum incfs_file_header_flags { |
| + INCFS_FILE_MAPPED = 1 << 1, |
| +}; |
| + |
| +/* Header included at the beginning of all metadata records on the disk. */ |
| +struct incfs_md_header { |
| + __u8 h_md_entry_type; |
| + |
| + /* |
| + * Size of the metadata record. |
| + * (e.g. inode, dir entry etc) not just this struct. |
| + */ |
| + __le16 h_record_size; |
| + |
| + /* |
| + * Was: CRC32 of the metadata record. |
| + * (e.g. inode, dir entry etc) not just this struct. |
| + */ |
| + __le32 h_unused1; |
| + |
| + /* Offset of the next metadata entry if any */ |
| + __le64 h_next_md_offset; |
| + |
| + /* Was: Offset of the previous metadata entry if any */ |
| + __le64 h_unused2; |
| + |
| +} __packed; |
| + |
| +/* Backing file header */ |
| +struct incfs_file_header { |
| + /* Magic number: INCFS_MAGIC_NUMBER */ |
| + __le64 fh_magic; |
| + |
| + /* Format version: INCFS_FORMAT_CURRENT_VER */ |
| + __le64 fh_version; |
| + |
| + /* sizeof(incfs_file_header) */ |
| + __le16 fh_header_size; |
| + |
| + /* INCFS_DATA_FILE_BLOCK_SIZE */ |
| + __le16 fh_data_block_size; |
| + |
| + /* File flags, from incfs_file_header_flags */ |
| + __le32 fh_flags; |
| + |
| + union { |
| + /* Standard incfs file */ |
| + struct { |
| + /* Offset of the first metadata record */ |
| + __le64 fh_first_md_offset; |
| + |
| + /* Full size of the file's content */ |
| + __le64 fh_file_size; |
| + |
| + /* File uuid */ |
| + incfs_uuid_t fh_uuid; |
| + }; |
| + |
| + /* Mapped file - INCFS_FILE_MAPPED set in fh_flags */ |
| + struct { |
| + /* Offset in original file */ |
| + __le64 fh_original_offset; |
| + |
| + /* Full size of the file's content */ |
| + __le64 fh_mapped_file_size; |
| + |
| + /* Original file's uuid */ |
| + incfs_uuid_t fh_original_uuid; |
| + }; |
| + }; |
| +} __packed; |
| + |
| +enum incfs_block_map_entry_flags { |
| + INCFS_BLOCK_COMPRESSED_LZ4 = 1, |
| + INCFS_BLOCK_COMPRESSED_ZSTD = 2, |
| + |
| + /* Reserve 3 bits for compression alg */ |
| + INCFS_BLOCK_COMPRESSED_MASK = 7, |
| +}; |
| + |
| +/* Block map entry pointing to an actual location of the data block. */ |
| +struct incfs_blockmap_entry { |
| + /* Offset of the actual data block. Lower 32 bits */ |
| + __le32 me_data_offset_lo; |
| + |
| + /* Offset of the actual data block. Higher 16 bits */ |
| + __le16 me_data_offset_hi; |
| + |
| + /* How many bytes the data actually occupies in the backing file */ |
| + __le16 me_data_size; |
| + |
| + /* Block flags from incfs_block_map_entry_flags */ |
| + __le16 me_flags; |
| +} __packed; |
| + |
| +/* Metadata record for locations of file blocks. Type = INCFS_MD_BLOCK_MAP */ |
| +struct incfs_blockmap { |
| + struct incfs_md_header m_header; |
| + |
| + /* Base offset of the array of incfs_blockmap_entry */ |
| + __le64 m_base_offset; |
| + |
| + /* Size of the map entry array in blocks */ |
| + __le32 m_block_count; |
| +} __packed; |
| + |
| +/* |
| + * Metadata record for file signature. Type = INCFS_MD_SIGNATURE |
| + * |
| + * The signature stored here is the APK V4 signature data blob. See the |
| + * definition of incfs_new_file_args::signature_info for an explanation of this |
| + * blob. Specifically, it contains the root hash, but it does *not* contain |
| + * anything that the kernel treats as a signature. |
| + * |
| + * When FS_IOC_ENABLE_VERITY is called on a file without this record, an APK V4 |
| + * signature blob and a hash tree are added to the file, and then this metadata |
| + * record is created to record their locations. |
| + */ |
| +struct incfs_file_signature { |
| + struct incfs_md_header sg_header; |
| + |
| + __le32 sg_sig_size; /* The size of the signature. */ |
| + |
| + __le64 sg_sig_offset; /* Signature's offset in the backing file */ |
| + |
| + __le32 sg_hash_tree_size; /* The size of the hash tree. */ |
| + |
| + __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ |
| +} __packed; |
| + |
| +/* In memory version of above */ |
| +struct incfs_df_signature { |
| + u32 sig_size; |
| + u64 sig_offset; |
| + u32 hash_size; |
| + u64 hash_offset; |
| +}; |
| + |
| +struct incfs_status { |
| + struct incfs_md_header is_header; |
| + |
| + __le32 is_data_blocks_written; /* Number of data blocks written */ |
| + |
| + __le32 is_hash_blocks_written; /* Number of hash blocks written */ |
| + |
| + __le32 is_dummy[6]; /* Spare fields */ |
| +} __packed; |
| + |
| +/* |
| + * Metadata record for verity signature. Type = INCFS_MD_VERITY_SIGNATURE |
| + * |
| + * This record will only exist for verity-enabled files with signatures. Verity |
| + * enabled files without signatures do not have this record. |
| + * |
| + * This is obsolete, as incfs no longer checks this type of signature. |
| + */ |
| +struct incfs_file_verity_signature { |
| + struct incfs_md_header vs_header; |
| + |
| + /* The size of the signature */ |
| + __le32 vs_size; |
| + |
| + /* Signature's offset in the backing file */ |
| + __le64 vs_offset; |
| +} __packed; |
| + |
| +/* In memory version of above */ |
| +struct incfs_df_verity_signature { |
| + u32 size; |
| + u64 offset; |
| +}; |
| + |
| +/* State of the backing file. */ |
| +struct backing_file_context { |
| + /* Protects writes to bc_file */ |
| + struct mutex bc_mutex; |
| + |
| + /* File object to read data from */ |
| + struct file *bc_file; |
| + |
| + /* |
| + * Offset of the last known metadata record in the backing file. |
| + * 0 means there are no metadata records. |
| + */ |
| + loff_t bc_last_md_record_offset; |
| + |
| + /* |
| + * Credentials to set before reads/writes |
| + * Note that this is a pointer to the mount_info mi_owner field so |
| + * there is no need to get/put the creds |
| + */ |
| + const struct cred *bc_cred; |
| +}; |
| + |
| +struct metadata_handler { |
| + loff_t md_record_offset; |
| + loff_t md_prev_record_offset; |
| + void *context; |
| + |
| + union { |
| + struct incfs_md_header md_header; |
| + struct incfs_blockmap blockmap; |
| + struct incfs_file_signature signature; |
| + struct incfs_status status; |
| + struct incfs_file_verity_signature verity_signature; |
| + } md_buffer; |
| + |
| + int (*handle_blockmap)(struct incfs_blockmap *bm, |
| + struct metadata_handler *handler); |
| + int (*handle_signature)(struct incfs_file_signature *sig, |
| + struct metadata_handler *handler); |
| + int (*handle_status)(struct incfs_status *sig, |
| + struct metadata_handler *handler); |
| + int (*handle_verity_signature)(struct incfs_file_verity_signature *s, |
| + struct metadata_handler *handler); |
| +}; |
| +#define INCFS_MAX_METADATA_RECORD_SIZE \ |
| + sizeof_field(struct metadata_handler, md_buffer) |
| + |
| +/* Backing file context management */ |
| +struct mount_info; |
| +struct backing_file_context *incfs_alloc_bfc(struct mount_info *mi, |
| + struct file *backing_file); |
| + |
| +void incfs_free_bfc(struct backing_file_context *bfc); |
| + |
| +/* Writing stuff */ |
| +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, |
| + u32 block_count); |
| + |
| +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, |
| + incfs_uuid_t *uuid, u64 file_size); |
| + |
| +int incfs_write_mapping_fh_to_backing_file(struct backing_file_context *bfc, |
| + incfs_uuid_t *uuid, u64 file_size, u64 offset); |
| + |
| +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, |
| + struct mem_range block, |
| + int block_index, loff_t bm_base_off, |
| + u16 flags); |
| + |
| +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, |
| + struct mem_range block, |
| + int block_index, |
| + loff_t hash_area_off, |
| + loff_t bm_base_off, |
| + loff_t file_size); |
| + |
| +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, |
| + struct mem_range sig, u32 tree_size, |
| + loff_t *tree_offset, loff_t *sig_offset); |
| + |
| +int incfs_write_status_to_backing_file(struct backing_file_context *bfc, |
| + loff_t status_offset, |
| + u32 data_blocks_written, |
| + u32 hash_blocks_written); |
| +int incfs_write_verity_signature_to_backing_file( |
| + struct backing_file_context *bfc, struct mem_range signature, |
| + loff_t *offset); |
| + |
| +/* Reading stuff */ |
| +int incfs_read_file_header(struct backing_file_context *bfc, |
| + loff_t *first_md_off, incfs_uuid_t *uuid, |
| + u64 *file_size, u32 *flags); |
| + |
| +int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, |
| + loff_t bm_base_off, |
| + struct incfs_blockmap_entry *bm_entry); |
| + |
| +int incfs_read_blockmap_entries(struct backing_file_context *bfc, |
| + struct incfs_blockmap_entry *entries, |
| + int start_index, int blocks_number, |
| + loff_t bm_base_off); |
| + |
| +int incfs_read_next_metadata_record(struct backing_file_context *bfc, |
| + struct metadata_handler *handler); |
| + |
| +ssize_t incfs_kread(struct backing_file_context *bfc, void *buf, size_t size, |
| + loff_t pos); |
| +ssize_t incfs_kwrite(struct backing_file_context *bfc, const void *buf, |
| + size_t size, loff_t pos); |
| + |
| +#endif /* _INCFS_FORMAT_H */ |
| diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/integrity.c |
| @@ -0,0 +1,235 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2019 Google LLC |
| + */ |
| +#include <crypto/sha2.h> |
| +#include <crypto/hash.h> |
| +#include <linux/err.h> |
| +#include <linux/version.h> |
| + |
| +#include "integrity.h" |
| + |
| +struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) |
| +{ |
| + static struct incfs_hash_alg sha256 = { |
| + .name = "sha256", |
| + .digest_size = SHA256_DIGEST_SIZE, |
| + .id = INCFS_HASH_TREE_SHA256 |
| + }; |
| + struct incfs_hash_alg *result = NULL; |
| + struct crypto_shash *shash; |
| + |
| + if (id == INCFS_HASH_TREE_SHA256) { |
| + BUILD_BUG_ON(INCFS_MAX_HASH_SIZE < SHA256_DIGEST_SIZE); |
| + result = &sha256; |
| + } |
| + |
| + if (result == NULL) |
| + return ERR_PTR(-ENOENT); |
| + |
| + /* pairs with cmpxchg_release() below */ |
| + shash = smp_load_acquire(&result->shash); |
| + if (shash) |
| + return result; |
| + |
| + shash = crypto_alloc_shash(result->name, 0, 0); |
| + if (IS_ERR(shash)) { |
| + int err = PTR_ERR(shash); |
| + |
| + pr_err("Can't allocate hash alg %s, error code:%d", |
| + result->name, err); |
| + return ERR_PTR(err); |
| + } |
| + |
| + /* pairs with smp_load_acquire() above */ |
| + if (cmpxchg_release(&result->shash, NULL, shash) != NULL) |
| + crypto_free_shash(shash); |
| + |
| + return result; |
| +} |
| + |
| +struct signature_info { |
| + u32 version; |
| + enum incfs_hash_tree_algorithm hash_algorithm; |
| + u8 log2_blocksize; |
| + struct mem_range salt; |
| + struct mem_range root_hash; |
| +}; |
| + |
| +static bool read_u32(u8 **p, u8 *top, u32 *result) |
| +{ |
| + if (*p + sizeof(u32) > top) |
| + return false; |
| + |
| + *result = le32_to_cpu(*(__le32 *)*p); |
| + *p += sizeof(u32); |
| + return true; |
| +} |
| + |
| +static bool read_u8(u8 **p, u8 *top, u8 *result) |
| +{ |
| + if (*p + sizeof(u8) > top) |
| + return false; |
| + |
| + *result = *(u8 *)*p; |
| + *p += sizeof(u8); |
| + return true; |
| +} |
| + |
| +static bool read_mem_range(u8 **p, u8 *top, struct mem_range *range) |
| +{ |
| + u32 len; |
| + |
| + if (!read_u32(p, top, &len) || *p + len > top) |
| + return false; |
| + |
| + range->len = len; |
| + range->data = *p; |
| + *p += len; |
| + return true; |
| +} |
| + |
| +static int incfs_parse_signature(struct mem_range signature, |
| + struct signature_info *si) |
| +{ |
| + u8 *p = signature.data; |
| + u8 *top = signature.data + signature.len; |
| + u32 hash_section_size; |
| + |
| + if (signature.len > INCFS_MAX_SIGNATURE_SIZE) |
| + return -EINVAL; |
| + |
| + if (!read_u32(&p, top, &si->version) || |
| + si->version != INCFS_SIGNATURE_VERSION) |
| + return -EINVAL; |
| + |
| + if (!read_u32(&p, top, &hash_section_size) || |
| + p + hash_section_size > top) |
| + return -EINVAL; |
| + top = p + hash_section_size; |
| + |
| + if (!read_u32(&p, top, &si->hash_algorithm) || |
| + si->hash_algorithm != INCFS_HASH_TREE_SHA256) |
| + return -EINVAL; |
| + |
| + if (!read_u8(&p, top, &si->log2_blocksize) || si->log2_blocksize != 12) |
| + return -EINVAL; |
| + |
| + if (!read_mem_range(&p, top, &si->salt)) |
| + return -EINVAL; |
| + |
| + if (!read_mem_range(&p, top, &si->root_hash)) |
| + return -EINVAL; |
| + |
| + if (p != top) |
| + return -EINVAL; |
| + |
| + return 0; |
| +} |
| + |
| +struct mtree *incfs_alloc_mtree(struct mem_range signature, |
| + int data_block_count) |
| +{ |
| + int error; |
| + struct signature_info si; |
| + struct mtree *result = NULL; |
| + struct incfs_hash_alg *hash_alg = NULL; |
| + int hash_per_block; |
| + int lvl; |
| + int total_blocks = 0; |
| + int blocks_in_level[INCFS_MAX_MTREE_LEVELS]; |
| + int blocks = data_block_count; |
| + |
| + if (data_block_count <= 0) |
| + return ERR_PTR(-EINVAL); |
| + |
| + error = incfs_parse_signature(signature, &si); |
| + if (error) |
| + return ERR_PTR(error); |
| + |
| + hash_alg = incfs_get_hash_alg(si.hash_algorithm); |
| + if (IS_ERR(hash_alg)) |
| + return ERR_PTR(PTR_ERR(hash_alg)); |
| + |
| + if (si.root_hash.len < hash_alg->digest_size) |
| + return ERR_PTR(-EINVAL); |
| + |
| + result = kzalloc(sizeof(*result), GFP_NOFS); |
| + if (!result) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + result->alg = hash_alg; |
| + hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / result->alg->digest_size; |
| + |
| + /* Calculating tree geometry. */ |
| + /* First pass: calculate how many blocks in each tree level. */ |
| + for (lvl = 0; blocks > 1; lvl++) { |
| + if (lvl >= INCFS_MAX_MTREE_LEVELS) { |
| + pr_err("incfs: too much data in mtree"); |
| + goto err; |
| + } |
| + |
| + blocks = (blocks + hash_per_block - 1) / hash_per_block; |
| + blocks_in_level[lvl] = blocks; |
| + total_blocks += blocks; |
| + } |
| + result->depth = lvl; |
| + result->hash_tree_area_size = total_blocks * INCFS_DATA_FILE_BLOCK_SIZE; |
| + if (result->hash_tree_area_size > INCFS_MAX_HASH_AREA_SIZE) |
| + goto err; |
| + |
| + blocks = 0; |
| + /* Second pass: calculate offset of each level. 0th level goes last. */ |
| + for (lvl = 0; lvl < result->depth; lvl++) { |
| + u32 suboffset; |
| + |
| + blocks += blocks_in_level[lvl]; |
| + suboffset = (total_blocks - blocks) |
| + * INCFS_DATA_FILE_BLOCK_SIZE; |
| + |
| + result->hash_level_suboffset[lvl] = suboffset; |
| + } |
| + |
| + /* Root hash is stored separately from the rest of the tree. */ |
| + memcpy(result->root_hash, si.root_hash.data, hash_alg->digest_size); |
| + return result; |
| + |
| +err: |
| + kfree(result); |
| + return ERR_PTR(-E2BIG); |
| +} |
| + |
| +void incfs_free_mtree(struct mtree *tree) |
| +{ |
| + kfree(tree); |
| +} |
| + |
| +int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, |
| + struct mem_range digest) |
| +{ |
| + SHASH_DESC_ON_STACK(desc, alg->shash); |
| + |
| + if (!alg || !alg->shash || !data.data || !digest.data) |
| + return -EFAULT; |
| + |
| + if (alg->digest_size > digest.len) |
| + return -EINVAL; |
| + |
| + desc->tfm = alg->shash; |
| + |
| + if (data.len < INCFS_DATA_FILE_BLOCK_SIZE) { |
| + int err; |
| + void *buf = kzalloc(INCFS_DATA_FILE_BLOCK_SIZE, GFP_NOFS); |
| + |
| + if (!buf) |
| + return -ENOMEM; |
| + |
| + memcpy(buf, data.data, data.len); |
| + err = crypto_shash_digest(desc, buf, INCFS_DATA_FILE_BLOCK_SIZE, |
| + digest.data); |
| + kfree(buf); |
| + return err; |
| + } |
| + return crypto_shash_digest(desc, data.data, data.len, digest.data); |
| +} |
| + |
| diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/integrity.h |
| @@ -0,0 +1,56 @@ |
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2019 Google LLC
 */
#ifndef _INCFS_INTEGRITY_H
#define _INCFS_INTEGRITY_H
#include <linux/types.h>
#include <linux/kernel.h>
#include <crypto/hash.h>

#include <uapi/linux/incrementalfs.h>

#include "internal.h"

/* Maximum number of levels an in-memory Merkle tree may have. */
#define INCFS_MAX_MTREE_LEVELS 8

/* Upper bound on the backing-file hash area size (1280 MiB). */
#define INCFS_MAX_HASH_AREA_SIZE (1280 * 1024 * 1024)

/* A hash algorithm usable for Merkle trees, wrapping a crypto shash tfm. */
struct incfs_hash_alg {
	const char *name;
	int digest_size;
	enum incfs_hash_tree_algorithm id;

	struct crypto_shash *shash;
};

/* Merkle tree structure. */
struct mtree {
	struct incfs_hash_alg *alg;

	/* Root hash is stored separately from the in-file hash area. */
	u8 root_hash[INCFS_MAX_HASH_SIZE];

	/* Offset of each hash level in the hash area. */
	u32 hash_level_suboffset[INCFS_MAX_MTREE_LEVELS];

	u32 hash_tree_area_size;

	/* Number of levels in hash_level_suboffset */
	int depth;
};

struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id);

/* Build a struct mtree from a serialized signature blob; returns ERR_PTR
 * on failure. Free with incfs_free_mtree().
 */
struct mtree *incfs_alloc_mtree(struct mem_range signature,
				int data_block_count);

void incfs_free_mtree(struct mtree *tree);

size_t incfs_get_mtree_depth(enum incfs_hash_tree_algorithm alg, loff_t size);

size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg,
					loff_t size);

/* Hash @data into @digest (see integrity.c for padding semantics). */
int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data,
			struct mem_range digest);

#endif /* _INCFS_INTEGRITY_H */
| diff --git a/fs/incfs/internal.h b/fs/incfs/internal.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/internal.h |
| @@ -0,0 +1,23 @@ |
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright 2018 Google LLC
 */
#ifndef _INCFS_INTERNAL_H
#define _INCFS_INTERNAL_H
#include <linux/types.h>

/* A pointer/length pair describing a span of kernel memory. */
struct mem_range {
	u8 *data;
	size_t len;
};

/* Convenience constructor for a struct mem_range value. */
static inline struct mem_range range(u8 *data, size_t len)
{
	return (struct mem_range){ .data = data, .len = len };
}

/* Warn (once) if the given mutex is not held by the current context. */
#define LOCK_REQUIRED(lock) WARN_ON_ONCE(!mutex_is_locked(&lock))

/* Error code incfs reports for corrupted backing data (same as ext4/xfs). */
#define EFSCORRUPTED EUCLEAN

#endif /* _INCFS_INTERNAL_H */
| diff --git a/fs/incfs/main.c b/fs/incfs/main.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/main.c |
| @@ -0,0 +1,48 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2018 Google LLC |
| + */ |
| +#include <linux/fs.h> |
| +#include <linux/init.h> |
| +#include <linux/module.h> |
| + |
| +#include <uapi/linux/incrementalfs.h> |
| + |
| +#include "sysfs.h" |
| +#include "vfs.h" |
| + |
/* Filesystem type registration for "incfs"; mount/kill_sb live in vfs.c. */
static struct file_system_type incfs_fs_type = {
	.owner = THIS_MODULE,
	.name = INCFS_NAME,
	.mount = incfs_mount_fs,
	.kill_sb = incfs_kill_sb,
	.fs_flags = 0
};

/* Module init: create /sys/fs/incfs nodes, then register the fs type. */
static int __init init_incfs_module(void)
{
	int err = 0;

	err = incfs_init_sysfs();
	if (err)
		return err;

	err = register_filesystem(&incfs_fs_type);
	if (err)
		/* Roll back the sysfs state if registration fails. */
		incfs_cleanup_sysfs();

	return err;
}

/* Module exit: tear down sysfs and unregister the filesystem type. */
static void __exit cleanup_incfs_module(void)
{
	/*
	 * NOTE(review): teardown is not in reverse order of init; sysfs is
	 * removed while the fs type is still registered. Likely harmless at
	 * module exit (no mounts can remain), but worth confirming.
	 */
	incfs_cleanup_sysfs();
	unregister_filesystem(&incfs_fs_type);
}

module_init(init_incfs_module);
module_exit(cleanup_incfs_module);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Eugene Zemtsov <ezemtsov@google.com>");
MODULE_DESCRIPTION("Incremental File System");
| diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/pseudo_files.c |
| @@ -0,0 +1,1394 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2020 Google LLC |
| + */ |
| + |
| +#include <linux/file.h> |
| +#include <linux/fs.h> |
| +#include <linux/fsnotify.h> |
| +#include <linux/namei.h> |
| +#include <linux/poll.h> |
| +#include <linux/syscalls.h> |
| +#include <linux/fdtable.h> |
| +#include <linux/filelock.h> |
| + |
| +#include <uapi/linux/incrementalfs.h> |
| + |
| +#include "pseudo_files.h" |
| + |
| +#include "data_mgmt.h" |
| +#include "format.h" |
| +#include "integrity.h" |
| +#include "vfs.h" |
| + |
/* Mode bits given to user-writable pseudo files exposed by incfs. */
#define READ_WRITE_FILE_MODE 0666

/* Defined later in this file; used by validate_name(). */
static bool is_pseudo_filename(struct mem_range name);

/*******************************************************************************
 * .pending_reads pseudo file definition
 ******************************************************************************/
/* Fixed inode number reserved for the .pending_reads pseudo file. */
#define INCFS_PENDING_READS_INODE 2
static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME;

/* State of an open .pending_reads file, unique for each file descriptor. */
struct pending_reads_state {
	/* A serial number of the last pending read obtained from this file. */
	int last_pending_read_sn;
};
| + |
/*
 * read() handler for .pending_reads.
 *
 * Copies records describing reads still waiting for data, starting after
 * the last serial number this open file already reported. Returns the
 * number of bytes copied, 0 when nothing new exists, or a negative errno.
 */
static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len,
			    loff_t *ppos)
{
	struct pending_reads_state *pr_state = f->private_data;
	struct mount_info *mi = get_mount_info(file_superblock(f));
	bool report_uid;
	unsigned long page = 0;
	struct incfs_pending_read_info *reads_buf = NULL;
	struct incfs_pending_read_info2 *reads_buf2 = NULL;
	size_t record_size;
	size_t reads_to_collect;
	int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn);
	int new_max_sn = last_known_read_sn;
	int reads_collected = 0;
	ssize_t result = 0;

	if (!mi)
		return -EFAULT;

	/*
	 * Record layout depends on the report_uid mount option: v2 records
	 * additionally carry the reading process's uid.
	 */
	report_uid = mi->mi_options.report_uid;
	record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf);
	reads_to_collect = len / record_size;

	/* Nothing newer than this fd has already seen. */
	if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn))
		return 0;

	/* Collect into a scratch page, then copy out in a single call. */
	page = get_zeroed_page(GFP_NOFS);
	if (!page)
		return -ENOMEM;

	if (report_uid)
		reads_buf2 = (struct incfs_pending_read_info2 *) page;
	else
		reads_buf = (struct incfs_pending_read_info *) page;

	/* Cap at one page's worth of records. */
	reads_to_collect =
		min_t(size_t, PAGE_SIZE / record_size, reads_to_collect);

	reads_collected = incfs_collect_pending_reads(mi, last_known_read_sn,
				reads_buf, reads_buf2, reads_to_collect,
				&new_max_sn);

	if (reads_collected < 0) {
		result = reads_collected;
		goto out;
	}

	/*
	 * Just to make sure that we don't accidentally copy more data
	 * to reads buffer than userspace can handle.
	 */
	reads_collected = min_t(size_t, reads_collected, reads_to_collect);
	result = reads_collected * record_size;

	/* Copy reads info to the userspace buffer */
	if (copy_to_user(buf, (void *)page, result)) {
		result = -EFAULT;
		goto out;
	}

	/* Only advance the high-water mark after a successful copy. */
	WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn);
	*ppos = 0;

out:
	free_page(page);
	return result;
}
| + |
| +static __poll_t pending_reads_poll(struct file *file, poll_table *wait) |
| +{ |
| + struct pending_reads_state *state = file->private_data; |
| + struct mount_info *mi = get_mount_info(file_superblock(file)); |
| + __poll_t ret = 0; |
| + |
| + poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); |
| + if (incfs_fresh_pending_reads_exist(mi, |
| + state->last_pending_read_sn)) |
| + ret = EPOLLIN | EPOLLRDNORM; |
| + |
| + return ret; |
| +} |
| + |
| +static int pending_reads_open(struct inode *inode, struct file *file) |
| +{ |
| + struct pending_reads_state *state = NULL; |
| + |
| + state = kzalloc(sizeof(*state), GFP_NOFS); |
| + if (!state) |
| + return -ENOMEM; |
| + |
| + file->private_data = state; |
| + return 0; |
| +} |
| + |
| +static int pending_reads_release(struct inode *inode, struct file *file) |
| +{ |
| + kfree(file->private_data); |
| + return 0; |
| +} |
| + |
| +static long ioctl_permit_fill(struct file *f, void __user *arg) |
| +{ |
| + struct incfs_permit_fill __user *usr_permit_fill = arg; |
| + struct incfs_permit_fill permit_fill; |
| + long error = 0; |
| + struct file *file = NULL; |
| + struct incfs_file_data *fd; |
| + |
| + if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) |
| + return -EFAULT; |
| + |
| + file = fget(permit_fill.file_descriptor); |
| + if (IS_ERR_OR_NULL(file)) { |
| + if (!file) |
| + return -ENOENT; |
| + |
| + return PTR_ERR(file); |
| + } |
| + |
| + if (file->f_op != &incfs_file_ops) { |
| + error = -EPERM; |
| + goto out; |
| + } |
| + |
| + if (file->f_inode->i_sb != f->f_inode->i_sb) { |
| + error = -EPERM; |
| + goto out; |
| + } |
| + |
| + fd = file->private_data; |
| + |
| + switch (fd->fd_fill_permission) { |
| + case CANT_FILL: |
| + fd->fd_fill_permission = CAN_FILL; |
| + break; |
| + |
| + case CAN_FILL: |
| + pr_debug("CAN_FILL already set"); |
| + break; |
| + |
| + default: |
| + pr_warn("Invalid file private data"); |
| + error = -EFAULT; |
| + goto out; |
| + } |
| + |
| +out: |
| + fput(file); |
| + return error; |
| +} |
| + |
/*
 * Apply @mode to @dentry's inode, mirroring the core VFS chmod path:
 * keep the non-permission mode bits, bump ctime, and retry if an NFS
 * delegation on the inode must be broken first.
 */
static int chmod(struct dentry *dentry, umode_t mode)
{
	struct inode *inode = dentry->d_inode;
	struct inode *delegated_inode = NULL;
	struct iattr newattrs;
	int error;

retry_deleg:
	inode_lock(inode);
	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
	newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
	error = notify_change(&nop_mnt_idmap, dentry, &newattrs, &delegated_inode);
	inode_unlock(inode);
	if (delegated_inode) {
		/* Wait for the delegation to be broken, then try again. */
		error = break_deleg_wait(&delegated_inode);
		if (!error)
			goto retry_deleg;
	}
	return error;
}
| + |
| +static bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) |
| +{ |
| + if (lhs.len != rhs.len) |
| + return false; |
| + return memcmp(lhs.data, rhs.data, lhs.len) == 0; |
| +} |
| + |
| +static int validate_name(char *file_name) |
| +{ |
| + struct mem_range name = range(file_name, strlen(file_name)); |
| + int i = 0; |
| + |
| + if (name.len > INCFS_MAX_NAME_LEN) |
| + return -ENAMETOOLONG; |
| + |
| + if (is_pseudo_filename(name)) |
| + return -EINVAL; |
| + |
| + for (i = 0; i < name.len; i++) |
| + if (name.data[i] == '/') |
| + return -EINVAL; |
| + |
| + return 0; |
| +} |
| + |
/*
 * Resolve @relative_path (a userspace string) against @base_path; when
 * @base_path is NULL the mount's backing directory is used. On success a
 * referenced path is returned through @result_path. A NULL
 * @relative_path yields the base directory itself.
 *
 * A temporary fd is installed for the base directory so that
 * user_path_at_empty() can resolve relative to it.
 */
static int dir_relative_path_resolve(
			struct mount_info *mi,
			const char __user *relative_path,
			struct path *result_path,
			struct path *base_path)
{
	int dir_fd = get_unused_fd_flags(0);
	struct file *dir_f = NULL;
	int error = 0;

	if (!base_path)
		base_path = &mi->mi_backing_dir_path;

	if (dir_fd < 0)
		return dir_fd;

	dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, current_cred());

	if (IS_ERR(dir_f)) {
		error = PTR_ERR(dir_f);
		/*
		 * NOTE(review): on this path dir_fd is reserved but has no
		 * file installed; confirm close_fd() below releases a
		 * never-installed descriptor correctly.
		 */
		goto out;
	}
	fd_install(dir_fd, dir_f);

	if (!relative_path) {
		/* No relative path given, just return the base dir. */
		*result_path = *base_path;
		path_get(result_path);
		goto out;
	}

	error = user_path_at_empty(dir_fd, relative_path,
		LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL);

out:
	close_fd(dir_fd);
	if (error)
		pr_debug("Error: %d\n", error);
	return error;
}
| + |
/*
 * Copy a signature blob from userspace into a fresh kernel buffer.
 *
 * Returns the copied range; range(NULL, 0) when @original is NULL (no
 * signature supplied); or a range whose .data is an ERR_PTR on failure.
 * The caller owns the returned buffer and must kfree() it.
 */
static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original,
							u64 size)
{
	u8 *result;

	if (!original)
		return range(NULL, 0);

	/* Reject anything larger than the on-disk format permits. */
	if (size > INCFS_MAX_SIGNATURE_SIZE)
		return range(ERR_PTR(-EFAULT), 0);

	result = kzalloc(size, GFP_NOFS | __GFP_COMP);
	if (!result)
		return range(ERR_PTR(-ENOMEM), 0);

	if (copy_from_user(result, original, size)) {
		kfree(result);
		return range(ERR_PTR(-EFAULT), 0);
	}

	return range(result, size);
}
| + |
/*
 * Write the initial incfs metadata into a freshly created backing file:
 * the file header, an optional signature plus Merkle-tree area
 * reservation, and a block map sized for data plus hash blocks.
 * Returns 0 on success or a negative errno.
 */
static int init_new_file(struct mount_info *mi, struct dentry *dentry,
			 incfs_uuid_t *uuid, u64 size, struct mem_range attr,
			 u8 __user *user_signature_info, u64 signature_size)
{
	struct path path = {};
	struct file *new_file;
	int error = 0;
	struct backing_file_context *bfc = NULL;
	u32 block_count;
	struct mem_range raw_signature = { NULL };
	struct mtree *hash_tree = NULL;

	if (!mi || !dentry || !uuid)
		return -EFAULT;

	/* Open the newly created backing file to write its metadata. */
	path = (struct path) {
		.mnt = mi->mi_backing_dir_path.mnt,
		.dentry = dentry
	};

	new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE,
			       current_cred());

	if (IS_ERR(new_file)) {
		error = PTR_ERR(new_file);
		goto out;
	}

	/* bfc holds its own reference to the file; drop ours. */
	bfc = incfs_alloc_bfc(mi, new_file);
	fput(new_file);
	if (IS_ERR(bfc)) {
		error = PTR_ERR(bfc);
		/* NULL so the cleanup path skips the unlock/free. */
		bfc = NULL;
		goto out;
	}

	mutex_lock(&bfc->bc_mutex);
	error = incfs_write_fh_to_backing_file(bfc, uuid, size);
	if (error)
		goto out;

	block_count = (u32)get_blocks_count_for_size(size);

	if (user_signature_info) {
		raw_signature = incfs_copy_signature_info_from_user(
			user_signature_info, signature_size);

		if (IS_ERR(raw_signature.data)) {
			error = PTR_ERR(raw_signature.data);
			raw_signature.data = NULL;
			goto out;
		}

		/* Parses the signature and sizes the hash-tree area. */
		hash_tree = incfs_alloc_mtree(raw_signature, block_count);
		if (IS_ERR(hash_tree)) {
			error = PTR_ERR(hash_tree);
			hash_tree = NULL;
			goto out;
		}

		error = incfs_write_signature_to_backing_file(bfc,
				raw_signature, hash_tree->hash_tree_area_size,
				NULL, NULL);
		if (error)
			goto out;

		/* Hash blocks share the block map with data blocks. */
		block_count += get_blocks_count_for_size(
			hash_tree->hash_tree_area_size);
	}

	if (block_count)
		error = incfs_write_blockmap_to_backing_file(bfc, block_count);

	if (error)
		goto out;

out:
	if (bfc) {
		mutex_unlock(&bfc->bc_mutex);
		incfs_free_bfc(bfc);
	}
	incfs_free_mtree(hash_tree);
	kfree(raw_signature.data);

	if (error)
		pr_debug("incfs: %s error: %d\n", __func__, error);
	return error;
}
| + |
/*
 * Emit fsnotify create events for a newly created file: under its
 * user-visible name, under .index (by file id), and — for incomplete
 * files — under .incomplete too. Best-effort: failures are only logged.
 */
static void notify_create(struct file *pending_reads_file,
			  const char __user *dir_name, const char *file_name,
			  const char *file_id_str, bool incomplete_file)
{
	struct mount_info *mi =
		get_mount_info(file_superblock(pending_reads_file));
	struct path base_path = {
		.mnt = pending_reads_file->f_path.mnt,
		.dentry = pending_reads_file->f_path.dentry->d_parent,
	};
	struct path dir_path = {};
	struct dentry *file = NULL;
	struct dentry *dir = NULL;
	int error;

	error = dir_relative_path_resolve(mi, dir_name, &dir_path, &base_path);
	if (error)
		goto out;

	file = incfs_lookup_dentry(dir_path.dentry, file_name);
	if (IS_ERR(file)) {
		error = PTR_ERR(file);
		file = NULL;
		goto out;
	}

	/* Event for the file under its user-visible name. */
	fsnotify_create(d_inode(dir_path.dentry), file);

	if (file_id_str) {
		dir = incfs_lookup_dentry(base_path.dentry, INCFS_INDEX_NAME);
		if (IS_ERR(dir)) {
			error = PTR_ERR(dir);
			dir = NULL;
			goto out;
		}

		/* Re-use @file for the .index hard link. */
		dput(file);
		file = incfs_lookup_dentry(dir, file_id_str);
		if (IS_ERR(file)) {
			error = PTR_ERR(file);
			file = NULL;
			goto out;
		}

		fsnotify_create(d_inode(dir), file);

		if (incomplete_file) {
			/* Same again for the .incomplete hard link. */
			dput(dir);
			dir = incfs_lookup_dentry(base_path.dentry,
						  INCFS_INCOMPLETE_NAME);
			if (IS_ERR(dir)) {
				error = PTR_ERR(dir);
				dir = NULL;
				goto out;
			}

			dput(file);
			file = incfs_lookup_dentry(dir, file_id_str);
			if (IS_ERR(file)) {
				error = PTR_ERR(file);
				file = NULL;
				goto out;
			}

			fsnotify_create(d_inode(dir), file);
		}
	}
out:
	if (error)
		pr_warn("%s failed with error %d\n", __func__, error);

	dput(dir);
	dput(file);
	path_put(&dir_path);
}
| + |
/*
 * INCFS_IOC_CREATE_FILE: create a new incfs file.
 *
 * The real inode is created in .index under the file-id name, its
 * metadata (id, size, attributes) is stored as xattrs and its incfs
 * header/signature/block map is initialized, then it is hard-linked
 * into the requested directory under its user-visible name and — if it
 * has data to be filled — into .incomplete. On any failure all links
 * made so far are rolled back.
 */
static long ioctl_create_file(struct file *file,
			struct incfs_new_file_args __user *usr_args)
{
	struct mount_info *mi = get_mount_info(file_superblock(file));
	struct incfs_new_file_args args;
	char *file_id_str = NULL;
	struct dentry *index_file_dentry = NULL;
	struct dentry *named_file_dentry = NULL;
	struct dentry *incomplete_file_dentry = NULL;
	struct path parent_dir_path = {};
	struct inode *index_dir_inode = NULL;
	__le64 size_attr_value = 0;
	char *file_name = NULL;
	char *attr_value = NULL;
	int error = 0;
	bool locked = false;
	/* Rollback flags: which links exist if we have to undo. */
	bool index_linked = false;
	bool name_linked = false;
	bool incomplete_linked = false;

	if (!mi || !mi->mi_index_dir || !mi->mi_incomplete_dir) {
		error = -EFAULT;
		goto out;
	}

	if (copy_from_user(&args, usr_args, sizeof(args)) > 0) {
		error = -EFAULT;
		goto out;
	}

	file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX);
	if (IS_ERR(file_name)) {
		error = PTR_ERR(file_name);
		file_name = NULL;
		goto out;
	}

	error = validate_name(file_name);
	if (error)
		goto out;

	file_id_str = file_id_to_str(args.file_id);
	if (!file_id_str) {
		error = -ENOMEM;
		goto out;
	}

	/* Serializes all directory-structure changes on this mount. */
	error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex);
	if (error)
		goto out;
	locked = true;

	/* Find a directory to put the file into. */
	error = dir_relative_path_resolve(mi,
			u64_to_user_ptr(args.directory_path),
			&parent_dir_path, NULL);
	if (error)
		goto out;

	if (parent_dir_path.dentry == mi->mi_index_dir) {
		/* Can't create a file directly inside .index */
		error = -EBUSY;
		goto out;
	}

	if (parent_dir_path.dentry == mi->mi_incomplete_dir) {
		/* Can't create a file directly inside .incomplete */
		error = -EBUSY;
		goto out;
	}

	/* Look up a dentry in the parent dir. It should be negative. */
	named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry,
					file_name);
	if (!named_file_dentry) {
		error = -EFAULT;
		goto out;
	}
	if (IS_ERR(named_file_dentry)) {
		error = PTR_ERR(named_file_dentry);
		named_file_dentry = NULL;
		goto out;
	}
	if (d_really_is_positive(named_file_dentry)) {
		/* File with this path already exists. */
		error = -EEXIST;
		goto out;
	}

	/* Look up a dentry in the incomplete dir. It should be negative. */
	incomplete_file_dentry = incfs_lookup_dentry(mi->mi_incomplete_dir,
					file_id_str);
	if (!incomplete_file_dentry) {
		error = -EFAULT;
		goto out;
	}
	if (IS_ERR(incomplete_file_dentry)) {
		error = PTR_ERR(incomplete_file_dentry);
		incomplete_file_dentry = NULL;
		goto out;
	}
	if (d_really_is_positive(incomplete_file_dentry)) {
		/* File with this path already exists. */
		error = -EEXIST;
		goto out;
	}

	/* Look up a dentry in the .index dir. It should be negative. */
	index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str);
	if (!index_file_dentry) {
		error = -EFAULT;
		goto out;
	}
	if (IS_ERR(index_file_dentry)) {
		error = PTR_ERR(index_file_dentry);
		index_file_dentry = NULL;
		goto out;
	}
	if (d_really_is_positive(index_file_dentry)) {
		/* File with this ID already exists in index. */
		error = -EEXIST;
		goto out;
	}

	/* Creating a file in the .index dir. */
	index_dir_inode = d_inode(mi->mi_index_dir);
	inode_lock_nested(index_dir_inode, I_MUTEX_PARENT);
	/* 0222: keep the backing file writable so data loaders can fill it. */
	error = vfs_create(&nop_mnt_idmap, index_dir_inode, index_file_dentry,
			   args.mode | 0222, true);
	inode_unlock(index_dir_inode);

	if (error)
		goto out;
	if (!d_really_is_positive(index_file_dentry)) {
		error = -EINVAL;
		goto out;
	}

	/* Re-apply the mode in case umask stripped the write bits. */
	error = chmod(index_file_dentry, args.mode | 0222);
	if (error) {
		pr_debug("incfs: chmod err: %d\n", error);
		goto out;
	}

	/* Save the file's ID as an xattr for easy fetching in future. */
	error = vfs_setxattr(&nop_mnt_idmap, index_file_dentry, INCFS_XATTR_ID_NAME,
		file_id_str, strlen(file_id_str), XATTR_CREATE);
	if (error) {
		pr_debug("incfs: vfs_setxattr err:%d\n", error);
		goto out;
	}

	/* Save the file's size as an xattr for easy fetching in future. */
	size_attr_value = cpu_to_le64(args.size);
	error = vfs_setxattr(&nop_mnt_idmap, index_file_dentry, INCFS_XATTR_SIZE_NAME,
		(char *)&size_attr_value, sizeof(size_attr_value),
		XATTR_CREATE);
	if (error) {
		pr_debug("incfs: vfs_setxattr err:%d\n", error);
		goto out;
	}

	/* Save the file's attribute as an xattr */
	if (args.file_attr_len && args.file_attr) {
		if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) {
			error = -E2BIG;
			goto out;
		}

		attr_value = kmalloc(args.file_attr_len, GFP_NOFS);
		if (!attr_value) {
			error = -ENOMEM;
			goto out;
		}

		if (copy_from_user(attr_value,
				u64_to_user_ptr(args.file_attr),
				args.file_attr_len) > 0) {
			error = -EFAULT;
			goto out;
		}

		error = vfs_setxattr(&nop_mnt_idmap, index_file_dentry,
				INCFS_XATTR_METADATA_NAME,
				attr_value, args.file_attr_len,
				XATTR_CREATE);

		if (error)
			goto out;
	}

	/* Initializing a newly created file. */
	error = init_new_file(mi, index_file_dentry, &args.file_id, args.size,
			      range(attr_value, args.file_attr_len),
			      u64_to_user_ptr(args.signature_info),
			      args.signature_size);
	if (error)
		goto out;
	index_linked = true;

	/* Linking a file with its real name from the requested dir. */
	error = incfs_link(index_file_dentry, named_file_dentry);
	if (error)
		goto out;
	name_linked = true;

	if (args.size) {
		/* Linking a file with its incomplete entry */
		error = incfs_link(index_file_dentry, incomplete_file_dentry);
		if (error)
			goto out;
		incomplete_linked = true;
	}

	notify_create(file, u64_to_user_ptr(args.directory_path), file_name,
		      file_id_str, args.size != 0);

out:
	if (error) {
		pr_debug("incfs: %s err:%d\n", __func__, error);
		/* Undo whichever links were made before the failure. */
		if (index_linked)
			incfs_unlink(index_file_dentry);
		if (name_linked)
			incfs_unlink(named_file_dentry);
		if (incomplete_linked)
			incfs_unlink(incomplete_file_dentry);
	}

	kfree(file_id_str);
	kfree(file_name);
	kfree(attr_value);
	dput(named_file_dentry);
	dput(index_file_dentry);
	dput(incomplete_file_dentry);
	path_put(&parent_dir_path);
	if (locked)
		mutex_unlock(&mi->mi_dir_struct_mutex);

	return error;
}
| + |
/*
 * Write the incfs mapping header into a freshly created backing file for
 * a mapped file: one that exposes a [offset, offset + size) window into
 * the source file identified by @uuid.
 */
static int init_new_mapped_file(struct mount_info *mi, struct dentry *dentry,
			incfs_uuid_t *uuid, u64 size, u64 offset)
{
	struct path path = {};
	struct file *new_file;
	int error = 0;
	struct backing_file_context *bfc = NULL;

	if (!mi || !dentry || !uuid)
		return -EFAULT;

	/* Open the newly created backing file to write its metadata. */
	path = (struct path) {
		.mnt = mi->mi_backing_dir_path.mnt,
		.dentry = dentry
	};
	new_file = dentry_open(&path, O_RDWR | O_NOATIME | O_LARGEFILE,
			       current_cred());

	if (IS_ERR(new_file)) {
		error = PTR_ERR(new_file);
		goto out;
	}

	/* bfc holds its own reference to the file; drop ours. */
	bfc = incfs_alloc_bfc(mi, new_file);
	fput(new_file);
	if (IS_ERR(bfc)) {
		error = PTR_ERR(bfc);
		/* NULL so the cleanup path skips the unlock/free. */
		bfc = NULL;
		goto out;
	}

	mutex_lock(&bfc->bc_mutex);
	error = incfs_write_mapping_fh_to_backing_file(bfc, uuid, size, offset);
	if (error)
		goto out;

out:
	if (bfc) {
		mutex_unlock(&bfc->bc_mutex);
		incfs_free_bfc(bfc);
	}

	if (error)
		pr_debug("incfs: %s error: %d\n", __func__, error);
	return error;
}
| + |
/*
 * INCFS_IOC_CREATE_MAPPED_FILE: create a file that exposes a window into
 * an existing incfs file. Validates that the source exists in .index and
 * that the requested [offset, offset + size) range fits inside it, then
 * creates the new file in the requested directory and writes its mapping
 * header. On failure after creation the file is unlinked again.
 *
 * NOTE(review): unlike ioctl_create_file(), there is no check for the
 * parent being .incomplete — presumably fine because mapped files are
 * never incomplete, but worth confirming.
 */
static long ioctl_create_mapped_file(struct file *file, void __user *arg)
{
	struct mount_info *mi = get_mount_info(file_superblock(file));
	struct incfs_create_mapped_file_args __user *args_usr_ptr = arg;
	struct incfs_create_mapped_file_args args = {};
	char *file_name;
	int error = 0;
	struct path parent_dir_path = {};
	char *source_file_name = NULL;
	struct dentry *source_file_dentry = NULL;
	u64 source_file_size;
	struct dentry *file_dentry = NULL;
	struct inode *parent_inode;
	__le64 size_attr_value;

	if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0)
		return -EINVAL;

	file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX);
	if (IS_ERR(file_name)) {
		error = PTR_ERR(file_name);
		file_name = NULL;
		goto out;
	}

	error = validate_name(file_name);
	if (error)
		goto out;

	/* Mapping must start on a data-block boundary. */
	if (args.source_offset % INCFS_DATA_FILE_BLOCK_SIZE) {
		error = -EINVAL;
		goto out;
	}

	/* Validate file mapping is in range */
	source_file_name = file_id_to_str(args.source_file_id);
	if (!source_file_name) {
		pr_warn("Failed to alloc source_file_name\n");
		error = -ENOMEM;
		goto out;
	}

	source_file_dentry = incfs_lookup_dentry(mi->mi_index_dir,
						       source_file_name);
	if (!source_file_dentry) {
		pr_warn("Source file does not exist\n");
		error = -EINVAL;
		goto out;
	}
	if (IS_ERR(source_file_dentry)) {
		pr_warn("Error opening source file\n");
		error = PTR_ERR(source_file_dentry);
		source_file_dentry = NULL;
		goto out;
	}
	if (!d_really_is_positive(source_file_dentry)) {
		pr_warn("Source file dentry negative\n");
		error = -EINVAL;
		goto out;
	}

	/* The source's true size is kept in its size xattr. */
	error = vfs_getxattr(&nop_mnt_idmap, source_file_dentry, INCFS_XATTR_SIZE_NAME,
			     (char *)&size_attr_value, sizeof(size_attr_value));
	if (error < 0)
		goto out;

	if (error != sizeof(size_attr_value)) {
		pr_warn("Mapped file has no size attr\n");
		error = -EINVAL;
		goto out;
	}

	source_file_size = le64_to_cpu(size_attr_value);
	if (args.source_offset + args.size > source_file_size) {
		pr_warn("Mapped file out of range\n");
		error = -EINVAL;
		goto out;
	}

	/* Find a directory to put the file into. */
	error = dir_relative_path_resolve(mi,
			u64_to_user_ptr(args.directory_path),
			&parent_dir_path, NULL);
	if (error)
		goto out;

	if (parent_dir_path.dentry == mi->mi_index_dir) {
		/* Can't create a file directly inside .index */
		error = -EBUSY;
		goto out;
	}

	/* Look up a dentry in the parent dir. It should be negative. */
	file_dentry = incfs_lookup_dentry(parent_dir_path.dentry,
					file_name);
	if (!file_dentry) {
		error = -EFAULT;
		goto out;
	}
	if (IS_ERR(file_dentry)) {
		error = PTR_ERR(file_dentry);
		file_dentry = NULL;
		goto out;
	}
	if (d_really_is_positive(file_dentry)) {
		error = -EEXIST;
		goto out;
	}

	parent_inode = d_inode(parent_dir_path.dentry);
	inode_lock_nested(parent_inode, I_MUTEX_PARENT);
	/* 0222: keep the backing file writable (same as ioctl_create_file). */
	error = vfs_create(&nop_mnt_idmap, parent_inode, file_dentry,
			   args.mode | 0222, true);
	inode_unlock(parent_inode);
	if (error)
		goto out;

	/* Re-apply the mode in case umask stripped the write bits. */
	error = chmod(file_dentry, args.mode | 0222);
	if (error) {
		pr_debug("incfs: chmod err: %d\n", error);
		goto delete_file;
	}

	/* Save the file's size as an xattr for easy fetching in future. */
	size_attr_value = cpu_to_le64(args.size);
	error = vfs_setxattr(&nop_mnt_idmap, file_dentry, INCFS_XATTR_SIZE_NAME,
		(char *)&size_attr_value, sizeof(size_attr_value),
		XATTR_CREATE);
	if (error) {
		pr_debug("incfs: vfs_setxattr err:%d\n", error);
		goto delete_file;
	}

	error = init_new_mapped_file(mi, file_dentry, &args.source_file_id,
			args.size, args.source_offset);
	if (error)
		goto delete_file;

	notify_create(file, u64_to_user_ptr(args.directory_path), file_name,
		      NULL, false);

	goto out;

delete_file:
	/* Roll back the partially initialized file. */
	incfs_unlink(file_dentry);

out:
	dput(file_dentry);
	dput(source_file_dentry);
	path_put(&parent_dir_path);
	kfree(file_name);
	kfree(source_file_name);
	return error;
}
| + |
/*
 * INCFS_IOC_GET_READ_TIMEOUTS: copy the current per-uid read timeout
 * table to userspace. The required size is reported through
 * timeouts_array_size_out even when the caller's buffer is too small,
 * in which case -E2BIG is returned.
 */
static long ioctl_get_read_timeouts(struct mount_info *mi, void __user *arg)
{
	struct incfs_get_read_timeouts_args __user *args_usr_ptr = arg;
	struct incfs_get_read_timeouts_args args = {};
	int error = 0;
	struct incfs_per_uid_read_timeouts *buffer;
	int size;

	if (copy_from_user(&args, args_usr_ptr, sizeof(args)))
		return -EINVAL;

	if (args.timeouts_array_size > INCFS_DATA_FILE_BLOCK_SIZE)
		return -EINVAL;

	/* Snapshot into a private buffer so copy_to_user runs unlocked. */
	buffer = kzalloc(args.timeouts_array_size, GFP_NOFS);
	if (!buffer)
		return -ENOMEM;

	spin_lock(&mi->mi_per_uid_read_timeouts_lock);
	size = mi->mi_per_uid_read_timeouts_size;
	if (args.timeouts_array_size < size)
		error = -E2BIG;
	else if (size)
		memcpy(buffer, mi->mi_per_uid_read_timeouts, size);
	spin_unlock(&mi->mi_per_uid_read_timeouts_lock);

	args.timeouts_array_size_out = size;
	if (!error && size)
		if (copy_to_user(u64_to_user_ptr(args.timeouts_array), buffer,
				 size))
			error = -EFAULT;

	/* On -E2BIG still tell userspace the size it must allocate. */
	if (!error || error == -E2BIG)
		if (copy_to_user(args_usr_ptr, &args, sizeof(args)) > 0)
			error = -EFAULT;

	kfree(buffer);
	return error;
}
| + |
/*
 * INCFS_IOC_SET_READ_TIMEOUTS: validate and install a new per-uid read
 * timeout table, replacing (and freeing) the previous one. A size of 0
 * clears all per-uid timeouts.
 */
static long ioctl_set_read_timeouts(struct mount_info *mi, void __user *arg)
{
	struct incfs_set_read_timeouts_args __user *args_usr_ptr = arg;
	struct incfs_set_read_timeouts_args args = {};
	int error = 0;
	int size;
	struct incfs_per_uid_read_timeouts *buffer = NULL, *tmp;
	int i;

	if (copy_from_user(&args, args_usr_ptr, sizeof(args)))
		return -EINVAL;

	size = args.timeouts_array_size;
	if (size) {
		/* Must be a whole number of records, at most one block. */
		if (size > INCFS_DATA_FILE_BLOCK_SIZE ||
		    size % sizeof(*buffer) != 0)
			return -EINVAL;

		buffer = kzalloc(size, GFP_NOFS);
		if (!buffer)
			return -ENOMEM;

		if (copy_from_user(buffer, u64_to_user_ptr(args.timeouts_array),
				   size)) {
			error = -EINVAL;
			goto out;
		}

		/* Reject entries whose min pending time exceeds the max. */
		for (i = 0; i < size / sizeof(*buffer); ++i) {
			struct incfs_per_uid_read_timeouts *t = &buffer[i];

			if (t->min_pending_time_us > t->max_pending_time_us) {
				error = -EINVAL;
				goto out;
			}
		}
	}

	/* Swap the new table in under the lock; the old one (now in
	 * @buffer) is freed below together with the error path.
	 */
	spin_lock(&mi->mi_per_uid_read_timeouts_lock);
	mi->mi_per_uid_read_timeouts_size = size;
	tmp = mi->mi_per_uid_read_timeouts;
	mi->mi_per_uid_read_timeouts = buffer;
	buffer = tmp;
	spin_unlock(&mi->mi_per_uid_read_timeouts_lock);

out:
	kfree(buffer);
	return error;
}
| + |
| +/* |
| + * INCFS_IOC_GET_LAST_READ_ERROR handler: snapshot the mount's last |
| + * read-error state (mi_le_*) under mi_le_mutex and copy it to |
| + * userspace. Returns 0, -EFAULT, or the interrupted-lock error. |
| + */ |
| +static long ioctl_get_last_read_error(struct mount_info *mi, void __user *arg) |
| +{ |
| +	struct incfs_get_last_read_error_args __user *args_usr_ptr = arg; |
| +	struct incfs_get_last_read_error_args args = {}; |
| +	int error; |
| + |
| +	error = mutex_lock_interruptible(&mi->mi_le_mutex); |
| +	if (error) |
| +		return error; |
| + |
| +	args.file_id_out = mi->mi_le_file_id; |
| +	args.time_us_out = mi->mi_le_time_us; |
| +	args.page_out = mi->mi_le_page; |
| +	args.errno_out = mi->mi_le_errno; |
| +	args.uid_out = mi->mi_le_uid; |
| + |
| +	mutex_unlock(&mi->mi_le_mutex); |
| +	if (copy_to_user(args_usr_ptr, &args, sizeof(args)) > 0) |
| +		error = -EFAULT; |
| + |
| +	return error; |
| +} |
| + |
| +/* |
| + * Dispatch ioctls issued on the .pending_reads pseudo file. The same |
| + * handler serves both native and compat paths (see fops below). |
| + */ |
| +static long pending_reads_dispatch_ioctl(struct file *f, unsigned int req, |
| +					 unsigned long arg) |
| +{ |
| +	struct mount_info *mi = get_mount_info(file_superblock(f)); |
| + |
| +	switch (req) { |
| +	case INCFS_IOC_CREATE_FILE: |
| +		return ioctl_create_file(f, (void __user *)arg); |
| +	case INCFS_IOC_PERMIT_FILL: |
| +		return ioctl_permit_fill(f, (void __user *)arg); |
| +	case INCFS_IOC_CREATE_MAPPED_FILE: |
| +		return ioctl_create_mapped_file(f, (void __user *)arg); |
| +	case INCFS_IOC_GET_READ_TIMEOUTS: |
| +		return ioctl_get_read_timeouts(mi, (void __user *)arg); |
| +	case INCFS_IOC_SET_READ_TIMEOUTS: |
| +		return ioctl_set_read_timeouts(mi, (void __user *)arg); |
| +	case INCFS_IOC_GET_LAST_READ_ERROR: |
| +		return ioctl_get_last_read_error(mi, (void __user *)arg); |
| +	default: |
| +		return -EINVAL; |
| +	} |
| +} |
| + |
| +static const struct file_operations incfs_pending_reads_file_ops = { |
| +	.read = pending_reads_read, |
| +	.poll = pending_reads_poll, |
| +	.open = pending_reads_open, |
| +	.release = pending_reads_release, |
| +	.llseek = noop_llseek, |
| +	.unlocked_ioctl = pending_reads_dispatch_ioctl, |
| +	.compat_ioctl = pending_reads_dispatch_ioctl |
| +}; |
| + |
| +/******************************************************************************* |
| + * .log pseudo file definition |
| + ******************************************************************************/ |
| +#define INCFS_LOG_INODE 3 |
| +static const char log_file_name[] = INCFS_LOG_FILENAME; |
| + |
| +/* State of an open .log file, unique for each file descriptor. */ |
| +struct log_file_state { |
| +	struct read_log_state state; |
| +}; |
| + |
| +/* |
| + * Drain read-log records into the user buffer. The record format (and |
| + * size) depends on the report_uid mount option. Copies whole records |
| + * only, one page worth at a time, and returns the number of bytes |
| + * copied (0 when the log is disabled or empty for this fd). |
| + */ |
| +static ssize_t log_read(struct file *f, char __user *buf, size_t len, |
| +			loff_t *ppos) |
| +{ |
| +	struct log_file_state *log_state = f->private_data; |
| +	struct mount_info *mi = get_mount_info(file_superblock(f)); |
| +	int total_reads_collected = 0; |
| +	int rl_size; |
| +	ssize_t result = 0; |
| +	bool report_uid; |
| +	unsigned long page = 0; |
| +	struct incfs_pending_read_info *reads_buf = NULL; |
| +	struct incfs_pending_read_info2 *reads_buf2 = NULL; |
| +	size_t record_size; |
| +	ssize_t reads_to_collect; |
| +	ssize_t reads_per_page; |
| + |
| +	if (!mi) |
| +		return -EFAULT; |
| + |
| +	report_uid = mi->mi_options.report_uid; |
| +	record_size = report_uid ? sizeof(*reads_buf2) : sizeof(*reads_buf); |
| +	reads_to_collect = len / record_size; |
| +	reads_per_page = PAGE_SIZE / record_size; |
| + |
| +	/* rl_size == 0 means the read log is not enabled on this mount. */ |
| +	rl_size = READ_ONCE(mi->mi_log.rl_size); |
| +	if (rl_size == 0) |
| +		return 0; |
| + |
| +	page = __get_free_page(GFP_NOFS); |
| +	if (!page) |
| +		return -ENOMEM; |
| + |
| +	if (report_uid) |
| +		reads_buf2 = (struct incfs_pending_read_info2 *)page; |
| +	else |
| +		reads_buf = (struct incfs_pending_read_info *)page; |
| + |
| +	reads_to_collect = min_t(ssize_t, rl_size, reads_to_collect); |
| +	while (reads_to_collect > 0) { |
| +		struct read_log_state next_state; |
| +		int reads_collected; |
| + |
| +		/* The per-fd cursor advances only after a successful copy. */ |
| +		memcpy(&next_state, &log_state->state, sizeof(next_state)); |
| +		reads_collected = incfs_collect_logged_reads( |
| +			mi, &next_state, reads_buf, reads_buf2, |
| +			min_t(ssize_t, reads_to_collect, reads_per_page)); |
| +		if (reads_collected <= 0) { |
| +			result = total_reads_collected ? |
| +				total_reads_collected * record_size : |
| +				reads_collected; |
| +			goto out; |
| +		} |
| +		if (copy_to_user(buf, (void *)page, |
| +				 reads_collected * record_size)) { |
| +			result = total_reads_collected ? |
| +				total_reads_collected * record_size : |
| +				-EFAULT; |
| +			goto out; |
| +		} |
| + |
| +		memcpy(&log_state->state, &next_state, sizeof(next_state)); |
| +		total_reads_collected += reads_collected; |
| +		buf += reads_collected * record_size; |
| +		reads_to_collect -= reads_collected; |
| +	} |
| + |
| +	result = total_reads_collected * record_size; |
| +	*ppos = 0; |
| +out: |
| +	free_page(page); |
| +	return result; |
| +} |
| + |
| +/* Wake readers once enough uncollected log records have accumulated. */ |
| +static __poll_t log_poll(struct file *file, poll_table *wait) |
| +{ |
| +	struct log_file_state *log_state = file->private_data; |
| +	struct mount_info *mi = get_mount_info(file_superblock(file)); |
| +	int count; |
| +	__poll_t ret = 0; |
| + |
| +	poll_wait(file, &mi->mi_log.ml_notif_wq, wait); |
| +	count = incfs_get_uncollected_logs_count(mi, &log_state->state); |
| +	/* Readable only once the wakeup threshold is reached. */ |
| +	if (count >= mi->mi_options.read_log_wakeup_count) |
| +		ret = EPOLLIN | EPOLLRDNORM; |
| + |
| +	return ret; |
| +} |
| + |
| +/* Allocate per-fd state snapshotting the current log position. */ |
| +static int log_open(struct inode *inode, struct file *file) |
| +{ |
| +	struct log_file_state *log_state = NULL; |
| +	struct mount_info *mi = get_mount_info(file_superblock(file)); |
| + |
| +	log_state = kzalloc(sizeof(*log_state), GFP_NOFS); |
| +	if (!log_state) |
| +		return -ENOMEM; |
| + |
| +	log_state->state = incfs_get_log_state(mi); |
| +	file->private_data = log_state; |
| +	return 0; |
| +} |
| + |
| +/* Free the per-fd log cursor allocated in log_open(). */ |
| +static int log_release(struct inode *inode, struct file *file) |
| +{ |
| +	kfree(file->private_data); |
| +	return 0; |
| +} |
| + |
| +static const struct file_operations incfs_log_file_ops = { |
| +	.read = log_read, |
| +	.poll = log_poll, |
| +	.open = log_open, |
| +	.release = log_release, |
| +	.llseek = noop_llseek, |
| +}; |
| + |
| +/******************************************************************************* |
| + * .blocks_written pseudo file definition |
| + ******************************************************************************/ |
| +#define INCFS_BLOCKS_WRITTEN_INODE 4 |
| +static const char blocks_written_file_name[] = INCFS_BLOCKS_WRITTEN_FILENAME; |
| + |
| +/* State of an open .blocks_written file, unique for each file descriptor. */ |
| +struct blocks_written_file_state { |
| +	unsigned long blocks_written; |
| +}; |
| + |
| +/* |
| + * Report the mount's blocks-written counter as a decimal string. A |
| + * read on the same fd returns data only when the counter has changed |
| + * since the previous read (otherwise 0/EOF). |
| + */ |
| +static ssize_t blocks_written_read(struct file *f, char __user *buf, size_t len, |
| +				   loff_t *ppos) |
| +{ |
| +	struct mount_info *mi = get_mount_info(file_superblock(f)); |
| +	struct blocks_written_file_state *state = f->private_data; |
| +	unsigned long blocks_written; |
| +	char string[21];	/* fits a 64-bit decimal value plus NUL */ |
| +	int result = 0; |
| + |
| +	if (!mi) |
| +		return -EFAULT; |
| + |
| +	blocks_written = atomic_read(&mi->mi_blocks_written); |
| +	if (state->blocks_written == blocks_written) |
| +		return 0; |
| + |
| +	/* Truncate to the caller's buffer; only 'result' bytes copied. */ |
| +	result = snprintf(string, sizeof(string), "%lu", blocks_written); |
| +	if (result > len) |
| +		result = len; |
| +	if (copy_to_user(buf, string, result)) |
| +		return -EFAULT; |
| + |
| +	state->blocks_written = blocks_written; |
| +	return result; |
| +} |
| + |
| +/* Readable whenever the counter differs from the fd's last-seen value. */ |
| +static __poll_t blocks_written_poll(struct file *f, poll_table *wait) |
| +{ |
| +	struct mount_info *mi = get_mount_info(file_superblock(f)); |
| +	struct blocks_written_file_state *state = f->private_data; |
| +	unsigned long blocks_written; |
| + |
| +	if (!mi) |
| +		return 0; |
| + |
| +	poll_wait(f, &mi->mi_blocks_written_notif_wq, wait); |
| +	blocks_written = atomic_read(&mi->mi_blocks_written); |
| +	if (state->blocks_written == blocks_written) |
| +		return 0; |
| + |
| +	return EPOLLIN | EPOLLRDNORM; |
| +} |
| + |
| +/* |
| + * Allocate per-fd state; the count starts at -1 so the first read |
| + * always reports the current value. |
| + */ |
| +static int blocks_written_open(struct inode *inode, struct file *file) |
| +{ |
| +	struct blocks_written_file_state *state = |
| +		kzalloc(sizeof(*state), GFP_NOFS); |
| + |
| +	if (!state) |
| +		return -ENOMEM; |
| + |
| +	state->blocks_written = -1; |
| +	file->private_data = state; |
| +	return 0; |
| +} |
| + |
| +/* Free the per-fd state allocated in blocks_written_open(). */ |
| +static int blocks_written_release(struct inode *inode, struct file *file) |
| +{ |
| +	kfree(file->private_data); |
| +	return 0; |
| +} |
| + |
| +static const struct file_operations incfs_blocks_written_file_ops = { |
| +	.read = blocks_written_read, |
| +	.poll = blocks_written_poll, |
| +	.open = blocks_written_open, |
| +	.release = blocks_written_release, |
| +	.llseek = noop_llseek, |
| +}; |
| + |
| +/******************************************************************************* |
| + * Generic inode lookup functionality |
| + ******************************************************************************/ |
| + |
| +/* |
| + * Names, inode numbers and file operations of the pseudo files. All |
| + * three tables must stay in the same order and have PSEUDO_FILE_COUNT |
| + * entries. |
| + */ |
| +const struct mem_range incfs_pseudo_file_names[] = { |
| +	{ .data = (u8 *)pending_reads_file_name, |
| +	  .len = ARRAY_SIZE(pending_reads_file_name) - 1 }, |
| +	{ .data = (u8 *)log_file_name, .len = ARRAY_SIZE(log_file_name) - 1 }, |
| +	{ .data = (u8 *)blocks_written_file_name, |
| +	  .len = ARRAY_SIZE(blocks_written_file_name) - 1 } |
| +}; |
| + |
| +const unsigned long incfs_pseudo_file_inodes[] = { INCFS_PENDING_READS_INODE, |
| +						   INCFS_LOG_INODE, |
| +						   INCFS_BLOCKS_WRITTEN_INODE }; |
| + |
| +static const struct file_operations *const pseudo_file_operations[] = { |
| +	&incfs_pending_reads_file_ops, &incfs_log_file_ops, |
| +	&incfs_blocks_written_file_ops |
| +}; |
| + |
| +/* True if 'name' matches one of the pseudo-file names above. */ |
| +static bool is_pseudo_filename(struct mem_range name) |
| +{ |
| +	int i = 0; |
| + |
| +	for (; i < ARRAY_SIZE(incfs_pseudo_file_names); ++i) |
| +		if (incfs_equal_ranges(incfs_pseudo_file_names[i], name)) |
| +			return true; |
| +	return false; |
| +} |
| + |
| +/* |
| + * Initialize 'inode' as the pseudo file with inode number 'ino'. |
| + * Returns false when 'ino' is not one of the pseudo-file inodes. |
| + */ |
| +static bool get_pseudo_inode(int ino, struct inode *inode) |
| +{ |
| +	int i = 0; |
| + |
| +	for (; i < ARRAY_SIZE(incfs_pseudo_file_inodes); ++i) |
| +		if (ino == incfs_pseudo_file_inodes[i]) |
| +			break; |
| +	if (i == ARRAY_SIZE(incfs_pseudo_file_inodes)) |
| +		return false; |
| + |
| +	inode_set_mtime(inode, 0, 0); |
| +	inode_set_atime(inode, 0, 0); |
| +	inode_set_ctime(inode, 0, 0); |
| +	inode->i_size = 0; |
| +	inode->i_ino = ino; |
| +	inode->i_private = NULL; |
| +	inode_init_owner(&nop_mnt_idmap, inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); |
| +	inode->i_op = &incfs_file_inode_ops; |
| +	/* fops table is indexed in lockstep with the inode table. */ |
| +	inode->i_fop = pseudo_file_operations[i]; |
| +	return true; |
| +} |
| + |
| +/* Key passed to iget5_locked() when looking up pseudo-file inodes. */ |
| +struct inode_search { |
| +	unsigned long ino; |
| +}; |
| + |
| +/* iget5_locked() match callback: compare by inode number only. */ |
| +static int inode_test(struct inode *inode, void *opaque) |
| +{ |
| +	struct inode_search *search = opaque; |
| + |
| +	return inode->i_ino == search->ino; |
| +} |
| + |
| +/* iget5_locked() init callback for a freshly allocated inode. */ |
| +static int inode_set(struct inode *inode, void *opaque) |
| +{ |
| +	struct inode_search *search = opaque; |
| + |
| +	if (get_pseudo_inode(search->ino, inode)) |
| +		return 0; |
| + |
| +	/* Unknown inode requested. */ |
| +	return -EINVAL; |
| +} |
| + |
| +/* |
| + * Look up (or create and initialize) the inode for a pseudo-file inode |
| + * number. Returns ERR_PTR(-ENOMEM) on failure. |
| + */ |
| +static struct inode *fetch_inode(struct super_block *sb, unsigned long ino) |
| +{ |
| +	struct inode_search search = { |
| +		.ino = ino |
| +	}; |
| +	struct inode *inode = iget5_locked(sb, search.ino, inode_test, |
| +				inode_set, &search); |
| + |
| +	if (!inode) |
| +		return ERR_PTR(-ENOMEM); |
| + |
| +	if (inode->i_state & I_NEW) |
| +		unlock_new_inode(inode); |
| + |
| +	return inode; |
| +} |
| + |
| +/* |
| + * If the dentry names one of the pseudo files, fetch its inode and |
| + * instantiate the dentry. Returns -ENOENT for any other name. |
| + */ |
| +int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry) |
| +{ |
| +	struct mem_range name_range = |
| +			range((u8 *)dentry->d_name.name, dentry->d_name.len); |
| +	unsigned long ino; |
| +	struct inode *inode; |
| +	int i = 0; |
| + |
| +	for (; i < ARRAY_SIZE(incfs_pseudo_file_names); ++i) |
| +		if (incfs_equal_ranges(incfs_pseudo_file_names[i], name_range)) |
| +			break; |
| +	if (i == ARRAY_SIZE(incfs_pseudo_file_names)) |
| +		return -ENOENT; |
| + |
| +	ino = incfs_pseudo_file_inodes[i]; |
| + |
| +	inode = fetch_inode(sb, ino); |
| +	if (IS_ERR(inode)) |
| +		return PTR_ERR(inode); |
| + |
| +	d_add(dentry, inode); |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Emit directory entries for the pseudo files, resuming from ctx->pos, |
| + * which is used as an index into the name/inode tables. |
| + */ |
| +int emit_pseudo_files(struct dir_context *ctx) |
| +{ |
| +	loff_t i = ctx->pos; |
| + |
| +	for (; i < ARRAY_SIZE(incfs_pseudo_file_names); ++i) { |
| +		if (!dir_emit(ctx, incfs_pseudo_file_names[i].data, |
| +			      incfs_pseudo_file_names[i].len, |
| +			      incfs_pseudo_file_inodes[i], DT_REG)) |
| +			return -EINVAL; |
| + |
| +		ctx->pos++; |
| +	} |
| +	return 0; |
| +} |
| diff --git a/fs/incfs/pseudo_files.h b/fs/incfs/pseudo_files.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/pseudo_files.h |
| @@ -0,0 +1,20 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright 2020 Google LLC |
| + */ |
| + |
| +#ifndef _INCFS_PSEUDO_FILES_H |
| +#define _INCFS_PSEUDO_FILES_H |
| + |
| +#include "internal.h" |
| + |
| +/* Number of pseudo files; must match the table sizes in pseudo_files.c. */ |
| +#define PSEUDO_FILE_COUNT 3 |
| +/* NOTE(review): presumably the first ino for regular files — confirm. */ |
| +#define INCFS_START_INO_RANGE 10 |
| + |
| +extern const struct mem_range incfs_pseudo_file_names[PSEUDO_FILE_COUNT]; |
| +extern const unsigned long incfs_pseudo_file_inodes[PSEUDO_FILE_COUNT]; |
| + |
| +int dir_lookup_pseudo_files(struct super_block *sb, struct dentry *dentry); |
| +int emit_pseudo_files(struct dir_context *ctx); |
| + |
| +#endif |
| diff --git a/fs/incfs/sysfs.c b/fs/incfs/sysfs.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/sysfs.c |
| @@ -0,0 +1,205 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2021 Google LLC |
| + */ |
| +#include <linux/fs.h> |
| +#include <linux/kobject.h> |
| + |
| +#include <uapi/linux/incrementalfs.h> |
| + |
| +#include "sysfs.h" |
| +#include "data_mgmt.h" |
| +#include "vfs.h" |
| + |
| +/****************************************************************************** |
| + * Define sys/fs/incrementalfs & sys/fs/incrementalfs/features |
| + *****************************************************************************/ |
| +#define INCFS_NODE_FEATURES "features" |
| +#define INCFS_NODE_INSTANCES "instances" |
| + |
| +static struct kobject *sysfs_root; |
| +static struct kobject *features_node; |
| +static struct kobject *instances_node; |
| + |
| +/* |
| + * Each feature flag is a read-only sysfs attribute that always reads |
| + * "supported"; its mere presence advertises the feature to userspace. |
| + */ |
| +#define DECLARE_FEATURE_FLAG(name)					\ |
| +	static ssize_t name##_show(struct kobject *kobj,		\ |
| +			 struct kobj_attribute *attr, char *buff)	\ |
| +{									\ |
| +	return sysfs_emit(buff, "supported\n");				\ |
| +}									\ |
| +									\ |
| +static struct kobj_attribute name##_attr = __ATTR_RO(name) |
| + |
| +DECLARE_FEATURE_FLAG(corefs); |
| +DECLARE_FEATURE_FLAG(zstd); |
| +DECLARE_FEATURE_FLAG(v2); |
| +DECLARE_FEATURE_FLAG(bugfix_throttling); |
| +DECLARE_FEATURE_FLAG(bugfix_inode_eviction); |
| + |
| +static struct attribute *attributes[] = { |
| +	&corefs_attr.attr, |
| +	&zstd_attr.attr, |
| +	&v2_attr.attr, |
| +	&bugfix_throttling_attr.attr, |
| +	&bugfix_inode_eviction_attr.attr, |
| +	NULL, |
| +}; |
| + |
| +static const struct attribute_group attr_group = { |
| +	.attrs = attributes, |
| +}; |
| + |
| +/* |
| + * Create /sys/fs/incrementalfs with its "instances" and "features" |
| + * sub-nodes and attach the feature-flag attributes. On any failure all |
| + * previously created kobjects are released in reverse order. |
| + */ |
| +int __init incfs_init_sysfs(void) |
| +{ |
| +	int res = -ENOMEM; |
| + |
| +	sysfs_root = kobject_create_and_add(INCFS_NAME, fs_kobj); |
| +	if (!sysfs_root) |
| +		return -ENOMEM; |
| + |
| +	instances_node = kobject_create_and_add(INCFS_NODE_INSTANCES, |
| +						sysfs_root); |
| +	if (!instances_node) |
| +		goto err_put_root; |
| + |
| +	features_node = kobject_create_and_add(INCFS_NODE_FEATURES, |
| +						sysfs_root); |
| +	if (!features_node) |
| +		goto err_put_instances; |
| + |
| +	res = sysfs_create_group(features_node, &attr_group); |
| +	if (res) |
| +		goto err_put_features; |
| + |
| +	return 0; |
| + |
| +err_put_features: |
| +	kobject_put(features_node); |
| +err_put_instances: |
| +	kobject_put(instances_node); |
| +err_put_root: |
| +	kobject_put(sysfs_root); |
| + |
| +	return res; |
| +} |
| + |
| +/* |
| + * Tear down the sysfs hierarchy created by incfs_init_sysfs(). |
| + * kobject_put() ignores NULL, so the unconditional puts are safe even |
| + * if init failed part-way. |
| + */ |
| +void incfs_cleanup_sysfs(void) |
| +{ |
| +	if (features_node) { |
| +		sysfs_remove_group(features_node, &attr_group); |
| +		kobject_put(features_node); |
| +	} |
| + |
| +	kobject_put(instances_node); |
| +	kobject_put(sysfs_root); |
| +} |
| + |
| +/****************************************************************************** |
| + * Define sys/fs/incrementalfs/instances/<name>/ |
| + *****************************************************************************/ |
| +/* |
| + * Generate a read-only attribute exposing the owning mount's mi_<name> |
| + * counter; the FLAG64 variant formats a 64-bit value. |
| + */ |
| +#define __DECLARE_STATUS_FLAG(name)					\ |
| +static ssize_t name##_show(struct kobject *kobj,			\ |
| +			struct kobj_attribute *attr, char *buff)	\ |
| +{									\ |
| +	struct incfs_sysfs_node *node = container_of(kobj,		\ |
| +			struct incfs_sysfs_node, isn_sysfs_node);	\ |
| +									\ |
| +	return sysfs_emit(buff, "%d\n", node->isn_mi->mi_##name);	\ |
| +}									\ |
| +									\ |
| +static struct kobj_attribute name##_attr = __ATTR_RO(name) |
| + |
| +#define __DECLARE_STATUS_FLAG64(name)					\ |
| +static ssize_t name##_show(struct kobject *kobj,			\ |
| +			struct kobj_attribute *attr, char *buff)	\ |
| +{									\ |
| +	struct incfs_sysfs_node *node = container_of(kobj,		\ |
| +			struct incfs_sysfs_node, isn_sysfs_node);	\ |
| +									\ |
| +	return sysfs_emit(buff, "%lld\n", node->isn_mi->mi_##name);	\ |
| +}									\ |
| +									\ |
| +static struct kobj_attribute name##_attr = __ATTR_RO(name) |
| + |
| +__DECLARE_STATUS_FLAG(reads_failed_timed_out); |
| +__DECLARE_STATUS_FLAG(reads_failed_hash_verification); |
| +__DECLARE_STATUS_FLAG(reads_failed_other); |
| +__DECLARE_STATUS_FLAG(reads_delayed_pending); |
| +__DECLARE_STATUS_FLAG64(reads_delayed_pending_us); |
| +__DECLARE_STATUS_FLAG(reads_delayed_min); |
| +__DECLARE_STATUS_FLAG64(reads_delayed_min_us); |
| + |
| +static struct attribute *mount_attributes[] = { |
| +	&reads_failed_timed_out_attr.attr, |
| +	&reads_failed_hash_verification_attr.attr, |
| +	&reads_failed_other_attr.attr, |
| +	&reads_delayed_pending_attr.attr, |
| +	&reads_delayed_pending_us_attr.attr, |
| +	&reads_delayed_min_attr.attr, |
| +	&reads_delayed_min_us_attr.attr, |
| +	NULL, |
| +}; |
| + |
| +/* |
| + * kobject release callback: signals incfs_free_sysfs_node() that the |
| + * kobject is gone. Note it does NOT free the containing node. |
| + */ |
| +static void incfs_sysfs_release(struct kobject *kobj) |
| +{ |
| +	struct incfs_sysfs_node *node = container_of(kobj, |
| +			struct incfs_sysfs_node, isn_sysfs_node); |
| + |
| +	complete(&node->isn_completion); |
| +} |
| + |
| +static const struct attribute_group mount_attr_group = { |
| +	.attrs = mount_attributes, |
| +}; |
| + |
| +static struct kobj_type incfs_kobj_node_ktype = { |
| +	.sysfs_ops = &kobj_sysfs_ops, |
| +	.release = &incfs_sysfs_release, |
| +}; |
| + |
| +/* |
| + * Create the /sys/fs/incrementalfs/instances/<name>/ node for a mount |
| + * and attach the per-mount status attribute group. Returns NULL when |
| + * no name is given, ERR_PTR on failure, or the new node, which must |
| + * later be released with incfs_free_sysfs_node(). |
| + */ |
| +struct incfs_sysfs_node *incfs_add_sysfs_node(const char *name, |
| +					      struct mount_info *mi) |
| +{ |
| +	struct incfs_sysfs_node *node = NULL; |
| +	int error; |
| + |
| +	if (!name) |
| +		return NULL; |
| + |
| +	node = kzalloc(sizeof(*node), GFP_NOFS); |
| +	if (!node) |
| +		return ERR_PTR(-ENOMEM); |
| + |
| +	node->isn_mi = mi; |
| + |
| +	init_completion(&node->isn_completion); |
| +	kobject_init(&node->isn_sysfs_node, &incfs_kobj_node_ktype); |
| +	error = kobject_add(&node->isn_sysfs_node, instances_node, "%s", name); |
| +	if (error) |
| +		goto err; |
| + |
| +	error = sysfs_create_group(&node->isn_sysfs_node, &mount_attr_group); |
| +	if (error) |
| +		goto err; |
| + |
| +	return node; |
| + |
| +err: |
| +	/* |
| +	 * incfs_sysfs_release() only signals isn_completion; it does not |
| +	 * free the node. Drop the last kobject reference (which invokes |
| +	 * the release callback synchronously), then free the node here |
| +	 * so the error path does not leak it. |
| +	 */ |
| +	kobject_put(&node->isn_sysfs_node); |
| +	kfree(node); |
| +	return ERR_PTR(error); |
| +} |
| + |
| +/* |
| + * Remove the per-mount sysfs node and free it once the kobject's |
| + * release callback has signalled isn_completion. |
| + * NOTE(review): the wait is interruptible — if it returns early the |
| + * kfree() could race with a later release; confirm this is intended. |
| + */ |
| +void incfs_free_sysfs_node(struct incfs_sysfs_node *node) |
| +{ |
| +	if (!node) |
| +		return; |
| + |
| +	sysfs_remove_group(&node->isn_sysfs_node, &mount_attr_group); |
| +	kobject_put(&node->isn_sysfs_node); |
| +	wait_for_completion_interruptible(&node->isn_completion); |
| +	kfree(node); |
| +} |
| diff --git a/fs/incfs/sysfs.h b/fs/incfs/sysfs.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/sysfs.h |
| @@ -0,0 +1,22 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright 2021 Google LLC |
| + */ |
| +#ifndef _INCFS_SYSFS_H |
| +#define _INCFS_SYSFS_H |
| + |
| +/* Per-mount sysfs node under /sys/fs/incrementalfs/instances/. */ |
| +struct incfs_sysfs_node { |
| +	/* Embedded kobject; its release signals isn_completion. */ |
| +	struct kobject isn_sysfs_node; |
| + |
| +	/* Completed by the kobject release callback; waited on at free. */ |
| +	struct completion isn_completion; |
| + |
| +	/* Owning mount, read by the status-flag show() callbacks. */ |
| +	struct mount_info *isn_mi; |
| +}; |
| + |
| +int incfs_init_sysfs(void); |
| +void incfs_cleanup_sysfs(void); |
| +struct incfs_sysfs_node *incfs_add_sysfs_node(const char *name, |
| + struct mount_info *mi); |
| +void incfs_free_sysfs_node(struct incfs_sysfs_node *node); |
| + |
| +#endif |
| diff --git a/fs/incfs/verity.c b/fs/incfs/verity.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/verity.c |
| @@ -0,0 +1,821 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2020 Google LLC |
| + */ |
| + |
| +/* |
| + * fs-verity integration into incfs |
| + * |
| + * Since incfs has its own merkle tree implementation, most of fs/verity/ is not |
| + * needed. incfs also only needs to support the case where |
| + * CONFIG_FS_VERITY_BUILTIN_SIGNATURES=n. Therefore, the integration consists of |
| + * the following modifications: |
| + * |
| + * 1. Add the (optional) verity signature to the incfs file format. (Not really |
| + * needed anymore, but this is kept around since this is the behavior of |
| + * fs/verity/ even when CONFIG_FS_VERITY_BUILTIN_SIGNATURES=n.) |
| + * 2. Add a pointer to the digest of the fs-verity descriptor struct to the |
| + * data_file struct that incfs attaches to each file inode. |
| + * 3. Add the following ioclts: |
| + * - FS_IOC_ENABLE_VERITY |
| + * - FS_IOC_GETFLAGS |
| + * - FS_IOC_MEASURE_VERITY |
| + * 4. When FS_IOC_ENABLE_VERITY is called on a non-verity file, the |
| + * fs-verity descriptor struct is populated and digested. Then the S_VERITY |
| + * flag is set and the xattr incfs.verity is set. If the signature is |
| + * non-NULL, an INCFS_MD_VERITY_SIGNATURE is added to the backing file |
| + * containing the signature. |
| + * 5. When a file with an incfs.verity xattr's inode is initialized, the |
| + * inode’s S_VERITY flag is set. |
| + * 6. When a file with the S_VERITY flag set on its inode is opened, the |
| + * data_file is checked for its verity digest. If the file doesn’t have a |
| + * digest, the file’s digest is calculated as above, checked, and set, or the |
| + * open is denied if it is not valid. |
| + * 7. FS_IOC_GETFLAGS simply returns the value of the S_VERITY flag |
| + * 8. FS_IOC_MEASURE_VERITY simply returns the cached digest |
| + * 9. The final complication is that if FS_IOC_ENABLE_VERITY is called on a file |
| + * which doesn’t have a merkle tree, the merkle tree is calculated before the |
| + * rest of the process is completed. |
| + */ |
| + |
| +#include <crypto/hash.h> |
| +#include <crypto/sha2.h> |
| +#include <linux/fsverity.h> |
| +#include <linux/mount.h> |
| + |
| +#include "verity.h" |
| + |
| +#include "data_mgmt.h" |
| +#include "format.h" |
| +#include "integrity.h" |
| +#include "vfs.h" |
| + |
| +#define FS_VERITY_MAX_SIGNATURE_SIZE 16128 |
| + |
| +/* |
| + * Copy the file's Merkle-tree root hash into 'root_hash'. |
| + * NOTE(review): dereferences df->df_hash_tree without a NULL check — |
| + * callers must only use this on files that have a hash tree. |
| + */ |
| +static int incfs_get_root_hash(struct file *filp, u8 *root_hash) |
| +{ |
| +	struct data_file *df = get_incfs_data_file(filp); |
| + |
| +	if (!df) |
| +		return -EINVAL; |
| + |
| +	memcpy(root_hash, df->df_hash_tree->root_hash, |
| +	       df->df_hash_tree->alg->digest_size); |
| + |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Finish FS_IOC_ENABLE_VERITY: persist the (optional) verity signature |
| + * in the backing file, tag the backing file with the incfs.verity |
| + * xattr, and finally set S_VERITY on the inode. |
| + */ |
| +static int incfs_end_enable_verity(struct file *filp, u8 *sig, size_t sig_size) |
| +{ |
| +	struct inode *inode = file_inode(filp); |
| +	struct mem_range signature = { |
| +		.data = sig, |
| +		.len = sig_size, |
| +	}; |
| +	struct data_file *df = get_incfs_data_file(filp); |
| +	struct backing_file_context *bfc; |
| +	int error; |
| +	struct incfs_df_verity_signature *vs = NULL; |
| +	loff_t offset; |
| + |
| +	if (!df || !df->df_backing_file_context) |
| +		return -EFSCORRUPTED; |
| + |
| +	/* Allocate the in-memory signature record up front if needed. */ |
| +	if (sig) { |
| +		vs = kzalloc(sizeof(*vs), GFP_NOFS); |
| +		if (!vs) |
| +			return -ENOMEM; |
| +	} |
| + |
| +	bfc = df->df_backing_file_context; |
| +	error = mutex_lock_interruptible(&bfc->bc_mutex); |
| +	if (error) |
| +		goto out; |
| + |
| +	error = incfs_write_verity_signature_to_backing_file(bfc, signature, |
| +							     &offset); |
| +	mutex_unlock(&bfc->bc_mutex); |
| +	if (error) |
| +		goto out; |
| + |
| +	/* |
| +	 * Set verity xattr so we can set S_VERITY without opening backing file |
| +	 */ |
| +	error = vfs_setxattr(&nop_mnt_idmap, bfc->bc_file->f_path.dentry, |
| +			     INCFS_XATTR_VERITY_NAME, NULL, 0, XATTR_CREATE); |
| +	if (error) { |
| +		pr_warn("incfs: error setting verity xattr: %d\n", error); |
| +		goto out; |
| +	} |
| + |
| +	/* Hand vs over to the data_file; NULL it so it isn't freed below. */ |
| +	if (sig) { |
| +		*vs = (struct incfs_df_verity_signature) { |
| +			.size = signature.len, |
| +			.offset = offset, |
| +		}; |
| + |
| +		df->df_verity_signature = vs; |
| +		vs = NULL; |
| +	} |
| + |
| +	inode_set_flags(inode, S_VERITY, S_VERITY); |
| + |
| +out: |
| +	kfree(vs); |
| +	return error; |
| +} |
| + |
| +/* Digest the raw fsverity_descriptor bytes with the given algorithm. */ |
| +static int incfs_compute_file_digest(struct incfs_hash_alg *alg, |
| +				     struct fsverity_descriptor *desc, |
| +				     u8 *digest) |
| +{ |
| +	SHASH_DESC_ON_STACK(d, alg->shash); |
| + |
| +	d->tfm = alg->shash; |
| +	return crypto_shash_digest(d, (u8 *)desc, sizeof(*desc), digest); |
| +} |
| + |
| +/* Map an FS_VERITY_HASH_ALG_* value to the incfs equivalent. */ |
| +static enum incfs_hash_tree_algorithm incfs_convert_fsverity_hash_alg( |
| +	int hash_alg) |
| +{ |
| +	switch (hash_alg) { |
| +	case FS_VERITY_HASH_ALG_SHA256: |
| +		return INCFS_HASH_TREE_SHA256; |
| +	default: |
| +		return -EINVAL; |
| +	} |
| +} |
| + |
| +/* |
| + * Read the published fs-verity file digest for the inode's data_file; |
| + * the returned range has .data == NULL if no digest has been set yet. |
| + */ |
| +static struct mem_range incfs_get_verity_digest(struct inode *inode) |
| +{ |
| +	struct inode_info *node = get_incfs_node(inode); |
| +	struct data_file *df; |
| +	struct mem_range verity_file_digest; |
| + |
| +	if (!node) { |
| +		pr_warn("Invalid inode\n"); |
| +		return range(NULL, 0); |
| +	} |
| + |
| +	df = node->n_file; |
| + |
| +	/* |
| +	 * Pairs with the cmpxchg_release() in incfs_set_verity_digest(). |
| +	 * I.e., another task may publish ->df_verity_file_digest concurrently, |
| +	 * executing a RELEASE barrier. We need to use smp_load_acquire() here |
| +	 * to safely ACQUIRE the memory the other task published. |
| +	 */ |
| +	verity_file_digest.data = smp_load_acquire( |
| +		&df->df_verity_file_digest.data); |
| +	verity_file_digest.len = df->df_verity_file_digest.len; |
| +	return verity_file_digest; |
| +} |
| + |
| +/* |
| + * Publish a computed fs-verity digest on the inode's data_file; the |
| + * first caller wins and later callers' buffers are freed. |
| + */ |
| +static void incfs_set_verity_digest(struct inode *inode, |
| +				    struct mem_range verity_file_digest) |
| +{ |
| +	struct inode_info *node = get_incfs_node(inode); |
| +	struct data_file *df; |
| + |
| +	if (!node) { |
| +		pr_warn("Invalid inode\n"); |
| +		kfree(verity_file_digest.data); |
| +		return; |
| +	} |
| + |
| +	df = node->n_file; |
| +	df->df_verity_file_digest.len = verity_file_digest.len; |
| + |
| +	/* |
| +	 * Multiple tasks may race to set ->df_verity_file_digest.data, so use |
| +	 * cmpxchg_release(). This pairs with the smp_load_acquire() in |
| +	 * incfs_get_verity_digest(). I.e., here we publish |
| +	 * ->df_verity_file_digest.data, with a RELEASE barrier so that other |
| +	 * tasks can ACQUIRE it. |
| +	 */ |
| +	if (cmpxchg_release(&df->df_verity_file_digest.data, NULL, |
| +			    verity_file_digest.data) != NULL) |
| +		/* Lost the race, so free the file_digest we allocated. */ |
| +		kfree(verity_file_digest.data); |
| +} |
| + |
| +/* |
| + * Calculate the digest of the fsverity_descriptor. Returns a freshly |
| + * allocated digest range (caller frees) or an ERR_PTR in .data. |
| + */ |
| +static struct mem_range incfs_calc_verity_digest_from_desc( |
| +					const struct inode *inode, |
| +					struct fsverity_descriptor *desc) |
| +{ |
| +	enum incfs_hash_tree_algorithm incfs_hash_alg; |
| +	struct mem_range verity_file_digest; |
| +	int err; |
| +	struct incfs_hash_alg *hash_alg; |
| + |
| +	incfs_hash_alg = incfs_convert_fsverity_hash_alg(desc->hash_algorithm); |
| +	if (incfs_hash_alg < 0) |
| +		return range(ERR_PTR(incfs_hash_alg), 0); |
| + |
| +	hash_alg = incfs_get_hash_alg(incfs_hash_alg); |
| +	if (IS_ERR(hash_alg)) |
| +		return range((u8 *)hash_alg, 0); |
| + |
| +	verity_file_digest = range(kzalloc(hash_alg->digest_size, GFP_KERNEL), |
| +				   hash_alg->digest_size); |
| +	if (!verity_file_digest.data) |
| +		return range(ERR_PTR(-ENOMEM), 0); |
| + |
| +	err = incfs_compute_file_digest(hash_alg, desc, |
| +					verity_file_digest.data); |
| +	if (err) { |
| +		pr_err("Error %d computing file digest", err); |
| +		kfree(verity_file_digest.data); |
| +		return range(ERR_PTR(err), 0); |
| +	} |
| +	pr_debug("Computed file digest: %s:%*phN\n", |
| +		 hash_alg->name, (int) verity_file_digest.len, |
| +		 verity_file_digest.data); |
| +	return verity_file_digest; |
| +} |
| + |
| +/* |
| + * Build a minimal version-1 fsverity_descriptor for the file, with the |
| + * Merkle root hash filled in. Caller frees the returned descriptor. |
| + */ |
| +static struct fsverity_descriptor *incfs_get_fsverity_descriptor( |
| +					struct file *filp, int hash_algorithm) |
| +{ |
| +	struct inode *inode = file_inode(filp); |
| +	struct fsverity_descriptor *desc = kzalloc(sizeof(*desc), GFP_KERNEL); |
| +	int err; |
| + |
| +	if (!desc) |
| +		return ERR_PTR(-ENOMEM); |
| + |
| +	*desc = (struct fsverity_descriptor) { |
| +		.version = 1, |
| +		.hash_algorithm = hash_algorithm, |
| +		.log_blocksize = ilog2(INCFS_DATA_FILE_BLOCK_SIZE), |
| +		.data_size = cpu_to_le64(inode->i_size), |
| +	}; |
| + |
| +	err = incfs_get_root_hash(filp, desc->root_hash); |
| +	if (err) { |
| +		kfree(desc); |
| +		return ERR_PTR(err); |
| +	} |
| + |
| +	return desc; |
| +} |
| + |
| +/* Compute the file's fs-verity digest via a temporary descriptor. */ |
| +static struct mem_range incfs_calc_verity_digest( |
| +			struct inode *inode, struct file *filp, |
| +			int hash_algorithm) |
| +{ |
| +	struct fsverity_descriptor *desc = incfs_get_fsverity_descriptor(filp, |
| +							hash_algorithm); |
| +	struct mem_range verity_file_digest; |
| + |
| +	if (IS_ERR(desc)) |
| +		return range((u8 *)desc, 0); |
| +	verity_file_digest = incfs_calc_verity_digest_from_desc(inode, desc); |
| +	kfree(desc); |
| +	return verity_file_digest; |
| +} |
| + |
| +/* |
| + * Build the file's Merkle tree: hash each level into the next, writing |
| + * intermediate levels into the backing file at hash_offset; the final |
| + * (root) hash is left in 'hash' for the caller. |
| + */ |
| +static int incfs_build_merkle_tree(struct file *f, struct data_file *df, |
| +			     struct backing_file_context *bfc, |
| +			     struct mtree *hash_tree, loff_t hash_offset, |
| +			     struct incfs_hash_alg *alg, struct mem_range hash) |
| +{ |
| +	int error = 0; |
| +	int limit, lvl, i, result; |
| +	struct mem_range buf = {.len = INCFS_DATA_FILE_BLOCK_SIZE}; |
| +	struct mem_range tmp = {.len = 2 * INCFS_DATA_FILE_BLOCK_SIZE}; |
| + |
| +	buf.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(buf.len)); |
| +	tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); |
| +	if (!buf.data || !tmp.data) { |
| +		error = -ENOMEM; |
| +		goto out; |
| +	} |
| + |
| +	/* |
| +	 * lvl - 1 is the level we are reading, lvl the level we are writing |
| +	 * lvl == -1 means actual blocks |
| +	 * lvl == hash_tree->depth means root hash |
| +	 */ |
| +	limit = df->df_data_block_count; |
| +	for (lvl = 0; lvl <= hash_tree->depth; lvl++) { |
| +		for (i = 0; i < limit; ++i) { |
| +			loff_t hash_level_offset; |
| +			struct mem_range partial_buf = buf; |
| + |
| +			/* Level 0 hashes data blocks; others hash hashes. */ |
| +			if (lvl == 0) |
| +				result = incfs_read_data_file_block(partial_buf, |
| +						f, i, tmp, NULL, NULL); |
| +			else { |
| +				hash_level_offset = hash_offset + |
| +				       hash_tree->hash_level_suboffset[lvl - 1]; |
| + |
| +				result = incfs_kread(bfc, partial_buf.data, |
| +						partial_buf.len, |
| +						hash_level_offset + i * |
| +						INCFS_DATA_FILE_BLOCK_SIZE); |
| +			} |
| + |
| +			if (result < 0) { |
| +				error = result; |
| +				goto out; |
| +			} |
| + |
| +			partial_buf.len = result; |
| +			error = incfs_calc_digest(alg, partial_buf, hash); |
| +			if (error) |
| +				goto out; |
| + |
| +			/* |
| +			 * last level - only one hash to take and it is stored |
| +			 * in the incfs signature record |
| +			 */ |
| +			if (lvl == hash_tree->depth) |
| +				break; |
| + |
| +			hash_level_offset = hash_offset + |
| +				hash_tree->hash_level_suboffset[lvl]; |
| + |
| +			result = incfs_kwrite(bfc, hash.data, hash.len, |
| +					hash_level_offset + hash.len * i); |
| + |
| +			if (result < 0) { |
| +				error = result; |
| +				goto out; |
| +			} |
| + |
| +			if (result != hash.len) { |
| +				error = -EIO; |
| +				goto out; |
| +			} |
| +		} |
| +		/* Next level has one hash per block's worth of hashes. */ |
| +		limit = DIV_ROUND_UP(limit, |
| +				     INCFS_DATA_FILE_BLOCK_SIZE / hash.len); |
| +	} |
| + |
| +out: |
| +	free_pages((unsigned long)tmp.data, get_order(tmp.len)); |
| +	free_pages((unsigned long)buf.data, get_order(buf.len)); |
| +	return error; |
| +} |
| + |
| +/* |
| + * incfs files have a signature record that is separate from the |
| + * verity_signature record. The signature record does not actually contain a |
| + * signature, rather it contains the size/offset of the hash tree, and a binary |
| + * blob which contains the root hash and potentially a signature. |
| + * |
| + * If the file was created with a signature record, then this function simply |
| + * returns. |
| + * |
| + * Otherwise it will create a signature record with a minimal binary blob as |
| + * defined by the structure below, create space for the hash tree and then |
| + * populate it using incfs_build_merkle_tree |
| + */ |
| +static int incfs_add_signature_record(struct file *f) |
| +{ |
| + /* See incfs_parse_signature */ |
| + struct { |
| + __le32 version; |
| + __le32 size_of_hash_info_section; |
| + struct { |
| + __le32 hash_algorithm; |
| + u8 log2_blocksize; |
| + __le32 salt_size; |
| + u8 salt[0]; |
| + __le32 hash_size; |
| + u8 root_hash[32]; |
| + } __packed hash_section; |
| + __le32 size_of_signing_info_section; |
| + u8 signing_info_section[0]; |
| + } __packed sig = { |
| + .version = cpu_to_le32(INCFS_SIGNATURE_VERSION), |
| + .size_of_hash_info_section = |
| + cpu_to_le32(sizeof(sig.hash_section)), |
| + .hash_section = { |
| + .hash_algorithm = cpu_to_le32(INCFS_HASH_TREE_SHA256), |
| + .log2_blocksize = ilog2(INCFS_DATA_FILE_BLOCK_SIZE), |
| + .hash_size = cpu_to_le32(SHA256_DIGEST_SIZE), |
| + }, |
| + }; |
| + |
| + struct data_file *df = get_incfs_data_file(f); |
| + struct mtree *hash_tree = NULL; |
| + struct backing_file_context *bfc; |
| + int error; |
| + loff_t hash_offset, sig_offset; |
| + struct incfs_hash_alg *alg = incfs_get_hash_alg(INCFS_HASH_TREE_SHA256); |
| + u8 hash_buf[INCFS_MAX_HASH_SIZE]; |
| + int hash_size = alg->digest_size; |
| + struct mem_range hash = range(hash_buf, hash_size); |
| + int result; |
| + struct incfs_df_signature *signature = NULL; |
| + |
| + if (!df) |
| + return -EINVAL; |
| + |
| + /* Mapped files share a backing file; cannot grow a tree onto it */ |
| + if (df->df_header_flags & INCFS_FILE_MAPPED) |
| + return -EINVAL; |
| + |
| + /* Already signed? */ |
| + if (df->df_signature && df->df_hash_tree) |
| + return 0; |
| + |
| + /* One present without the other means a corrupt backing file */ |
| + if (df->df_signature || df->df_hash_tree) |
| + return -EFSCORRUPTED; |
| + |
| + /* Add signature metadata record to file */ |
| + hash_tree = incfs_alloc_mtree(range((u8 *)&sig, sizeof(sig)), |
| + df->df_data_block_count); |
| + if (IS_ERR(hash_tree)) |
| + return PTR_ERR(hash_tree); |
| + |
| + bfc = df->df_backing_file_context; |
| + if (!bfc) { |
| + error = -EFSCORRUPTED; |
| + goto out; |
| + } |
| + |
| + error = mutex_lock_interruptible(&bfc->bc_mutex); |
| + if (error) |
| + goto out; |
| + |
| + /* Reserve space for the tree and the signature record on disk */ |
| + error = incfs_write_signature_to_backing_file(bfc, |
| + range((u8 *)&sig, sizeof(sig)), |
| + hash_tree->hash_tree_area_size, |
| + &hash_offset, &sig_offset); |
| + mutex_unlock(&bfc->bc_mutex); |
| + if (error) |
| + goto out; |
| + |
| + /* Populate merkle tree */ |
| + error = incfs_build_merkle_tree(f, df, bfc, hash_tree, hash_offset, alg, |
| + hash); |
| + if (error) |
| + goto out; |
| + |
| + /* Update signature metadata record */ |
| + memcpy(sig.hash_section.root_hash, hash.data, alg->digest_size); |
| + result = incfs_kwrite(bfc, &sig, sizeof(sig), sig_offset); |
| + if (result < 0) { |
| + error = result; |
| + goto out; |
| + } |
| + |
| + if (result != sizeof(sig)) { |
| + error = -EIO; |
| + goto out; |
| + } |
| + |
| + /* Update in-memory records */ |
| + memcpy(hash_tree->root_hash, hash.data, alg->digest_size); |
| + signature = kzalloc(sizeof(*signature), GFP_NOFS); |
| + if (!signature) { |
| + error = -ENOMEM; |
| + goto out; |
| + } |
| + *signature = (struct incfs_df_signature) { |
| + .hash_offset = hash_offset, |
| + .hash_size = hash_tree->hash_tree_area_size, |
| + .sig_offset = sig_offset, |
| + .sig_size = sizeof(sig), |
| + }; |
| + df->df_signature = signature; |
| + signature = NULL; |
| + |
| + /* |
| + * Use memory barrier to prevent readpage seeing the hash tree until |
| + * it's fully there |
| + */ |
| + smp_store_release(&df->df_hash_tree, hash_tree); |
| + hash_tree = NULL; |
| + |
| +out: |
| + /* Both pointers are NULLed on success; non-NULL only on failure */ |
| + kfree(signature); |
| + kfree(hash_tree); |
| + return error; |
| +} |
| + |
| +/* |
| + * Enable fs-verity on an incfs file: ensure the incfs signature record |
| + * and hash tree exist, compute the verity file digest, then persist the |
| + * optional user-provided signature. Serialized per-file by |
| + * df_enable_verity; returns -EEXIST if verity is already enabled. |
| + */ |
| +static int incfs_enable_verity(struct file *filp, |
| + const struct fsverity_enable_arg *arg) |
| +{ |
| + struct inode *inode = file_inode(filp); |
| + struct data_file *df = get_incfs_data_file(filp); |
| + u8 *signature = NULL; |
| + struct mem_range verity_file_digest = range(NULL, 0); |
| + int err; |
| + |
| + if (!df) |
| + return -EFSCORRUPTED; |
| + |
| + err = mutex_lock_interruptible(&df->df_enable_verity); |
| + if (err) |
| + return err; |
| + |
| + if (IS_VERITY(inode)) { |
| + err = -EEXIST; |
| + goto out; |
| + } |
| + |
| + err = incfs_add_signature_record(filp); |
| + if (err) |
| + goto out; |
| + |
| + /* Get the signature if the user provided one */ |
| + if (arg->sig_size) { |
| + signature = memdup_user(u64_to_user_ptr(arg->sig_ptr), |
| + arg->sig_size); |
| + if (IS_ERR(signature)) { |
| + err = PTR_ERR(signature); |
| + signature = NULL; |
| + goto out; |
| + } |
| + } |
| + |
| + verity_file_digest = incfs_calc_verity_digest(inode, filp, |
| + arg->hash_algorithm); |
| + if (IS_ERR(verity_file_digest.data)) { |
| + err = PTR_ERR(verity_file_digest.data); |
| + verity_file_digest.data = NULL; |
| + goto out; |
| + } |
| + |
| + err = incfs_end_enable_verity(filp, signature, arg->sig_size); |
| + if (err) |
| + goto out; |
| + |
| + /* Successfully enabled verity */ |
| + incfs_set_verity_digest(inode, verity_file_digest); |
| + verity_file_digest.data = NULL; |
| +out: |
| + mutex_unlock(&df->df_enable_verity); |
| + kfree(signature); |
| + kfree(verity_file_digest.data); |
| + if (err) |
| + pr_err("%s failed with err %d\n", __func__, err); |
| + return err; |
| +} |
| + |
| +/* |
| + * FS_IOC_ENABLE_VERITY entry point. Validates the userspace |
| + * fsverity_enable_arg (incfs only supports SHA-256, PAGE_SIZE blocks |
| + * and no salt) before delegating to incfs_enable_verity(). |
| + */ |
| +int incfs_ioctl_enable_verity(struct file *filp, const void __user *uarg) |
| +{ |
| + struct inode *inode = file_inode(filp); |
| + struct fsverity_enable_arg arg; |
| + |
| + if (copy_from_user(&arg, uarg, sizeof(arg))) |
| + return -EFAULT; |
| + |
| + if (arg.version != 1) |
| + return -EINVAL; |
| + |
| + if (arg.__reserved1 || |
| + memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2))) |
| + return -EINVAL; |
| + |
| + if (arg.hash_algorithm != FS_VERITY_HASH_ALG_SHA256) |
| + return -EINVAL; |
| + |
| + if (arg.block_size != PAGE_SIZE) |
| + return -EINVAL; |
| + |
| + if (arg.salt_size) |
| + return -EINVAL; |
| + |
| + /* Bound the later memdup_user() of the signature blob */ |
| + if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE) |
| + return -EMSGSIZE; |
| + |
| + if (S_ISDIR(inode->i_mode)) |
| + return -EISDIR; |
| + |
| + if (!S_ISREG(inode->i_mode)) |
| + return -EINVAL; |
| + |
| + return incfs_enable_verity(filp, &arg); |
| +} |
| + |
| +/* |
| + * Read the file's verity signature blob from the backing file into a |
| + * freshly allocated buffer. Returns the buffer (caller frees) with |
| + * *sig_size set, NULL with *sig_size = 0 if the file has no signature, |
| + * or an ERR_PTR on failure. |
| + */ |
| +static u8 *incfs_get_verity_signature(struct file *filp, size_t *sig_size) |
| +{ |
| + struct data_file *df = get_incfs_data_file(filp); |
| + struct incfs_df_verity_signature *vs; |
| + u8 *signature; |
| + int res; |
| + |
| + if (!df || !df->df_backing_file_context) |
| + return ERR_PTR(-EFSCORRUPTED); |
| + |
| + vs = df->df_verity_signature; |
| + if (!vs) { |
| + *sig_size = 0; |
| + return NULL; |
| + } |
| + |
| + /* A record with zero size should never have been written */ |
| + if (!vs->size) { |
| + *sig_size = 0; |
| + return ERR_PTR(-EFSCORRUPTED); |
| + } |
| + |
| + signature = kzalloc(vs->size, GFP_KERNEL); |
| + if (!signature) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + res = incfs_kread(df->df_backing_file_context, |
| + signature, vs->size, vs->offset); |
| + |
| + if (res < 0) |
| + goto err_out; |
| + |
| + /* Short read of a known-size record indicates corruption */ |
| + if (res != vs->size) { |
| + res = -EINVAL; |
| + goto err_out; |
| + } |
| + |
| + *sig_size = vs->size; |
| + return signature; |
| + |
| +err_out: |
| + kfree(signature); |
| + return ERR_PTR(res); |
| +} |
| + |
| +/* |
| + * Ensure data_file->df_verity_file_digest is populated. |
| + * Computes and caches the SHA-256 verity digest on first open of a |
| + * verity file; subsequent calls return immediately. |
| + */ |
| +static int ensure_verity_info(struct inode *inode, struct file *filp) |
| +{ |
| + struct mem_range verity_file_digest; |
| + |
| + /* See if this file's verity file digest is already cached */ |
| + verity_file_digest = incfs_get_verity_digest(inode); |
| + if (verity_file_digest.data) |
| + return 0; |
| + |
| + verity_file_digest = incfs_calc_verity_digest(inode, filp, |
| + FS_VERITY_HASH_ALG_SHA256); |
| + if (IS_ERR(verity_file_digest.data)) |
| + return PTR_ERR(verity_file_digest.data); |
| + |
| + incfs_set_verity_digest(inode, verity_file_digest); |
| + return 0; |
| +} |
| + |
| +/** |
| + * incfs_fsverity_file_open() - prepare to open a file that may be |
| + * verity-enabled |
| + * @inode: the inode being opened |
| + * @filp: the struct file being set up |
| + * |
| + * When opening a verity file, set up data_file->df_verity_file_digest if not |
| + * already done. Note that incfs does not allow opening for writing, so there is |
| + * no need for that check. |
| + * |
| + * Return: 0 on success, -errno on failure |
| + */ |
| +int incfs_fsverity_file_open(struct inode *inode, struct file *filp) |
| +{ |
| + /* Non-verity files need no setup */ |
| + if (IS_VERITY(inode)) |
| + return ensure_verity_info(inode, filp); |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * FS_IOC_MEASURE_VERITY: copy the cached verity file digest to the |
| + * user's fsverity_digest buffer. Fails with -ENODATA for non-verity |
| + * files and -EOVERFLOW if the user's buffer is too small. |
| + */ |
| +int incfs_ioctl_measure_verity(struct file *filp, void __user *_uarg) |
| +{ |
| + struct inode *inode = file_inode(filp); |
| + struct mem_range verity_file_digest = incfs_get_verity_digest(inode); |
| + struct fsverity_digest __user *uarg = _uarg; |
| + struct fsverity_digest arg; |
| + |
| + if (!verity_file_digest.data || !verity_file_digest.len) |
| + return -ENODATA; /* not a verity file */ |
| + |
| + /* |
| + * The user specifies the digest_size their buffer has space for; we can |
| + * return the digest if it fits in the available space. We write back |
| + * the actual size, which may be shorter than the user-specified size. |
| + */ |
| + |
| + if (get_user(arg.digest_size, &uarg->digest_size)) |
| + return -EFAULT; |
| + if (arg.digest_size < verity_file_digest.len) |
| + return -EOVERFLOW; |
| + |
| + /* Rebuild the header from scratch; digest_size was only a capacity */ |
| + memset(&arg, 0, sizeof(arg)); |
| + arg.digest_algorithm = FS_VERITY_HASH_ALG_SHA256; |
| + arg.digest_size = verity_file_digest.len; |
| + |
| + if (copy_to_user(uarg, &arg, sizeof(arg))) |
| + return -EFAULT; |
| + |
| + if (copy_to_user(uarg->digest, verity_file_digest.data, |
| + verity_file_digest.len)) |
| + return -EFAULT; |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Copy up to @length bytes of the file's Merkle tree, starting at |
| + * @start_offset, into the userspace buffer @buf. Returns the number of |
| + * bytes copied (possibly short at the end of the stored tree), or a |
| + * negative error if nothing could be copied. |
| + */ |
| +static int incfs_read_merkle_tree(struct file *filp, void __user *buf, |
| + u64 start_offset, int length) |
| +{ |
| + struct mem_range tmp_buf; |
| + size_t offset; |
| + int retval = 0; |
| + int err = 0; |
| + struct data_file *df = get_incfs_data_file(filp); |
| + |
| + if (!df) |
| + return -EINVAL; |
| + |
| + /* Bounce buffer: the tree is copied out one block at a time */ |
| + tmp_buf = (struct mem_range) { |
| + .data = kzalloc(INCFS_DATA_FILE_BLOCK_SIZE, GFP_NOFS), |
| + .len = INCFS_DATA_FILE_BLOCK_SIZE, |
| + }; |
| + if (!tmp_buf.data) |
| + return -ENOMEM; |
| + |
| + for (offset = start_offset; offset < start_offset + length; |
| + offset += tmp_buf.len) { |
| + err = incfs_read_merkle_tree_blocks(tmp_buf, df, offset); |
| + |
| + if (err < 0) |
| + break; |
| + |
| + /* Short read: reached the end of the stored tree */ |
| + if (err != tmp_buf.len) |
| + break; |
| + |
| + if (copy_to_user(buf, tmp_buf.data, tmp_buf.len)) { |
| + /* |
| + * Report the fault; otherwise a fault on the very |
| + * first block would return 0 and look like a |
| + * successful zero-byte read to userspace. |
| + */ |
| + err = -EFAULT; |
| + break; |
| + } |
| + |
| + buf += tmp_buf.len; |
| + retval += tmp_buf.len; |
| + } |
| + |
| + kfree(tmp_buf.data); |
| + return retval ? retval : err; |
| +} |
| + |
| +/* |
| + * Copy the synthesized fsverity_descriptor to userspace. |
| + * NOTE(review): @offset is ignored and the copy is capped at |
| + * sizeof(*desc), so any trailing signature bytes the descriptor |
| + * allocation may carry are not exposed — confirm this is intended. |
| + */ |
| +static int incfs_read_descriptor(struct file *filp, |
| + void __user *buf, u64 offset, int length) |
| +{ |
| + int err; |
| + struct fsverity_descriptor *desc = incfs_get_fsverity_descriptor(filp, |
| + FS_VERITY_HASH_ALG_SHA256); |
| + |
| + if (IS_ERR(desc)) |
| + return PTR_ERR(desc); |
| + length = min_t(u64, length, sizeof(*desc)); |
| + err = copy_to_user(buf, desc, length); |
| + kfree(desc); |
| + return err ? err : length; |
| +} |
| + |
| +/* |
| + * Copy the file's verity signature blob to userspace. Returns the |
| + * number of bytes copied, -ENODATA if the file has no signature, or a |
| + * negative error. Note: @offset is currently ignored; the copy always |
| + * starts at the beginning of the signature. |
| + */ |
| +static int incfs_read_signature(struct file *filp, |
| + void __user *buf, u64 offset, int length) |
| +{ |
| + size_t sig_size; |
| + /* |
| + * Must be automatic storage: incfs_get_verity_signature() returns a |
| + * freshly allocated buffer that is kfree'd below. A static pointer |
| + * here would be shared by concurrent ioctl callers, racing them into |
| + * use-after-free / double-free. |
| + */ |
| + u8 *signature; |
| + int err; |
| + |
| + signature = incfs_get_verity_signature(filp, &sig_size); |
| + if (IS_ERR(signature)) |
| + return PTR_ERR(signature); |
| + |
| + if (!signature) |
| + return -ENODATA; |
| + |
| + length = min_t(u64, length, sig_size); |
| + err = copy_to_user(buf, signature, length); |
| + kfree(signature); |
| + return err ? err : length; |
| +} |
| + |
| +/* |
| + * FS_IOC_READ_VERITY_METADATA: dispatch to the Merkle-tree, descriptor |
| + * or signature reader based on the requested metadata type. |
| + */ |
| +int incfs_ioctl_read_verity_metadata(struct file *filp, |
| + const void __user *uarg) |
| +{ |
| + struct fsverity_read_metadata_arg arg; |
| + int length; |
| + void __user *buf; |
| + |
| + if (copy_from_user(&arg, uarg, sizeof(arg))) |
| + return -EFAULT; |
| + |
| + if (arg.__reserved) |
| + return -EINVAL; |
| + |
| + /* offset + length must not overflow. */ |
| + if (arg.offset + arg.length < arg.offset) |
| + return -EINVAL; |
| + |
| + /* Ensure that the return value will fit in INT_MAX. */ |
| + length = min_t(u64, arg.length, INT_MAX); |
| + |
| + buf = u64_to_user_ptr(arg.buf_ptr); |
| + |
| + switch (arg.metadata_type) { |
| + case FS_VERITY_METADATA_TYPE_MERKLE_TREE: |
| + return incfs_read_merkle_tree(filp, buf, arg.offset, length); |
| + case FS_VERITY_METADATA_TYPE_DESCRIPTOR: |
| + return incfs_read_descriptor(filp, buf, arg.offset, length); |
| + case FS_VERITY_METADATA_TYPE_SIGNATURE: |
| + return incfs_read_signature(filp, buf, arg.offset, length); |
| + default: |
| + return -EINVAL; |
| + } |
| +} |
| diff --git a/fs/incfs/verity.h b/fs/incfs/verity.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/verity.h |
| @@ -0,0 +1,49 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright 2020 Google LLC |
| + */ |
| + |
| +#ifndef _INCFS_VERITY_H |
| +#define _INCFS_VERITY_H |
| + |
| +/* Arbitrary limit to bound the kmalloc() size. Can be changed. */ |
| +#define FS_VERITY_MAX_SIGNATURE_SIZE 16128 |
| + |
| +#ifdef CONFIG_FS_VERITY |
| + |
| +int incfs_ioctl_enable_verity(struct file *filp, const void __user *uarg); |
| +int incfs_ioctl_measure_verity(struct file *filp, void __user *_uarg); |
| + |
| +int incfs_fsverity_file_open(struct inode *inode, struct file *filp); |
| +int incfs_ioctl_read_verity_metadata(struct file *filp, |
| + const void __user *uarg); |
| + |
| +#else /* !CONFIG_FS_VERITY */ |
| + |
| +/* Stubs: all verity operations report "unsupported" when fs-verity |
| + * is compiled out. |
| + */ |
| +static inline int incfs_ioctl_enable_verity(struct file *filp, |
| + const void __user *uarg) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +static inline int incfs_ioctl_measure_verity(struct file *filp, |
| + void __user *_uarg) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +static inline int incfs_fsverity_file_open(struct inode *inode, |
| + struct file *filp) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +static inline int incfs_ioctl_read_verity_metadata(struct file *filp, |
| + const void __user *uarg) |
| +{ |
| + return -EOPNOTSUPP; |
| +} |
| + |
| +#endif /* !CONFIG_FS_VERITY */ |
| + |
| +#endif |
| diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/vfs.c |
| @@ -0,0 +1,1994 @@ |
| +// SPDX-License-Identifier: GPL-2.0 |
| +/* |
| + * Copyright 2018 Google LLC |
| + */ |
| + |
| +#include <linux/blkdev.h> |
| +#include <linux/compat.h> |
| +#include <linux/delay.h> |
| +#include <linux/file.h> |
| +#include <linux/fs.h> |
| +#include <linux/fs_stack.h> |
| +#include <linux/fsnotify.h> |
| +#include <linux/fsverity.h> |
| +#include <linux/mmap_lock.h> |
| +#include <linux/namei.h> |
| +#include <linux/pagemap.h> |
| +#include <linux/parser.h> |
| +#include <linux/seq_file.h> |
| +#include <linux/backing-dev-defs.h> |
| + |
| +#include <uapi/linux/incrementalfs.h> |
| + |
| +#include "vfs.h" |
| + |
| +#include "data_mgmt.h" |
| +#include "format.h" |
| +#include "internal.h" |
| +#include "pseudo_files.h" |
| +#include "sysfs.h" |
| +#include "verity.h" |
| + |
| +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); |
| + |
| +static int dentry_revalidate(struct dentry *dentry, unsigned int flags); |
| +static void dentry_release(struct dentry *d); |
| + |
| +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx); |
| +static struct dentry *dir_lookup(struct inode *dir_inode, |
| + struct dentry *dentry, unsigned int flags); |
| +static int dir_mkdir(struct mnt_idmap *idmap, struct inode *dir, |
| + struct dentry *dentry, umode_t mode); |
| +static int dir_unlink(struct inode *dir, struct dentry *dentry); |
| +static int dir_link(struct dentry *old_dentry, struct inode *dir, |
| + struct dentry *new_dentry); |
| +static int dir_rmdir(struct inode *dir, struct dentry *dentry); |
| +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, |
| + struct inode *new_dir, struct dentry *new_dentry, |
| + unsigned int flags); |
| + |
| +static int file_open(struct inode *inode, struct file *file); |
| +static int file_release(struct inode *inode, struct file *file); |
| +static int read_folio(struct file *f, struct folio *folio); |
| +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); |
| + |
| +#ifdef CONFIG_COMPAT |
| +static long incfs_compat_ioctl(struct file *file, unsigned int cmd, |
| + unsigned long arg); |
| +#endif |
| + |
| +static struct inode *alloc_inode(struct super_block *sb); |
| +static void free_inode(struct inode *inode); |
| +static void evict_inode(struct inode *inode); |
| + |
| +static int incfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, |
| + struct iattr *ia); |
| +static int incfs_getattr(struct mnt_idmap *idmap, const struct path *path, |
| + struct kstat *stat, u32 request_mask, |
| + unsigned int query_flags); |
| +static ssize_t incfs_getxattr(struct dentry *d, const char *name, |
| + void *value, size_t size); |
| +static ssize_t incfs_setxattr(struct mnt_idmap *idmap, struct dentry *d, |
| + const char *name, void *value, size_t size, |
| + int flags); |
| +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); |
| + |
| +static int show_options(struct seq_file *, struct dentry *); |
| + |
| +/* Superblock operations; statfs is delegated to the generic helper */ |
| +static const struct super_operations incfs_super_ops = { |
| + .statfs = simple_statfs, |
| + .remount_fs = incfs_remount_fs, |
| + .alloc_inode = alloc_inode, |
| + .destroy_inode = free_inode, |
| + .evict_inode = evict_inode, |
| + .show_options = show_options |
| +}; |
| + |
| +/* |
| + * Adapter matching the inode_operations .rename signature; the idmap |
| + * argument is intentionally dropped before calling dir_rename(). |
| + */ |
| +static int dir_rename_wrap(struct mnt_idmap *idmap, struct inode *old_dir, |
| + struct dentry *old_dentry, struct inode *new_dir, |
| + struct dentry *new_dentry, unsigned int flags) |
| +{ |
| + return dir_rename(old_dir, old_dentry, new_dir, new_dentry, flags); |
| +} |
| + |
| +/* Directory inode operations: lookups and mutations on incfs dirs */ |
| +static const struct inode_operations incfs_dir_inode_ops = { |
| + .lookup = dir_lookup, |
| + .mkdir = dir_mkdir, |
| + .rename = dir_rename_wrap, |
| + .unlink = dir_unlink, |
| + .link = dir_link, |
| + .rmdir = dir_rmdir, |
| + .setattr = incfs_setattr, |
| +}; |
| + |
| +WRAP_DIR_ITER(iterate_incfs_dir) // FIXME! |
| +static const struct file_operations incfs_dir_fops = { |
| + .llseek = generic_file_llseek, |
| + .read = generic_read_dir, |
| + .iterate_shared = shared_iterate_incfs_dir, |
| + .open = file_open, |
| + .release = file_release, |
| +}; |
| + |
| +static const struct dentry_operations incfs_dentry_ops = { |
| + .d_revalidate = dentry_revalidate, |
| + .d_release = dentry_release |
| +}; |
| + |
| +/* Only read_folio is provided; incfs is a read-only data path */ |
| +static const struct address_space_operations incfs_address_space_ops = { |
| + .read_folio = read_folio, |
| + /* .readpages = readpages */ |
| +}; |
| + |
| +/* |
| + * Page-fault handler wrapper. Clears FAULT_FLAG_ALLOW_RETRY before |
| + * delegating to filemap_fault() — presumably so that slow/blocking |
| + * incfs reads complete within a single fault attempt; TODO confirm. |
| + */ |
| +static vm_fault_t incfs_fault(struct vm_fault *vmf) |
| +{ |
| + vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY; |
| + return filemap_fault(vmf); |
| +} |
| + |
| +static const struct vm_operations_struct incfs_file_vm_ops = { |
| + .fault = incfs_fault, |
| + .map_pages = filemap_map_pages, |
| + .page_mkwrite = filemap_page_mkwrite, |
| +}; |
| + |
| +/* This is used for a general mmap of a disk file */ |
| + |
| +static int incfs_file_mmap(struct file *file, struct vm_area_struct *vma) |
| +{ |
| + struct address_space *mapping = file->f_mapping; |
| + |
| + /* Cannot mmap a file whose pages cannot be read in */ |
| + if (!mapping->a_ops->read_folio) |
| + return -ENOEXEC; |
| + file_accessed(file); |
| + vma->vm_ops = &incfs_file_vm_ops; |
| + return 0; |
| +} |
| + |
| +/* Regular-file operations; reads go through the generic page cache */ |
| +const struct file_operations incfs_file_ops = { |
| + .open = file_open, |
| + .release = file_release, |
| + .read_iter = generic_file_read_iter, |
| + .mmap = incfs_file_mmap, |
| + .splice_read = filemap_splice_read, |
| + .llseek = generic_file_llseek, |
| + .unlocked_ioctl = dispatch_ioctl, |
| +#ifdef CONFIG_COMPAT |
| + .compat_ioctl = incfs_compat_ioctl, |
| +#endif |
| +}; |
| + |
| +const struct inode_operations incfs_file_inode_ops = { |
| + .setattr = incfs_setattr, |
| + .getattr = incfs_getattr, |
| + .listxattr = incfs_listxattr |
| +}; |
| + |
| +/* xattr handler shims: adapt the VFS handler signatures to incfs' own |
| + * getxattr/setxattr helpers. |
| + */ |
| +static int incfs_handler_getxattr(const struct xattr_handler *xh, |
| + struct dentry *d, struct inode *inode, |
| + const char *name, void *buffer, size_t size) |
| +{ |
| + return incfs_getxattr(d, name, buffer, size); |
| +} |
| + |
| +static int incfs_handler_setxattr(const struct xattr_handler *xh, |
| + struct mnt_idmap *idmap, |
| + struct dentry *d, struct inode *inode, |
| + const char *name, const void *buffer, |
| + size_t size, int flags) |
| +{ |
| + return incfs_setxattr(idmap, d, name, (void *)buffer, size, flags); |
| +} |
| + |
| +static const struct xattr_handler incfs_xattr_handler = { |
| + .prefix = "", /* AKA all attributes */ |
| + .get = incfs_handler_getxattr, |
| + .set = incfs_handler_setxattr, |
| +}; |
| + |
| +static const struct xattr_handler *incfs_xattr_ops[] = { |
| + &incfs_xattr_handler, |
| + NULL, |
| +}; |
| + |
| +/* Search key passed to iget5_locked via inode_test/inode_set */ |
| +struct inode_search { |
| + unsigned long ino; |
| + |
| + struct dentry *backing_dentry; |
| + |
| + /* file size as read from the size xattr */ |
| + size_t size; |
| + |
| + /* whether the backing file carries the verity xattr */ |
| + bool verity; |
| +}; |
| + |
| +/* Mount option tokens, matched by option_tokens below */ |
| +enum parse_parameter { |
| + Opt_read_timeout, |
| + Opt_readahead_pages, |
| + Opt_rlog_pages, |
| + Opt_rlog_wakeup_cnt, |
| + Opt_report_uid, |
| + Opt_sysfs_name, |
| + Opt_err |
| +}; |
| + |
| +static const match_table_t option_tokens = { |
| + { Opt_read_timeout, "read_timeout_ms=%u" }, |
| + { Opt_readahead_pages, "readahead=%u" }, |
| + { Opt_rlog_pages, "rlog_pages=%u" }, |
| + { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, |
| + { Opt_report_uid, "report_uid" }, |
| + { Opt_sysfs_name, "sysfs_name=%s" }, |
| + { Opt_err, NULL } |
| +}; |
| + |
| +/* Release heap-allocated option strings; safe to call repeatedly */ |
| +static void free_options(struct mount_options *opts) |
| +{ |
| + kfree(opts->sysfs_name); |
| + opts->sysfs_name = NULL; |
| +} |
| + |
| +/* |
| + * Parse the comma-separated mount option string into *opts, which is |
| + * first reset to defaults. Returns 0 on success or -EINVAL on any |
| + * unrecognized or malformed option (freeing option strings first). |
| + * NOTE(review): match_strdup() can return NULL on OOM and a repeated |
| + * sysfs_name= would leak the previous copy — confirm acceptable here. |
| + */ |
| +static int parse_options(struct mount_options *opts, char *str) |
| +{ |
| + substring_t args[MAX_OPT_ARGS]; |
| + int value; |
| + char *position; |
| + |
| + if (opts == NULL) |
| + return -EFAULT; |
| + |
| + *opts = (struct mount_options) { |
| + .read_timeout_ms = 1000, /* Default: 1s */ |
| + .readahead_pages = 10, |
| + .read_log_pages = 2, |
| + .read_log_wakeup_count = 10, |
| + }; |
| + |
| + if (str == NULL || *str == 0) |
| + return 0; |
| + |
| + while ((position = strsep(&str, ",")) != NULL) { |
| + int token; |
| + |
| + if (!*position) |
| + continue; |
| + |
| + token = match_token(position, option_tokens, args); |
| + |
| + switch (token) { |
| + case Opt_read_timeout: |
| + if (match_int(&args[0], &value)) |
| + return -EINVAL; |
| + /* Cap at one hour */ |
| + if (value > 3600000) |
| + return -EINVAL; |
| + opts->read_timeout_ms = value; |
| + break; |
| + case Opt_readahead_pages: |
| + if (match_int(&args[0], &value)) |
| + return -EINVAL; |
| + opts->readahead_pages = value; |
| + break; |
| + case Opt_rlog_pages: |
| + if (match_int(&args[0], &value)) |
| + return -EINVAL; |
| + opts->read_log_pages = value; |
| + break; |
| + case Opt_rlog_wakeup_cnt: |
| + if (match_int(&args[0], &value)) |
| + return -EINVAL; |
| + opts->read_log_wakeup_count = value; |
| + break; |
| + case Opt_report_uid: |
| + opts->report_uid = true; |
| + break; |
| + case Opt_sysfs_name: |
| + opts->sysfs_name = match_strdup(&args[0]); |
| + break; |
| + default: |
| + free_options(opts); |
| + return -EINVAL; |
| + } |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +/* Read file size from the attribute. Quicker than reading the header */ |
| +static u64 read_size_attr(struct dentry *backing_dentry) |
| +{ |
| + __le64 attr_value; |
| + ssize_t bytes_read; |
| + |
| + bytes_read = vfs_getxattr(&nop_mnt_idmap, backing_dentry, INCFS_XATTR_SIZE_NAME, |
| + (char *)&attr_value, sizeof(attr_value)); |
| + |
| + /* Missing or malformed attribute => treat the file as empty */ |
| + if (bytes_read != sizeof(attr_value)) |
| + return 0; |
| + |
| + return le64_to_cpu(attr_value); |
| +} |
| + |
| +/* Read verity flag from the attribute. Quicker than reading the header */ |
| +static bool read_verity_attr(struct dentry *backing_dentry) |
| +{ |
| + /* Only the attribute's presence matters, not its contents */ |
| + return vfs_getxattr(&nop_mnt_idmap, backing_dentry, INCFS_XATTR_VERITY_NAME, NULL, 0) |
| + >= 0; |
| +} |
| + |
| +/* |
| + * iget5_locked match callback: an existing inode matches when it wraps |
| + * the same backing inode and has the expected inode number. |
| + */ |
| +static int inode_test(struct inode *inode, void *opaque) |
| +{ |
| + struct inode_search *search = opaque; |
| + struct inode_info *node = get_incfs_node(inode); |
| + struct inode *backing_inode = d_inode(search->backing_dentry); |
| + |
| + if (!node) |
| + return 0; |
| + |
| + return node->n_backing_inode == backing_inode && |
| + inode->i_ino == search->ino; |
| +} |
| + |
| +/* |
| + * iget5_locked init callback: populate a new incfs inode from its |
| + * backing inode — attributes, size, ops tables and timestamps — and |
| + * take a reference on the backing inode. |
| + */ |
| +static int inode_set(struct inode *inode, void *opaque) |
| +{ |
| + struct inode_search *search = opaque; |
| + struct inode_info *node = get_incfs_node(inode); |
| + struct dentry *backing_dentry = search->backing_dentry; |
| + struct inode *backing_inode = d_inode(backing_dentry); |
| + |
| + fsstack_copy_attr_all(inode, backing_inode); |
| + if (S_ISREG(inode->i_mode)) { |
| + u64 size = search->size; |
| + |
| + inode->i_size = size; |
| + inode->i_blocks = get_blocks_count_for_size(size); |
| + inode->i_mapping->a_ops = &incfs_address_space_ops; |
| + inode->i_op = &incfs_file_inode_ops; |
| + inode->i_fop = &incfs_file_ops; |
| + /* incfs files are never writable through the mount */ |
| + inode->i_mode &= ~0222; |
| + if (search->verity) |
| + inode_set_flags(inode, S_VERITY, S_VERITY); |
| + } else if (S_ISDIR(inode->i_mode)) { |
| + inode->i_size = 0; |
| + inode->i_blocks = 1; |
| + inode->i_mapping->a_ops = &incfs_address_space_ops; |
| + inode->i_op = &incfs_dir_inode_ops; |
| + inode->i_fop = &incfs_dir_fops; |
| + } else { |
| + /* Only regular files and directories are supported */ |
| + pr_warn_once("incfs: Unexpected inode type\n"); |
| + return -EBADF; |
| + } |
| + |
| + ihold(backing_inode); |
| + node->n_backing_inode = backing_inode; |
| + node->n_mount_info = get_mount_info(inode->i_sb); |
| + inode_set_ctime_to_ts(inode, inode_get_ctime(backing_inode)); |
| + inode_set_mtime_to_ts(inode, inode_get_mtime(backing_inode)); |
| + inode_set_atime_to_ts(inode, inode_get_atime(backing_inode)); |
| + inode->i_ino = backing_inode->i_ino; |
| + if (backing_inode->i_ino < INCFS_START_INO_RANGE) { |
| + pr_warn("incfs: ino conflict with backing FS %ld\n", |
| + backing_inode->i_ino); |
| + } |
| + |
| + return 0; |
| +} |
| + |
| +/* |
| + * Find or create the incfs inode wrapping @backing_dentry's inode. |
| + * Size and verity state are pre-read from xattrs for inode_set(). |
| + */ |
| +static struct inode *fetch_regular_inode(struct super_block *sb, |
| + struct dentry *backing_dentry) |
| +{ |
| + struct inode *backing_inode = d_inode(backing_dentry); |
| + struct inode_search search = { |
| + .ino = backing_inode->i_ino, |
| + .backing_dentry = backing_dentry, |
| + .size = read_size_attr(backing_dentry), |
| + .verity = read_verity_attr(backing_dentry), |
| + }; |
| + struct inode *inode = iget5_locked(sb, search.ino, inode_test, |
| + inode_set, &search); |
| + |
| + if (!inode) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + if (inode->i_state & I_NEW) |
| + unlock_new_inode(inode); |
| + |
| + return inode; |
| +} |
| + |
| +/* |
| + * Directory iterator: at the mount root, emit the incfs pseudo files |
| + * first, then iterate the backing directory with ctx->pos shifted by |
| + * PSEUDO_FILE_COUNT so backing entries get distinct positions. |
| + */ |
| +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) |
| +{ |
| + struct dir_file *dir = get_incfs_dir_file(file); |
| + int error = 0; |
| + struct mount_info *mi = get_mount_info(file_superblock(file)); |
| + bool root; |
| + |
| + if (!dir) { |
| + error = -EBADF; |
| + goto out; |
| + } |
| + |
| + root = dir->backing_dir->f_inode |
| + == d_inode(mi->mi_backing_dir_path.dentry); |
| + |
| + if (root) { |
| + error = emit_pseudo_files(ctx); |
| + if (error) |
| + goto out; |
| + } |
| + |
| + ctx->pos -= PSEUDO_FILE_COUNT; |
| + error = iterate_dir(dir->backing_dir, ctx); |
| + ctx->pos += PSEUDO_FILE_COUNT; |
| + /* Keep our position in sync with the backing directory's */ |
| + file->f_pos = dir->backing_dir->f_pos; |
| +out: |
| + if (error) |
| + pr_warn("incfs: %s %s %d\n", __func__, |
| + file->f_path.dentry->d_name.name, error); |
| + return error; |
| +} |
| + |
| +/* |
| + * Attach per-dentry info recording the backing path; takes its own |
| + * path reference, released later by dentry_release(). |
| + */ |
| +static int incfs_init_dentry(struct dentry *dentry, struct path *path) |
| +{ |
| + struct dentry_info *d_info = NULL; |
| + |
| + if (!dentry || !path) |
| + return -EFAULT; |
| + |
| + d_info = kzalloc(sizeof(*d_info), GFP_NOFS); |
| + if (!d_info) |
| + return -ENOMEM; |
| + |
| + d_info->backing_path = *path; |
| + path_get(path); |
| + |
| + dentry->d_fsdata = d_info; |
| + return 0; |
| +} |
| + |
| +/* |
| + * Look up @name under @backing_dir, creating it (mode 0777) if absent. |
| + * Returns a referenced dentry and sets *created accordingly, or an |
| + * ERR_PTR on failure. |
| + */ |
| +static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, |
| + const char *name, |
| + bool *created) |
| +{ |
| + struct dentry *index_dentry; |
| + struct inode *backing_inode = d_inode(backing_dir); |
| + int err = 0; |
| + |
| + index_dentry = incfs_lookup_dentry(backing_dir, name); |
| + if (!index_dentry) { |
| + return ERR_PTR(-EINVAL); |
| + } else if (IS_ERR(index_dentry)) { |
| + return index_dentry; |
| + } else if (d_really_is_positive(index_dentry)) { |
| + /* Index already exists. */ |
| + *created = false; |
| + return index_dentry; |
| + } |
| + |
| + /* Index needs to be created. */ |
| + inode_lock_nested(backing_inode, I_MUTEX_PARENT); |
| + err = vfs_mkdir(&nop_mnt_idmap, backing_inode, index_dentry, 0777); |
| + inode_unlock(backing_inode); |
| + |
| + if (err) { |
| + dput(index_dentry); |
| + return ERR_PTR(err); |
| + } |
| + |
| + /* mkdir reported success but left no usable entry: bail out */ |
| + if (!d_really_is_positive(index_dentry) || |
| + unlikely(d_unhashed(index_dentry))) { |
| + dput(index_dentry); |
| + return ERR_PTR(-EINVAL); |
| + } |
| + |
| + *created = true; |
| + return index_dentry; |
| +} |
| + |
| +/* |
| + * Read one data block, applying this caller's per-UID read timeouts if |
| + * an entry for the current UID exists; otherwise fall back to the |
| + * mount-wide read_timeout_ms option (converted to microseconds and |
| + * clamped to U32_MAX). |
| + */ |
| +static int read_single_page_timeouts(struct data_file *df, struct file *f, |
| + int block_index, struct mem_range range, |
| + struct mem_range tmp, |
| + unsigned int *delayed_min_us) |
| +{ |
| + struct mount_info *mi = df->df_mount_info; |
| + struct incfs_read_data_file_timeouts timeouts = { |
| + .max_pending_time_us = U32_MAX, |
| + }; |
| + int uid = current_uid().val; |
| + int i; |
| + |
| + spin_lock(&mi->mi_per_uid_read_timeouts_lock); |
| + for (i = 0; i < mi->mi_per_uid_read_timeouts_size / |
| + sizeof(*mi->mi_per_uid_read_timeouts); ++i) { |
| + struct incfs_per_uid_read_timeouts *t = |
| + &mi->mi_per_uid_read_timeouts[i]; |
| + |
| + if (t->uid == uid) { |
| + timeouts.min_time_us = t->min_time_us; |
| + timeouts.min_pending_time_us = t->min_pending_time_us; |
| + timeouts.max_pending_time_us = t->max_pending_time_us; |
| + break; |
| + } |
| + } |
| + spin_unlock(&mi->mi_per_uid_read_timeouts_lock); |
| + /* U32_MAX sentinel means no per-UID entry matched */ |
| + if (timeouts.max_pending_time_us == U32_MAX) { |
| + u64 read_timeout_us = (u64)mi->mi_options.read_timeout_ms * |
| + 1000; |
| + |
| + timeouts.max_pending_time_us = read_timeout_us <= U32_MAX ? |
| + read_timeout_us : U32_MAX; |
| + } |
| + |
| + return incfs_read_data_file_block(range, f, block_index, tmp, |
| + &timeouts, delayed_min_us); |
| +} |
| + |
| +/* |
| + * Sleep for @us microseconds using the mechanism appropriate to the |
| + * duration; only the msleep path (>= 20ms) is interruptible and may |
| + * return the remaining milliseconds. |
| + */ |
| +static int usleep_interruptible(u32 us) |
| +{ |
| + /* See: |
| + * https://www.kernel.org/doc/Documentation/timers/timers-howto.txt |
| + * for explanation |
| + */ |
| + if (us < 10) { |
| + udelay(us); |
| + return 0; |
| + } else if (us < 20000) { |
| + usleep_range(us, us + us / 10); |
| + return 0; |
| + } else |
| + return msleep_interruptible(us / 1000); |
| +} |
| + |
| +/* |
| + * Page-cache read: fill one folio from the data file, applying read |
| + * timeouts, zero-padding past EOF or a short read, and honoring any |
| + * artificial delay requested by the per-UID timeout configuration. |
| + */ |
| +static int read_folio(struct file *f, struct folio *folio) |
| +{ |
| + struct page *page = &folio->page; |
| + loff_t offset = 0; |
| + loff_t size = 0; |
| + ssize_t bytes_to_read = 0; |
| + ssize_t read_result = 0; |
| + struct data_file *df = get_incfs_data_file(f); |
| + int result = 0; |
| + void *page_start; |
| + int block_index; |
| + unsigned int delayed_min_us = 0; |
| + |
| + if (!df) { |
| + SetPageError(page); |
| + unlock_page(page); |
| + return -EBADF; |
| + } |
| + |
| + page_start = kmap(page); |
| + offset = page_offset(page); |
| + /* df_mapped_offset shifts reads for files mapped into another */ |
| + block_index = (offset + df->df_mapped_offset) / |
| + INCFS_DATA_FILE_BLOCK_SIZE; |
| + size = df->df_size; |
| + |
| + if (offset < size) { |
| + /* Scratch space for two blocks used by the block reader */ |
| + struct mem_range tmp = { |
| + .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE |
| + }; |
| + tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); |
| + if (!tmp.data) { |
| + read_result = -ENOMEM; |
| + goto err; |
| + } |
| + bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); |
| + |
| + read_result = read_single_page_timeouts(df, f, block_index, |
| + range(page_start, bytes_to_read), tmp, |
| + &delayed_min_us); |
| + |
| + free_pages((unsigned long)tmp.data, get_order(tmp.len)); |
| + } else { |
| + /* Entirely past EOF: page becomes all zeroes */ |
| + bytes_to_read = 0; |
| + read_result = 0; |
| + } |
| + |
| +err: |
| + if (read_result < 0) |
| + result = read_result; |
| + else if (read_result < PAGE_SIZE) |
| + zero_user(page, read_result, PAGE_SIZE - read_result); |
| + |
| + if (result == 0) |
| + SetPageUptodate(page); |
| + else |
| + SetPageError(page); |
| + |
| + flush_dcache_page(page); |
| + kunmap(page); |
| + unlock_page(page); |
| + /* Apply any configured artificial delay after releasing the page */ |
| + if (delayed_min_us) |
| + usleep_interruptible(delayed_min_us); |
| + return result; |
| +} |
| + |
| +/* |
| + * Hard-link |what| to |where| on the backing filesystem, taking the |
| + * destination parent's inode lock (I_MUTEX_PARENT nesting) around |
| + * vfs_link().  Returns 0 or a negative errno from vfs_link(). |
| + */ |
| +int incfs_link(struct dentry *what, struct dentry *where) |
| +{ |
| +	struct dentry *parent_dentry = dget_parent(where); |
| +	struct inode *pinode = d_inode(parent_dentry); |
| +	int error = 0; |
| + |
| +	inode_lock_nested(pinode, I_MUTEX_PARENT); |
| +	error = vfs_link(what, &nop_mnt_idmap, pinode, where, NULL); |
| +	inode_unlock(pinode); |
| + |
| +	dput(parent_dentry); |
| +	return error; |
| +} |
| + |
| +/* |
| + * Unlink |dentry| on the backing filesystem, with the parent inode locked |
| + * (I_MUTEX_PARENT nesting) around vfs_unlink().  Returns 0 or negative |
| + * errno. |
| + */ |
| +int incfs_unlink(struct dentry *dentry) |
| +{ |
| +	struct dentry *parent_dentry = dget_parent(dentry); |
| +	struct inode *pinode = d_inode(parent_dentry); |
| +	int error = 0; |
| + |
| +	inode_lock_nested(pinode, I_MUTEX_PARENT); |
| +	error = vfs_unlink(&nop_mnt_idmap, pinode, dentry, NULL); |
| +	inode_unlock(pinode); |
| + |
| +	dput(parent_dentry); |
| +	return error; |
| +} |
| + |
| +/* |
| + * Remove backing directory |dentry|, with the parent inode locked |
| + * (I_MUTEX_PARENT nesting) around vfs_rmdir().  Returns 0 or negative |
| + * errno. |
| + */ |
| +static int incfs_rmdir(struct dentry *dentry) |
| +{ |
| +	struct dentry *parent_dentry = dget_parent(dentry); |
| +	struct inode *pinode = d_inode(parent_dentry); |
| +	int error = 0; |
| + |
| +	inode_lock_nested(pinode, I_MUTEX_PARENT); |
| +	error = vfs_rmdir(&nop_mnt_idmap, pinode, dentry); |
| +	inode_unlock(pinode); |
| + |
| +	dput(parent_dentry); |
| +	return error; |
| +} |
| + |
| +/* |
| + * After a hardlink named |file_id_str| has been removed from the special |
| + * backing directory |special_directory| (.index or .incomplete), emit the |
| + * corresponding fsnotify unlink event and drop the stale dentry on the |
| + * incfs side so the dcache stays coherent.  Failures are only logged - |
| + * the unlink itself has already happened. |
| + */ |
| +static void notify_unlink(struct dentry *dentry, const char *file_id_str, |
| +			  const char *special_directory) |
| +{ |
| +	struct dentry *root = dentry; |
| +	struct dentry *file = NULL; |
| +	struct dentry *dir = NULL; |
| +	int error = 0; |
| +	/* NOTE(review): take_lock appears to distinguish whether |dentry| is |
| +	 * deep enough that the root's child directories need a locked lookup |
| +	 * (incfs_lookup_dentry) rather than a lockless lookup_one_len - |
| +	 * confirm the intended invariant. |
| +	 */ |
| +	bool take_lock = root->d_parent != root->d_parent->d_parent; |
| + |
| +	/* Walk up to the filesystem root (root == root->d_parent there). */ |
| +	while (root != root->d_parent) |
| +		root = root->d_parent; |
| + |
| +	if (take_lock) |
| +		dir = incfs_lookup_dentry(root, special_directory); |
| +	else |
| +		dir = lookup_one_len(special_directory, root, |
| +				     strlen(special_directory)); |
| + |
| +	if (IS_ERR(dir)) { |
| +		error = PTR_ERR(dir); |
| +		goto out; |
| +	} |
| +	if (d_is_negative(dir)) { |
| +		error = -ENOENT; |
| +		goto out; |
| +	} |
| + |
| +	file = incfs_lookup_dentry(dir, file_id_str); |
| +	if (IS_ERR(file)) { |
| +		error = PTR_ERR(file); |
| +		goto out; |
| +	} |
| +	if (d_is_negative(file)) { |
| +		error = -ENOENT; |
| +		goto out; |
| +	} |
| + |
| +	fsnotify_unlink(d_inode(dir), file); |
| +	d_delete(file); |
| + |
| +out: |
| +	if (error) |
| +		pr_warn("%s failed with error %d\n", __func__, error); |
| + |
| +	/* dput()/IS_ERR-safe: dir may be an ERR_PTR only on the path where |
| +	 * it was never dput-able; file is NULL or valid here. |
| +	 */ |
| +	dput(dir); |
| +	dput(file); |
| +} |
| + |
| +/* |
| + * Called when the last missing data block of |df| has been filled in. |
| + * Best-effort housekeeping under the mount owner's credentials: |
| + *  1. truncate the backing file to its logical size to drop preallocated |
| + *     space, |
| + *  2. fsync the backing file, |
| + *  3. remove the file's hardlink from the .incomplete directory and emit |
| + *     the matching fsnotify event. |
| + * All failures are logged and otherwise ignored. |
| + */ |
| +static void handle_file_completed(struct file *f, struct data_file *df) |
| +{ |
| +	struct backing_file_context *bfc; |
| +	struct mount_info *mi = df->df_mount_info; |
| +	char *file_id_str = NULL; |
| +	struct dentry *incomplete_file_dentry = NULL; |
| +	const struct cred *old_cred = override_creds(mi->mi_owner); |
| +	int error; |
| + |
| +	/* Truncate file to remove any preallocated space */ |
| +	bfc = df->df_backing_file_context; |
| +	if (bfc) { |
| +		struct file *f = bfc->bc_file; |
| + |
| +		if (f) { |
| +			loff_t size = i_size_read(file_inode(f)); |
| + |
| +			error = vfs_truncate(&f->f_path, size); |
| +			if (error) |
| +				/* No useful action on failure */ |
| +				pr_warn("incfs: Failed to truncate complete file: %d\n", |
| +					error); |
| +		} |
| +	} |
| + |
| +	/* This is best effort - there is no useful action to take on failure */ |
| +	file_id_str = file_id_to_str(df->df_id); |
| +	if (!file_id_str) |
| +		goto out; |
| + |
| +	incomplete_file_dentry = incfs_lookup_dentry( |
| +					df->df_mount_info->mi_incomplete_dir, |
| +					file_id_str); |
| +	if (!incomplete_file_dentry || IS_ERR(incomplete_file_dentry)) { |
| +		incomplete_file_dentry = NULL; |
| +		goto out; |
| +	} |
| + |
| +	if (!d_really_is_positive(incomplete_file_dentry)) |
| +		goto out; |
| + |
| +	/* Persist the final blocks before advertising completion. */ |
| +	vfs_fsync(df->df_backing_file_context->bc_file, 0); |
| +	error = incfs_unlink(incomplete_file_dentry); |
| +	if (error) { |
| +		pr_warn("incfs: Deleting incomplete file failed: %d\n", error); |
| +		goto out; |
| +	} |
| + |
| +	notify_unlink(f->f_path.dentry, file_id_str, INCFS_INCOMPLETE_NAME); |
| + |
| +out: |
| +	dput(incomplete_file_dentry); |
| +	kfree(file_id_str); |
| +	revert_creds(old_cred); |
| +} |
| + |
| +/* |
| + * INCFS_IOC_FILL_BLOCKS: copy an array of incfs_fill_block descriptors |
| + * from userspace and write each block (data or hash, by flag) into the |
| + * data file.  Requires the opener to hold CAN_FILL permission. |
| + * |
| + * Returns the number of blocks successfully processed; an error code is |
| + * returned only if no block was processed at all. |
| + */ |
| +static long ioctl_fill_blocks(struct file *f, void __user *arg) |
| +{ |
| +	struct incfs_fill_blocks __user *usr_fill_blocks = arg; |
| +	struct incfs_fill_blocks fill_blocks; |
| +	struct incfs_fill_block __user *usr_fill_block_array; |
| +	struct data_file *df = get_incfs_data_file(f); |
| +	struct incfs_file_data *fd = f->private_data; |
| +	const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; |
| +	u8 *data_buf = NULL; |
| +	ssize_t error = 0; |
| +	int i = 0; |
| +	bool complete = false; |
| + |
| +	if (!df) |
| +		return -EBADF; |
| + |
| +	if (!fd || fd->fd_fill_permission != CAN_FILL) |
| +		return -EPERM; |
| + |
| +	if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) |
| +		return -EFAULT; |
| + |
| +	usr_fill_block_array = u64_to_user_ptr(fill_blocks.fill_blocks); |
| +	/* Scratch large enough for a compressed block and its expansion. */ |
| +	data_buf = (u8 *)__get_free_pages(GFP_NOFS | __GFP_COMP, |
| +					  get_order(data_buf_size)); |
| +	if (!data_buf) |
| +		return -ENOMEM; |
| + |
| +	for (i = 0; i < fill_blocks.count; i++) { |
| +		struct incfs_fill_block fill_block = {}; |
| + |
| +		if (copy_from_user(&fill_block, &usr_fill_block_array[i], |
| +				   sizeof(fill_block)) > 0) { |
| +			error = -EFAULT; |
| +			break; |
| +		} |
| + |
| +		if (fill_block.data_len > data_buf_size) { |
| +			error = -E2BIG; |
| +			break; |
| +		} |
| + |
| +		if (copy_from_user(data_buf, u64_to_user_ptr(fill_block.data), |
| +				   fill_block.data_len) > 0) { |
| +			error = -EFAULT; |
| +			break; |
| +		} |
| +		fill_block.data = 0; /* To make sure nobody uses it. */ |
| +		if (fill_block.flags & INCFS_BLOCK_FLAGS_HASH) { |
| +			error = incfs_process_new_hash_block(df, &fill_block, |
| +							     data_buf); |
| +		} else { |
| +			error = incfs_process_new_data_block(df, &fill_block, |
| +						data_buf, &complete); |
| +		} |
| +		if (error) |
| +			break; |
| +	} |
| + |
| +	if (data_buf) |
| +		free_pages((unsigned long)data_buf, get_order(data_buf_size)); |
| + |
| +	/* Last data block written: finalize the file (truncate, fsync, |
| +	 * drop the .incomplete hardlink). |
| +	 */ |
| +	if (complete) |
| +		handle_file_completed(f, df); |
| + |
| +	/* |
| +	 * Only report the error if no records were processed, otherwise |
| +	 * just return how many were processed successfully. |
| +	 */ |
| +	if (i == 0) |
| +		return error; |
| + |
| +	return i; |
| +} |
| + |
| +/* |
| + * INCFS_IOC_READ_FILE_SIGNATURE: copy the file's signature blob into the |
| + * caller-supplied buffer (bounded by INCFS_MAX_SIGNATURE_SIZE) and report |
| + * its length back through the args struct. |
| + * |
| + * NOTE(review): a zero file_signature_buf_size reaches kzalloc(0); verify |
| + * incfs_read_file_signature handles a zero-length range as intended. |
| + */ |
| +static long ioctl_read_file_signature(struct file *f, void __user *arg) |
| +{ |
| +	struct incfs_get_file_sig_args __user *args_usr_ptr = arg; |
| +	struct incfs_get_file_sig_args args = {}; |
| +	u8 *sig_buffer = NULL; |
| +	size_t sig_buf_size = 0; |
| +	int error = 0; |
| +	int read_result = 0; |
| +	struct data_file *df = get_incfs_data_file(f); |
| + |
| +	if (!df) |
| +		return -EINVAL; |
| + |
| +	if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) |
| +		return -EINVAL; |
| + |
| +	sig_buf_size = args.file_signature_buf_size; |
| +	if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) |
| +		return -E2BIG; |
| + |
| +	sig_buffer = kzalloc(sig_buf_size, GFP_NOFS | __GFP_COMP); |
| +	if (!sig_buffer) |
| +		return -ENOMEM; |
| + |
| +	read_result = incfs_read_file_signature(df, |
| +			range(sig_buffer, sig_buf_size)); |
| + |
| +	if (read_result < 0) { |
| +		error = read_result; |
| +		goto out; |
| +	} |
| + |
| +	if (copy_to_user(u64_to_user_ptr(args.file_signature), sig_buffer, |
| +			read_result)) { |
| +		error = -EFAULT; |
| +		goto out; |
| +	} |
| + |
| +	args.file_signature_len_out = read_result; |
| +	if (copy_to_user(args_usr_ptr, &args, sizeof(args))) |
| +		error = -EFAULT; |
| + |
| +out: |
| +	kfree(sig_buffer); |
| + |
| +	return error; |
| +} |
| + |
| +/* |
| + * INCFS_IOC_GET_FILLED_BLOCKS: report which blocks of the file are |
| + * already present.  Restricted to CAN_FILL openers.  The args struct is |
| + * copied back even when incfs_get_filled_blocks() fails, so out-fields |
| + * filled so far still reach the caller. |
| + */ |
| +static long ioctl_get_filled_blocks(struct file *f, void __user *arg) |
| +{ |
| +	struct incfs_get_filled_blocks_args __user *args_usr_ptr = arg; |
| +	struct incfs_get_filled_blocks_args args = {}; |
| +	struct data_file *df = get_incfs_data_file(f); |
| +	struct incfs_file_data *fd = f->private_data; |
| +	int error; |
| + |
| +	if (!df || !fd) |
| +		return -EINVAL; |
| + |
| +	if (fd->fd_fill_permission != CAN_FILL) |
| +		return -EPERM; |
| + |
| +	if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) |
| +		return -EINVAL; |
| + |
| +	error = incfs_get_filled_blocks(df, fd, &args); |
| + |
| +	if (copy_to_user(args_usr_ptr, &args, sizeof(args))) |
| +		return -EFAULT; |
| + |
| +	return error; |
| +} |
| + |
| +/* |
| + * INCFS_IOC_GET_BLOCK_COUNT: return total/filled counts for data and hash |
| + * blocks of the file.  Hash block total is derived as total - data. |
| + */ |
| +static long ioctl_get_block_count(struct file *f, void __user *arg) |
| +{ |
| +	struct incfs_get_block_count_args __user *args_usr_ptr = arg; |
| +	struct incfs_get_block_count_args args = {}; |
| +	struct data_file *df = get_incfs_data_file(f); |
| + |
| +	if (!df) |
| +		return -EINVAL; |
| + |
| +	args.total_data_blocks_out = df->df_data_block_count; |
| +	args.filled_data_blocks_out = atomic_read(&df->df_data_blocks_written); |
| +	args.total_hash_blocks_out = df->df_total_block_count - |
| +		df->df_data_block_count; |
| +	args.filled_hash_blocks_out = atomic_read(&df->df_hash_blocks_written); |
| + |
| +	if (copy_to_user(args_usr_ptr, &args, sizeof(args))) |
| +		return -EFAULT; |
| + |
| +	return 0; |
| +} |
| + |
| +/* FS_IOC_GETFLAGS: only FS_VERITY_FL is ever reported by incfs. */ |
| +static int incfs_ioctl_get_flags(struct file *f, void __user *arg) |
| +{ |
| +	u32 flags = IS_VERITY(file_inode(f)) ? FS_VERITY_FL : 0; |
| + |
| +	return put_user(flags, (int __user *) arg); |
| +} |
| + |
| +/* |
| + * ->unlocked_ioctl for incfs data files: route each supported request to |
| + * its handler; anything else is -EINVAL. |
| + */ |
| +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) |
| +{ |
| +	switch (req) { |
| +	case INCFS_IOC_FILL_BLOCKS: |
| +		return ioctl_fill_blocks(f, (void __user *)arg); |
| +	case INCFS_IOC_READ_FILE_SIGNATURE: |
| +		return ioctl_read_file_signature(f, (void __user *)arg); |
| +	case INCFS_IOC_GET_FILLED_BLOCKS: |
| +		return ioctl_get_filled_blocks(f, (void __user *)arg); |
| +	case INCFS_IOC_GET_BLOCK_COUNT: |
| +		return ioctl_get_block_count(f, (void __user *)arg); |
| +	case FS_IOC_ENABLE_VERITY: |
| +		return incfs_ioctl_enable_verity(f, (const void __user *)arg); |
| +	case FS_IOC_GETFLAGS: |
| +		return incfs_ioctl_get_flags(f, (void __user *) arg); |
| +	case FS_IOC_MEASURE_VERITY: |
| +		return incfs_ioctl_measure_verity(f, (void __user *)arg); |
| +	case FS_IOC_READ_VERITY_METADATA: |
| +		return incfs_ioctl_read_verity_metadata(f, (void __user *)arg); |
| +	default: |
| +		return -EINVAL; |
| +	} |
| +} |
| + |
| +#ifdef CONFIG_COMPAT |
| +/* |
| + * 32-bit compat ioctl: translate FS_IOC32_GETFLAGS to its native number, |
| + * pass the other known commands through unchanged, and forward to |
| + * dispatch_ioctl() with a compat-converted pointer argument. |
| + */ |
| +static long incfs_compat_ioctl(struct file *file, unsigned int cmd, |
| +				unsigned long arg) |
| +{ |
| +	switch (cmd) { |
| +	case FS_IOC32_GETFLAGS: |
| +		cmd = FS_IOC_GETFLAGS; |
| +		break; |
| +	case INCFS_IOC_FILL_BLOCKS: |
| +	case INCFS_IOC_READ_FILE_SIGNATURE: |
| +	case INCFS_IOC_GET_FILLED_BLOCKS: |
| +	case INCFS_IOC_GET_BLOCK_COUNT: |
| +	case FS_IOC_ENABLE_VERITY: |
| +	case FS_IOC_MEASURE_VERITY: |
| +	case FS_IOC_READ_VERITY_METADATA: |
| +		break; |
| +	default: |
| +		return -ENOIOCTLCMD; |
| +	} |
| +	return dispatch_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); |
| +} |
| +#endif |
| + |
| +/* |
| + * ->lookup for incfs directories.  At the filesystem root, pseudo files |
| + * (.pending_reads etc.) are tried first; otherwise the name is resolved |
| + * in the corresponding backing directory.  A missing backing entry yields |
| + * a negative dentry; a backing entry on a different superblock is |
| + * rejected with -EXDEV to avoid crossing mounts. |
| + */ |
| +static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, |
| +				 unsigned int flags) |
| +{ |
| +	struct mount_info *mi = get_mount_info(dir_inode->i_sb); |
| +	struct dentry *dir_dentry = NULL; |
| +	struct dentry *backing_dentry = NULL; |
| +	struct path dir_backing_path = {}; |
| +	struct inode_info *dir_info = get_incfs_node(dir_inode); |
| +	int err = 0; |
| + |
| +	if (!mi || !dir_info || !dir_info->n_backing_inode) |
| +		return ERR_PTR(-EBADF); |
| + |
| +	if (d_inode(mi->mi_backing_dir_path.dentry) == |
| +		dir_info->n_backing_inode) { |
| +		/* We do lookup in the FS root. Show pseudo files. */ |
| +		err = dir_lookup_pseudo_files(dir_inode->i_sb, dentry); |
| +		if (err != -ENOENT) |
| +			goto out; |
| +		err = 0; |
| +	} |
| + |
| +	dir_dentry = dget_parent(dentry); |
| +	get_incfs_backing_path(dir_dentry, &dir_backing_path); |
| +	backing_dentry = incfs_lookup_dentry(dir_backing_path.dentry, |
| +						dentry->d_name.name); |
| + |
| +	if (!backing_dentry || IS_ERR(backing_dentry)) { |
| +		err = IS_ERR(backing_dentry) |
| +			? PTR_ERR(backing_dentry) |
| +			: -EFAULT; |
| +		backing_dentry = NULL; |
| +		goto out; |
| +	} else { |
| +		struct inode *inode = NULL; |
| +		struct path backing_path = { |
| +			.mnt = dir_backing_path.mnt, |
| +			.dentry = backing_dentry |
| +		}; |
| + |
| +		/* Attach the backing path to the incfs dentry's fsdata. */ |
| +		err = incfs_init_dentry(dentry, &backing_path); |
| +		if (err) |
| +			goto out; |
| + |
| +		if (!d_really_is_positive(backing_dentry)) { |
| +			/* |
| +			 * No such entry found in the backing dir. |
| +			 * Create a negative entry. |
| +			 */ |
| +			d_add(dentry, NULL); |
| +			err = 0; |
| +			goto out; |
| +		} |
| + |
| +		if (d_inode(backing_dentry)->i_sb != |
| +			dir_info->n_backing_inode->i_sb) { |
| +			/* |
| +			 * Somehow after the path lookup we ended up in a |
| +			 * different fs mount. If we keep going it's going |
| +			 * to end badly. |
| +			 */ |
| +			err = -EXDEV; |
| +			goto out; |
| +		} |
| + |
| +		inode = fetch_regular_inode(dir_inode->i_sb, backing_dentry); |
| +		if (IS_ERR(inode)) { |
| +			err = PTR_ERR(inode); |
| +			goto out; |
| +		} |
| + |
| +		d_add(dentry, inode); |
| +	} |
| + |
| +out: |
| +	dput(dir_dentry); |
| +	dput(backing_dentry); |
| +	path_put(&dir_backing_path); |
| +	if (err) |
| +		pr_debug("incfs: %s %s %d\n", __func__, |
| +			dentry->d_name.name, err); |
| +	return ERR_PTR(err); |
| +} |
| + |
| +/* |
| + * ->mkdir: create the directory on the backing filesystem and instantiate |
| + * the incfs dentry from the result.  Creation inside the special .index |
| + * and .incomplete directories is refused with -EBUSY.  Serialized by |
| + * mi_dir_struct_mutex like all other directory-structure operations. |
| + * |
| + * NOTE(review): the backing dir is created with mode | 0222 - backing |
| + * objects are kept owner-writable even though incfs presents them |
| + * read-only (see incfs_setattr); confirm this is the intended invariant. |
| + */ |
| +static int dir_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) |
| +{ |
| +	struct mount_info *mi = get_mount_info(dir->i_sb); |
| +	struct inode_info *dir_node = get_incfs_node(dir); |
| +	struct dentry *backing_dentry = NULL; |
| +	struct path backing_path = {}; |
| +	int err = 0; |
| + |
| + |
| +	if (!mi || !dir_node || !dir_node->n_backing_inode) |
| +		return -EBADF; |
| + |
| +	err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); |
| +	if (err) |
| +		return err; |
| + |
| +	get_incfs_backing_path(dentry, &backing_path); |
| +	backing_dentry = backing_path.dentry; |
| + |
| +	if (!backing_dentry) { |
| +		err = -EBADF; |
| +		goto path_err; |
| +	} |
| + |
| +	if (backing_dentry->d_parent == mi->mi_index_dir) { |
| +		/* Can't create a subdir inside .index */ |
| +		err = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	if (backing_dentry->d_parent == mi->mi_incomplete_dir) { |
| +		/* Can't create a subdir inside .incomplete */ |
| +		err = -EBUSY; |
| +		goto out; |
| +	} |
| +	inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); |
| +	err = vfs_mkdir(idmap, dir_node->n_backing_inode, backing_dentry, mode | 0222); |
| +	inode_unlock(dir_node->n_backing_inode); |
| +	if (!err) { |
| +		struct inode *inode = NULL; |
| + |
| +		if (d_really_is_negative(backing_dentry) || |
| +			unlikely(d_unhashed(backing_dentry))) { |
| +			err = -EINVAL; |
| +			goto out; |
| +		} |
| + |
| +		inode = fetch_regular_inode(dir->i_sb, backing_dentry); |
| +		if (IS_ERR(inode)) { |
| +			err = PTR_ERR(inode); |
| +			goto out; |
| +		} |
| +		d_instantiate(dentry, inode); |
| +	} |
| + |
| +out: |
| +	if (d_really_is_negative(dentry)) |
| +		d_drop(dentry); |
| +	path_put(&backing_path); |
| + |
| +path_err: |
| +	mutex_unlock(&mi->mi_dir_struct_mutex); |
| +	if (err) |
| +		pr_debug("incfs: %s err:%d\n", __func__, err); |
| +	return err; |
| +} |
| + |
| +/* |
| + * Delete file referenced by backing_dentry and if appropriate its hardlink |
| + * from .index and .incomplete |
| + * |
| + * The nlink accounting below works from the backing file's link count: |
| + * a fully-tracked incfs file can have up to three links (the visible |
| + * name, one in .index, one in .incomplete).  Links are discounted as the |
| + * special-directory entries are found; only when this unlink would drop |
| + * the last "real" reference are the .index/.incomplete hardlinks removed |
| + * as well.  Caller must hold mi_dir_struct_mutex (asserted). |
| + */ |
| +static int file_delete(struct mount_info *mi, struct dentry *dentry, |
| +		       struct dentry *backing_dentry, int nlink) |
| +{ |
| +	struct dentry *index_file_dentry = NULL; |
| +	struct dentry *incomplete_file_dentry = NULL; |
| +	/* 2 chars per byte of file ID + 1 char for \0 */ |
| +	char file_id_str[2 * sizeof(incfs_uuid_t) + 1] = {0}; |
| +	ssize_t uuid_size = 0; |
| +	int error = 0; |
| + |
| +	WARN_ON(!mutex_is_locked(&mi->mi_dir_struct_mutex)); |
| + |
| +	/* More than 3 links: other names remain; just drop this one. */ |
| +	if (nlink > 3) |
| +		goto just_unlink; |
| + |
| +	uuid_size = vfs_getxattr(&nop_mnt_idmap, backing_dentry, INCFS_XATTR_ID_NAME, |
| +			file_id_str, 2 * sizeof(incfs_uuid_t)); |
| +	if (uuid_size < 0) { |
| +		error = uuid_size; |
| +		goto out; |
| +	} |
| + |
| +	if (uuid_size != 2 * sizeof(incfs_uuid_t)) { |
| +		error = -EBADMSG; |
| +		goto out; |
| +	} |
| + |
| +	index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); |
| +	if (IS_ERR(index_file_dentry)) { |
| +		error = PTR_ERR(index_file_dentry); |
| +		index_file_dentry = NULL; |
| +		goto out; |
| +	} |
| + |
| +	if (d_really_is_positive(index_file_dentry) && nlink > 0) |
| +		nlink--; |
| + |
| +	if (nlink > 2) |
| +		goto just_unlink; |
| + |
| +	incomplete_file_dentry = incfs_lookup_dentry(mi->mi_incomplete_dir, |
| +						     file_id_str); |
| +	if (IS_ERR(incomplete_file_dentry)) { |
| +		error = PTR_ERR(incomplete_file_dentry); |
| +		incomplete_file_dentry = NULL; |
| +		goto out; |
| +	} |
| + |
| +	if (d_really_is_positive(incomplete_file_dentry) && nlink > 0) |
| +		nlink--; |
| + |
| +	if (nlink > 1) |
| +		goto just_unlink; |
| + |
| +	/* Last real link: retire the bookkeeping hardlinks first. */ |
| +	if (d_really_is_positive(index_file_dentry)) { |
| +		error = incfs_unlink(index_file_dentry); |
| +		if (error) |
| +			goto out; |
| +		notify_unlink(dentry, file_id_str, INCFS_INDEX_NAME); |
| +	} |
| + |
| +	if (d_really_is_positive(incomplete_file_dentry)) { |
| +		error = incfs_unlink(incomplete_file_dentry); |
| +		if (error) |
| +			goto out; |
| +		notify_unlink(dentry, file_id_str, INCFS_INCOMPLETE_NAME); |
| +	} |
| + |
| +just_unlink: |
| +	error = incfs_unlink(backing_dentry); |
| + |
| +out: |
| +	dput(index_file_dentry); |
| +	dput(incomplete_file_dentry); |
| +	if (error) |
| +		pr_debug("incfs: delete_file_from_index err:%d\n", error); |
| +	return error; |
| +} |
| + |
| +/* |
| + * ->unlink: refuse direct removal from .index/.incomplete, fetch the |
| + * backing file's link count, then delegate to file_delete() which also |
| + * cleans up the bookkeeping hardlinks when appropriate. |
| + */ |
| +static int dir_unlink(struct inode *dir, struct dentry *dentry) |
| +{ |
| +	struct mount_info *mi = get_mount_info(dir->i_sb); |
| +	struct path backing_path = {}; |
| +	struct kstat stat; |
| +	int err = 0; |
| + |
| +	if (!mi) |
| +		return -EBADF; |
| + |
| +	err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); |
| +	if (err) |
| +		return err; |
| + |
| +	get_incfs_backing_path(dentry, &backing_path); |
| +	if (!backing_path.dentry) { |
| +		err = -EBADF; |
| +		goto path_err; |
| +	} |
| + |
| +	if (backing_path.dentry->d_parent == mi->mi_index_dir) { |
| +		/* Direct unlink from .index are not allowed. */ |
| +		err = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	if (backing_path.dentry->d_parent == mi->mi_incomplete_dir) { |
| +		/* Direct unlink from .incomplete are not allowed. */ |
| +		err = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	/* Need an up-to-date nlink for file_delete's accounting. */ |
| +	err = vfs_getattr(&backing_path, &stat, STATX_NLINK, |
| +			  AT_STATX_SYNC_AS_STAT); |
| +	if (err) |
| +		goto out; |
| + |
| +	err = file_delete(mi, dentry, backing_path.dentry, stat.nlink); |
| + |
| +	d_drop(dentry); |
| +out: |
| +	path_put(&backing_path); |
| +path_err: |
| +	if (err) |
| +		pr_debug("incfs: %s err:%d\n", __func__, err); |
| +	mutex_unlock(&mi->mi_dir_struct_mutex); |
| +	return err; |
| +} |
| + |
| +/* |
| + * ->link: create a hard link on the backing filesystem and instantiate |
| + * the new incfs dentry.  Linking into .index/.incomplete is refused. |
| + * |
| + * NOTE(review): backing_new_path.dentry is dereferenced without a NULL |
| + * check, unlike dir_unlink/dir_rmdir which guard the same lookup - |
| + * confirm get_incfs_backing_path cannot yield a NULL dentry here. |
| + */ |
| +static int dir_link(struct dentry *old_dentry, struct inode *dir, |
| +			 struct dentry *new_dentry) |
| +{ |
| +	struct mount_info *mi = get_mount_info(dir->i_sb); |
| +	struct path backing_old_path = {}; |
| +	struct path backing_new_path = {}; |
| +	int error = 0; |
| + |
| +	if (!mi) |
| +		return -EBADF; |
| + |
| +	error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); |
| +	if (error) |
| +		return error; |
| + |
| +	get_incfs_backing_path(old_dentry, &backing_old_path); |
| +	get_incfs_backing_path(new_dentry, &backing_new_path); |
| + |
| +	if (backing_new_path.dentry->d_parent == mi->mi_index_dir) { |
| +		/* Can't link to .index */ |
| +		error = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	if (backing_new_path.dentry->d_parent == mi->mi_incomplete_dir) { |
| +		/* Can't link to .incomplete */ |
| +		error = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	error = incfs_link(backing_old_path.dentry, backing_new_path.dentry); |
| +	if (!error) { |
| +		struct inode *inode = NULL; |
| +		struct dentry *bdentry = backing_new_path.dentry; |
| + |
| +		if (d_really_is_negative(bdentry)) { |
| +			error = -EINVAL; |
| +			goto out; |
| +		} |
| + |
| +		inode = fetch_regular_inode(dir->i_sb, bdentry); |
| +		if (IS_ERR(inode)) { |
| +			error = PTR_ERR(inode); |
| +			goto out; |
| +		} |
| +		d_instantiate(new_dentry, inode); |
| +	} |
| + |
| +out: |
| +	path_put(&backing_old_path); |
| +	path_put(&backing_new_path); |
| +	if (error) |
| +		pr_debug("incfs: %s err:%d\n", __func__, error); |
| +	mutex_unlock(&mi->mi_dir_struct_mutex); |
| +	return error; |
| +} |
| + |
| +/* |
| + * ->rmdir: remove the backing directory (refusing to remove the special |
| + * .index/.incomplete directories themselves) and drop the incfs dentry on |
| + * success. |
| + */ |
| +static int dir_rmdir(struct inode *dir, struct dentry *dentry) |
| +{ |
| +	struct mount_info *mi = get_mount_info(dir->i_sb); |
| +	struct path backing_path = {}; |
| +	int err = 0; |
| + |
| +	if (!mi) |
| +		return -EBADF; |
| + |
| +	err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); |
| +	if (err) |
| +		return err; |
| + |
| +	get_incfs_backing_path(dentry, &backing_path); |
| +	if (!backing_path.dentry) { |
| +		err = -EBADF; |
| +		goto path_err; |
| +	} |
| + |
| +	if (backing_path.dentry == mi->mi_index_dir) { |
| +		/* Can't delete .index */ |
| +		err = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	if (backing_path.dentry == mi->mi_incomplete_dir) { |
| +		/* Can't delete .incomplete */ |
| +		err = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	err = incfs_rmdir(backing_path.dentry); |
| +	if (!err) |
| +		d_drop(dentry); |
| +out: |
| +	path_put(&backing_path); |
| + |
| +path_err: |
| +	if (err) |
| +		pr_debug("incfs: %s err:%d\n", __func__, err); |
| +	mutex_unlock(&mi->mi_dir_struct_mutex); |
| +	return err; |
| +} |
| + |
| +/* |
| + * ->rename: perform the rename on the backing filesystem under |
| + * lock_rename(), refusing renames of or direct moves from the special |
| + * .index/.incomplete directories, then propagate attributes back to the |
| + * incfs inodes via fsstack_copy_attr_all(). |
| + */ |
| +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, |
| +		struct inode *new_dir, struct dentry *new_dentry, |
| +		unsigned int flags) |
| +{ |
| +	struct mount_info *mi = get_mount_info(old_dir->i_sb); |
| +	struct dentry *backing_old_dentry; |
| +	struct dentry *backing_new_dentry; |
| +	struct dentry *backing_old_dir_dentry; |
| +	struct dentry *backing_new_dir_dentry; |
| +	struct inode *target_inode; |
| +	struct dentry *trap; |
| +	struct renamedata rd = {}; |
| +	int error = 0; |
| + |
| +	/* Guard against a missing mount_info, as every other dir_* |
| +	 * operation in this file does, before touching mi's mutex. |
| +	 */ |
| +	if (!mi) |
| +		return -EBADF; |
| + |
| +	error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); |
| +	if (error) |
| +		return error; |
| + |
| +	backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; |
| + |
| +	if (!backing_old_dentry || backing_old_dentry == mi->mi_index_dir || |
| +	    backing_old_dentry == mi->mi_incomplete_dir) { |
| +		/* Renaming .index or .incomplete not allowed */ |
| +		error = -EBUSY; |
| +		goto exit; |
| +	} |
| + |
| +	backing_new_dentry = get_incfs_dentry(new_dentry)->backing_path.dentry; |
| +	dget(backing_old_dentry); |
| +	dget(backing_new_dentry); |
| + |
| +	backing_old_dir_dentry = dget_parent(backing_old_dentry); |
| +	backing_new_dir_dentry = dget_parent(backing_new_dentry); |
| +	target_inode = d_inode(new_dentry); |
| + |
| +	if (backing_old_dir_dentry == mi->mi_index_dir || |
| +	    backing_old_dir_dentry == mi->mi_incomplete_dir) { |
| +		/* Direct moves from .index or .incomplete are not allowed. */ |
| +		error = -EBUSY; |
| +		goto out; |
| +	} |
| + |
| +	trap = lock_rename(backing_old_dir_dentry, backing_new_dir_dentry); |
| + |
| +	/* lock_rename's trap dentry marks an ancestry cycle; renaming a |
| +	 * directory into its own descendant must be rejected. |
| +	 */ |
| +	if (trap == backing_old_dentry) { |
| +		error = -EINVAL; |
| +		goto unlock_out; |
| +	} |
| +	if (trap == backing_new_dentry) { |
| +		error = -ENOTEMPTY; |
| +		goto unlock_out; |
| +	} |
| + |
| +	rd.old_dir = d_inode(backing_old_dir_dentry); |
| +	rd.old_dentry = backing_old_dentry; |
| +	rd.new_dir = d_inode(backing_new_dir_dentry); |
| +	rd.new_dentry = backing_new_dentry; |
| +	rd.flags = flags; |
| +	rd.old_mnt_idmap = &nop_mnt_idmap; |
| +	rd.new_mnt_idmap = &nop_mnt_idmap; |
| +	rd.delegated_inode = NULL; |
| + |
| +	error = vfs_rename(&rd); |
| +	if (error) |
| +		goto unlock_out; |
| +	if (target_inode) |
| +		fsstack_copy_attr_all(target_inode, |
| +			get_incfs_node(target_inode)->n_backing_inode); |
| +	fsstack_copy_attr_all(new_dir, d_inode(backing_new_dir_dentry)); |
| +	if (new_dir != old_dir) |
| +		fsstack_copy_attr_all(old_dir, d_inode(backing_old_dir_dentry)); |
| + |
| +unlock_out: |
| +	unlock_rename(backing_old_dir_dentry, backing_new_dir_dentry); |
| + |
| +out: |
| +	dput(backing_new_dir_dentry); |
| +	dput(backing_old_dir_dentry); |
| +	dput(backing_new_dentry); |
| +	dput(backing_old_dentry); |
| + |
| +exit: |
| +	mutex_unlock(&mi->mi_dir_struct_mutex); |
| +	if (error) |
| +		pr_debug("incfs: %s err:%d\n", __func__, error); |
| +	return error; |
| +} |
| + |
| + |
| +/* |
| + * ->open: open the backing object under the mount owner's credentials |
| + * (read-write for regular files so fills can proceed, read-only for |
| + * directories).  Regular files get an incfs_file_data with fill |
| + * permission defaulting to CANT_FILL, are made ready for data ops, and |
| + * pass the fs-verity open check; directories get a dir_file wrapper. |
| + */ |
| +static int file_open(struct inode *inode, struct file *file) |
| +{ |
| +	struct mount_info *mi = get_mount_info(inode->i_sb); |
| +	struct file *backing_file = NULL; |
| +	struct path backing_path = {}; |
| +	int err = 0; |
| +	int flags = O_NOATIME | O_LARGEFILE | |
| +		(S_ISDIR(inode->i_mode) ? O_RDONLY : O_RDWR); |
| +	const struct cred *old_cred; |
| + |
| +	WARN_ON(file->private_data); |
| + |
| +	if (!mi) |
| +		return -EBADF; |
| + |
| +	get_incfs_backing_path(file->f_path.dentry, &backing_path); |
| +	if (!backing_path.dentry) |
| +		return -EBADF; |
| + |
| +	old_cred = override_creds(mi->mi_owner); |
| +	backing_file = dentry_open(&backing_path, flags, current_cred()); |
| +	revert_creds(old_cred); |
| +	path_put(&backing_path); |
| + |
| +	if (IS_ERR(backing_file)) { |
| +		err = PTR_ERR(backing_file); |
| +		backing_file = NULL; |
| +		goto out; |
| +	} |
| + |
| +	if (S_ISREG(inode->i_mode)) { |
| +		struct incfs_file_data *fd = kzalloc(sizeof(*fd), GFP_NOFS); |
| + |
| +		if (!fd) { |
| +			err = -ENOMEM; |
| +			goto out; |
| +		} |
| + |
| +		*fd = (struct incfs_file_data) { |
| +			.fd_fill_permission = CANT_FILL, |
| +		}; |
| +		file->private_data = fd; |
| + |
| +		err = make_inode_ready_for_data_ops(mi, inode, backing_file); |
| +		if (err) |
| +			goto out; |
| + |
| +		err = incfs_fsverity_file_open(inode, file); |
| +		if (err) |
| +			goto out; |
| +	} else if (S_ISDIR(inode->i_mode)) { |
| +		struct dir_file *dir = NULL; |
| + |
| +		dir = incfs_open_dir_file(mi, backing_file); |
| +		if (IS_ERR(dir)) |
| +			err = PTR_ERR(dir); |
| +		else |
| +			file->private_data = dir; |
| +	} else |
| +		err = -EBADF; |
| + |
| +out: |
| +	if (err) { |
| +		pr_debug("name:%s err: %d\n", |
| +			 file->f_path.dentry->d_name.name, err); |
| +		/* Undo the private_data allocation on any failure path. */ |
| +		if (S_ISREG(inode->i_mode)) |
| +			kfree(file->private_data); |
| +		else if (S_ISDIR(inode->i_mode)) |
| +			incfs_free_dir_file(file->private_data); |
| + |
| +		file->private_data = NULL; |
| +	} |
| + |
| +	if (backing_file) |
| +		fput(backing_file); |
| +	return err; |
| +} |
| + |
| +/* |
| + * ->release: free the per-open private data allocated in file_open() |
| + * (incfs_file_data for regular files, dir_file for directories). |
| + */ |
| +static int file_release(struct inode *inode, struct file *file) |
| +{ |
| +	if (S_ISREG(inode->i_mode)) { |
| +		kfree(file->private_data); |
| +		file->private_data = NULL; |
| +	} else if (S_ISDIR(inode->i_mode)) { |
| +		struct dir_file *dir = get_incfs_dir_file(file); |
| + |
| +		incfs_free_dir_file(dir); |
| +	} |
| + |
| +	return 0; |
| +} |
| + |
| +/* |
| + * ->d_revalidate: an incfs dentry is valid only if its cached backing |
| + * dentry still resolves to the same backing inode (a mismatch indicates |
| + * the backing dir was modified behind incfs's back) and, when the backing |
| + * fs has its own d_revalidate, that also succeeds.  RCU-walk is punted |
| + * with -ECHILD since we take path references. |
| + */ |
| +static int dentry_revalidate(struct dentry *d, unsigned int flags) |
| +{ |
| +	struct path backing_path = {}; |
| +	struct inode_info *info = get_incfs_node(d_inode(d)); |
| +	struct inode *binode = (info == NULL) ? NULL : info->n_backing_inode; |
| +	struct dentry *backing_dentry = NULL; |
| +	int result = 0; |
| + |
| +	if (flags & LOOKUP_RCU) |
| +		return -ECHILD; |
| + |
| +	get_incfs_backing_path(d, &backing_path); |
| +	backing_dentry = backing_path.dentry; |
| +	if (!backing_dentry) |
| +		goto out; |
| + |
| +	if (d_inode(backing_dentry) != binode) { |
| +		/* |
| +		 * Backing inodes obtained via dentry and inode don't match. |
| +		 * It indicates that most likely backing dir has changed |
| +		 * directly bypassing Incremental FS interface. |
| +		 */ |
| +		goto out; |
| +	} |
| + |
| +	if (backing_dentry->d_flags & DCACHE_OP_REVALIDATE) { |
| +		result = backing_dentry->d_op->d_revalidate(backing_dentry, |
| +				flags); |
| +	} else |
| +		result = 1; |
| + |
| +out: |
| +	path_put(&backing_path); |
| +	return result; |
| +} |
| + |
| +/* |
| + * ->d_release: drop the reference to the backing path and free the |
| + * dentry_info stored in d_fsdata. |
| + */ |
| +static void dentry_release(struct dentry *d) |
| +{ |
| +	struct dentry_info *di = get_incfs_dentry(d); |
| + |
| +	if (di) |
| +		path_put(&di->backing_path); |
| +	kfree(d->d_fsdata); |
| +	d->d_fsdata = NULL; |
| +} |
| + |
| +/* |
| + * ->alloc_inode: allocate an inode_info (zeroed) and hand back its |
| + * embedded VFS inode. |
| + */ |
| +static struct inode *alloc_inode(struct super_block *sb) |
| +{ |
| +	struct inode_info *node = kzalloc(sizeof(*node), GFP_NOFS); |
| + |
| +	/* TODO: add a slab-based cache here. */ |
| +	if (!node) |
| +		return NULL; |
| +	inode_init_once(&node->n_vfs_inode); |
| +	return &node->n_vfs_inode; |
| +} |
| + |
| +/* ->free_inode: release the container allocated by alloc_inode(). */ |
| +static void free_inode(struct inode *inode) |
| +{ |
| +	struct inode_info *node = get_incfs_node(inode); |
| + |
| +	kfree(node); |
| +} |
| + |
| +/* |
| + * ->evict_inode: drop the reference to the backing inode, free the |
| + * attached data_file state, then do the standard page-cache truncate and |
| + * clear_inode() teardown. |
| + */ |
| +static void evict_inode(struct inode *inode) |
| +{ |
| +	struct inode_info *node = get_incfs_node(inode); |
| + |
| +	if (node) { |
| +		if (node->n_backing_inode) { |
| +			iput(node->n_backing_inode); |
| +			node->n_backing_inode = NULL; |
| +		} |
| +		if (node->n_file) { |
| +			incfs_free_data_file(node->n_file); |
| +			node->n_file = NULL; |
| +		} |
| +	} |
| + |
| +	truncate_inode_pages(&inode->i_data, 0); |
| +	clear_inode(inode); |
| +} |
| + |
| +/* |
| + * ->setattr: forward attribute changes to the backing dentry.  Size |
| + * changes are rejected (incfs files cannot be truncated from above), and |
| + * write-permission bits are rejected for regular files - incfs presents |
| + * them read-only while the backing file must stay owner-writable, so |
| + * 0222 is forced on for the backing change and stripped again before the |
| + * incfs-side simple_setattr(). |
| + */ |
| +static int incfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, |
| +			 struct iattr *ia) |
| +{ |
| +	struct dentry_info *di = get_incfs_dentry(dentry); |
| +	struct dentry *backing_dentry; |
| +	struct inode *backing_inode; |
| +	int error; |
| + |
| +	if (ia->ia_valid & ATTR_SIZE) |
| +		return -EINVAL; |
| + |
| +	if ((ia->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) && |
| +	    (ia->ia_valid & ATTR_MODE)) |
| +		return -EINVAL; |
| + |
| +	if (!di) |
| +		return -EINVAL; |
| +	backing_dentry = di->backing_path.dentry; |
| +	if (!backing_dentry) |
| +		return -EINVAL; |
| + |
| +	backing_inode = d_inode(backing_dentry); |
| + |
| +	/* incfs files are readonly, but the backing files must be writeable */ |
| +	if (S_ISREG(backing_inode->i_mode)) { |
| +		if ((ia->ia_valid & ATTR_MODE) && (ia->ia_mode & 0222)) |
| +			return -EINVAL; |
| + |
| +		ia->ia_mode |= 0222; |
| +	} |
| + |
| +	inode_lock(d_inode(backing_dentry)); |
| +	error = notify_change(idmap, backing_dentry, ia, NULL); |
| +	inode_unlock(d_inode(backing_dentry)); |
| + |
| +	if (error) |
| +		return error; |
| + |
| +	if (S_ISREG(backing_inode->i_mode)) |
| +		ia->ia_mode &= ~0222; |
| + |
| +	return simple_setattr(idmap, dentry, ia); |
| +} |
| + |
| + |
| +/* |
| + * ->getattr: fill from the incfs inode, then for incfs-owned inodes |
| + * (i_ino >= INCFS_START_INO_RANGE) report the verity attribute and, when |
| + * STATX_BLOCKS is requested, substitute the backing file's block count |
| + * since that is where the data actually lives. |
| + */ |
| +static int incfs_getattr(struct mnt_idmap *idmap, const struct path *path, |
| +			 struct kstat *stat, u32 request_mask, |
| +			 unsigned int query_flags) |
| +{ |
| +	struct inode *inode = d_inode(path->dentry); |
| + |
| +	generic_fillattr(idmap, request_mask, inode, stat); |
| + |
| +	if (inode->i_ino < INCFS_START_INO_RANGE) |
| +		return 0; |
| + |
| +	stat->attributes &= ~STATX_ATTR_VERITY; |
| +	if (IS_VERITY(inode)) |
| +		stat->attributes |= STATX_ATTR_VERITY; |
| +	stat->attributes_mask |= STATX_ATTR_VERITY; |
| + |
| +	if (request_mask & STATX_BLOCKS) { |
| +		struct kstat backing_kstat; |
| +		struct dentry_info *di = get_incfs_dentry(path->dentry); |
| +		int error = 0; |
| +		struct path *backing_path; |
| + |
| +		if (!di) |
| +			return -EFSCORRUPTED; |
| +		backing_path = &di->backing_path; |
| +		error = vfs_getattr(backing_path, &backing_kstat, STATX_BLOCKS, |
| +				    AT_STATX_SYNC_AS_STAT); |
| +		if (error) |
| +			return error; |
| + |
| +		stat->blocks = backing_kstat.blocks; |
| +	} |
| + |
| +	return 0; |
| +} |
| + |
| +/* |
| + * Read an xattr: normal dentries forward to the backing file; dentries |
| + * without a backing path (the pseudo files) only support |
| + * "security.selinux", served from the per-mount pseudo_file_xattr cache |
| + * populated by incfs_setxattr(). |
| + * |
| + * NOTE(review): a zero-size probe (size == 0) returns -E2BIG here rather |
| + * than the stored length; confirm callers never use the usual |
| + * size-query convention against pseudo files. |
| + */ |
| +static ssize_t incfs_getxattr(struct dentry *d, const char *name, |
| +			void *value, size_t size) |
| +{ |
| +	struct dentry_info *di = get_incfs_dentry(d); |
| +	struct mount_info *mi = get_mount_info(d->d_sb); |
| +	char *stored_value; |
| +	size_t stored_size; |
| +	int i; |
| + |
| +	if (di && di->backing_path.dentry) |
| +		return vfs_getxattr(&nop_mnt_idmap, di->backing_path.dentry, name, value, size); |
| + |
| +	if (strcmp(name, "security.selinux")) |
| +		return -ENODATA; |
| + |
| +	for (i = 0; i < PSEUDO_FILE_COUNT; ++i) |
| +		if (!strcmp(d->d_iname, incfs_pseudo_file_names[i].data)) |
| +			break; |
| +	if (i == PSEUDO_FILE_COUNT) |
| +		return -ENODATA; |
| + |
| +	stored_value = mi->pseudo_file_xattr[i].data; |
| +	stored_size = mi->pseudo_file_xattr[i].len; |
| +	if (!stored_value) |
| +		return -ENODATA; |
| + |
| +	if (stored_size > size) |
| +		return -E2BIG; |
| + |
| +	memcpy(value, stored_value, stored_size); |
| +	return stored_size; |
| +} |
| + |
| + |
| +static ssize_t incfs_setxattr(struct mnt_idmap *idmap, struct dentry *d, |
| + const char *name, void *value, size_t size, |
| + int flags) |
| +{ |
| + struct dentry_info *di = get_incfs_dentry(d); |
| + struct mount_info *mi = get_mount_info(d->d_sb); |
| + u8 **stored_value; |
| + size_t *stored_size; |
| + int i; |
| + |
| + if (di && di->backing_path.dentry) |
| + return vfs_setxattr(idmap, di->backing_path.dentry, name, value, |
| + size, flags); |
| + |
| + if (strcmp(name, "security.selinux")) |
| + return -ENODATA; |
| + |
| + if (size > INCFS_MAX_FILE_ATTR_SIZE) |
| + return -E2BIG; |
| + |
| + for (i = 0; i < PSEUDO_FILE_COUNT; ++i) |
| + if (!strcmp(d->d_iname, incfs_pseudo_file_names[i].data)) |
| + break; |
| + if (i == PSEUDO_FILE_COUNT) |
| + return -ENODATA; |
| + |
| + stored_value = &mi->pseudo_file_xattr[i].data; |
| + stored_size = &mi->pseudo_file_xattr[i].len; |
| + kfree (*stored_value); |
| + *stored_value = kzalloc(size, GFP_NOFS); |
| + if (!*stored_value) |
| + return -ENOMEM; |
| + |
| + memcpy(*stored_value, value, size); |
| + *stored_size = size; |
| + return 0; |
| +} |
| + |
| +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) |
| +{ |
| + struct dentry_info *di = get_incfs_dentry(d); |
| + |
| + if (!di || !di->backing_path.dentry) |
| + return -ENODATA; |
| + |
| + return vfs_listxattr(di->backing_path.dentry, list, size); |
| +} |
| + |
| +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, |
| + const char *dev_name, void *data) |
| +{ |
| + struct mount_options options = {}; |
| + struct mount_info *mi = NULL; |
| + struct path backing_dir_path = {}; |
| + struct dentry *index_dir = NULL; |
| + struct dentry *incomplete_dir = NULL; |
| + struct super_block *src_fs_sb = NULL; |
| + struct inode *root_inode = NULL; |
| + struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); |
| + bool dir_created = false; |
| + int error = 0; |
| + |
| + if (IS_ERR(sb)) |
| + return ERR_CAST(sb); |
| + |
| + sb->s_op = &incfs_super_ops; |
| + sb->s_d_op = &incfs_dentry_ops; |
| + sb->s_flags |= S_NOATIME; |
| + sb->s_magic = INCFS_MAGIC_NUMBER; |
| + sb->s_time_gran = 1; |
| + sb->s_blocksize = INCFS_DATA_FILE_BLOCK_SIZE; |
| + sb->s_blocksize_bits = blksize_bits(sb->s_blocksize); |
| + sb->s_xattr = incfs_xattr_ops; |
| + |
| + BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); |
| + |
| + if (!dev_name) { |
| + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); |
| + error = -ENOENT; |
| + goto err_deactivate; |
| + } |
| + |
| + error = parse_options(&options, (char *)data); |
| + if (error != 0) { |
| + pr_err("incfs: Options parsing error. %d\n", error); |
| + goto err_deactivate; |
| + } |
| + |
| + sb->s_bdi->ra_pages = options.readahead_pages; |
| + if (!dev_name) { |
| + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); |
| + error = -ENOENT; |
| + goto err_free_opts; |
| + } |
| + |
| + error = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, |
| + &backing_dir_path); |
| + if (error || backing_dir_path.dentry == NULL || |
| + !d_really_is_positive(backing_dir_path.dentry)) { |
| + pr_err("incfs: Error accessing: %s.\n", |
| + dev_name); |
| + goto err_free_opts; |
| + } |
| + src_fs_sb = backing_dir_path.dentry->d_sb; |
| + sb->s_maxbytes = src_fs_sb->s_maxbytes; |
| + sb->s_stack_depth = src_fs_sb->s_stack_depth + 1; |
| + |
| + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { |
| + error = -EINVAL; |
| + goto err_put_path; |
| + } |
| + |
| + mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); |
| + if (IS_ERR_OR_NULL(mi)) { |
| + error = PTR_ERR(mi); |
| + pr_err("incfs: Error allocating mount info. %d\n", error); |
| + goto err_put_path; |
| + } |
| + |
| + sb->s_fs_info = mi; |
| + mi->mi_backing_dir_path = backing_dir_path; |
| + index_dir = open_or_create_special_dir(backing_dir_path.dentry, |
| + INCFS_INDEX_NAME, &dir_created); |
| + if (IS_ERR_OR_NULL(index_dir)) { |
| + error = PTR_ERR(index_dir); |
| + pr_err("incfs: Can't find or create .index dir in %s\n", |
| + dev_name); |
| + /* No need to null index_dir since we don't put it */ |
| + goto err_put_path; |
| + } |
| + |
| + mi->mi_index_dir = index_dir; |
| + mi->mi_index_free = dir_created; |
| + |
| + incomplete_dir = open_or_create_special_dir(backing_dir_path.dentry, |
| + INCFS_INCOMPLETE_NAME, |
| + &dir_created); |
| + if (IS_ERR_OR_NULL(incomplete_dir)) { |
| + error = PTR_ERR(incomplete_dir); |
| + pr_err("incfs: Can't find or create .incomplete dir in %s\n", |
| + dev_name); |
| + /* No need to null incomplete_dir since we don't put it */ |
| + goto err_put_path; |
| + } |
| + mi->mi_incomplete_dir = incomplete_dir; |
| + mi->mi_incomplete_free = dir_created; |
| + |
| + root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); |
| + if (IS_ERR(root_inode)) { |
| + error = PTR_ERR(root_inode); |
| + goto err_put_path; |
| + } |
| + |
| + sb->s_root = d_make_root(root_inode); |
| + if (!sb->s_root) { |
| + error = -ENOMEM; |
| + goto err_put_path; |
| + } |
| + error = incfs_init_dentry(sb->s_root, &backing_dir_path); |
| + if (error) |
| + goto err_put_path; |
| + |
| + path_put(&backing_dir_path); |
| + sb->s_flags |= SB_ACTIVE; |
| + |
| + pr_debug("incfs: mount\n"); |
| + return dget(sb->s_root); |
| + |
| +err_put_path: |
| + path_put(&backing_dir_path); |
| +err_free_opts: |
| + free_options(&options); |
| +err_deactivate: |
| + deactivate_locked_super(sb); |
| + pr_err("incfs: mount failed %d\n", error); |
| + return ERR_PTR(error); |
| +} |
| + |
| +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) |
| +{ |
| + struct mount_options options; |
| + struct mount_info *mi = get_mount_info(sb); |
| + int err = 0; |
| + |
| + sync_filesystem(sb); |
| + err = parse_options(&options, (char *)data); |
| + if (err) |
| + return err; |
| + |
| + if (options.report_uid != mi->mi_options.report_uid) { |
| + pr_err("incfs: Can't change report_uid mount option on remount\n"); |
| + err = -EOPNOTSUPP; |
| + goto out; |
| + } |
| + |
| + err = incfs_realloc_mount_info(mi, &options); |
| + if (err) |
| + goto out; |
| + |
| + pr_debug("incfs: remount\n"); |
| + |
| +out: |
| + free_options(&options); |
| + return err; |
| +} |
| + |
| +void incfs_kill_sb(struct super_block *sb) |
| +{ |
| + struct mount_info *mi = sb->s_fs_info; |
| + struct inode *dinode = NULL; |
| + |
| + pr_debug("incfs: unmount\n"); |
| + |
| + /* |
| + * We must kill the super before freeing mi, since killing the super |
| + * triggers inode eviction, which triggers the final update of the |
| + * backing file, which uses certain information for mi |
| + */ |
| + kill_anon_super(sb); |
| + |
| + if (mi) { |
| + if (mi->mi_backing_dir_path.dentry) |
| + dinode = d_inode(mi->mi_backing_dir_path.dentry); |
| + |
| + if (dinode) { |
| + if (mi->mi_index_dir && mi->mi_index_free) |
| + vfs_rmdir(&nop_mnt_idmap, dinode, |
| + mi->mi_index_dir); |
| + |
| + if (mi->mi_incomplete_dir && mi->mi_incomplete_free) |
| + vfs_rmdir(&nop_mnt_idmap, dinode, |
| + mi->mi_incomplete_dir); |
| + } |
| + |
| + incfs_free_mount_info(mi); |
| + sb->s_fs_info = NULL; |
| + } |
| +} |
| + |
| +static int show_options(struct seq_file *m, struct dentry *root) |
| +{ |
| + struct mount_info *mi = get_mount_info(root->d_sb); |
| + |
| + seq_printf(m, ",read_timeout_ms=%u", mi->mi_options.read_timeout_ms); |
| + seq_printf(m, ",readahead=%u", mi->mi_options.readahead_pages); |
| + if (mi->mi_options.read_log_pages != 0) { |
| + seq_printf(m, ",rlog_pages=%u", mi->mi_options.read_log_pages); |
| + seq_printf(m, ",rlog_wakeup_cnt=%u", |
| + mi->mi_options.read_log_wakeup_count); |
| + } |
| + if (mi->mi_options.report_uid) |
| + seq_puts(m, ",report_uid"); |
| + |
| + if (mi->mi_sysfs_node) |
| + seq_printf(m, ",sysfs_name=%s", |
| + kobject_name(&mi->mi_sysfs_node->isn_sysfs_node)); |
| + return 0; |
| +} |
| diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/fs/incfs/vfs.h |
| @@ -0,0 +1,33 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 */ |
| +/* |
| + * Copyright 2018 Google LLC |
| + */ |
| + |
| +#ifndef _INCFS_VFS_H |
| +#define _INCFS_VFS_H |
| + |
| +extern const struct file_operations incfs_file_ops; |
| +extern const struct inode_operations incfs_file_inode_ops; |
| + |
| +void incfs_kill_sb(struct super_block *sb); |
| +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, |
| + const char *dev_name, void *data); |
| +int incfs_link(struct dentry *what, struct dentry *where); |
| +int incfs_unlink(struct dentry *dentry); |
| + |
| +static inline struct mount_info *get_mount_info(struct super_block *sb) |
| +{ |
| + struct mount_info *result = sb->s_fs_info; |
| + |
| + WARN_ON(!result); |
| + return result; |
| +} |
| + |
| +static inline struct super_block *file_superblock(struct file *f) |
| +{ |
| + struct inode *inode = file_inode(f); |
| + |
| + return inode->i_sb; |
| +} |
| + |
| +#endif |
| diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h |
| new file mode 100644 |
| --- /dev/null |
| +++ b/include/uapi/linux/incrementalfs.h |
| @@ -0,0 +1,590 @@ |
| +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ |
| +/* |
| + * Userspace interface for Incremental FS. |
| + * |
| + * Incremental FS is special-purpose Linux virtual file system that allows |
| + * execution of a program while its binary and resource files are still being |
| + * lazily downloaded over the network, USB etc. |
| + * |
| + * Copyright 2019 Google LLC |
| + */ |
| +#ifndef _UAPI_LINUX_INCREMENTALFS_H |
| +#define _UAPI_LINUX_INCREMENTALFS_H |
| + |
| +#include <linux/limits.h> |
| +#include <linux/ioctl.h> |
| +#include <linux/types.h> |
| +#include <linux/xattr.h> |
| + |
| +/* ===== constants ===== */ |
| +#define INCFS_NAME "incremental-fs" |
| + |
| +/* |
| + * Magic number used in file header and in memory superblock |
| + * Note that it is a 5 byte unsigned long. Thus on 32 bit kernels, it is |
| + * truncated to a 4 byte number |
| + */ |
| +#define INCFS_MAGIC_NUMBER (0x5346434e49ul & ULONG_MAX) |
| + |
| +#define INCFS_DATA_FILE_BLOCK_SIZE 4096 |
| +#define INCFS_HEADER_VER 1 |
| + |
| +/* TODO: This value is assumed in incfs_copy_signature_info_from_user to be the |
| + * actual signature length. Set back to 64 when fixed. |
| + */ |
| +#define INCFS_MAX_HASH_SIZE 32 |
| +#define INCFS_MAX_FILE_ATTR_SIZE 512 |
| + |
| +#define INCFS_INDEX_NAME ".index" |
| +#define INCFS_INCOMPLETE_NAME ".incomplete" |
| +#define INCFS_PENDING_READS_FILENAME ".pending_reads" |
| +#define INCFS_LOG_FILENAME ".log" |
| +#define INCFS_BLOCKS_WRITTEN_FILENAME ".blocks_written" |
| +#define INCFS_XATTR_ID_NAME (XATTR_USER_PREFIX "incfs.id") |
| +#define INCFS_XATTR_SIZE_NAME (XATTR_USER_PREFIX "incfs.size") |
| +#define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") |
| +#define INCFS_XATTR_VERITY_NAME (XATTR_USER_PREFIX "incfs.verity") |
| + |
| +#define INCFS_MAX_SIGNATURE_SIZE 8096 |
| +#define INCFS_SIGNATURE_VERSION 2 |
| +#define INCFS_SIGNATURE_SECTIONS 2 |
| + |
| +#define INCFS_IOCTL_BASE_CODE 'g' |
| + |
| +/* ===== ioctl requests on the command dir ===== */ |
| + |
| +/* |
| + * Create a new file |
| + * May only be called on .pending_reads file |
| + */ |
| +#define INCFS_IOC_CREATE_FILE \ |
| + _IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args) |
| + |
| +/* Read file signature */ |
| +#define INCFS_IOC_READ_FILE_SIGNATURE \ |
| + _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) |
| + |
| +/* |
| + * Fill in one or more data block. This may only be called on a handle |
| + * passed as a parameter to INCFS_IOC_PERMIT_FILLING |
| + * |
| + * Returns number of blocks filled in, or error if none were |
| + */ |
| +#define INCFS_IOC_FILL_BLOCKS \ |
| + _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) |
| + |
| +/* |
| + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor |
| + * May only be called on .pending_reads file |
| + * |
| + * Returns 0 on success or error |
| + */ |
| +#define INCFS_IOC_PERMIT_FILL \ |
| + _IOW(INCFS_IOCTL_BASE_CODE, 33, struct incfs_permit_fill) |
| + |
| +/* |
| + * Fills buffer with ranges of populated blocks |
| + * |
| + * Returns 0 if all ranges written |
| + * error otherwise |
| + * |
| + * Either way, range_buffer_size_out is set to the number |
| + * of bytes written. Should be set to 0 by caller. The ranges |
| + * filled are valid, but if an error was returned there might |
| + * be more ranges to come. |
| + * |
| + * Ranges are ranges of filled blocks: |
| + * |
| + * 1 2 7 9 |
| + * |
| + * means blocks 1, 2, 7, 8, 9 are filled, while blocks 0, 3, 4, 5, 6, |
| + * and 10 onwards are not |
| + * |
| + * If hashing is enabled for the file, the hash blocks are simply |
| + * treated as though they immediately followed the data blocks. |
| + */ |
| +#define INCFS_IOC_GET_FILLED_BLOCKS \ |
| + _IOR(INCFS_IOCTL_BASE_CODE, 34, struct incfs_get_filled_blocks_args) |
| + |
| +/* |
| + * Creates a new mapped file |
| + * May only be called on .pending_reads file |
| + */ |
| +#define INCFS_IOC_CREATE_MAPPED_FILE \ |
| + _IOWR(INCFS_IOCTL_BASE_CODE, 35, struct incfs_create_mapped_file_args) |
| + |
| +/* |
| + * Get number of blocks, total and filled |
| + * May only be called on .pending_reads file |
| + */ |
| +#define INCFS_IOC_GET_BLOCK_COUNT \ |
| + _IOR(INCFS_IOCTL_BASE_CODE, 36, struct incfs_get_block_count_args) |
| + |
| +/* |
| + * Get per UID read timeouts |
| + * May only be called on .pending_reads file |
| + */ |
| +#define INCFS_IOC_GET_READ_TIMEOUTS \ |
| + _IOR(INCFS_IOCTL_BASE_CODE, 37, struct incfs_get_read_timeouts_args) |
| + |
| +/* |
| + * Set per UID read timeouts |
| + * May only be called on .pending_reads file |
| + */ |
| +#define INCFS_IOC_SET_READ_TIMEOUTS \ |
| + _IOW(INCFS_IOCTL_BASE_CODE, 38, struct incfs_set_read_timeouts_args) |
| + |
| +/* |
| + * Get last read error |
| + * May only be called on .pending_reads file |
| + */ |
| +#define INCFS_IOC_GET_LAST_READ_ERROR \ |
| + _IOW(INCFS_IOCTL_BASE_CODE, 39, struct incfs_get_last_read_error_args) |
| + |
| +/* ===== sysfs feature flags ===== */ |
| +/* |
| + * Each flag is represented by a file in /sys/fs/incremental-fs/features |
| + * If the file exists the feature is supported |
| + * Also the file contents will be the line "supported" |
| + */ |
| + |
| +/* |
| + * Basic flag stating that the core incfs file system is available |
| + */ |
| +#define INCFS_FEATURE_FLAG_COREFS "corefs" |
| + |
| +/* |
| + * zstd compression support |
| + */ |
| +#define INCFS_FEATURE_FLAG_ZSTD "zstd" |
| + |
| +/* |
| + * v2 feature set support. Covers: |
| + * INCFS_IOC_CREATE_MAPPED_FILE |
| + * INCFS_IOC_GET_BLOCK_COUNT |
| + * INCFS_IOC_GET_READ_TIMEOUTS/INCFS_IOC_SET_READ_TIMEOUTS |
| + * .blocks_written status file |
| + * .incomplete folder |
| + * report_uid mount option |
| + */ |
| +#define INCFS_FEATURE_FLAG_V2 "v2" |
| + |
| +enum incfs_compression_alg { |
| + COMPRESSION_NONE = 0, |
| + COMPRESSION_LZ4 = 1, |
| + COMPRESSION_ZSTD = 2, |
| +}; |
| + |
| +enum incfs_block_flags { |
| + INCFS_BLOCK_FLAGS_NONE = 0, |
| + INCFS_BLOCK_FLAGS_HASH = 1, |
| +}; |
| + |
| +typedef struct { |
| + __u8 bytes[16]; |
| +} incfs_uuid_t __attribute__((aligned (8))); |
| + |
| +/* |
| + * Description of a pending read. A pending read - a read call by |
| + * a userspace program for which the filesystem currently doesn't have data. |
| + * |
| + * Reads from .pending_reads and .log return an array of these structure |
| + */ |
| +struct incfs_pending_read_info { |
| + /* Id of a file that is being read from. */ |
| + incfs_uuid_t file_id; |
| + |
| + /* A number of microseconds since system boot to the read. */ |
| + __aligned_u64 timestamp_us; |
| + |
| + /* Index of a file block that is being read. */ |
| + __u32 block_index; |
| + |
| + /* A serial number of this pending read. */ |
| + __u32 serial_number; |
| +}; |
| + |
| +/* |
| + * Description of a pending read. A pending read - a read call by |
| + * a userspace program for which the filesystem currently doesn't have data. |
| + * |
| + * This version of incfs_pending_read_info is used whenever the file system is |
| + * mounted with the report_uid flag |
| + */ |
| +struct incfs_pending_read_info2 { |
| + /* Id of a file that is being read from. */ |
| + incfs_uuid_t file_id; |
| + |
| + /* A number of microseconds since system boot to the read. */ |
| + __aligned_u64 timestamp_us; |
| + |
| + /* Index of a file block that is being read. */ |
| + __u32 block_index; |
| + |
| + /* A serial number of this pending read. */ |
| + __u32 serial_number; |
| + |
| + /* The UID of the reading process */ |
| + __u32 uid; |
| + |
| + __u32 reserved; |
| +}; |
| + |
| +/* |
| + * Description of a data or hash block to add to a data file. |
| + */ |
| +struct incfs_fill_block { |
| + /* Index of a data block. */ |
| + __u32 block_index; |
| + |
| + /* Length of data */ |
| + __u32 data_len; |
| + |
| + /* |
| + * A pointer to an actual data for the block. |
| + * |
| + * Equivalent to: __u8 *data; |
| + */ |
| + __aligned_u64 data; |
| + |
| + /* |
| + * Compression algorithm used to compress the data block. |
| + * Values from enum incfs_compression_alg. |
| + */ |
| + __u8 compression; |
| + |
| + /* Values from enum incfs_block_flags */ |
| + __u8 flags; |
| + |
| + __u16 reserved1; |
| + |
| + __u32 reserved2; |
| + |
| + __aligned_u64 reserved3; |
| +}; |
| + |
| +/* |
| + * Description of a number of blocks to add to a data file |
| + * |
| + * Argument for INCFS_IOC_FILL_BLOCKS |
| + */ |
| +struct incfs_fill_blocks { |
| + /* Number of blocks */ |
| + __u64 count; |
| + |
| + /* A pointer to an array of incfs_fill_block structs */ |
| + __aligned_u64 fill_blocks; |
| +}; |
| + |
| +/* |
| + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor |
| + * May only be called on .pending_reads file |
| + * |
| + * Argument for INCFS_IOC_PERMIT_FILL |
| + */ |
| +struct incfs_permit_fill { |
| + /* File to permit fills on */ |
| + __u32 file_descriptor; |
| +}; |
| + |
| +enum incfs_hash_tree_algorithm { |
| + INCFS_HASH_TREE_NONE = 0, |
| + INCFS_HASH_TREE_SHA256 = 1 |
| +}; |
| + |
| +/* |
| + * Create a new file or directory. |
| + */ |
| +struct incfs_new_file_args { |
| + /* Id of a file to create. */ |
| + incfs_uuid_t file_id; |
| + |
| + /* |
| + * Total size of the new file. Ignored if S_ISDIR(mode). |
| + */ |
| + __aligned_u64 size; |
| + |
| + /* |
| + * File mode. Permissions and dir flag. |
| + */ |
| + __u16 mode; |
| + |
| + __u16 reserved1; |
| + |
| + __u32 reserved2; |
| + |
| + /* |
| + * A pointer to a null-terminated relative path to the file's parent |
| + * dir. |
| + * Max length: PATH_MAX |
| + * |
| + * Equivalent to: char *directory_path; |
| + */ |
| + __aligned_u64 directory_path; |
| + |
| + /* |
| + * A pointer to a null-terminated file's name. |
| + * Max length: PATH_MAX |
| + * |
| + * Equivalent to: char *file_name; |
| + */ |
| + __aligned_u64 file_name; |
| + |
| + /* |
| + * A pointer to a file attribute to be set on creation. |
| + * |
| + * Equivalent to: u8 *file_attr; |
| + */ |
| + __aligned_u64 file_attr; |
| + |
| + /* |
| + * Length of the data buffer specified by file_attr. |
| + * Max value: INCFS_MAX_FILE_ATTR_SIZE |
| + */ |
| + __u32 file_attr_len; |
| + |
| + __u32 reserved4; |
| + |
| + /* |
| + * Points to an APK V4 Signature data blob |
| + * Signature must have two sections |
| + * Format is: |
| + * u32 version |
| + * u32 size_of_hash_info_section |
| + * u8 hash_info_section[] |
| + * u32 size_of_signing_info_section |
| + * u8 signing_info_section[] |
| + * |
| + * Note that incfs does not care about what is in signing_info_section |
| + * |
| + * hash_info_section has following format: |
| + * u32 hash_algorithm; // Must be SHA256 == 1 |
| + * u8 log2_blocksize; // Must be 12 for 4096 byte blocks |
| + * u32 salt_size; |
| + * u8 salt[]; |
| + * u32 hash_size; |
| + * u8 root_hash[]; |
| + */ |
| + __aligned_u64 signature_info; |
| + |
| + /* Size of signature_info */ |
| + __aligned_u64 signature_size; |
| + |
| + __aligned_u64 reserved6; |
| +}; |
| + |
| +/* |
| + * Request a digital signature blob for a given file. |
| + * Argument for INCFS_IOC_READ_FILE_SIGNATURE ioctl |
| + */ |
| +struct incfs_get_file_sig_args { |
| + /* |
| + * A pointer to the data buffer to save a signature blob to. |
| + * |
| + * Equivalent to: u8 *file_signature; |
| + */ |
| + __aligned_u64 file_signature; |
| + |
| + /* Size of the buffer at file_signature. */ |
| + __u32 file_signature_buf_size; |
| + |
| + /* |
| + * Number of bytes saved to the file_signature buffer. |
| + * It is set after ioctl done. |
| + */ |
| + __u32 file_signature_len_out; |
| +}; |
| + |
| +struct incfs_filled_range { |
| + __u32 begin; |
| + __u32 end; |
| +}; |
| + |
| +/* |
| + * Request ranges of filled blocks |
| + * Argument for INCFS_IOC_GET_FILLED_BLOCKS |
| + */ |
| +struct incfs_get_filled_blocks_args { |
| + /* |
| + * A buffer to populate with ranges of filled blocks |
| + * |
| + * Equivalent to struct incfs_filled_ranges *range_buffer |
| + */ |
| + __aligned_u64 range_buffer; |
| + |
| + /* Size of range_buffer */ |
| + __u32 range_buffer_size; |
| + |
| + /* Start index to read from */ |
| + __u32 start_index; |
| + |
| + /* |
| + * End index to read to. 0 means read to end. This is a range, |
| + * so incfs will read from start_index to end_index - 1 |
| + */ |
| + __u32 end_index; |
| + |
| + /* Actual number of blocks in file */ |
| + __u32 total_blocks_out; |
| + |
| + /* The number of data blocks in file */ |
| + __u32 data_blocks_out; |
| + |
| + /* Number of bytes written to range buffer */ |
| + __u32 range_buffer_size_out; |
| + |
| + /* Sector scanned up to, if the call was interrupted */ |
| + __u32 index_out; |
| +}; |
| + |
| +/* |
| + * Create a new mapped file |
| + * Argument for INCFS_IOC_CREATE_MAPPED_FILE |
| + */ |
| +struct incfs_create_mapped_file_args { |
| + /* |
| + * Total size of the new file. |
| + */ |
| + __aligned_u64 size; |
| + |
| + /* |
| + * File mode. Permissions and dir flag. |
| + */ |
| + __u16 mode; |
| + |
| + __u16 reserved1; |
| + |
| + __u32 reserved2; |
| + |
| + /* |
| + * A pointer to a null-terminated relative path to the incfs mount |
| + * point |
| + * Max length: PATH_MAX |
| + * |
| + * Equivalent to: char *directory_path; |
| + */ |
| + __aligned_u64 directory_path; |
| + |
| + /* |
| + * A pointer to a null-terminated file name. |
| + * Max length: PATH_MAX |
| + * |
| + * Equivalent to: char *file_name; |
| + */ |
| + __aligned_u64 file_name; |
| + |
| + /* Id of source file to map. */ |
| + incfs_uuid_t source_file_id; |
| + |
| + /* |
| + * Offset in source file to start mapping. Must be a multiple of |
| + * INCFS_DATA_FILE_BLOCK_SIZE |
| + */ |
| + __aligned_u64 source_offset; |
| +}; |
| + |
| +/* |
| + * Get information about the blocks in this file |
| + * Argument for INCFS_IOC_GET_BLOCK_COUNT |
| + */ |
| +struct incfs_get_block_count_args { |
| + /* Total number of data blocks in the file */ |
| + __u32 total_data_blocks_out; |
| + |
| + /* Number of filled data blocks in the file */ |
| + __u32 filled_data_blocks_out; |
| + |
| + /* Total number of hash blocks in the file */ |
| + __u32 total_hash_blocks_out; |
| + |
| + /* Number of filled hash blocks in the file */ |
| + __u32 filled_hash_blocks_out; |
| +}; |
| + |
| +/* Description of timeouts for one UID */ |
| +struct incfs_per_uid_read_timeouts { |
| + /* UID to apply these timeouts to */ |
| + __u32 uid; |
| + |
| + /* |
| + * Min time in microseconds to read any block. Note that this doesn't |
| + * apply to reads which are satisfied from the page cache. |
| + */ |
| + __u32 min_time_us; |
| + |
| + /* |
| + * Min time in microseconds to satisfy a pending read. Any pending read |
| + * which is filled before this time will be delayed so that the total |
| + * read time >= this value. |
| + */ |
| + __u32 min_pending_time_us; |
| + |
| + /* |
| + * Max time in microseconds to satisfy a pending read before the read |
| + * times out. If set to U32_MAX, defaults to mount options |
| + * read_timeout_ms * 1000. Must be >= min_pending_time_us |
| + */ |
| + __u32 max_pending_time_us; |
| +}; |
| + |
| +/* |
| + * Get the read timeouts array |
| + * Argument for INCFS_IOC_GET_READ_TIMEOUTS |
| + */ |
| +struct incfs_get_read_timeouts_args { |
| + /* |
| + * A pointer to a buffer to fill with the current timeouts |
| + * |
| + * Equivalent to struct incfs_per_uid_read_timeouts * |
| + */ |
| + __aligned_u64 timeouts_array; |
| + |
| + /* Size of above buffer in bytes */ |
| + __u32 timeouts_array_size; |
| + |
| + /* Size used in bytes, or size needed if -ENOMEM returned */ |
| + __u32 timeouts_array_size_out; |
| +}; |
| + |
| +/* |
| + * Set the read timeouts array |
| + * Arguments for INCFS_IOC_SET_READ_TIMEOUTS |
| + */ |
| +struct incfs_set_read_timeouts_args { |
| + /* |
| + * A pointer to an array containing the new timeouts |
| + * This will replace any existing timeouts |
| + * |
| + * Equivalent to struct incfs_per_uid_read_timeouts * |
| + */ |
| + __aligned_u64 timeouts_array; |
| + |
| + /* Size of above array in bytes. Must be < 256 */ |
| + __u32 timeouts_array_size; |
| +}; |
| + |
| +/* |
| + * Get last read error struct |
| + * Arguments for INCFS_IOC_GET_LAST_READ_ERROR |
| + */ |
| +struct incfs_get_last_read_error_args { |
| + /* File id of last file that had a read error */ |
| + incfs_uuid_t file_id_out; |
| + |
| + /* Time of last read error, in us, from CLOCK_MONOTONIC */ |
| + __u64 time_us_out; |
| + |
| + /* Index of page that was being read at last read error */ |
| + __u32 page_out; |
| + |
| + /* errno of last read error */ |
| + __u32 errno_out; |
| + |
| + /* uid of last read error */ |
| + __u32 uid_out; |
| + |
| + __u32 reserved1; |
| + __u64 reserved2; |
| +}; |
| + |
| +#endif /* _UAPI_LINUX_INCREMENTALFS_H */ |