| /* |
| * Copyright (C) 2015-2016 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <assert.h> |
| #include <inttypes.h> |
| #include <lk/compiler.h> |
| #include <stdbool.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <string.h> |
| |
/*
 * Default trace levels for this file. Either macro can be defined before this
 * point (e.g. by the build) to override the default.
 */
#if !defined(LOCAL_TRACE)
#define LOCAL_TRACE TRACE_LEVEL_INIT
#endif
#if !defined(LOCAL_TRACE_ERR)
#define LOCAL_TRACE_ERR TRACE_LEVEL_INIT
#endif
| |
| #include "array.h" |
| #include "block_allocator.h" |
| #include "block_cache.h" |
| #include "block_set.h" |
| #include "checkpoint.h" |
| #include "debug.h" |
| #include "file.h" |
| #include "fs.h" |
| #include "transaction.h" |
| |
/* Magic number stored in every valid super block; spells "trustys". */
#define SUPER_BLOCK_MAGIC (0x0073797473757274ULL)

/* Bottom two bits of the flags word: wrapping super block version counter. */
#define SUPER_BLOCK_FLAGS_VERSION_MASK (0x3U)
/* Low version bit: selects which of the two super block slots is used. */
#define SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK (0x1U)
/* Set when the super block was written without free and files roots. */
#define SUPER_BLOCK_FLAGS_EMPTY (0x4U)
/* Set when the super block was written while using alternate data. */
#define SUPER_BLOCK_FLAGS_ALTERNATE (0x8U)
/* All flag bits understood here; any other bit fails validation. */
#define SUPER_BLOCK_FLAGS_SUPPORTED_MASK (0xfU)

/* Newest file system version this code will mount. */
#define SUPER_BLOCK_FS_VERSION (0U)
| |
/**
 * typedef super_block_opt_flags8_t - Optional super block feature bits
 *
 * Any combination of the following may be ORed together:
 *
 * %SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3
 *      The super block carries additional data after flags2, and flags3 is
 *      expected to hold the same value as flags.
 * %SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT
 *      The super block contains the @checkpoint field.
 */
typedef uint8_t super_block_opt_flags8_t;
#define SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 (0x1U)
#define SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT (0x2U)
| |
| /** |
| * struct super_block - On-disk root block for file system state |
| * @iv: Initial value used for encrypt/decrypt. |
| * @magic: SUPER_BLOCK_MAGIC. |
| * @flags: Version in bottom two bits, other bits are reserved. |
| * @fs_version: Required file system version. If greater than |
| * %SUPER_BLOCK_FS_VERSION, do not mount or overwrite |
| * filesystem. |
| * @block_size: Block size of file system. |
| * @block_num_size: Number of bytes used to store block numbers. |
| * @mac_size: number of bytes used to store mac values. |
| * @opt_flags: Optional flags, any of &typedef super_block_opt_flags8_t |
| * ORed together. |
| * @res2: Reserved for future use. Write 0, read ignore. |
| * @block_count: Size of file system. |
| * @free: Block and mac of free set root node. |
| * @free_count: Currently unused. |
| * @files: Block and mac of files tree root node. |
| * @res3: Reserved for future use. Write 0, read ignore. |
| * @flags2: Copy of @flags. Allows storing the super-block in a device |
| * that does not support an atomic write of the entire |
| * super-block. |
| * @backup: Backup of previous super-block, used to support an alternate |
| * backing store. 0 if no backup has ever been written. Once a |
| * backup exists, it will only ever be swapped, not cleared. |
| * @checkpoint: Block and mac of checkpoint metadata block. 0 if a |
| * checkpoint does not exist. |
| * @res4: Reserved for future use. Write 0, read ignore. |
| * @flags3: Copy of @flags. Allows storing the super-block in a device |
| * that does not support an atomic write of the entire |
| * super-block. If SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 is not set, |
| * @flags3 is not checked and fields after @flags2 are ignored. |
| * |
| * Block numbers and macs in @free and @files are packed as indicated by |
| * @block_num_size and @mac_size, but unlike other on-disk data, the size of the |
| * whole field is always the full 24 bytes needed for a 8 byte block number and |
| * 16 byte mac This allows the @flags2 and @flags3 to be validated before |
| * knowing @block_num_size and @mac_size. |
| */ |
| struct super_block { |
| struct iv iv; |
| uint64_t magic; |
| uint32_t flags; |
| uint32_t fs_version; |
| uint32_t block_size; |
| uint8_t block_num_size; |
| uint8_t mac_size; |
| super_block_opt_flags8_t opt_flags; |
| uint8_t res2; |
| data_block_t block_count; |
| struct block_mac free; |
| data_block_t free_count; |
| struct block_mac files; |
| uint32_t res3[5]; |
| uint32_t flags2; |
| struct super_block_backup backup; |
| struct block_mac checkpoint; |
| uint32_t res4[6]; |
| uint32_t flags3; |
| }; |
| STATIC_ASSERT(offsetof(struct super_block, flags2) == 124); |
| STATIC_ASSERT(offsetof(struct super_block, flags3) == 252); |
| STATIC_ASSERT(sizeof(struct super_block) == 256); |
| |
| /* block_device_tipc.c ensures that we have at least 256 bytes in RPMB blocks */ |
| STATIC_ASSERT(sizeof(struct super_block) <= 256); |
| |
| static struct list_node fs_list = LIST_INITIAL_VALUE(fs_list); |
| |
| /** |
| * update_super_block_internal - Generate and write superblock |
| * @tr: Transaction object. |
| * @free: New free root. |
| * @files: New files root. |
| * @checkpoint: New checkpoint metadata block. |
| * @pinned: New block should not be reused in the block cache until |
| * it is successfully written. |
| * |
| * Return: %true if super block was updated (in cache), %false if transaction |
| * failed before super block was updated. |
| */ |
| static bool update_super_block_internal(struct transaction* tr, |
| const struct block_mac* free, |
| const struct block_mac* files, |
| const struct block_mac* checkpoint, |
| bool pinned) { |
| struct super_block* super_rw; |
| struct obj_ref super_ref = OBJ_REF_INITIAL_VALUE(super_ref); |
| unsigned int ver; |
| unsigned int index; |
| uint32_t flags; |
| uint32_t block_size = tr->fs->super_dev->block_size; |
| super_block_opt_flags8_t opt_flags = SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3 | |
| SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT; |
| |
| assert(block_size >= sizeof(struct super_block)); |
| assert(tr->fs->initial_super_block_tr == NULL || |
| tr->fs->initial_super_block_tr == tr); |
| |
| ver = (tr->fs->super_block_version + 1) & SUPER_BLOCK_FLAGS_VERSION_MASK; |
| index = ver & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK; |
| flags = ver; |
| if (!free && !files) { |
| /* |
| * If the free and files trees are not provided, the filesystem is in |
| * the initial empty state. |
| */ |
| flags |= SUPER_BLOCK_FLAGS_EMPTY; |
| } else { |
| /* Non-empty filesystems must have both trees (with root node blocks) */ |
| assert(free); |
| assert(files); |
| } |
| if (tr->fs->alternate_data) { |
| flags |= SUPER_BLOCK_FLAGS_ALTERNATE; |
| } |
| |
| pr_write("write super block %" PRIu64 ", ver %d\n", |
| tr->fs->super_block[index], ver); |
| |
| super_rw = block_get_cleared_super(tr, tr->fs->super_block[index], |
| &super_ref, pinned); |
| if (tr->failed) { |
| block_put_dirty_discard(super_rw, &super_ref); |
| return false; |
| } |
| super_rw->magic = SUPER_BLOCK_MAGIC; |
| super_rw->flags = flags; |
| /* TODO: keep existing fs version when possible */ |
| super_rw->fs_version = SUPER_BLOCK_FS_VERSION; |
| super_rw->block_size = tr->fs->dev->block_size; |
| super_rw->block_num_size = tr->fs->block_num_size; |
| super_rw->mac_size = tr->fs->mac_size; |
| super_rw->opt_flags = opt_flags; |
| super_rw->block_count = tr->fs->dev->block_count; |
| if (free) { |
| super_rw->free = *free; |
| } |
| super_rw->free_count = 0; /* TODO: remove or update */ |
| if (files) { |
| super_rw->files = *files; |
| } |
| if (checkpoint) { |
| super_rw->checkpoint = *checkpoint; |
| } |
| super_rw->flags2 = flags; |
| super_rw->backup = tr->fs->backup; |
| super_rw->flags3 = flags; |
| tr->fs->written_super_block_version = ver; |
| |
| block_put_dirty_no_mac(super_rw, &super_ref, tr->fs->allow_tampering); |
| |
| return true; |
| } |
| |
/**
 * update_super_block - Build the next super block and queue its write
 * @tr:         Transaction object.
 * @free:       New free set root.
 * @files:      New files tree root.
 * @checkpoint: New checkpoint metadata block.
 *
 * Public entry point; the new super block is requested without pinning it in
 * the block cache.
 *
 * Return: %true if the super block was updated (in cache), %false if the
 * transaction failed before the super block was updated.
 */
bool update_super_block(struct transaction* tr,
                        const struct block_mac* free,
                        const struct block_mac* files,
                        const struct block_mac* checkpoint) {
    const bool pinned = false;

    return update_super_block_internal(tr, free, files, checkpoint, pinned);
}
| |
| /** |
| * write_initial_super_block - Write initial superblock to internal transaction |
| * @fs: File system state object. |
| * |
| * When needed, this must be called before creating any other transactions on |
| * this filesystem so we don't fill up the cache with entries that can't be |
| * flushed to make room for this block. |
| * |
| * Return: %true if the initial empty superblock was successfully written to the |
| * cache, or %false otherwise. |
| */ |
| static bool write_initial_super_block(struct fs* fs) { |
| struct transaction* tr; |
| tr = calloc(1, sizeof(*tr)); |
| if (!tr) { |
| return false; |
| } |
| fs->initial_super_block_tr = tr; |
| |
| transaction_init(tr, fs, true); |
| return update_super_block_internal(tr, NULL, NULL, NULL, true); |
| } |
| |
| /** |
| * write_current_super_block - Write current superblock to internal transaction |
| * @fs: File system state object. |
| * @reinitialize: Allow the special transaction to be reinitialized if it has |
| * failed |
| * |
| * Write the current state of the super block to an internal transaction that |
| * will be written before any other block. This can be used to re-sync the |
| * in-memory fs-state with the on-disk state after detecting a write failure |
| * where no longer know the on-disk super block state. |
| */ |
| void write_current_super_block(struct fs* fs, bool reinitialize) { |
| bool super_block_updated; |
| struct transaction* tr; |
| |
| if (fs->initial_super_block_tr) { |
| /* |
| * If initial_super_block_tr is already pending and not failed there is |
| * no need to allocate a new one so return early. |
| * |
| * If the special transaction has failed, we need to re-initialize it so |
| * that we can attempt to recover to a good state. |
| * |
| * We are only allowed to reinitialze if the @reinitialize parameter is |
| * true. We don't want to allow reinitialization while cleaning blocks |
| * (i.e. via fs_unknown_super_block_state_all()), as this would reset |
| * the special transaction to non-failed state and create a situation |
| * where transaction_initial_super_block_complete() cannot know if it |
| * successfully flushed the special transaction to disk. Therefore we |
| * only allow transaction_initial_super_block_complete() to reinitialize |
| * a failed special transaction after it attempts and fails to write the |
| * block to disk. |
| * |
| * Since we pin special superblock entries in the block cache and |
| * therefore cannot evict them with normal transactions, |
| * transaction_initial_super_block_complete() is the only place we can |
| * attempt a special transaction write, and if it fails the transaction |
| * is immediately reinitialized. Therefore we should only ever be in a |
| * failed state if reinitialize is true (i.e. we are being called from |
| * transaction_initial_super_block_complete()). |
| */ |
| |
| assert(reinitialize || !fs->initial_super_block_tr->failed); |
| if (!fs->initial_super_block_tr->failed || !reinitialize) { |
| return; |
| } |
| |
| tr = fs->initial_super_block_tr; |
| transaction_activate(tr); |
| } else { |
| tr = calloc(1, sizeof(*tr)); |
| if (!tr) { |
| /* Not safe to proceed. TODO: add flag to defer this allocation? */ |
| abort(); |
| } |
| transaction_init(tr, fs, true); |
| fs->initial_super_block_tr = tr; |
| } |
| |
| /* |
| * Until the filesystem contains committed data, fs->free.block_tree.root |
| * will be zero, i.e. an invalid block mac. fs->free.block_tree.root is only |
| * updated in transaction_complete() after successfully writing a new |
| * superblock. If the filesystem is empty, we need to emit a cleared |
| * superblock with a special flag to prevent the superblock state from |
| * getting out of sync with the filesystem data if a reboot occurrs before |
| * committing a superblock with data. |
| * |
| * We can't use fs->files.root here because it may be invalid if there are |
| * no files in the filesystem. If the free node is zero, then the files node |
| * must be as well, so we assert this. |
| */ |
| bool fs_is_cleared = !block_mac_valid(tr, &fs->free.block_tree.root); |
| if (fs_is_cleared) { |
| assert(!block_mac_valid(tr, &fs->files.root)); |
| super_block_updated = |
| update_super_block_internal(tr, NULL, NULL, NULL, true); |
| } else { |
| super_block_updated = update_super_block_internal( |
| tr, &fs->free.block_tree.root, &fs->files.root, &fs->checkpoint, |
| true); |
| } |
| if (!super_block_updated) { |
| /* Not safe to proceed. TODO: add flag to try again? */ |
| fprintf(stderr, |
| "Could not create pending write for current superblock state. " |
| "Not safe to proceed.\n"); |
| abort(); |
| } |
| } |
| |
| /** |
| * super_block_valid - Check if superblock is valid |
| * @dev: Block device that supoer block was read from. |
| * @super: Super block data. |
| * |
| * Return: %true if @super is valid for @dev, %false otherwise. |
| */ |
| static bool super_block_valid(const struct block_device* dev, |
| const struct super_block* super) { |
| if (super->magic != SUPER_BLOCK_MAGIC) { |
| pr_init("bad magic, 0x%" PRIx64 "\n", super->magic); |
| return false; |
| } |
| if (super->flags != super->flags2) { |
| pr_warn("flags, 0x%x, does not match flags2, 0x%x\n", super->flags, |
| super->flags2); |
| return false; |
| } |
| if ((super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3) && |
| super->flags != super->flags3) { |
| pr_warn("flags, 0x%x, does not match flags3, 0x%x\n", super->flags, |
| super->flags3); |
| return false; |
| } |
| if (super->fs_version > SUPER_BLOCK_FS_VERSION) { |
| pr_warn("super block is from the future: 0x%x\n", super->fs_version); |
| return true; |
| } |
| if (super->flags & ~SUPER_BLOCK_FLAGS_SUPPORTED_MASK) { |
| pr_warn("unknown flags set, 0x%x\n", super->flags); |
| return false; |
| } |
| if (super->block_size != dev->block_size) { |
| pr_warn("bad block size 0x%x, expected 0x%zx\n", super->block_size, |
| dev->block_size); |
| return false; |
| } |
| if (super->block_num_size != dev->block_num_size) { |
| pr_warn("invalid block_num_size %d, expected %zd\n", |
| super->block_num_size, dev->block_num_size); |
| return false; |
| } |
| if (super->mac_size != dev->mac_size) { |
| pr_warn("invalid mac_size %d, expected %zd\n", super->mac_size, |
| dev->mac_size); |
| return false; |
| } |
| if (!dev->tamper_detecting && super->mac_size != sizeof(struct mac)) { |
| pr_warn("invalid mac_size %d != %zd\n", super->mac_size, |
| sizeof(data_block_t)); |
| return false; |
| } |
| /* |
| * This check only disallows shrinking the block device without clearing the |
| * filesystem as we don't currently check and shrink the backing file on the |
| * block device. However, we don't actually read this value from the |
| * super-block after this check and we instead use the value from the block |
| * device (which may be larger), and save that value to future super-blocks. |
| * Since we don't use this value from the super-block, we don't need a |
| * separate block count for the alternate backup roots as long as the block |
| * device doesn't shrink. |
| */ |
| if (super->block_count > dev->block_count) { |
| pr_warn("bad block count 0x%" PRIx64 ", expected <= 0x%" PRIx64 "\n", |
| super->block_count, dev->block_count); |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * super_version_delta - Find the version delta between two superblocks |
| * @new_super: Candidate new superblock |
| * @old_super: Old superblock |
| * |
| * The overflow in this function is intentional as a way to use a wrapping |
| * two-bit counter. |
| * |
| * Return: Wrapped difference between the two bit version numbers in the two |
| * superblocks. This will be 1 when new is newer than old, 3 when old is |
| * newer than new, and any other number indicates an invalid/corrupt version. |
| */ |
| __attribute__((no_sanitize("unsigned-integer-overflow"))) static inline uint8_t |
| super_version_delta(const struct super_block* new_super, |
| const struct super_block* old_super) { |
| return (new_super->flags - old_super->flags) & |
| SUPER_BLOCK_FLAGS_VERSION_MASK; |
| } |
| |
| /** |
| * use_new_super - Check if new superblock is valid and more recent than old |
| * @dev: Block device that super block was read from. |
| * @new_super: New super block data. |
| * @new_super_index: Index that @new_super was read from. |
| * @old_super: Old super block data, or %NULL. |
| * |
| * Return: %true if @new_super is valid for @dev, and more recent than |
| * @old_super (or @old_super is %NULL), %false otherwise. |
| */ |
| static bool use_new_super(const struct block_device* dev, |
| const struct super_block* new_super, |
| unsigned int new_super_index, |
| const struct super_block* old_super) { |
| uint8_t dv; |
| if (!super_block_valid(dev, new_super)) { |
| return false; |
| } |
| if ((new_super->flags & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK) != |
| new_super_index) { |
| pr_warn("block index, 0x%x, does not match flags, 0x%x\n", |
| new_super_index, new_super->flags); |
| return false; |
| } |
| if (!old_super) { |
| return true; |
| } |
| dv = super_version_delta(new_super, old_super); |
| pr_read("version delta, %d (new flags 0x%x, old flags 0x%x)\n", dv, |
| new_super->flags, old_super->flags); |
| if (dv == 1) { |
| return true; |
| } |
| if (dv == 3) { |
| return false; |
| } |
| pr_warn("bad version delta, %d (new flags 0x%x, old flags 0x%x)\n", dv, |
| new_super->flags, old_super->flags); |
| return false; |
| } |
| |
| /** |
| * fs_set_roots - Initialize fs state from super block roots |
| * @fs: File system state object |
| * @free: Free set root node |
| * @files: Files tree root node |
| * @checkpoint: Checkpoint metadata block. May be NULL. |
| * |
| * Returns %true if fs roots were correctly initialized, %false otherwise. |
| */ |
| static bool fs_set_roots(struct fs* fs, |
| const struct block_mac* free, |
| const struct block_mac* files, |
| const struct block_mac* checkpoint) { |
| bool success = true; |
| struct transaction tr; |
| |
| fs->free.block_tree.root = *free; |
| fs->files.root = *files; |
| |
| if (checkpoint) { |
| fs->checkpoint = *checkpoint; |
| transaction_init(&tr, fs, true); |
| assert(!block_range_empty(fs->checkpoint_free.initial_range)); |
| /* |
| * fs->checkpoint_free is initialized to contain all blocks, so we don't |
| * have to initialize it if there is no checkpoint on disk |
| */ |
| if (block_mac_valid(&tr, &fs->checkpoint)) { |
| success = checkpoint_read(&tr, &fs->checkpoint, NULL, |
| &fs->checkpoint_free); |
| } |
| if (!tr.failed) { |
| /* temporary transaction is only for reading, drop it */ |
| transaction_fail(&tr); |
| } |
| transaction_free(&tr); |
| } |
| |
| return success; |
| } |
| |
| /** |
| * fs_init_free_set - Initialize an initial free set for a file system |
| * @fs: File system state object. |
| * @set: Block set to initialize |
| * |
| * Initializes @set to the entire range of @fs, i.e. all blocks are free. |
| */ |
| static void fs_init_free_set(struct fs* fs, struct block_set* set) { |
| struct block_range range = { |
| .start = fs->min_block_num, |
| .end = fs->dev->block_count, |
| }; |
| block_set_add_initial_range(set, range); |
| } |
| |
| /** |
| * fs_init_from_super - Initialize file system from super block |
| * @fs: File system state object. |
| * @super: Superblock data, or %NULL. |
| * @flags: Any of &typedef fs_init_flags32_t, ORed together. |
| * |
| * Return: 0 if super block was usable, -1 if not. |
| */ |
| static int fs_init_from_super(struct fs* fs, |
| const struct super_block* super, |
| fs_init_flags32_t flags) { |
| bool is_clear = false; |
| bool do_clear = flags & FS_INIT_FLAGS_DO_CLEAR; |
| bool do_swap = false; /* Does the active superblock alternate mode match the |
| current mode? */ |
| bool has_backup_field = |
| super && (super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_FLAGS3); |
| bool has_checkpoint_field = |
| has_backup_field && super && |
| (super->opt_flags & SUPER_BLOCK_OPT_FLAGS_HAS_CHECKPOINT); |
| bool recovery_allowed = flags & FS_INIT_FLAGS_RECOVERY_CLEAR_ALLOWED; |
| const struct block_mac* new_files_root; |
| const struct block_mac* new_free_root; |
| const struct block_mac* new_checkpoint = NULL; |
| |
| if (super && super->fs_version > SUPER_BLOCK_FS_VERSION) { |
| pr_err("ERROR: super block is from the future 0x%x\n", |
| super->fs_version); |
| return -1; |
| } |
| |
| /* |
| * We check that the super-block matches these block device params in |
| * super_block_valid(). If these params change, the filesystem (and |
| * alternate backup) will be wiped and reset with the new params. |
| */ |
| fs->block_num_size = fs->dev->block_num_size; |
| fs->mac_size = fs->dev->mac_size; |
| |
| block_set_init(fs, &fs->free); |
| fs->free.block_tree.copy_on_write = true; |
| fs_file_tree_init(fs, &fs->files); |
| fs->files.copy_on_write = true; |
| fs->files.allow_copy_on_write = true; |
| |
| memset(&fs->checkpoint, 0, sizeof(fs->checkpoint)); |
| block_set_init(fs, &fs->checkpoint_free); |
| /* |
| * checkpoint_init() will clear the checkpoint initial range if a valid |
| * checkpoint exists. |
| */ |
| fs_init_free_set(fs, &fs->checkpoint_free); |
| |
| /* Reserve 1/4 for tmp blocks plus half of the remaining space */ |
| fs->reserved_count = fs->dev->block_count / 8 * 5; |
| |
| fs->alternate_data = flags & FS_INIT_FLAGS_ALTERNATE_DATA; |
| |
| if (super) { |
| fs->super_block_version = super->flags & SUPER_BLOCK_FLAGS_VERSION_MASK; |
| |
| do_swap = !(super->flags & SUPER_BLOCK_FLAGS_ALTERNATE) != |
| !(flags & FS_INIT_FLAGS_ALTERNATE_DATA); |
| |
| if (do_swap) { |
| pr_init("Swapping super-block with alternate\n"); |
| |
| fs->backup.flags = super->flags & (SUPER_BLOCK_FLAGS_EMPTY | |
| SUPER_BLOCK_FLAGS_ALTERNATE); |
| fs->backup.free = super->free; |
| fs->backup.files = super->files; |
| fs->backup.checkpoint = super->checkpoint; |
| |
| if (!has_backup_field || |
| super->backup.flags & SUPER_BLOCK_FLAGS_EMPTY) { |
| is_clear = true; |
| } else if (has_backup_field) { |
| new_files_root = &super->backup.files; |
| new_free_root = &super->backup.free; |
| if (has_checkpoint_field) { |
| new_checkpoint = &super->backup.checkpoint; |
| } |
| } |
| } else { |
| if (has_backup_field) { |
| fs->backup = super->backup; |
| } |
| |
| if (super->flags & SUPER_BLOCK_FLAGS_EMPTY) { |
| is_clear = true; |
| } else { |
| new_files_root = &super->files; |
| new_free_root = &super->free; |
| if (has_checkpoint_field) { |
| new_checkpoint = &super->checkpoint; |
| } |
| } |
| } |
| |
| if (!is_clear && !do_clear && |
| (!block_probe(fs, new_files_root) || |
| !block_probe(fs, new_free_root))) { |
| pr_init("Backing file probe failed, fs is corrupted.\n"); |
| if (recovery_allowed) { |
| pr_init("Attempting to clear corrupted fs.\n"); |
| do_clear = true; |
| } |
| } |
| } |
| |
| /* |
| * If we are initializing a new fs or if we are not swapping but detect an |
| * old superblock without the backup slot, ensure that the backup slot is a |
| * valid empty filesystem in case we later switch filesystems without an |
| * explicit clear flag. |
| */ |
| if (!super || (!do_swap && !has_backup_field)) { |
| fs->backup = (struct super_block_backup){ |
| .flags = SUPER_BLOCK_FLAGS_EMPTY, |
| .files = {0}, |
| .free = {0}, |
| .checkpoint = {0}, |
| }; |
| } |
| |
| if (super && !is_clear && !do_clear) { |
| if (!fs_set_roots(fs, new_free_root, new_files_root, new_checkpoint)) { |
| pr_err("failed to initialize filesystem roots\n"); |
| return -1; |
| } |
| pr_init("loaded super block version %d\n", fs->super_block_version); |
| } else { |
| if (is_clear) { |
| pr_init("superblock, version %d, is empty fs\n", |
| fs->super_block_version); |
| } else if (do_clear) { |
| pr_init("clear requested, create empty, version %d\n", |
| fs->super_block_version); |
| } else { |
| pr_init("no valid super-block found, create empty\n"); |
| } |
| fs_init_free_set(fs, &fs->free); |
| } |
| assert(fs->block_num_size >= fs->dev->block_num_size); |
| assert(fs->block_num_size <= sizeof(data_block_t)); |
| assert(fs->mac_size >= fs->dev->mac_size); |
| assert(fs->mac_size <= sizeof(struct mac)); |
| assert(fs->mac_size == sizeof(struct mac) || fs->dev->tamper_detecting); |
| |
| if (do_clear && !is_clear) { |
| if (!write_initial_super_block(fs)) { |
| return -1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * load_super_block - Find and load superblock and initialize file system state |
| * @fs: File system state object. |
| * @flags: Any of &typedef fs_init_flags32_t, ORed together. |
| * |
| * Return: 0 if super block was readable and not from a future file system |
| * version (regardless of its other content), -1 if not. |
| */ |
| static int load_super_block(struct fs* fs, fs_init_flags32_t flags) { |
| unsigned int i; |
| int ret; |
| const struct super_block* new_super; |
| struct obj_ref new_super_ref = OBJ_REF_INITIAL_VALUE(new_super_ref); |
| const struct super_block* old_super = NULL; |
| struct obj_ref old_super_ref = OBJ_REF_INITIAL_VALUE(old_super_ref); |
| |
| assert(fs->super_dev->block_size >= sizeof(struct super_block)); |
| |
| for (i = 0; i < countof(fs->super_block); i++) { |
| new_super = block_get_super(fs, fs->super_block[i], &new_super_ref); |
| if (!new_super) { |
| if (fs->allow_tampering) { |
| /* |
| * Superblock may not exist yet in non-secure storage, proceed |
| * anyway |
| */ |
| continue; |
| } |
| pr_err("failed to read super-block\n"); |
| ret = -1; // -EIO ? ERR_IO?; |
| goto err; |
| } |
| if (use_new_super(fs->dev, new_super, i, old_super)) { |
| if (old_super) { |
| block_put(old_super, &old_super_ref); |
| } |
| old_super = new_super; |
| obj_ref_transfer(&old_super_ref, &new_super_ref); |
| } else { |
| block_put(new_super, &new_super_ref); |
| } |
| } |
| |
| ret = fs_init_from_super(fs, old_super, flags); |
| err: |
| if (old_super) { |
| block_put(old_super, &old_super_ref); |
| } |
| return ret; |
| } |
| |
| struct fs_check_state { |
| struct file_iterate_state iter; |
| bool check_all_data_blocks; |
| bool delete_invalid_files; |
| |
| bool internal_state_valid; |
| }; |
| |
| static bool fs_check_delete_file(struct fs* fs, char* path) { |
| struct transaction tr; |
| bool ret; |
| |
| pr_err("deleting invalid file %s\n", path); |
| transaction_init(&tr, fs, true); |
| if (!file_delete(&tr, path)) { |
| if (!tr.failed) { |
| transaction_fail(&tr); |
| } |
| goto err_delete; |
| } |
| transaction_complete_etc(&tr, false); |
| |
| err_delete: |
| ret = !tr.failed; |
| transaction_free(&tr); |
| return ret; |
| } |
| |
| static bool fs_check_file(struct file_iterate_state* iter, |
| struct transaction* tr, |
| const struct block_mac* block_mac, |
| bool added, |
| bool removed) { |
| struct fs_check_state* fs_check_state = |
| containerof(iter, struct fs_check_state, iter); |
| struct obj_ref info_ref = OBJ_REF_INITIAL_VALUE(info_ref); |
| struct obj_ref data_ref = OBJ_REF_INITIAL_VALUE(data_ref); |
| struct file_handle file; |
| const void* data = NULL; |
| char path[FS_PATH_MAX]; |
| bool needs_delete = false; |
| |
| const struct file_info* info = file_get_info(tr, block_mac, &info_ref); |
| if (!info) { |
| pr_err("could not get file info at block %" PRIu64 "\n", |
| block_mac_to_block(tr, block_mac)); |
| fs_check_state->internal_state_valid = false; |
| goto err_file_info; |
| } |
| strncpy(path, info->path, sizeof(path)); |
| path[sizeof(path) - 1] = '\0'; |
| file_info_put(info, &info_ref); |
| |
| bool opened = file_open(tr, path, &file, FILE_OPEN_NO_CREATE); |
| if (!opened) { |
| /* TODO: is it ok to leak the filename here? we do it elsewhere */ |
| pr_err("could not open file %s\n", path); |
| needs_delete = true; |
| goto err_file_open; |
| } |
| |
| data_block_t data_check_count = fs_check_state->check_all_data_blocks |
| ? file.size |
| : MIN(1, file.size); |
| |
| for (data_block_t i = 0; i < data_check_count; ++i) { |
| data = file_get_block(tr, &file, i, &data_ref); |
| if (data) { |
| file_block_put(data, &data_ref); |
| data = NULL; |
| } else { |
| /* TODO: is it ok to leak the filename here? we do it elsewhere */ |
| pr_err("invalid file data at block %" PRIu64 " of file %s\n", i, |
| path); |
| assert(tr->failed); |
| needs_delete = true; |
| break; |
| } |
| } |
| |
| file_close(&file); |
| |
| err_file_open: |
| if (needs_delete) { |
| if (fs_check_state->delete_invalid_files) { |
| if (!fs_check_delete_file(tr->fs, path)) { |
| pr_err("delete failed, internal state is corrupted\n"); |
| fs_check_state->internal_state_valid = false; |
| } |
| } else { |
| fs_check_state->internal_state_valid = false; |
| } |
| } |
| err_file_info: |
| if (tr->failed) { |
| transaction_activate(tr); |
| } |
| |
| /* Continue iterating unconditionally */ |
| return false; |
| } |
| |
| bool fs_check(struct fs* fs, |
| bool delete_invalid_files, |
| bool check_all_data_blocks) { |
| struct transaction iterate_tr; |
| struct fs_check_state state = { |
| .iter.file = fs_check_file, |
| .check_all_data_blocks = check_all_data_blocks, |
| .delete_invalid_files = delete_invalid_files, |
| .internal_state_valid = true, |
| }; |
| |
| transaction_init(&iterate_tr, fs, true); |
| file_iterate(&iterate_tr, NULL, false, &state.iter); |
| if (iterate_tr.failed) { |
| state.internal_state_valid = false; |
| goto finished; |
| } |
| |
| /* Check the free list for consistency */ |
| if (!block_set_check(&iterate_tr, &fs->free) || iterate_tr.failed) { |
| pr_err("free block set is corrupted\n"); |
| state.internal_state_valid = false; |
| } |
| |
| finished: |
| if (!iterate_tr.failed) { |
| transaction_fail(&iterate_tr); |
| } |
| transaction_free(&iterate_tr); |
| |
| return state.internal_state_valid; |
| } |
| |
| /** |
| * fs_file_tree_init - Initialize an empty file tree for a file system |
| * @fs: File system state object. |
| * @tree: Block tree to initialize as a file tree. |
| */ |
| void fs_file_tree_init(const struct fs* fs, struct block_tree* tree) { |
| size_t block_num_size; |
| size_t block_mac_size; |
| |
| block_num_size = fs->block_num_size; |
| block_mac_size = block_num_size + fs->mac_size; |
| block_tree_init(tree, fs->dev->block_size, block_num_size, block_mac_size, |
| block_mac_size); |
| } |
| |
| /** |
| * fs_init - Initialize file system state |
| * @fs: File system state object. |
| * @key: Key pointer. Must not be freed while @fs is in use. |
| * @dev: Main block device. |
| * @super_dev: Block device for super block. |
| * @flags: Any of &typedef fs_init_flags32_t, ORed together. |
| */ |
| int fs_init(struct fs* fs, |
| const struct key* key, |
| struct block_device* dev, |
| struct block_device* super_dev, |
| fs_init_flags32_t flags) { |
| int ret; |
| |
| if (super_dev->block_size < sizeof(struct super_block)) { |
| pr_err("unsupported block size for super_dev, %zd < %zd\n", |
| super_dev->block_size, sizeof(struct super_block)); |
| return -1; // ERR_NOT_VALID? |
| } |
| |
| if (super_dev->block_count < 2) { |
| pr_err("unsupported block count for super_dev, %" PRIu64 "\n", |
| super_dev->block_count); |
| return -1; // ERR_NOT_VALID? |
| } |
| |
| fs->key = key; |
| fs->dev = dev; |
| fs->super_dev = super_dev; |
| fs->allow_tampering = flags & FS_INIT_FLAGS_ALLOW_TAMPERING; |
| list_initialize(&fs->transactions); |
| list_initialize(&fs->allocated); |
| fs->initial_super_block_tr = NULL; |
| list_add_tail(&fs_list, &fs->node); |
| |
| if (dev == super_dev) { |
| fs->min_block_num = 2; |
| } else { |
| /* TODO: use 0 when btree code allows it */ |
| fs->min_block_num = 1; |
| } |
| fs->super_block[0] = 0; |
| fs->super_block[1] = 1; |
| ret = load_super_block(fs, flags); |
| if (ret) { |
| fs_destroy(fs); |
| fs->dev = NULL; |
| fs->super_dev = NULL; |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| /** |
| * fs_destroy - Destroy file system state |
| * @fs: File system state object. |
| * |
| * Free any dynamically allocated state and check that @fs is not referenced by |
| * any transactions. |
| */ |
| void fs_destroy(struct fs* fs) { |
| if (fs->initial_super_block_tr) { |
| if (!fs->initial_super_block_tr->failed) { |
| transaction_fail(fs->initial_super_block_tr); |
| } |
| transaction_free(fs->initial_super_block_tr); |
| free(fs->initial_super_block_tr); |
| fs->initial_super_block_tr = NULL; |
| } |
| assert(list_is_empty(&fs->transactions)); |
| assert(list_is_empty(&fs->allocated)); |
| list_delete(&fs->node); |
| } |
| |
| /** |
| * fs_unknown_super_block_state_all - Notify filesystems of unknown disk state |
| * |
| * Call from other layers when detecting write failues that can cause the |
| * in-memory state of super blocks (or other block that we don't care about) to |
| * be different from the on-disk state. Write in-memory state to disk before |
| * writing any other block. |
| */ |
| void fs_unknown_super_block_state_all(void) { |
| struct fs* fs; |
| list_for_every_entry(&fs_list, fs, struct fs, node) { |
| /* TODO: filter out filesystems that are not affected? */ |
| /* |
| * We can't reinitialize an existing, failed special transaction here. |
| * If a initial superblock write failed and triggered |
| * fs_unknown_super_block_state_all() we need to leave that superblock |
| * transaction in a failed state so that the transaction that that |
| * triggered the failing write can also be failed further up the call |
| * chain. If a special transaction already exists we are guaranteed that |
| * it will be reinitialized and flushed to disk before any new writes to |
| * that FS, so we don't need to reinitialize it here. |
| */ |
| write_current_super_block(fs, false /* reinitialize */); |
| } |
| } |
| |
| void fs_fail_all_transactions(void) { |
| struct transaction* tmp_tr; |
| struct transaction* tr; |
| struct fs* fs; |
| list_for_every_entry(&fs_list, fs, struct fs, node) { |
| list_for_every_entry_safe(&fs->transactions, tr, tmp_tr, |
| struct transaction, node) { |
| if (transaction_is_active(tr) && !tr->failed) { |
| transaction_fail(tr); |
| } |
| } |
| } |
| } |