storage: Re-sync superblock state on superblock write errors.
Instead of aborting when detecting write errors that leave us in a
state where the on-disk super block state is unknown, rewrite the
in-memory version before any other writes.
Bug: 193801567
Change-Id: I2a145b138c9df5693f76ecde449687f3c8b10f8f
diff --git a/fs.h b/fs.h
index bff3e64..2f955a4 100644
--- a/fs.h
+++ b/fs.h
@@ -37,6 +37,7 @@
/**
* struct fs - File system state
+ * @node: List node for fs_list.
* @dev: Main block device.
* @transactions: Transaction list.
* @allocated: List of block sets containing blocks
@@ -63,6 +64,7 @@
*/
struct fs {
+ struct list_node node;
struct block_device* dev;
struct list_node transactions;
struct list_node allocated;
@@ -90,4 +92,6 @@
struct block_device* super_dev,
bool clear);
+void fs_unknown_super_block_state_all(void);
+
void fs_destroy(struct fs* fs);
diff --git a/super.c b/super.c
index e6dbc4b..3f57721 100644
--- a/super.c
+++ b/super.c
@@ -92,6 +92,8 @@
STATIC_ASSERT(sizeof(struct super_block) <= 128);
STATIC_ASSERT(sizeof(struct super_block) >= 128);
+static struct list_node fs_list = LIST_INITIAL_VALUE(fs_list);
+
/**
* update_super_block - Generate and write superblock
* @tr: Transaction object.
@@ -112,6 +114,8 @@
uint32_t block_size = tr->fs->super_dev->block_size;
assert(block_size >= sizeof(struct super_block));
+ assert(tr->fs->initial_super_block_tr == NULL ||
+ tr->fs->initial_super_block_tr == tr);
ver = (tr->fs->super_block_version + 1) & SUPER_BLOCK_FLAGS_VERSION_MASK;
index = ver & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK;
@@ -184,6 +188,47 @@
}
/**
+ * write_current_super_block - Write current superblock to internal transaction
+ * @fs: File system state object.
+ *
+ * Write the current state of the super block to an internal transaction that
+ * will be written before any other block. This can be used to re-sync the
+ * in-memory fs-state with the on-disk state after detecting a write failure
+ * where we no longer know the on-disk super block state.
+ */
+static void write_current_super_block(struct fs* fs) {
+ bool super_block_updated;
+ struct transaction* tr;
+
+ if (fs->initial_super_block_tr) {
+ /*
+ * If initial_super_block_tr is already set there is no need to allocate
+ * a new one so return early.
+ *
+ * Currently initial_super_block_tr can point to a failed transaction.
+ * If that is the case @fs will never be writable again.
+ * TODO: Make sure initial_super_block_tr does not stay in a failed
+ * state.
+ */
+ return;
+ }
+ tr = calloc(1, sizeof(*tr));
+ if (!tr) {
+ /* Not safe to proceed. TODO: add flag to defer this allocation? */
+ abort();
+ }
+ fs->initial_super_block_tr = tr;
+
+ transaction_init(tr, fs, true);
+ super_block_updated =
+ update_super_block(tr, &fs->free.block_tree.root, &fs->files.root);
+ if (!super_block_updated) {
+ /* Not safe to proceed. TODO: add flag to try again? */
+ abort();
+ }
+}
+
+/**
* super_block_valid - Check if superblock is valid
* @dev: Block device that supoer block was read from.
* @super: Super block data.
@@ -464,6 +509,7 @@
list_initialize(&fs->transactions);
list_initialize(&fs->allocated);
fs->initial_super_block_tr = NULL;
+ list_add_tail(&fs_list, &fs->node);
if (dev == super_dev) {
fs->min_block_num = 2;
@@ -500,4 +546,21 @@
}
assert(list_is_empty(&fs->transactions));
assert(list_is_empty(&fs->allocated));
+ list_delete(&fs->node);
+}
+
+/**
+ * fs_unknown_super_block_state_all - Notify filesystems of unknown disk state
+ *
+ * Call from other layers when detecting write failures that can cause the
+ * in-memory state of super blocks (or other blocks that we don't care about) to
+ * be different from the on-disk state. Write in-memory state to disk before
+ * writing any other block.
+ */
+void fs_unknown_super_block_state_all(void) {
+ struct fs* fs;
+ list_for_every_entry(&fs_list, fs, struct fs, node) {
+ /* TODO: filter out filesystems that are not affected? */
+ write_current_super_block(fs);
+ }
}
diff --git a/test/block_test.c b/test/block_test.c
index 23efb97..266bfbc 100644
--- a/test/block_test.c
+++ b/test/block_test.c
@@ -105,6 +105,8 @@
static bool print_test_verbose = false;
static bool print_block_tree_test_verbose = false;
+data_block_t block_test_fail_write_blocks;
+
static void block_test_start_read(struct block_device* dev,
data_block_t block) {
assert(dev->block_size <= BLOCK_SIZE);
@@ -120,7 +122,8 @@
assert(block < countof(blocks));
assert(data_size <= sizeof(blocks[block].data));
memcpy(blocks[block].data, data, data_size);
- block_cache_complete_write(dev, block, false);
+ block_cache_complete_write(dev, block,
+ block < block_test_fail_write_blocks);
}
#if FULL_ASSERT
@@ -847,6 +850,20 @@
transaction_activate(tr);
}
+static void super_block_write_failure_test(struct transaction* tr) {
+ data_block_t block1 = block_allocate(tr);
+ /* fail writes to blocks < 2 to trigger a superblock write failure */
+ block_test_fail_write_blocks = 2;
+ transaction_complete(tr);
+ block_test_fail_write_blocks = 0;
+ assert(tr->failed);
+ transaction_activate(tr);
+ assert(block_allocate(tr) == block1);
+ transaction_complete(tr);
+ transaction_activate(tr);
+ block_free(tr, block1);
+}
+
static void open_test_file_etc(struct transaction* tr,
struct file_handle* file,
const char* path,
@@ -1746,6 +1763,7 @@
TEST(free_2_transactions_same_test_2),
TEST(allocate_all_test),
TEST(block_tree_allocate_all_test),
+ TEST(super_block_write_failure_test),
TEST(file_create1_small_test),
TEST(file_write1_small_test),
TEST(file_delete1_small_test),
diff --git a/transaction.c b/transaction.c
index dc0faef..9e88714 100644
--- a/transaction.c
+++ b/transaction.c
@@ -294,6 +294,28 @@
assert(block_range_empty(new_free_set.initial_range));
check_free_tree(tr, &new_free_set);
+ if (block_mac_same_block(tr, &tr->fs->free.block_tree.root,
+ &new_free_set.block_tree.root)) {
+ /*
+ * If the root block of the free tree did not move, there can be no
+ * other changes to the filesystem.
+ */
+ assert(block_mac_eq(tr, &tr->fs->free.block_tree.root,
+ &new_free_set.block_tree.root));
+ assert(block_mac_eq(tr, &tr->fs->files.root, &new_files));
+
+ /*
+ * Skip super block write if there are no changes to the filesystem.
+ * This is needed in case a previous write error has triggered a request
+ * to write another copy of the old super block. There can only be one
+ * copy of each block in the cache. If we try to write a new super block
+ * here before cleaning the pending one, we get a conflict. If there
+ * were changes to the filesystem, the pending super block has already
+ * been cleaned at this point.
+ */
+ goto complete_nop_transaction;
+ }
+
super_block_updated =
update_super_block(tr, &new_free_set.block_tree.root, &new_files);
if (!super_block_updated) {
@@ -307,7 +329,19 @@
* If an error was detected writing the super block, it is not safe to
* continue as we do not know if the write completed.
*/
- assert(!tr->failed);
+ if (tr->failed) {
+ pr_warn("failed to write super block, notify fs and abort\n");
+ /*
+ * Superblock could have been written or not. Make sure no other blocks
+ * are written to the filesystem before writing another copy of the
+ * superblock with the existing file and free trees.
+ *
+ * TODO: Don't trigger a superblock write on unaffected filesystems.
+ * We update all for now to simplify testing.
+ */
+ fs_unknown_super_block_state_all();
+ goto err_transaction_failed;
+ }
tr->fs->free.block_tree.root = new_free_set.block_tree.root;
block_range_clear(
@@ -316,6 +350,7 @@
tr->fs->files.root = new_files;
tr->fs->super_block_version = tr->fs->written_super_block_version;
+complete_nop_transaction:
transaction_delete_active(tr);
tr->complete = true;