storage: Re-sync superblock state on superblock write errors.

Instead of aborting when detecting write errors that leave us in a
state where the on-disk super block state is unknown, rewrite the
in-memory version before any other writes.

Bug: 193801567
Change-Id: I2a145b138c9df5693f76ecde449687f3c8b10f8f
diff --git a/fs.h b/fs.h
index bff3e64..2f955a4 100644
--- a/fs.h
+++ b/fs.h
@@ -37,6 +37,7 @@
 
 /**
  * struct fs - File system state
+ * @node:                           List node for fs_list.
  * @dev:                            Main block device.
  * @transactions:                   Transaction list.
  * @allocated:                      List of block sets containing blocks
@@ -63,6 +64,7 @@
  */
 
 struct fs {
+    struct list_node node;
     struct block_device* dev;
     struct list_node transactions;
     struct list_node allocated;
@@ -90,4 +92,6 @@
             struct block_device* super_dev,
             bool clear);
 
+void fs_unknown_super_block_state_all(void);
+
 void fs_destroy(struct fs* fs);
diff --git a/super.c b/super.c
index e6dbc4b..3f57721 100644
--- a/super.c
+++ b/super.c
@@ -92,6 +92,8 @@
 STATIC_ASSERT(sizeof(struct super_block) <= 128);
 STATIC_ASSERT(sizeof(struct super_block) >= 128);
 
+static struct list_node fs_list = LIST_INITIAL_VALUE(fs_list);
+
 /**
  * update_super_block - Generate and write superblock
  * @tr:         Transaction object.
@@ -112,6 +114,8 @@
     uint32_t block_size = tr->fs->super_dev->block_size;
 
     assert(block_size >= sizeof(struct super_block));
+    assert(tr->fs->initial_super_block_tr == NULL ||
+           tr->fs->initial_super_block_tr == tr);
 
     ver = (tr->fs->super_block_version + 1) & SUPER_BLOCK_FLAGS_VERSION_MASK;
     index = ver & SUPER_BLOCK_FLAGS_BLOCK_INDEX_MASK;
@@ -184,6 +188,47 @@
 }
 
 /**
+ * write_current_super_block - Write current superblock to internal transaction
+ * @fs:         File system state object.
+ *
+ * Write the current state of the super block to an internal transaction that
+ * will be written before any other block. This can be used to re-sync the
+ * in-memory fs-state with the on-disk state after detecting a write failure
+ * where we no longer know the on-disk super block state.
+ */
+static void write_current_super_block(struct fs* fs) {
+    bool super_block_updated;
+    struct transaction* tr;
+
+    if (fs->initial_super_block_tr) {
+        /*
+         * If initial_super_block_tr is already set there is no need to allocate
+         * a new one so return early.
+         *
+         * Currently initial_super_block_tr can point to a failed transaction.
+         * If that is the case @fs will never be write-able again.
+         * TODO: Make sure initial_super_block_tr does not stay in a failed
+         * state.
+         */
+        return;
+    }
+    tr = calloc(1, sizeof(*tr));
+    if (!tr) {
+        /* Not safe to proceed. TODO: add flag to defer this allocation? */
+        abort();
+    }
+    fs->initial_super_block_tr = tr;
+
+    transaction_init(tr, fs, true);
+    super_block_updated =
+            update_super_block(tr, &fs->free.block_tree.root, &fs->files.root);
+    if (!super_block_updated) {
+        /* Not safe to proceed. TODO: add flag to try again? */
+        abort();
+    }
+}
+
+/**
  * super_block_valid - Check if superblock is valid
  * @dev:        Block device that supoer block was read from.
  * @super:      Super block data.
@@ -464,6 +509,7 @@
     list_initialize(&fs->transactions);
     list_initialize(&fs->allocated);
     fs->initial_super_block_tr = NULL;
+    list_add_tail(&fs_list, &fs->node);
 
     if (dev == super_dev) {
         fs->min_block_num = 2;
@@ -500,4 +546,21 @@
     }
     assert(list_is_empty(&fs->transactions));
     assert(list_is_empty(&fs->allocated));
+    list_delete(&fs->node);
+}
+
+/**
+ * fs_unknown_super_block_state_all - Notify filesystems of unknown disk state
+ *
+ * Call from other layers when detecting write failures that can cause the
+ * in-memory state of super blocks (or other block that we don't care about) to
+ * be different from the on-disk state. Write in-memory state to disk before
+ * writing any other block.
+ */
+void fs_unknown_super_block_state_all(void) {
+    struct fs* fs;
+    list_for_every_entry(&fs_list, fs, struct fs, node) {
+        /* TODO: filter out filesystems that are not affected? */
+        write_current_super_block(fs);
+    }
 }
diff --git a/test/block_test.c b/test/block_test.c
index 23efb97..266bfbc 100644
--- a/test/block_test.c
+++ b/test/block_test.c
@@ -105,6 +105,8 @@
 static bool print_test_verbose = false;
 static bool print_block_tree_test_verbose = false;
 
+data_block_t block_test_fail_write_blocks;
+
 static void block_test_start_read(struct block_device* dev,
                                   data_block_t block) {
     assert(dev->block_size <= BLOCK_SIZE);
@@ -120,7 +122,8 @@
     assert(block < countof(blocks));
     assert(data_size <= sizeof(blocks[block].data));
     memcpy(blocks[block].data, data, data_size);
-    block_cache_complete_write(dev, block, false);
+    block_cache_complete_write(dev, block,
+                               block < block_test_fail_write_blocks);
 }
 
 #if FULL_ASSERT
@@ -847,6 +850,20 @@
     transaction_activate(tr);
 }
 
+static void super_block_write_failure_test(struct transaction* tr) {
+    data_block_t block1 = block_allocate(tr);
+    /* trigger a superblock write failure */
+    block_test_fail_write_blocks = 2;
+    transaction_complete(tr);
+    block_test_fail_write_blocks = 0;
+    assert(tr->failed);
+    transaction_activate(tr);
+    assert(block_allocate(tr) == block1);
+    transaction_complete(tr);
+    transaction_activate(tr);
+    block_free(tr, block1);
+}
+
 static void open_test_file_etc(struct transaction* tr,
                                struct file_handle* file,
                                const char* path,
@@ -1746,6 +1763,7 @@
         TEST(free_2_transactions_same_test_2),
         TEST(allocate_all_test),
         TEST(block_tree_allocate_all_test),
+        TEST(super_block_write_failure_test),
         TEST(file_create1_small_test),
         TEST(file_write1_small_test),
         TEST(file_delete1_small_test),
diff --git a/transaction.c b/transaction.c
index dc0faef..9e88714 100644
--- a/transaction.c
+++ b/transaction.c
@@ -294,6 +294,28 @@
     assert(block_range_empty(new_free_set.initial_range));
     check_free_tree(tr, &new_free_set);
 
+    if (block_mac_same_block(tr, &tr->fs->free.block_tree.root,
+                             &new_free_set.block_tree.root)) {
+        /*
+         * If the root block of the free tree did not move, there can be no
+         * other changes to the filesystem.
+         */
+        assert(block_mac_eq(tr, &tr->fs->free.block_tree.root,
+                            &new_free_set.block_tree.root));
+        assert(block_mac_eq(tr, &tr->fs->files.root, &new_files));
+
+        /*
+         * Skip super block write if there are no changes to the filesystem.
+         * This is needed in case a previous write error has triggered a request
+         * to write another copy of the old super block. There can only be one
+         * copy of each block in the cache. If we try to write a new super block
+         * here before cleaning the pending one, we get a conflict. If there
+         * were changes to the filesystem, the pending super block has already
+         * been cleaned at this point.
+         */
+        goto complete_nop_transaction;
+    }
+
     super_block_updated =
             update_super_block(tr, &new_free_set.block_tree.root, &new_files);
     if (!super_block_updated) {
@@ -307,7 +329,19 @@
      * If an error was detected writing the super block, it is not safe to
      * continue as we do not know if the write completed.
      */
-    assert(!tr->failed);
+    if (tr->failed) {
+        pr_warn("failed to write super block, notify fs and abort\n");
+        /*
+         * Superblock could have been written or not. Make sure no other blocks
+         * are written to the filesystem before writing another copy of the
+         * superblock with the existing file and free trees.
+         *
+         * TODO: Don't trigger a superblock write on unaffected filesystems.
+         * We update all for now to simplify testing.
+         */
+        fs_unknown_super_block_state_all();
+        goto err_transaction_failed;
+    }
 
     tr->fs->free.block_tree.root = new_free_set.block_tree.root;
     block_range_clear(
@@ -316,6 +350,7 @@
     tr->fs->files.root = new_files;
     tr->fs->super_block_version = tr->fs->written_super_block_version;
 
+complete_nop_transaction:
     transaction_delete_active(tr);
     tr->complete = true;