blob: 913a16386a0480d4791df2956b435478bd294717 [file] [log] [blame]
/*
* Copyright (C) 2015-2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <assert.h>
#include <inttypes.h>
#include <lk/compiler.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "array.h"
#include "block_allocator.h"
#include "block_set.h"
#include "checkpoint.h"
#include "debug.h"
#include "file.h"
#include "transaction.h"
/*
 * Debug flag: when set, transaction_merge_free_sets prints each add/delete
 * range it processes. Off by default (zero-initialized file-scope variable).
 */
bool print_merge_free;
/**
 * transaction_check_free - Check that a free set contains enough blocks
 * @tr:       Transaction object.
 * @set:      Free set to check.
 * @min_free: Number of blocks that must be in @set.
 *
 * Walks the ranges of @set in order, accumulating their sizes until the
 * requested count is reached or the set is exhausted.
 *
 * Return: %true if @set contains @min_free or more blocks, %false otherwise.
 */
static bool transaction_check_free(struct transaction* tr,
                                   struct block_set* set,
                                   data_block_t min_free) {
    data_block_t remaining = min_free;
    data_block_t pos = 0;
    struct block_range range;
    data_block_t range_size;

    for (;;) {
        range = block_set_find_next_range(tr, set, pos);
        if (block_range_empty(range)) {
            /* Set exhausted before reaching the requested count. */
            return false;
        }
        range_size = range.end - range.start;
        if (range_size >= remaining) {
            return true;
        }
        remaining -= range_size;
        pos = range.end;
    }
}
/**
 * transaction_merge_free_sets
 * @tr:      Transaction object.
 * @new_set: Output set.
 * @set_i:   Initial set.
 * @set_d:   Set of blocks to delete.
 * @set_a:   Set of blocks to add.
 *
 * Helper function to update the free_set when committing a transaction.
 * @new_set = @set_i - @set_d - @new_set-blocks + @set_a + set_[ida]-blocks
 * @new_set must start empty and will be initialized and filled in this
 * function.
 *
 * The delete and add sets are walked in a single ascending pass; at each step
 * the earlier of the two next ranges is applied to @new_set. Tree-node
 * allocations triggered by these set updates are queued while a range is
 * applied and processed afterwards. Returns early (without the final
 * consistency check) if @tr fails at any point.
 */
static void transaction_merge_free_sets(struct transaction* tr,
                                        struct block_set* new_set,
                                        struct block_set* set_i,
                                        struct block_set* set_d,
                                        struct block_set* set_a) {
    data_block_t next_block;
    struct block_range delete_range = BLOCK_RANGE_INITIAL_VALUE(delete_range);
    struct block_range add_range = BLOCK_RANGE_INITIAL_VALUE(add_range);
    /* new_set should start empty. */
    assert(block_set_find_next_block(tr, new_set, 1, true) == 0);
    block_set_copy(tr, new_set, set_i);
    full_assert(block_set_check(tr, set_i));
    full_assert(block_set_check(tr, set_d));
    full_assert(block_set_check(tr, set_a));
    /*
     * set_i's root block, if it exists, must not be a block this transaction
     * already allocated (it must still need a copy-on-write copy).
     */
    assert(!block_mac_valid(tr, &set_i->block_tree.root) ||
           transaction_block_need_copy(
                   tr, block_mac_to_block(tr, &set_i->block_tree.root)));
    if (print_merge_free) {
        printf("%s\n", __func__);
    }
    /* TODO: don't walk the whole tree each time */
    next_block = 1;
    while (next_block != 0) {
        /* min_free_block tracks merge progress for the block allocator. */
        tr->min_free_block = next_block;
        delete_range = block_set_find_next_range(tr, set_d, next_block);
        add_range = block_set_find_next_range(tr, set_a, next_block);
        if (print_merge_free) {
            printf("%s: add %" PRIu64 "-%" PRIu64 " or delete %" PRIu64
                   "-%" PRIu64 "\n",
                   __func__, add_range.start, add_range.end - 1,
                   delete_range.start, delete_range.end - 1);
        }
        /* A block cannot be both allocated and freed in one transaction. */
        assert(!block_range_overlap(delete_range, add_range));
        if (block_range_before(delete_range, add_range)) {
            /* The delete range comes first; remove it from new_set. */
            assert(delete_range.start >= next_block);
            tr->min_free_block = delete_range.end;
            block_allocator_suspend_set_updates(tr);
            block_set_remove_range(tr, new_set, delete_range);
            block_allocator_process_queue(tr);
            next_block = delete_range.end;
        } else if (!block_range_empty(add_range)) {
            /* The add range comes first; add it to new_set. */
            assert(add_range.start >= next_block);
            tr->min_free_block = add_range.end;
            block_allocator_suspend_set_updates(tr);
            block_set_add_range(tr, new_set, add_range);
            block_allocator_process_queue(tr);
            next_block = add_range.end;
        } else {
            /* Both sets exhausted; terminate the merge loop. */
            assert(block_range_empty(delete_range));
            assert(block_range_empty(add_range));
            next_block = 0;
        }
        if (tr->failed) {
            pr_warn("transaction failed, abort\n");
            return;
        }
    }
    full_assert(block_set_check(tr, new_set));
}
/** transaction_rebuild_free_set - Rebuild free set from referenced file blocks
 * @tr: Transaction object.
 * @new_free_set: Output free set.
 * @new_files: Root block and mac of the files tree.
 * @new_checkpoint: Checkpoint metadata block and mac.
 *
 * Rebuilds the file system free set by walking the current files tree and
 * ensuring that all referenced blocks are marked as not free. @new_free_set
 * will be initialized to contain all blocks not referenced from the files root.
 * The @new_checkpoint metadata block will also be removed from the free set,
 * but its children (checkpoint files tree and free set) will not be checked.
 * The blocks in the checkpoint (beside the metadata block) are tracked as
 * free/allocated by the checkpoint free set rather than the active file system
 * free set.
 *
 * We ignore tr->freed and tr->fs->free here because we are reconstructing the
 * entire free set. All blocks that were freed in this transaction will not be
 * referenced by @new_files.
 */
static void transaction_rebuild_free_set(struct transaction* tr,
                                         struct block_set* new_free_set,
                                         struct block_mac* new_files,
                                         struct block_mac* new_checkpoint) {
    /* Range covering every block the file system may allocate. */
    struct block_range init_range = {
            .start = tr->fs->min_block_num,
            .end = tr->fs->dev->block_count,
    };
    struct block_range range;
    struct block_set previously_allocated =
            BLOCK_SET_INITIAL_VALUE(previously_allocated);
    /*
     * Copy and save tr->allocated so that we can keep track of the blocks
     * already allocated for the current transaction when performing allocations
     * for the new free set tree nodes. We then reset tr->allocated so that it
     * will only hold new blocks allocated for new_free_set. All blocks
     * allocated for files will already be referenced in new_files, so we'll
     * already be removing them from new_free_set.
     */
    assert(list_in_list(&tr->allocated.node));
    list_delete(&tr->allocated.node);
    block_set_copy_ro(tr, &previously_allocated, &tr->allocated);
    list_add_tail(&tr->fs->allocated, &previously_allocated.node);
    block_set_init(tr->fs, &tr->allocated);
    list_add_tail(&tr->fs->allocated, &tr->allocated.node);
    /* Start with every block marked free, then subtract referenced blocks. */
    block_set_init(tr->fs, new_free_set);
    new_free_set->block_tree.copy_on_write = true;
    new_free_set->block_tree.allow_copy_on_write = true;
    block_set_add_range(tr, new_free_set, init_range);
    /* Only the checkpoint metadata block is removed; not its children. */
    if (block_mac_valid(tr, new_checkpoint)) {
        block_set_remove_block(tr, new_free_set,
                               block_mac_to_block(tr, new_checkpoint));
    }
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        return;
    }
    /* Remove every block referenced from the files tree root. */
    if (block_mac_valid(tr, new_files)) {
        files_rebuild_free_set(tr, new_free_set, new_files);
        if (tr->failed) {
            pr_warn("transaction failed, abort\n");
            return;
        }
    }
    /*
     * Remove the blocks allocated for new_free_set itself; tr->allocated was
     * reset above, so it now only contains those tree-node blocks.
     */
    for (range = block_set_find_next_range(tr, &tr->allocated, 1);
         !block_range_empty(range);
         range = block_set_find_next_range(tr, &tr->allocated, range.end)) {
        tr->min_free_block = range.end;
        block_allocator_suspend_set_updates(tr);
        block_set_remove_range(tr, new_free_set, range);
        block_allocator_process_queue(tr);
    }
    /*
     * Copy the rest of the allocated blocks back to tr->allocated to maintain a
     * consistent state. We don't actually need to do this with the current code
     * calling this function, but this restores the transaction state to what
     * would be expected if it were to be used in the future.
     */
    for (range = block_set_find_next_range(tr, &previously_allocated, 1);
         !block_range_empty(range);
         range = block_set_find_next_range(tr, &previously_allocated,
                                           range.end)) {
        block_set_add_range(tr, &tr->allocated, range);
    }
    list_delete(&previously_allocated.node);
    full_assert(block_set_check(tr, new_free_set));
}
/**
 * transaction_block_need_copy - Check if block needs copy
 * @tr:    Transaction object.
 * @block: Block number to check.
 *
 * A block must not be a tmp allocation and must not have a queued tmp
 * allocation; both are assertion failures here.
 *
 * Return: %true if block has not been allocated as a non-tmp block for @tr,
 * %false otherwise.
 */
bool transaction_block_need_copy(struct transaction* tr, data_block_t block) {
    assert(block);
    assert(!block_set_block_in_set(tr, &tr->tmp_allocated, block));
    assert(!block_allocator_allocation_queued(tr, block, true));

    if (block_set_block_in_set(tr, &tr->allocated, block)) {
        /* Already allocated by this transaction; no copy needed. */
        return false;
    }
    /* Not in the allocated set; needs a copy unless an allocation is queued. */
    return !block_allocator_allocation_queued(tr, block, false);
}
/**
 * transaction_delete_active - Remove transaction from active lists (internal)
 * @tr: Transaction object.
 *
 * Both per-transaction block sets must currently be linked into the
 * file system's allocated list.
 */
static void transaction_delete_active(struct transaction* tr) {
    assert(list_in_list(&tr->allocated.node));
    assert(list_in_list(&tr->tmp_allocated.node));
    list_delete(&tr->tmp_allocated.node);
    list_delete(&tr->allocated.node);
}
/**
 * transaction_fail - Fail transaction
 * @tr: Transaction object.
 *
 * Marks transaction as failed. For transactions that are not yet complete,
 * also removes the transaction from the active list, discards its dirty
 * cache entries and restores open files to the last committed state.
 */
void transaction_fail(struct transaction* tr) {
    assert(!tr->failed);
    tr->failed = true;

    if (!tr->complete) {
        /* Tear down active-transaction state. */
        block_cache_discard_transaction(tr, true);
        transaction_delete_active(tr);
        file_transaction_failed(tr);
    }
}
/**
 * transaction_free - Free transaction
 * @tr: Transaction object.
 *
 * Prepare @tr for free. @tr must not be active and all open files must already
 * be closed. Unlinks @tr from the file system's transaction list; releasing
 * the memory backing @tr is the caller's responsibility.
 */
void transaction_free(struct transaction* tr) {
    assert(!transaction_is_active(tr));
    assert(list_is_empty(&tr->open_files));
    assert(list_in_list(&tr->node));
    list_delete(&tr->node);
}
/**
 * check_free_tree - Check tree of free set (internal)
 * @tr:   Transaction object.
 * @free: Set object.
 *
 * Check that the blocks used by the tree for a free set are not in the same
 * set. Walks every tree path and asserts that no node block on the path is a
 * member of @free.
 */
static void check_free_tree(struct transaction* tr, struct block_set* free) {
    struct block_tree_path path;
    unsigned int depth;

    block_tree_walk(tr, &free->block_tree, 0, true, &path);
    for (; block_tree_path_get_key(&path); block_tree_path_next(&path)) {
        /* Every node on the current path must be absent from @free. */
        for (depth = 0; depth < path.count; depth++) {
            assert(!block_set_block_in_set(
                    tr, free,
                    block_mac_to_block(tr, &path.entry[depth].block_mac)));
        }
    }
}
/**
 * transaction_complete_etc - Complete transaction, optionally updating
 *                            checkpoint
 * @tr: Transaction object.
 * @update_checkpoint: If true, update checkpoint with the new file-system
 *                     state.
 */
void transaction_complete_etc(struct transaction* tr, bool update_checkpoint) {
    struct block_mac new_files;
    struct transaction* tmp_tr;
    struct transaction* other_tr;
    struct block_set new_free_set = BLOCK_SET_INITIAL_VALUE(new_free_set);
    struct checkpoint* new_checkpoint = NULL;
    struct block_mac new_checkpoint_mac;
    struct obj_ref new_checkpoint_ref =
            OBJ_REF_INITIAL_VALUE(new_checkpoint_ref);
    bool super_block_updated;
    assert(tr->fs);
    assert(!tr->complete);
    /* Commit any previously requested checkpoint before this transaction. */
    if (tr->fs->checkpoint_required) {
        tr->fs->checkpoint_required = false;
        if (!checkpoint_commit(tr->fs)) {
            /*
             * checkpoint creation failed, so we need to try again before we
             * commit the next transaction
             */
            tr->fs->checkpoint_required = true;
            transaction_fail(tr);
            pr_warn("auto-checkpoint failed, abort\n");
            goto err_transaction_failed;
        }
    }
    // printf("%s: %" PRIu64 "\n", __func__, tr->version);
    /* Default to the current checkpoint mac unless update_checkpoint is set. */
    block_mac_copy(tr, &new_checkpoint_mac, &tr->fs->checkpoint);
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }
    assert(transaction_is_active(tr));
    /* Write out file changes and obtain the new files-tree root. */
    file_transaction_complete(tr, &new_files);
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }
    if (update_checkpoint) {
        /* Acquire a new checkpoint metadata block; released on error paths. */
        new_checkpoint = checkpoint_get_new_block(tr, &new_checkpoint_ref,
                                                  &new_checkpoint_mac);
        if (tr->failed) {
            pr_warn("transaction failed, abort\n");
            goto err_transaction_failed;
        }
        assert(new_checkpoint);
    }
    tr->new_free_set = &new_free_set;
    /* Build the post-commit free set, from scratch or incrementally. */
    if (tr->rebuild_free_set) {
        transaction_rebuild_free_set(tr, &new_free_set, &new_files,
                                     &new_checkpoint_mac);
    } else {
        transaction_merge_free_sets(tr, &new_free_set, &tr->fs->free,
                                    &tr->allocated, &tr->freed);
    }
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }
    /* Refuse commits that would leave less than the reserved block count. */
    if (!transaction_check_free(tr, &new_free_set, tr->fs->reserved_count)) {
        if (!tr->failed) {
            transaction_fail(tr);
        }
        pr_warn("transaction would leave fs too full, abort\n");
        goto err_transaction_failed;
    }
    if (tr->fs->alternate_data && tr->repaired) {
        if (!tr->failed) {
            transaction_fail(tr);
        }
        pr_warn("transaction cannot repair alternate fs, abort\n");
        goto err_transaction_failed;
    }
    /* Debug dump of all block sets; disabled by default. */
    if (0) {
        printf("%s: old free:\n", __func__);
        block_set_print(tr, &tr->fs->free);
        printf("%s: tmp_allocated:\n", __func__);
        block_set_print(tr, &tr->tmp_allocated);
        printf("%s: allocated:\n", __func__);
        block_set_print(tr, &tr->allocated);
        printf("%s: freed:\n", __func__);
        block_set_print(tr, &tr->freed);
        printf("%s: new free:\n", __func__);
        block_set_print(tr, &new_free_set);
    }
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }
    if (update_checkpoint) {
        checkpoint_update_roots(tr, new_checkpoint, &new_files,
                                &new_free_set.block_tree.root);
        block_put_dirty(tr, new_checkpoint, &new_checkpoint_ref,
                        &new_checkpoint_mac, NULL);
        /*
         * We have now released the block reference new_checkpoint_ref, so make
         * sure we don't release it again in err_transaction_failed
         */
        new_checkpoint = NULL;
    }
    /* Flush all dirty blocks belonging to this transaction. */
    block_cache_clean_transaction(tr);
    if (tr->failed) {
        pr_warn("transaction failed, abort\n");
        goto err_transaction_failed;
    }
    assert(block_range_empty(new_free_set.initial_range));
    check_free_tree(tr, &new_free_set);
    if (block_mac_same_block(tr, &tr->fs->free.block_tree.root,
                             &new_free_set.block_tree.root)) {
        /*
         * If the root block of the free tree did not move, there can be no
         * other changes to the filesystem.
         */
        assert(block_mac_eq(tr, &tr->fs->free.block_tree.root,
                            &new_free_set.block_tree.root));
        assert(block_mac_eq(tr, &tr->fs->files.root, &new_files));
        /*
         * Skip super block write if there are no changes to the filesystem.
         * This is needed in case a previous write error has triggered a request
         * to write another copy of the old super block. There can only be one
         * copy of each block in the cache. If we try to write a new super block
         * here before cleaning the pending one, we get a conflict. If there
         * were changes to the filesystem, the pending super block has already
         * been cleaned at this point.
         */
        goto complete_nop_transaction;
    }
    super_block_updated = update_super_block(tr, &new_free_set.block_tree.root,
                                             &new_files, &new_checkpoint_mac);
    if (!super_block_updated) {
        assert(tr->failed);
        pr_warn("failed to update super block, abort\n");
        goto err_transaction_failed;
    }
    block_cache_clean_transaction(tr);
    /*
     * If an error was detected writing the super block, it is not safe to
     * continue as we do not know if the write completed. We need to rewrite a
     * known state over the unknown super block to avoid an inconsistent view of
     * the filesystem.
     *
     * At this point block_cache_complete_write has been called by the block
     * device, so the current superblock slot in the block cache is free and not
     * associated with the pending transaction.
     */
    if (tr->failed) {
        pr_warn("failed to write super block, notify fs and abort the transaction\n");
        /*
         * Superblock could have been written or not. Make sure no other blocks
         * are written to the filesystem before writing another copy of the
         * superblock with the existing file and free trees.
         *
         * TODO: Don't trigger a superblock write on unaffected filesystems.
         * We update all for now to simplify testing.
         */
        fs_unknown_super_block_state_all();
        goto err_transaction_failed;
    }
    /* Commit succeeded; publish the new roots to the in-memory fs state. */
    tr->fs->free.block_tree.root = new_free_set.block_tree.root;
    block_range_clear(
            &tr->fs->free
                     .initial_range); /* clear for initial file-system state */
    tr->fs->files.root = new_files;
    tr->fs->super_block_version = tr->fs->written_super_block_version;
    tr->fs->checkpoint = new_checkpoint_mac;
    if (tr->repaired) {
        assert(!tr->fs->alternate_data);
        tr->fs->main_repaired = true;
    }
    if (update_checkpoint) {
        /* The new checkpoint shares its free tree root with the active fs. */
        tr->fs->checkpoint_free.block_tree.root = new_free_set.block_tree.root;
        block_range_clear(&tr->fs->checkpoint_free.initial_range);
    }
complete_nop_transaction:
    transaction_delete_active(tr);
    tr->complete = true;
    file_transaction_success(tr);
    assert(!tr->failed);
    check_free_tree(tr, &tr->fs->free);
    /* Fail other active transactions that conflict with this commit. */
    list_for_every_entry_safe(&tr->fs->transactions, other_tr, tmp_tr,
                              struct transaction, node) {
        if (tr->failed) {
            break;
        }
        if (!transaction_is_active(other_tr)) {
            continue;
        }
        if (tr->rebuild_free_set) {
            /*
             * TODO: only fail actually conflicting transactions when rebuilding
             * the free set. When rebuilding, tr->freed does not contain all
             * freed blocks if tree nodes were dropped. We could rebuild a free
             * set delta by subtracting the new free set from the old one and
             * then compare this delta against other transactions.
             */
            pr_warn("Rebuilding free set requires failing all pending transactions\n");
            transaction_fail(other_tr);
        } else if (block_set_overlap(tr, &tr->freed, &other_tr->freed)) {
            pr_warn("fail conflicting transaction\n");
            transaction_fail(other_tr);
        }
    }
    if (tr->failed) {
        /* Failing a conflict marked @tr failed; retry against all of them. */
        pr_warn("transaction failed while failing conflicting transactions\n");
        tr->failed = false;
        list_for_every_entry_safe(&tr->fs->transactions, other_tr, tmp_tr,
                                  struct transaction, node) {
            if (!transaction_is_active(other_tr)) {
                continue;
            }
            pr_warn("fail possibly conflicting transaction\n");
            transaction_fail(other_tr);
        }
    }
    assert(!tr->failed);
    block_cache_discard_transaction(tr, false);
err_transaction_failed:
    /* Release the checkpoint block reference if it was not consumed above. */
    if (new_checkpoint) {
        block_put_dirty_discard(new_checkpoint, &new_checkpoint_ref);
    }
    if (tr->failed) {
        file_transaction_complete_failed(tr);
    }
    assert(!block_cache_debug_get_ref_block_count());
}
/**
 * transaction_initial_super_block_complete - Complete special transaction
 * @tr: Transaction object. Must match initial_super_block_tr in fs.
 *
 * Flush the initial superblock in @tr to disk. If the block could not be
 * written re-initialize @tr and leave it in place for another attempt.
 * Otherwise clear @tr->fs->initial_super_block_tr and free @tr.
 *
 * The initial superblock can only be flushed from the block cache by the
 * block_cache_clean_transaction() call here, as we do not allow initial
 * superblocks to be flushed to make room for other data. This ensures that we
 * don't run out of room to recreate the superblock write in case it fails.
 */
void transaction_initial_super_block_complete(struct transaction* tr) {
    assert(tr == tr->fs->initial_super_block_tr);
    block_cache_clean_transaction(tr);
    if (tr->failed) {
        /*
         * If we failed to write the superblock we re-initialize a new attempt
         * to write that superblock before the next time we write to this
         * filesystem.
         */
        pr_err("%s: failed to write initial superblock, version %d.\n",
               __func__, tr->fs->written_super_block_version);
        write_current_super_block(tr->fs, true /* reinitialize */);
        return;
    }
    printf("%s: write initial superblock, version %d -> %d\n", __func__,
           tr->fs->super_block_version, tr->fs->written_super_block_version);
    assert(tr == tr->fs->initial_super_block_tr);
    tr->fs->super_block_version = tr->fs->written_super_block_version;
    tr->fs->initial_super_block_tr = NULL;
    /* not a real transaction, discard the state so it can be freed */
    transaction_fail(tr);
    transaction_free(tr);
    /*
     * NOTE(review): @tr is presumably heap-allocated by whoever created the
     * initial superblock transaction — confirm ownership; also this file does
     * not directly include <stdlib.h> for free(), which one of the project
     * headers must provide.
     */
    free(tr);
}
/**
 * transaction_activate - Activate transaction
 * @tr: Transaction object.
 *
 * Resets all per-transaction state and links the transaction's block sets
 * into the file system's allocated list, making it active.
 */
void transaction_activate(struct transaction* tr) {
    assert(tr->fs);
    assert(!transaction_is_active(tr));

    /* Clear status flags from any previous use of this object. */
    tr->failed = false;
    tr->invalid_block_found = false;
    tr->complete = false;
    tr->rebuild_free_set = false;
    tr->repaired = false;

    /* Reset allocator cursors and the pending free-set pointer. */
    tr->min_free_block = 0;
    tr->last_free_block = 0;
    tr->last_tmp_free_block = 0;
    tr->new_free_set = NULL;

    /* Start with empty block sets and file trees. */
    block_set_init(tr->fs, &tr->tmp_allocated);
    block_set_init(tr->fs, &tr->allocated);
    block_set_init(tr->fs, &tr->freed);
    fs_file_tree_init(tr->fs, &tr->files_added);
    fs_file_tree_init(tr->fs, &tr->files_updated);
    fs_file_tree_init(tr->fs, &tr->files_removed);

    /* Link the per-transaction sets into the fs-wide allocated list. */
    list_add_tail(&tr->fs->allocated, &tr->allocated.node);
    list_add_tail(&tr->fs->allocated, &tr->tmp_allocated.node);
}
/**
 * transaction_init - Initialize new transaction object
 * @tr:       Transaction object to initialize (contents are overwritten).
 * @fs:       File system state object.
 * @activate: If true, also activate the transaction immediately.
 *
 * Zeroes @tr, binds it to @fs and adds it to the file system's transaction
 * list. The open-files list starts empty.
 */
void transaction_init(struct transaction* tr, struct fs* fs, bool activate) {
    assert(fs);
    assert(fs->dev);

    memset(tr, 0, sizeof(*tr));
    tr->fs = fs;
    list_initialize(&tr->open_files);
    list_add_tail(&fs->transactions, &tr->node);

    if (activate) {
        transaction_activate(tr);
    }
}