libsnapshot: Restrict when snapshots can be deleted.

Do not delete snapshots for any reason other than merge-complete or a
cancel request from update_engine. Additionally, create a rollback
indicator file if booting into the source slot, so update_engine can
attempt to cancel the update.

Bug: 147819418
Bug: 147347110
Test: libsnapshot_test gtest
Change-Id: Id357a91cec467a60246c7c3d133f6c54ccb3fc93
(cherry picked from commit 367cca3937bd449f6762f625336ca10d28076d17)
diff --git a/fs_mgr/libsnapshot/device_info.cpp b/fs_mgr/libsnapshot/device_info.cpp
index bacb41c..0e90100 100644
--- a/fs_mgr/libsnapshot/device_info.cpp
+++ b/fs_mgr/libsnapshot/device_info.cpp
@@ -22,6 +22,7 @@
 namespace snapshot {
 
 #ifdef LIBSNAPSHOT_USE_HAL
+using android::hardware::boot::V1_0::BoolResult;
 using android::hardware::boot::V1_0::CommandResult;
 #endif
 
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index e786bc9..ed92dd7 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -174,6 +174,7 @@
     // See InitiateMerge() and ProcessUpdateState() for details.
     // Returns:
     //   - None if no merge to initiate
+    //   - Unverified if called on the source slot
     //   - MergeCompleted if merge is completed
     //   - other states indicating an error has occurred
     UpdateState InitiateMergeAndWait();
@@ -273,6 +274,7 @@
     FRIEND_TEST(SnapshotTest, UpdateBootControlHal);
     FRIEND_TEST(SnapshotUpdateTest, DataWipeAfterRollback);
     FRIEND_TEST(SnapshotUpdateTest, DataWipeRollbackInRecovery);
+    FRIEND_TEST(SnapshotUpdateTest, FullUpdateFlow);
     FRIEND_TEST(SnapshotUpdateTest, MergeCannotRemoveCow);
     FRIEND_TEST(SnapshotUpdateTest, MergeInRecovery);
     FRIEND_TEST(SnapshotUpdateTest, SnapshotStatusFileWithoutCow);
@@ -374,7 +376,7 @@
     bool HandleCancelledUpdate(LockedFile* lock);
 
     // Helper for HandleCancelledUpdate. Assumes booting from new slot.
-    bool HandleCancelledUpdateOnNewSlot(LockedFile* lock);
+    bool AreAllSnapshotsCancelled(LockedFile* lock);
 
     // Remove artifacts created by the update process, such as snapshots, and
     // set the update state to None.
@@ -439,7 +441,7 @@
     std::string GetSnapshotStatusFilePath(const std::string& name);
 
     std::string GetSnapshotBootIndicatorPath();
-    void RemoveSnapshotBootIndicator();
+    std::string GetRollbackIndicatorPath();
 
     // Return the name of the device holding the "snapshot" or "snapshot-merge"
     // target. This may not be the final device presented via MapSnapshot(), if
@@ -503,6 +505,8 @@
     friend std::ostream& operator<<(std::ostream& os, SnapshotManager::Slot slot);
     Slot GetCurrentSlot();
 
+    std::string ReadUpdateSourceSlotSuffix();
+
     std::string gsid_dir_;
     std::string metadata_dir_;
     std::unique_ptr<IDeviceInfo> device_;
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 63a9302..a937b43 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -188,11 +188,19 @@
     return true;
 }
 
-SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
+std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
     auto boot_file = GetSnapshotBootIndicatorPath();
     std::string contents;
     if (!android::base::ReadFileToString(boot_file, &contents)) {
         PLOG(WARNING) << "Cannot read " << boot_file;
+        return {};
+    }
+    return contents;
+}
+
+SnapshotManager::Slot SnapshotManager::GetCurrentSlot() {
+    auto contents = ReadUpdateSourceSlotSuffix();
+    if (contents.empty()) {
         return Slot::Unknown;
     }
     if (device_->GetSlotSuffix() == contents) {
@@ -201,6 +209,15 @@
     return Slot::Target;
 }
 
+static bool RemoveFileIfExists(const std::string& path) {
+    std::string message;
+    if (!android::base::RemoveFileIfExists(path, &message)) {
+        LOG(ERROR) << "Remove failed: " << path << ": " << message;
+        return false;
+    }
+    return true;
+}
+
 bool SnapshotManager::RemoveAllUpdateState(LockedFile* lock) {
     LOG(INFO) << "Removing all update state.";
 
@@ -223,7 +240,13 @@
         return false;
     }
 
-    RemoveSnapshotBootIndicator();
+    // It's okay if these fail - first-stage init performs a deeper check after
+    // reading the indicator file, so it's not a problem if it still exists
+    // after the update completes.
+    std::vector<std::string> files = {GetSnapshotBootIndicatorPath(), GetRollbackIndicatorPath()};
+    for (const auto& file : files) {
+        RemoveFileIfExists(file);
+    }
 
     // If this fails, we'll keep trying to remove the update state (as the
     // device reboots or starts a new update) until it finally succeeds.
@@ -250,6 +273,13 @@
         return false;
     }
 
+    // This file is written on boot to detect whether a rollback occurred. It
+    // MUST NOT exist before rebooting, otherwise, we're at risk of deleting
+    // snapshots too early.
+    if (!RemoveFileIfExists(GetRollbackIndicatorPath())) {
+        return false;
+    }
+
     // This file acts as both a quick indicator for init (it can use access(2)
     // to decide how to do first-stage mounts), and it stores the old slot, so
     // we can tell whether or not we performed a rollback.
@@ -966,14 +996,8 @@
     return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
 }
 
-void SnapshotManager::RemoveSnapshotBootIndicator() {
-    // It's okay if this fails - first-stage init performs a deeper check after
-    // reading the indicator file, so it's not a problem if it still exists
-    // after the update completes.
-    auto boot_file = GetSnapshotBootIndicatorPath();
-    if (unlink(boot_file.c_str()) == -1 && errno != ENOENT) {
-        PLOG(ERROR) << "unlink " << boot_file;
-    }
+std::string SnapshotManager::GetRollbackIndicatorPath() {
+    return metadata_dir_ + "/rollback-indicator";
 }
 
 void SnapshotManager::AcknowledgeMergeSuccess(LockedFile* lock) {
@@ -1144,25 +1168,18 @@
     if (slot == Slot::Unknown) {
         return false;
     }
-    if (slot == Slot::Target) {
-        // We're booted into the target slot, which means we just rebooted
-        // after applying the update.
-        if (!HandleCancelledUpdateOnNewSlot(lock)) {
-            return false;
-        }
+
+    // If all snapshots were reflashed, then cancel the entire update.
+    if (AreAllSnapshotsCancelled(lock)) {
+        RemoveAllUpdateState(lock);
+        return true;
     }
 
-    // The only way we can get here is if:
-    //  (1) The device rolled back to the previous slot.
-    //  (2) This function was called prematurely before rebooting the device.
-    //  (3) fastboot set_active was used.
-    //  (4) The device updates to the new slot but re-flashed *all* partitions
-    //      in the new slot.
-    //
-    // In any case, delete the snapshots. It may be worth using the boot_control
-    // HAL to differentiate case (2).
-    RemoveAllUpdateState(lock);
-    return true;
+    // This unverified update might be rolled back, or it might not (b/147347110
+    // comment #77). Take no action, as update_engine is responsible for deciding
+    // whether to cancel.
+    LOG(ERROR) << "Update state is being processed before reboot, taking no action.";
+    return false;
 }
 
 std::unique_ptr<LpMetadata> SnapshotManager::ReadCurrentMetadata() {
@@ -1187,7 +1204,7 @@
     return MetadataPartitionState::Flashed;
 }
 
-bool SnapshotManager::HandleCancelledUpdateOnNewSlot(LockedFile* lock) {
+bool SnapshotManager::AreAllSnapshotsCancelled(LockedFile* lock) {
     std::vector<std::string> snapshots;
     if (!ListSnapshots(lock, &snapshots)) {
         LOG(WARNING) << "Failed to list snapshots to determine whether device has been flashed "
@@ -1196,35 +1213,45 @@
         return true;
     }
 
+    auto source_slot_suffix = ReadUpdateSourceSlotSuffix();
+    if (source_slot_suffix.empty()) {
+        return false;
+    }
+    uint32_t source_slot = SlotNumberForSlotSuffix(source_slot_suffix);
+    uint32_t target_slot = (source_slot == 0) ? 1 : 0;
+
     // Attempt to detect re-flashing on each partition.
     // - If all partitions are re-flashed, we can proceed to cancel the whole update.
     // - If only some of the partitions are re-flashed, snapshots for re-flashed partitions are
     //   deleted. Caller is responsible for merging the rest of the snapshots.
     // - If none of the partitions are re-flashed, caller is responsible for merging the snapshots.
-    auto metadata = ReadCurrentMetadata();
-    if (!metadata) return false;
-    bool all_snapshot_cancelled = true;
+    //
+    // Note that we use target slot metadata, since if an OTA has been applied
+    // to the target slot, we can detect the UPDATED flag. Any kind of flash
+    // operation against dynamic partitions ensures that all copies of the
+    // metadata are in sync, so flashing all partitions on the source slot will
+    // remove the UPDATED flag on the target slot as well.
+    const auto& opener = device_->GetPartitionOpener();
+    auto super_device = device_->GetSuperDevice(target_slot);
+    auto metadata = android::fs_mgr::ReadMetadata(opener, super_device, target_slot);
+    if (!metadata) {
+        return false;
+    }
+
+    bool all_snapshots_cancelled = true;
     for (const auto& snapshot_name : snapshots) {
         if (GetMetadataPartitionState(*metadata, snapshot_name) ==
             MetadataPartitionState::Updated) {
-            LOG(WARNING) << "Cannot cancel update because snapshot" << snapshot_name
-                         << " is in use.";
-            all_snapshot_cancelled = false;
+            all_snapshots_cancelled = false;
             continue;
         }
         // Delete snapshots for partitions that are re-flashed after the update.
-        LOG(INFO) << "Detected re-flashing of partition " << snapshot_name << ".";
-        if (!DeleteSnapshot(lock, snapshot_name)) {
-            // This is an error, but it is okay to leave the snapshot in the short term.
-            // However, if all_snapshot_cancelled == false after exiting the loop, caller may
-            // initiate merge for this unused snapshot, which is likely to fail.
-            LOG(WARNING) << "Failed to delete snapshot for re-flashed partition " << snapshot_name;
-        }
+        LOG(WARNING) << "Detected re-flashing of partition " << snapshot_name << ".";
     }
-    if (!all_snapshot_cancelled) return false;
-
-    LOG(INFO) << "All partitions are re-flashed after update, removing all update states.";
-    return true;
+    if (all_snapshots_cancelled) {
+        LOG(WARNING) << "All partitions are re-flashed after update, removing all update states.";
+    }
+    return all_snapshots_cancelled;
 }
 
 bool SnapshotManager::RemoveAllSnapshots(LockedFile* lock) {
@@ -1344,7 +1371,16 @@
     // the reason be clearer? Because the indicator file still exists, and
     // if this was FATAL, reverting to the old slot would be broken.
     auto slot = GetCurrentSlot();
+
     if (slot != Slot::Target) {
+        if (slot == Slot::Source && !device_->IsRecovery()) {
+            // Device is rebooting into the original slot, so mark this as a
+            // rollback.
+            auto path = GetRollbackIndicatorPath();
+            if (!android::base::WriteStringToFile("1", path)) {
+                PLOG(ERROR) << "Unable to write rollback indicator: " << path;
+            }
+        }
         LOG(INFO) << "Not booting from new slot. Will not mount snapshots.";
         return false;
     }
@@ -2370,6 +2406,10 @@
         return state;
     }
     if (state == UpdateState::Unverified) {
+        if (GetCurrentSlot() != Slot::Target) {
+            LOG(INFO) << "Cannot merge until device reboots.";
+            return state;
+        }
         if (!InitiateMerge()) {
             LOG(ERROR) << "Failed to initiate merge.";
             return state;
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index c5ad44c..9777264 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -447,6 +447,9 @@
     auto sm = SnapshotManager::NewForFirstStageMount(info);
     ASSERT_NE(sm, nullptr);
     ASSERT_FALSE(sm->NeedSnapshotsInFirstStageMount());
+
+    auto indicator = sm->GetRollbackIndicatorPath();
+    ASSERT_EQ(access(indicator.c_str(), R_OK), 0);
 }
 
 TEST_F(SnapshotTest, Merge) {
@@ -1015,6 +1018,9 @@
     ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount());
     ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
 
+    auto indicator = sm->GetRollbackIndicatorPath();
+    ASSERT_NE(access(indicator.c_str(), R_OK), 0);
+
     // Check that the target partitions have the same content.
     for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
         ASSERT_TRUE(IsPartitionUnchanged(name));
@@ -1681,9 +1687,11 @@
     ASSERT_NE(nullptr, flashed_builder->FindPartition("prd" + flashed_slot_suffix));
     flashed_builder->RemovePartition("prd" + flashed_slot_suffix);
 
+    // Note that fastbootd always updates the partition table of both slots.
     auto flashed_metadata = flashed_builder->Export();
     ASSERT_NE(nullptr, flashed_metadata);
-    ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *flashed_metadata, flashed_slot));
+    ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *flashed_metadata, 0));
+    ASSERT_TRUE(UpdatePartitionTable(*opener_, "super", *flashed_metadata, 1));
 
     std::string path;
     for (const auto& name : {"sys", "vnd"}) {
diff --git a/fs_mgr/libsnapshot/snapshotctl.cpp b/fs_mgr/libsnapshot/snapshotctl.cpp
index 9f23c45..d724be3 100644
--- a/fs_mgr/libsnapshot/snapshotctl.cpp
+++ b/fs_mgr/libsnapshot/snapshotctl.cpp
@@ -112,7 +112,9 @@
 
     auto state = SnapshotManager::New()->InitiateMergeAndWait();
 
-    if (state == UpdateState::None) {
+    // We could wind up in the Unverified state if the device rolled back or
+    // hasn't fully rebooted. Ignore this.
+    if (state == UpdateState::None || state == UpdateState::Unverified) {
         return true;
     }
     if (state == UpdateState::MergeCompleted) {