| // |
| // Copyright (C) 2020 The Android Open Source Project |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| |
| #include "update_engine/payload_generator/merge_sequence_generator.h" |
| |
| #include <algorithm> |
| #include <limits> |
| |
| #include "update_engine/payload_generator/extent_ranges.h" |
| #include "update_engine/payload_generator/extent_utils.h" |
| #include "update_engine/update_metadata.pb.h" |
| |
| namespace chromeos_update_engine { |
| |
| CowMergeOperation CreateCowMergeOperation(const Extent& src_extent, |
| const Extent& dst_extent, |
| CowMergeOperation::Type op_type, |
| uint32_t src_offset) { |
| CowMergeOperation ret; |
| ret.set_type(op_type); |
| *ret.mutable_src_extent() = src_extent; |
| *ret.mutable_dst_extent() = dst_extent; |
| ret.set_src_offset(src_offset); |
| return ret; |
| } |
| |
| std::ostream& operator<<(std::ostream& os, |
| const CowMergeOperation& merge_operation) { |
| os << "CowMergeOperation src extent: " |
| << ExtentsToString({merge_operation.src_extent()}) |
| << ", dst extent: " << ExtentsToString({merge_operation.dst_extent()}); |
| if (merge_operation.has_src_offset()) { |
| os << ", src offset: " << merge_operation.src_offset(); |
| } |
| os << " op_type: "; |
| if (merge_operation.type() == CowMergeOperation::COW_COPY) { |
| os << "COW_COPY"; |
| } else if (merge_operation.type() == CowMergeOperation::COW_XOR) { |
| os << "COW_XOR"; |
| } else { |
| os << merge_operation.type(); |
| } |
| return os; |
| } |
| |
| // The OTA generation guarantees that all blocks in the dst extent will be |
| // written only once. So we can use it to order the CowMergeOperation. |
| bool operator<(const CowMergeOperation& op1, const CowMergeOperation& op2) { |
| return op1.dst_extent().start_block() < op2.dst_extent().start_block(); |
| } |
| |
| bool operator==(const CowMergeOperation& op1, const CowMergeOperation& op2) { |
| return op1.type() == op2.type() && op1.src_extent() == op2.src_extent() && |
| op1.dst_extent() == op2.dst_extent(); |
| } |
| |
| template <typename T> |
| constexpr T GetDifference(T first, T second) { |
| T abs_diff = (first > second) ? (first - second) : (second - first); |
| return abs_diff; |
| } |
| |
| CowMergeOperation::Type GetCowOpType(InstallOperation::Type install_op_type) { |
| switch (install_op_type) { |
| case InstallOperation::SOURCE_COPY: |
| return CowMergeOperation::COW_COPY; |
| case InstallOperation::SOURCE_BSDIFF: |
| case InstallOperation::BROTLI_BSDIFF: |
| case InstallOperation::PUFFDIFF: |
| return CowMergeOperation::COW_XOR; |
| default: |
| CHECK(false) << "Unknown install op type: " << install_op_type; |
| return CowMergeOperation::COW_REPLACE; |
| } |
| } |
| |
| void SplitSelfOverlapping(const Extent& src_extent, |
| const Extent& dst_extent, |
| std::vector<CowMergeOperation>* sequence) { |
| CHECK_EQ(src_extent.num_blocks(), dst_extent.num_blocks()); |
| if (src_extent.start_block() == dst_extent.start_block()) { |
| sequence->emplace_back(CreateCowMergeOperation( |
| src_extent, dst_extent, CowMergeOperation::COW_COPY)); |
| return; |
| } |
| |
| const size_t diff = |
| GetDifference(src_extent.start_block(), dst_extent.start_block()); |
| for (size_t i = 0; i < src_extent.num_blocks(); i += diff) { |
| auto num_blocks = std::min<size_t>(diff, src_extent.num_blocks() - i); |
| sequence->emplace_back(CreateCowMergeOperation( |
| ExtentForRange(i + src_extent.start_block(), num_blocks), |
| ExtentForRange(i + dst_extent.start_block(), num_blocks), |
| CowMergeOperation::COW_COPY)); |
| } |
| } |
| |
| static bool ProcessXorOps(std::vector<CowMergeOperation>* sequence, |
| const AnnotatedOperation& aop) { |
| const auto size_before = sequence->size(); |
| sequence->insert(sequence->end(), aop.xor_ops.begin(), aop.xor_ops.end()); |
| std::for_each( |
| sequence->begin() + size_before, |
| sequence->end(), |
| [](CowMergeOperation& op) { |
| CHECK_EQ(op.type(), CowMergeOperation::COW_XOR); |
| // If |src_offset| is greater than 0, then we are reading 1 |
| // extra block at the end of src_extent. This dependency must |
| // be honored during merge sequence generation, or we can end |
| // up with a corrupted device after merge. |
| if (op.src_offset() > 0) { |
| if (op.src_extent().num_blocks() == op.dst_extent().num_blocks()) { |
| op.mutable_src_extent()->set_num_blocks( |
| op.src_extent().num_blocks() + 1); |
| } |
| CHECK_EQ(op.src_extent().num_blocks(), |
| op.dst_extent().num_blocks() + 1); |
| } |
| CHECK_NE(op.src_extent().start_block(), |
| std::numeric_limits<uint64_t>::max()); |
| }); |
| return true; |
| } |
| |
| static bool ProcessCopyOps(std::vector<CowMergeOperation>* sequence, |
| const AnnotatedOperation& aop) { |
| CHECK_EQ(GetCowOpType(aop.op.type()), CowMergeOperation::COW_COPY); |
| if (aop.op.dst_extents().size() != 1) { |
| std::vector<Extent> out_extents; |
| ExtentsToVector(aop.op.dst_extents(), &out_extents); |
| LOG(ERROR) |
| << "The dst extents for source_copy are expected to be contiguous," |
| << " dst extents: " << ExtentsToString(out_extents); |
| return false; |
| } |
| // Split the source extents. |
| size_t used_blocks = 0; |
| for (const auto& src_extent : aop.op.src_extents()) { |
| // The dst_extent in the merge sequence will be a subset of |
| // InstallOperation's dst_extent. This will simplify the OTA -> COW |
| // conversion when we install the payload. |
| Extent dst_extent = |
| ExtentForRange(aop.op.dst_extents(0).start_block() + used_blocks, |
| src_extent.num_blocks()); |
| // Self-overlapping operation, must split into multiple non |
| // self-overlapping ops |
| if (ExtentRanges::ExtentsOverlap(src_extent, dst_extent)) { |
| SplitSelfOverlapping(src_extent, dst_extent, sequence); |
| } else { |
| sequence->emplace_back(CreateCowMergeOperation( |
| src_extent, dst_extent, CowMergeOperation::COW_COPY)); |
| } |
| used_blocks += src_extent.num_blocks(); |
| } |
| |
| if (used_blocks != aop.op.dst_extents(0).num_blocks()) { |
| LOG(ERROR) << "Number of blocks in src extents doesn't equal to the" |
| << " ones in the dst extents, src blocks " << used_blocks |
| << ", dst blocks " << aop.op.dst_extents(0).num_blocks(); |
| return false; |
| } |
| return true; |
| } |
| |
| std::unique_ptr<MergeSequenceGenerator> MergeSequenceGenerator::Create( |
| const std::vector<AnnotatedOperation>& aops, |
| std::string_view partition_name) { |
| std::vector<CowMergeOperation> sequence; |
| |
| for (const auto& aop : aops) { |
| if (aop.op.type() == InstallOperation::SOURCE_COPY) { |
| if (!ProcessCopyOps(&sequence, aop)) { |
| return nullptr; |
| } |
| } else if (!aop.xor_ops.empty()) { |
| if (!ProcessXorOps(&sequence, aop)) { |
| return nullptr; |
| } |
| } |
| } |
| |
| return std::unique_ptr<MergeSequenceGenerator>( |
| new MergeSequenceGenerator(sequence, partition_name)); |
| } |
| |
| template <typename T> |
| CowMergeOperation MaxOutDegree( |
| const T& nodes, |
| const std::map<CowMergeOperation, std::set<CowMergeOperation>>& |
| merge_after) { |
| // Rationale for this algorithm: |
| // We only need to remove nodes from the graph if the graph contains a cycle. |
| // Any graph of N nodes has cycle iff number of edges >= N. |
| // So, to restore the graph back to an acyclic state, we need to keep removing |
| // edges until we have <N edges left. To minimize the number of nodes removed, |
| // we always remove the node with maximum out degree. |
| CowMergeOperation best; |
| size_t max_out_degree = 0; |
| const auto has_xor = |
| std::any_of(nodes.begin(), |
| nodes.end(), |
| [&merge_after](const CowMergeOperation& node) { |
| if (node.type() != CowMergeOperation::COW_XOR) { |
| return false; |
| } |
| auto it = merge_after.find(node); |
| if (it == merge_after.end()) { |
| return false; |
| } |
| return it->second.size() > 0; |
| }); |
| for (const auto& op : nodes) { |
| if (has_xor && op.type() != CowMergeOperation::COW_XOR) { |
| continue; |
| } |
| const auto out_degree = merge_after.at(op).size(); |
| if (out_degree > max_out_degree) { |
| best = op; |
| max_out_degree = out_degree; |
| } else if (out_degree == max_out_degree) { |
| if (op.src_extent().num_blocks() < best.src_extent().num_blocks()) { |
| best = op; |
| } |
| } |
| } |
| CHECK_NE(max_out_degree, 0UL); |
| return best; |
| } |
| |
| template <typename T> |
| struct MapKeyIterator { |
| MapKeyIterator<T>& operator++() { |
| ++it; |
| return *this; |
| } |
| MapKeyIterator<T>& operator--() { |
| --it; |
| return *this; |
| } |
| bool operator==(const MapKeyIterator<T>& rhs) const { return it == rhs.it; } |
| bool operator!=(const MapKeyIterator<T>& rhs) const { return it != rhs.it; } |
| auto&& operator->() const { return it->first; } |
| auto&& operator*() const { return it->first; } |
| T it; |
| }; |
| |
| template <typename T, typename U> |
| struct MapKeyRange { |
| auto begin() const { |
| return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.begin()}; |
| } |
| auto end() const { |
| return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.end()}; |
| } |
| std::map<T, U> map; |
| }; |
| |
| CowMergeOperation MaxOutDegree( |
| const std::map<CowMergeOperation, int>& incoming_edges, |
| const std::map<CowMergeOperation, std::set<CowMergeOperation>>& |
| merge_after) { |
| return MaxOutDegree(MapKeyRange<CowMergeOperation, int>{incoming_edges}, |
| merge_after); |
| } |
| |
| // Given a potentially cyclic graph, return a node to remove to break cycles |
| // |incoming_edges| stores nodes' in degree, and |merge_after| is an outgoing |
| // edge list. For example, |merge_after[a]| returns all nodes which `a` has an |
| // out going edge to. |
| // The only requirement of this function is to return a node which is in |
| // |incoming_edges| . As long as this is satisfied, merge sequence generation |
| // will work. Caller will keep removing nodes returned by this function until |
| // the graph has no cycles. However, the choice of which node to remove can |
| // greatly impact COW sizes. Nodes removed from the graph will be converted to a |
| // COW_REPLACE operation, taking more disk space. So this function should try to |
| // pick a node which minimizes number of nodes we have to remove. (Modulo the |
| // weight of each node, which is how many blocks a CowMergeOperation touches) |
| CowMergeOperation PickConvertToRaw( |
| const std::map<CowMergeOperation, int>& incoming_edges, |
| const std::map<CowMergeOperation, std::set<CowMergeOperation>>& |
| merge_after) { |
| return MaxOutDegree(incoming_edges, merge_after); |
| } |
| |
| std::map<CowMergeOperation, std::set<CowMergeOperation>> |
| MergeSequenceGenerator::FindDependency( |
| const std::vector<CowMergeOperation>& operations) { |
| LOG(INFO) << "Finding dependencies"; |
| |
| // Since the OTA operation may reuse some source blocks, use the binary |
| // search on sorted dst extents to find overlaps. |
| std::map<CowMergeOperation, std::set<CowMergeOperation>> merge_after; |
| for (const auto& op : operations) { |
| // lower bound (inclusive): dst extent's end block >= src extent's start |
| // block. |
| const auto lower_it = std::lower_bound( |
| operations.begin(), |
| operations.end(), |
| op, |
| [](const CowMergeOperation& it, const CowMergeOperation& op) { |
| auto dst_end_block = |
| it.dst_extent().start_block() + it.dst_extent().num_blocks() - 1; |
| return dst_end_block < op.src_extent().start_block(); |
| }); |
| // upper bound: dst extent's start block > src extent's end block |
| const auto upper_it = std::upper_bound( |
| lower_it, |
| operations.end(), |
| op, |
| [](const CowMergeOperation& op, const CowMergeOperation& it) { |
| auto src_end_block = |
| op.src_extent().start_block() + op.src_extent().num_blocks() - 1; |
| return src_end_block < it.dst_extent().start_block(); |
| }); |
| |
| // TODO(xunchang) skip inserting the empty set to merge_after. |
| if (lower_it == upper_it) { |
| merge_after.insert({op, {}}); |
| } else { |
| std::set<CowMergeOperation> operations(lower_it, upper_it); |
| auto it = operations.find(op); |
| if (it != operations.end()) { |
| LOG(INFO) << "Self overlapping " << op; |
| operations.erase(it); |
| } |
| auto ret = merge_after.emplace(op, std::move(operations)); |
| // Check the insertion indeed happens. |
| CHECK(ret.second) << op; |
| } |
| } |
| |
| return merge_after; |
| } |
| |
| bool MergeSequenceGenerator::Generate( |
| std::vector<CowMergeOperation>* sequence) const { |
| sequence->clear(); |
| |
| LOG(INFO) << "Generating sequence"; |
| |
| // Use the non-DFS version of the topology sort. So we can control the |
| // operations to discard to break cycles; thus yielding a deterministic |
| // sequence. |
| std::map<CowMergeOperation, int> incoming_edges; |
| for (const auto& it : merge_after_) { |
| for (const auto& blocked : it.second) { |
| // Value is default initialized to 0. |
| incoming_edges[blocked] += 1; |
| } |
| } |
| |
| // Technically, we can use std::unordered_set or just a std::vector. but |
| // std::set gives the benefit where operations are sorted by dst blocks. This |
| // will ensure that operations that do not have dependency constraints appear |
| // in increasing block order. Such order would help snapuserd batch merges and |
| // improve boot time, but isn't strictly needed for correctness. |
| std::set<CowMergeOperation> free_operations; |
| for (const auto& op : operations_) { |
| if (incoming_edges.find(op) == incoming_edges.end()) { |
| free_operations.insert(op); |
| } |
| } |
| |
| std::vector<CowMergeOperation> merge_sequence; |
| std::set<CowMergeOperation> convert_to_raw; |
| while (!incoming_edges.empty()) { |
| if (!free_operations.empty()) { |
| merge_sequence.insert( |
| merge_sequence.end(), free_operations.begin(), free_operations.end()); |
| } else { |
| auto to_convert = PickConvertToRaw(incoming_edges, merge_after_); |
| // The operation we pick must be one of the nodes not already in merge |
| // sequence. |
| CHECK(incoming_edges.find(to_convert) != incoming_edges.end()); |
| |
| free_operations.insert(to_convert); |
| convert_to_raw.insert(to_convert); |
| LOG(INFO) << "Converting operation to raw " << to_convert; |
| } |
| |
| std::set<CowMergeOperation> next_free_operations; |
| for (const auto& op : free_operations) { |
| incoming_edges.erase(op); |
| |
| // Now that this particular operation is merged, other operations |
| // blocked by this one may be free. Decrement the count of blocking |
| // operations, and set up the free operations for the next iteration. |
| for (const auto& blocked : merge_after_.at(op)) { |
| auto it = incoming_edges.find(blocked); |
| if (it == incoming_edges.end()) { |
| continue; |
| } |
| |
| auto blocking_transfer_count = &it->second; |
| if (*blocking_transfer_count <= 0) { |
| LOG(ERROR) << "Unexpected count in merge after map " |
| << blocking_transfer_count; |
| return false; |
| } |
| // This operation is no longer blocked by anyone. Add it to the merge |
| // sequence in the next iteration. |
| *blocking_transfer_count -= 1; |
| if (*blocking_transfer_count == 0) { |
| next_free_operations.insert(blocked); |
| } |
| } |
| } |
| |
| LOG(INFO) << "Remaining transfers " << incoming_edges.size() |
| << ", free transfers " << free_operations.size() |
| << ", merge_sequence size " << merge_sequence.size(); |
| free_operations = std::move(next_free_operations); |
| } |
| |
| if (!free_operations.empty()) { |
| merge_sequence.insert( |
| merge_sequence.end(), free_operations.begin(), free_operations.end()); |
| } |
| |
| CHECK_EQ(operations_.size(), merge_sequence.size() + convert_to_raw.size()); |
| |
| size_t blocks_in_sequence = 0; |
| for (const CowMergeOperation& transfer : merge_sequence) { |
| blocks_in_sequence += transfer.dst_extent().num_blocks(); |
| } |
| |
| size_t blocks_in_raw = 0; |
| for (const CowMergeOperation& transfer : convert_to_raw) { |
| blocks_in_raw += transfer.dst_extent().num_blocks(); |
| } |
| |
| LOG(INFO) << "Blocks in merge sequence " << blocks_in_sequence |
| << ", blocks in raw " << blocks_in_raw << ", partition " |
| << partition_name_; |
| if (!ValidateSequence(merge_sequence)) { |
| LOG(ERROR) << "Invalid Sequence"; |
| return false; |
| } |
| |
| *sequence = std::move(merge_sequence); |
| return true; |
| } |
| |
| bool MergeSequenceGenerator::ValidateSequence( |
| const std::vector<CowMergeOperation>& sequence) { |
| LOG(INFO) << "Validating merge sequence"; |
| ExtentRanges visited; |
| for (const auto& op : sequence) { |
| // If |src_offset| is greater than zero, dependency should include 1 extra |
| // block at end of src_extent, as the OP actually references data past |
| // original src_extent. |
| if (op.src_offset() > 0) { |
| CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks() + 1) |
| << op; |
| } else { |
| CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks()) |
| << op; |
| } |
| if (visited.OverlapsWithExtent(op.src_extent())) { |
| LOG(ERROR) << "Transfer violates the merge sequence " << op |
| << "Visited extent ranges: "; |
| visited.Dump(); |
| return false; |
| } |
| |
| CHECK(!visited.OverlapsWithExtent(op.dst_extent())) |
| << "dst extent should write only once."; |
| visited.AddExtent(op.dst_extent()); |
| } |
| |
| return true; |
| } |
| |
| } // namespace chromeos_update_engine |