blob: fb43bfd11fce3f7e8701935cab12feab5aef0091 [file] [log] [blame]
//
// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "update_engine/payload_generator/merge_sequence_generator.h"
#include <algorithm>
#include <limits>
#include "update_engine/payload_generator/extent_ranges.h"
#include "update_engine/payload_generator/extent_utils.h"
#include "update_engine/update_metadata.pb.h"
namespace chromeos_update_engine {
CowMergeOperation CreateCowMergeOperation(const Extent& src_extent,
const Extent& dst_extent,
CowMergeOperation::Type op_type,
uint32_t src_offset) {
CowMergeOperation ret;
ret.set_type(op_type);
*ret.mutable_src_extent() = src_extent;
*ret.mutable_dst_extent() = dst_extent;
ret.set_src_offset(src_offset);
return ret;
}
std::ostream& operator<<(std::ostream& os,
const CowMergeOperation& merge_operation) {
os << "CowMergeOperation src extent: "
<< ExtentsToString({merge_operation.src_extent()})
<< ", dst extent: " << ExtentsToString({merge_operation.dst_extent()});
if (merge_operation.has_src_offset()) {
os << ", src offset: " << merge_operation.src_offset();
}
os << " op_type: ";
if (merge_operation.type() == CowMergeOperation::COW_COPY) {
os << "COW_COPY";
} else if (merge_operation.type() == CowMergeOperation::COW_XOR) {
os << "COW_XOR";
} else {
os << merge_operation.type();
}
return os;
}
// The OTA generation guarantees that all blocks in the dst extent will be
// written only once. So we can use it to order the CowMergeOperation.
bool operator<(const CowMergeOperation& op1, const CowMergeOperation& op2) {
return op1.dst_extent().start_block() < op2.dst_extent().start_block();
}
bool operator==(const CowMergeOperation& op1, const CowMergeOperation& op2) {
return op1.type() == op2.type() && op1.src_extent() == op2.src_extent() &&
op1.dst_extent() == op2.dst_extent();
}
template <typename T>
constexpr T GetDifference(T first, T second) {
T abs_diff = (first > second) ? (first - second) : (second - first);
return abs_diff;
}
CowMergeOperation::Type GetCowOpType(InstallOperation::Type install_op_type) {
switch (install_op_type) {
case InstallOperation::SOURCE_COPY:
return CowMergeOperation::COW_COPY;
case InstallOperation::SOURCE_BSDIFF:
case InstallOperation::BROTLI_BSDIFF:
case InstallOperation::PUFFDIFF:
return CowMergeOperation::COW_XOR;
default:
CHECK(false) << "Unknown install op type: " << install_op_type;
return CowMergeOperation::COW_REPLACE;
}
}
void SplitSelfOverlapping(const Extent& src_extent,
const Extent& dst_extent,
std::vector<CowMergeOperation>* sequence) {
CHECK_EQ(src_extent.num_blocks(), dst_extent.num_blocks());
if (src_extent.start_block() == dst_extent.start_block()) {
sequence->emplace_back(CreateCowMergeOperation(
src_extent, dst_extent, CowMergeOperation::COW_COPY));
return;
}
const size_t diff =
GetDifference(src_extent.start_block(), dst_extent.start_block());
for (size_t i = 0; i < src_extent.num_blocks(); i += diff) {
auto num_blocks = std::min<size_t>(diff, src_extent.num_blocks() - i);
sequence->emplace_back(CreateCowMergeOperation(
ExtentForRange(i + src_extent.start_block(), num_blocks),
ExtentForRange(i + dst_extent.start_block(), num_blocks),
CowMergeOperation::COW_COPY));
}
}
static bool ProcessXorOps(std::vector<CowMergeOperation>* sequence,
const AnnotatedOperation& aop) {
const auto size_before = sequence->size();
sequence->insert(sequence->end(), aop.xor_ops.begin(), aop.xor_ops.end());
std::for_each(
sequence->begin() + size_before,
sequence->end(),
[](CowMergeOperation& op) {
CHECK_EQ(op.type(), CowMergeOperation::COW_XOR);
// If |src_offset| is greater than 0, then we are reading 1
// extra block at the end of src_extent. This dependency must
// be honored during merge sequence generation, or we can end
// up with a corrupted device after merge.
if (op.src_offset() > 0) {
if (op.src_extent().num_blocks() == op.dst_extent().num_blocks()) {
op.mutable_src_extent()->set_num_blocks(
op.src_extent().num_blocks() + 1);
}
CHECK_EQ(op.src_extent().num_blocks(),
op.dst_extent().num_blocks() + 1);
}
CHECK_NE(op.src_extent().start_block(),
std::numeric_limits<uint64_t>::max());
});
return true;
}
static bool ProcessCopyOps(std::vector<CowMergeOperation>* sequence,
const AnnotatedOperation& aop) {
CHECK_EQ(GetCowOpType(aop.op.type()), CowMergeOperation::COW_COPY);
if (aop.op.dst_extents().size() != 1) {
std::vector<Extent> out_extents;
ExtentsToVector(aop.op.dst_extents(), &out_extents);
LOG(ERROR)
<< "The dst extents for source_copy are expected to be contiguous,"
<< " dst extents: " << ExtentsToString(out_extents);
return false;
}
// Split the source extents.
size_t used_blocks = 0;
for (const auto& src_extent : aop.op.src_extents()) {
// The dst_extent in the merge sequence will be a subset of
// InstallOperation's dst_extent. This will simplify the OTA -> COW
// conversion when we install the payload.
Extent dst_extent =
ExtentForRange(aop.op.dst_extents(0).start_block() + used_blocks,
src_extent.num_blocks());
// Self-overlapping operation, must split into multiple non
// self-overlapping ops
if (ExtentRanges::ExtentsOverlap(src_extent, dst_extent)) {
SplitSelfOverlapping(src_extent, dst_extent, sequence);
} else {
sequence->emplace_back(CreateCowMergeOperation(
src_extent, dst_extent, CowMergeOperation::COW_COPY));
}
used_blocks += src_extent.num_blocks();
}
if (used_blocks != aop.op.dst_extents(0).num_blocks()) {
LOG(ERROR) << "Number of blocks in src extents doesn't equal to the"
<< " ones in the dst extents, src blocks " << used_blocks
<< ", dst blocks " << aop.op.dst_extents(0).num_blocks();
return false;
}
return true;
}
std::unique_ptr<MergeSequenceGenerator> MergeSequenceGenerator::Create(
const std::vector<AnnotatedOperation>& aops,
std::string_view partition_name) {
std::vector<CowMergeOperation> sequence;
for (const auto& aop : aops) {
if (aop.op.type() == InstallOperation::SOURCE_COPY) {
if (!ProcessCopyOps(&sequence, aop)) {
return nullptr;
}
} else if (!aop.xor_ops.empty()) {
if (!ProcessXorOps(&sequence, aop)) {
return nullptr;
}
}
}
return std::unique_ptr<MergeSequenceGenerator>(
new MergeSequenceGenerator(sequence, partition_name));
}
template <typename T>
CowMergeOperation MaxOutDegree(
const T& nodes,
const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
merge_after) {
// Rationale for this algorithm:
// We only need to remove nodes from the graph if the graph contains a cycle.
// Any graph of N nodes has cycle iff number of edges >= N.
// So, to restore the graph back to an acyclic state, we need to keep removing
// edges until we have <N edges left. To minimize the number of nodes removed,
// we always remove the node with maximum out degree.
CowMergeOperation best;
size_t max_out_degree = 0;
const auto has_xor =
std::any_of(nodes.begin(),
nodes.end(),
[&merge_after](const CowMergeOperation& node) {
if (node.type() != CowMergeOperation::COW_XOR) {
return false;
}
auto it = merge_after.find(node);
if (it == merge_after.end()) {
return false;
}
return it->second.size() > 0;
});
for (const auto& op : nodes) {
if (has_xor && op.type() != CowMergeOperation::COW_XOR) {
continue;
}
const auto out_degree = merge_after.at(op).size();
if (out_degree > max_out_degree) {
best = op;
max_out_degree = out_degree;
} else if (out_degree == max_out_degree) {
if (op.src_extent().num_blocks() < best.src_extent().num_blocks()) {
best = op;
}
}
}
CHECK_NE(max_out_degree, 0UL);
return best;
}
template <typename T>
struct MapKeyIterator {
MapKeyIterator<T>& operator++() {
++it;
return *this;
}
MapKeyIterator<T>& operator--() {
--it;
return *this;
}
bool operator==(const MapKeyIterator<T>& rhs) const { return it == rhs.it; }
bool operator!=(const MapKeyIterator<T>& rhs) const { return it != rhs.it; }
auto&& operator->() const { return it->first; }
auto&& operator*() const { return it->first; }
T it;
};
template <typename T, typename U>
struct MapKeyRange {
auto begin() const {
return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.begin()};
}
auto end() const {
return MapKeyIterator<typename std::map<T, U>::const_iterator>{map.end()};
}
std::map<T, U> map;
};
CowMergeOperation MaxOutDegree(
const std::map<CowMergeOperation, int>& incoming_edges,
const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
merge_after) {
return MaxOutDegree(MapKeyRange<CowMergeOperation, int>{incoming_edges},
merge_after);
}
// Given a potentially cyclic graph, return a node to remove to break cycles
// |incoming_edges| stores nodes' in degree, and |merge_after| is an outgoing
// edge list. For example, |merge_after[a]| returns all nodes which `a` has an
// out going edge to.
// The only requirement of this function is to return a node which is in
// |incoming_edges| . As long as this is satisfied, merge sequence generation
// will work. Caller will keep removing nodes returned by this function until
// the graph has no cycles. However, the choice of which node to remove can
// greatly impact COW sizes. Nodes removed from the graph will be converted to a
// COW_REPLACE operation, taking more disk space. So this function should try to
// pick a node which minimizes number of nodes we have to remove. (Modulo the
// weight of each node, which is how many blocks a CowMergeOperation touches)
CowMergeOperation PickConvertToRaw(
const std::map<CowMergeOperation, int>& incoming_edges,
const std::map<CowMergeOperation, std::set<CowMergeOperation>>&
merge_after) {
return MaxOutDegree(incoming_edges, merge_after);
}
std::map<CowMergeOperation, std::set<CowMergeOperation>>
MergeSequenceGenerator::FindDependency(
const std::vector<CowMergeOperation>& operations) {
LOG(INFO) << "Finding dependencies";
// Since the OTA operation may reuse some source blocks, use the binary
// search on sorted dst extents to find overlaps.
std::map<CowMergeOperation, std::set<CowMergeOperation>> merge_after;
for (const auto& op : operations) {
// lower bound (inclusive): dst extent's end block >= src extent's start
// block.
const auto lower_it = std::lower_bound(
operations.begin(),
operations.end(),
op,
[](const CowMergeOperation& it, const CowMergeOperation& op) {
auto dst_end_block =
it.dst_extent().start_block() + it.dst_extent().num_blocks() - 1;
return dst_end_block < op.src_extent().start_block();
});
// upper bound: dst extent's start block > src extent's end block
const auto upper_it = std::upper_bound(
lower_it,
operations.end(),
op,
[](const CowMergeOperation& op, const CowMergeOperation& it) {
auto src_end_block =
op.src_extent().start_block() + op.src_extent().num_blocks() - 1;
return src_end_block < it.dst_extent().start_block();
});
// TODO(xunchang) skip inserting the empty set to merge_after.
if (lower_it == upper_it) {
merge_after.insert({op, {}});
} else {
std::set<CowMergeOperation> operations(lower_it, upper_it);
auto it = operations.find(op);
if (it != operations.end()) {
LOG(INFO) << "Self overlapping " << op;
operations.erase(it);
}
auto ret = merge_after.emplace(op, std::move(operations));
// Check the insertion indeed happens.
CHECK(ret.second) << op;
}
}
return merge_after;
}
bool MergeSequenceGenerator::Generate(
std::vector<CowMergeOperation>* sequence) const {
sequence->clear();
LOG(INFO) << "Generating sequence";
// Use the non-DFS version of the topology sort. So we can control the
// operations to discard to break cycles; thus yielding a deterministic
// sequence.
std::map<CowMergeOperation, int> incoming_edges;
for (const auto& it : merge_after_) {
for (const auto& blocked : it.second) {
// Value is default initialized to 0.
incoming_edges[blocked] += 1;
}
}
// Technically, we can use std::unordered_set or just a std::vector. but
// std::set gives the benefit where operations are sorted by dst blocks. This
// will ensure that operations that do not have dependency constraints appear
// in increasing block order. Such order would help snapuserd batch merges and
// improve boot time, but isn't strictly needed for correctness.
std::set<CowMergeOperation> free_operations;
for (const auto& op : operations_) {
if (incoming_edges.find(op) == incoming_edges.end()) {
free_operations.insert(op);
}
}
std::vector<CowMergeOperation> merge_sequence;
std::set<CowMergeOperation> convert_to_raw;
while (!incoming_edges.empty()) {
if (!free_operations.empty()) {
merge_sequence.insert(
merge_sequence.end(), free_operations.begin(), free_operations.end());
} else {
auto to_convert = PickConvertToRaw(incoming_edges, merge_after_);
// The operation we pick must be one of the nodes not already in merge
// sequence.
CHECK(incoming_edges.find(to_convert) != incoming_edges.end());
free_operations.insert(to_convert);
convert_to_raw.insert(to_convert);
LOG(INFO) << "Converting operation to raw " << to_convert;
}
std::set<CowMergeOperation> next_free_operations;
for (const auto& op : free_operations) {
incoming_edges.erase(op);
// Now that this particular operation is merged, other operations
// blocked by this one may be free. Decrement the count of blocking
// operations, and set up the free operations for the next iteration.
for (const auto& blocked : merge_after_.at(op)) {
auto it = incoming_edges.find(blocked);
if (it == incoming_edges.end()) {
continue;
}
auto blocking_transfer_count = &it->second;
if (*blocking_transfer_count <= 0) {
LOG(ERROR) << "Unexpected count in merge after map "
<< blocking_transfer_count;
return false;
}
// This operation is no longer blocked by anyone. Add it to the merge
// sequence in the next iteration.
*blocking_transfer_count -= 1;
if (*blocking_transfer_count == 0) {
next_free_operations.insert(blocked);
}
}
}
LOG(INFO) << "Remaining transfers " << incoming_edges.size()
<< ", free transfers " << free_operations.size()
<< ", merge_sequence size " << merge_sequence.size();
free_operations = std::move(next_free_operations);
}
if (!free_operations.empty()) {
merge_sequence.insert(
merge_sequence.end(), free_operations.begin(), free_operations.end());
}
CHECK_EQ(operations_.size(), merge_sequence.size() + convert_to_raw.size());
size_t blocks_in_sequence = 0;
for (const CowMergeOperation& transfer : merge_sequence) {
blocks_in_sequence += transfer.dst_extent().num_blocks();
}
size_t blocks_in_raw = 0;
for (const CowMergeOperation& transfer : convert_to_raw) {
blocks_in_raw += transfer.dst_extent().num_blocks();
}
LOG(INFO) << "Blocks in merge sequence " << blocks_in_sequence
<< ", blocks in raw " << blocks_in_raw << ", partition "
<< partition_name_;
if (!ValidateSequence(merge_sequence)) {
LOG(ERROR) << "Invalid Sequence";
return false;
}
*sequence = std::move(merge_sequence);
return true;
}
bool MergeSequenceGenerator::ValidateSequence(
const std::vector<CowMergeOperation>& sequence) {
LOG(INFO) << "Validating merge sequence";
ExtentRanges visited;
for (const auto& op : sequence) {
// If |src_offset| is greater than zero, dependency should include 1 extra
// block at end of src_extent, as the OP actually references data past
// original src_extent.
if (op.src_offset() > 0) {
CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks() + 1)
<< op;
} else {
CHECK_EQ(op.src_extent().num_blocks(), op.dst_extent().num_blocks())
<< op;
}
if (visited.OverlapsWithExtent(op.src_extent())) {
LOG(ERROR) << "Transfer violates the merge sequence " << op
<< "Visited extent ranges: ";
visited.Dump();
return false;
}
CHECK(!visited.OverlapsWithExtent(op.dst_extent()))
<< "dst extent should write only once.";
visited.AddExtent(op.dst_extent());
}
return true;
}
} // namespace chromeos_update_engine