Snap for 12538224 from b4144d58f628a2b1108351585bd4528f364ff2f8 to mainline-adservices-release
Change-Id: Id47fd51039008b1928f5a15b86a67ee8a8f0495e
diff --git a/btf_reader.h b/btf_reader.h
index 74896cf..9570a0c 100644
--- a/btf_reader.h
+++ b/btf_reader.h
@@ -22,6 +22,7 @@
#define STG_BTF_READER_H_
#include <string>
+#include <string_view>
#include "graph.h"
#include "reader_options.h"
diff --git a/comparison.cc b/comparison.cc
index b748de5..847ed5c 100644
--- a/comparison.cc
+++ b/comparison.cc
@@ -40,12 +40,14 @@
namespace stg {
namespace diff {
+namespace {
+
struct IgnoreDescriptor {
std::string_view name;
Ignore::Value value;
};
-static constexpr std::array<IgnoreDescriptor, 9> kIgnores{{
+constexpr std::array<IgnoreDescriptor, 9> kIgnores{{
{"type_declaration_status", Ignore::TYPE_DECLARATION_STATUS },
{"symbol_type_presence", Ignore::SYMBOL_TYPE_PRESENCE },
{"primitive_type_encoding", Ignore::PRIMITIVE_TYPE_ENCODING },
@@ -57,6 +59,8 @@
{"type_definition_addition", Ignore::TYPE_DEFINITION_ADDITION },
}};
+} // namespace
+
std::optional<Ignore::Value> ParseIgnore(std::string_view ignore) {
for (const auto& [name, value] : kIgnores) {
if (name == ignore) {
@@ -74,12 +78,16 @@
return os << '\n';
}
+namespace {
+
std::string QualifiersMessage(Qualifier qualifier, const std::string& action) {
std::ostringstream os;
os << "qualifier " << qualifier << ' ' << action;
return os.str();
}
+} // namespace
+
/*
* We compute a diff for every visited node.
*
@@ -244,7 +252,7 @@
return result.MarkIncomparable();
}
const auto type_diff = (*this)(x1.pointee_type_id, x2.pointee_type_id);
- const auto text =
+ const char* text =
x1.kind == PointerReference::Kind::POINTER ? "pointed-to" : "referred-to";
result.MaybeAddEdgeDiff(text, type_diff);
return result;
@@ -489,7 +497,9 @@
return result;
}
-static KeyIndexPairs MatchingKeys(const Enumeration::Enumerators& enums) {
+namespace {
+
+KeyIndexPairs MatchingKeys(const Enumeration::Enumerators& enums) {
KeyIndexPairs names;
const auto size = enums.size();
names.reserve(size);
@@ -501,6 +511,8 @@
return names;
}
+} // namespace
+
Result Compare::operator()(const Enumeration& x1, const Enumeration& x2) {
Result result;
// Compare two anonymous types recursively, not holding diffs.
diff --git a/comparison.h b/comparison.h
index 4ccc613..6b55558 100644
--- a/comparison.h
+++ b/comparison.h
@@ -25,8 +25,6 @@
#include <cstddef>
#include <cstdint>
#include <functional>
-#include <map>
-#include <memory>
#include <optional>
#include <ostream>
#include <set>
@@ -73,7 +71,7 @@
bitset = bitset | (1 << other);
}
bool Test(Value other) const {
- return bitset & (1 << other);
+ return (bitset & (1 << other)) != 0;
}
Bitset bitset = 0;
diff --git a/deduplication.h b/deduplication.h
index adb8c89..723bb19 100644
--- a/deduplication.h
+++ b/deduplication.h
@@ -20,7 +20,6 @@
#ifndef STG_DEDUPLICATION_H_
#define STG_DEDUPLICATION_H_
-#include <cstdint>
#include <unordered_map>
#include "graph.h"
diff --git a/doc/index.md b/doc/index.md
index bd57638..c6b1fc6 100644
--- a/doc/index.md
+++ b/doc/index.md
@@ -9,6 +9,10 @@
* [`stg`](stg.md): ABI extraction and transformation
* [`stgdiff`](stgdiff.md): ABI comparison and reporting
+## Reference
+
+* [`reference`](reference.md): work-in-progress covering concepts etc.
+
## Design and Implementation
Incomplete descriptions of some of the more interesting things that make up
diff --git a/doc/reference.md b/doc/reference.md
new file mode 100644
index 0000000..21bf8d2
--- /dev/null
+++ b/doc/reference.md
@@ -0,0 +1,193 @@
+# STG
+
+STG stands for Symbol-Type Graph.
+
+# Overview
+
+STG models Application Binary Interfaces. It supports extraction of ABIs from
+DWARF and ingestion of BTF and libabigail XML into its model. Its primary
+purpose is monitoring an ABI for changes over time and reporting such changes in
+a comprehensible fashion.
+
+STG captures symbol information, the size and layout of structs, function
+argument and return types and much more, in a graph representation. Difference
+reporting happens via a graph comparison.
+
+Currently, STG functionality is exposed as two command-line tools, `stg` (for
+ABI extraction) and `stgdiff` (for ABI comparison), and a native file format.
+
+## Model
+
+STG's model is an *abstraction* which does not and cannot capture every possible
+interface property, invariant or behaviour. Conversely, the model includes
+distinctions which are API significant but not ABI significant.
+
+Concretely, STG's model is a rooted, connected, directed graph where each kind
+of node corresponds to a meaningful ABI entity such as a symbol, function type
+or struct member.
+
+Nodes have specific attributes, such as name or size. Outgoing edges specify
+things like return type. STG's model does not impose any constraints on which
+nodes may be joined by edges.
+
+Each node has an identity. However, for the purpose of comparison, nodes are
+considered equal if they are of the same kind, have the same attributes and
+matching outgoing edges and all nodes reachable via a pair of matching edges are
+(recursively) equal. Renumbering nodes, (de)duplicating nodes and
+adding/removing unreachable nodes do not affect this relationship.
+
+### Symbols
+
+As modelled by STG, symbols correspond closely to ELF symbols as seen in
+`.dynsym` for shared object files or in `.symtab` for object files. In the case
+of the Linux kernel, the `.symtab` is enriched with metadata and the effective
+"ksymtab" is actually a subset of the ELF symbols together with CRC and
+namespace information.
+
+STG links symbols to their source-level types where these are known. Symbols
+defined purely in assembly language will not have type information.
+
+The symbol table is contained in the root node of the graph, which is an
+*Interface* node.
+
+### Types
+
+STG models the C, C++ and (to a limited extent) Rust type systems.
+
+For example, C++ template value parameters are poorly modelled for the simple
+reason that this would require modelling C++ *values* as well as types,
+something that DWARF itself doesn't do to the full extent permitted by C++20.
+
+As type definitions are in general mutually recursive, an STG ABI is in general
+a cyclic graph.
+
+The root node of the graph can also contain a list of interface types, which may
+not necessarily be reachable from the interface symbols.
+
+## Supported Input Formats, Parsers and Limitations
+
+STG can read its own native format for processing or comparison. It can also
+process libabigail XML and BTF (`.BTF` ELF sections), with some limitations due
+to model, design and implementation differences including missing features.
+
+### Kinds of Node
+
+STG has the following kinds of node.
+
+* **Special** - used for `void` and `...`
+* **Pointer / Reference** - `*`, `&` and `&&`
+* **Pointer to Member** - `foo::*`
+* **Typedef** - `typedef` and `using ... = ...`
+* **Qualified** - `const` and friends
+* **Primitive** - concrete types such as `int` and friends
+* **Array** - `foo[N]` - there is no distinction between zero and
+ indeterminate length in the model
+* **Base Class** - inheritance metadata
+* **Method** - (only) virtual function
+* **Member** - data member
+* **Variant Member** - discriminated member
+* **Struct / Union** - `struct foo` etc., Rust tuples too
+* **Enumeration** - including the underlying value type - only values that are
+ within the range of signed 64-bit integer are correctly modelled
+* **Variant** - for Rust enums holding data
+* **Function** - multiple argument, single return type
+* **ELF Symbol** - name, version, ELF metadata, Linux kernel metadata
+* **Interface** - top-level collection of symbols and types
+
+An STG ABI consists of a rooted, connected graph of such nodes, and *nothing
+else*. STG is blind to anything that cannot be represented by its model.
+
+### Native Format
+
+STG's native file format is a protocol buffer text format. It is suitable for
+revision control, rather than human consumption. It is effectively described by
+[`stg.proto`](../stg.proto).
+
+In this textual serialisation of ABI graphs, external node identifiers and node
+order are chosen to minimise file changes when a small subset of the graph
+changes.
+
+As an example, this is the definition of the **Typedef** node kind:
+
+```proto
+message Typedef {
+ fixed32 id = 1;
+ string name = 2;
+ fixed32 referred_type_id = 3;
+}
+```
+
+### Abigail (a.k.a. libabigail XML)
+
+[libabigail](https://sourceware.org/libabigail/) is another project for ABI
+monitoring. It uses a format that can be parsed as XML.
+
+This command will transform Abigail into STG:
+
+```shell
+stg --abi library.xml --output library.stg
+```
+
+The main features modelled in Abigail but not STG are:
+
+* source file, line and column information
+* C++ access specifiers (public, protected, private)
+
+The Abigail reader has these distinct phases of operation:
+
+1. text parsed into an XML tree
+2. XML cleaning - whitespace and unused attributes are stripped
+3. XML tidying - issues like duplicate nodes are resolved, if possible
+4. XML parsed into a graph with symbol information held separately
+5. symbols and root node added to the graph
+6. useless type qualifiers are stripped in post-processing
+
+### BTF
+
+[BTF](https://docs.kernel.org/bpf/btf.html) is typically used for the Linux
+kernel where it is generated by `pahole -J` from ELF and DWARF information. It
+can also be generated natively instead of DWARF using `gcc -gbtf` and by Clang,
+but only for eBPF targets.
+
+This command will transform BTF into STG:
+
+```shell
+stg --btf vmlinux --output vmlinux.stg
+```
+
+STG has primarily been tested against the `pahole` (libbtf) dialect of BTF and
+support is not complete.
+
+* split BTF is not supported at all
+* any `.BTF.ext` section is just ignored
+* some kinds of BTF node are not handled:
+ * `BTF_KIND_DATASEC` - skip
+ * `BTF_KIND_DECL_TAG` - abort
+ * `BTF_KIND_TYPE_TAG` - abort
+
+The BTF reader has these distinct phases of operation:
+
+1. file is opened as ELF and `.BTF` section data found
+2. BTF header processed
+3. BTF nodes parsed into a graph with symbol information held separately
+4. symbols and root node added to the graph
+
+### DWARF
+
+The ELF / DWARF reader operates similarly to the other readers at a high level,
+but much more work has to be done to turn ELF symbols and DWARF DIEs into STG
+nodes.
+
+1. the ELF file is checked for DWARF - missing DWARF results in a warning
+2. the ELF symbols are read (from `.dynsym` in the case of shared object file)
+3. the DWARF information is parsed into a partial STG graph
+4. the ELF and DWARF information are stitched together, adding symbols and a
+ root node to the graph
+5. useless type qualifiers are stripped in post-processing
+
+## Output preprocessing
+
+Before `stg` outputs a serialised graph, it performs:
+
+1. a type normalisation step that unifies overlapping type definitions
+2. a final deduplication step to eliminate other redundant nodes
diff --git a/dwarf_processor.h b/dwarf_processor.h
index 7eaa8ed..93947a7 100644
--- a/dwarf_processor.h
+++ b/dwarf_processor.h
@@ -23,7 +23,7 @@
#include <elfutils/libdw.h>
#include <cstddef>
-#include <optional>
+#include <memory>
#include <string>
#include <vector>
diff --git a/dwarf_wrappers.h b/dwarf_wrappers.h
index f09ac98..cddd414 100644
--- a/dwarf_wrappers.h
+++ b/dwarf_wrappers.h
@@ -20,7 +20,6 @@
#ifndef STG_DWARF_WRAPPERS_H_
#define STG_DWARF_WRAPPERS_H_
-#include <elf.h>
#include <elfutils/libdw.h>
#include <cstddef>
diff --git a/elf_dwarf_handle.h b/elf_dwarf_handle.h
index cca4a0e..37face6 100644
--- a/elf_dwarf_handle.h
+++ b/elf_dwarf_handle.h
@@ -20,9 +20,9 @@
#ifndef STG_ELF_DWARF_HANDLE_H_
#define STG_ELF_DWARF_HANDLE_H_
-#include <elf.h>
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
+#include <libelf.h>
#include <cstddef>
#include <functional>
diff --git a/elf_loader.h b/elf_loader.h
index 561e1dd..8eaf9bc 100644
--- a/elf_loader.h
+++ b/elf_loader.h
@@ -24,9 +24,7 @@
#include <cstddef>
#include <cstdint>
-#include <memory>
#include <ostream>
-#include <string>
#include <string_view>
#include <vector>
diff --git a/equality_cache.h b/equality_cache.h
index c1aa42b..138e0c1 100644
--- a/equality_cache.h
+++ b/equality_cache.h
@@ -21,10 +21,10 @@
#define STG_EQUALITY_CACHE_H_
#include <cstddef>
-#include <cstdint>
#include <optional>
#include <unordered_map>
#include <unordered_set>
+#include <utility>
#include <vector>
#include "graph.h"
diff --git a/error.h b/error.h
index 4680cae..a88e345 100644
--- a/error.h
+++ b/error.h
@@ -26,6 +26,7 @@
#include <ostream>
#include <sstream>
#include <string>
+#include <system_error>
namespace stg {
diff --git a/fingerprint.h b/fingerprint.h
index 43fe694..f05d042 100644
--- a/fingerprint.h
+++ b/fingerprint.h
@@ -20,7 +20,6 @@
#ifndef STG_FINGERPRINT_H_
#define STG_FINGERPRINT_H_
-#include <cstdint>
#include <unordered_map>
#include "graph.h"
diff --git a/fuzz/abigail_reader_fuzzer.cc b/fuzz/abigail_reader_fuzzer.cc
index e7a9926..4b8cada 100644
--- a/fuzz/abigail_reader_fuzzer.cc
+++ b/fuzz/abigail_reader_fuzzer.cc
@@ -26,7 +26,11 @@
#include "error.h"
#include "graph.h"
-static void DoNothing(void*, const char*, ...) {}
+namespace {
+
+void DoNothing(void*, const char*, ...) {}
+
+} // namespace
extern "C" int LLVMFuzzerTestOneInput(const char* data, size_t size) {
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
diff --git a/graph.h b/graph.h
index 3a31540..79ba7b8 100644
--- a/graph.h
+++ b/graph.h
@@ -22,9 +22,9 @@
#ifndef STG_GRAPH_H_
#define STG_GRAPH_H_
-#include <compare>
#include <cstddef>
#include <cstdint>
+#include <exception>
#include <functional>
#include <map>
#include <optional>
diff --git a/hashing.h b/hashing.h
index 71fef1e..41efeba 100644
--- a/hashing.h
+++ b/hashing.h
@@ -21,7 +21,6 @@
#ifndef STG_HASHING_H_
#define STG_HASHING_H_
-#include <compare>
#include <cstddef>
#include <cstdint>
#include <functional>
diff --git a/post_processing.h b/post_processing.h
index 1b6453f..94421dd 100644
--- a/post_processing.h
+++ b/post_processing.h
@@ -20,7 +20,6 @@
#ifndef STG_POST_PROCESSING_H_
#define STG_POST_PROCESSING_H_
-#include <cstddef>
#include <string>
#include <vector>
diff --git a/reporting.cc b/reporting.cc
index 42933f3..3e1dae2 100644
--- a/reporting.cc
+++ b/reporting.cc
@@ -44,12 +44,14 @@
namespace stg {
namespace reporting {
+namespace {
+
struct FormatDescriptor {
std::string_view name;
OutputFormat value;
};
-static constexpr std::array<FormatDescriptor, 5> kFormats{{
+constexpr std::array<FormatDescriptor, 5> kFormats{{
{"plain", OutputFormat::PLAIN},
{"flat", OutputFormat::FLAT },
{"small", OutputFormat::SMALL},
@@ -57,6 +59,8 @@
{"viz", OutputFormat::VIZ },
}};
+} // namespace
+
std::optional<OutputFormat> ParseOutputFormat(std::string_view format) {
for (const auto& [name, value] : kFormats) {
if (name == format) {
@@ -136,7 +140,7 @@
return false;
}
-static constexpr size_t INDENT_INCREMENT = 2;
+constexpr size_t INDENT_INCREMENT = 2;
class Plain {
// unvisited (absent) -> started (false) -> finished (true)
diff --git a/reporting.h b/reporting.h
index f5cbe3f..929aec6 100644
--- a/reporting.h
+++ b/reporting.h
@@ -20,7 +20,6 @@
#ifndef STG_REPORTING_H_
#define STG_REPORTING_H_
-#include <cstddef>
#include <optional>
#include <ostream>
#include <string_view>
diff --git a/scc.h b/scc.h
index 3199a53..374c982 100644
--- a/scc.h
+++ b/scc.h
@@ -22,6 +22,7 @@
#include <cstddef>
#include <exception>
+#include <functional>
#include <iterator>
#include <optional>
#include <unordered_map>
@@ -62,6 +63,8 @@
*
* USAGE
*
+ * Create an SCC finder with a lifetime bracketing a top-level DFS invocation.
+ *
* Before examining a node, check it's not been assigned to an SCC already and
* then call Open. If the node is already "open" (i.e., is already waiting to be
* assigned to an SCC), this will return an empty optional value and the node
@@ -78,8 +81,8 @@
* the nodes as visited), this should be done now. Otherwise, an empty vector
* will be returned.
*
- * After a top-level DFS has completed, the SCC finder should be carrying no
- * state. This can be verified by calling Empty.
+ * On destruction, after a top-level DFS invocation has completed, the SCC
+ * finder will check that it is carrying no state.
*/
template <typename Node, typename Hash = std::hash<Node>>
class SCC {
diff --git a/stable_hash.h b/stable_hash.h
index b0b9265..f7b33df 100644
--- a/stable_hash.h
+++ b/stable_hash.h
@@ -20,10 +20,7 @@
#ifndef STG_STABLE_HASH_H_
#define STG_STABLE_HASH_H_
-#include <cstdint>
-#include <iostream>
#include <unordered_map>
-#include <vector>
#include "graph.h"
#include "hashing.h"
diff --git a/stgdiff.cc b/stgdiff.cc
index 1189076..3ef0155 100644
--- a/stgdiff.cc
+++ b/stgdiff.cc
@@ -21,7 +21,6 @@
#include <getopt.h>
-#include <cstddef>
#include <cstring>
#include <fstream>
#include <iostream>
diff --git a/unification.h b/unification.h
index d8fde2e..081b3fa 100644
--- a/unification.h
+++ b/unification.h
@@ -21,8 +21,6 @@
#define STG_UNIFICATION_H_
#include <exception>
-#include <unordered_map>
-#include <unordered_set>
#include "graph.h"
#include "runtime.h"