Snap for 12538224 from b4144d58f628a2b1108351585bd4528f364ff2f8 to mainline-adservices-release

Change-Id: Id47fd51039008b1928f5a15b86a67ee8a8f0495e
diff --git a/btf_reader.h b/btf_reader.h
index 74896cf..9570a0c 100644
--- a/btf_reader.h
+++ b/btf_reader.h
@@ -22,6 +22,7 @@
 #define STG_BTF_READER_H_
 
 #include <string>
+#include <string_view>
 
 #include "graph.h"
 #include "reader_options.h"
diff --git a/comparison.cc b/comparison.cc
index b748de5..847ed5c 100644
--- a/comparison.cc
+++ b/comparison.cc
@@ -40,12 +40,14 @@
 namespace stg {
 namespace diff {
 
+namespace {
+
 struct IgnoreDescriptor {
   std::string_view name;
   Ignore::Value value;
 };
 
-static constexpr std::array<IgnoreDescriptor, 9> kIgnores{{
+constexpr std::array<IgnoreDescriptor, 9> kIgnores{{
   {"type_declaration_status",  Ignore::TYPE_DECLARATION_STATUS  },
   {"symbol_type_presence",     Ignore::SYMBOL_TYPE_PRESENCE     },
   {"primitive_type_encoding",  Ignore::PRIMITIVE_TYPE_ENCODING  },
@@ -57,6 +59,8 @@
   {"type_definition_addition", Ignore::TYPE_DEFINITION_ADDITION },
 }};
 
+}  // namespace
+
 std::optional<Ignore::Value> ParseIgnore(std::string_view ignore) {
   for (const auto& [name, value] : kIgnores) {
     if (name == ignore) {
@@ -74,12 +78,16 @@
   return os << '\n';
 }
 
+namespace {
+
 std::string QualifiersMessage(Qualifier qualifier, const std::string& action) {
   std::ostringstream os;
   os << "qualifier " << qualifier << ' ' << action;
   return os.str();
 }
 
+}  // namespace
+
 /*
  * We compute a diff for every visited node.
  *
@@ -244,7 +252,7 @@
     return result.MarkIncomparable();
   }
   const auto type_diff = (*this)(x1.pointee_type_id, x2.pointee_type_id);
-  const auto text =
+  const char* text =
       x1.kind == PointerReference::Kind::POINTER ? "pointed-to" : "referred-to";
   result.MaybeAddEdgeDiff(text, type_diff);
   return result;
@@ -489,7 +497,9 @@
   return result;
 }
 
-static KeyIndexPairs MatchingKeys(const Enumeration::Enumerators& enums) {
+namespace {
+
+KeyIndexPairs MatchingKeys(const Enumeration::Enumerators& enums) {
   KeyIndexPairs names;
   const auto size = enums.size();
   names.reserve(size);
@@ -501,6 +511,8 @@
   return names;
 }
 
+}  // namespace
+
 Result Compare::operator()(const Enumeration& x1, const Enumeration& x2) {
   Result result;
   // Compare two anonymous types recursively, not holding diffs.
diff --git a/comparison.h b/comparison.h
index 4ccc613..6b55558 100644
--- a/comparison.h
+++ b/comparison.h
@@ -25,8 +25,6 @@
 #include <cstddef>
 #include <cstdint>
 #include <functional>
-#include <map>
-#include <memory>
 #include <optional>
 #include <ostream>
 #include <set>
@@ -73,7 +71,7 @@
     bitset = bitset | (1 << other);
   }
   bool Test(Value other) const {
-    return bitset & (1 << other);
+    return (bitset & (1 << other)) != 0;
   }
 
   Bitset bitset = 0;
diff --git a/deduplication.h b/deduplication.h
index adb8c89..723bb19 100644
--- a/deduplication.h
+++ b/deduplication.h
@@ -20,7 +20,6 @@
 #ifndef STG_DEDUPLICATION_H_
 #define STG_DEDUPLICATION_H_
 
-#include <cstdint>
 #include <unordered_map>
 
 #include "graph.h"
diff --git a/doc/index.md b/doc/index.md
index bd57638..c6b1fc6 100644
--- a/doc/index.md
+++ b/doc/index.md
@@ -9,6 +9,10 @@
 *   [`stg`](stg.md): ABI extraction and transformation
 *   [`stgdiff`](stgdiff.md): ABI comparison and reporting
 
+## Reference
+
+*   [`reference`](reference.md): work-in-progress covering concepts etc.
+
 ## Design and Implementation
 
 Incomplete descriptions of some of the more interesting things that make up
diff --git a/doc/reference.md b/doc/reference.md
new file mode 100644
index 0000000..21bf8d2
--- /dev/null
+++ b/doc/reference.md
@@ -0,0 +1,193 @@
+# STG
+
+STG stands for Symbol-Type Graph.
+
+# Overview
+
+STG models Application Binary Interfaces. It supports extraction of ABIs from
+DWARF and ingestion of BTF and libabigail XML into its model. Its primary
+purpose is monitoring an ABI for changes over time and reporting such changes in
+a comprehensible fashion.
+
+STG captures symbol information, the size and layout of structs, function
+argument and return types and much more, in a graph representation. Difference
+reporting happens via a graph comparison.
+
+Currently, STG functionality is exposed as two command-line tools, `stg` (for
+ABI extraction) and `stgdiff` (for ABI comparison), and a native file format.
+
+## Model
+
+STG's model is an *abstraction* which does not and cannot capture every possible
+interface property, invariant or behaviour. Conversely, the model includes
+distinctions which are API significant but not ABI significant.
+
+Concretely, STG's model is a rooted, connected, directed graph where each kind
+of node corresponds to a meaningful ABI entity such as a symbol, function type
+or struct member.
+
+Nodes have specific attributes, such as name or size. Outgoing edges specify
+things like return type. STG's model does not impose any constraints on which
+nodes may be joined by edges.
+
+Each node has an identity. However, for the purpose of comparison, nodes are
+considered equal if they are of the same kind, have the same attributes and
+matching outgoing edges and all nodes reachable via a pair of matching edges are
+(recursively) equal. Renumbering nodes, (de)duplicating nodes and
+adding/removing unreachable nodes do not affect this relationship.
+
+### Symbols
+
+As modelled by STG, symbols correspond closely to ELF symbols as seen in
+`.dynsym` for shared object files or in `.symtab` for object files. In the case
+of the Linux kernel, the `.symtab` is enriched with metadata and the effective
+"ksymtab" is actually a subset of the ELF symbols together with CRC and
+namespace information.
+
+STG links symbols to their source-level types where these are known. Symbols
+defined purely in assembly language will not have type information.
+
+The symbol table is contained in the root node of the graph, which is an
+*Interface* node.
+
+### Types
+
+STG models the C, C++ and (to a limited extent) Rust type systems.
+
+For example, C++ template value parameters are poorly modelled for the simple
+reason that this would require modelling C++ *values* as well as types,
+something that DWARF itself doesn't do to the full extent permitted by C++20.
+
+As type definitions are in general mutually recursive, an STG ABI is in general
+a cyclic graph.
+
+The root node of the graph can also contain a list of interface types, which may
+not necessarily be reachable from the interface symbols.
+
+## Supported Input Formats, Parsers and Limitations
+
+STG can read its own native format for processing or comparison. It can also
+process libabigail XML and BTF (`.BTF` ELF sections), with some limitations due
+to model, design and implementation differences including missing features.
+
+### Kinds of Node
+
+STG has the following kinds of node.
+
+*   **Special** - used for `void` and `...`
+*   **Pointer / Reference** - `*`, `&` and `&&`
+*   **Pointer to Member** - `foo::*`
+*   **Typedef** - `typedef` and `using ... = ...`
+*   **Qualified** - `const` and friends
+*   **Primitive** - concrete types such as `int` and friends
+*   **Array** - `foo[N]` - there is no distinction between zero and
+    indeterminate length in the model
+*   **Base Class** - inheritance metadata
+*   **Method** - (only) virtual function
+*   **Member** - data member
+*   **Variant Member** - discriminated member
+*   **Struct / Union** - `struct foo` etc., Rust tuples too
+*   **Enumeration** - including the underlying value type - only values that are
+    within the range of signed 64-bit integer are correctly modelled
+*   **Variant** - for Rust enums holding data
+*   **Function** - multiple argument, single return type
+*   **ELF Symbol** - name, version, ELF metadata, Linux kernel metadata
+*   **Interface** - top-level collection of symbols and types
+
+An STG ABI consists of a rooted, connected graph of such nodes, and *nothing
+else*. STG is blind to anything that cannot be represented by its model.
+
+### Native Format
+
+STG's native file format is a protocol buffer text format. It is suitable for
+revision control, rather than human consumption. It is effectively described by
+[`stg.proto`](../stg.proto).
+
+In this textual serialisation of ABI graphs, external node identifiers and node
+order are chosen to minimise file changes when a small subset of the graph
+changes.
+
+As an example, this is the definition of the **Typedef** node kind:
+
+```proto
+message Typedef {
+  fixed32 id = 1;
+  string name = 2;
+  fixed32 referred_type_id = 3;
+}
+```
+
+### Abigail (a.k.a. libabigail XML)
+
+[libabigail](https://sourceware.org/libabigail/) is another project for ABI
+monitoring. It uses a format that can be parsed as XML.
+
+This command will transform Abigail into STG:
+
+```shell
+stg --abi library.xml --output library.stg
+```
+
+The main features modelled in Abigail but not STG are:
+
+*   source file, line and column information
+*   C++ access specifiers (public, protected, private)
+
+The Abigail reader has these distinct phases of operation:
+
+1.  text parsed into an XML tree
+2.  XML cleaning - whitespace and unused attributes are stripped
+3.  XML tidying - issues like duplicate nodes are resolved, if possible
+4.  XML parsed into a graph with symbol information held separately
+5.  symbols and root node added to the graph
+6.  useless type qualifiers are stripped in post-processing
+
+### BTF
+
+[BTF](https://docs.kernel.org/bpf/btf.html) is typically used for the Linux
+kernel where it is generated by `pahole -J` from ELF and DWARF information. It
+can also be generated natively instead of DWARF using `gcc -gbtf` and by Clang,
+but only for eBPF targets.
+
+This command will transform BTF into STG:
+
+```shell
+stg --btf vmlinux --output vmlinux.stg
+```
+
+STG has primarily been tested against the `pahole` (libbtf) dialect of BTF and
+support is not complete.
+
+*   split BTF is not supported at all
+*   any `.BTF.ext` section is just ignored
+*   some kinds of BTF node are not handled:
+    *   `BTF_KIND_DATASEC` - skip
+    *   `BTF_KIND_DECL_TAG` - abort
+    *   `BTF_KIND_TYPE_TAG` - abort
+
+The BTF reader has these distinct phases of operation:
+
+1.  file is opened as ELF and `.BTF` section data found
+2.  BTF header processed
+3.  BTF nodes parsed into a graph with symbol information held separately
+4.  symbols and root node added to the graph
+
+### DWARF
+
+The ELF / DWARF reader operates similarly to the other readers at a high level,
+but much more work has to be done to turn ELF symbols and DWARF DIEs into STG
+nodes.
+
+1.  the ELF file is checked for DWARF - missing DWARF results in a warning
+2.  the ELF symbols are read (from `.dynsym` in the case of shared object file)
+3.  the DWARF information is parsed into a partial STG graph
+4.  the ELF and DWARF information are stitched together, adding symbols and a
+    root node to the graph
+5.  useless type qualifiers are stripped in post-processing
+
+## Output preprocessing
+
+Before `stg` outputs a serialised graph, it performs:
+
+1.  a type normalisation step that unifies overlapping type definitions
+2.  a final deduplication step to eliminate other redundant nodes
diff --git a/dwarf_processor.h b/dwarf_processor.h
index 7eaa8ed..93947a7 100644
--- a/dwarf_processor.h
+++ b/dwarf_processor.h
@@ -23,7 +23,7 @@
 #include <elfutils/libdw.h>
 
 #include <cstddef>
-#include <optional>
+#include <memory>
 #include <string>
 #include <vector>
 
diff --git a/dwarf_wrappers.h b/dwarf_wrappers.h
index f09ac98..cddd414 100644
--- a/dwarf_wrappers.h
+++ b/dwarf_wrappers.h
@@ -20,7 +20,6 @@
 #ifndef STG_DWARF_WRAPPERS_H_
 #define STG_DWARF_WRAPPERS_H_
 
-#include <elf.h>
 #include <elfutils/libdw.h>
 
 #include <cstddef>
diff --git a/elf_dwarf_handle.h b/elf_dwarf_handle.h
index cca4a0e..37face6 100644
--- a/elf_dwarf_handle.h
+++ b/elf_dwarf_handle.h
@@ -20,9 +20,9 @@
 #ifndef STG_ELF_DWARF_HANDLE_H_
 #define STG_ELF_DWARF_HANDLE_H_
 
-#include <elf.h>
 #include <elfutils/libdw.h>
 #include <elfutils/libdwfl.h>
+#include <libelf.h>
 
 #include <cstddef>
 #include <functional>
diff --git a/elf_loader.h b/elf_loader.h
index 561e1dd..8eaf9bc 100644
--- a/elf_loader.h
+++ b/elf_loader.h
@@ -24,9 +24,7 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <memory>
 #include <ostream>
-#include <string>
 #include <string_view>
 #include <vector>
 
diff --git a/equality_cache.h b/equality_cache.h
index c1aa42b..138e0c1 100644
--- a/equality_cache.h
+++ b/equality_cache.h
@@ -21,10 +21,10 @@
 #define STG_EQUALITY_CACHE_H_
 
 #include <cstddef>
-#include <cstdint>
 #include <optional>
 #include <unordered_map>
 #include <unordered_set>
+#include <utility>
 #include <vector>
 
 #include "graph.h"
diff --git a/error.h b/error.h
index 4680cae..a88e345 100644
--- a/error.h
+++ b/error.h
@@ -26,6 +26,7 @@
 #include <ostream>
 #include <sstream>
 #include <string>
+#include <system_error>
 
 namespace stg {
 
diff --git a/fingerprint.h b/fingerprint.h
index 43fe694..f05d042 100644
--- a/fingerprint.h
+++ b/fingerprint.h
@@ -20,7 +20,6 @@
 #ifndef STG_FINGERPRINT_H_
 #define STG_FINGERPRINT_H_
 
-#include <cstdint>
 #include <unordered_map>
 
 #include "graph.h"
diff --git a/fuzz/abigail_reader_fuzzer.cc b/fuzz/abigail_reader_fuzzer.cc
index e7a9926..4b8cada 100644
--- a/fuzz/abigail_reader_fuzzer.cc
+++ b/fuzz/abigail_reader_fuzzer.cc
@@ -26,7 +26,11 @@
 #include "error.h"
 #include "graph.h"
 
-static void DoNothing(void*, const char*, ...) {}
+namespace {
+
+void DoNothing(void*, const char*, ...) {}
+
+}  // namespace
 
 extern "C" int LLVMFuzzerTestOneInput(const char* data, size_t size) {
   xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
diff --git a/graph.h b/graph.h
index 3a31540..79ba7b8 100644
--- a/graph.h
+++ b/graph.h
@@ -22,9 +22,9 @@
 #ifndef STG_GRAPH_H_
 #define STG_GRAPH_H_
 
-#include <compare>
 #include <cstddef>
 #include <cstdint>
+#include <exception>
 #include <functional>
 #include <map>
 #include <optional>
diff --git a/hashing.h b/hashing.h
index 71fef1e..41efeba 100644
--- a/hashing.h
+++ b/hashing.h
@@ -21,7 +21,6 @@
 #ifndef STG_HASHING_H_
 #define STG_HASHING_H_
 
-#include <compare>
 #include <cstddef>
 #include <cstdint>
 #include <functional>
diff --git a/post_processing.h b/post_processing.h
index 1b6453f..94421dd 100644
--- a/post_processing.h
+++ b/post_processing.h
@@ -20,7 +20,6 @@
 #ifndef STG_POST_PROCESSING_H_
 #define STG_POST_PROCESSING_H_
 
-#include <cstddef>
 #include <string>
 #include <vector>
 
diff --git a/reporting.cc b/reporting.cc
index 42933f3..3e1dae2 100644
--- a/reporting.cc
+++ b/reporting.cc
@@ -44,12 +44,14 @@
 namespace stg {
 namespace reporting {
 
+namespace {
+
 struct FormatDescriptor {
   std::string_view name;
   OutputFormat value;
 };
 
-static constexpr std::array<FormatDescriptor, 5> kFormats{{
+constexpr std::array<FormatDescriptor, 5> kFormats{{
   {"plain", OutputFormat::PLAIN},
   {"flat",  OutputFormat::FLAT },
   {"small", OutputFormat::SMALL},
@@ -57,6 +59,8 @@
   {"viz",   OutputFormat::VIZ  },
 }};
 
+}  // namespace
+
 std::optional<OutputFormat> ParseOutputFormat(std::string_view format) {
   for (const auto& [name, value] : kFormats) {
     if (name == format) {
@@ -136,7 +140,7 @@
   return false;
 }
 
-static constexpr size_t INDENT_INCREMENT = 2;
+constexpr size_t INDENT_INCREMENT = 2;
 
 class Plain {
   // unvisited (absent) -> started (false) -> finished (true)
diff --git a/reporting.h b/reporting.h
index f5cbe3f..929aec6 100644
--- a/reporting.h
+++ b/reporting.h
@@ -20,7 +20,6 @@
 #ifndef STG_REPORTING_H_
 #define STG_REPORTING_H_
 
-#include <cstddef>
 #include <optional>
 #include <ostream>
 #include <string_view>
diff --git a/scc.h b/scc.h
index 3199a53..374c982 100644
--- a/scc.h
+++ b/scc.h
@@ -22,6 +22,7 @@
 
 #include <cstddef>
 #include <exception>
+#include <functional>
 #include <iterator>
 #include <optional>
 #include <unordered_map>
@@ -62,6 +63,8 @@
  *
  * USAGE
  *
+ * Create an SCC finder with a lifetime bracketing a top-level DFS invocation.
+ *
  * Before examining a node, check it's not been assigned to an SCC already and
  * then call Open. If the node is already "open" (i.e., is already waiting to be
  * assigned to an SCC), this will return an empty optional value and the node
@@ -78,8 +81,8 @@
  * the nodes as visited), this should be done now. Otherwise, an empty vector
  * will be returned.
  *
- * After a top-level DFS has completed, the SCC finder should be carrying no
- * state. This can be verified by calling Empty.
+ * On destruction, after a top-level DFS invocation has completed, the SCC
+ * finder will check that it is carrying no state.
  */
 template <typename Node, typename Hash = std::hash<Node>>
 class SCC {
diff --git a/stable_hash.h b/stable_hash.h
index b0b9265..f7b33df 100644
--- a/stable_hash.h
+++ b/stable_hash.h
@@ -20,10 +20,7 @@
 #ifndef STG_STABLE_HASH_H_
 #define STG_STABLE_HASH_H_
 
-#include <cstdint>
-#include <iostream>
 #include <unordered_map>
-#include <vector>
 
 #include "graph.h"
 #include "hashing.h"
diff --git a/stgdiff.cc b/stgdiff.cc
index 1189076..3ef0155 100644
--- a/stgdiff.cc
+++ b/stgdiff.cc
@@ -21,7 +21,6 @@
 
 #include <getopt.h>
 
-#include <cstddef>
 #include <cstring>
 #include <fstream>
 #include <iostream>
diff --git a/unification.h b/unification.h
index d8fde2e..081b3fa 100644
--- a/unification.h
+++ b/unification.h
@@ -21,8 +21,6 @@
 #define STG_UNIFICATION_H_
 
 #include <exception>
-#include <unordered_map>
-#include <unordered_set>
 
 #include "graph.h"
 #include "runtime.h"