blob: e2c7f65375d94be68f38ab2a94fd16f242918a71 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- Mode: C++ -*-
//
// Copyright (C) 2021-2022 Google, Inc.
//
// Author: Giuliano Procida
/// @file
///
/// This file contains ABI XML manipulation routines and a main driver.
///
/// The libxml Tree API is used. The XPath API is not used as it proved
/// to be many times slower than direct traversal but only slightly more
/// convenient.
#include <fcntl.h>
#include <unistd.h>
#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <ios>
#include <iostream>
#include <map>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <libxml/globals.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
/// Convenience alias referring to a namespace scope.
using namespace_scope = std::vector<std::string>;
/// Convenience alias referring to a set of symbols.
using symbol_set = std::unordered_set<std::string>;
/// Level of location information to preserve.
///
/// The enumerators are declared from the most detailed level to the
/// least detailed one. limit_locations compares levels with operator>
/// and so depends on this declaration order.
enum struct LocationInfo { COLUMN, LINE, FILE, NONE };
/// Mapping from the textual name of a location information level to
/// the corresponding enumerator.
static const std::map<std::string, LocationInfo> LOCATION_INFO_NAME = {
{"column", LocationInfo::COLUMN},
{"line", LocationInfo::LINE},
{"file", LocationInfo::FILE},
{"none", LocationInfo::NONE},
};
/// Mapping from the XML element names of types that can be anonymous
/// to the canonical (suffix-free) anonymous name for that kind of type.
///
/// The transparent comparator (std::less<>) allows lookups using key
/// types other than std::string, such as const char* and
/// std::string_view.
static const std::map<std::string, std::string, std::less<>> NAMED_TYPES = {
{"enum-decl", "__anonymous_enum__"},
{"class-decl", "__anonymous_struct__"},
{"union-decl", "__anonymous_union__"},
};
/// Three-way comparison of two optional strings.
///
/// A present value orders before an absent one. Two absent values are
/// equal and two present values are ordered by std::string::compare.
///
/// TODO: Revisit once C++20 std::optional::operator<=> is available; note
/// that it orders absent values first.
///
/// @param a first operand of comparison
///
/// @param b second operand of comparison
///
/// @return negative, zero or positive as a orders before, equal to or
/// after b
int
compare_optional(const std::optional<std::string>& a,
                 const std::optional<std::string>& b)
{
  const bool have_a = a.has_value();
  const bool have_b = b.has_value();
  // Exactly one operand is present: the present one orders first.
  if (have_a != have_b)
    return have_a ? -1 : 1;
  // Both absent.
  if (!have_a)
    return 0;
  // Both present.
  return a.value().compare(b.value());
}
/// Cast a C string to a libxml string.
///
/// No copy or conversion is made; the pointer is only reinterpreted.
///
/// @param str the C string (pointer)
///
/// @return the same pointer, as a type compatible with the libxml API
static const xmlChar*
to_libxml(const char* str)
{
return reinterpret_cast<const xmlChar*>(str);
}
/// Cast a libxml string to a C string.
///
/// No copy or conversion is made; the pointer is only reinterpreted.
///
/// @param str the libxml string (pointer)
///
/// @return the same pointer, as a type compatible with the C library API
static const char*
from_libxml(const xmlChar* str)
{
return reinterpret_cast<const char*>(str);
}
/// Find the comment attached to a node, if there is one.
///
/// The comment attached to a node is its immediately preceding sibling,
/// provided that sibling is a comment node.
///
/// @param node the node whose comment is wanted
///
/// @return the preceding comment node, or nullptr if the node has no
/// previous sibling or the previous sibling is not a comment
static xmlNodePtr
get_comment_node(xmlNodePtr node)
{
  xmlNodePtr candidate = node->prev;
  if (!candidate)
    return nullptr;
  if (candidate->type != XML_COMMENT_NODE)
    return nullptr;
  return candidate;
}
/// Remove a node from its document and free its storage.
///
/// The node pointer must not be used after this call.
///
/// @param node the node to remove
static void
remove_node(xmlNodePtr node)
{
// Detach the node from its tree before releasing it.
xmlUnlinkNode(node);
xmlFreeNode(node);
}
/// Remove an XML element together with its attached comment.
///
/// If the element is immediately preceded by a comment node, that
/// comment is removed as well.
///
/// @param node the element to remove
static void
remove_element(xmlNodePtr node)
{
  xmlNodePtr annotation = get_comment_node(node);
  if (annotation != nullptr)
    remove_node(annotation);
  remove_node(node);
}
/// Move a node to an element.
///
/// The node is unlinked from its current position and then added as a
/// child of the destination.
///
/// @param node the node to move
///
/// @param destination the destination element
static void
move_node(xmlNodePtr node, xmlNodePtr destination)
{
// Detach the node from its current position before re-parenting it.
xmlUnlinkNode(node);
xmlAddChild(destination, node);
}
/// Move an XML element, together with its attached comment, to another
/// element.
///
/// If the element is immediately preceded by a comment node, the comment
/// is moved first so that it still precedes the element at the
/// destination.
///
/// @param node the element to move
///
/// @param destination the destination element
static void
move_element(xmlNodePtr node, xmlNodePtr destination)
{
  xmlNodePtr annotation = get_comment_node(node);
  if (annotation != nullptr)
    move_node(annotation, destination);
  move_node(node, destination);
}
/// Collect the child nodes of a given node.
///
/// The children are copied into a vector, in document order, so that
/// callers can safely unlink or remove nodes while iterating.
///
/// @param node the node whose children to fetch
///
/// @return a vector of child nodes
static std::vector<xmlNodePtr>
get_children(xmlNodePtr node)
{
  std::vector<xmlNodePtr> children;
  xmlNodePtr cursor = node->children;
  while (cursor != nullptr)
    {
      children.push_back(cursor);
      cursor = cursor->next;
    }
  return children;
}
/// Look up an attribute of a node.
///
/// The value returned by libxml is copied into a std::string and the
/// libxml allocation is released before returning.
///
/// @param node the node
///
/// @param name the attribute name
///
/// @return the attribute value, or std::nullopt if it is not present
static std::optional<std::string>
get_attribute(xmlNodePtr node, const char* name)
{
  xmlChar* raw = xmlGetProp(node, to_libxml(name));
  if (raw == nullptr)
    return std::nullopt;
  std::string value = from_libxml(raw);
  xmlFree(raw);
  return value;
}
/// Set an attribute value.
///
/// This is a thin wrapper around xmlSetProp that takes care of the
/// string type conversions.
///
/// @param node the node
///
/// @param name the attribute name
///
/// @param value the attribute value
static void
set_attribute(xmlNodePtr node, const char* name,
const std::string& value)
{
xmlSetProp(node, to_libxml(name), to_libxml(value.c_str()));
}
/// Unset an attribute value.
///
/// This is a thin wrapper around xmlUnsetProp; its result is ignored.
///
/// @param node the node
///
/// @param name the attribute name
static void
unset_attribute(xmlNodePtr node, const char* name)
{
xmlUnsetProp(node, to_libxml(name));
}
/// Recursively delete all text nodes.
///
/// Later passes remove and move elements, which destroys the original
/// formatting anyway, so dropping the text up front simplifies analysis
/// and manipulation. Afterwards only elements and comments should
/// remain.
///
/// @param node the node to process
static void
strip_text(xmlNodePtr node)
{
  switch (node->type)
    {
    case XML_TEXT_NODE:
      remove_node(node);
      break;
    case XML_ELEMENT_NODE:
      // Iterate over a snapshot as children may be removed.
      for (xmlNodePtr child : get_children(node))
        strip_text(child);
      break;
    default:
      // Comments and other node kinds are left untouched.
      break;
    }
}
/// Insert a new text node next to a node.
///
/// @param node the node
///
/// @param after whether the text should go after (true) or before
/// (false) the node
///
/// @param text the text
static void
add_text(xmlNodePtr node, bool after, const std::string& text)
{
  xmlNodePtr text_node = xmlNewTextLen(to_libxml(text.data()), text.size());
  if (!after)
    {
      xmlAddPrevSibling(node, text_node);
      return;
    }
  xmlAddNextSibling(node, text_node);
}
/// Format an XML element by adding internal indentation and newlines.
///
/// This makes the XML readable.
///
/// @param indentation what to add to the line indentation prefix
///
/// @param prefix the current line indentation prefix
///
/// @param node the node to format
static void
format_xml(const std::string& indentation, std::string prefix, xmlNodePtr node)
{
std::vector<xmlNodePtr> children = get_children(node);
if (children.empty())
return;
// The ordering of operations here is incidental. The outcomes we want
// are: 1. an extra newline after the opening tag and indentation of
// the closing tag to match, and 2. indentation and newline for each
// child.
add_text(children[0], false, "\n");
add_text(children[children.size() - 1], true, prefix);
prefix += indentation;
for (xmlNodePtr child : children)
{
add_text(child, false, prefix);
format_xml(indentation, prefix, child);
add_text(child, true, "\n");
}
}
/// Switch attribute quoting from double quotes to single quotes.
///
/// libxml writes attributes with double quotes whereas libabigail uses
/// single quotes. The buffer is rewritten in place.
///
/// libabigail is not expected to emit attribute values containing
/// single quotes, but this code is conservative: a tag is only rewritten
/// if it contains no single quote character at all.
///
/// Comments (such as annotation comments) may legitimately contain
/// single quotes, so they are skipped entirely rather than inspected.
///
/// @param start a pointer to the start of the XML text
///
/// @param limit a pointer to just past the end of the XML text
static void
adjust_quotes(xmlChar* start, xmlChar* limit)
{
  const std::string open{"<!--"};
  const std::string close{"-->"};
  xmlChar* cursor = start;
  while (cursor < limit)
    {
      // Advance to the next '<'.
      cursor = std::find(cursor, limit, '<');
      if (cursor == limit)
        return;
      const bool is_comment =
          cursor + open.size() < limit
          && std::equal(open.begin(), open.end(), cursor);
      if (is_comment)
        {
          // Skip the comment wholesale.
          cursor += open.size();
          xmlChar* comment_end =
              std::search(cursor, limit, close.begin(), close.end());
          if (comment_end == limit)
            return;
          cursor = comment_end + close.size();
          continue;
        }
      // Some other tag; locate its end.
      ++cursor;
      xmlChar* tag_end = std::find(cursor, limit, '>');
      if (tag_end == limit)
        return;
      // libxml's xmlDocDump* functions quote attributes with ". Replacing
      // it with ' is safe provided ' does not occur anywhere in the tag
      // (where it could only be part of attribute data).
      const bool has_single_quote =
          std::find(cursor, tag_end, '\'') != tag_end;
      if (!has_single_quote)
        std::replace(cursor, tag_end, '"', '\'');
      cursor = tag_end + 1;
    }
}
/// Three-way comparison of one attribute across two XML nodes.
///
/// The attribute values are fetched as optional strings and compared
/// with compare_optional.
///
/// @param attribute the attribute to compare
///
/// @param a first XML node to compare
///
/// @param b second XML node to compare
///
/// @return an integral result, as for compare_optional
static int
compare_attributes(
    const char* attribute, const xmlNodePtr& a, const xmlNodePtr& b)
{
  const std::optional<std::string> value_a = get_attribute(a, attribute);
  const std::optional<std::string> value_b = get_attribute(b, attribute);
  return compare_optional(value_a, value_b);
}
/// Names of elements that may be dropped when they have no children.
static const std::set<std::string> DROP_IF_EMPTY = {
  "elf-variable-symbols",
  "elf-function-symbols",
  "namespace-decl",
  "abi-instr",
  "abi-corpus",
  "abi-corpus-group",
};
/// Recursively drop elements that are empty and safe to drop.
///
/// Children are processed first, so an element that becomes empty
/// because all of its children were dropped is itself considered. The
/// root element is never dropped.
///
/// @param node the element to process
static void
drop_empty(xmlNodePtr node)
{
  if (node->type != XML_ELEMENT_NODE)
    return;
  // Bottom-up: handle descendants before deciding about this node.
  for (xmlNodePtr child : get_children(node))
    drop_empty(child);
  // The root element is kept, even if empty.
  const bool is_root = node->parent->type == XML_DOCUMENT_NODE;
  if (is_root)
    return;
  const bool is_empty = node->children == nullptr;
  if (is_empty && DROP_IF_EMPTY.count(from_libxml(node->name)) != 0)
    remove_element(node);
}
/// Build the ELF symbol id of an elf-symbol element.
///
/// The id is not stored as an attribute; it is derived as follows:
///
/// * name (if symbol is not versioned)
/// * name@version (if symbol is versioned but not the default version)
/// * name@@version (if symbol is versioned and the default version)
///
/// The element must have a name attribute.
///
/// @param node the elf-symbol element
///
/// @return the ELF symbol id
static std::string
get_elf_symbol_id(xmlNodePtr node)
{
  const std::optional<std::string> name = get_attribute(node, "name");
  assert(name);
  std::string symbol_id = name.value();
  const std::optional<std::string> version = get_attribute(node, "version");
  if (!version)
    return symbol_id;
  const std::optional<std::string> default_flag =
      get_attribute(node, "is-default-version");
  const bool is_default_version = default_flag && default_flag.value() == "yes";
  symbol_id += is_default_version ? "@@" : "@";
  symbol_id += version.value();
  return symbol_id;
}
/// Names of elements that can carry location attributes.
static const std::set<std::string> HAS_LOCATION = {
  "class-decl",
  "enum-decl",
  "function-decl",
  "parameter",
  "typedef-decl",
  "union-decl",
  "var-decl"
};
/// Recursively limit location information.
///
/// Location attributes more detailed than the requested level are
/// removed: column first, then line, then filepath. The comparisons
/// depend on the declaration order of the LocationInfo enumerators.
///
/// @param location_info the level of location information to retain
///
/// @param node the element to process
static void
limit_locations(LocationInfo location_info, xmlNodePtr node)
{
  if (node->type != XML_ELEMENT_NODE)
    return;
  const bool has_location = HAS_LOCATION.count(from_libxml(node->name)) != 0;
  if (has_location)
    {
      // Each level implies the removal done by all more detailed levels.
      if (location_info > LocationInfo::COLUMN)
        unset_attribute(node, "column");
      if (location_info > LocationInfo::LINE)
        unset_attribute(node, "line");
      if (location_info > LocationInfo::FILE)
        unset_attribute(node, "filepath");
    }
  for (xmlNodePtr child : get_children(node))
    limit_locations(location_info, child);
}
/// Handle unreachable elements.
///
/// Reachability is defined to be union of contains, containing and
/// refers-to relationships for types, declarations and symbols. The
/// roots for reachability are the ELF elements in the ABI.
///
/// The subrange element requires special treatment. It has a useless
/// type id, but it is not a type and its type id aliases with that of
/// all other subranges of the same length. So don't treat it as a type.
///
/// The function works in three phases:
///
/// 1. traverse the XML, recording ELF symbol ids and building a graph
///    whose vertices are types and symbols;
/// 2. run a depth-first search from each ELF symbol (after mapping
///    aliases to their main symbol), counting the ELF symbols that have
///    no corresponding declaration;
/// 3. if pruning was requested, traverse the XML again and remove type
///    and declaration elements that were not reached.
///
/// The process exits with status 1 if an element is both a type and a
/// symbol.
///
/// @param prune whether to prune unreachable elements
///
/// @param report whether to report untyped symbols
///
/// @param alias_map mapping from corpus to alias to main elf-symbol-id;
/// it must contain an entry for every corpus key under which an ELF
/// symbol is recorded (this is asserted)
///
/// @param root the XML root element
///
/// @return the number of untyped symbols
static size_t
handle_unreachable(
bool prune, bool report,
const std::unordered_map<xmlNodePtr,
std::unordered_map<std::string,
std::string>>& alias_map,
xmlNodePtr root)
{
// ELF symbol ids, per corpus.
std::set<std::pair<xmlNodePtr, std::string>> elf_symbol_ids;
// Simple way of allowing two kinds of nodes: nullptr=>type,
// node=>symbol.
using vertex_t = std::pair<xmlNodePtr, std::string>;
// Graph vertices.
std::set<vertex_t> vertices;
// Graph edges.
std::map<vertex_t, std::set<vertex_t>> edges;
// Keep track of type / symbol nesting so we can identify contains,
// containing and refers-to relationships.
std::vector<vertex_t> stack;
// Keep track of which corpus we are in as symbols and elf-symbol-ids are
// scoped per corpus. This is shared by both XML traversals below.
xmlNodePtr current_corpus = nullptr;
// Process an XML node, adding a vertex and possibly some edges. The
// lambda is held in a std::function so that it can call itself.
std::function<void(xmlNodePtr)> process_node = [&](xmlNodePtr node) {
// We only care about elements and not comments, at this stage.
if (node->type != XML_ELEMENT_NODE)
return;
const char* node_name = from_libxml(node->name);
// Is this a corpus?
if (strcmp(node_name, "abi-corpus") == 0)
current_corpus = node;
// Is this an ELF symbol?
if (strcmp(node_name, "elf-symbol") == 0)
{
elf_symbol_ids.insert(
std::make_pair(current_corpus, get_elf_symbol_id(node)));
// Early return is safe, but not necessary.
return;
}
// Is this a type? Note that the same id may appear multiple times.
// The id attribute of a subrange element is deliberately ignored.
const auto id = strcmp(node_name, "subrange") != 0
? get_attribute(node, "id")
: std::optional<std::string>();
if (id)
{
vertex_t type_vertex{nullptr, id.value()};
vertices.insert(type_vertex);
const auto naming_typedef_id = get_attribute(node, "naming-typedef-id");
if (naming_typedef_id)
{
// This is an odd one, there can be a backwards link from an
// anonymous type to a typedef that refers to it. The -t
// option will drop these, but if they are still present, we
// should model the link to avoid the risk of dangling
// references.
vertex_t naming_typedef_vertex{nullptr, naming_typedef_id.value()};
edges[type_vertex].insert(naming_typedef_vertex);
}
if (!stack.empty())
{
// Parent<->child dependencies; record dependencies both
// ways to avoid holes in XML types and declarations.
const auto& parent = stack.back();
edges[parent].insert(type_vertex);
edges[type_vertex].insert(parent);
}
// Record the type.
stack.push_back(type_vertex);
}
// Is this a (declaration expected to be linked to a) symbol?
const auto symbol = get_attribute(node, "elf-symbol-id");
if (symbol)
{
vertex_t symbol_vertex{current_corpus, symbol.value()};
vertices.insert(symbol_vertex);
if (!stack.empty())
{
// Parent<->child dependencies; record dependencies both
// ways to avoid making holes in XML types and declarations.
//
// Symbols exist outside of the type hierarchy, so choosing
// to make them depend on a containing type scope and vice
// versa is conservative and probably not necessary.
const auto& parent = stack.back();
edges[parent].insert(symbol_vertex);
edges[symbol_vertex].insert(parent);
}
// Record the symbol.
stack.push_back(symbol_vertex);
// In practice there will be at most one symbol on the stack; we could
// verify this here, but it wouldn't achieve anything.
}
// Being both would make the stack ordering ambiguous.
if (id && symbol)
{
std::cerr << "cannot handle element which is both type and symbol\n";
exit(1);
}
// Is there a reference to another type?
const auto type_id = get_attribute(node, "type-id");
if (type_id && !stack.empty())
{
// The enclosing type or symbol refers to another type.
const auto& parent = stack.back();
vertex_t type_id_vertex{nullptr, type_id.value()};
edges[parent].insert(type_id_vertex);
}
// Process recursively.
for (auto child : get_children(node))
process_node(child);
// Restore the stack. At most one of these fires, given the check above.
if (symbol)
stack.pop_back();
if (id)
stack.pop_back();
};
// Traverse the whole root element and build a graph.
process_node(root);
// Simple DFS. Vertices without outgoing edges are still marked as seen.
std::set<vertex_t> seen;
std::function<void(vertex_t)> dfs = [&](vertex_t vertex) {
if (!seen.insert(vertex).second)
return;
auto it = edges.find(vertex);
if (it != edges.end())
for (auto to : it->second)
dfs(to);
};
// Count of how many symbols are untyped.
size_t untyped = 0;
// Traverse the graph, starting from the ELF symbols.
for (const auto& [corpus, symbol_id] : elf_symbol_ids)
{
const auto corpus_it = alias_map.find(corpus);
assert(corpus_it != alias_map.end());
const auto& corpus_alias_map = corpus_it->second;
// Map an alias to its main symbol id; other ids are used as they are.
const auto it = corpus_alias_map.find(symbol_id);
const auto& mapped_symbol_id = it != corpus_alias_map.end()
? it->second : symbol_id;
vertex_t symbol_vertex{corpus, mapped_symbol_id};
if (vertices.count(symbol_vertex))
{
dfs(symbol_vertex);
}
else
{
// No declaration carries this symbol id in this corpus.
if (report)
std::cerr << "no declaration found for ELF symbol with id "
<< symbol_id << '\n';
++untyped;
}
}
// This is a DFS with early stopping.
std::function<void(xmlNodePtr)> remove_unseen = [&](xmlNodePtr node) {
if (node->type != XML_ELEMENT_NODE)
return;
const char* node_name = from_libxml(node->name);
// Is this a corpus?
if (strcmp(node_name, "abi-corpus") == 0)
current_corpus = node;
// Return if we know that this is a type to keep or drop in its
// entirety.
const auto id = strcmp(node_name, "subrange") != 0
? get_attribute(node, "id")
: std::optional<std::string>();
if (id)
{
if (!seen.count(vertex_t{nullptr, id.value()}))
remove_element(node);
return;
}
// Return if we know that this is a declaration to keep or drop in
// its entirety. Note that var-decl and function-decl are the only
// elements that can have an elf-symbol-id attribute. A declaration
// without an elf-symbol-id attribute is dropped too.
if (strcmp(node_name, "var-decl") == 0
|| strcmp(node_name, "function-decl") == 0)
{
const auto symbol = get_attribute(node, "elf-symbol-id");
if (!(symbol && seen.count(vertex_t{current_corpus, symbol.value()})))
remove_element(node);
return;
}
// Otherwise, this is not a type, declaration or part thereof, so
// process child elements.
for (auto child : get_children(node))
remove_unseen(child);
};
if (prune)
// Traverse the XML, removing unseen elements.
remove_unseen(root);
return untyped;
}
/// Tidy anonymous types in various ways.
///
/// 1. Normalise anonymous type names by removing the numerical suffix.
///
/// Anonymous type names take the form __anonymous_foo__N where foo is
/// one of enum, struct or union and N is an optional numerical suffix.
/// The suffices are senstive to processing order and do not convey
/// useful ABI information. They can cause spurious harmless diffs and
/// make XML diffing and rebasing harder.
///
/// It's best to remove the suffix.
///
/// 2. Reanonymise anonymous types that have been given names.
///
/// A recent change to abidw changed its behaviour for any anonymous
/// type that has a naming typedef. In addition to linking the typedef
/// and type in both directions, the code now gives (some) anonymous
/// types the same name as the typedef. This misrepresents the original
/// types.
///
/// Such types should be anonymous.
///
/// 3. Discard naming typedef backlinks.
///
/// The attribute naming-typedef-id is a backwards link from an
/// anonymous type to the typedef that refers to it. It is ignored by
/// abidiff.
///
/// Unfortunately, libabigail sometimes conflates multiple anonymous
/// types that have naming typedefs and only one of the typedefs can
/// "win". ABI XML is thus sensitive to processing order and can also
/// end up containing definitions of an anonymous type with differing
/// naming-typedef-id attributes.
///
/// It's best to just drop the attribute.
///
/// @param node the XML node to process
static void
handle_anonymous_types(bool normalise, bool reanonymise, bool discard_naming,
xmlNodePtr node)
{
if (node->type != XML_ELEMENT_NODE)
return;
const auto it = NAMED_TYPES.find(from_libxml(node->name));
if (it != NAMED_TYPES.end())
{
const auto& anon = it->second;
const auto name_attribute = get_attribute(node, "name");
const auto& name =
name_attribute ? name_attribute.value() : std::string();
const auto anon_attr = get_attribute(node, "is-anonymous");
const bool is_anon = anon_attr && anon_attr.value() == "yes";
const auto naming_attribute = get_attribute(node, "naming-typedef-id");
if (normalise && is_anon && name != anon) {
// __anonymous_foo__123 -> __anonymous_foo__
set_attribute(node, "name", anon);
}
if (reanonymise && !is_anon && naming_attribute) {
// bar with naming typedef -> __anonymous_foo__
set_attribute(node, "is-anonymous", "yes");
set_attribute(node, "name", anon);
}
if (discard_naming && naming_attribute)
unset_attribute(node, "naming-typedef-id");
}
for (auto child : get_children(node))
handle_anonymous_types(normalise, reanonymise, discard_naming, child);
}
/// Builds a mapping from qualified types to the underlying type ids.
///
/// Recursively constructs a mapping from qualified types to the underlying
/// type ids found in the XML tree rooted at the given node.
///
/// @param node node of the XML tree to process
///
/// @param qualifier_id_to_type_id map from qualified types to underlying type
/// ids being constructed
static void
build_qualifier_id_to_type_id_map(
const xmlNodePtr node,
std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
{
if (node->type != XML_ELEMENT_NODE)
return;
if (strcmp(from_libxml(node->name), "qualified-type-def") == 0)
{
const auto id = get_attribute(node, "id");
const auto type_id = get_attribute(node, "type-id");
if (!id || !type_id)
{
std::cerr << "found qualified type definition with missing id and/or "
<< "type id\nid: " << id.value_or("(missing)")
<< "\ntype id: " << type_id.value_or("(missing)") << '\n';
exit(1);
}
const auto& id_value = id.value();
const auto& type_id_value = type_id.value();
auto [it, inserted] =
qualifier_id_to_type_id.insert({id_value, type_id_value});
if (!inserted && it->second != type_id_value)
{
std::cerr << "conflicting type ids ('" << it->second << "' & '"
<< type_id_value << "') found for qualified type with "
<< "id: " << id_value << '\n';
exit(1);
}
}
else
{
for (auto child : get_children(node))
build_qualifier_id_to_type_id_map(child, qualifier_id_to_type_id);
}
}
/// Collapse chains of qualifiers so that every qualified type id maps
/// directly to an unqualified type id.
///
/// Each mapped value is followed through the map until it reaches an id
/// that is not itself a key. Typedefs are not looked through. The
/// process exits with status 1 if a chain loops back on itself.
///
/// @param qualifier_id_to_type_id map from qualified types to underlying type
/// ids; updated in place
static void
resolve_qualifier_chains(
    std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
{
  const auto not_found = qualifier_id_to_type_id.end();
  for (auto& entry : qualifier_id_to_type_id)
    {
      const std::string& qualified = entry.first;
      // Only mapped values are changed, so iteration stays valid.
      std::string& target = entry.second;
      std::unordered_set<std::string> visited;
      for (;;)
        {
          const bool first_visit = visited.insert(target).second;
          if (!first_visit)
            {
              std::cerr << "dequalification of type with id '" << qualified
                        << "' ran into a self referencing loop\n";
              exit(1);
            }
          const auto next = qualifier_id_to_type_id.find(target);
          if (next == not_found)
            break;
          target = next->second;
        }
    }
}
/// Removes top-level qualifiers from function parameter and return types.
///
/// Recursively removes top-level qualifiers from parameter and return types of
/// all function declarations and function types found in the XML tree rooted
/// at the given node.
///
/// This requires also requires a map of qualified types to the underlying type
/// ids, which enables the unqualification of qualified types.
///
/// @param node node of the XML tree to process
///
/// @param qualifier_id_to_type_id map from qualified types to underlying type
/// ids
static void
remove_function_parameter_type_qualifiers(
const xmlNodePtr node,
const std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
{
if (node->type != XML_ELEMENT_NODE)
return;
if (strcmp(from_libxml(node->name), "function-decl") == 0 ||
strcmp(from_libxml(node->name), "function-type") == 0)
{
bool type_changed = false;
for (auto child : get_children(node))
if (const auto type_id = get_attribute(child, "type-id"))
{
const auto& type_id_value = type_id.value();
auto it = qualifier_id_to_type_id.find(type_id_value);
if (it != qualifier_id_to_type_id.end())
{
type_changed = true;
set_attribute(child, "type-id", it->second);
// Parameter or return type has been modified, making a comment
// describing the type for this node inconsistent. Thus the
// comment must be removed if it exists.
if (auto comment_node = get_comment_node(child))
remove_node(comment_node);
}
}
if (type_changed)
{
// Parameter or return type has been modified, making a comment
// describing the type for this node inconsistent. Thus the comment
// must be removed if it exists.
if (auto comment_node = get_comment_node(node))
remove_node(comment_node);
}
}
else
{
for (auto child : get_children(node))
remove_function_parameter_type_qualifiers(child, qualifier_id_to_type_id);
}
}
/// Recursively remove the attributes emitted by abidw --load-all-types.
///
/// With this invocation and if any user-defined types are deemed
/// unreachable, libabigail will output a tracking-non-reachable-types
/// attribute on top-level elements and an is-non-reachable attribute on
/// each such type element.
///
/// abitidy has its own graph-theoretic notion of reachability and these
/// attributes have no ABI relevance, so they are simply dropped.
///
/// @param node the XML node to process
void
clear_non_reachable(xmlNodePtr node)
{
  if (node->type != XML_ELEMENT_NODE)
    return;
  const char* node_name = from_libxml(node->name);
  const bool is_top_level = strcmp(node_name, "abi-corpus-group") == 0
                            || strcmp(node_name, "abi-corpus") == 0;
  if (is_top_level)
    {
      unset_attribute(node, "tracking-non-reachable-types");
    }
  else
    {
      const bool is_user_defined_type = NAMED_TYPES.count(node_name) != 0;
      if (is_user_defined_type)
        unset_attribute(node, "is-non-reachable");
    }
  for (xmlNodePtr child : get_children(node))
    clear_non_reachable(child);
}
/// Determine the effective name of a given node.
///
/// The effective name is the same as the value of the 'name' attribute
/// for all nodes except nodes which represent anonymous types. For
/// anonymous types, the function returns std::nullopt.
///
/// A node represents an anonymous type when its 'is-anonymous' attribute
/// has the value "yes". This is the same test that
/// handle_anonymous_types applies; an attribute with any other value
/// does not make the node anonymous.
///
/// @param node the node for which effective name has to be determined
///
/// @return an optional name string; std::nullopt for anonymous types and
/// for nodes without a 'name' attribute
std::optional<std::string>
get_effective_name(xmlNodePtr node)
{
  const std::optional<std::string> anonymous_flag =
      get_attribute(node, "is-anonymous");
  const bool is_anonymous = anonymous_flag && anonymous_flag.value() == "yes";
  if (is_anonymous)
    return std::nullopt;
  return get_attribute(node, "name");
}
/// Record type ids for anonymous types that have to be renumbered.
///
/// This constructs a map from the ids that need to be renumbered to the XML
/// node where the id is defined/declared. Also records hexadecimal hashes used
/// by non-anonymous types.
///
/// An id is renumbered when its element is one of the kinds listed in
/// NAMED_TYPES and has no effective name. Every other id is examined
/// and, if it consists of exactly 8 hexadecimal digits, its numeric
/// value is reserved in used_hashes so that newly generated ids cannot
/// collide with it.
///
/// The process exits with status 1 if the same anonymous type id is
/// encountered more than once.
///
/// @param node the node being processed
///
/// @param to_renumber map from ids to be renumbered to corresponding XML node
///
/// @param used_hashes set of hashes used by non-anonymous type ids
static void
record_ids_to_renumber(
    xmlNodePtr node,
    std::unordered_map<std::string, xmlNodePtr>& to_renumber,
    std::unordered_set<size_t>& used_hashes)
{
  if (node->type != XML_ELEMENT_NODE)
    return;
  // Children are processed before the node itself.
  for (auto child : get_children(node))
    record_ids_to_renumber(child, to_renumber, used_hashes);
  const auto id_attr = get_attribute(node, "id");
  if (!id_attr)
    return;
  const auto& id = id_attr.value();
  const std::string_view node_name(from_libxml(node->name));
  const bool is_anonymous_type_candidate = NAMED_TYPES.count(node_name) != 0;
  if (!is_anonymous_type_candidate || get_effective_name(node))
    {
      const bool is_hexadecimal = std::all_of(
          id.begin(), id.end(),
          [](unsigned char c) { return std::isxdigit(c) != 0; });
      if (id.size() == 8 && is_hexadecimal)
        {
          // Do not check for successful insertion since there can be multiple
          // declarations/definitions for a type.
          const size_t hash = std::stoul(id, nullptr, 16);
          used_hashes.insert(hash);
        }
    }
  else
    {
      // Check for successful insertion since anonymous types are not prone to
      // having multiple definitions/declarations.
      if (!to_renumber.insert({id, node}).second)
        {
          std::cerr << "Found multiple definitions/declarations of anonymous "
                    << "type with id: " << id << '\n';
          exit(1);
        }
    }
}
/// Compute a stable 32-bit hash of a string.
///
/// This is the 32-bit FNV-1a algorithm: starting from the offset basis,
/// each byte is XORed into the hash, which is then multiplied by the FNV
/// prime (modulo 2^32). The algorithm, reference code and constants are
/// all unencumbered. It is fast and has reasonable distribution
/// properties.
///
/// std::hash has no portability or stability guarantees so is
/// unsuitable where reproducibility is a requirement such as in XML
/// output.
///
/// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
///
/// @param str the string to hash
///
/// @return an unsigned 32 bit hash value
static uint32_t
fnv_hash(const std::string& str)
{
  constexpr uint32_t fnv_offset_basis = 0x811c9dc5;
  constexpr uint32_t fnv_prime = 0x01000193;
  uint32_t digest = fnv_offset_basis;
  for (std::string::size_type i = 0; i < str.size(); ++i)
    {
      // Treat each character as an unsigned byte.
      digest ^= static_cast<uint8_t>(str[i]);
      digest *= fnv_prime;
    }
  return digest;
}
/// Generate a fresh 32 bit type id, rendered in hexadecimal.
///
/// The given content is hashed with fnv_hash. If the resulting value is
/// already in use, linear probing (incrementing the 32 bit value) is
/// used until an unused value is found. The chosen value is recorded in
/// the set of used hashes.
///
/// @param hash_content the string which is used to generate a hash
///
/// @param used_hashes the set of hashes which have already been used
///
/// @return the new hash as 8 zero-padded lower-case hexadecimal digits
static std::string
generate_new_id(const std::string& hash_content,
                std::unordered_set<size_t>& used_hashes)
{
  uint32_t candidate = fnv_hash(hash_content);
  // Probe linearly past any values that are already taken.
  while (used_hashes.count(candidate) != 0)
    ++candidate;
  used_hashes.insert(candidate);
  std::ostringstream rendered;
  rendered << std::setw(8) << std::setfill('0') << std::hex << candidate;
  return rendered.str();
}
/// Locate the first member of a user defined type.
///
/// For an enum-decl element this is its first enumerator child. For any
/// other element it is the first var-decl child of its first data-member
/// child.
///
/// @param node the node being processed
///
/// @return the node which represents the first member, or nullptr if
/// there is none
static xmlNodePtr
find_first_member(xmlNodePtr node)
{
  // Return the first child element of parent with the given name.
  const auto first_child_named =
      [](const xmlNodePtr parent, const std::string_view wanted) -> xmlNodePtr {
        for (xmlNodePtr child = parent->children; child; child = child->next)
          {
            if (child->type != XML_ELEMENT_NODE)
              continue;
            if (from_libxml(child->name) == wanted)
              return child;
          }
        return nullptr;
      };
  const bool is_enum = strcmp(from_libxml(node->name), "enum-decl") == 0;
  if (is_enum)
    return first_child_named(node, "enumerator");
  xmlNodePtr data_member = first_child_named(node, "data-member");
  if (data_member == nullptr)
    return nullptr;
  return first_child_named(data_member, "var-decl");
}
/// Calculate new type id for a given old type id.
///
/// This resolves the old type ids for anonymous types to new ones, while ids
/// which do not belong to anonymous types are returned as they are.
///
/// The new id is generated from the XML element name of the anonymous type
/// combined with the effective name and (recursively resolved) type id of its
/// first member, or with a fixed marker if no first member is found. Results
/// are memoised in type_id_map. If resolving an id ends up requiring the id
/// itself, an error is reported and the program exits with status 1.
///
/// @param type_id old type id
///
/// @param to_renumber map from ids to be renumbered to corresponding XML node
///
/// @param used_hashes set of hashes used by other type ids
///
/// @param type_id_map mapping from old type ids to new ones
///
/// @return resolved type id
static std::string
resolve_ids_to_renumber(
    const std::string& type_id,
    const std::unordered_map<std::string, xmlNodePtr>& to_renumber,
    std::unordered_set<size_t>& used_hashes,
    std::unordered_map<std::string, std::string>& type_id_map)
{
  // Check whether the given type_id needs to be renumbered. If not, the type_id
  // can be returned since it does not represent an anonymous type.
  const auto to_renumber_it = to_renumber.find(type_id);
  if (to_renumber_it == to_renumber.end())
    return type_id;
  // Insert an empty string placeholder to prevent infinite loops.
  const auto [type_mapping, inserted] = type_id_map.insert({type_id, {}});
  // Keep a reference to the mapped value instead of using the iterator later:
  // the recursive call below may insert into type_id_map and cause a rehash,
  // which invalidates iterators but leaves references to elements valid.
  std::string& new_type_id = type_mapping->second;
  if (!inserted)
    {
      if (!new_type_id.empty())
        return new_type_id;
      // The placeholder is still empty, so we are in the middle of resolving
      // this very id further up the call stack.
      std::cerr << "new type id depends on itself for type with id: "
                << type_id << '\n';
      exit(1);
    }
  const auto& node = to_renumber_it->second;
  std::ostringstream hash_content;
  hash_content << from_libxml(node->name);
  if (auto first_member = find_first_member(node))
    {
      // Create hash content by combining the name & resolved type id of the
      // first member and the kind of anonymous type.
      if (auto name = get_effective_name(first_member))
        hash_content << '-' << name.value();
      if (auto member_type_id = get_attribute(first_member, "type-id"))
        hash_content << '-' << resolve_ids_to_renumber(
            member_type_id.value(), to_renumber, used_hashes, type_id_map);
    }
  else
    {
      // No member information available. Possibly type is empty.
      hash_content << "__empty";
    }
  new_type_id = generate_new_id(hash_content.str(), used_hashes);
  return new_type_id;
}
/// Rewrite type ids throughout an XML subtree.
///
/// The "id", "type-id" and "naming-typedef-id" attributes of the given
/// element and, recursively, of its children are examined. Whenever an
/// attribute value appears as a key in the map, the attribute is set to the
/// mapped value. Nodes that are not elements are ignored.
///
/// @param node the node which is being processed
///
/// @param type_id_map map from old type ids to replace to new ones
static void
renumber_type_ids(
    xmlNodePtr node,
    const std::unordered_map<std::string, std::string>& type_id_map)
{
  if (node->type != XML_ELEMENT_NODE)
    return;
  // The attributes that may hold a type id, in the order they are rewritten.
  static constexpr const char* ID_ATTRIBUTES[] = {
    "id",
    "type-id",
    "naming-typedef-id",
  };
  for (const char* attribute_name : ID_ATTRIBUTES)
    {
      const auto old_id = get_attribute(node, attribute_name);
      if (!old_id)
        continue;
      const auto replacement = type_id_map.find(old_id.value());
      if (replacement != type_id_map.end())
        set_attribute(node, attribute_name, replacement->second);
    }
  for (auto child : get_children(node))
    renumber_type_ids(child, type_id_map);
}
/// Determine whether one XML element is a subtree of another.
///
/// Types are sometimes emitted several times, identically, and member
/// typedefs are sometimes emitted separately from their types. Both lead to
/// duplicate XML fragments for a single type id.
///
/// Such duplicates can be resolved by finding an occurrence that subsumes
/// all the others, which can then be dropped. This function provides the
/// "is subsumed by" test: element names and effective names must agree,
/// every relevant attribute on the left must be present with the same value
/// on the right, and the left child elements must be (recursively) a
/// subsequence of the right child elements.
///
/// @param left the first element to compare
///
/// @param right the second element to compare
///
/// @return whether the first element is a subtree of the second
bool
sub_tree(xmlNodePtr left, xmlNodePtr right)
{
  // Attributes that play no part in the comparison, either because they are
  // irrelevant for ABI monitoring or because another check covers them.
  static const std::unordered_set<std::string> IRRELEVANT_ATTRIBUTES = {
    // Source location information. This can vary between duplicate type
    // definitions.
    "filepath",
    "line",
    "column",
    // Anonymous type to typedef backlinks.
    "naming-typedef-id",
    // Annotation that can appear with --load-all-types.
    "is-non-reachable",
    // Handled while checking for effective name equivalence.
    "name",
    "is-anonymous",
  };
  // Both elements must be of the same kind.
  const char* const element_name = from_libxml(left->name);
  if (strcmp(element_name, from_libxml(right->name)) != 0)
    return false;
  // Both elements must have the same effective name.
  if (get_effective_name(left) != get_effective_name(right))
    return false;
  // EXCEPTION: libabigail emits the access specifier for the type it's
  // trying to "emit in scope" rather than for what may be a containing type;
  // so the access attribute of member-type elements is allowed to differ.
  const bool is_member_type = strcmp(element_name, "member-type") == 0;
  // Every remaining left attribute must exist on the right with equal value.
  for (auto property = left->properties; property; property = property->next)
    {
      const char* key = from_libxml(property->name);
      if (IRRELEVANT_ATTRIBUTES.count(key))
        continue;
      if (is_member_type && strcmp(key, "access") == 0)
        continue;
      const auto expected = get_attribute(left, key);
      assert(expected);
      const auto actual = get_attribute(right, key);
      if (!actual)
        return false;
      if (expected.value() != actual.value())
        return false;
    }
  // Greedily match the left child elements, in order, against the right
  // ones; the left children must all be consumed for a match.
  xmlNodePtr unmatched = xmlFirstElementChild(left);
  for (xmlNodePtr candidate = xmlFirstElementChild(right);
       unmatched && candidate;
       candidate = xmlNextElementSibling(candidate))
    if (sub_tree(unmatched, candidate))
      unmatched = xmlNextElementSibling(unmatched);
  return unmatched == nullptr;
}
/// Eliminate non-conflicting / report conflicting duplicate definitions.
///
/// This function can eliminate exact type duplicates and duplicates
/// where there is at least one maximal definition. It can report the
/// remaining, conflicting duplicate definitions.
///
/// If a type has duplicate definitions in multiple namespace scopes or
/// definitions with different effective names, these are considered as
/// conflicting duplicate definitions and should not be reordered. This function
/// reports how many such types it finds.
///
/// @param eliminate whether to eliminate non-conflicting duplicates
///
/// @param report whether to report conflicting duplicate definitions
///
/// @param root the root XML element
///
/// @return the number of conflicting duplicate definitions
size_t handle_duplicate_types(bool eliminate, bool report, xmlNodePtr root)
{
// map of type-id to pair of set of namespace scopes and vector of
// xmlNodes
std::unordered_map<
std::string,
std::pair<
std::set<namespace_scope>,
std::vector<xmlNodePtr>>> types;
namespace_scope namespaces;
// find all type occurrences
std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) {
if (node->type != XML_ELEMENT_NODE)
return;
const char* node_name = from_libxml(node->name);
std::optional<std::string> namespace_name;
if (strcmp(node_name, "namespace-decl") == 0)
namespace_name = get_attribute(node, "name");
if (namespace_name)
namespaces.push_back(namespace_name.value());
if (strcmp(node_name, "abi-corpus-group") == 0
|| strcmp(node_name, "abi-corpus") == 0
|| strcmp(node_name, "abi-instr") == 0
|| namespace_name)
{
for (auto child : get_children(node))
dfs(child);
}
else
{
const auto id = get_attribute(node, "id");
if (id)
{
auto& info = types[id.value()];
info.first.insert(namespaces);
info.second.push_back(node);
}
}
if (namespace_name)
namespaces.pop_back();
};
dfs(root);
size_t conflicting_types = 0;
for (const auto& [id, scopes_and_definitions] : types)
{
const auto& [scopes, definitions] = scopes_and_definitions;
if (scopes.size() > 1)
{
if (report)
std::cerr << "conflicting scopes found for type '" << id << "'\n";
++conflicting_types;
continue;
}
const size_t count = definitions.size();
if (count <= 1)
continue;
// Find a potentially maximal candidate by scanning through and
// retaining the new definition if it's a supertree of the current
// candidate.
std::vector<bool> ok(count);
size_t candidate = 0;
ok[candidate] = true;
for (size_t ix = 1; ix < count; ++ix)
if (sub_tree(definitions[candidate], definitions[ix]))
{
candidate = ix;
ok[candidate] = true;
}
// Verify the candidate is indeed maximal by scanning the
// definitions not already known to be subtrees of it.
bool bad = false;
const auto& candidate_definition = definitions[candidate];
const char* candidate_node_name = from_libxml(candidate_definition->name);
const auto& candidate_effective_name =
get_effective_name(candidate_definition);
for (size_t ix = 0; ix < count; ++ix)
{
const auto& definition = definitions[ix];
if (!ok[ix] && !sub_tree(definition, candidate_definition))
{
if (strcmp(from_libxml(definition->name), candidate_node_name) != 0
|| get_effective_name(definition) != candidate_effective_name)
++conflicting_types;
bad = true;
break;
}
}
if (bad)
{
if (report)
std::cerr << "unresolvable duplicate definitions found for type '"
<< id << "'\n";
continue;
}
if (eliminate)
// Remove all but the maximal definition.
for (size_t ix = 0; ix < count; ++ix)
if (ix != candidate)
remove_element(definitions[ix]);
}
return conflicting_types;
}
/// Names of abi-instr attributes that are allowed to have differing values
/// across the abi-instr elements merged by sort_instrs_into_corpus. Such an
/// attribute is given the value "various" on the replacement abi-instr; any
/// other attribute with differing values is treated as an error there.
static const std::set<std::string> INSTR_VARIABLE_ATTRIBUTES = {
"path",
"comp-dir-path",
"language",
};
/// Collect elements of abi-instr elements by namespace.
///
/// Namespaces are not returned but are recursively traversed with the
/// namespace stack being maintained. Other elements are associated with
/// the current namespace.
///
/// @param nodes the nodes to traverse
///
/// @return child elements grouped by namespace scope
static std::map<namespace_scope, std::vector<xmlNodePtr>>
get_children_by_namespace(const std::vector<xmlNodePtr>& nodes)
{
std::map<namespace_scope, std::vector<xmlNodePtr>> result;
namespace_scope scope;
std::function<void(xmlNodePtr)> process = [&](xmlNodePtr node) {
if (node->type != XML_ELEMENT_NODE)
return;
std::optional<std::string> namespace_name;
const char* node_name = from_libxml(node->name);
if (strcmp(node_name, "namespace-decl") == 0)
namespace_name = get_attribute(node, "name");
if (namespace_name)
{
scope.push_back(namespace_name.value());
for (auto child : get_children(node))
process(child);
scope.pop_back();
}
else
result[scope].push_back(node);
};
for (auto node : nodes)
for (auto child : get_children(node))
process(child);
return result;
}
/// Determine whether an element contains an elf-symbol-id attribute.
///
/// The element itself is checked first, then its children recursively. The
/// search stops at the first match. Nodes that are not elements never match.
///
/// @param node the node to examine recursively
///
/// @return whether or not an elf-symbol-id attribute was found
static bool
contains_elf_symbol_id(xmlNodePtr node)
{
  if (node->type != XML_ELEMENT_NODE)
    return false;
  if (get_attribute(node, "elf-symbol-id"))
    return true;
  const auto children = get_children(node);
  return std::any_of(children.begin(), children.end(),
                     contains_elf_symbol_id);
}
/// Sort instrs into a corpus.
///
/// The given instrs (grouped by source corpus) are sorted and moved
/// into the destination corpus, except that elements containing
/// symbol-linked declarations are not moved between corpora.
///
/// A single replacement abi-instr element is appended to the destination
/// corpus. Its attributes are derived from those of the given instrs: an
/// attribute with one distinct value keeps that value, an attribute listed
/// in INSTR_VARIABLE_ATTRIBUTES with several distinct values is set to
/// "various", and any other attribute with several distinct values causes an
/// error message, removal of the replacement and an early return with
/// nothing moved.
///
/// Within each namespace scope, elements are stably sorted by XML element
/// name, then effective name, then the mangled-name attribute, then the id
/// attribute. namespace-decl elements are recreated on demand beneath the
/// replacement.
///
/// Afterwards, each original instr with no remaining content (as determined
/// by get_children_by_namespace) is removed, as is the replacement if it
/// ended up with no children. Nothing is done if instrs is empty.
///
/// This loses annotations (XML comments) on namespace-decl elements.
/// It would have been a fair amount of extra work to preserve them.
///
/// @param where the XML abi-corpus element into which to move elements
///
/// @param instrs a list of pairs of containing corpus and XML abi-instr
/// element out of which to move elements
static void
sort_instrs_into_corpus(
xmlNodePtr where,
const std::vector<std::pair<xmlNodePtr, std::vector<xmlNodePtr>>>& instrs)
{
if (instrs.empty())
return;
// Collect the attributes of all the instrs, as a map from attribute name to
// the set of distinct values seen.
std::map<std::string, std::set<std::string>> attributes;
for (const auto& [corpus, corpus_instrs] : instrs)
for (const auto& instr : corpus_instrs)
for (auto p = instr->properties; p; p = p->next)
{
// This is horrible. There should be a better way of iterating.
const char* attribute_name = from_libxml(p->name);
const auto attribute_value = get_attribute(instr, attribute_name);
assert(attribute_value);
attributes[attribute_name].insert(attribute_value.value());
}
// Create and attach a replacement instr and populate its attributes.
xmlNodePtr replacement =
xmlAddChild(where, xmlNewNode(nullptr, to_libxml("abi-instr")));
for (const auto& attribute : attributes)
{
const char* attribute_name = attribute.first.c_str();
const auto& attribute_values = attribute.second;
// A unique value is copied; known-variable attributes collapse to "various";
// anything else varying is unexpected and aborts the whole operation.
if (attribute_values.size() == 1)
set_attribute(replacement, attribute_name, *attribute_values.begin());
else if (INSTR_VARIABLE_ATTRIBUTES.count(attribute_name))
set_attribute(replacement, attribute_name, "various");
else
{
std::cerr << "unexpectedly variable abi-instr attribute '"
<< attribute_name << "'\n";
// Nothing has been moved yet, so dropping the replacement undoes everything.
remove_node(replacement);
return;
}
}
// Order XML nodes by XML element names, effective names, mangled names and
// type ids.
struct Compare {
// Three-way comparison: the first criterion that differs decides.
int
cmp(xmlNodePtr a, xmlNodePtr b) const
{
int result;
// Compare XML element names.
result = strcmp(from_libxml(a->name), from_libxml(b->name));
if (result)
return result;
// Compare effective names.
const auto a_effective_name = get_effective_name(a);
const auto b_effective_name = get_effective_name(b);
result = compare_optional(a_effective_name, b_effective_name);
if (result)
return result;
// Compare declarations using mangled names.
result = compare_attributes("mangled-name", a, b);
if (result)
return result;
// Compare types using ids.
return compare_attributes("id", a, b);
}
// Strict "less than" predicate for use with standard sorting algorithms.
bool
operator()(xmlNodePtr a, xmlNodePtr b) const
{
return cmp(a, b) < 0;
}
};
// Collect the child elements of all the instrs, by namespace scope.
// child_corpus remembers which corpus each child originally came from.
std::map<namespace_scope, std::vector<xmlNodePtr>> scoped_children;
std::unordered_map<xmlNodePtr, xmlNodePtr> child_corpus;
for (const auto& [corpus, corpus_instrs] : instrs)
for (const auto& [scope, children] : get_children_by_namespace(corpus_instrs))
{
auto& these_scoped_children = scoped_children[scope];
for (auto child : children)
{
these_scoped_children.push_back(child);
child_corpus[child] = corpus;
}
}
for (auto& [scope, children] : scoped_children)
// Sort the children, preserving order of duplicates.
std::stable_sort(children.begin(), children.end(), Compare());
// Create namespace elements on demand. The global namespace, with
// empty scope, is just the replacement instr itself.
std::map<namespace_scope, xmlNodePtr> namespace_elements{{{}, replacement}};
std::function<xmlNodePtr(const namespace_scope&)> get_namespace_element =
[&](const namespace_scope& scope) {
// Reserve the slot first; a fresh insertion means the element for this scope
// does not exist yet and has to be created under its parent scope's element.
auto insertion = namespace_elements.insert({scope, nullptr});
if (insertion.second)
{
// Insertion succeeded, so the scope cannot be empty.
namespace_scope truncated = scope;
truncated.pop_back();
xmlNodePtr parent = get_namespace_element(truncated);
// We can now create an XML element in the right place.
xmlNodePtr child = xmlNewNode(nullptr, to_libxml("namespace-decl"));
set_attribute(child, "name", scope.back());
xmlAddChild(parent, child);
insertion.first->second = child;
}
return insertion.first->second;
};
// Move each child to the replacement instr or namespace subelement
// thereof, unless the child would move between corpora and is or
// contains a symbol-linked declaration.
for (const auto& [scope, elements] : scoped_children)
{
xmlNodePtr namespace_element = get_namespace_element(scope);
for (auto element : elements)
if (child_corpus[element] == where || !contains_elf_symbol_id(element))
move_element(element, namespace_element);
}
// Remove each original instr if now effectively empty.
for (const auto& [corpus, corpus_instrs] : instrs)
for (auto instr : corpus_instrs)
if (get_children_by_namespace({instr}).empty())
remove_node(instr);
// Remove the replacement if it wasn't used.
if (get_children(replacement).empty())
remove_node(replacement);
}
/// Get corpora instrs.
///
/// @param corpora a vector of corpus elements
///
/// @return a vector of pairs of corpus and contained instr elements
std::vector<std::pair<xmlNodePtr, std::vector<xmlNodePtr>>>
get_corpora_instrs(const std::vector<xmlNodePtr>& corpora)
{
std::vector<std::pair<xmlNodePtr, std::vector<xmlNodePtr>>> result;
for (auto corpus : corpora)
{
result.push_back({corpus, {}});
auto& corpus_instrs = result.back().second;
for (auto instr : get_children(corpus))
if (strcmp(from_libxml(instr->name), "abi-instr") == 0)
corpus_instrs.push_back(instr);
}
return result;
}
/// Sort namespaces, types and declarations.
///
/// Two ABI formats are (currently) handled, selected by the root element.
///
/// 1. An abi-corpus containing one or more abi-instr. All namespaces, types
///    and declarations are moved to a replacement abi-instr at the end of
///    the abi-corpus.
///
/// 2. An abi-corpus-group containing one or more abi-corpus each containing
///    zero or more abi-instr (with at least one abi-instr altogether). All
///    the corpora are sorted together into a replacement abi-instr created
///    within the first corpus, except that symbol-linked declarations in
///    subsequent corpora are not moved. Each subsequent corpus is then
///    sorted on its own to deal with whatever was left behind.
///
/// Anything else is left alone. For example, single abi-instr elements are
/// present in some libabigail test suite files.
///
/// @param root the XML root element
static void
sort_namespaces_types_and_declarations(xmlNodePtr root)
{
  const char* root_name = from_libxml(root->name);
  if (strcmp(root_name, "abi-corpus") == 0)
    {
      sort_instrs_into_corpus(root, get_corpora_instrs({root}));
      return;
    }
  if (strcmp(root_name, "abi-corpus-group") != 0)
    return;
  // Gather the corpora of the group; the first one receives the merged
  // contents of them all.
  std::vector<xmlNodePtr> corpora;
  for (auto child : get_children(root))
    if (strcmp(from_libxml(child->name), "abi-corpus") == 0)
      corpora.push_back(child);
  if (corpora.empty())
    return;
  sort_instrs_into_corpus(corpora.front(), get_corpora_instrs(corpora));
  // An extra pass to sort whatever may have been left behind.
  for (size_t ix = 1; ix < corpora.size(); ++ix)
    sort_instrs_into_corpus(corpora[ix], get_corpora_instrs({corpora[ix]}));
}
/// Section name suffixes identifying symbol list sections in a symbol file.
/// read_symbols treats a "[section]" header whose name ends with one of these
/// as the start of a list of symbols.
static constexpr std::array<std::string_view, 2> SYMBOL_SECTION_SUFFICES = {
"symbol_list",
"whitelist",
};
/// Read symbols from a file.
///
/// This aims to be compatible with the .ini format used by libabigail
/// for suppression specifications and symbol lists. All symbol list
/// sections in the given file are combined into a single set of
/// symbols.
///
/// Each line has leading and trailing whitespace removed. Empty lines and
/// lines whose first non-whitespace character is '#' are ignored; a '#'
/// later in a line is not treated as a comment. A line of the form "[name]"
/// starts a section, which is a symbol list section if name ends with one of
/// SYMBOL_SECTION_SUFFICES. Every other line within a symbol list section is
/// taken as a symbol.
///
/// The program exits with status 1 if the file cannot be opened or if
/// reading stops before the end of the file.
///
/// @param filename the name of the file from which to read
///
/// @return a set of symbols
symbol_set
read_symbols(const char* filename)
{
  // std::isspace has undefined behaviour unless its argument is representable
  // as unsigned char (or is EOF), so plain char values, which may be negative
  // for non-ASCII bytes, must be converted first.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };
  symbol_set symbols;
  std::ifstream file(filename);
  if (!file)
    {
      std::cerr << "error opening symbol file '" << filename << "'\n";
      exit(1);
    }
  bool in_symbol_section = false;
  std::string line;
  while (std::getline(file, line))
    {
      // [start, limit) delimits the significant part of the line.
      size_t start = 0;
      size_t limit = line.size();
      // Strip leading whitespace; a '#' in first position empties the line.
      while (start < limit)
        {
          if (is_space(line[start]))
            ++start;
          else if (line[start] == '#')
            start = limit;
          else
            break;
        }
      // Strip trailing whitespace.
      while (start < limit && is_space(line[limit - 1]))
        --limit;
      // Skip empty lines.
      if (start == limit)
        continue;
      // See if we are entering a symbol list section. '[' and ']' differ, so
      // a match implies at least two characters and the length is in range.
      if (line[start] == '[' && line[limit - 1] == ']')
        {
          std::string_view section(&line[start + 1], limit - start - 2);
          bool found = false;
          for (const auto& suffix : SYMBOL_SECTION_SUFFICES)
            if (section.size() >= suffix.size()
                && section.substr(section.size() - suffix.size()) == suffix)
              {
                found = true;
                break;
              }
          in_symbol_section = found;
          continue;
        }
      // Add symbol.
      if (in_symbol_section)
        symbols.insert(std::string(&line[start], limit - start));
    }
  // getline stopping for any reason other than end of file is an error.
  if (!file.eof())
    {
      std::cerr << "error reading symbol file '" << filename << "'\n";
      exit(1);
    }
  return symbols;
}
/// Get aliases from XML node.
///
/// The aliases are read from the comma-separated "alias" attribute. A node
/// without that attribute yields an empty set.
///
/// @param node the XML node to process
///
/// @return an ordered set of aliases
std::set<std::string>
get_aliases(xmlNodePtr node)
{
  std::set<std::string> aliases;
  const auto alias = get_attribute(node, "alias");
  if (!alias)
    return aliases;
  std::istringstream items(alias.value());
  for (std::string item; std::getline(items, item, ',');)
    aliases.insert(item);
  return aliases;
}
/// Set aliases in XML node.
///
/// The aliases are written, comma-separated and in set order, to the "alias"
/// attribute. If there are no aliases, the attribute is removed instead.
///
/// @param node the XML node to process
///
/// @param aliases an ordered set of aliases
void
set_aliases(xmlNodePtr node, const std::set<std::string>& aliases)
{
  if (aliases.empty())
    {
      unset_attribute(node, "alias");
      return;
    }
  // Join the aliases with commas; the set is known to be non-empty here.
  auto it = aliases.begin();
  std::string joined = *it;
  for (++it; it != aliases.end(); ++it)
    {
      joined += ',';
      joined += *it;
    }
  set_attribute(node, "alias", joined);
}
/// Gather information about symbols and record alias <-> main mappings.
///
/// @param symbol_map a map from elf-symbol-id to XML node
///
/// @param alias_map a map from alias elf-symbol-id to main
///
/// @param main_map a map from main elf-symbol-id to aliases
///
/// @param node the XML node to process
void
process_symbols(
std::unordered_map<std::string, xmlNodePtr>& symbol_map,
std::unordered_map<std::string, std::string>& alias_map,
std::unordered_map<std::string, std::set<std::string>>& main_map,
xmlNodePtr node)
{
if (node->type != XML_ELEMENT_NODE)
return;
const char* node_name = from_libxml(node->name);
if (strcmp(node_name, "abi-corpus-group") == 0)
{
std::cerr << "symbol processing must be per corpus\n";
exit(1);
}
else if (strcmp(node_name, "abi-corpus") == 0
|| strcmp(node_name, "elf-variable-symbols") == 0
|| strcmp(node_name, "elf-function-symbols") == 0)
{
// Process children.
for (auto child : get_children(node))
process_symbols(symbol_map, alias_map, main_map, child);
}
else if (strcmp(node_name, "elf-symbol") == 0)
{
const auto id = get_elf_symbol_id(node);
if (!symbol_map.insert({id, node}).second)
{
std::cerr << "multiple symbols with id " << id << "\n";
exit(1);
}
const auto aliases = get_aliases(node);
for (const auto& alias : aliases)
if (!alias_map.insert({alias, id}).second)
{
std::cerr << "multiple aliases with id " << alias << "\n";
exit(1);
}
if (!aliases.empty())
main_map.insert({id, aliases});
}
}
/// Rewrite elf-symbol-id attributes following ELF symbol removal.
///
/// For every var-decl and function-decl element in the subtree, the
/// elf-symbol-id attribute is looked up in the mapping, repeatedly, until an
/// id without a mapping or an empty mapping is reached. If at least one
/// lookup succeeded, the attribute is set to the final id or, if the final
/// mapping was empty, removed.
///
/// @param mapping map from old to new elf-symbol-id, if any
///
/// @param node the XML node to process recursively
void
rewrite_symbols_in_declarations(
    const std::unordered_map<std::string, std::optional<std::string>>& mapping,
    xmlNodePtr node)
{
  if (node->type != XML_ELEMENT_NODE)
    return;
  const char* element_name = from_libxml(node->name);
  const bool is_declaration = strcmp(element_name, "var-decl") == 0
                              || strcmp(element_name, "function-decl") == 0;
  if (is_declaration)
    {
      auto symbol = get_attribute(node, "elf-symbol-id");
      // Follow the chain of replacements as far as it goes.
      bool remapped = false;
      while (symbol)
        {
          const auto entry = mapping.find(symbol.value());
          if (entry == mapping.end())
            break;
          symbol = entry->second;
          remapped = true;
        }
      if (remapped && symbol)
        set_attribute(node, "elf-symbol-id", symbol.value());
      else if (remapped)
        unset_attribute(node, "elf-symbol-id");
    }
  for (xmlNodePtr child : get_children(node))
    rewrite_symbols_in_declarations(mapping, child);
}
/// Remove unlisted ELF symbols.
///
/// @param symbols the set of symbols
///
/// @param corpus the XML corpus element
///
/// @return mapping from alias to main elf-symbol-id
std::unordered_map<std::string, std::string>
filter_corpus_symbols(const std::optional<symbol_set>& symbols,
xmlNodePtr corpus)
{
// find symbols and record alias <-> main mappings
std::unordered_map<std::string, xmlNodePtr> symbol_map;
std::unordered_map<std::string, std::string> alias_map;
std::unordered_map<std::string, std::set<std::string>> main_map;
process_symbols(symbol_map, alias_map, main_map, corpus);
// check that aliases and main symbols are disjoint
for (const auto& [alias, main] : alias_map)
if (alias_map.count(main))
{
std::cerr << "found main symbol and alias with id " << main << '\n';
exit(1);
}
if (!symbols)
return alias_map;
// Track when an alias is promoted to a main symbol or a symbol is deleted as
// these are the cases when we need update references to symbols in
// declarations.
std::unordered_map<std::string, std::optional<std::string>> mapping;
// filter the symbols, preserving those listed
for (const auto& [id, node] : symbol_map)
{
const auto name = get_attribute(node, "name");
assert(name);
if (symbols->count(name.value()))
continue;
remove_element(node);
// The symbol has been removed, so remove its id from the alias <-> main
// mappings, promoting another alias to main symbol if needed, and
// updating XML alias attributes.
//
// There are 3 cases:
// a main symbol - with one or more aliases
// an alias - with a main symbol
// an unaliased symbol
if (const auto main_it = main_map.find(id);
main_it != main_map.end())
{
// A main symbol with one or more aliases.
std::set<std::string> aliases;
std::swap(aliases, main_it->second);
main_map.erase(main_it);
// the first alias will be the new main symbol
const auto first_it = aliases.begin();
assert(first_it != aliases.end());
const auto first = *first_it;
// remove first from the list of aliases and its link to id
aliases.erase(first_it);
alias_map.erase(first);
if (!aliases.empty())
{
// update the XML attribute
set_aliases(symbol_map[first], aliases);
// update the maps
for (const auto& alias : aliases)
alias_map[alias] = first;
std::swap(aliases, main_map[first]);
}
// declarations referring to id must be repointed at first
mapping[id] = {first};
}
else if (const auto alias_it = alias_map.find(id);
alias_it != alias_map.end())
{
// An alias with a main symbol.
const auto main = alias_it->second;
auto& aliases = main_map[main];
// remove id from the maps
alias_map.erase(alias_it);
aliases.erase(id);
// update the XML attribute
set_aliases(symbol_map[main], aliases);
if (aliases.empty())
// main hasn't changed but is no longer aliased
main_map.erase(main);
}
else
{
// An unaliased symbol.
//
// declaration references to id must be removed
mapping[id] = {};
}
}
rewrite_symbols_in_declarations(mapping, corpus);
return alias_map;
}
/// Remove unlisted ELF symbols.
///
/// If the root is an abi-corpus, it is filtered directly. If it is an
/// abi-corpus-group, each of its children is filtered as a corpus. Any other
/// root element is an error and the program exits with status 1.
///
/// @param symbols the set of symbols
///
/// @param root the XML root element
///
/// @return mapping from corpus to alias to main elf-symbol-id
std::unordered_map<xmlNodePtr, std::unordered_map<std::string, std::string>>
filter_symbols(const std::optional<symbol_set>& symbols, xmlNodePtr root)
{
  const char* root_name = from_libxml(root->name);
  const bool is_group = strcmp(root_name, "abi-corpus-group") == 0;
  if (!is_group && strcmp(root_name, "abi-corpus") != 0)
    {
      std::cerr << "unexpected root element: " << root_name << '\n';
      exit(1);
    }
  std::unordered_map<xmlNodePtr, std::unordered_map<std::string, std::string>>
      aliases_by_corpus;
  if (is_group)
    for (auto corpus : get_children(root))
      aliases_by_corpus[corpus] = filter_corpus_symbols(symbols, corpus);
  else
    aliases_by_corpus[root] = filter_corpus_symbols(symbols, root);
  return aliases_by_corpus;
}
/// Main program.
///
/// Read and write ABI XML, with optional processing passes.
///
/// @param argc argument count
///
/// @param argv argument vector
///
/// @return exit status
int
main(int argc, char* argv[])
{
// Defaults.
const char* opt_input = nullptr;
const char* opt_output = nullptr;
std::optional<symbol_set> opt_symbols;
LocationInfo opt_locations = LocationInfo::COLUMN;
int opt_indentation = 2;
bool opt_normalise_anonymous = false;
bool opt_reanonymise_anonymous = false;
bool opt_discard_naming_typedefs = false;
bool opt_remove_function_parameter_type_qualifiers = false;
bool opt_prune_unreachable = false;
bool opt_report_untyped = false;
bool opt_abort_on_untyped = false;
bool opt_clear_non_reachable = false;
bool opt_eliminate_duplicates = false;
bool opt_report_conflicts = false;
bool opt_sort = false;
bool opt_drop_empty = false;
// Experimental flags. These are not part of --all.
//
// TODO: Move out of experimental status when stable.
bool opt_renumber_anonymous_types = false;
// Process command line.
auto usage = [&]() -> int {
std::cerr << "usage: " << argv[0] << '\n'
<< " [-i|--input file]\n"
<< " [-o|--output file]\n"
<< " [-S|--symbols file]\n"
<< " [-L|--locations {column|line|file|none}]\n"
<< " [-I|--indentation n]\n"
<< " [-a|--all] (implies -n -r -t -f -p -u -b -e -c -s -d)\n"
<< " [-n|--[no-]normalise-anonymous]\n"
<< " [-r|--[no-]reanonymise-anonymous]\n"
<< " [-t|--[no-]discard-naming-typedefs]\n"
<< " [-f|--[no-]remove-function-parameter-type-qualifiers]\n"
<< " [-p|--[no-]prune-unreachable]\n"
<< " [-u|--[no-]report-untyped]\n"
<< " [-U|--abort-on-untyped-symbols]\n"
<< " [-b|--[no-]clear-non-reachable]\n"
<< " [-e|--[no-]eliminate-duplicates]\n"
<< " [-c|--[no-]report-conflicts]\n"
<< " [-s|--[no-]sort]\n"
<< " [-d|--[no-]drop-empty]\n"
<< "\nExperimental flags, not part of --all\n"
<< " [-M|--[no-]renumber-anonymous-types]\n";
return 1;
};
int opt_index = 1;
auto get_arg = [&]() {
if (opt_index < argc)
return argv[opt_index++];
exit(usage());
};
while (opt_index < argc)
{
const std::string arg = get_arg();
if (arg == "-i" || arg == "--input")
opt_input = get_arg();
else if (arg == "-o" || arg == "--output")
opt_output = get_arg();
else if (arg == "-S" || arg == "--symbols")
opt_symbols = read_symbols(get_arg());
else if (arg == "-L" || arg == "--locations")
{
auto it = LOCATION_INFO_NAME.find(get_arg());
if (it == LOCATION_INFO_NAME.end())
exit(usage());
opt_locations = it->second;
}
else if (arg == "-I" || arg == "--indentation")
{
std::istringstream is(get_arg());
is >> std::noskipws >> opt_indentation;
if (!is || !is.eof() || opt_indentation < 0)
exit(usage());
}
else if (arg == "-a" || arg == "--all")
opt_normalise_anonymous = opt_reanonymise_anonymous
= opt_discard_naming_typedefs
= opt_remove_function_parameter_type_qualifiers
= opt_prune_unreachable
= opt_report_untyped
= opt_clear_non_reachable
= opt_eliminate_duplicates
= opt_report_conflicts
= opt_sort
= opt_drop_empty
= true;
else if (arg == "-n" || arg == "--normalise-anonymous")
opt_normalise_anonymous = true;
else if (arg == "--no-normalise-anonymous")
opt_normalise_anonymous = false;
else if (arg == "-r" || arg == "--reanonymise-anonymous")
opt_reanonymise_anonymous = true;
else if (arg == "--no-reanonymise-anonymous")
opt_reanonymise_anonymous = false;
else if (arg == "-t" || arg == "--discard-naming-typedefs")
opt_discard_naming_typedefs = true;
else if (arg == "--no-discard-naming-typedefs")
opt_discard_naming_typedefs = false;
else if (arg == "-f" ||
arg == "--remove-function-parameter-type-qualifiers")
opt_remove_function_parameter_type_qualifiers = true;
else if (arg == "--no-remove-function-parameter-type-qualifiers")
opt_remove_function_parameter_type_qualifiers = false;
else if (arg == "-p" || arg == "--prune-unreachable")
opt_prune_unreachable = true;
else if (arg == "--no-prune-unreachable")
opt_prune_unreachable = false;
else if (arg == "-u" || arg == "--report-untyped")
opt_report_untyped = true;
else if (arg == "--no-report-untyped")
opt_report_untyped = false;
else if (arg == "-U" || arg == "--abort-on-untyped-symbols")
opt_abort_on_untyped = true;
else if (arg == "-b" || arg == "--clear-non-reachable")
opt_clear_non_reachable = true;
else if (arg == "--no-clear-non-reachable")
opt_clear_non_reachable = false;
else if (arg == "-e" || arg == "--eliminate-duplicates")
opt_eliminate_duplicates = true;
else if (arg == "--no-eliminate-duplicates")
opt_eliminate_duplicates = false;
else if (arg == "-c" || arg == "--report-conflicts")
opt_report_conflicts = true;
else if (arg == "--no-report-conflicts")
opt_report_conflicts = false;
else if (arg == "-s" || arg == "--sort")
opt_sort = true;
else if (arg == "--no-sort")
opt_sort = false;
else if (arg == "-d" || arg == "--drop-empty")
opt_drop_empty = true;
else if (arg == "--no-drop-empty")
opt_drop_empty = false;
else if (arg == "-M" || arg == "--renumber-anonymous-types")
opt_renumber_anonymous_types = true;
else if (arg == "--no-renumber-anonymous-types")
opt_renumber_anonymous_types = false;
else
exit(usage());
}
// Open input for reading.
int in_fd = STDIN_FILENO;
if (opt_input)
{
in_fd = open(opt_input, O_RDONLY);
if (in_fd < 0)
{
std::cerr << "could not open '" << opt_input << "' for reading: "
<< strerror(errno) << '\n';
exit(1);
}
}
// Read the XML.
xmlParserCtxtPtr parser_context = xmlNewParserCtxt();
xmlDocPtr document
= xmlCtxtReadFd(parser_context, in_fd, nullptr, nullptr, 0);
if (!document)
{
std::cerr << "failed to parse input as XML\n";
exit(1);
}
xmlFreeParserCtxt(parser_context);
close(in_fd);
// Get the root element.
xmlNodePtr root = xmlDocGetRootElement(document);
if (!root)
{
std::cerr << "XML document has no root element\n";
exit(1);
}
// Strip text nodes to simplify other operations.
strip_text(root);
// Get corpus -> alias -> main mapping and remove unlisted symbols.
const auto alias_map = filter_symbols(opt_symbols, root);
// Record type ids which correspond to anonymous types.
// Renumber recorded type ids using information about the type.
// Replace recorded type ids by renumbered ones.
if (opt_renumber_anonymous_types)
{
std::unordered_map<std::string, xmlNodePtr> to_renumber;
std::unordered_set<size_t> used_hashes;
record_ids_to_renumber(root, to_renumber, used_hashes);
std::unordered_map<std::string, std::string> type_id_map;
for (const auto& [type_id, node] : to_renumber)
resolve_ids_to_renumber(type_id, to_renumber, used_hashes, type_id_map);
renumber_type_ids(root, type_id_map);
}
// Normalise anonymous type names.
// Reanonymise anonymous types.
// Discard naming typedef backlinks.
if (opt_normalise_anonymous || opt_reanonymise_anonymous
|| opt_discard_naming_typedefs)
handle_anonymous_types(opt_normalise_anonymous, opt_reanonymise_anonymous,
opt_discard_naming_typedefs, root);
// Remove useless top-level qualifiers on function parameter and return
// types.
if (opt_remove_function_parameter_type_qualifiers)
{
std::unordered_map<std::string, std::string> qualifier_id_to_type_id;
build_qualifier_id_to_type_id_map(root, qualifier_id_to_type_id);
resolve_qualifier_chains(qualifier_id_to_type_id);
remove_function_parameter_type_qualifiers(root, qualifier_id_to_type_id);
}
// Prune unreachable elements and/or report untyped symbols.
size_t untyped_symbols = 0;
if (opt_prune_unreachable || opt_report_untyped || opt_abort_on_untyped)
untyped_symbols += handle_unreachable(
opt_prune_unreachable, opt_report_untyped, alias_map, root);
if (opt_abort_on_untyped && untyped_symbols)
{
std::cerr << "found " << untyped_symbols << " untyped symbols\n";
exit(1);
}
// Limit location information.
if (opt_locations > LocationInfo::COLUMN)
limit_locations(opt_locations, root);
// Clear unwanted non-reachable attributes.
if (opt_clear_non_reachable)
clear_non_reachable(root);
// Eliminate complete duplicates and extra fragments of types.
// Report conflicting duplicate definitions.
// Record whether there are conflicting duplicate definitions.
size_t conflicting_types = 0;
if (opt_eliminate_duplicates || opt_report_conflicts || opt_sort)
conflicting_types += handle_duplicate_types(
opt_eliminate_duplicates, opt_report_conflicts, root);
// Sort namespaces, types and declarations.
if (opt_sort)
{
if (conflicting_types)
std::cerr << "found type definition conflicts, skipping sort\n";
else
sort_namespaces_types_and_declarations(root);
}
// Drop empty subelements.
if (opt_drop_empty)
drop_empty(root);
// Reformat root element for human consumption.
format_xml(std::string(opt_indentation, ' '), std::string(), root);
// Open output for writing.
int out_fd = STDOUT_FILENO;
if (opt_output)
{
out_fd = open(opt_output, O_CREAT | O_TRUNC | O_WRONLY,
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
if (out_fd < 0)
{
std::cerr << "could not open '" << opt_output << "' for writing: "
<< strerror(errno) << '\n';
exit(1);
}
}
// Write the XML.
//
// First to memory, as we need to do a little post-processing.
xmlChar* out_data;
int out_size;
xmlDocDumpMemory(document, &out_data, &out_size);
// Remove the XML declaration as it currently upsets abidiff.
xmlChar* out_limit = out_data + out_size;
while (out_data < out_limit && *out_data != '\n')
++out_data;
if (out_data < out_limit)
++out_data;
// Adjust quotes to match abidw.
adjust_quotes(out_data, out_limit);
// And now to a file.
size_t count = out_limit - out_data;
if (write(out_fd, out_data, count) != count)
{
std::cerr << "could not write output: " << strerror(errno) << '\n';
exit(1);
}
if (close(out_fd) < 0)
{
std::cerr << "could not close output: " << strerror(errno) << '\n';
exit(1);
}
// Free libxml document.
xmlFreeDoc(document);
return 0;
}