| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // -*- Mode: C++ -*- |
| // |
| // Copyright (C) 2021-2022 Google, Inc. |
| // |
| // Author: Giuliano Procida |
| |
| /// @file |
| /// |
| /// This file contains ABI XML manipulation routines and a main driver. |
| /// |
| /// The libxml Tree API is used. The XPath API is not used as it proved |
| /// to be many times slower than direct traversal but only slightly more |
| /// convenient. |
| |
#include <fcntl.h>
#include <unistd.h>

#include <algorithm>
#include <array>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <ios>
#include <iostream>
#include <map>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include <libxml/globals.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
| |
/// Alias for the container type used to represent a namespace scope.
using namespace_scope = std::vector<std::string>;

/// Alias for the container type used to hold a set of symbols.
using symbol_set = std::unordered_set<std::string>;

/// How much source location information to preserve.
///
/// The enumerators are declared from most to least detailed, so the
/// built-in relational operators can be used to ask "is at least this
/// much detail being dropped?".
enum class LocationInfo { COLUMN, LINE, FILE, NONE };

/// Lookup table from the textual name of a location level to the
/// corresponding LocationInfo value.
static const std::map<std::string, LocationInfo> LOCATION_INFO_NAME{
  {"none", LocationInfo::NONE},
  {"file", LocationInfo::FILE},
  {"line", LocationInfo::LINE},
  {"column", LocationInfo::COLUMN},
};

/// Lookup table from the element names of types that may be anonymous
/// to the canonical anonymous name used for that kind of type.
///
/// The transparent comparator permits lookups keyed by C strings or
/// string views without building a temporary std::string.
static const std::map<std::string, std::string, std::less<>> NAMED_TYPES{
  {"class-decl", "__anonymous_struct__"},
  {"enum-decl", "__anonymous_enum__"},
  {"union-decl", "__anonymous_union__"},
};
| |
/// Three-way comparison of optional strings.
///
/// A present value orders before an absent one; two absent values are
/// equal; two present values are ordered by std::string::compare.
///
/// NOTE: this is the reverse of the ordering used by std::optional's own
/// relational operators (and C++20 operator<=>), where an empty optional
/// compares less than any value, so those are not drop-in replacements.
///
/// @param a first operand of comparison
///
/// @param b second operand of comparison
///
/// @return a negative, zero or positive value if @p a orders before,
/// equal to or after @p b, respectively
int
compare_optional(const std::optional<std::string>& a,
                 const std::optional<std::string>& b)
{
  const bool a_present = a.has_value();
  const bool b_present = b.has_value();
  if (a_present != b_present)
    return a_present ? -1 : 1;
  if (!a_present)
    return 0;
  return a->compare(*b);
}
| |
| /// Cast a C string to a libxml string. |
| /// |
| /// @param str the C string (pointer) |
| /// |
| /// @return the same thing, as a type compatible with the libxml API |
| static const xmlChar* |
| to_libxml(const char* str) |
| { |
| return reinterpret_cast<const xmlChar*>(str); |
| } |
| |
| /// Cast a libxml string to C string. |
| /// |
| /// @param str the libxml string (pointer) |
| /// |
| /// @return the same thing, as a type compatible with the C library API |
| static const char* |
| from_libxml(const xmlChar* str) |
| { |
| return reinterpret_cast<const char*>(str); |
| } |
| |
| /// Get comment node corresponding to a given node if it exists. |
| /// |
| /// Returns nullptr if previous node does not exist or is not a comment, |
| /// otherwise returns the previous node. |
| /// |
| /// @param node the node for which comment has to be returned |
| /// |
| /// @return pointer to the comment node |
| static xmlNodePtr |
| get_comment_node(xmlNodePtr node) |
| { |
| xmlNodePtr previous_node = node->prev; |
| return previous_node && previous_node->type == XML_COMMENT_NODE |
| ? previous_node : nullptr; |
| } |
| |
| /// Remove a node from its document and free its storage. |
| /// |
| /// @param node the node to remove |
| static void |
| remove_node(xmlNodePtr node) |
| { |
| xmlUnlinkNode(node); |
| xmlFreeNode(node); |
| } |
| |
| /// Remove an XML element and any immediately preceding comment. |
| /// |
| /// @param node the element to remove |
| static void |
| remove_element(xmlNodePtr node) |
| { |
| if (auto comment_node = get_comment_node(node)) |
| remove_node(comment_node); |
| remove_node(node); |
| } |
| |
| /// Move a node to an element. |
| /// |
| /// @param node the node to move |
| /// |
| /// @param destination the destination element |
| static void |
| move_node(xmlNodePtr node, xmlNodePtr destination) |
| { |
| xmlUnlinkNode(node); |
| xmlAddChild(destination, node); |
| } |
| |
| /// Move an XML element and any immediately preceding comment to another |
| /// element. |
| /// |
| /// @param node the element to remove |
| /// |
| /// @param destination the destination element |
| static void |
| move_element(xmlNodePtr node, xmlNodePtr destination) |
| { |
| if (auto comment_node = get_comment_node(node)) |
| move_node(comment_node, destination); |
| move_node(node, destination); |
| } |
| |
| /// Get child nodes of given node. |
| /// |
| /// @param node the node whose children to fetch |
| /// |
| /// @return a vector of child nodes |
| static std::vector<xmlNodePtr> |
| get_children(xmlNodePtr node) |
| { |
| std::vector<xmlNodePtr> result; |
| for (xmlNodePtr child = node->children; child; child = child->next) |
| result.push_back(child); |
| return result; |
| } |
| |
| /// Fetch an attribute from a node. |
| /// |
| /// @param node the node |
| /// |
| /// @param name the attribute name |
| /// |
| /// @return the attribute value, if present |
| static std::optional<std::string> |
| get_attribute(xmlNodePtr node, const char* name) |
| { |
| std::optional<std::string> result; |
| xmlChar* attribute = xmlGetProp(node, to_libxml(name)); |
| if (attribute) |
| { |
| result = from_libxml(attribute); |
| xmlFree(attribute); |
| } |
| return result; |
| } |
| |
| /// Set an attribute value. |
| /// |
| /// @param node the node |
| /// |
| /// @param name the attribute name |
| /// |
| /// @param value the attribute value |
| static void |
| set_attribute(xmlNodePtr node, const char* name, |
| const std::string& value) |
| { |
| xmlSetProp(node, to_libxml(name), to_libxml(value.c_str())); |
| } |
| |
| /// Unset an attribute value. |
| /// |
| /// @param node the node |
| /// |
| /// @param name the attribute name |
| static void |
| unset_attribute(xmlNodePtr node, const char* name) |
| { |
| xmlUnsetProp(node, to_libxml(name)); |
| } |
| |
| /// Remove text nodes, recursively. |
| /// |
| /// This simplifies subsequent analysis and manipulation. Removing and |
| /// moving elements will destroy formatting anyway. The only remaining |
| /// node types should be elements and comments. |
| /// |
| /// @param node the node to process |
| static void |
| strip_text(xmlNodePtr node) |
| { |
| if (node->type == XML_TEXT_NODE) |
| remove_node(node); |
| else if (node->type == XML_ELEMENT_NODE) |
| for (xmlNodePtr child : get_children(node)) |
| strip_text(child); |
| } |
| |
| /// Add text before / after a node. |
| /// |
| /// @param node the node |
| /// |
| /// @param after whether the next should go after |
| /// |
| /// @param text the text |
| static void |
| add_text(xmlNodePtr node, bool after, const std::string& text) |
| { |
| xmlNodePtr text_node = xmlNewTextLen(to_libxml(text.data()), text.size()); |
| if (after) |
| xmlAddNextSibling(node, text_node); |
| else |
| xmlAddPrevSibling(node, text_node); |
| } |
| |
| /// Format an XML element by adding internal indentation and newlines. |
| /// |
| /// This makes the XML readable. |
| /// |
| /// @param indentation what to add to the line indentation prefix |
| /// |
| /// @param prefix the current line indentation prefix |
| /// |
| /// @param node the node to format |
| static void |
| format_xml(const std::string& indentation, std::string prefix, xmlNodePtr node) |
| { |
| std::vector<xmlNodePtr> children = get_children(node); |
| if (children.empty()) |
| return; |
| |
| // The ordering of operations here is incidental. The outcomes we want |
| // are: 1. an extra newline after the opening tag and indentation of |
| // the closing tag to match, and 2. indentation and newline for each |
| // child. |
| add_text(children[0], false, "\n"); |
| add_text(children[children.size() - 1], true, prefix); |
| prefix += indentation; |
| for (xmlNodePtr child : children) |
| { |
| add_text(child, false, prefix); |
| format_xml(indentation, prefix, child); |
| add_text(child, true, "\n"); |
| } |
| } |
| |
| /// Rewrite attributes using single quotes. |
| /// |
| /// libxml uses double quotes but libabigail uses single quotes. |
| /// |
| /// Note that libabigail does not emit attributes *containing* single |
| /// quotes and if it did it would escape them as " which libxml |
| /// would in turn preserve. However, the code here will handle all forms |
| /// of quotes, conservatively. |
| /// |
| /// Annotation comments can contain single quote characters so just |
| /// checking for any single quotes at all is insufficiently precise. |
| /// |
| /// @param start a pointer to the start of the XML text |
| /// |
| /// @param limit a pointer to just past the end of the XML text |
| static void |
| adjust_quotes(xmlChar* start, xmlChar* limit) |
| { |
| const std::string open{"<!--"}; |
| const std::string close{"-->"}; |
| while (start < limit) |
| { |
| // Look for a '<' |
| start = std::find(start, limit, '<'); |
| if (start == limit) |
| break; |
| if (start + open.size() < limit |
| && std::equal(open.begin(), open.end(), start)) |
| { |
| // Have a comment, skip to the end. |
| start += open.size(); |
| xmlChar* end = std::search(start, limit, close.begin(), close.end()); |
| if (end == limit) |
| break; |
| start = end + close.size(); |
| } |
| else |
| { |
| // Have some tag, search for the end. |
| start += 1; |
| xmlChar* end = std::find(start, limit, '>'); |
| if (end == limit) |
| break; |
| // In general, inside a tag we could find either ' or " being |
| // used to quote attributes and the other quote character |
| // being used as part of the attribute data. However, libxml's |
| // xmlDocDump* functions use " to quote attributes and it's |
| // safe to substitute this quote character with ' so long as ' |
| // does not appear within the attribute data. |
| if (std::find(start, end, '\'') == end) |
| for (xmlChar* c = start; c < end; ++c) |
| if (*c == '"') |
| *c = '\''; |
| start = end + 1; |
| } |
| } |
| } |
| |
| /// Compare given attribute of 2 XML nodes. |
| /// |
| /// @param attribute the attribute to compare |
| /// |
| /// @param a first XML node to compare |
| /// |
| /// @param b second XML node to compare |
| /// |
| /// @return an integral result |
| static int |
| compare_attributes( |
| const char* attribute, const xmlNodePtr& a, const xmlNodePtr& b) |
| { |
| return compare_optional(get_attribute(a, attribute), |
| get_attribute(b, attribute)); |
| } |
| |
| static const std::set<std::string> DROP_IF_EMPTY = { |
| "elf-variable-symbols", |
| "elf-function-symbols", |
| "namespace-decl", |
| "abi-instr", |
| "abi-corpus", |
| "abi-corpus-group", |
| }; |
| |
| /// Drop empty elements, if safe to do so, recursively. |
| /// |
| /// @param node the element to process |
| static void |
| drop_empty(xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| for (xmlNodePtr child : get_children(node)) |
| drop_empty(child); |
| // Do not drop the root element, even if empty. |
| if (node->parent->type == XML_DOCUMENT_NODE) |
| return; |
| if (!node->children && DROP_IF_EMPTY.count(from_libxml(node->name))) |
| remove_element(node); |
| } |
| |
| /// Get ELF symbol id. |
| /// |
| /// This is not an explicit attribute. It takes one of these forms: |
| /// |
| /// * name (if symbol is not versioned) |
| /// * name@version (if symbol is versioned but not the default version) |
| /// * name@@version (if symbol is versioned and the default version) |
| /// |
| /// @param node the elf-symbol element |
| /// |
| /// @return the ELF symbol id |
| static std::string |
| get_elf_symbol_id(xmlNodePtr node) |
| { |
| const auto name = get_attribute(node, "name"); |
| assert(name); |
| std::string result = name.value(); |
| const auto version = get_attribute(node, "version"); |
| if (version) |
| { |
| result += '@'; |
| const auto is_default = get_attribute(node, "is-default-version"); |
| if (is_default && is_default.value() == "yes") |
| result += '@'; |
| result += version.value(); |
| } |
| return result; |
| } |
| |
| static const std::set<std::string> HAS_LOCATION = { |
| "class-decl", |
| "enum-decl", |
| "function-decl", |
| "parameter", |
| "typedef-decl", |
| "union-decl", |
| "var-decl" |
| }; |
| |
| /// Limit location information. |
| /// |
| /// @param location_info the level of location information to retain |
| /// |
| /// @param node the element to process |
| static void |
| limit_locations(LocationInfo location_info, xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| if (HAS_LOCATION.count(from_libxml(node->name))) |
| { |
| if (location_info > LocationInfo::COLUMN) |
| { |
| unset_attribute(node, "column"); |
| if (location_info > LocationInfo::LINE) |
| { |
| unset_attribute(node, "line"); |
| if (location_info > LocationInfo::FILE) |
| unset_attribute(node, "filepath"); |
| } |
| } |
| } |
| for (xmlNodePtr child : get_children(node)) |
| limit_locations(location_info, child); |
| } |
| |
| /// Handle unreachable elements. |
| /// |
| /// Reachability is defined to be union of contains, containing and |
| /// refers-to relationships for types, declarations and symbols. The |
| /// roots for reachability are the ELF elements in the ABI. |
| /// |
| /// The subrange element requires special treatment. It has a useless |
| /// type id, but it is not a type and its type id aliases with that of |
| /// all other subranges of the same length. So don't treat it as a type. |
| /// |
| /// @param prune whether to prune unreachable elements |
| /// |
| /// @param report whether to report untyped symbols |
| /// |
| /// @param alias_map mapping from corpus to alias to main elf-symbol-id |
| /// |
| /// @param root the XML root element |
| /// |
| /// @return the number of untyped symbols |
| static size_t |
| handle_unreachable( |
| bool prune, bool report, |
| const std::unordered_map<xmlNodePtr, |
| std::unordered_map<std::string, |
| std::string>>& alias_map, |
| xmlNodePtr root) |
| { |
| // ELF symbol ids, per corpus. |
| std::set<std::pair<xmlNodePtr, std::string>> elf_symbol_ids; |
| |
| // Simple way of allowing two kinds of nodes: nullptr=>type, |
| // node=>symbol. |
| using vertex_t = std::pair<xmlNodePtr, std::string>; |
| |
| // Graph vertices. |
| std::set<vertex_t> vertices; |
| // Graph edges. |
| std::map<vertex_t, std::set<vertex_t>> edges; |
| |
| // Keep track of type / symbol nesting so we can identify contains, |
| // containing and refers-to relationships. |
| std::vector<vertex_t> stack; |
| |
| // Keep track of which corpus we are in as symbols and elf-symbol-ids are |
| // scoped per corpus. |
| xmlNodePtr current_corpus = nullptr; |
| |
| // Process an XML node, adding a vertex and possibly some edges. |
| std::function<void(xmlNodePtr)> process_node = [&](xmlNodePtr node) { |
| // We only care about elements and not comments, at this stage. |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| const char* node_name = from_libxml(node->name); |
| |
| // Is this a corpus? |
| if (strcmp(node_name, "abi-corpus") == 0) |
| current_corpus = node; |
| |
| // Is this an ELF symbol? |
| if (strcmp(node_name, "elf-symbol") == 0) |
| { |
| elf_symbol_ids.insert( |
| std::make_pair(current_corpus, get_elf_symbol_id(node))); |
| // Early return is safe, but not necessary. |
| return; |
| } |
| |
| // Is this a type? Note that the same id may appear multiple times. |
| const auto id = strcmp(node_name, "subrange") != 0 |
| ? get_attribute(node, "id") |
| : std::optional<std::string>(); |
| if (id) |
| { |
| vertex_t type_vertex{nullptr, id.value()}; |
| vertices.insert(type_vertex); |
| const auto naming_typedef_id = get_attribute(node, "naming-typedef-id"); |
| if (naming_typedef_id) |
| { |
| // This is an odd one, there can be a backwards link from an |
| // anonymous type to a typedef that refers to it. The -t |
| // option will drop these, but if they are still present, we |
| // should model the link to avoid the risk of dangling |
| // references. |
| vertex_t naming_typedef_vertex{nullptr, naming_typedef_id.value()}; |
| edges[type_vertex].insert(naming_typedef_vertex); |
| } |
| if (!stack.empty()) |
| { |
| // Parent<->child dependencies; record dependencies both |
| // ways to avoid holes in XML types and declarations. |
| const auto& parent = stack.back(); |
| edges[parent].insert(type_vertex); |
| edges[type_vertex].insert(parent); |
| } |
| // Record the type. |
| stack.push_back(type_vertex); |
| } |
| |
| // Is this a (declaration expected to be linked to a) symbol? |
| const auto symbol = get_attribute(node, "elf-symbol-id"); |
| if (symbol) |
| { |
| vertex_t symbol_vertex{current_corpus, symbol.value()}; |
| vertices.insert(symbol_vertex); |
| if (!stack.empty()) |
| { |
| // Parent<->child dependencies; record dependencies both |
| // ways to avoid making holes in XML types and declarations. |
| // |
| // Symbols exist outside of the type hierarchy, so choosing |
| // to make them depend on a containing type scope and vice |
| // versa is conservative and probably not necessary. |
| const auto& parent = stack.back(); |
| edges[parent].insert(symbol_vertex); |
| edges[symbol_vertex].insert(parent); |
| } |
| // Record the symbol. |
| stack.push_back(symbol_vertex); |
| // In practice there will be at most one symbol on the stack; we could |
| // verify this here, but it wouldn't achieve anything. |
| } |
| |
| // Being both would make the stack ordering ambiguous. |
| if (id && symbol) |
| { |
| std::cerr << "cannot handle element which is both type and symbol\n"; |
| exit(1); |
| } |
| |
| // Is there a reference to another type? |
| const auto type_id = get_attribute(node, "type-id"); |
| if (type_id && !stack.empty()) |
| { |
| // The enclosing type or symbol refers to another type. |
| const auto& parent = stack.back(); |
| vertex_t type_id_vertex{nullptr, type_id.value()}; |
| edges[parent].insert(type_id_vertex); |
| } |
| |
| // Process recursively. |
| for (auto child : get_children(node)) |
| process_node(child); |
| |
| // Restore the stack. |
| if (symbol) |
| stack.pop_back(); |
| if (id) |
| stack.pop_back(); |
| }; |
| |
| // Traverse the whole root element and build a graph. |
| process_node(root); |
| |
| // Simple DFS. |
| std::set<vertex_t> seen; |
| std::function<void(vertex_t)> dfs = [&](vertex_t vertex) { |
| if (!seen.insert(vertex).second) |
| return; |
| auto it = edges.find(vertex); |
| if (it != edges.end()) |
| for (auto to : it->second) |
| dfs(to); |
| }; |
| |
| // Count of how many symbols are untyped. |
| size_t untyped = 0; |
| |
| // Traverse the graph, starting from the ELF symbols. |
| for (const auto& [corpus, symbol_id] : elf_symbol_ids) |
| { |
| const auto corpus_it = alias_map.find(corpus); |
| assert(corpus_it != alias_map.end()); |
| const auto& corpus_alias_map = corpus_it->second; |
| const auto it = corpus_alias_map.find(symbol_id); |
| const auto& mapped_symbol_id = it != corpus_alias_map.end() |
| ? it->second : symbol_id; |
| |
| vertex_t symbol_vertex{corpus, mapped_symbol_id}; |
| if (vertices.count(symbol_vertex)) |
| { |
| dfs(symbol_vertex); |
| } |
| else |
| { |
| if (report) |
| std::cerr << "no declaration found for ELF symbol with id " |
| << symbol_id << '\n'; |
| ++untyped; |
| } |
| } |
| |
| // This is a DFS with early stopping. |
| std::function<void(xmlNodePtr)> remove_unseen = [&](xmlNodePtr node) { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| const char* node_name = from_libxml(node->name); |
| |
| // Is this a corpus? |
| if (strcmp(node_name, "abi-corpus") == 0) |
| current_corpus = node; |
| |
| // Return if we know that this is a type to keep or drop in its |
| // entirety. |
| const auto id = strcmp(node_name, "subrange") != 0 |
| ? get_attribute(node, "id") |
| : std::optional<std::string>(); |
| if (id) |
| { |
| if (!seen.count(vertex_t{nullptr, id.value()})) |
| remove_element(node); |
| return; |
| } |
| |
| // Return if we know that this is a declaration to keep or drop in |
| // its entirety. Note that var-decl and function-decl are the only |
| // elements that can have an elf-symbol-id attribute. |
| if (strcmp(node_name, "var-decl") == 0 |
| || strcmp(node_name, "function-decl") == 0) |
| { |
| const auto symbol = get_attribute(node, "elf-symbol-id"); |
| if (!(symbol && seen.count(vertex_t{current_corpus, symbol.value()}))) |
| remove_element(node); |
| return; |
| } |
| |
| // Otherwise, this is not a type, declaration or part thereof, so |
| // process child elements. |
| for (auto child : get_children(node)) |
| remove_unseen(child); |
| }; |
| |
| if (prune) |
| // Traverse the XML, removing unseen elements. |
| remove_unseen(root); |
| |
| return untyped; |
| } |
| |
| /// Tidy anonymous types in various ways. |
| /// |
| /// 1. Normalise anonymous type names by removing the numerical suffix. |
| /// |
| /// Anonymous type names take the form __anonymous_foo__N where foo is |
| /// one of enum, struct or union and N is an optional numerical suffix. |
| /// The suffices are senstive to processing order and do not convey |
| /// useful ABI information. They can cause spurious harmless diffs and |
| /// make XML diffing and rebasing harder. |
| /// |
| /// It's best to remove the suffix. |
| /// |
| /// 2. Reanonymise anonymous types that have been given names. |
| /// |
| /// A recent change to abidw changed its behaviour for any anonymous |
| /// type that has a naming typedef. In addition to linking the typedef |
| /// and type in both directions, the code now gives (some) anonymous |
| /// types the same name as the typedef. This misrepresents the original |
| /// types. |
| /// |
| /// Such types should be anonymous. |
| /// |
| /// 3. Discard naming typedef backlinks. |
| /// |
| /// The attribute naming-typedef-id is a backwards link from an |
| /// anonymous type to the typedef that refers to it. It is ignored by |
| /// abidiff. |
| /// |
| /// Unfortunately, libabigail sometimes conflates multiple anonymous |
| /// types that have naming typedefs and only one of the typedefs can |
| /// "win". ABI XML is thus sensitive to processing order and can also |
| /// end up containing definitions of an anonymous type with differing |
| /// naming-typedef-id attributes. |
| /// |
| /// It's best to just drop the attribute. |
| /// |
| /// @param node the XML node to process |
| static void |
| handle_anonymous_types(bool normalise, bool reanonymise, bool discard_naming, |
| xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| const auto it = NAMED_TYPES.find(from_libxml(node->name)); |
| if (it != NAMED_TYPES.end()) |
| { |
| const auto& anon = it->second; |
| const auto name_attribute = get_attribute(node, "name"); |
| const auto& name = |
| name_attribute ? name_attribute.value() : std::string(); |
| const auto anon_attr = get_attribute(node, "is-anonymous"); |
| const bool is_anon = anon_attr && anon_attr.value() == "yes"; |
| const auto naming_attribute = get_attribute(node, "naming-typedef-id"); |
| if (normalise && is_anon && name != anon) { |
| // __anonymous_foo__123 -> __anonymous_foo__ |
| set_attribute(node, "name", anon); |
| } |
| if (reanonymise && !is_anon && naming_attribute) { |
| // bar with naming typedef -> __anonymous_foo__ |
| set_attribute(node, "is-anonymous", "yes"); |
| set_attribute(node, "name", anon); |
| } |
| if (discard_naming && naming_attribute) |
| unset_attribute(node, "naming-typedef-id"); |
| } |
| |
| for (auto child : get_children(node)) |
| handle_anonymous_types(normalise, reanonymise, discard_naming, child); |
| } |
| |
| /// Builds a mapping from qualified types to the underlying type ids. |
| /// |
| /// Recursively constructs a mapping from qualified types to the underlying |
| /// type ids found in the XML tree rooted at the given node. |
| /// |
| /// @param node node of the XML tree to process |
| /// |
| /// @param qualifier_id_to_type_id map from qualified types to underlying type |
| /// ids being constructed |
| static void |
| build_qualifier_id_to_type_id_map( |
| const xmlNodePtr node, |
| std::unordered_map<std::string, std::string>& qualifier_id_to_type_id) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| if (strcmp(from_libxml(node->name), "qualified-type-def") == 0) |
| { |
| const auto id = get_attribute(node, "id"); |
| const auto type_id = get_attribute(node, "type-id"); |
| if (!id || !type_id) |
| { |
| std::cerr << "found qualified type definition with missing id and/or " |
| << "type id\nid: " << id.value_or("(missing)") |
| << "\ntype id: " << type_id.value_or("(missing)") << '\n'; |
| exit(1); |
| } |
| const auto& id_value = id.value(); |
| const auto& type_id_value = type_id.value(); |
| auto [it, inserted] = |
| qualifier_id_to_type_id.insert({id_value, type_id_value}); |
| if (!inserted && it->second != type_id_value) |
| { |
| std::cerr << "conflicting type ids ('" << it->second << "' & '" |
| << type_id_value << "') found for qualified type with " |
| << "id: " << id_value << '\n'; |
| exit(1); |
| } |
| } |
| else |
| { |
| for (auto child : get_children(node)) |
| build_qualifier_id_to_type_id_map(child, qualifier_id_to_type_id); |
| } |
| } |
| |
/// Determine the mapping from qualified type to underlying unqualified
/// type.
///
/// This resolves chains of qualifiers on qualified types: afterwards no
/// mapped value is itself a key of the map. Note that this does not
/// attempt to look through typedefs.
///
/// The program exits with an error if following a chain leads back to a
/// type id already encountered on that chain.
///
/// @param qualifier_id_to_type_id map from qualified types to underlying type
/// ids, updated in place
static void
resolve_qualifier_chains(
    std::unordered_map<std::string, std::string>& qualifier_id_to_type_id)
{
  for (auto& entry : qualifier_id_to_type_id)
    {
      std::string& underlying = entry.second;
      // Type ids met while following this entry's chain.
      std::unordered_set<std::string> visited;
      for (;;)
        {
          const bool first_visit = visited.insert(underlying).second;
          if (!first_visit)
            {
              std::cerr << "dequalification of type with id '" << entry.first
                        << "' ran into a self referencing loop\n";
              exit(1);
            }
          const auto next = qualifier_id_to_type_id.find(underlying);
          if (next == qualifier_id_to_type_id.end())
            break;
          // Still a qualified type, take one more step along the chain.
          underlying = next->second;
        }
    }
}
| |
| /// Removes top-level qualifiers from function parameter and return types. |
| /// |
| /// Recursively removes top-level qualifiers from parameter and return types of |
| /// all function declarations and function types found in the XML tree rooted |
| /// at the given node. |
| /// |
| /// This requires also requires a map of qualified types to the underlying type |
| /// ids, which enables the unqualification of qualified types. |
| /// |
| /// @param node node of the XML tree to process |
| /// |
| /// @param qualifier_id_to_type_id map from qualified types to underlying type |
| /// ids |
| static void |
| remove_function_parameter_type_qualifiers( |
| const xmlNodePtr node, |
| const std::unordered_map<std::string, std::string>& qualifier_id_to_type_id) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| if (strcmp(from_libxml(node->name), "function-decl") == 0 || |
| strcmp(from_libxml(node->name), "function-type") == 0) |
| { |
| bool type_changed = false; |
| for (auto child : get_children(node)) |
| if (const auto type_id = get_attribute(child, "type-id")) |
| { |
| const auto& type_id_value = type_id.value(); |
| auto it = qualifier_id_to_type_id.find(type_id_value); |
| if (it != qualifier_id_to_type_id.end()) |
| { |
| type_changed = true; |
| set_attribute(child, "type-id", it->second); |
| |
| // Parameter or return type has been modified, making a comment |
| // describing the type for this node inconsistent. Thus the |
| // comment must be removed if it exists. |
| if (auto comment_node = get_comment_node(child)) |
| remove_node(comment_node); |
| } |
| } |
| |
| if (type_changed) |
| { |
| // Parameter or return type has been modified, making a comment |
| // describing the type for this node inconsistent. Thus the comment |
| // must be removed if it exists. |
| if (auto comment_node = get_comment_node(node)) |
| remove_node(comment_node); |
| } |
| } |
| else |
| { |
| for (auto child : get_children(node)) |
| remove_function_parameter_type_qualifiers(child, qualifier_id_to_type_id); |
| } |
| } |
| |
| /// Remove attributes emitted by abidw --load-all-types. |
| /// |
| /// With this invocation and if any user-defined types are deemed |
| /// unreachable, libabigail will output a tracking-non-reachable-types |
| /// attribute on top-level elements and a is-non-reachable attribute on |
| /// each such type element. |
| /// |
| /// abitidy has its own graph-theoretic notion of reachability and these |
| /// attributes have no ABI relevance. |
| /// |
| /// It's best to just drop them. |
| /// |
| /// @param node the XML node to process |
| void |
| clear_non_reachable(xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| const char* node_name = from_libxml(node->name); |
| |
| if (strcmp(node_name, "abi-corpus-group") == 0 |
| || strcmp(node_name, "abi-corpus") == 0) |
| unset_attribute(node, "tracking-non-reachable-types"); |
| else if (NAMED_TYPES.find(node_name) != NAMED_TYPES.end()) |
| unset_attribute(node, "is-non-reachable"); |
| |
| for (auto child : get_children(node)) |
| clear_non_reachable(child); |
| } |
| |
| /// Determine the effective name of a given node. |
| /// |
| /// The effective name is same as the value of the 'name' attribute for all |
| /// nodes except nodes which represent anonymous types. For anonymous types, the |
| /// function returns std::nullopt. |
| /// |
| /// @param node the node for which effective name has to be determined |
| /// |
| /// @return an optional name string |
| std::optional<std::string> |
| get_effective_name(xmlNodePtr node) |
| { |
| return get_attribute(node, "is-anonymous") |
| ? std::nullopt : get_attribute(node, "name"); |
| } |
| |
| /// Record type ids for anonymous types that have to be renumbered. |
| /// |
| /// This constructs a map from the ids that need to be renumbered to the XML |
| /// node where the id is defined/declared. Also records hexadecimal hashes used |
| /// by non-anonymous types. |
| /// |
| /// @param node the node being processed |
| /// |
| /// @param to_renumber map from ids to be renumbered to corresponding XML node |
| /// |
| /// @param used_hashes set of hashes used by non-anonymous type ids |
| static void |
| record_ids_to_renumber( |
| xmlNodePtr node, |
| std::unordered_map<std::string, xmlNodePtr>& to_renumber, |
| std::unordered_set<size_t>& used_hashes) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| for (auto child : get_children(node)) |
| record_ids_to_renumber(child, to_renumber, used_hashes); |
| |
| const auto& id_attr = get_attribute(node, "id"); |
| if (!id_attr) |
| return; |
| |
| const auto& id = id_attr.value(); |
| const std::string_view node_name(from_libxml(node->name)); |
| const bool is_anonymous_type_candidate = NAMED_TYPES.count(node_name); |
| if (!is_anonymous_type_candidate || get_effective_name(node)) |
| { |
| const bool is_hexadecimal = std::all_of( |
| id.begin(), id.end(), [](unsigned char c){ return std::isxdigit(c); }); |
| if (id.size() == 8 && is_hexadecimal) |
| { |
| // Do not check for successful insertion since there can be multiple |
| // declarations/definitions for a type. |
| size_t hash = std::stoul(id, nullptr, 16); |
| used_hashes.insert(hash); |
| } |
| } |
| else |
| { |
| // Check for successful insertion since anonymous types are not prone to |
| // having multiple definitions/declarations. |
| if (!to_renumber.insert({id, node}).second) |
| { |
| std::cerr << "Found multiple definitions/declarations of anonmyous " |
| << "type with id: " << id << '\n'; |
| exit(1); |
| } |
| } |
| } |
| |
/// Compute a stable string hash.
///
/// This is the 32-bit FNV-1a algorithm: starting from the offset basis,
/// each byte is XORed into the hash which is then multiplied by the FNV
/// prime, modulo 2^32. The algorithm, reference code and constants are
/// all unencumbered. It is fast and has reasonable distribution
/// properties.
///
/// std::hash has no portability or stability guarantees so is
/// unsuitable where reproducibility is a requirement such as in XML
/// output.
///
/// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
///
/// @param str the string to hash
///
/// @return an unsigned 32 bit hash value
static uint32_t
fnv_hash(const std::string& str)
{
  constexpr uint32_t offset_basis = 0x811c9dc5;
  constexpr uint32_t prime = 0x01000193;
  uint32_t digest = offset_basis;
  for (std::string::size_type i = 0; i < str.size(); ++i)
    {
      // Go via uint8_t so that bytes >= 0x80 are not sign-extended.
      digest ^= static_cast<uint8_t>(str[i]);
      digest *= prime;
    }
  return digest;
}
| |
| /// Generate a new 32 bit type id and return its hexadecimal representation. |
| /// |
| /// Generates hash of the given hash content. Uses linear probing to resolve |
| /// hash collisions. Also, records the newly generated hash in a set of used |
| /// hashes. |
| /// |
| /// @param hash_content the string which is used to generate a hash |
| /// |
| /// @param used_hashes the set of hashes which have already been used |
| /// |
| /// @return the hexadecimal representation of the newly generated hash |
| static std::string |
| generate_new_id(const std::string& hash_content, |
| std::unordered_set<size_t>& used_hashes) |
| { |
| auto hash = fnv_hash(hash_content); |
| while (!used_hashes.insert(hash).second) |
| ++hash; |
| std::ostringstream os; |
| os << std::hex << std::setfill('0') << std::setw(8) << hash; |
| return os.str(); |
| } |
| |
| /// Find the first member for a user defined type. |
| /// |
| /// The first member for enums is the first enumerator while for structs and |
| /// unions it is the variable declaration of the first data member. |
| /// |
| /// @param node the node being processed |
| /// |
| /// @return the node which represents the first member |
| static xmlNodePtr |
| find_first_member(xmlNodePtr node) |
| { |
| auto first_child_by_xml_node_name = |
| [](const xmlNodePtr node, const std::string_view name) -> xmlNodePtr { |
| for (auto child : get_children(node)) |
| if (child->type == XML_ELEMENT_NODE && from_libxml(child->name) == name) |
| return child; |
| return nullptr; |
| }; |
| |
| if (strcmp(from_libxml(node->name), "enum-decl") == 0) |
| return first_child_by_xml_node_name(node, "enumerator"); |
| if (auto data_member = first_child_by_xml_node_name(node, "data-member")) |
| return first_child_by_xml_node_name(data_member, "var-decl"); |
| return nullptr; |
| } |
| |
| /// Calculate new type id for a given old type id. |
| /// |
| /// This resolves the old type ids for anonymous types to new ones, while ids |
| /// which do not belong to anonymous types are returned as they are. |
| /// |
| /// @param type_id old type id |
| /// |
| /// @param to_renumber map from ids to be renumbered to corresponding XML node |
| /// |
| /// @param used_hashes set of hashes used by other type ids |
| /// |
| /// @param type_id_map mapping from old type ids to new ones |
| /// |
| /// @return resolved type id |
| static std::string |
| resolve_ids_to_renumber( |
| const std::string& type_id, |
| const std::unordered_map<std::string, xmlNodePtr>& to_renumber, |
| std::unordered_set<size_t>& used_hashes, |
| std::unordered_map<std::string, std::string>& type_id_map) |
| { |
| // Check whether the given type_id needs to be renumbered. If not, the type_id |
| // can be returned since it does not represent an anonymous type. |
| const auto to_renumber_it = to_renumber.find(type_id); |
| if (to_renumber_it == to_renumber.end()) |
| return type_id; |
| |
| // Insert an empty string placeholder to prevent infinite loops. |
| const auto& [type_mapping, inserted] = type_id_map.insert({type_id, {}}); |
| if (!inserted) |
| { |
| if (!type_mapping->second.empty()) |
| return type_mapping->second; |
| std::cerr << "new type id depends on itself for type with id: " |
| << type_id << '\n'; |
| exit(1); |
| } |
| |
| const auto& node = to_renumber_it->second; |
| std::ostringstream hash_content; |
| hash_content << from_libxml(node->name); |
| if (auto first_member = find_first_member(node)) |
| { |
| // Create hash content by combining the name & resolved type id of the |
| // first member and the kind of anonymous type. |
| if (auto name = get_effective_name(first_member)) |
| hash_content << '-' << name.value(); |
| if (auto type_id = get_attribute(first_member, "type-id")) |
| hash_content << '-' << resolve_ids_to_renumber( |
| type_id.value(), to_renumber, used_hashes, type_id_map); |
| } |
| else |
| { |
| // No member information available. Possibly type is empty. |
| hash_content << "__empty"; |
| } |
| |
| return type_mapping->second = |
| generate_new_id(hash_content.str(), used_hashes); |
| } |
| |
| /// Replace old type ids by new ones. |
| /// |
| /// @param node the node which is being processed |
| /// |
| /// @param type_id_map map from old type ids to replace to new ones |
| static void |
| renumber_type_ids( |
| xmlNodePtr node, |
| const std::unordered_map<std::string, std::string>& type_id_map) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| auto maybe_replace = [&](const char* attribute_name) { |
| const auto& attribute = get_attribute(node, attribute_name); |
| if (attribute) |
| { |
| const auto it = type_id_map.find(attribute.value()); |
| if (it != type_id_map.end()) |
| set_attribute(node, attribute_name, it->second); |
| } |
| }; |
| |
| maybe_replace("id"); |
| maybe_replace("type-id"); |
| maybe_replace("naming-typedef-id"); |
| |
| for (auto child : get_children(node)) |
| renumber_type_ids(child, type_id_map); |
| } |
| |
| /// Determine whether one XML element is a subtree of another. |
| /// |
| /// XML elements representing types are sometimes emitted multiple |
| /// times, identically. Also, member typedefs are sometimes emitted |
| /// separately from their types, resulting in duplicate XML fragments. |
| /// |
| /// Both these issues can be resolved by first detecting duplicate |
| /// occurrences of a given type id and then checking to see if there's |
| /// an instance that subsumes the others, which can then be eliminated. |
| /// |
| /// @param left the first element to compare |
| /// |
| /// @param right the second element to compare |
| /// |
| /// @return whether the first element is a subtree of the second |
| bool |
| sub_tree(xmlNodePtr left, xmlNodePtr right) |
| { |
| // The set of attributes that should be excluded from consideration when |
| // comparing XML elements. These attributes are either irrelevant for ABI |
| // monitoring or already handled by another check. |
| static const std::unordered_set<std::string> IRRELEVANT_ATTRIBUTES = { |
| // Source location information. This can vary between duplicate type |
| // definitions. |
| "filepath", |
| "line", |
| "column", |
| // Anonymous type to typedef backlinks. |
| "naming-typedef-id", |
| // Annotation that can appear with --load-all-types. |
| "is-non-reachable", |
| // Handled while checking for effective name equivalence. |
| "name", |
| "is-anonymous", |
| }; |
| |
| // Node names must match. |
| const char* left_name = from_libxml(left->name); |
| const char* right_name = from_libxml(right->name); |
| if (strcmp(left_name, right_name) != 0) |
| return false; |
| |
| // Effective names must match. |
| if (get_effective_name(left) != get_effective_name(right)) |
| return false; |
| |
| // Attributes may be missing on the left, but must match otherwise. |
| for (auto p = left->properties; p; p = p->next) |
| { |
| const char* attribute_name = from_libxml(p->name); |
| if (IRRELEVANT_ATTRIBUTES.count(attribute_name)) |
| continue; |
| // EXCEPTION: libabigail emits the access specifier for the type |
| // it's trying to "emit in scope" rather than for what may be a |
| // containing type; so allow member-type attribute access to differ. |
| if (strcmp(left_name, "member-type") == 0 |
| && strcmp(attribute_name, "access") == 0) |
| continue; |
| const auto left_value = get_attribute(left, attribute_name); |
| assert(left_value); |
| const auto right_value = get_attribute(right, attribute_name); |
| if (!right_value || left_value.value() != right_value.value()) |
| return false; |
| } |
| |
| // The left subelements must be a subsequence of the right ones. |
| xmlNodePtr left_child = xmlFirstElementChild(left); |
| xmlNodePtr right_child = xmlFirstElementChild(right); |
| while (left_child && right_child) |
| { |
| if (sub_tree(left_child, right_child)) |
| left_child = xmlNextElementSibling(left_child); |
| right_child = xmlNextElementSibling(right_child); |
| } |
| return !left_child; |
| } |
| |
| /// Eliminate non-conflicting / report conflicting duplicate definitions. |
| /// |
| /// This function can eliminate exact type duplicates and duplicates |
| /// where there is at least one maximal definition. It can report the |
| /// remaining, conflicting duplicate definitions. |
| /// |
| /// If a type has duplicate definitions in multiple namespace scopes or |
| /// definitions with different effective names, these are considered as |
| /// conflicting duplicate definitions and should not be reordered. This function |
| /// reports how many such types it finds. |
| /// |
| /// @param eliminate whether to eliminate non-conflicting duplicates |
| /// |
| /// @param report whether to report conflicting duplicate definitions |
| /// |
| /// @param root the root XML element |
| /// |
| /// @return the number of conflicting duplicate definitions |
| size_t handle_duplicate_types(bool eliminate, bool report, xmlNodePtr root) |
| { |
| // map of type-id to pair of set of namespace scopes and vector of |
| // xmlNodes |
| std::unordered_map< |
| std::string, |
| std::pair< |
| std::set<namespace_scope>, |
| std::vector<xmlNodePtr>>> types; |
| namespace_scope namespaces; |
| |
| // find all type occurrences |
| std::function<void(xmlNodePtr)> dfs = [&](xmlNodePtr node) { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| const char* node_name = from_libxml(node->name); |
| std::optional<std::string> namespace_name; |
| if (strcmp(node_name, "namespace-decl") == 0) |
| namespace_name = get_attribute(node, "name"); |
| if (namespace_name) |
| namespaces.push_back(namespace_name.value()); |
| if (strcmp(node_name, "abi-corpus-group") == 0 |
| || strcmp(node_name, "abi-corpus") == 0 |
| || strcmp(node_name, "abi-instr") == 0 |
| || namespace_name) |
| { |
| for (auto child : get_children(node)) |
| dfs(child); |
| } |
| else |
| { |
| const auto id = get_attribute(node, "id"); |
| if (id) |
| { |
| auto& info = types[id.value()]; |
| info.first.insert(namespaces); |
| info.second.push_back(node); |
| } |
| } |
| if (namespace_name) |
| namespaces.pop_back(); |
| }; |
| dfs(root); |
| |
| size_t conflicting_types = 0; |
| for (const auto& [id, scopes_and_definitions] : types) |
| { |
| const auto& [scopes, definitions] = scopes_and_definitions; |
| |
| if (scopes.size() > 1) |
| { |
| if (report) |
| std::cerr << "conflicting scopes found for type '" << id << "'\n"; |
| ++conflicting_types; |
| continue; |
| } |
| |
| const size_t count = definitions.size(); |
| if (count <= 1) |
| continue; |
| |
| // Find a potentially maximal candidate by scanning through and |
| // retaining the new definition if it's a supertree of the current |
| // candidate. |
| std::vector<bool> ok(count); |
| size_t candidate = 0; |
| ok[candidate] = true; |
| for (size_t ix = 1; ix < count; ++ix) |
| if (sub_tree(definitions[candidate], definitions[ix])) |
| { |
| candidate = ix; |
| ok[candidate] = true; |
| } |
| |
| // Verify the candidate is indeed maximal by scanning the |
| // definitions not already known to be subtrees of it. |
| bool bad = false; |
| const auto& candidate_definition = definitions[candidate]; |
| const char* candidate_node_name = from_libxml(candidate_definition->name); |
| const auto& candidate_effective_name = |
| get_effective_name(candidate_definition); |
| for (size_t ix = 0; ix < count; ++ix) |
| { |
| const auto& definition = definitions[ix]; |
| if (!ok[ix] && !sub_tree(definition, candidate_definition)) |
| { |
| if (strcmp(from_libxml(definition->name), candidate_node_name) != 0 |
| || get_effective_name(definition) != candidate_effective_name) |
| ++conflicting_types; |
| bad = true; |
| break; |
| } |
| } |
| |
| if (bad) |
| { |
| if (report) |
| std::cerr << "unresolvable duplicate definitions found for type '" |
| << id << "'\n"; |
| continue; |
| } |
| |
| if (eliminate) |
| // Remove all but the maximal definition. |
| for (size_t ix = 0; ix < count; ++ix) |
| if (ix != candidate) |
| remove_element(definitions[ix]); |
| } |
| |
| return conflicting_types; |
| } |
| |
/// Names of abi-instr attributes whose values are allowed to differ between
/// the abi-instr elements being merged by sort_instrs_into_corpus. Such an
/// attribute is given the value "various" on the replacement abi-instr; any
/// other attribute with more than one distinct value is reported as an error.
///
/// The transparent comparator allows lookup by const char* without
/// constructing a temporary std::string.
static const std::set<std::string, std::less<>> INSTR_VARIABLE_ATTRIBUTES = {
  "path",
  "comp-dir-path",
  "language",
};
| |
| /// Collect elements of abi-instr elements by namespace. |
| /// |
| /// Namespaces are not returned but are recursively traversed with the |
| /// namespace stack being maintained. Other elements are associated with |
| /// the current namespace. |
| /// |
| /// @param nodes the nodes to traverse |
| /// |
| /// @return child elements grouped by namespace scope |
| static std::map<namespace_scope, std::vector<xmlNodePtr>> |
| get_children_by_namespace(const std::vector<xmlNodePtr>& nodes) |
| { |
| std::map<namespace_scope, std::vector<xmlNodePtr>> result; |
| namespace_scope scope; |
| |
| std::function<void(xmlNodePtr)> process = [&](xmlNodePtr node) { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| std::optional<std::string> namespace_name; |
| const char* node_name = from_libxml(node->name); |
| if (strcmp(node_name, "namespace-decl") == 0) |
| namespace_name = get_attribute(node, "name"); |
| if (namespace_name) |
| { |
| scope.push_back(namespace_name.value()); |
| for (auto child : get_children(node)) |
| process(child); |
| scope.pop_back(); |
| } |
| else |
| result[scope].push_back(node); |
| }; |
| |
| for (auto node : nodes) |
| for (auto child : get_children(node)) |
| process(child); |
| return result; |
| } |
| |
| /// Determine whether an element contains an elf-symbol-id attribute. |
| /// |
| /// @param node the node to examine recursively |
| /// |
| /// @return whether or not an elf-symbol-id attribute was found |
| static bool |
| contains_elf_symbol_id(xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return false; |
| if (get_attribute(node, "elf-symbol-id")) |
| return true; |
| for (auto child : get_children(node)) |
| if (contains_elf_symbol_id(child)) |
| return true; |
| return false; |
| } |
| |
| /// Sort instrs into a corpus. |
| /// |
| /// The given instrs (grouped by source corpus) are sorted and moved |
| /// into the destination corpus, except that elements containing |
| /// symbol-linked declarations are not moved between corpora. |
| /// |
| /// This loses annotations (XML comments) on namespace-decl elements. |
| /// It would have been a fair amount of extra work to preserve them. |
| /// |
| /// @param where the XML abi-corpus element into which to move elements |
| /// |
| /// @param instrs a list of pairs of containing corpus and XML abi-instr |
| /// element out of which to move elements |
| static void |
| sort_instrs_into_corpus( |
| xmlNodePtr where, |
| const std::vector<std::pair<xmlNodePtr, std::vector<xmlNodePtr>>>& instrs) |
| { |
| if (instrs.empty()) |
| return; |
| |
| // Collect the attributes of all the instrs. |
| std::map<std::string, std::set<std::string>> attributes; |
| for (const auto& [corpus, corpus_instrs] : instrs) |
| for (const auto& instr : corpus_instrs) |
| for (auto p = instr->properties; p; p = p->next) |
| { |
| // This is horrible. There should be a better way of iterating. |
| const char* attribute_name = from_libxml(p->name); |
| const auto attribute_value = get_attribute(instr, attribute_name); |
| assert(attribute_value); |
| attributes[attribute_name].insert(attribute_value.value()); |
| } |
| |
| // Create and attach a replacement instr and populate its attributes. |
| xmlNodePtr replacement = |
| xmlAddChild(where, xmlNewNode(nullptr, to_libxml("abi-instr"))); |
| for (const auto& attribute : attributes) |
| { |
| const char* attribute_name = attribute.first.c_str(); |
| const auto& attribute_values = attribute.second; |
| if (attribute_values.size() == 1) |
| set_attribute(replacement, attribute_name, *attribute_values.begin()); |
| else if (INSTR_VARIABLE_ATTRIBUTES.count(attribute_name)) |
| set_attribute(replacement, attribute_name, "various"); |
| else |
| { |
| std::cerr << "unexpectedly variable abi-instr attribute '" |
| << attribute_name << "'\n"; |
| remove_node(replacement); |
| return; |
| } |
| } |
| |
| // Order XML nodes by XML element names, effective names, mangled names and |
| // type ids. |
| struct Compare { |
| int |
| cmp(xmlNodePtr a, xmlNodePtr b) const |
| { |
| int result; |
| |
| // Compare XML element names. |
| result = strcmp(from_libxml(a->name), from_libxml(b->name)); |
| if (result) |
| return result; |
| |
| // Compare effective names. |
| const auto a_effective_name = get_effective_name(a); |
| const auto b_effective_name = get_effective_name(b); |
| |
| result = compare_optional(a_effective_name, b_effective_name); |
| if (result) |
| return result; |
| |
| // Compare declarations using mangled names. |
| result = compare_attributes("mangled-name", a, b); |
| if (result) |
| return result; |
| |
| // Compare types using ids. |
| return compare_attributes("id", a, b); |
| } |
| |
| bool |
| operator()(xmlNodePtr a, xmlNodePtr b) const |
| { |
| return cmp(a, b) < 0; |
| } |
| }; |
| |
| // Collect the child elements of all the instrs, by namespace scope. |
| std::map<namespace_scope, std::vector<xmlNodePtr>> scoped_children; |
| std::unordered_map<xmlNodePtr, xmlNodePtr> child_corpus; |
| for (const auto& [corpus, corpus_instrs] : instrs) |
| for (const auto& [scope, children] : get_children_by_namespace(corpus_instrs)) |
| { |
| auto& these_scoped_children = scoped_children[scope]; |
| for (auto child : children) |
| { |
| these_scoped_children.push_back(child); |
| child_corpus[child] = corpus; |
| } |
| } |
| for (auto& [scope, children] : scoped_children) |
| // Sort the children, preserving order of duplicates. |
| std::stable_sort(children.begin(), children.end(), Compare()); |
| |
| // Create namespace elements on demand. The global namespace, with |
| // empty scope, is just the replacement instr itself. |
| std::map<namespace_scope, xmlNodePtr> namespace_elements{{{}, replacement}}; |
| std::function<xmlNodePtr(const namespace_scope&)> get_namespace_element = |
| [&](const namespace_scope& scope) { |
| auto insertion = namespace_elements.insert({scope, nullptr}); |
| if (insertion.second) |
| { |
| // Insertion succeeded, so the scope cannot be empty. |
| namespace_scope truncated = scope; |
| truncated.pop_back(); |
| xmlNodePtr parent = get_namespace_element(truncated); |
| // We can now create an XML element in the right place. |
| xmlNodePtr child = xmlNewNode(nullptr, to_libxml("namespace-decl")); |
| set_attribute(child, "name", scope.back()); |
| xmlAddChild(parent, child); |
| insertion.first->second = child; |
| } |
| return insertion.first->second; |
| }; |
| |
| // Move each child to the replacement instr or namespace subelement |
| // thereof, unless the child would move between corpora and is or |
| // contains a symbol-linked declaration. |
| for (const auto& [scope, elements] : scoped_children) |
| { |
| xmlNodePtr namespace_element = get_namespace_element(scope); |
| for (auto element : elements) |
| if (child_corpus[element] == where || !contains_elf_symbol_id(element)) |
| move_element(element, namespace_element); |
| } |
| |
| // Remove each original instr if now effectively empty. |
| for (const auto& [corpus, corpus_instrs] : instrs) |
| for (auto instr : corpus_instrs) |
| if (get_children_by_namespace({instr}).empty()) |
| remove_node(instr); |
| |
| // Remove the replacement if it wasn't used. |
| if (get_children(replacement).empty()) |
| remove_node(replacement); |
| } |
| |
| /// Get corpora instrs. |
| /// |
| /// @param corpora a vector of corpus elements |
| /// |
| /// @return a vector of pairs of corpus and contained instr elements |
| std::vector<std::pair<xmlNodePtr, std::vector<xmlNodePtr>>> |
| get_corpora_instrs(const std::vector<xmlNodePtr>& corpora) |
| { |
| std::vector<std::pair<xmlNodePtr, std::vector<xmlNodePtr>>> result; |
| for (auto corpus : corpora) |
| { |
| result.push_back({corpus, {}}); |
| auto& corpus_instrs = result.back().second; |
| for (auto instr : get_children(corpus)) |
| if (strcmp(from_libxml(instr->name), "abi-instr") == 0) |
| corpus_instrs.push_back(instr); |
| } |
| return result; |
| } |
| |
| /// Sort namespaces, types and declarations. |
| /// |
| /// @param root the XML root element |
| static void |
| sort_namespaces_types_and_declarations(xmlNodePtr root) |
| { |
| // There are (currently) 2 ABI formats we handle here. |
| // |
| // 1. An abi-corpus containing one or more abi-instr. In this case, we |
| // move all namespaces, types and declarations to a replacement |
| // abi-instr at the end of the abi-corpus. |
| // |
| // 2. An abi-corpus-group containing one or more abi-corpus each |
| // containing zero or more abi-instr (with at least one abi-instr |
| // altogether). In this case all the corpora are sorted together into |
| // a replacement abi-instr created within the first corpus, except |
| // that symbol-linked declarations in subsequent corpora are not moved. |
| // |
| // Anything else is left alone. For example, single abi-instr elements |
| // are present in some libabigail test suite files. |
| |
| // We first need to identify where to place the new abi-instr and |
| // collect all the abi-instr to process. |
| const char* root_name = from_libxml(root->name); |
| if (strcmp(root_name, "abi-corpus-group") == 0) |
| { |
| // Process all corpora in a corpus group together. |
| std::vector<xmlNodePtr> corpora; |
| xmlNodePtr first = nullptr; |
| for (auto corpus : get_children(root)) |
| if (strcmp(from_libxml(corpus->name), "abi-corpus") == 0) |
| { |
| if (!first) |
| first = corpus; |
| corpora.push_back(corpus); |
| } |
| if (first) |
| sort_instrs_into_corpus(first, get_corpora_instrs(corpora)); |
| // An extra pass to sort whatever may have been left behind. |
| for (auto corpus : corpora) |
| if (corpus != first) |
| sort_instrs_into_corpus(corpus, get_corpora_instrs({corpus})); |
| } |
| else if (strcmp(root_name, "abi-corpus") == 0) |
| sort_instrs_into_corpus(root, get_corpora_instrs({root})); |
| } |
| |
/// Section name suffixes that mark a section of a symbol file as a symbol
/// list; read_symbols collects symbols only from sections whose name ends
/// with one of these.
static constexpr std::array<std::string_view, 2> SYMBOL_SECTION_SUFFICES = {{
  "symbol_list",
  "whitelist",
}};
| |
| /// Read symbols from a file. |
| /// |
| /// This aims to be compatible with the .ini format used by libabigail |
| /// for suppression specifications and symbol lists. All symbol list |
| /// sections in the given file are combined into a single set of |
| /// symbols. |
| /// |
| /// @param filename the name of the file from which to read |
| /// |
| /// @return a set of symbols |
| symbol_set |
| read_symbols(const char* filename) |
| { |
| symbol_set symbols; |
| std::ifstream file(filename); |
| if (!file) |
| { |
| std::cerr << "error opening symbol file '" << filename << "'\n"; |
| exit(1); |
| } |
| |
| bool in_symbol_section = false; |
| std::string line; |
| while (std::getline(file, line)) |
| { |
| size_t start = 0; |
| size_t limit = line.size(); |
| // Strip comments and leading / trailing whitespace. |
| while (start < limit) |
| { |
| if (std::isspace(line[start])) |
| ++start; |
| else if (line[start] == '#') |
| start = limit; |
| else |
| break; |
| } |
| while (start < limit) |
| { |
| if (std::isspace(line[limit - 1])) |
| --limit; |
| else |
| break; |
| } |
| // Skip empty lines. |
| if (start == limit) |
| continue; |
| // See if we are entering a symbol list section. |
| if (line[start] == '[' && line[limit - 1] == ']') |
| { |
| std::string_view section(&line[start + 1], limit - start - 2); |
| bool found = false; |
| for (const auto& suffix : SYMBOL_SECTION_SUFFICES) |
| if (section.size() >= suffix.size() |
| && section.substr(section.size() - suffix.size()) == suffix) |
| { |
| found = true; |
| break; |
| } |
| in_symbol_section = found; |
| continue; |
| } |
| // Add symbol. |
| if (in_symbol_section) |
| symbols.insert(std::string(&line[start], limit - start)); |
| } |
| if (!file.eof()) |
| { |
| std::cerr << "error reading symbol file '" << filename << "'\n"; |
| exit(1); |
| } |
| return symbols; |
| } |
| |
| /// Get aliases from XML node. |
| /// |
| /// @param node the XML node to process |
| /// |
| /// @return an ordered set of aliases |
| std::set<std::string> |
| get_aliases(xmlNodePtr node) |
| { |
| std::set<std::string> aliases; |
| const auto alias = get_attribute(node, "alias"); |
| if (alias) |
| { |
| std::istringstream is(alias.value()); |
| std::string item; |
| while (std::getline(is, item, ',')) |
| aliases.insert(item); |
| } |
| return aliases; |
| } |
| |
| /// Set aliases in XML node. |
| /// |
| /// @param node the XML node to process |
| /// |
| /// @param aliases an ordered set of aliases |
| void |
| set_aliases(xmlNodePtr node, const std::set<std::string>& aliases) |
| { |
| if (aliases.empty()) |
| { |
| unset_attribute(node, "alias"); |
| } |
| else |
| { |
| std::ostringstream os; |
| bool first = true; |
| for (const auto& alias : aliases) |
| { |
| if (first) |
| first = false; |
| else |
| os << ','; |
| os << alias; |
| } |
| set_attribute(node, "alias", os.str()); |
| } |
| } |
| |
| /// Gather information about symbols and record alias <-> main mappings. |
| /// |
| /// @param symbol_map a map from elf-symbol-id to XML node |
| /// |
| /// @param alias_map a map from alias elf-symbol-id to main |
| /// |
| /// @param main_map a map from main elf-symbol-id to aliases |
| /// |
| /// @param node the XML node to process |
| void |
| process_symbols( |
| std::unordered_map<std::string, xmlNodePtr>& symbol_map, |
| std::unordered_map<std::string, std::string>& alias_map, |
| std::unordered_map<std::string, std::set<std::string>>& main_map, |
| xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| const char* node_name = from_libxml(node->name); |
| if (strcmp(node_name, "abi-corpus-group") == 0) |
| { |
| std::cerr << "symbol processing must be per corpus\n"; |
| exit(1); |
| } |
| else if (strcmp(node_name, "abi-corpus") == 0 |
| || strcmp(node_name, "elf-variable-symbols") == 0 |
| || strcmp(node_name, "elf-function-symbols") == 0) |
| { |
| // Process children. |
| for (auto child : get_children(node)) |
| process_symbols(symbol_map, alias_map, main_map, child); |
| } |
| else if (strcmp(node_name, "elf-symbol") == 0) |
| { |
| const auto id = get_elf_symbol_id(node); |
| if (!symbol_map.insert({id, node}).second) |
| { |
| std::cerr << "multiple symbols with id " << id << "\n"; |
| exit(1); |
| } |
| const auto aliases = get_aliases(node); |
| for (const auto& alias : aliases) |
| if (!alias_map.insert({alias, id}).second) |
| { |
| std::cerr << "multiple aliases with id " << alias << "\n"; |
| exit(1); |
| } |
| if (!aliases.empty()) |
| main_map.insert({id, aliases}); |
| } |
| } |
| |
| /// Rewrite elf-symbol-id attributes following ELF symbol removal. |
| /// |
| /// @param mapping map from old to new elf-symbol-id, if any |
| void |
| rewrite_symbols_in_declarations( |
| const std::unordered_map<std::string, std::optional<std::string>>& mapping, |
| xmlNodePtr node) |
| { |
| if (node->type != XML_ELEMENT_NODE) |
| return; |
| |
| const char* node_name = from_libxml(node->name); |
| if (strcmp(node_name, "var-decl") == 0 |
| || strcmp(node_name, "function-decl") == 0) |
| { |
| auto symbol = get_attribute(node, "elf-symbol-id"); |
| bool changed = false; |
| while (symbol) |
| { |
| const auto it = mapping.find(symbol.value()); |
| if (it == mapping.end()) |
| break; |
| symbol = it->second; |
| changed = true; |
| } |
| if (changed) |
| { |
| if (symbol) |
| set_attribute(node, "elf-symbol-id", symbol.value()); |
| else |
| unset_attribute(node, "elf-symbol-id"); |
| } |
| } |
| |
| for (xmlNodePtr child : get_children(node)) |
| rewrite_symbols_in_declarations(mapping, child); |
| } |
| |
| /// Remove unlisted ELF symbols. |
| /// |
| /// @param symbols the set of symbols |
| /// |
| /// @param corpus the XML corpus element |
| /// |
| /// @return mapping from alias to main elf-symbol-id |
| std::unordered_map<std::string, std::string> |
| filter_corpus_symbols(const std::optional<symbol_set>& symbols, |
| xmlNodePtr corpus) |
| { |
| // find symbols and record alias <-> main mappings |
| std::unordered_map<std::string, xmlNodePtr> symbol_map; |
| std::unordered_map<std::string, std::string> alias_map; |
| std::unordered_map<std::string, std::set<std::string>> main_map; |
| process_symbols(symbol_map, alias_map, main_map, corpus); |
| // check that aliases and main symbols are disjoint |
| for (const auto& [alias, main] : alias_map) |
| if (alias_map.count(main)) |
| { |
| std::cerr << "found main symbol and alias with id " << main << '\n'; |
| exit(1); |
| } |
| |
| if (!symbols) |
| return alias_map; |
| |
| // Track when an alias is promoted to a main symbol or a symbol is deleted as |
| // these are the cases when we need update references to symbols in |
| // declarations. |
| std::unordered_map<std::string, std::optional<std::string>> mapping; |
| |
| // filter the symbols, preserving those listed |
| for (const auto& [id, node] : symbol_map) |
| { |
| const auto name = get_attribute(node, "name"); |
| assert(name); |
| if (symbols->count(name.value())) |
| continue; |
| remove_element(node); |
| |
| // The symbol has been removed, so remove its id from the alias <-> main |
| // mappings, promoting another alias to main symbol if needed, and |
| // updating XML alias attributes. |
| // |
| // There are 3 cases: |
| // a main symbol - with one or more aliases |
| // an alias - with a main symbol |
| // an unaliased symbol |
| if (const auto main_it = main_map.find(id); |
| main_it != main_map.end()) |
| { |
| // A main symbol with one or more aliases. |
| std::set<std::string> aliases; |
| std::swap(aliases, main_it->second); |
| main_map.erase(main_it); |
| // the first alias will be the new main symbol |
| const auto first_it = aliases.begin(); |
| assert(first_it != aliases.end()); |
| const auto first = *first_it; |
| // remove first from the list of aliases and its link to id |
| aliases.erase(first_it); |
| alias_map.erase(first); |
| if (!aliases.empty()) |
| { |
| // update the XML attribute |
| set_aliases(symbol_map[first], aliases); |
| // update the maps |
| for (const auto& alias : aliases) |
| alias_map[alias] = first; |
| std::swap(aliases, main_map[first]); |
| } |
| // declarations referring to id must be repointed at first |
| mapping[id] = {first}; |
| } |
| else if (const auto alias_it = alias_map.find(id); |
| alias_it != alias_map.end()) |
| { |
| // An alias with a main symbol. |
| const auto main = alias_it->second; |
| auto& aliases = main_map[main]; |
| // remove id from the maps |
| alias_map.erase(alias_it); |
| aliases.erase(id); |
| // update the XML attribute |
| set_aliases(symbol_map[main], aliases); |
| if (aliases.empty()) |
| // main hasn't changed but is no longer aliased |
| main_map.erase(main); |
| } |
| else |
| { |
| // An unaliased symbol. |
| // |
| // declaration references to id must be removed |
| mapping[id] = {}; |
| } |
| } |
| |
| rewrite_symbols_in_declarations(mapping, corpus); |
| |
| return alias_map; |
| } |
| |
| /// Remove unlisted ELF symbols. |
| /// |
| /// @param symbols the set of symbols |
| /// |
| /// @param root the XML root element |
| /// |
| /// @return mapping from corpus to alias to main elf-symbol-id |
| std::unordered_map<xmlNodePtr, std::unordered_map<std::string, std::string>> |
| filter_symbols(const std::optional<symbol_set>& symbols, xmlNodePtr root) |
| { |
| std::unordered_map<xmlNodePtr, std::unordered_map<std::string, std::string>> |
| result; |
| const char* node_name = from_libxml(root->name); |
| if (strcmp(node_name, "abi-corpus-group") == 0) |
| { |
| for (auto child : get_children(root)) |
| result[child] = filter_corpus_symbols(symbols, child); |
| } |
| else if (strcmp(node_name, "abi-corpus") == 0) |
| { |
| result[root] = filter_corpus_symbols(symbols, root); |
| } |
| else |
| { |
| std::cerr << "unexpected root element: " << node_name << '\n'; |
| exit(1); |
| } |
| return result; |
| } |
| |
| /// Main program. |
| /// |
| /// Read and write ABI XML, with optional processing passes. |
| /// |
| /// @param argc argument count |
| /// |
| /// @param argv argument vector |
| /// |
| /// @return exit status |
| int |
| main(int argc, char* argv[]) |
| { |
| // Defaults. |
| const char* opt_input = nullptr; |
| const char* opt_output = nullptr; |
| std::optional<symbol_set> opt_symbols; |
| LocationInfo opt_locations = LocationInfo::COLUMN; |
| int opt_indentation = 2; |
| bool opt_normalise_anonymous = false; |
| bool opt_reanonymise_anonymous = false; |
| bool opt_discard_naming_typedefs = false; |
| bool opt_remove_function_parameter_type_qualifiers = false; |
| bool opt_prune_unreachable = false; |
| bool opt_report_untyped = false; |
| bool opt_abort_on_untyped = false; |
| bool opt_clear_non_reachable = false; |
| bool opt_eliminate_duplicates = false; |
| bool opt_report_conflicts = false; |
| bool opt_sort = false; |
| bool opt_drop_empty = false; |
| |
| // Experimental flags. These are not part of --all. |
| // |
| // TODO: Move out of experimental status when stable. |
| bool opt_renumber_anonymous_types = false; |
| |
| // Process command line. |
| auto usage = [&]() -> int { |
| std::cerr << "usage: " << argv[0] << '\n' |
| << " [-i|--input file]\n" |
| << " [-o|--output file]\n" |
| << " [-S|--symbols file]\n" |
| << " [-L|--locations {column|line|file|none}]\n" |
| << " [-I|--indentation n]\n" |
| << " [-a|--all] (implies -n -r -t -f -p -u -b -e -c -s -d)\n" |
| << " [-n|--[no-]normalise-anonymous]\n" |
| << " [-r|--[no-]reanonymise-anonymous]\n" |
| << " [-t|--[no-]discard-naming-typedefs]\n" |
| << " [-f|--[no-]remove-function-parameter-type-qualifiers]\n" |
| << " [-p|--[no-]prune-unreachable]\n" |
| << " [-u|--[no-]report-untyped]\n" |
| << " [-U|--abort-on-untyped-symbols]\n" |
| << " [-b|--[no-]clear-non-reachable]\n" |
| << " [-e|--[no-]eliminate-duplicates]\n" |
| << " [-c|--[no-]report-conflicts]\n" |
| << " [-s|--[no-]sort]\n" |
| << " [-d|--[no-]drop-empty]\n" |
| << "\nExperimental flags, not part of --all\n" |
| << " [-M|--[no-]renumber-anonymous-types]\n"; |
| return 1; |
| }; |
| int opt_index = 1; |
| auto get_arg = [&]() { |
| if (opt_index < argc) |
| return argv[opt_index++]; |
| exit(usage()); |
| }; |
| while (opt_index < argc) |
| { |
| const std::string arg = get_arg(); |
| if (arg == "-i" || arg == "--input") |
| opt_input = get_arg(); |
| else if (arg == "-o" || arg == "--output") |
| opt_output = get_arg(); |
| else if (arg == "-S" || arg == "--symbols") |
| opt_symbols = read_symbols(get_arg()); |
| else if (arg == "-L" || arg == "--locations") |
| { |
| auto it = LOCATION_INFO_NAME.find(get_arg()); |
| if (it == LOCATION_INFO_NAME.end()) |
| exit(usage()); |
| opt_locations = it->second; |
| } |
| else if (arg == "-I" || arg == "--indentation") |
| { |
| std::istringstream is(get_arg()); |
| is >> std::noskipws >> opt_indentation; |
| if (!is || !is.eof() || opt_indentation < 0) |
| exit(usage()); |
| } |
| else if (arg == "-a" || arg == "--all") |
| opt_normalise_anonymous = opt_reanonymise_anonymous |
| = opt_discard_naming_typedefs |
| = opt_remove_function_parameter_type_qualifiers |
| = opt_prune_unreachable |
| = opt_report_untyped |
| = opt_clear_non_reachable |
| = opt_eliminate_duplicates |
| = opt_report_conflicts |
| = opt_sort |
| = opt_drop_empty |
| = true; |
| else if (arg == "-n" || arg == "--normalise-anonymous") |
| opt_normalise_anonymous = true; |
| else if (arg == "--no-normalise-anonymous") |
| opt_normalise_anonymous = false; |
| else if (arg == "-r" || arg == "--reanonymise-anonymous") |
| opt_reanonymise_anonymous = true; |
| else if (arg == "--no-reanonymise-anonymous") |
| opt_reanonymise_anonymous = false; |
| else if (arg == "-t" || arg == "--discard-naming-typedefs") |
| opt_discard_naming_typedefs = true; |
| else if (arg == "--no-discard-naming-typedefs") |
| opt_discard_naming_typedefs = false; |
| else if (arg == "-f" || |
| arg == "--remove-function-parameter-type-qualifiers") |
| opt_remove_function_parameter_type_qualifiers = true; |
| else if (arg == "--no-remove-function-parameter-type-qualifiers") |
| opt_remove_function_parameter_type_qualifiers = false; |
| else if (arg == "-p" || arg == "--prune-unreachable") |
| opt_prune_unreachable = true; |
| else if (arg == "--no-prune-unreachable") |
| opt_prune_unreachable = false; |
| else if (arg == "-u" || arg == "--report-untyped") |
| opt_report_untyped = true; |
| else if (arg == "--no-report-untyped") |
| opt_report_untyped = false; |
| else if (arg == "-U" || arg == "--abort-on-untyped-symbols") |
| opt_abort_on_untyped = true; |
| else if (arg == "-b" || arg == "--clear-non-reachable") |
| opt_clear_non_reachable = true; |
| else if (arg == "--no-clear-non-reachable") |
| opt_clear_non_reachable = false; |
| else if (arg == "-e" || arg == "--eliminate-duplicates") |
| opt_eliminate_duplicates = true; |
| else if (arg == "--no-eliminate-duplicates") |
| opt_eliminate_duplicates = false; |
| else if (arg == "-c" || arg == "--report-conflicts") |
| opt_report_conflicts = true; |
| else if (arg == "--no-report-conflicts") |
| opt_report_conflicts = false; |
| else if (arg == "-s" || arg == "--sort") |
| opt_sort = true; |
| else if (arg == "--no-sort") |
| opt_sort = false; |
| else if (arg == "-d" || arg == "--drop-empty") |
| opt_drop_empty = true; |
| else if (arg == "--no-drop-empty") |
| opt_drop_empty = false; |
| else if (arg == "-M" || arg == "--renumber-anonymous-types") |
| opt_renumber_anonymous_types = true; |
| else if (arg == "--no-renumber-anonymous-types") |
| opt_renumber_anonymous_types = false; |
| else |
| exit(usage()); |
| } |
| |
| // Open input for reading. |
| int in_fd = STDIN_FILENO; |
| if (opt_input) |
| { |
| in_fd = open(opt_input, O_RDONLY); |
| if (in_fd < 0) |
| { |
| std::cerr << "could not open '" << opt_input << "' for reading: " |
| << strerror(errno) << '\n'; |
| exit(1); |
| } |
| } |
| |
| // Read the XML. |
| xmlParserCtxtPtr parser_context = xmlNewParserCtxt(); |
| xmlDocPtr document |
| = xmlCtxtReadFd(parser_context, in_fd, nullptr, nullptr, 0); |
| if (!document) |
| { |
| std::cerr << "failed to parse input as XML\n"; |
| exit(1); |
| } |
| xmlFreeParserCtxt(parser_context); |
| close(in_fd); |
| |
| // Get the root element. |
| xmlNodePtr root = xmlDocGetRootElement(document); |
| if (!root) |
| { |
| std::cerr << "XML document has no root element\n"; |
| exit(1); |
| } |
| |
| // Strip text nodes to simplify other operations. |
| strip_text(root); |
| |
| // Get corpus -> alias -> main mapping and remove unlisted symbols. |
| const auto alias_map = filter_symbols(opt_symbols, root); |
| |
| // Record type ids which correspond to anonymous types. |
| // Renumber recorded type ids using information about the type. |
| // Replace recorded type ids by renumbered ones. |
| if (opt_renumber_anonymous_types) |
| { |
| std::unordered_map<std::string, xmlNodePtr> to_renumber; |
| std::unordered_set<size_t> used_hashes; |
| record_ids_to_renumber(root, to_renumber, used_hashes); |
| |
| std::unordered_map<std::string, std::string> type_id_map; |
| for (const auto& [type_id, node] : to_renumber) |
| resolve_ids_to_renumber(type_id, to_renumber, used_hashes, type_id_map); |
| |
| renumber_type_ids(root, type_id_map); |
| } |
| |
| // Normalise anonymous type names. |
| // Reanonymise anonymous types. |
| // Discard naming typedef backlinks. |
| if (opt_normalise_anonymous || opt_reanonymise_anonymous |
| || opt_discard_naming_typedefs) |
| handle_anonymous_types(opt_normalise_anonymous, opt_reanonymise_anonymous, |
| opt_discard_naming_typedefs, root); |
| |
| // Remove useless top-level qualifiers on function parameter and return |
| // types. |
| if (opt_remove_function_parameter_type_qualifiers) |
| { |
| std::unordered_map<std::string, std::string> qualifier_id_to_type_id; |
| build_qualifier_id_to_type_id_map(root, qualifier_id_to_type_id); |
| resolve_qualifier_chains(qualifier_id_to_type_id); |
| remove_function_parameter_type_qualifiers(root, qualifier_id_to_type_id); |
| } |
| |
| // Prune unreachable elements and/or report untyped symbols. |
| size_t untyped_symbols = 0; |
| if (opt_prune_unreachable || opt_report_untyped || opt_abort_on_untyped) |
| untyped_symbols += handle_unreachable( |
| opt_prune_unreachable, opt_report_untyped, alias_map, root); |
| if (opt_abort_on_untyped && untyped_symbols) |
| { |
| std::cerr << "found " << untyped_symbols << " untyped symbols\n"; |
| exit(1); |
| } |
| |
| // Limit location information. |
| if (opt_locations > LocationInfo::COLUMN) |
| limit_locations(opt_locations, root); |
| |
| // Clear unwanted non-reachable attributes. |
| if (opt_clear_non_reachable) |
| clear_non_reachable(root); |
| |
| // Eliminate complete duplicates and extra fragments of types. |
| // Report conflicting duplicate defintions. |
| // Record whether there are conflicting duplicate definitions. |
| size_t conflicting_types = 0; |
| if (opt_eliminate_duplicates || opt_report_conflicts || opt_sort) |
| conflicting_types += handle_duplicate_types( |
| opt_eliminate_duplicates, opt_report_conflicts, root); |
| |
| // Sort namespaces, types and declarations. |
| if (opt_sort) |
| { |
| if (conflicting_types) |
| std::cerr << "found type definition conflicts, skipping sort\n"; |
| else |
| sort_namespaces_types_and_declarations(root); |
| } |
| |
| // Drop empty subelements. |
| if (opt_drop_empty) |
| drop_empty(root); |
| |
| // Reformat root element for human consumption. |
| format_xml(std::string(opt_indentation, ' '), std::string(), root); |
| |
| // Open output for writing. |
| int out_fd = STDOUT_FILENO; |
| if (opt_output) |
| { |
| out_fd = open(opt_output, O_CREAT | O_TRUNC | O_WRONLY, |
| S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); |
| if (out_fd < 0) |
| { |
| std::cerr << "could not open '" << opt_output << "' for writing: " |
| << strerror(errno) << '\n'; |
| exit(1); |
| } |
| } |
| |
| // Write the XML. |
| // |
| // First to memory, as we need to do a little post-processing. |
| xmlChar* out_data; |
| int out_size; |
| xmlDocDumpMemory(document, &out_data, &out_size); |
| // Remove the XML declaration as it currently upsets abidiff. |
| xmlChar* out_limit = out_data + out_size; |
| while (out_data < out_limit && *out_data != '\n') |
| ++out_data; |
| if (out_data < out_limit) |
| ++out_data; |
| // Adjust quotes to match abidw. |
| adjust_quotes(out_data, out_limit); |
| // And now to a file. |
| size_t count = out_limit - out_data; |
| if (write(out_fd, out_data, count) != count) |
| { |
| std::cerr << "could not write output: " << strerror(errno) << '\n'; |
| exit(1); |
| } |
| if (close(out_fd) < 0) |
| { |
| std::cerr << "could not close output: " << strerror(errno) << '\n'; |
| exit(1); |
| } |
| |
| // Free libxml document. |
| xmlFreeDoc(document); |
| return 0; |
| } |