tools/abitidy.cc - platform/external/libabigail - Git at Google

 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 // -*- Mode: C++ -*-
 //
 // Copyright (C) 2021 Google, Inc.
 //
 // Author: Giuliano Procida

 /// @file
 ///
 /// This file contains ABI XML manipulation routines and a main driver.
 ///
 /// The libxml Tree API is used. The XPath API is not used as it proved
 /// to be many times slower than direct traversal but only slightly more
 /// convenient.

 #include <fcntl.h>
 #include <unistd.h>

 #include <cstring>
 #include <functional>
 #include <ios>
 #include <iostream>
 #include <map>
 #include <optional>
 #include <set>
 #include <sstream>
 #include <string>
 #include <vector>

 #include <libxml/globals.h>
 #include <libxml/parser.h>
 #include <libxml/tree.h>

 /// Cast a C string to a libxml string.
 ///
 /// @param str the C string (pointer)
 ///
 /// @return the same thing, as a type compatible with the libxml API
 static const xmlChar*
 to_libxml(const char* str)
 {
   return reinterpret_cast<const xmlChar*>(str);
 }

 /// Cast a libxml string to C string.
 ///
 /// @param str the libxml string (pointer)
 ///
 /// @return the same thing, as a type compatible with the C library API
 static const char*
 from_libxml(const xmlChar* str)
 {
   return reinterpret_cast<const char*>(str);
 }

 /// Remove a node from its document and free its storage.
 ///
 /// @param node the node to remove
 static void
 remove_node(xmlNodePtr node)
 {
   xmlUnlinkNode(node);
   xmlFreeNode(node);
 }

 /// Remove an XML element and any immediately preceding comment.
 ///
 /// @param node the element to remove
 static void
 remove_element(xmlNodePtr node)
 {
   xmlNodePtr previous_node = node->prev;
   if (previous_node && previous_node->type == XML_COMMENT_NODE)
     remove_node(previous_node);
   remove_node(node);
 }

 /// Get child nodes of given node.
 ///
 /// @param node the node whose children to fetch
 ///
 /// @return a vector of child nodes
 static std::vector<xmlNodePtr>
 get_children(xmlNodePtr node)
 {
   std::vector<xmlNodePtr> result;
   for (xmlNodePtr child = node->children; child; child = child->next)
     result.push_back(child);
   return result;
 }

 /// Fetch an attribute from a node.
 ///
 /// @param node the node
 ///
 /// @param name the attribute name
 ///
 /// @return the attribute value, if present
 static std::optional<std::string>
 get_attribute(xmlNodePtr node, const char* name)
 {
   std::optional<std::string> result;
   xmlChar* attribute = xmlGetProp(node, to_libxml(name));
   if (attribute)
     {
       result = from_libxml(attribute);
       xmlFree(attribute);
     }
   return result;
 }

 /// Store an attribute value.
 ///
 /// @param node the node
 ///
 /// @param name the attribute name
 ///
 /// @param value the attribute value, optionally
 static void
 set_attribute(xmlNodePtr node, const char* name,
               const std::optional<std::string>& value)
 {
   if (value)
     xmlSetProp(node, to_libxml(name), to_libxml(value.value().c_str()));
   else
     xmlUnsetProp(node, to_libxml(name));
 }

 /// Remove text nodes, recursively.
 ///
 /// This simplifies subsequent analysis and manipulation. Removing and
 /// moving elements will destroy formatting anyway. The only remaining
 /// node types should be elements and comments.
 ///
 /// @param node the node to process
 static void
 strip_text(xmlNodePtr node)
 {
   if (node->type == XML_TEXT_NODE)
     remove_node(node);
   else if (node->type == XML_ELEMENT_NODE)
     for (xmlNodePtr child : get_children(node))
       strip_text(child);
 }

 /// Add text before / after a node.
 ///
 /// @param node the node
 ///
 /// @param after whether the next should go after
 ///
 /// @param text the text
 static void
 add_text(xmlNodePtr node, bool after, const std::string& text)
 {
   xmlNodePtr text_node = xmlNewTextLen(to_libxml(text.data()), text.size());
   if (after)
     xmlAddNextSibling(node, text_node);
   else
     xmlAddPrevSibling(node, text_node);
 }

 /// Format an XML elementy by adding internal indentation and newlines.
 ///
 /// This makes the XML readable.
 ///
 /// @param indentation what to add to the line indentation prefix
 ///
 /// @param prefix the current line indentation prefix
 ///
 /// @param node the node to format
 static void
 format_xml(const std::string& indentation, std::string prefix, xmlNodePtr node)
 {
   std::vector<xmlNodePtr> children = get_children(node);
   if (children.empty())
     return;

   // The ordering of operations here is incidental. The outcomes we want
   // are: 1. an extra newline after the opening tag and indentation of
   // the closing tag to match, and 2. indentation and newline for each
   // child.
   add_text(children[0], false, "\n");
   add_text(children[children.size() - 1], true, prefix);
   prefix += indentation;
   for (xmlNodePtr child : children)
     {
       add_text(child, false, prefix);
       format_xml(indentation, prefix, child);
       add_text(child, true, "\n");
     }
 }

 /// Rewrite attributes using single quotes.
 ///
 /// libxml uses double quotes but libabigail uses single quotes.
 ///
 /// Note that libabigail does not emit attributes *containing* single
 /// quotes and if it did it would escape them as &quot; which libxml
 /// would in turn preserve. However, the code here will handle all forms
 /// of quotes, conservatively.
 ///
 /// Annotation comments can contain single quote characters so just
 /// checking for any single quotes at all is insufficiently precise.
 ///
 /// @param start a pointer to the start of the XML text
 ///
 /// @param limit a pointer to just past the end of the XML text
 static void
 adjust_quotes(xmlChar* start, xmlChar* limit)
 {
   const std::string open{"<!--"};
   const std::string close{"-->"};
   while (start < limit)
     {
       // Look for a '<'
       start = std::find(start, limit, '<');
       if (start == limit)
         break;
       if (start + open.size() < limit
           && std::equal(open.begin(), open.end(), start))
         {
           // Have a comment, skip to the end.
           start += open.size();
           xmlChar* end = std::search(start, limit, close.begin(), close.end());
           if (end == limit)
             break;
           start = end + close.size();
         }
       else
         {
           // Have some tag, search for the end.
           start += 1;
           xmlChar* end = std::find(start, limit, '>');
           if (end == limit)
             break;
           // In general, inside a tag we could find either ' or " being
           // used to quote attributes and the other quote character
           // being used as part of the attribute data. However, libxml's
           // xmlDocDump* functions use " to quote attributes and it's
           // safe to substitute this quote character with ' so long as '
           // does not appear within the attribute data.
           if (std::find(start, end, '\'') == end)
             for (xmlChar* c = start; c < end; ++c)
               if (*c == '"')
                 *c = '\'';
           start = end + 1;
         }
     }
 }

 static const std::set<std::string> DROP_IF_EMPTY = {
   "elf-variable-symbols",
   "elf-function-symbols",
   "namespace-decl",
   "abi-instr",
   "abi-corpus",
   "abi-corpus-group",
 };

 /// Drop empty elements, if safe to do so, recursively.
 ///
 /// @param element node to process
 static void
 drop_empty(xmlNodePtr node)
 {
   if (node->type != XML_ELEMENT_NODE)
     return;
   for (xmlNodePtr child : get_children(node))
     drop_empty(child);
   // Do not drop the root element, even if empty.
   if (node->parent->type == XML_DOCUMENT_NODE)
     return;
   if (!node->children && DROP_IF_EMPTY.count(from_libxml(node->name)))
     remove_element(node);
 }

 /// Prune unreachable elements.
 ///
 /// Reachability is defined to be union of contains, containing and
 /// refers-to relationships for types, declarations and symbols. The
 /// roots for reachability are the ELF elements in the ABI.
 ///
 /// @param document the XML document to process
 static void
 prune_unreachable(xmlDocPtr document)
 {
   // ELF symbol names.
   std::set<std::string> elf_symbols;

   // Simple way of allowing two kinds of nodes: false=>type,
   // true=>symbol.
   using vertex_t = std::pair<bool, std::string>;

   // Graph vertices.
   std::set<vertex_t> vertices;
   // Graph edges.
   std::map<vertex_t, std::set<vertex_t>> edges;

   // Keep track of type / symbol nesting so we can identify contains,
   // containing and refers-to relationships.
   std::vector<vertex_t> stack;

   // Process an XML node, adding a vertex and possibly some edges.
   std::function<void(xmlNodePtr)> process_node = [&](xmlNodePtr node) {
     // We only care about elements and not comments, at this stage.
     if (node->type != XML_ELEMENT_NODE)
       return;

     // Is this an ELF symbol?
     if (!strcmp(from_libxml(node->name), "elf-symbol"))
       {
         const auto name = get_attribute(node, "name");
         if (name)
           elf_symbols.insert(name.value());
         // Early return is safe, but not necessary.
         return;
       }

     // Is this a type? Note that the same id may appear multiple times.
     const auto id = get_attribute(node, "id");
     if (id)
       {
         vertex_t type_vertex{false, id.value()};
         vertices.insert(type_vertex);
         const auto naming_typedef_id = get_attribute(node, "naming-typedef-id");
         if (naming_typedef_id)
           {
             // This is an odd one, there can be a backwards link from an
             // anonymous type to the typedef that refers to it. We could
             // either remove these links when pruning a typedef but it's
             // simpler just to pull in the typedef, even if nothing else
             // refers to it.
             vertex_t naming_typedef_vertex{false, naming_typedef_id.value()};
             edges[type_vertex].insert(naming_typedef_vertex);
           }
         if (!stack.empty())
           {
             // Parent<->child dependencies; record dependencies both ways to
             // avoid holes in XML types and declarations.
             const auto& parent = stack.back();
             edges[parent].insert(type_vertex);
             edges[type_vertex].insert(parent);
           }
         // Record the type.
         stack.push_back(type_vertex);
       }

     // Is this a (declaration expected to be linked to a) symbol?
     const auto symbol = get_attribute(node, "mangled-name");
     if (symbol)
       {
         vertex_t symbol_vertex{true, symbol.value()};
         vertices.insert(symbol_vertex);
         if (!stack.empty())
           {
             // Parent<->child dependencies; record dependencies both ways to
             // avoid making holes in XML types and declarations.
             //
             // Symbols exist outside of the type hierarchy, so choosing to make
             // them depend on a containing type scope and vice versa is
             // conservative and probably not necessary.
             const auto& parent = stack.back();
             edges[parent].insert(symbol_vertex);
             edges[symbol_vertex].insert(parent);
           }
         // Record the symbol.
         stack.push_back(symbol_vertex);
         // In practice there will be at most one symbol on the stack; we could
         // verify this here, but it wouldn't achieve anything.
       }

     // Being both would make the stack ordering ambiguous.
     if (id && symbol)
       {
         std::cerr << "cannot handle element which is both type and symbol\n";
         exit(1);
       }

     // Is there a reference to another type?
     const auto type_id = get_attribute(node, "type-id");
     if (type_id && !stack.empty())
       {
         // The enclosing type or symbol refers to another type.
         const auto& parent = stack.back();
         vertex_t type_id_vertex{false, type_id.value()};
         edges[parent].insert(type_id_vertex);
       }

     // Process recursively.
     for (auto child : get_children(node))
       process_node(child);

     // Restore the stack.
     if (symbol)
       stack.pop_back();
     if (id)
       stack.pop_back();
   };

   // Traverse the whole XML document and build a graph.
   for (xmlNodePtr node = document->children; node; node = node->next)
     process_node(node);

   // Simple DFS.
   std::set<vertex_t> seen;
   std::function<void(vertex_t)> dfs = [&](vertex_t vertex) {
     if (!seen.insert(vertex).second)
       return;
     auto it = edges.find(vertex);
     if (it != edges.end())
       for (auto to : it->second)
         dfs(to);
   };

   // Traverse the graph, starting from the ELF symbols.
   for (const auto& symbol : elf_symbols)
     {
       vertex_t symbol_vertex{true, symbol};
       if (vertices.count(symbol_vertex))
         dfs(symbol_vertex);
       else
         std::cerr << "no declaration found for ELF symbol " << symbol << '\n';
     }

   // This is a DFS with early stopping.
   std::function<void(xmlNodePtr)> remove_unseen = [&](xmlNodePtr node) {
     if (node->type != XML_ELEMENT_NODE)
       return;

     // Return if we know that this is a type to keep or drop in its
     // entirety.
     const auto id = get_attribute(node, "id");
     if (id)
       {
         if (!seen.count(vertex_t{false, id.value()}))
           remove_element(node);
         return;
       }

     // Return if we know that this is a declaration to keep or drop in
     // its entirety. Note that var-decl and function-decl are the only
     // elements that can have a mangled-name attribute.
     const char* node_name = from_libxml(node->name);
     if (!strcmp(node_name, "var-decl") || !strcmp(node_name, "function-decl"))
       {
         const auto symbol = get_attribute(node, "mangled-name");
         if (!(symbol && seen.count(vertex_t{true, symbol.value()})))
           remove_element(node);
         return;
       }

     // Otherwise, this is not a type, declaration or part thereof, so
     // process child elements.
     for (auto child : get_children(node))
       remove_unseen(child);
   };

   // Traverse the document, removing unseen elements.
   for (xmlNodePtr node = document->children; node; node = node->next)
     remove_unseen(node);
 }

 static const std::map<std::string, std::string> ANONYMOUS_TYPE_NAMES = {
   {"enum-decl", "__anonymous_enum__"},
   {"class-decl", "__anonymous_struct__"},
   {"union-decl", "__anonymous_union__"},
 };

 /// Normalise anonymous type names by removing the numerical suffix.
 ///
 /// Anonymous type names take the form __anonymous_foo__N where foo is
 /// one of enum, struct or union and N is an optional numerical suffix.
 /// The suffices are senstive to processing order and do not convey
 /// useful ABI information. They can cause spurious harmless diffs and
 /// make XML diffing and rebasing harder.
 ///
 /// @param node the XML node to process
 static void
 normalise_anonymous_type_names(xmlNodePtr node)
 {
   if (node->type != XML_ELEMENT_NODE)
     return;

   const auto it = ANONYMOUS_TYPE_NAMES.find(from_libxml(node->name));
   if (it != ANONYMOUS_TYPE_NAMES.end())
     if (const auto attribute = get_attribute(node, "name"))
       {
         const auto& anon = it->second;
         const auto& name = attribute.value();
         // __anonymous_foo__123 -> __anonymous_foo__
         if (!name.compare(0, anon.size(), anon) &&
             name.find_first_not_of("0123456789", anon.size()) == name.npos)
           set_attribute(node, "name", anon);
       }

   for (auto child : get_children(node))
     normalise_anonymous_type_names(child);
 }

 /// Main program.
 ///
 /// Read and write ABI XML, with optional extra processing passes.
 ///
 /// @param argc argument count
 ///
 /// @param argv atgument vector
 ///
 /// @return exit status
 int
 main(int argc, char* argv[])
 {
   // Defaults.
   const char* opt_input = NULL;
   const char* opt_output = NULL;
   int opt_indentation = 2;
   bool opt_drop_empty = false;
   bool opt_prune_unreachable = false;
   bool opt_normalise_anonymous = false;

   // Process command line.
   auto usage = [&]() -> int {
     std::cerr << "usage: " << argv[0]
               << " [-i|--input file]"
               << " [-o|--output file]"
               << " [-I|--indentation n]"
               << " [-a|--all]"
               << " [-d|--[no-]drop-empty]"
               << " [-p|--[no-]prune-unreachable]"
               << " [-n|--[no-]normalise-anonymous]"
               << '\n';
     return 1;
   };
   int opt_index = 1;
   auto get_arg = [&]() {
     if (opt_index < argc)
       return argv[opt_index++];
     exit(usage());
   };
   while (opt_index < argc)
     {
       const char* arg = get_arg();
       if (!strcmp(arg, "-i") || !strcmp(arg, "--input"))
         opt_input = get_arg();
       else if (!strcmp(arg, "-o") || !strcmp(arg, "--output"))
         opt_output = get_arg();
       else if (!strcmp(arg, "-I") || !strcmp(arg, "--indentation"))
         {
           std::istringstream is(get_arg());
           is >> std::noskipws >> opt_indentation;
           if (!is || !is.eof() || opt_indentation < 0)
             exit(usage());
         }
       else if (!strcmp(arg, "-a") || !strcmp(arg, "--all"))
         opt_drop_empty = opt_prune_unreachable = opt_normalise_anonymous = true;
       else if (!strcmp(arg, "-d") || !strcmp(arg, "--drop-empty"))
         opt_drop_empty = true;
       else if (!strcmp(arg, "--no-drop-empty"))
         opt_drop_empty = false;
       else if (!strcmp(arg, "-p") || !strcmp(arg, "--prune-unreachable"))
         opt_prune_unreachable = true;
       else if (!strcmp(arg, "--no-prune-unreachable"))
         opt_prune_unreachable = false;
       else if (!strcmp(arg, "-n") || !strcmp(arg, "--normalise-anonymous"))
         opt_normalise_anonymous = true;
       else if (!strcmp(arg, "--no-normalise-anonymous"))
         opt_normalise_anonymous = false;
       else
         exit(usage());
     }

   // Open input for reading.
   int in_fd = STDIN_FILENO;
   if (opt_input)
     {
       in_fd = open(opt_input, O_RDONLY);
       if (in_fd < 0)
         {
           std::cerr << "could not open '" << opt_input << "' for reading: "
                     << strerror(errno) << '\n';
           exit(1);
         }
     }

   // Read the XML.
   xmlParserCtxtPtr parser_context = xmlNewParserCtxt();
   xmlDocPtr document = xmlCtxtReadFd(parser_context, in_fd, NULL, NULL, 0);
   if (!document)
     {
       std::cerr << "failed to parse input as XML\n";
       exit(1);
     }
   xmlFreeParserCtxt(parser_context);
   close(in_fd);

   // Strip text nodes to simplify other operations.
   for (xmlNodePtr node = document->children; node; node = node->next)
     strip_text(node);

   // Normalise anonymous type names.
   if (opt_normalise_anonymous)
     for (xmlNodePtr node = document->children; node; node = node->next)
       normalise_anonymous_type_names(node);

   // Prune unreachable elements.
   if (opt_prune_unreachable)
     prune_unreachable(document);

   // Drop empty elements.
   if (opt_drop_empty)
     for (xmlNodePtr node = document->children; node; node = node->next)
       drop_empty(node);

   // Reformat XML for human consumption.
   for (xmlNodePtr node = document->children; node; node = node->next)
     format_xml(std::string(opt_indentation, ' '), std::string(), node);

   // Open output for writing.
   int out_fd = STDOUT_FILENO;
   if (opt_output)
     {
       out_fd = open(opt_output, O_CREAT | O_TRUNC | O_WRONLY,
                     S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
       if (out_fd < 0)
         {
           std::cerr << "could not open '" << opt_output << "' for writing: "
                     << strerror(errno) << '\n';
           exit(1);
         }
     }

   // Write the XML.
   //
   // First to memory, as we need to do a little post-processing.
   xmlChar* out_data;
   int out_size;
   xmlDocDumpMemory(document, &out_data, &out_size);
   // Remove the XML declaration as this is not currently accepted by abidiff.
   xmlChar* out_limit = out_data + out_size;
   while (out_data < out_limit && *out_data != '\n')
     ++out_data;
   if (out_data < out_limit)
     ++out_data;
   // Adjust quotes to match abidw.
   adjust_quotes(out_data, out_limit);
   // And now to a file.
   size_t count = out_limit - out_data;
   if (write(out_fd, out_data, count) != count)
     {
       std::cerr << "could not write output: " << strerror(errno) << '\n';
       exit(1);
     }
   if (close(out_fd) < 0)
     {
       std::cerr << "could not close output: " << strerror(errno) << '\n';
       exit(1);
     }

   // Free libxml document.
   xmlFreeDoc(document);
   return 0;
 }
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	// -- Mode: C++ --
	//
	// Copyright (C) 2021 Google, Inc.
	//
	// Author: Giuliano Procida

	/// @file
	///
	/// This file contains ABI XML manipulation routines and a main driver.
	///
	/// The libxml Tree API is used. The XPath API is not used as it proved
	/// to be many times slower than direct traversal but only slightly more
	/// convenient.

	#include <fcntl.h>
	#include <unistd.h>

	#include <cstring>
	#include <functional>
	#include <ios>
	#include <iostream>
	#include <map>
	#include <optional>
	#include <set>
	#include <sstream>
	#include <string>
	#include <vector>

	#include <libxml/globals.h>
	#include <libxml/parser.h>
	#include <libxml/tree.h>

	/// Cast a C string to a libxml string.
	///
	/// @param str the C string (pointer)
	///
	/// @return the same thing, as a type compatible with the libxml API
	static const xmlChar*
	to_libxml(const char* str)
	{
	return reinterpret_cast<const xmlChar*>(str);
	}

	/// Cast a libxml string to C string.
	///
	/// @param str the libxml string (pointer)
	///
	/// @return the same thing, as a type compatible with the C library API
	static const char*
	from_libxml(const xmlChar* str)
	{
	return reinterpret_cast<const char*>(str);
	}

	/// Remove a node from its document and free its storage.
	///
	/// @param node the node to remove
	static void
	remove_node(xmlNodePtr node)
	{
	xmlUnlinkNode(node);
	xmlFreeNode(node);
	}

	/// Remove an XML element and any immediately preceding comment.
	///
	/// @param node the element to remove
	static void
	remove_element(xmlNodePtr node)
	{
	xmlNodePtr previous_node = node->prev;
	if (previous_node && previous_node->type == XML_COMMENT_NODE)
	remove_node(previous_node);
	remove_node(node);
	}

	/// Get child nodes of given node.
	///
	/// @param node the node whose children to fetch
	///
	/// @return a vector of child nodes
	static std::vector<xmlNodePtr>
	get_children(xmlNodePtr node)
	{
	std::vector<xmlNodePtr> result;
	for (xmlNodePtr child = node->children; child; child = child->next)
	result.push_back(child);
	return result;
	}

	/// Fetch an attribute from a node.
	///
	/// @param node the node
	///
	/// @param name the attribute name
	///
	/// @return the attribute value, if present
	static std::optional<std::string>
	get_attribute(xmlNodePtr node, const char* name)
	{
	std::optional<std::string> result;
	xmlChar* attribute = xmlGetProp(node, to_libxml(name));
	if (attribute)
	{
	result = from_libxml(attribute);
	xmlFree(attribute);
	}
	return result;
	}

	/// Store an attribute value.
	///
	/// @param node the node
	///
	/// @param name the attribute name
	///
	/// @param value the attribute value, optionally
	static void
	set_attribute(xmlNodePtr node, const char* name,
	const std::optional<std::string>& value)
	{
	if (value)
	xmlSetProp(node, to_libxml(name), to_libxml(value.value().c_str()));
	else
	xmlUnsetProp(node, to_libxml(name));
	}

	/// Remove text nodes, recursively.
	///
	/// This simplifies subsequent analysis and manipulation. Removing and
	/// moving elements will destroy formatting anyway. The only remaining
	/// node types should be elements and comments.
	///
	/// @param node the node to process
	static void
	strip_text(xmlNodePtr node)
	{
	if (node->type == XML_TEXT_NODE)
	remove_node(node);
	else if (node->type == XML_ELEMENT_NODE)
	for (xmlNodePtr child : get_children(node))
	strip_text(child);
	}

	/// Add text before / after a node.
	///
	/// @param node the node
	///
	/// @param after whether the next should go after
	///
	/// @param text the text
	static void
	add_text(xmlNodePtr node, bool after, const std::string& text)
	{
	xmlNodePtr text_node = xmlNewTextLen(to_libxml(text.data()), text.size());
	if (after)
	xmlAddNextSibling(node, text_node);
	else
	xmlAddPrevSibling(node, text_node);
	}

	/// Format an XML elementy by adding internal indentation and newlines.
	///
	/// This makes the XML readable.
	///
	/// @param indentation what to add to the line indentation prefix
	///
	/// @param prefix the current line indentation prefix
	///
	/// @param node the node to format
	static void
	format_xml(const std::string& indentation, std::string prefix, xmlNodePtr node)
	{
	std::vector<xmlNodePtr> children = get_children(node);
	if (children.empty())
	return;

	// The ordering of operations here is incidental. The outcomes we want
	// are: 1. an extra newline after the opening tag and indentation of
	// the closing tag to match, and 2. indentation and newline for each
	// child.
	add_text(children[0], false, "\n");
	add_text(children[children.size() - 1], true, prefix);
	prefix += indentation;
	for (xmlNodePtr child : children)
	{
	add_text(child, false, prefix);
	format_xml(indentation, prefix, child);
	add_text(child, true, "\n");
	}
	}

	/// Rewrite attributes using single quotes.
	///
	/// libxml uses double quotes but libabigail uses single quotes.
	///
	/// Note that libabigail does not emit attributes containing single
	/// quotes and if it did it would escape them as " which libxml
	/// would in turn preserve. However, the code here will handle all forms
	/// of quotes, conservatively.
	///
	/// Annotation comments can contain single quote characters so just
	/// checking for any single quotes at all is insufficiently precise.
	///
	/// @param start a pointer to the start of the XML text
	///
	/// @param limit a pointer to just past the end of the XML text
	static void
	adjust_quotes(xmlChar* start, xmlChar* limit)
	{
	const std::string open{"<!--"};
	const std::string close{"-->"};
	while (start < limit)
	{
	// Look for a '<'
	start = std::find(start, limit, '<');
	if (start == limit)
	break;
	if (start + open.size() < limit
	&& std::equal(open.begin(), open.end(), start))
	{
	// Have a comment, skip to the end.
	start += open.size();
	xmlChar* end = std::search(start, limit, close.begin(), close.end());
	if (end == limit)
	break;
	start = end + close.size();
	}
	else
	{
	// Have some tag, search for the end.
	start += 1;
	xmlChar* end = std::find(start, limit, '>');
	if (end == limit)
	break;
	// In general, inside a tag we could find either ' or " being
	// used to quote attributes and the other quote character
	// being used as part of the attribute data. However, libxml's
	// xmlDocDump* functions use " to quote attributes and it's
	// safe to substitute this quote character with ' so long as '
	// does not appear within the attribute data.
	if (std::find(start, end, '\'') == end)
	for (xmlChar* c = start; c < end; ++c)
	if (*c == '"')
	*c = '\'';
	start = end + 1;
	}
	}
	}

	static const std::set<std::string> DROP_IF_EMPTY = {
	"elf-variable-symbols",
	"elf-function-symbols",
	"namespace-decl",
	"abi-instr",
	"abi-corpus",
	"abi-corpus-group",
	};

	/// Drop empty elements, if safe to do so, recursively.
	///
	/// @param element node to process
	static void
	drop_empty(xmlNodePtr node)
	{
	if (node->type != XML_ELEMENT_NODE)
	return;
	for (xmlNodePtr child : get_children(node))
	drop_empty(child);
	// Do not drop the root element, even if empty.
	if (node->parent->type == XML_DOCUMENT_NODE)
	return;
	if (!node->children && DROP_IF_EMPTY.count(from_libxml(node->name)))
	remove_element(node);
	}

	/// Prune unreachable elements.
	///
	/// Reachability is defined to be union of contains, containing and
	/// refers-to relationships for types, declarations and symbols. The
	/// roots for reachability are the ELF elements in the ABI.
	///
	/// @param document the XML document to process
	static void
	prune_unreachable(xmlDocPtr document)
	{
	// ELF symbol names.
	std::set<std::string> elf_symbols;

	// Simple way of allowing two kinds of nodes: false=>type,
	// true=>symbol.
	using vertex_t = std::pair<bool, std::string>;

	// Graph vertices.
	std::set<vertex_t> vertices;
	// Graph edges.
	std::map<vertex_t, std::set<vertex_t>> edges;

	// Keep track of type / symbol nesting so we can identify contains,
	// containing and refers-to relationships.
	std::vector<vertex_t> stack;

	// Process an XML node, adding a vertex and possibly some edges.
	std::function<void(xmlNodePtr)> process_node = [&](xmlNodePtr node) {
	// We only care about elements and not comments, at this stage.
	if (node->type != XML_ELEMENT_NODE)
	return;

	// Is this an ELF symbol?
	if (!strcmp(from_libxml(node->name), "elf-symbol"))
	{
	const auto name = get_attribute(node, "name");
	if (name)
	elf_symbols.insert(name.value());
	// Early return is safe, but not necessary.
	return;
	}

	// Is this a type? Note that the same id may appear multiple times.
	const auto id = get_attribute(node, "id");
	if (id)
	{
	vertex_t type_vertex{false, id.value()};
	vertices.insert(type_vertex);
	const auto naming_typedef_id = get_attribute(node, "naming-typedef-id");
	if (naming_typedef_id)
	{
	// This is an odd one, there can be a backwards link from an
	// anonymous type to the typedef that refers to it. We could
	// either remove these links when pruning a typedef but it's
	// simpler just to pull in the typedef, even if nothing else
	// refers to it.
	vertex_t naming_typedef_vertex{false, naming_typedef_id.value()};
	edges[type_vertex].insert(naming_typedef_vertex);
	}
	if (!stack.empty())
	{
	// Parent<->child dependencies; record dependencies both ways to
	// avoid holes in XML types and declarations.
	const auto& parent = stack.back();
	edges[parent].insert(type_vertex);
	edges[type_vertex].insert(parent);
	}
	// Record the type.
	stack.push_back(type_vertex);
	}

	// Is this a (declaration expected to be linked to a) symbol?
	const auto symbol = get_attribute(node, "mangled-name");
	if (symbol)
	{
	vertex_t symbol_vertex{true, symbol.value()};
	vertices.insert(symbol_vertex);
	if (!stack.empty())
	{
	// Parent<->child dependencies; record dependencies both ways to
	// avoid making holes in XML types and declarations.
	//
	// Symbols exist outside of the type hierarchy, so choosing to make
	// them depend on a containing type scope and vice versa is
	// conservative and probably not necessary.
	const auto& parent = stack.back();
	edges[parent].insert(symbol_vertex);
	edges[symbol_vertex].insert(parent);
	}
	// Record the symbol.
	stack.push_back(symbol_vertex);
	// In practice there will be at most one symbol on the stack; we could
	// verify this here, but it wouldn't achieve anything.
	}

	// Being both would make the stack ordering ambiguous.
	if (id && symbol)
	{
	std::cerr << "cannot handle element which is both type and symbol\n";
	exit(1);
	}

	// Is there a reference to another type?
	const auto type_id = get_attribute(node, "type-id");
	if (type_id && !stack.empty())
	{
	// The enclosing type or symbol refers to another type.
	const auto& parent = stack.back();
	vertex_t type_id_vertex{false, type_id.value()};
	edges[parent].insert(type_id_vertex);
	}

	// Process recursively.
	for (auto child : get_children(node))
	process_node(child);

	// Restore the stack.
	if (symbol)
	stack.pop_back();
	if (id)
	stack.pop_back();
	};

	// Traverse the whole XML document and build a graph.
	for (xmlNodePtr node = document->children; node; node = node->next)
	process_node(node);

	// Simple DFS.
	std::set<vertex_t> seen;
	std::function<void(vertex_t)> dfs = [&](vertex_t vertex) {
	if (!seen.insert(vertex).second)
	return;
	auto it = edges.find(vertex);
	if (it != edges.end())
	for (auto to : it->second)
	dfs(to);
	};

	// Traverse the graph, starting from the ELF symbols.
	for (const auto& symbol : elf_symbols)
	{
	vertex_t symbol_vertex{true, symbol};
	if (vertices.count(symbol_vertex))
	dfs(symbol_vertex);
	else
	std::cerr << "no declaration found for ELF symbol " << symbol << '\n';
	}

	// This is a DFS with early stopping.
	std::function<void(xmlNodePtr)> remove_unseen = [&](xmlNodePtr node) {
	if (node->type != XML_ELEMENT_NODE)
	return;

	// Return if we know that this is a type to keep or drop in its
	// entirety.
	const auto id = get_attribute(node, "id");
	if (id)
	{
	if (!seen.count(vertex_t{false, id.value()}))
	remove_element(node);
	return;
	}

	// Return if we know that this is a declaration to keep or drop in
	// its entirety. Note that var-decl and function-decl are the only
	// elements that can have a mangled-name attribute.
	const char* node_name = from_libxml(node->name);
	if (!strcmp(node_name, "var-decl") \|\| !strcmp(node_name, "function-decl"))
	{
	const auto symbol = get_attribute(node, "mangled-name");
	if (!(symbol && seen.count(vertex_t{true, symbol.value()})))
	remove_element(node);
	return;
	}

	// Otherwise, this is not a type, declaration or part thereof, so
	// process child elements.
	for (auto child : get_children(node))
	remove_unseen(child);
	};

	// Traverse the document, removing unseen elements.
	for (xmlNodePtr node = document->children; node; node = node->next)
	remove_unseen(node);
	}

	static const std::map<std::string, std::string> ANONYMOUS_TYPE_NAMES = {
	{"enum-decl", "__anonymous_enum__"},
	{"class-decl", "__anonymous_struct__"},
	{"union-decl", "__anonymous_union__"},
	};

	/// Normalise anonymous type names by removing the numerical suffix.
	///
	/// Anonymous type names take the form __anonymous_foo__N where foo is
	/// one of enum, struct or union and N is an optional numerical suffix.
	/// The suffices are senstive to processing order and do not convey
	/// useful ABI information. They can cause spurious harmless diffs and
	/// make XML diffing and rebasing harder.
	///
	/// @param node the XML node to process
	static void
	normalise_anonymous_type_names(xmlNodePtr node)
	{
	if (node->type != XML_ELEMENT_NODE)
	return;

	const auto it = ANONYMOUS_TYPE_NAMES.find(from_libxml(node->name));
	if (it != ANONYMOUS_TYPE_NAMES.end())
	if (const auto attribute = get_attribute(node, "name"))
	{
	const auto& anon = it->second;
	const auto& name = attribute.value();
	// __anonymous_foo__123 -> __anonymous_foo__
	if (!name.compare(0, anon.size(), anon) &&
	name.find_first_not_of("0123456789", anon.size()) == name.npos)
	set_attribute(node, "name", anon);
	}

	for (auto child : get_children(node))
	normalise_anonymous_type_names(child);
	}

	/// Main program.
	///
	/// Read and write ABI XML, with optional extra processing passes.
	///
	/// @param argc argument count
	///
	/// @param argv atgument vector
	///
	/// @return exit status
	int
	main(int argc, char* argv[])
	{
	// Defaults.
	const char* opt_input = NULL;
	const char* opt_output = NULL;
	int opt_indentation = 2;
	bool opt_drop_empty = false;
	bool opt_prune_unreachable = false;
	bool opt_normalise_anonymous = false;

	// Process command line.
	auto usage = [&]() -> int {
	std::cerr << "usage: " << argv[0]
	<< " [-i\|--input file]"
	<< " [-o\|--output file]"
	<< " [-I\|--indentation n]"
	<< " [-a\|--all]"
	<< " [-d\|--[no-]drop-empty]"
	<< " [-p\|--[no-]prune-unreachable]"
	<< " [-n\|--[no-]normalise-anonymous]"
	<< '\n';
	return 1;
	};
	int opt_index = 1;
	auto get_arg = [&]() {
	if (opt_index < argc)
	return argv[opt_index++];
	exit(usage());
	};
	while (opt_index < argc)
	{
	const char* arg = get_arg();
	if (!strcmp(arg, "-i") \|\| !strcmp(arg, "--input"))
	opt_input = get_arg();
	else if (!strcmp(arg, "-o") \|\| !strcmp(arg, "--output"))
	opt_output = get_arg();
	else if (!strcmp(arg, "-I") \|\| !strcmp(arg, "--indentation"))
	{
	std::istringstream is(get_arg());
	is >> std::noskipws >> opt_indentation;
	if (!is \|\| !is.eof() \|\| opt_indentation < 0)
	exit(usage());
	}
	else if (!strcmp(arg, "-a") \|\| !strcmp(arg, "--all"))
	opt_drop_empty = opt_prune_unreachable = opt_normalise_anonymous = true;
	else if (!strcmp(arg, "-d") \|\| !strcmp(arg, "--drop-empty"))
	opt_drop_empty = true;
	else if (!strcmp(arg, "--no-drop-empty"))
	opt_drop_empty = false;
	else if (!strcmp(arg, "-p") \|\| !strcmp(arg, "--prune-unreachable"))
	opt_prune_unreachable = true;
	else if (!strcmp(arg, "--no-prune-unreachable"))
	opt_prune_unreachable = false;
	else if (!strcmp(arg, "-n") \|\| !strcmp(arg, "--normalise-anonymous"))
	opt_normalise_anonymous = true;
	else if (!strcmp(arg, "--no-normalise-anonymous"))
	opt_normalise_anonymous = false;
	else
	exit(usage());
	}

	// Open input for reading.
	int in_fd = STDIN_FILENO;
	if (opt_input)
	{
	in_fd = open(opt_input, O_RDONLY);
	if (in_fd < 0)
	{
	std::cerr << "could not open '" << opt_input << "' for reading: "
	<< strerror(errno) << '\n';
	exit(1);
	}
	}

	// Read the XML.
	xmlParserCtxtPtr parser_context = xmlNewParserCtxt();
	xmlDocPtr document = xmlCtxtReadFd(parser_context, in_fd, NULL, NULL, 0);
	if (!document)
	{
	std::cerr << "failed to parse input as XML\n";
	exit(1);
	}
	xmlFreeParserCtxt(parser_context);
	close(in_fd);

	// Strip text nodes to simplify other operations.
	for (xmlNodePtr node = document->children; node; node = node->next)
	strip_text(node);

	// Normalise anonymous type names.
	if (opt_normalise_anonymous)
	for (xmlNodePtr node = document->children; node; node = node->next)
	normalise_anonymous_type_names(node);

	// Prune unreachable elements.
	if (opt_prune_unreachable)
	prune_unreachable(document);

	// Drop empty elements.
	if (opt_drop_empty)
	for (xmlNodePtr node = document->children; node; node = node->next)
	drop_empty(node);

	// Reformat XML for human consumption.
	for (xmlNodePtr node = document->children; node; node = node->next)
	format_xml(std::string(opt_indentation, ' '), std::string(), node);

	// Open output for writing.
	int out_fd = STDOUT_FILENO;
	if (opt_output)
	{
	out_fd = open(opt_output, O_CREAT \| O_TRUNC \| O_WRONLY,
	S_IRUSR \| S_IWUSR \| S_IRGRP \| S_IWGRP \| S_IROTH \| S_IWOTH);
	if (out_fd < 0)
	{
	std::cerr << "could not open '" << opt_output << "' for writing: "
	<< strerror(errno) << '\n';
	exit(1);
	}
	}

	// Write the XML.
	//
	// First to memory, as we need to do a little post-processing.
	xmlChar* out_data;
	int out_size;
	xmlDocDumpMemory(document, &out_data, &out_size);
	// Remove the XML declaration as this is not currently accepted by abidiff.
	xmlChar* out_limit = out_data + out_size;
	while (out_data < out_limit && *out_data != '\n')
	++out_data;
	if (out_data < out_limit)
	++out_data;
	// Adjust quotes to match abidw.
	adjust_quotes(out_data, out_limit);
	// And now to a file.
	size_t count = out_limit - out_data;
	if (write(out_fd, out_data, count) != count)
	{
	std::cerr << "could not write output: " << strerror(errno) << '\n';
	exit(1);
	}
	if (close(out_fd) < 0)
	{
	std::cerr << "could not close output: " << strerror(errno) << '\n';
	exit(1);
	}

	// Free libxml document.
	xmlFreeDoc(document);
	return 0;
	}