blob: 4bd715647b536ee516dc1b9b9e4ba08a6b6351a4 [file] [log] [blame]
// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
// Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
// For <inttypes.h> PRI* macros, before anything else might #include it.
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif /* __STDC_FORMAT_MACROS */
#include "common/dwarf_cu_to_module.h"
#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <algorithm>
#include <memory>
#include <numeric>
#include <utility>
#include "common/string_view.h"
#include "common/dwarf_line_to_module.h"
#include "google_breakpad/common/breakpad_types.h"
namespace google_breakpad {
using std::accumulate;
using std::map;
using std::pair;
using std::sort;
using std::vector;
using std::unique_ptr;
// Data provided by a DWARF specification DIE.
//
// In DWARF, the DIE for a definition may contain a DW_AT_specification
// attribute giving the offset of the corresponding declaration DIE, and
// the definition DIE may omit information given in the declaration. For
// example, it's common for a function's address range to appear only in
// its definition DIE, but its name to appear only in its declaration
// DIE.
//
// The dumper needs to be able to follow DW_AT_specification links to
// bring all this information together in a FUNC record. Conveniently,
// DIEs that are the target of such links have a DW_AT_declaration flag
// set, so we can identify them when we first see them, and record their
// contents for later reference.
//
// A Specification holds information gathered from a declaration DIE that
// we may need if we find a DW_AT_specification link pointing to it.
struct DwarfCUToModule::Specification {
// The qualified name that can be found by demangling DW_AT_MIPS_linkage_name.
StringView qualified_name;
// The name of the enclosing scope, or the empty string if there is none.
StringView enclosing_name;
// The name for the specification DIE itself, without any enclosing
// name components.
StringView unqualified_name;
};
// An abstract origin -- base definition of an inline function.
struct AbstractOrigin {
explicit AbstractOrigin(StringView name) : name(name) {}
StringView name;
};
typedef map<uint64_t, AbstractOrigin> AbstractOriginByOffset;
// Data global to the DWARF-bearing file that is private to the
// DWARF-to-Module process.
struct DwarfCUToModule::FilePrivate {
// A map from offsets of DIEs within the .debug_info section to
// Specifications describing those DIEs. Specification references can
// cross compilation unit boundaries.
SpecificationByOffset specifications;
AbstractOriginByOffset origins;
// Keep a list of forward references from DW_AT_abstract_origin and
// DW_AT_specification attributes so names can be fixed up.
std::map<uint64_t, Module::Function*> forward_ref_die_to_func;
};
DwarfCUToModule::FileContext::FileContext(const string& filename,
Module* module,
bool handle_inter_cu_refs)
: filename_(filename),
module_(module),
handle_inter_cu_refs_(handle_inter_cu_refs),
file_private_(new FilePrivate()) {
}
DwarfCUToModule::FileContext::~FileContext() {
}
void DwarfCUToModule::FileContext::AddSectionToSectionMap(
const string& name, const uint8_t* contents, uint64_t length) {
section_map_[name] = std::make_pair(contents, length);
}
void DwarfCUToModule::FileContext::ClearSectionMapForTest() {
section_map_.clear();
}
const SectionMap&
DwarfCUToModule::FileContext::section_map() const {
return section_map_;
}
void DwarfCUToModule::FileContext::ClearSpecifications() {
if (!handle_inter_cu_refs_)
file_private_->specifications.clear();
}
bool DwarfCUToModule::FileContext::IsUnhandledInterCUReference(
uint64_t offset, uint64_t compilation_unit_start) const {
if (handle_inter_cu_refs_)
return false;
return offset < compilation_unit_start;
}
// Information global to the particular compilation unit we're
// parsing. This is for data shared across the CU's entire DIE tree,
// and parameters from the code invoking the CU parser.
struct DwarfCUToModule::CUContext {
CUContext(FileContext* file_context_arg, WarningReporter* reporter_arg,
RangesHandler* ranges_handler_arg)
: version(0),
file_context(file_context_arg),
reporter(reporter_arg),
ranges_handler(ranges_handler_arg),
language(Language::CPlusPlus),
low_pc(0),
high_pc(0),
ranges_form(DW_FORM_sec_offset),
ranges_data(0),
ranges_base(0),
str_offsets_base(0) { }
~CUContext() {
for (vector<Module::Function*>::iterator it = functions.begin();
it != functions.end(); ++it) {
delete *it;
}
};
// Dwarf version of the source CU.
uint8_t version;
// The DWARF-bearing file into which this CU was incorporated.
FileContext* file_context;
// For printing error messages.
WarningReporter* reporter;
// For reading ranges from the .debug_ranges section
RangesHandler* ranges_handler;
// The source language of this compilation unit.
const Language* language;
// Addresses covered by this CU. If high_pc_ is non-zero then the CU covers
// low_pc to high_pc, otherwise ranges_data is non-zero and low_pc represents
// the base address of the ranges covered by the CU. ranges_data will define
// the CU's actual ranges.
uint64_t low_pc;
uint64_t high_pc;
// Ranges for this CU are read according to this form.
enum DwarfForm ranges_form;
uint64_t ranges_data;
// Offset into .debug_rngslists where this CU's ranges are stored.
// Data in DW_FORM_rnglistx is relative to this offset.
uint64_t ranges_base;
// Offset into .debug_addr where this CU's addresses are stored. Data in
// form DW_FORM_addrxX is relative to this offset.
uint64_t addr_base;
// Offset into this CU's contribution to .debug_str_offsets.
uint64_t str_offsets_base;
// Collect all the data from the CU that a RangeListReader needs to read a
// range.
bool AssembleRangeListInfo(
RangeListReader::CURangesInfo* info) {
const SectionMap& section_map
= file_context->section_map();
info->version_ = version;
info->base_address_ = low_pc;
info->ranges_base_ = ranges_base;
const char* section_name = (version <= 4 ?
".debug_ranges" : ".debug_rnglists");
SectionMap::const_iterator map_entry
= GetSectionByName(section_map, section_name);
if (map_entry == section_map.end()) {
return false;
}
info->buffer_ = map_entry->second.first;
info->size_ = map_entry->second.second;
if (version > 4) {
SectionMap::const_iterator map_entry
= GetSectionByName(section_map, ".debug_addr");
if (map_entry == section_map.end()) {
return false;
}
info->addr_buffer_ = map_entry->second.first;
info->addr_buffer_size_ = map_entry->second.second;
info->addr_base_ = addr_base;
}
return true;
}
// The functions defined in this compilation unit. We accumulate
// them here during parsing. Then, in DwarfCUToModule::Finish, we
// assign them lines and add them to file_context->module.
//
// Destroying this destroys all the functions this vector points to.
vector<Module::Function*> functions;
// A map of function pointers to the its forward specification DIE's offset.
map<Module::Function*, uint64_t> spec_function_offsets;
// From file index to vector of subprogram's offset in this CU.
map<uint64_t, vector<uint64_t>> inline_origins;
};
// Information about the context of a particular DIE. This is for
// information that changes as we descend the tree towards the leaves:
// the containing classes/namespaces, etc.
struct DwarfCUToModule::DIEContext {
// The fully-qualified name of the context. For example, for a
// tree like:
//
// DW_TAG_namespace Foo
// DW_TAG_class Bar
// DW_TAG_subprogram Baz
//
// in a C++ compilation unit, the DIEContext's name for the
// DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
// name for the DW_TAG_namespace DIE would be "".
StringView name;
};
// An abstract base class for all the dumper's DIE handlers.
class DwarfCUToModule::GenericDIEHandler: public DIEHandler {
public:
// Create a handler for the DIE at OFFSET whose compilation unit is
// described by CU_CONTEXT, and whose immediate context is described
// by PARENT_CONTEXT.
GenericDIEHandler(CUContext* cu_context, DIEContext* parent_context,
uint64_t offset)
: cu_context_(cu_context),
parent_context_(parent_context),
offset_(offset),
declaration_(false),
specification_(NULL),
abstract_origin_(NULL),
forward_ref_die_offset_(0), specification_offset_(0) { }
// Derived classes' ProcessAttributeUnsigned can defer to this to
// handle DW_AT_declaration, or simply not override it.
void ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data);
// Derived classes' ProcessAttributeReference can defer to this to
// handle DW_AT_specification, or simply not override it.
void ProcessAttributeReference(enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data);
// Derived classes' ProcessAttributeReference can defer to this to
// handle DW_AT_specification, or simply not override it.
void ProcessAttributeString(enum DwarfAttribute attr,
enum DwarfForm form,
const string& data);
protected:
// Compute and return the fully-qualified name of the DIE. If this
// DIE is a declaration DIE, to be cited by other DIEs'
// DW_AT_specification attributes, record its enclosing name and
// unqualified name in the specification table.
//
// Use this from EndAttributes member functions, not ProcessAttribute*
// functions; only the former can be sure that all the DIE's attributes
// have been seen.
StringView ComputeQualifiedName();
CUContext* cu_context_;
DIEContext* parent_context_;
uint64_t offset_;
// If this DIE has a DW_AT_declaration attribute, this is its value.
// It is false on DIEs with no DW_AT_declaration attribute.
bool declaration_;
// If this DIE has a DW_AT_specification attribute, this is the
// Specification structure for the DIE the attribute refers to.
// Otherwise, this is NULL.
Specification* specification_;
// If this DIE has a DW_AT_abstract_origin attribute, this is the
// AbstractOrigin structure for the DIE the attribute refers to.
// Otherwise, this is NULL.
const AbstractOrigin* abstract_origin_;
// If this DIE has a DW_AT_specification or DW_AT_abstract_origin and it is a
// forward reference, no Specification will be available. Track the reference
// to be fixed up when the DIE is parsed.
uint64_t forward_ref_die_offset_;
// The root offset of Specification or abstract origin.
uint64_t specification_offset_;
// The value of the DW_AT_name attribute, or the empty string if the
// DIE has no such attribute.
StringView name_attribute_;
// The demangled value of the DW_AT_MIPS_linkage_name attribute, or the empty
// string if the DIE has no such attribute or its content could not be
// demangled.
StringView demangled_name_;
// The non-demangled value of the DW_AT_MIPS_linkage_name attribute,
// it its content count not be demangled.
StringView raw_name_;
};
void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data) {
switch (attr) {
case DW_AT_declaration: declaration_ = (data != 0); break;
default: break;
}
}
void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data) {
switch (attr) {
case DW_AT_specification: {
FileContext* file_context = cu_context_->file_context;
if (file_context->IsUnhandledInterCUReference(
data, cu_context_->reporter->cu_offset())) {
cu_context_->reporter->UnhandledInterCUReference(offset_, data);
break;
}
// Find the Specification to which this attribute refers, and
// set specification_ appropriately. We could do more processing
// here, but it's better to leave the real work to our
// EndAttribute member function, at which point we know we have
// seen all the DIE's attributes.
SpecificationByOffset* specifications =
&file_context->file_private_->specifications;
SpecificationByOffset::iterator spec = specifications->find(data);
if (spec != specifications->end()) {
specification_ = &spec->second;
} else if (data > offset_) {
forward_ref_die_offset_ = data;
} else {
cu_context_->reporter->UnknownSpecification(offset_, data);
}
specification_offset_ = data;
break;
}
case DW_AT_abstract_origin: {
const AbstractOriginByOffset& origins =
cu_context_->file_context->file_private_->origins;
AbstractOriginByOffset::const_iterator origin = origins.find(data);
if (origin != origins.end()) {
abstract_origin_ = &(origin->second);
} else if (data > offset_) {
forward_ref_die_offset_ = data;
}
specification_offset_ = data;
break;
}
default: break;
}
}
void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
enum DwarfAttribute attr,
enum DwarfForm form,
const string& data) {
switch (attr) {
case DW_AT_name:
name_attribute_ =
cu_context_->file_context->module_->AddStringToPool(data);
break;
case DW_AT_MIPS_linkage_name:
case DW_AT_linkage_name: {
string demangled;
Language::DemangleResult result =
cu_context_->language->DemangleName(data, &demangled);
switch (result) {
case Language::kDemangleSuccess:
demangled_name_ =
cu_context_->file_context->module_->AddStringToPool(demangled);
break;
case Language::kDemangleFailure:
cu_context_->reporter->DemangleError(data);
// fallthrough
case Language::kDontDemangle:
demangled_name_ = StringView();
raw_name_ = cu_context_->file_context->module_->AddStringToPool(data);
break;
}
break;
}
default: break;
}
}
StringView DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
// Use the demangled name, if one is available. Demangled names are
// preferable to those inferred from the DWARF structure because they
// include argument types.
StringView* qualified_name = nullptr;
if (!demangled_name_.empty()) {
// Found it is this DIE.
qualified_name = &demangled_name_;
} else if (specification_ && !specification_->qualified_name.empty()) {
// Found it on the specification.
qualified_name = &specification_->qualified_name;
}
StringView* unqualified_name = nullptr;
StringView* enclosing_name = nullptr;
if (!qualified_name) {
// Find the unqualified name. If the DIE has its own DW_AT_name
// attribute, then use that; otherwise, check the specification.
if (!name_attribute_.empty()) {
unqualified_name = &name_attribute_;
} else if (specification_) {
unqualified_name = &specification_->unqualified_name;
} else if (!raw_name_.empty()) {
unqualified_name = &raw_name_;
}
// Find the name of the enclosing context. If this DIE has a
// specification, it's the specification's enclosing context that
// counts; otherwise, use this DIE's context.
if (specification_) {
enclosing_name = &specification_->enclosing_name;
} else {
enclosing_name = &parent_context_->name;
}
}
// Prepare the return value before upcoming mutations possibly invalidate the
// existing pointers.
string return_value;
if (qualified_name) {
return_value = qualified_name->str();
} else if (unqualified_name && enclosing_name) {
// Combine the enclosing name and unqualified name to produce our
// own fully-qualified name.
return_value = cu_context_->language->MakeQualifiedName(
enclosing_name->str(), unqualified_name->str());
}
// If this DIE was marked as a declaration, record its names in the
// specification table.
if ((declaration_ && qualified_name) ||
(unqualified_name && enclosing_name)) {
Specification spec;
if (qualified_name) {
spec.qualified_name = *qualified_name;
} else {
spec.enclosing_name = *enclosing_name;
spec.unqualified_name = *unqualified_name;
}
cu_context_->file_context->file_private_->specifications[offset_] = spec;
}
return cu_context_->file_context->module_->AddStringToPool(return_value);
}
static bool IsEmptyRange(const vector<Module::Range>& ranges) {
uint64_t size = accumulate(ranges.cbegin(), ranges.cend(), 0,
[](uint64_t total, Module::Range entry) {
return total + entry.size;
}
);
return size == 0;
}
// A handler for DW_TAG_inlined_subroutine DIEs.
class DwarfCUToModule::InlineHandler : public GenericDIEHandler {
public:
InlineHandler(CUContext* cu_context,
DIEContext* parent_context,
uint64_t offset,
int inline_nest_level,
vector<unique_ptr<Module::Inline>>& inlines)
: GenericDIEHandler(cu_context, parent_context, offset),
low_pc_(0),
high_pc_(0),
high_pc_form_(DW_FORM_addr),
ranges_form_(DW_FORM_sec_offset),
ranges_data_(0),
call_site_line_(0),
inline_nest_level_(inline_nest_level),
inlines_(inlines) {}
void ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data);
DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
bool EndAttributes();
void Finish();
private:
// The fully-qualified name, as derived from name_attribute_,
// specification_, parent_context_. Computed in EndAttributes.
StringView name_;
uint64_t low_pc_; // DW_AT_low_pc
uint64_t high_pc_; // DW_AT_high_pc
DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx
uint64_t ranges_data_; // DW_AT_ranges
int call_site_line_;
int inline_nest_level_;
// A vector of inlines in the same nest level. It's owned by its parent
// function/inline. At Finish(), add this inline into the vector.
vector<unique_ptr<Module::Inline>>& inlines_;
// A vector of child inlines.
vector<unique_ptr<Module::Inline>> child_inlines_;
};
void DwarfCUToModule::InlineHandler::ProcessAttributeUnsigned(
enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data) {
switch (attr) {
case DW_AT_low_pc:
low_pc_ = data;
break;
case DW_AT_high_pc:
high_pc_form_ = form;
high_pc_ = data;
break;
case DW_AT_ranges:
ranges_data_ = data;
ranges_form_ = form;
break;
case DW_AT_call_line:
call_site_line_ = data;
break;
default:
GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
break;
}
}
DIEHandler* DwarfCUToModule::InlineHandler::FindChildHandler(
uint64_t offset,
enum DwarfTag tag) {
switch (tag) {
case DW_TAG_inlined_subroutine:
return new InlineHandler(cu_context_, new DIEContext(), offset,
inline_nest_level_ + 1, child_inlines_);
default:
return NULL;
}
}
bool DwarfCUToModule::InlineHandler::EndAttributes() {
if (abstract_origin_)
name_ = abstract_origin_->name;
if (name_.empty()) {
// We haven't seen the abstract origin yet, which might appears later and we
// will fix the name after calling
// InlineOriginMap::GetOrCreateInlineOrigin with right name.
name_ =
cu_context_->file_context->module_->AddStringToPool("<name omitted>");
}
return true;
}
void DwarfCUToModule::InlineHandler::Finish() {
vector<Module::Range> ranges;
if (low_pc_ && high_pc_) {
if (high_pc_form_ != DW_FORM_addr &&
high_pc_form_ != DW_FORM_GNU_addr_index &&
high_pc_form_ != DW_FORM_addrx &&
high_pc_form_ != DW_FORM_addrx1 &&
high_pc_form_ != DW_FORM_addrx2 &&
high_pc_form_ != DW_FORM_addrx3 &&
high_pc_form_ != DW_FORM_addrx4) {
high_pc_ += low_pc_;
}
Module::Range range(low_pc_, high_pc_ - low_pc_);
ranges.push_back(range);
} else {
RangesHandler* ranges_handler = cu_context_->ranges_handler;
if (ranges_handler) {
RangeListReader::CURangesInfo cu_info;
if (cu_context_->AssembleRangeListInfo(&cu_info)) {
if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_,
&cu_info, &ranges)) {
ranges.clear();
cu_context_->reporter->MalformedRangeList(ranges_data_);
}
} else {
cu_context_->reporter->MissingRanges();
}
}
}
// Ignore DW_TAG_inlined_subroutine with empty range.
if (ranges.empty()) {
return;
}
// Every DW_TAG_inlined_subroutine should have a DW_AT_abstract_origin.
assert(specification_offset_ != 0);
cu_context_->file_context->module_->inline_origin_map.SetReference(
specification_offset_, specification_offset_);
Module::InlineOrigin* origin =
cu_context_->file_context->module_->inline_origin_map
.GetOrCreateInlineOrigin(specification_offset_, name_);
unique_ptr<Module::Inline> in(
new Module::Inline(origin, ranges, call_site_line_, inline_nest_level_,
std::move(child_inlines_)));
inlines_.push_back(std::move(in));
}
// A handler class for DW_TAG_subprogram DIEs.
class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
public:
FuncHandler(CUContext* cu_context,
DIEContext* parent_context,
uint64_t offset,
bool handle_inline)
: GenericDIEHandler(cu_context, parent_context, offset),
low_pc_(0),
high_pc_(0),
high_pc_form_(DW_FORM_addr),
ranges_form_(DW_FORM_sec_offset),
ranges_data_(0),
decl_file_data_(UINT64_MAX),
inline_(false),
handle_inline_(handle_inline) {}
void ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data);
void ProcessAttributeSigned(enum DwarfAttribute attr,
enum DwarfForm form,
int64_t data);
DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
bool EndAttributes();
void Finish();
private:
// The fully-qualified name, as derived from name_attribute_,
// specification_, parent_context_. Computed in EndAttributes.
StringView name_;
uint64_t low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
DwarfForm high_pc_form_; // DW_AT_high_pc can be length or address.
DwarfForm ranges_form_; // DW_FORM_sec_offset or DW_FORM_rnglistx
uint64_t ranges_data_; // DW_AT_ranges
// DW_AT_decl_file, value of UINT64_MAX means undefined.
uint64_t decl_file_data_;
bool inline_;
vector<unique_ptr<Module::Inline>> child_inlines_;
bool handle_inline_;
};
void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data) {
switch (attr) {
// If this attribute is present at all --- even if its value is
// DW_INL_not_inlined --- then GCC may cite it as someone else's
// DW_AT_abstract_origin attribute.
case DW_AT_inline: inline_ = true; break;
case DW_AT_low_pc: low_pc_ = data; break;
case DW_AT_high_pc:
high_pc_form_ = form;
high_pc_ = data;
break;
case DW_AT_ranges:
ranges_data_ = data;
ranges_form_ = form;
break;
case DW_AT_decl_file:
decl_file_data_ = data;
break;
default:
GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
break;
}
}
void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
enum DwarfAttribute attr,
enum DwarfForm form,
int64_t data) {
switch (attr) {
// If this attribute is present at all --- even if its value is
// DW_INL_not_inlined --- then GCC may cite it as someone else's
// DW_AT_abstract_origin attribute.
case DW_AT_inline: inline_ = true; break;
default:
break;
}
}
DIEHandler* DwarfCUToModule::FuncHandler::FindChildHandler(
uint64_t offset,
enum DwarfTag tag) {
switch (tag) {
case DW_TAG_inlined_subroutine:
if (handle_inline_)
return new InlineHandler(cu_context_, new DIEContext(), offset, 0,
child_inlines_);
default:
return NULL;
}
}
bool DwarfCUToModule::FuncHandler::EndAttributes() {
// Compute our name, and record a specification, if appropriate.
name_ = ComputeQualifiedName();
if (name_.empty() && abstract_origin_) {
name_ = abstract_origin_->name;
}
return true;
}
void DwarfCUToModule::FuncHandler::Finish() {
vector<Module::Range> ranges;
// Check if this DIE was one of the forward references that was not able
// to be processed, and fix up the name of the appropriate Module::Function.
// "name_" will have already been fixed up in EndAttributes().
if (!name_.empty()) {
auto iter =
cu_context_->file_context->file_private_->forward_ref_die_to_func.find(
offset_);
if (iter !=
cu_context_->file_context->file_private_->forward_ref_die_to_func.end())
iter->second->name = name_;
}
if (!ranges_data_) {
// Make high_pc_ an address, if it isn't already.
if (high_pc_form_ != DW_FORM_addr &&
high_pc_form_ != DW_FORM_GNU_addr_index &&
high_pc_form_ != DW_FORM_addrx &&
high_pc_form_ != DW_FORM_addrx1 &&
high_pc_form_ != DW_FORM_addrx2 &&
high_pc_form_ != DW_FORM_addrx3 &&
high_pc_form_ != DW_FORM_addrx4) {
high_pc_ += low_pc_;
}
Module::Range range(low_pc_, high_pc_ - low_pc_);
ranges.push_back(range);
} else {
RangesHandler* ranges_handler = cu_context_->ranges_handler;
if (ranges_handler) {
RangeListReader::CURangesInfo cu_info;
if (cu_context_->AssembleRangeListInfo(&cu_info)) {
if (!ranges_handler->ReadRanges(ranges_form_, ranges_data_,
&cu_info, &ranges)) {
ranges.clear();
cu_context_->reporter->MalformedRangeList(ranges_data_);
}
} else {
cu_context_->reporter->MissingRanges();
}
}
}
StringView name_omitted =
cu_context_->file_context->module_->AddStringToPool("<name omitted>");
bool empty_range = IsEmptyRange(ranges);
// Did we collect the information we need? Not all DWARF function
// entries are non-empty (for example, inlined functions that were never
// used), but all the ones we're interested in cover a non-empty range of
// bytes.
if (!empty_range) {
low_pc_ = ranges.front().address;
// Malformed DWARF may omit the name, but all Module::Functions must
// have names.
StringView name = name_.empty() ? name_omitted : name_;
// Create a Module::Function based on the data we've gathered, and
// add it to the functions_ list.
scoped_ptr<Module::Function> func(new Module::Function(name, low_pc_));
func->ranges = ranges;
func->parameter_size = 0;
if (func->address) {
// If the function address is zero this is a sign that this function
// description is just empty debug data and should just be discarded.
cu_context_->functions.push_back(func.release());
if (forward_ref_die_offset_ != 0) {
cu_context_->file_context->file_private_
->forward_ref_die_to_func[forward_ref_die_offset_] =
cu_context_->functions.back();
cu_context_->spec_function_offsets[cu_context_->functions.back()] =
forward_ref_die_offset_;
}
cu_context_->functions.back()->inlines.swap(child_inlines_);
}
} else if (inline_) {
AbstractOrigin origin(name_);
cu_context_->file_context->file_private_->origins.insert({offset_, origin});
}
// Only keep track of DW_TAG_subprogram which have the attributes we are
// interested.
if (handle_inline_ &&
(!empty_range || inline_ || decl_file_data_ != UINT64_MAX)) {
StringView name = name_.empty() ? name_omitted : name_;
uint64_t offset =
specification_offset_ != 0 ? specification_offset_ : offset_;
cu_context_->file_context->module_->inline_origin_map.SetReference(offset_,
offset);
cu_context_->file_context->module_->inline_origin_map
.GetOrCreateInlineOrigin(offset_, name);
if (decl_file_data_ != UINT64_MAX)
cu_context_->inline_origins[decl_file_data_].push_back(offset_);
}
}
// A handler for DIEs that contain functions and contribute a
// component to their names: namespaces, classes, etc.
class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
public:
NamedScopeHandler(CUContext* cu_context, DIEContext* parent_context,
uint64_t offset, bool handle_inline)
: GenericDIEHandler(cu_context, parent_context, offset),
handle_inline_(handle_inline) { }
bool EndAttributes();
DIEHandler* FindChildHandler(uint64_t offset, enum DwarfTag tag);
private:
DIEContext child_context_; // A context for our children.
bool handle_inline_;
};
bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
child_context_.name = ComputeQualifiedName();
return true;
}
DIEHandler* DwarfCUToModule::NamedScopeHandler::FindChildHandler(
uint64_t offset,
enum DwarfTag tag) {
switch (tag) {
case DW_TAG_subprogram:
return new FuncHandler(cu_context_, &child_context_, offset,
handle_inline_);
case DW_TAG_namespace:
case DW_TAG_class_type:
case DW_TAG_structure_type:
case DW_TAG_union_type:
return new NamedScopeHandler(cu_context_, &child_context_, offset,
handle_inline_);
default:
return NULL;
}
}
void DwarfCUToModule::WarningReporter::CUHeading() {
if (printed_cu_header_)
return;
fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%" PRIx64 "):\n",
filename_.c_str(), cu_name_.c_str(), cu_offset_);
printed_cu_header_ = true;
}
void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64_t offset,
uint64_t target) {
CUHeading();
fprintf(stderr, "%s: the DIE at offset 0x%" PRIx64 " has a "
"DW_AT_specification attribute referring to the DIE at offset 0x%"
PRIx64 ", which was not marked as a declaration\n",
filename_.c_str(), offset, target);
}
void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64_t offset,
uint64_t target) {
CUHeading();
fprintf(stderr, "%s: the DIE at offset 0x%" PRIx64 " has a "
"DW_AT_abstract_origin attribute referring to the DIE at offset 0x%"
PRIx64 ", which was not marked as an inline\n",
filename_.c_str(), offset, target);
}
void DwarfCUToModule::WarningReporter::MissingSection(const string& name) {
CUHeading();
fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n",
filename_.c_str(), name.c_str());
}
void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64_t offset) {
CUHeading();
fprintf(stderr, "%s: warning: line number data offset beyond end"
" of '.debug_line' section\n",
filename_.c_str());
}
void DwarfCUToModule::WarningReporter::UncoveredHeading() {
if (printed_unpaired_header_)
return;
CUHeading();
fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n",
filename_.c_str());
printed_unpaired_header_ = true;
}
void DwarfCUToModule::WarningReporter::UncoveredFunction(
const Module::Function& function) {
if (!uncovered_warnings_enabled_)
return;
UncoveredHeading();
fprintf(stderr, " function%s: %s\n",
IsEmptyRange(function.ranges) ? " (zero-length)" : "",
function.name.str().c_str());
}
void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line& line) {
if (!uncovered_warnings_enabled_)
return;
UncoveredHeading();
fprintf(stderr, " line%s: %s:%d at 0x%" PRIx64 "\n",
(line.size == 0 ? " (zero-length)" : ""),
line.file->name.c_str(), line.number, line.address);
}
void DwarfCUToModule::WarningReporter::UnnamedFunction(uint64_t offset) {
CUHeading();
fprintf(stderr, "%s: warning: function at offset 0x%" PRIx64 " has no name\n",
filename_.c_str(), offset);
}
void DwarfCUToModule::WarningReporter::DemangleError(const string& input) {
CUHeading();
fprintf(stderr, "%s: warning: failed to demangle %s\n",
filename_.c_str(), input.c_str());
}
void DwarfCUToModule::WarningReporter::UnhandledInterCUReference(
uint64_t offset, uint64_t target) {
CUHeading();
fprintf(stderr, "%s: warning: the DIE at offset 0x%" PRIx64 " has a "
"DW_FORM_ref_addr attribute with an inter-CU reference to "
"0x%" PRIx64 ", but inter-CU reference handling is turned "
" off.\n", filename_.c_str(), offset, target);
}
void DwarfCUToModule::WarningReporter::MalformedRangeList(uint64_t offset) {
CUHeading();
fprintf(stderr, "%s: warning: the range list at offset 0x%" PRIx64 " falls "
" out of the .debug_ranges section.\n",
filename_.c_str(), offset);
}
void DwarfCUToModule::WarningReporter::MissingRanges() {
CUHeading();
fprintf(stderr, "%s: warning: A DW_AT_ranges attribute was encountered but "
"the .debug_ranges section is missing.\n", filename_.c_str());
}
DwarfCUToModule::DwarfCUToModule(FileContext* file_context,
LineToModuleHandler* line_reader,
RangesHandler* ranges_handler,
WarningReporter* reporter,
bool handle_inline)
: RootDIEHandler(handle_inline),
line_reader_(line_reader),
cu_context_(new CUContext(file_context, reporter, ranges_handler)),
child_context_(new DIEContext()),
has_source_line_info_(false) {}
DwarfCUToModule::~DwarfCUToModule() {
}
void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
enum DwarfForm form,
int64_t data) {
switch (attr) {
case DW_AT_language: // source language of this CU
SetLanguage(static_cast<DwarfLanguage>(data));
break;
default:
break;
}
}
void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
enum DwarfForm form,
uint64_t data) {
switch (attr) {
case DW_AT_stmt_list: // Line number information.
has_source_line_info_ = true;
source_line_offset_ = data;
break;
case DW_AT_language: // source language of this CU
SetLanguage(static_cast<DwarfLanguage>(data));
break;
case DW_AT_low_pc:
cu_context_->low_pc = data;
break;
case DW_AT_high_pc:
cu_context_->high_pc = data;
break;
case DW_AT_ranges:
cu_context_->ranges_data = data;
cu_context_->ranges_form = form;
break;
case DW_AT_rnglists_base:
cu_context_->ranges_base = data;
break;
case DW_AT_addr_base:
case DW_AT_GNU_addr_base:
cu_context_->addr_base = data;
break;
case DW_AT_str_offsets_base:
cu_context_->str_offsets_base = data;
break;
default:
break;
}
}
void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
enum DwarfForm form,
const string& data) {
switch (attr) {
case DW_AT_name:
cu_context_->reporter->SetCUName(data);
break;
case DW_AT_comp_dir:
line_reader_->StartCompilationUnit(data);
break;
default:
break;
}
}
bool DwarfCUToModule::EndAttributes() {
return true;
}
DIEHandler* DwarfCUToModule::FindChildHandler(
uint64_t offset,
enum DwarfTag tag) {
switch (tag) {
case DW_TAG_subprogram:
return new FuncHandler(cu_context_.get(), child_context_.get(), offset,
handle_inline);
case DW_TAG_namespace:
case DW_TAG_class_type:
case DW_TAG_structure_type:
case DW_TAG_union_type:
case DW_TAG_module:
return new NamedScopeHandler(cu_context_.get(), child_context_.get(),
offset, handle_inline);
default:
return NULL;
}
}
void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
switch (language) {
case DW_LANG_Java:
cu_context_->language = Language::Java;
break;
case DW_LANG_Swift:
cu_context_->language = Language::Swift;
break;
case DW_LANG_Rust:
cu_context_->language = Language::Rust;
break;
// DWARF has no generic language code for assembly language; this is
// what the GNU toolchain uses.
case DW_LANG_Mips_Assembler:
cu_context_->language = Language::Assembler;
break;
// C++ covers so many cases that it probably has some way to cope
// with whatever the other languages throw at us. So make it the
// default.
//
// Objective C and Objective C++ seem to create entries for
// methods whose DW_AT_name values are already fully-qualified:
// "-[Classname method:]". These appear at the top level.
//
// DWARF data for C should never include namespaces or functions
// nested in struct types, but if it ever does, then C++'s
// notation is probably not a bad choice for that.
default:
case DW_LANG_ObjC:
case DW_LANG_ObjC_plus_plus:
case DW_LANG_C:
case DW_LANG_C89:
case DW_LANG_C99:
case DW_LANG_C_plus_plus:
cu_context_->language = Language::CPlusPlus;
break;
}
}
void DwarfCUToModule::ReadSourceLines(uint64_t offset) {
const SectionMap& section_map
= cu_context_->file_context->section_map();
SectionMap::const_iterator map_entry
= GetSectionByName(section_map, ".debug_line");
if (map_entry == section_map.end()) {
cu_context_->reporter->MissingSection(".debug_line");
return;
}
const uint8_t* line_section_start = map_entry->second.first + offset;
uint64_t line_section_length = map_entry->second.second;
if (offset >= line_section_length) {
cu_context_->reporter->BadLineInfoOffset(offset);
return;
}
line_section_length -= offset;
// When reading line tables, string sections are never needed for dwarf4, and
// may or may not be needed by dwarf5, so no error if they are missing.
const uint8_t* string_section_start = nullptr;
uint64_t string_section_length = 0;
map_entry = GetSectionByName(section_map, ".debug_str");
if (map_entry != section_map.end()) {
string_section_start = map_entry->second.first;
string_section_length = map_entry->second.second;
}
const uint8_t* line_string_section_start = nullptr;
uint64_t line_string_section_length = 0;
map_entry = GetSectionByName(section_map, ".debug_line_str");
if (map_entry != section_map.end()) {
line_string_section_start = map_entry->second.first;
line_string_section_length = map_entry->second.second;
}
line_reader_->ReadProgram(
line_section_start, line_section_length,
string_section_start, string_section_length,
line_string_section_start, line_string_section_length,
cu_context_->file_context->module_, &lines_, &files_);
}
namespace {
class FunctionRange {
public:
FunctionRange(const Module::Range& range, Module::Function* function) :
address(range.address), size(range.size), function(function) { }
void AddLine(Module::Line& line) {
function->lines.push_back(line);
}
Module::Address address;
Module::Address size;
Module::Function* function;
};
// Fills an array of ranges with pointers to the functions which owns
// them. The array is sorted in ascending order and the ranges are non
// empty and non-overlapping.
static void FillSortedFunctionRanges(vector<FunctionRange>& dest_ranges,
vector<Module::Function*>* functions) {
for (vector<Module::Function*>::const_iterator func_it = functions->cbegin();
func_it != functions->cend();
func_it++)
{
Module::Function* func = *func_it;
vector<Module::Range>& ranges = func->ranges;
for (vector<Module::Range>::const_iterator ranges_it = ranges.cbegin();
ranges_it != ranges.cend();
++ranges_it) {
FunctionRange range(*ranges_it, func);
if (range.size != 0) {
dest_ranges.push_back(range);
}
}
}
sort(dest_ranges.begin(), dest_ranges.end(),
[](const FunctionRange& fr1, const FunctionRange& fr2) {
return fr1.address < fr2.address;
}
);
}
// Return true if ADDRESS falls within the range of ITEM.
template <class T>
inline bool within(const T& item, Module::Address address) {
// Because Module::Address is unsigned, and unsigned arithmetic
// wraps around, this will be false if ADDRESS falls before the
// start of ITEM, or if it falls after ITEM's end.
return address - item.address < item.size;
}
}
void DwarfCUToModule::AssignLinesToFunctions() {
vector<Module::Function*>* functions = &cu_context_->functions;
WarningReporter* reporter = cu_context_->reporter;
// This would be simpler if we assumed that source line entries
// don't cross function boundaries. However, there's no real reason
// to assume that (say) a series of function definitions on the same
// line wouldn't get coalesced into one line number entry. The
// DWARF spec certainly makes no such promises.
//
// So treat the functions and lines as peers, and take the trouble
// to compute their ranges' intersections precisely. In any case,
// the hair here is a constant factor for performance; the
// complexity from here on out is linear.
// Put both our functions and lines in order by address.
std::sort(functions->begin(), functions->end(),
Module::Function::CompareByAddress);
std::sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
// The last line that we used any piece of. We use this only for
// generating warnings.
const Module::Line* last_line_used = NULL;
// The last function and line we warned about --- so we can avoid
// doing so more than once.
const Module::Function* last_function_cited = NULL;
const Module::Line* last_line_cited = NULL;
// Prepare a sorted list of ranges with range-to-function mapping
vector<FunctionRange> sorted_ranges;
FillSortedFunctionRanges(sorted_ranges, functions);
// Make a single pass through both the range and line vectors from lower to
// higher addresses, populating each range's function lines vector with lines
// from our lines_ vector that fall within the range.
vector<FunctionRange>::iterator range_it = sorted_ranges.begin();
vector<Module::Line>::const_iterator line_it = lines_.begin();
Module::Address current;
// Pointers to the referents of func_it and line_it, or NULL if the
// iterator is at the end of the sequence.
FunctionRange* range;
const Module::Line* line;
// Start current at the beginning of the first line or function,
// whichever is earlier.
if (range_it != sorted_ranges.end() && line_it != lines_.end()) {
range = &*range_it;
line = &*line_it;
current = std::min(range->address, line->address);
} else if (line_it != lines_.end()) {
range = NULL;
line = &*line_it;
current = line->address;
} else if (range_it != sorted_ranges.end()) {
range = &*range_it;
line = NULL;
current = range->address;
} else {
return;
}
// Some dwarf producers handle linker-removed functions by using -1 as a
// tombstone in the line table. So the end marker can be -1.
if (current == Module::kMaxAddress)
return;
while (range || line) {
// This loop has two invariants that hold at the top.
//
// First, at least one of the iterators is not at the end of its
// sequence, and those that are not refer to the earliest
// range or line that contains or starts after CURRENT.
//
// Note that every byte is in one of four states: it is covered
// or not covered by a range, and, independently, it is
// covered or not covered by a line.
//
// The second invariant is that CURRENT refers to a byte whose
// state is different from its predecessor, or it refers to the
// first byte in the address space. In other words, CURRENT is
// always the address of a transition.
//
// Note that, although each iteration advances CURRENT from one
// transition address to the next in each iteration, it might
// not advance the iterators. Suppose we have a range that
// starts with a line, has a gap, and then a second line, and
// suppose that we enter an iteration with CURRENT at the end of
// the first line. The next transition address is the start of
// the second line, after the gap, so the iteration should
// advance CURRENT to that point. At the head of that iteration,
// the invariants require that the line iterator be pointing at
// the second line. But this is also true at the head of the
// next. And clearly, the iteration must not change the range
// iterator. So neither iterator moves.
// Assert the first invariant (see above).
assert(!range || current < range->address || within(*range, current));
assert(!line || current < line->address || within(*line, current));
// The next transition after CURRENT.
Module::Address next_transition;
// Figure out which state we're in, add lines or warn, and compute
// the next transition address.
if (range && current >= range->address) {
if (line && current >= line->address) {
// Covered by both a line and a range.
Module::Address range_left = range->size - (current - range->address);
Module::Address line_left = line->size - (current - line->address);
// This may overflow, but things work out.
next_transition = current + std::min(range_left, line_left);
Module::Line l = *line;
l.address = current;
l.size = next_transition - current;
range->AddLine(l);
last_line_used = line;
} else {
// Covered by a range, but no line.
if (range->function != last_function_cited) {
reporter->UncoveredFunction(*(range->function));
last_function_cited = range->function;
}
if (line && within(*range, line->address))
next_transition = line->address;
else
// If this overflows, we'll catch it below.
next_transition = range->address + range->size;
}
} else {
if (line && current >= line->address) {
// Covered by a line, but no range.
//
// If GCC emits padding after one function to align the start
// of the next, then it will attribute the padding
// instructions to the last source line of function (to reduce
// the size of the line number info), but omit it from the
// DW_AT_{low,high}_pc range given in .debug_info (since it
// costs nothing to be precise there). If we did use at least
// some of the line we're about to skip, and it ends at the
// start of the next function, then assume this is what
// happened, and don't warn.
if (line != last_line_cited
&& !(range
&& line == last_line_used
&& range->address - line->address == line->size)) {
reporter->UncoveredLine(*line);
last_line_cited = line;
}
if (range && within(*line, range->address))
next_transition = range->address;
else
// If this overflows, we'll catch it below.
next_transition = line->address + line->size;
} else {
// Covered by neither a range nor a line. By the invariant,
// both range and line begin after CURRENT. The next transition
// is the start of the next range or next line, whichever
// is earliest.
assert(range || line);
if (range && line)
next_transition = std::min(range->address, line->address);
else if (range)
next_transition = range->address;
else
next_transition = line->address;
}
}
// If a function or line abuts the end of the address space, then
// next_transition may end up being zero, in which case we've completed
// our pass. Handle that here, instead of trying to deal with it in
// each place we compute next_transition.
// Some dwarf producers handle linker-removed functions by using -1 as a
// tombstone in the line table. So the end marker can be -1.
if (!next_transition || next_transition == Module::kMaxAddress)
break;
// Advance iterators as needed. If lines overlap or functions overlap,
// then we could go around more than once. We don't worry too much
// about what result we produce in that case, just as long as we don't
// hang or crash.
while (range_it != sorted_ranges.end()
&& next_transition >= range_it->address
&& !within(*range_it, next_transition))
range_it++;
range = (range_it != sorted_ranges.end()) ? &(*range_it) : NULL;
while (line_it != lines_.end()
&& next_transition >= line_it->address
&& !within(*line_it, next_transition))
line_it++;
line = (line_it != lines_.end()) ? &*line_it : NULL;
// We must make progress.
assert(next_transition > current);
current = next_transition;
}
}
void DwarfCUToModule::AssignFilesToInlines() {
for (auto iter : files_) {
cu_context_->file_context->module_->inline_origin_map
.AssignFilesToInlineOrigins(cu_context_->inline_origins[iter.first],
iter.second);
}
}
void DwarfCUToModule::Finish() {
// Assembly language files have no function data, and that gives us
// no place to store our line numbers (even though the GNU toolchain
// will happily produce source line info for assembly language
// files). To avoid spurious warnings about lines we can't assign
// to functions, skip CUs in languages that lack functions.
if (!cu_context_->language->HasFunctions())
return;
// Read source line info, if we have any.
if (has_source_line_info_)
ReadSourceLines(source_line_offset_);
vector<Module::Function*>* functions = &cu_context_->functions;
// Dole out lines to the appropriate functions.
AssignLinesToFunctions();
AssignFilesToInlines();
// Add our functions, which now have source lines assigned to them,
// to module_, and remove duplicate functions.
for (Module::Function* func : *functions)
if (!cu_context_->file_context->module_->AddFunction(func)) {
auto iter = cu_context_->spec_function_offsets.find(func);
if (iter != cu_context_->spec_function_offsets.end())
cu_context_->file_context->file_private_->forward_ref_die_to_func.erase(
iter->second);
delete func;
}
// Ownership of the function objects has shifted from cu_context to
// the Module.
functions->clear();
cu_context_->file_context->ClearSpecifications();
}
bool DwarfCUToModule::StartCompilationUnit(uint64_t offset,
uint8_t address_size,
uint8_t offset_size,
uint64_t cu_length,
uint8_t dwarf_version) {
cu_context_->version = dwarf_version;
return dwarf_version >= 2;
}
bool DwarfCUToModule::StartRootDIE(uint64_t offset, enum DwarfTag tag) {
// We don't deal with partial compilation units (the only other tag
// likely to be used for root DIE).
return (tag == DW_TAG_compile_unit
|| tag == DW_TAG_skeleton_unit);
}
} // namespace google_breakpad