blob: cf1d7239edf9735264f455af84c84fae3d9ec5ae [file] [log] [blame]
/* -*- mode: C; c-basic-offset: 3; -*- */
/*--------------------------------------------------------------------*/
/*--- Read DWARF3/4 ".debug_info" sections (DIE trees). ---*/
/*--- readdwarf3.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2008-2013 OpenWorks LLP
info@open-works.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
Neither the names of the U.S. Department of Energy nor the
University of California nor the names of its contributors may be
used to endorse or promote products derived from this software
without prior written permission.
*/
#if defined(VGO_linux) || defined(VGO_darwin)
/* REFERENCE (without which this code will not make much sense):
DWARF Debugging Information Format, Version 3,
dated 20 December 2005 (the "D3 spec").
Available at http://www.dwarfstd.org/Dwarf3.pdf. There's also a
.doc (MS Word) version, but for some reason the section numbers
between the Word and PDF versions differ by 1 in the first digit.
All section references in this code are to the PDF version.
CURRENT HACKS:
DW_TAG_{const,volatile}_type with no DW_AT_type is allowed; it is
assumed to mean "const void" or "volatile void" respectively.
GDB appears to interpret them like this, anyway.
In many cases it is important to know the svma of a CU (the "base
address of the CU", as the D3 spec calls it). There are some
situations in which the spec implies this value is unknown, but the
Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
merely zero when not explicitly stated. So we too have to make
that assumption.
POTENTIAL BUG? Spotted 6 Sept 08. Why doesn't
unitary_range_list() bias the resulting range list in the same way
that its more general cousin, get_range_list(), does? I don't
know.
TODO, 2008 Feb 17:
get rid of cu_svma_known and document the assumed-zero svma hack.
ML_(sizeOfType): differentiate between zero sized types and types
for which the size is unknown. Is this important? I don't know.
DW_TAG_array_types: deal with explicit sizes (currently we compute
the size from the bounds and the element size, although that's
fragile, if the bounds are incompletely specified, or completely
absent)
Document reason for difference (by 1) of stack preening depth in
parse_var_DIE vs parse_type_DIE.
Don't hand to ML_(addVars), vars whose locations are entirely in
registers (DW_OP_reg*). This is merely a space-saving
optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
expressions correctly, by failing to evaluate them and hence
effectively ignoring the variable with which they are associated.
Deal with DW_TAG_array_types which have element size != stride
In some cases, the info for a variable is split between two
different DIEs (generally a declarer and a definer). We punt on
these. Could do better here.
The 'data_bias' argument passed to the expression evaluator
(ML_(evaluate_Dwarf3_Expr)) should really be changed to a
MaybeUWord, to make it clear when we do vs don't know what it is
for the evaluation of an expression. At the moment zero is passed
for this parameter in the don't know case. That's a bit fragile
and obscure; using a MaybeUWord would be clearer.
POTENTIAL PERFORMANCE IMPROVEMENTS:
Currently, duplicate removal and all other queries for the type
entities array is done using cuOffset-based pointing, which
involves a binary search (VG_(lookupXA)) for each access. This is
wildly inefficient, although simple. It would be better to
translate all the cuOffset-based references (iow, all the "R" and
"Rs" fields in the TyEnts in 'tyents') to direct index numbers in
'tyents' right at the start of dedup_types(), and use direct
indexing (VG_(indexXA)) wherever possible after that.
cmp__XArrays_of_AddrRange is also a performance bottleneck. Move
VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
points, and possibly also make an _UNCHECKED version which skips
the range checks in performance-critical situations such as this.
Handle interaction between read_DIE and parse_{var,type}_DIE
better. Currently read_DIE reads the entire DIE just to find where
the end is (and for debug printing), so that it can later reliably
move the cursor to the end regardless of what parse_{var,type}_DIE
do. This means many DIEs (most, even?) are read twice. It would
be smarter to make parse_{var,type}_DIE return a Bool indicating
whether or not they advanced the DIE cursor, and only if they
didn't should read_DIE itself read through the DIE.
ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
zero variables in their .vars XArray. Rather than have an XArray
with zero elements (which uses 2 malloc'd blocks), allow the .vars
pointer to be NULL in this case.
More generally, reduce the amount of memory allocated and freed
while reading Dwarf3 type/variable information. Even modest (20MB)
objects cause this module to allocate and free hundreds of
thousands of small blocks, and ML_(arena_malloc) and its various
groupies always show up at the top of performance profiles. */
#include "pub_core_basics.h"
#include "pub_core_debuginfo.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcsetjmp.h" // setjmp facilities
#include "pub_core_hashtable.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h" /* VG_(needs) */
#include "pub_core_xarray.h"
#include "pub_core_wordfm.h"
#include "priv_misc.h" /* dinfo_zalloc/free */
#include "priv_image.h"
#include "priv_tytypes.h"
#include "priv_d3basics.h"
#include "priv_storage.h"
#include "priv_readdwarf3.h" /* self */
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Basic machinery for parsing DIEs. ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* Print trace output through VG_(printf), but only when the variable
   'td3' in scope at the point of use is true.  The expansion is a
   bare 'if' statement (not wrapped in do/while), so take care when
   using it as the sole body of another if/else. */
#define TRACE_D3(format, args...) \
if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
/* The same test on 'td3', usable as an expression. */
#define TD3 (UNLIKELY(td3))
/* Two distinguished UWord values (all-ones and all-ones minus one).
   NOTE(review): presumably "no valid cuOffset" and "the fake void
   type" respectively -- confirm against the users later in the file. */
#define D3_INVALID_CUOFF ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
/* A read position within a DiSlice.  Besides the slice and the image
   offset of the next byte to read, it carries the routine (and the
   message to pass to it) that the get_* readers call when a read
   would run past the end of the slice. */
typedef
struct {
DiSlice sli; // to which this cursor applies
DiOffT sli_next; // offset in underlying DiImage; must be >= sli.ioff
void (*barf)( const HChar* ) __attribute__((noreturn)); // overrun handler; does not return
const HChar* barfstr; // message handed to barf on overrun
}
Cursor;
/* Consistency check for a Cursor.  Returns True only if the cursor
   exists, has both a failure routine and a failure message, refers to
   a valid slice with a valid start offset, and has a read position
   that does not precede the start of that slice. */
static inline Bool is_sane_Cursor ( Cursor* c ) {
   /* A usable cursor must exist and know how to report failure. */
   if (c && c->barf && c->barfstr) {
      /* Its slice must be valid and have a real start offset ... */
      if (ML_(sli_is_valid)(c->sli) && c->sli.ioff != DiOffT_INVALID) {
         /* ... and the read position may not lie before that start. */
         if (c->sli_next >= c->sli.ioff)
            return True;
      }
   }
   return False;
}
// Prepare |c| for reading from the DiSlice |sli| (an ELF section,
// really), with the first read taking place |sli_initial_offset|
// bytes after the start of the slice.  |barf| and |barfstr| are
// remembered as the failure routine and its message.
static void init_Cursor ( /*OUT*/Cursor* c,
                          DiSlice sli,
                          ULong sli_initial_offset,
                          __attribute__((noreturn)) void (*barf)(const HChar*),
                          const HChar* barfstr )
{
   vg_assert(c);
   /* Start from an all-zero cursor, then fill in every field. */
   VG_(bzero_inline)(c, sizeof(Cursor));
   c->barfstr  = barfstr;
   c->barf     = barf;
   c->sli      = sli;
   c->sli_next = sli.ioff + sli_initial_offset;
   vg_assert(is_sane_Cursor(c));
}
/* True iff the next read position is at or beyond the end of the
   cursor's slice, i.e. nothing is left to read. */
static Bool is_at_end_Cursor ( Cursor* c ) {
   vg_assert(is_sane_Cursor(c));
   if (c->sli_next < c->sli.ioff + c->sli.szB)
      return False;
   return True;
}
/* Return the read position as an offset from the start of the
   cursor's slice (not from the start of the image). */
static inline ULong get_position_of_Cursor ( Cursor* c ) {
   ULong pos;
   vg_assert(is_sane_Cursor(c));
   pos = c->sli_next - c->sli.ioff;
   return pos;
}
/* Move the read position to |pos| bytes after the start of the
   cursor's slice, then check that the cursor is still sane. */
static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
   c->sli_next = pos + c->sli.ioff;
   vg_assert(is_sane_Cursor(c));
}
/* Move the read position forwards by |delta| bytes, then check that
   the cursor is still sane. */
static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
   c->sli_next = c->sli_next + delta;
   vg_assert(is_sane_Cursor(c));
}
/* Return the distance from the read position to the end of the
   slice, as a signed quantity. */
static /*signed*/Long get_remaining_length_Cursor ( Cursor* c ) {
   Long left;
   vg_assert(is_sane_Cursor(c));
   left = c->sli.ioff + c->sli.szB - c->sli_next;
   return left;
}
//static void* get_address_of_Cursor ( Cursor* c ) {
// vg_assert(is_sane_Cursor(c));
// return &c->region_start_img[ c->region_next ];
//}
/* Make a DiCursor denoting the current read position of |c|: the
   slice's image together with the next-read offset. */
static DiCursor get_DiCursor_from_Cursor ( Cursor* c ) {
   DiCursor here = mk_DiCursor(c->sli.img, c->sli_next);
   return here;
}
/* FIXME: document assumptions on endianness for
get_UShort/UInt/ULong. */
/* Read one byte at the cursor and step past it.  If the byte lies
   beyond the end of the slice, the cursor's barf routine is called
   with its barfstr instead. */
static inline UChar get_UChar ( Cursor* c ) {
   UChar val;
   vg_assert(is_sane_Cursor(c));
   /* Refuse to read past the end of the slice. */
   if (c->sli.ioff + c->sli.szB < c->sli_next + sizeof(UChar)) {
      c->barf(c->barfstr);
      /*NOTREACHED*/
      vg_assert(0);
   }
   val = ML_(img_get_UChar)(c->sli.img, c->sli_next);
   c->sli_next += sizeof(UChar);
   return val;
}
/* Read a UShort at the cursor and step past it.  If it does not fit
   entirely within the slice, the cursor's barf routine is called with
   its barfstr instead. */
static UShort get_UShort ( Cursor* c ) {
   UShort val;
   vg_assert(is_sane_Cursor(c));
   /* Refuse to read past the end of the slice. */
   if (c->sli.ioff + c->sli.szB < c->sli_next + sizeof(UShort)) {
      c->barf(c->barfstr);
      /*NOTREACHED*/
      vg_assert(0);
   }
   val = ML_(img_get_UShort)(c->sli.img, c->sli_next);
   c->sli_next += sizeof(UShort);
   return val;
}
/* Read a UInt at the cursor and step past it.  If it does not fit
   entirely within the slice, the cursor's barf routine is called with
   its barfstr instead. */
static UInt get_UInt ( Cursor* c ) {
   UInt val;
   vg_assert(is_sane_Cursor(c));
   /* Refuse to read past the end of the slice. */
   if (c->sli.ioff + c->sli.szB < c->sli_next + sizeof(UInt)) {
      c->barf(c->barfstr);
      /*NOTREACHED*/
      vg_assert(0);
   }
   val = ML_(img_get_UInt)(c->sli.img, c->sli_next);
   c->sli_next += sizeof(UInt);
   return val;
}
/* Read a ULong at the cursor and step past it.  If it does not fit
   entirely within the slice, the cursor's barf routine is called with
   its barfstr instead. */
static ULong get_ULong ( Cursor* c ) {
   ULong val;
   vg_assert(is_sane_Cursor(c));
   /* Refuse to read past the end of the slice. */
   if (c->sli.ioff + c->sli.szB < c->sli_next + sizeof(ULong)) {
      c->barf(c->barfstr);
      /*NOTREACHED*/
      vg_assert(0);
   }
   val = ML_(img_get_ULong)(c->sli.img, c->sli_next);
   c->sli_next += sizeof(ULong);
   return val;
}
/* Read an unsigned LEB128 value at the cursor and step past it.
   Each byte supplies its low 7 bits, least significant group first; a
   set top bit means that another byte follows.  Bytes are fetched
   with get_UChar, so running off the end of the slice invokes the
   cursor's barf routine.  The result is the low 64 bits of the
   encoded value. */
static ULong get_ULEB128 ( Cursor* c ) {
   ULong result;
   Int   shift;
   UChar byte;
   /* unroll first iteration */
   byte = get_UChar( c );
   result = (ULong)(byte & 0x7f);
   if (LIKELY(!(byte & 0x80))) return result;
   shift = 7;
   /* end unroll first iteration */
   do {
      byte = get_UChar( c );
      /* An over-long (malformed) encoding would otherwise shift a
         64-bit value by 64 or more, which is undefined behaviour in
         C.  Groups that cannot fit are still consumed but contribute
         nothing, and 'shift' stops growing so it cannot overflow. */
      if (LIKELY(shift < 64)) {
         result |= ((ULong)(byte & 0x7f)) << shift;
         shift += 7;
      }
   } while (byte & 0x80);
   return result;
}
/* Read a signed LEB128 value at the cursor and step past it.  Each
   byte supplies its low 7 bits, least significant group first; a set
   top bit means that another byte follows.  If the encoding occupies
   fewer than 64 bits and bit 6 of the final byte is set, the result
   is sign-extended.  Bytes are fetched with get_UChar, so running off
   the end of the slice invokes the cursor's barf routine. */
static Long get_SLEB128 ( Cursor* c ) {
   ULong result = 0;
   Int   shift  = 0;
   UChar byte;
   do {
      byte = get_UChar(c);
      /* An over-long (malformed) encoding would otherwise shift a
         64-bit value by 64 or more, which is undefined behaviour in
         C.  Groups that cannot fit are still consumed but contribute
         nothing, and 'shift' stops growing so it cannot overflow. */
      if (shift < 64) {
         result |= ((ULong)(byte & 0x7f)) << shift;
         shift += 7;
      }
   } while (byte & 0x80);
   /* Sign-extend from the last group read, if there is room to. */
   if (shift < 64 && (byte & 0x40))
      result |= -(1ULL << shift);
   return result;
}
/* 'c' is taken to be positioned at the first byte of a
   zero-terminated string.  Return a DiCursor for that first byte and
   leave 'c' just after the terminating zero.  Since every byte up to
   and including the terminator is read through get_UChar, the string
   has been "vetted" against image overruns by the time this returns,
   which makes it safe for the caller to then copy it with
   ML_(addStr). */
static DiCursor get_AsciiZ ( Cursor* c ) {
   const DiCursor start = get_DiCursor_from_Cursor(c);
   /* Consume bytes up to and including the terminating zero. */
   while (get_UChar(c) != 0)
      ;
   return start;
}
/* Return the unsigned LEB128 value at the cursor without moving the
   cursor: the read position is saved and then put back. */
static ULong peek_ULEB128 ( Cursor* c ) {
   const DiOffT saved = c->sli_next;
   const ULong  val   = get_ULEB128( c );
   c->sli_next = saved;
   return val;
}
/* Return the byte at the cursor without moving the cursor: the read
   position is saved and then put back. */
static UChar peek_UChar ( Cursor* c ) {
   const DiOffT saved = c->sli_next;
   const UChar  val   = get_UChar( c );
   c->sli_next = saved;
   return val;
}
/* Read a value whose width depends on the DWARF flavour: a ULong
   when |is_dw64| is set, otherwise a UInt widened to ULong. */
static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
   if (is_dw64)
      return get_ULong(c);
   return (ULong) get_UInt(c);
}
/* Read one host-word-sized value at the cursor, using whichever of
   get_UInt / get_ULong matches sizeof(UWord).  Any other word size is
   an assertion failure. */
static UWord get_UWord ( Cursor* c ) {
   vg_assert(sizeof(UWord) == sizeof(void*));
   switch (sizeof(UWord)) {
      case 4:  return get_UInt(c);
      case 8:  return get_ULong(c);
      default: vg_assert(0);
   }
}
/* Read a DWARF3 'Initial Length' field at the cursor.  A first word
   of 0xFFFFFFFF selects the 64-bit form: *is64 is set to True and the
   length is the ULong that follows.  First words in the range
   0xFFFFFFF0 .. 0xFFFFFFFE are rejected by calling the cursor's barf
   routine with |barfMsg|.  Any other first word is itself the length,
   and *is64 is set to False. */
static ULong get_Initial_Length ( /*OUT*/Bool* is64,
                                  Cursor* c,
                                  const HChar* barfMsg )
{
   UInt first;
   *is64 = False;
   first = get_UInt( c );
   if (first == 0xFFFFFFFF) {
      /* Escape code: the real length follows as a 64-bit value. */
      *is64 = True;
      return get_ULong( c );
   }
   if (first >= 0xFFFFFFF0) {
      /* Remaining values at the top of the range are not accepted. */
      c->barf( barfMsg );
   }
   return (ULong)first;
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- "CUConst" structure ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* One attribute name/form pair of an abbreviation, together with the
   precomputed information used to skip over it quickly. */
typedef
struct _name_form {
ULong at_name; // Dwarf Attribute name
ULong at_form; // Dwarf Attribute form
UInt skip_szB; // Nr of bytes skippable from here ...
UInt next_nf; // ... to reach this attr/form index in the g_abbv.nf
} name_form;
/* skip_szB and next_nf are used to optimise the skipping of uninteresting DIEs.
Each name_form maintains how many (fixed) nr of bytes can be skipped from
the beginning of this form till the next attr/form to look at.
The next form to look can be:
an 'interesting' attr/form to read while skipping a DIE
(currently, this is only DW_AT_sibling)
or
a variable length form which must be read to be skipped.
For a variable length form, the skip_szB will be equal to VARSZ_FORM.
Note: this technique could also be used to speed up the parsing
of DIEs : for each parser kind, we could have the nr of bytes
to skip to directly reach the interesting form(s) for the parser. */
/* One parsed abbreviation.  These are the nodes of the CUConst's
   ht_abbvs hash table, which is why the first two fields are the
   chain link and the key. */
typedef
struct _g_abbv {
struct _g_abbv *next; // read/write by hash table.
UWord abbv_code; // key, read by hash table
ULong atag; // tag of the abbreviation, as read from the abbreviation table
ULong has_children; // children flag, as read from the abbreviation table
name_form nf[0];
/* Variable-length array of name/form pairs, terminated
by a 0/0 pair.
The skip_szB/next_nf allows to skip efficiently a DIE
described by this g_abbv; */
} g_abbv;
/* Holds information that is constant through the parsing of a
Compilation Unit. This is basically plumbed through to
everywhere. */
typedef
struct {
/* Call here if anything goes wrong */
void (*barf)( const HChar* ) __attribute__((noreturn));
/* Is this 64-bit DWARF ? */
Bool is_dw64;
/* Which DWARF version ? (2, 3 or 4) */
UShort version;
/* Length of this Compilation Unit, as stated in the
.unit_length :: InitialLength field of the CU Header.
However, this size (as specified by the D3 spec) does not
include the size of the .unit_length field itself, which is
either 4 or 12 bytes (32-bit or 64-bit Dwarf3). That value
can be obtained through the expression ".is_dw64 ? 12 : 4". */
ULong unit_length;
/* Offset of start of this unit in .debug_info */
UWord cu_start_offset;
/* SVMA for this CU. In the D3 spec, is known as the "base
address of the compilation unit" (last para sec 3.1.1).
Needed for (amongst things) interpretation of location-list
values. */
Addr cu_svma;
Bool cu_svma_known;
/* The debug_abbreviations table to be used for this Unit */
//UChar* debug_abbv;
/* Upper bound on size thereof (an overestimate, in general) */
//UWord debug_abbv_maxszB;
/* A bounded area of the image, to be used as the
debug_abbreviations table to be used for this Unit. */
DiSlice debug_abbv;
/* Image information for various sections. */
DiSlice escn_debug_str;
DiSlice escn_debug_ranges;
DiSlice escn_debug_loc;
DiSlice escn_debug_line;
DiSlice escn_debug_info;
DiSlice escn_debug_types;
DiSlice escn_debug_info_alt;
DiSlice escn_debug_str_alt;
/* How much to add to .debug_types resp. alternate .debug_info offsets
in cook_die*. */
UWord types_cuOff_bias;
UWord alt_cuOff_bias;
/* --- Needed so we can add stuff to the string table. --- */
struct _DebugInfo* di;
/* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
VgHashTable ht_abbvs;
/* True if this came from .debug_types; otherwise it came from
.debug_info. */
Bool is_type_unit;
/* For a unit coming from .debug_types, these hold the TU's type
signature and the uncooked DIE offset of the TU's signatured
type. For a unit coming from .debug_info, these are unused. */
ULong type_signature;
ULong type_offset;
/* Signatured type hash; computed once and then shared by all
CUs. */
VgHashTable signature_types;
/* True if this came from alternate .debug_info; otherwise
it came from normal .debug_info or .debug_types. */
Bool is_alt_info;
}
CUConst;
/* Cook the DIE offset 'die' according to which kind of unit CC
   describes.  Cooking treats .debug_info, .debug_types and the
   optional alternate .debug_info as though they were laid end to end
   in that order, so offsets from a .debug_types unit have
   types_cuOff_bias added, offsets from an alternate .debug_info unit
   have alt_cuOff_bias added, and offsets from the primary .debug_info
   are returned unchanged. */
static UWord cook_die( CUConst* cc, UWord die )
{
   UWord bias = 0;
   if (cc->is_type_unit)
      bias = cc->types_cuOff_bias;
   else if (cc->is_alt_info)
      bias = cc->alt_cuOff_bias;
   return die + bias;
}
/* Variant of cook_die that also takes the referencing form into
   account.  A DW_FORM_ref_sig8 reference is already cooked and is
   returned as is.  A DW_FORM_GNU_ref_alt reference (made from within
   primary .debug_info or .debug_types) denotes a DIE in the alternate
   .debug_info, so the alternate bias is applied.  Every other form is
   handled by cook_die. */
static UWord cook_die_using_form( CUConst *cc, UWord die, DW_FORM form)
{
   switch (form) {
      case DW_FORM_ref_sig8:
         return die;
      case DW_FORM_GNU_ref_alt:
         return die + cc->alt_cuOff_bias;
      default:
         return cook_die( cc, die );
   }
}
/* Undo the cooking of 'die' and return the offset within its own
   section.  *TYPE_FLAG is set to True if the DIE lives in
   .debug_types and *ALT_FLAG to True if it lives in the alternate
   .debug_info; both are False for a DIE in the primary .debug_info. */
static UWord uncook_die( CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
                         Bool *alt_flag )
{
   *type_flag = False;
   *alt_flag  = False;
   /* Using escn_debug_{info,types}.szB looks safe even when those
      slices are DiSlice_INVALID (section not found), since
      DiSlice_INVALID.szB is always zero.  It seems unlikely we would
      get here at all if .debug_info or .debug_types were missing. */
   if (die < cc->escn_debug_info.szB) {
      /* Primary .debug_info: nothing was added when cooking. */
      return die;
   }
   if (die < cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
      /* Falls in the .debug_types part of the cooked space. */
      *type_flag = True;
      return die - cc->escn_debug_info.szB;
   }
   /* Beyond both: alternate .debug_info. */
   *alt_flag = True;
   return die - (cc->escn_debug_info.szB + cc->escn_debug_types.szB);
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Helper functions for Guarded Expressions ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* Parse the location list starting at img-offset 'debug_loc_offset'
in .debug_loc. Results are biased with 'svma_of_referencing_CU'
and so I believe are correct SVMAs for the object as a whole. This
function allocates the UChar*, and the caller must deallocate it.
The resulting block is in so-called Guarded-Expression format.
Guarded-Expression format is similar but not identical to the DWARF3
location-list format. The format of each returned block is:
UChar biasMe;
UChar isEnd;
followed by zero or more of
(Addr aMin; Addr aMax; UShort nbytes; ..bytes..; UChar isEnd)
'..bytes..' is a standard DWARF3 location expression which is
valid when aMin <= pc <= aMax (possibly after suitable biasing).
The number of bytes in '..bytes..' is nbytes.
The end of the sequence is marked by an isEnd == 1 value. All
previous isEnd values must be zero.
biasMe is 1 if the aMin/aMax fields need this DebugInfo's
text_bias added before use, and 0 if this is not necessary (the GX
is ready to go).
Hence the block can be quickly parsed and is self-describing. Note
that aMax is 1 less than the corresponding value in a DWARF3
location list. Zero length ranges, with aMax == aMin-1, are not
allowed.
*/
/* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
it more logically belongs. */
/* Apply di->text_debug_bias to every aMin/aMax in the guarded
   expression 'gx', in place.  Nothing happens if the leading biasMe
   byte is already 0; otherwise that byte is cleared so the bias is
   never applied a second time. */
static void bias_GX ( /*MOD*/GExpr* gx, struct _DebugInfo* di )
{
   UChar* cur = &gx->payload[0];
   UChar  tag;
   UShort exprLen;
   Int    k;

   tag = *cur; /*biasMe*/
   if (tag == 0)
      return;
   vg_assert(tag == 1);
   *cur = 0; /* mark it as done */
   cur++;

   for (;;) {
      tag = *cur++;
      if (tag == 1)
         break; /*isEnd*/
      vg_assert(tag == 0);
      /* t-bias aMin, then aMax; they are stored back to back */
      for (k = 0; k < 2; k++) {
         ML_(write_Addr)(cur, ML_(read_Addr)(cur) + di->text_debug_bias);
         cur += sizeof(Addr);
      }
      /* step over nbytes and the expression it describes */
      exprLen = ML_(read_UShort)(cur);
      cur += sizeof(UShort) + exprLen;
   }
}
/* Build a guarded expression holding exactly one guard, covering
   every address (aMin = 0, aMax = ~0), whose expression is the
   'nbytes' bytes found at 'block'.  biasMe is written as 0.  The
   result is allocated with ML_(dinfo_zalloc).  'nbytes' must fit in
   the 16-bit length field. */
__attribute__((noinline))
static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
{
   /* Everything in the payload other than the expression bytes:
      biasMe, !isEnd and the final isEnd (UChar each), aMin and aMax
      (UWord each), and nbytes (UShort). */
   const SizeT overheadSzB
      = 3 * sizeof(UChar) + 2 * sizeof(UWord) + sizeof(UShort);
   SizeT  payloadSzB;
   GExpr* res;
   UChar* wr;
   UChar* wr0;

   vg_assert(sizeof(UWord) == sizeof(Addr));
   vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
   payloadSzB = overheadSzB + (SizeT)nbytes;

   res = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
                            sizeof(GExpr) + payloadSzB );
   vg_assert(res);

   wr0 = &res->payload[0];
   wr  = wr0;
   wr = ML_(write_UChar)(wr, 0);        /*biasMe*/
   wr = ML_(write_UChar)(wr, 0);        /*!isEnd*/
   wr = ML_(write_Addr)(wr, 0);         /*aMin*/
   wr = ML_(write_Addr)(wr, ~0);        /*aMax*/
   wr = ML_(write_UShort)(wr, nbytes);  /*nbytes*/
   ML_(cur_read_get)(wr, block, nbytes);
   wr += nbytes;
   wr = ML_(write_UChar)(wr, 1);        /*isEnd*/

   /* Exactly the computed number of bytes must have been written,
      and the payload must begin immediately after the GExpr header. */
   vg_assert( (SizeT)(wr - wr0) == payloadSzB );
   vg_assert( &res->payload[payloadSzB]
              == ((UChar*)res) + sizeof(GExpr) + payloadSzB );
   return res;
}
/* Build a guarded expression from the location list found at offset
   'debug_loc_offset' in cc->escn_debug_loc.  The list is read as a
   series of host-word pairs: (0,0) ends the list, (-1,w2) sets a new
   base, and any other pair is a range followed by a 16-bit length and
   that many expression bytes.  Non-empty ranges are emitted with
   'base' and 'svma_of_referencing_CU' added and with the upper bound
   made inclusive; zero-length ranges are read but dropped.  The
   result has biasMe == 1 and is allocated with ML_(dinfo_zalloc).
   cc->barf is called if .debug_loc is empty or missing, on a range
   whose start exceeds its end, or (via the cursor) on overrun.
   NOTE(review): on those barf paths the temporary 'xa' is not
   deleted here -- confirm whether that matters to the callers. */
__attribute__((noinline))
static GExpr* make_general_GX ( CUConst* cc,
Bool td3,
ULong debug_loc_offset,
Addr svma_of_referencing_CU )
{
Addr base;
Cursor loc;
XArray* xa; /* XArray of UChar */
GExpr* gx;
Word nbytes;
vg_assert(sizeof(UWord) == sizeof(Addr));
if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
cc->barf("make_general_GX: .debug_loc is empty/missing");
/* Cursor over the whole of .debug_loc, then moved to the list. */
init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
"Overrun whilst reading .debug_loc section(2)" );
set_position_of_Cursor( &loc, debug_loc_offset );
TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
debug_loc_offset, (ULong)get_DiCursor_from_Cursor(&loc).ioff );
/* Who frees this xa? It is freed before this fn exits. */
xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
ML_(dinfo_free),
sizeof(UChar) );
/* The payload is accumulated byte-wise in xa, starting with biasMe. */
{ UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
base = 0;
while (True) {
Bool acquire;
UWord len;
/* Read a (host-)word pair. This is something of a hack since
the word size to read is really dictated by the ELF file;
however, we assume we're reading a file with the same
word-sizeness as the host. Reasonably enough. */
UWord w1 = get_UWord( &loc );
UWord w2 = get_UWord( &loc );
TRACE_D3(" %08lx %08lx\n", w1, w2);
if (w1 == 0 && w2 == 0)
break; /* end of list */
if (w1 == -1UL) {
/* new value for 'base' */
base = w2;
continue;
}
/* else a location expression follows */
/* else enumerate [w1+base, w2+base) */
/* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
(sec 2.17.2) */
if (w1 > w2) {
TRACE_D3("negative range is for .debug_loc expr at "
"file offset %llu\n",
debug_loc_offset);
cc->barf( "negative range in .debug_loc section" );
}
/* ignore zero length ranges */
acquire = w1 < w2;
/* The expression length is read even for ignored ranges, so that
the expression bytes can be stepped over below. */
len = (UWord)get_UShort( &loc );
if (acquire) {
UWord w;
UShort s;
UChar c;
c = 0; /* !isEnd*/
VG_(addBytesToXA)( xa, &c, sizeof(c) );
/* aMin */
w = w1 + base + svma_of_referencing_CU;
VG_(addBytesToXA)( xa, &w, sizeof(w) );
/* aMax: inclusive, hence the -1 */
w = w2 -1 + base + svma_of_referencing_CU;
VG_(addBytesToXA)( xa, &w, sizeof(w) );
s = (UShort)len;
VG_(addBytesToXA)( xa, &s, sizeof(s) );
}
/* Consume the expression bytes, copying them only if the range
is being kept. */
while (len > 0) {
UChar byte = get_UChar( &loc );
TRACE_D3("%02x", (UInt)byte);
if (acquire)
VG_(addBytesToXA)( xa, &byte, 1 );
len--;
}
TRACE_D3("\n");
}
{ UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }
/* Copy the accumulated bytes into a GExpr of exactly the right size. */
nbytes = VG_(sizeXA)( xa );
vg_assert(nbytes >= 1);
gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
vg_assert(gx);
VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
vg_assert( &gx->payload[nbytes]
== ((UChar*)gx) + sizeof(GExpr) + nbytes );
VG_(deleteXA)( xa );
TRACE_D3("}\n");
return gx;
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Helper functions for range lists and CU headers ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* Denotes an address range. Both aMin and aMax are included in the
range; hence a complete range is (0, ~0) and an empty range is any
(X, X-1) for X > 0. */
typedef
struct { Addr aMin; Addr aMax; }
AddrRange;
/* Generate an arbitrary structural total ordering on
   XArray* of AddrRange.  Returns -1, 0 or 1.  An array with fewer
   elements orders before one with more; arrays of equal size are
   compared element by element, on aMin first and then aMax, and the
   first difference decides.  Neither argument may be NULL. */
static Word cmp__XArrays_of_AddrRange ( XArray* rngs1, XArray* rngs2 )
{
   Word n1, n2, i;
   /* This is core code, so use the core assertion macro as the rest
      of this file does, rather than the tool-side tl_assert. */
   vg_assert(rngs1 && rngs2);
   n1 = VG_(sizeXA)( rngs1 );
   n2 = VG_(sizeXA)( rngs2 );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
      AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
      if (rng1->aMin < rng2->aMin) return -1;
      if (rng1->aMin > rng2->aMin) return 1;
      if (rng1->aMax < rng2->aMax) return -1;
      if (rng1->aMax > rng2->aMax) return 1;
   }
   return 0;
}
/* Make a new XArray of AddrRange containing no ranges. */
__attribute__((noinline))
static XArray* /* of AddrRange */ empty_range_list ( void )
{
   /* Who frees this xa? varstack_preen() does. */
   return VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
                      ML_(dinfo_free),
                      sizeof(AddrRange) );
}
/* Make a new XArray of AddrRange containing the single inclusive
   range [aMin, aMax].  aMin must not exceed aMax. */
__attribute__((noinline))
static XArray* unitary_range_list ( Addr aMin, Addr aMax )
{
   AddrRange only;
   XArray*   list;
   vg_assert(aMin <= aMax);
   only.aMin = aMin;
   only.aMax = aMax;
   /* Who frees this xa? varstack_preen() does. */
   list = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.url.1",
                      ML_(dinfo_free),
                      sizeof(AddrRange) );
   VG_(addToXA)( list, &only );
   return list;
}
/* Collect the address ranges listed at img-offset
   'debug_ranges_offset' in .debug_ranges.  Each range is biased with
   the current base and with 'svma_of_referencing_CU', and so is
   believed to be a correct SVMA range for the object as a whole.
   The XArray is allocated here and the caller must deallocate it.
   cc->barf is called if .debug_ranges is empty or missing, or if a
   range starts after it ends. */
__attribute__((noinline))
static XArray* /* of AddrRange */
get_range_list ( CUConst* cc,
                 Bool td3,
                 UWord debug_ranges_offset,
                 Addr svma_of_referencing_CU )
{
   Cursor  rd;
   XArray* result; /* XArray of AddrRange */
   Addr    base = 0;

   if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
       || cc->escn_debug_ranges.szB == 0)
      cc->barf("get_range_list: .debug_ranges is empty/missing");

   init_Cursor( &rd, cc->escn_debug_ranges, 0, cc->barf,
                "Overrun whilst reading .debug_ranges section(2)" );
   set_position_of_Cursor( &rd, debug_ranges_offset );

   /* Who frees this xa? varstack_preen() does. */
   result = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1",
                        ML_(dinfo_free),
                        sizeof(AddrRange) );

   for (;;) {
      AddrRange entry;
      /* Entries are pairs of host words.  Strictly the word size is
         dictated by the ELF file, but the file is assumed to have the
         same word size as the host. */
      UWord lo = get_UWord( &rd );
      UWord hi = get_UWord( &rd );

      if (lo == 0 && hi == 0)
         break; /* end of list. */

      if (lo == -1UL) {
         /* base-selection entry: 'hi' is the new base */
         base = hi;
         continue;
      }

      /* Otherwise this denotes [lo+base, hi+base); 'hi' is one past
         the end of the range, as per the D3 definition of
         "DW_AT_high_pc" (sec 2.17.2). */
      if (lo > hi)
         cc->barf( "negative range in .debug_ranges section" );
      if (lo == hi)
         continue; /* empty range: nothing to record */

      entry.aMin = lo + base + svma_of_referencing_CU;
      entry.aMax = hi - 1 + base + svma_of_referencing_CU;
      vg_assert(entry.aMin <= entry.aMax);
      VG_(addToXA)( result, &entry );
   }
   return result;
}
/* Value stored in a name_form's skip_szB to mark a form whose size is
   not fixed, so that it must be read in order to be skipped. */
#define VARSZ_FORM 0xffffffff
/* Forward declaration; init_ht_abbvs uses this to obtain the size of
   each form (or VARSZ_FORM). */
static UInt get_Form_szB (CUConst* cc, DW_FORM form );
/* Initialises the hash table of abbreviations.
We do a single scan of the abbv slice to parse and
build all abbreviations, for the following reasons:
* all or most abbreviations will be needed in any case
(at least for var-info reading).
* re-reading each time an abbreviation causes a lot of calls
to get_ULEB128.
* a CU should not have many abbreviations.
The scan starts at the beginning of cc->debug_abbv and stops at the
first abbreviation code of zero. Each abbreviation is assembled in a
growable scratch buffer and then copied into an exactly-sized node
which is added to cc->ht_abbvs. Overruns are reported through
cc->barf by the cursor. */
static void init_ht_abbvs (CUConst* cc,
Bool td3)
{
Cursor c;
g_abbv *ta; // temporary abbreviation, reallocated if needed.
UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
UInt ta_nf_n; // nr of pairs in ta->nf that are initialised.
g_abbv *ht_ta; // abbv to insert in hash table.
Int i;
/* Bytes needed for a g_abbv followed by _nf_szE name_form entries. */
#define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
"Overrun whilst parsing .debug_abbrev section(2)" );
while (True) {
ta->abbv_code = get_ULEB128( &c );
if (ta->abbv_code == 0) break; /* end of the table */
ta->atag = get_ULEB128( &c );
ta->has_children = get_UChar( &c );
ta_nf_n = 0;
/* Read name/form pairs up to and including the terminating 0/0
pair, so ta_nf_n is at least 1 when this loop exits. */
while (True) {
if (ta_nf_n >= ta_nf_maxE) {
/* Scratch buffer is full: double it, keeping what has been
read so far. */
g_abbv *old_ta = ta;
ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
SZ_G_ABBV(2 * ta_nf_maxE));
ta_nf_maxE = 2 * ta_nf_maxE;
VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
ML_(dinfo_free) (old_ta);
}
ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
ta_nf_n++;
break;
}
ta_nf_n++;
}
// Initialises the skip_szB/next_nf elements : an element at position
// i must contain the sum of its own size + the sizes of all elements
// following i till either the next variable size element, the next
// sibling element or the end of the DIE.
// The terminating pair skips nothing; the rest are filled in by
// walking backwards from the pair before it.
ta->nf[ta_nf_n - 1].skip_szB = 0;
ta->nf[ta_nf_n - 1].next_nf = 0;
for (i = ta_nf_n - 2; i >= 0; i--) {
const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
if (ta->nf[i+1].at_name == DW_AT_sibling
|| ta->nf[i+1].skip_szB == VARSZ_FORM) {
/* The following pair must be looked at, so stop there. */
ta->nf[i].skip_szB = form_szB;
ta->nf[i].next_nf = i+1;
} else if (form_szB == VARSZ_FORM) {
/* This pair itself has no fixed size. */
ta->nf[i].skip_szB = form_szB;
ta->nf[i].next_nf = i+1;
} else {
/* Fixed size: merge with whatever can be skipped after it. */
ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
ta->nf[i].next_nf = ta->nf[i+1].next_nf;
}
}
/* Copy into a node of exactly the required size and hand it to the
hash table; the scratch buffer is reused for the next one. */
ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
if (TD3) {
TRACE_D3(" Adding abbv_code %llu TAG %s [%s] nf %d ",
(ULong) ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
ML_(pp_DW_children)(ht_ta->has_children),
ta_nf_n);
TRACE_D3(" ");
/* NOTE(review): 'i' is an Int and ta_nf_n a UInt, so this is a
mixed-sign comparison; harmless while i is non-negative. */
for (i = 0; i < ta_nf_n; i++)
TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
TRACE_D3("\n");
}
}
ML_(dinfo_free) (ta);
#undef SZ_G_ABBV
}
/* Look up the parsed abbreviation with code 'abbv_code' in
   cc->ht_abbvs.  If there is no such abbreviation, cc->barf is
   called. */
static g_abbv* get_abbv (CUConst* cc, ULong abbv_code)
{
   g_abbv *found = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
   if (found)
      return found;
   cc->barf ("abbv_code not found in ht_abbvs table");
   /*NOTREACHED*/
   return found;
}
/* Release what a CUConst owns: the abbreviation hash table, whose
   nodes are freed with ML_(dinfo_free).  The field is left NULL. */
static void clear_CUConst (CUConst* cc)
{
   VgHashTable abbvs = cc->ht_abbvs;
   cc->ht_abbvs = NULL;
   VG_(HT_destruct) ( abbvs, ML_(dinfo_free));
}
/* Parse the Compilation Unit header indicated at 'c' and
initialise 'cc' accordingly.
'cc' is zeroed first, so any field not set here is left as zero.
The header fields are read in order: initial length, version,
abbreviation-table offset, address size and, when 'type_unit' is
set, the type signature and type offset. 'escn_debug_abbv' is the
whole abbreviation section; cc->debug_abbv becomes the part of it
starting at this unit's table. The failure routine is taken from
'c' and is called for an unsupported version, an out-of-range
abbreviation offset, or an address size differing from the host
pointer size. Finally the unit's abbreviations are parsed into
cc->ht_abbvs. */
static __attribute__((noinline))
void parse_CU_Header ( /*OUT*/CUConst* cc,
Bool td3,
Cursor* c,
DiSlice escn_debug_abbv,
Bool type_unit,
Bool alt_info )
{
UChar address_size;
ULong debug_abbrev_offset;
VG_(memset)(cc, 0, sizeof(*cc));
vg_assert(c && c->barf);
cc->barf = c->barf;
/* initial_length field */
cc->unit_length
= get_Initial_Length( &cc->is_dw64, c,
"parse_CU_Header: invalid initial-length field" );
TRACE_D3(" Length: %lld\n", cc->unit_length );
/* version */
cc->version = get_UShort( c );
if (cc->version != 2 && cc->version != 3 && cc->version != 4)
cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
TRACE_D3(" Version: %d\n", (Int)cc->version );
/* debug_abbrev_offset */
debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
if (debug_abbrev_offset >= escn_debug_abbv.szB)
cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
TRACE_D3(" Abbrev Offset: %lld\n", debug_abbrev_offset );
/* address size. If this isn't equal to the host word size, just
give up. This makes it safe to assume elsewhere that
DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
word. */
address_size = get_UChar( c );
if (address_size != sizeof(void*))
cc->barf( "parse_CU_Header: invalid address_size" );
TRACE_D3(" Pointer Size: %d\n", (Int)address_size );
cc->is_type_unit = type_unit;
cc->is_alt_info = alt_info;
/* Type units carry two extra header fields. */
if (type_unit) {
cc->type_signature = get_ULong( c );
cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
}
/* Set up cc->debug_abbv to point to the relevant table for this
CU. Set its .szB so that at least we can't read off the end of
the debug_abbrev section -- potentially (and quite likely) too
big, if this isn't the last table in the section, but at least
it's safe.
This amounts to taking escn_debug_abbv and moving the start
position along by debug_abbrev_offset bytes, hence forming a
smaller DiSlice which has the same end point. Since we checked
just above that debug_abbrev_offset is less than the size of
escn_debug_abbv, this should leave us with a nonempty slice. */
vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
cc->debug_abbv = escn_debug_abbv;
cc->debug_abbv.ioff += debug_abbrev_offset;
cc->debug_abbv.szB -= debug_abbrev_offset;
init_ht_abbvs(cc, td3);
}
/* This represents a single signatured type. It maps a type signature
(a ULong) to a cooked DIE offset. Objects of this type are stored
in the type signature hash table. */
typedef
struct D3SignatureType {
struct D3SignatureType *next; // chain link, followed during lookup
UWord data; // hash table key: the signature cast to UWord
ULong type_signature; // the full signature, compared during lookup
UWord die; // cooked DIE offset of the type
}
D3SignatureType;
/* Add a new entry to the signatured-type hash table 'tab', mapping
   'type_signature' to the cooked DIE offset 'die'.  The node is
   allocated (zeroed) from the debuginfo arena. */
static void record_signatured_type ( VgHashTable tab,
                                     ULong type_signature,
                                     UWord die )
{
   D3SignatureType *node;

   node = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype", sizeof(*node) );

   /* The key is only word-sized, so the full 64-bit signature is
      stored too; lookups compare against that. */
   node->die            = die;
   node->type_signature = type_signature;
   node->data           = (UWord) type_signature;

   VG_(HT_add_node) ( tab, node );
}
/* Find 'type_signature' in the signatured-type hash table 'tab' and
   return the cooked DIE offset recorded for it.  If there is no such
   entry, BARF is called (and does not return). */
static UWord lookup_signatured_type ( VgHashTable tab,
                                      ULong type_signature,
                                      void (*barf)( const HChar* ) __attribute__((noreturn)) )
{
   D3SignatureType *node;

   /* The table is keyed on the signature cast to a UWord, so the node
      handed back need not be the right one.  Follow the chain until
      the full 64-bit signature matches.  This may be unwarranted
      chumminess with the hash table implementation. */
   for (node = VG_(HT_lookup) ( tab, (UWord) type_signature );
        node != NULL;
        node = node->next) {
      if (node->type_signature == type_signature)
         return node->die;
   }

   barf("lookup_signatured_type: could not find signatured type");
   /*NOTREACHED*/
   vg_assert(0);
}
/* Represents Form data, as produced by get_Form_contents.

   If szB is 1/2/4/8 then the result is a scalar held in the lowest
   1/2/4/8 bytes of u.val.  If szB is zero or negative then the result
   is an image section (a block or a string) beginning at u.cur and
   with size -szB bytes.  No other szB values are allowed.

   Callers typically test 'szB > 0' to mean "got a scalar" and
   'szB < 0' to mean "got a non-empty block/string". */
typedef
   struct {
      Long szB; // 1, 2, 4, 8 or non-positive values only.
      union { ULong val; DiCursor cur; } u; // val: scalar; cur: start of bytes
   }
   FormContents;
/* From 'c', get the Form data into 'cts'. Either it gets a 1/2/4/8
byte scalar value, or (a reference to) zero or more bytes starting
at a DiCursor.*/
static
void get_Form_contents ( /*OUT*/FormContents* cts,
CUConst* cc, Cursor* c,
Bool td3, DW_FORM form )
{
VG_(bzero_inline)(cts, sizeof(*cts));
// !!! keep switch in sync with get_Form_szB. The nr of characters read below
// must be computed similarly in get_Form_szB.
// The consistency is verified in trace_DIE.
switch (form) {
case DW_FORM_data1:
cts->u.val = (ULong)(UChar)get_UChar(c);
cts->szB = 1;
TRACE_D3("%u", (UInt)cts->u.val);
break;
case DW_FORM_data2:
cts->u.val = (ULong)(UShort)get_UShort(c);
cts->szB = 2;
TRACE_D3("%u", (UInt)cts->u.val);
break;
case DW_FORM_data4:
cts->u.val = (ULong)(UInt)get_UInt(c);
cts->szB = 4;
TRACE_D3("%u", (UInt)cts->u.val);
break;
case DW_FORM_data8:
cts->u.val = get_ULong(c);
cts->szB = 8;
TRACE_D3("%llu", cts->u.val);
break;
case DW_FORM_sec_offset:
cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
cts->szB = cc->is_dw64 ? 8 : 4;
TRACE_D3("%llu", cts->u.val);
break;
case DW_FORM_sdata:
cts->u.val = (ULong)(Long)get_SLEB128(c);
cts->szB = 8;
TRACE_D3("%lld", (Long)cts->u.val);
break;
case DW_FORM_udata:
cts->u.val = (ULong)(Long)get_ULEB128(c);
cts->szB = 8;
TRACE_D3("%llu", (Long)cts->u.val);
break;
case DW_FORM_addr:
/* note, this is a hack. DW_FORM_addr is defined as getting
a word the size of the target machine as defined by the
address_size field in the CU Header. However,
parse_CU_Header() rejects all inputs except those for
which address_size == sizeof(Word), hence we can just
treat it as a (host) Word. */
cts->u.val = (ULong)(UWord)get_UWord(c);
cts->szB = sizeof(UWord);
TRACE_D3("0x%lx", (UWord)cts->u.val);
break;
case DW_FORM_ref_addr:
/* We make the same word-size assumption as DW_FORM_addr. */
/* What does this really mean? From D3 Sec 7.5.4,
description of "reference", it would appear to reference
some other DIE, by specifying the offset from the
beginning of a .debug_info section. The D3 spec mentions
that this might be in some other shared object and
executable. But I don't see how the name of the other
object/exe is specified.
At least for the DW_FORM_ref_addrs created by icc11, the
references seem to be within the same object/executable.
So for the moment we merely range-check, to see that they
actually do specify a plausible offset within this
object's .debug_info, and return the value unchanged.
In DWARF 2, DW_FORM_ref_addr is address-sized, but in
DWARF 3 and later, it is offset-sized.
*/
if (cc->version == 2) {
cts->u.val = (ULong)(UWord)get_UWord(c);
cts->szB = sizeof(UWord);
} else {
cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
}
TRACE_D3("0x%lx", (UWord)cts->u.val);
if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
if (/* the following is surely impossible, but ... */
!ML_(sli_is_valid)(cc->escn_debug_info)
|| cts->u.val >= (ULong)cc->escn_debug_info.szB) {
/* Hmm. Offset is nonsensical for this object's .debug_info
section. Be safe and reject it. */
cc->barf("get_Form_contents: DW_FORM_ref_addr points "
"outside .debug_info");
}
break;
case DW_FORM_strp: {
/* this is an offset into .debug_str */
UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
if (!ML_(sli_is_valid)(cc->escn_debug_str)
|| uw >= cc->escn_debug_str.szB)
cc->barf("get_Form_contents: DW_FORM_strp "
"points outside .debug_str");
/* FIXME: check the entire string lies inside debug_str,
not just the first byte of it. */
DiCursor str
= ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
if (TD3) {
HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
ML_(dinfo_free)(tmp);
}
cts->u.cur = str;
cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
break;
}
case DW_FORM_string: {
DiCursor str = get_AsciiZ(c);
if (TD3) {
HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
TRACE_D3("%s", tmp);
ML_(dinfo_free)(tmp);
}
cts->u.cur = str;
/* strlen is safe because get_AsciiZ already 'vetted' the
entire string */
cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
break;
}
case DW_FORM_ref1: {
UChar u8 = get_UChar(c);
UWord res = cc->cu_start_offset + (UWord)u8;
cts->u.val = (ULong)res;
cts->szB = sizeof(UWord);
TRACE_D3("<%lx>", res);
break;
}
case DW_FORM_ref2: {
UShort u16 = get_UShort(c);
UWord res = cc->cu_start_offset + (UWord)u16;
cts->u.val = (ULong)res;
cts->szB = sizeof(UWord);
TRACE_D3("<%lx>", res);
break;
}
case DW_FORM_ref4: {
UInt u32 = get_UInt(c);
UWord res = cc->cu_start_offset + (UWord)u32;
cts->u.val = (ULong)res;
cts->szB = sizeof(UWord);
TRACE_D3("<%lx>", res);
break;
}
case DW_FORM_ref8: {
ULong u64 = get_ULong(c);
UWord res = cc->cu_start_offset + (UWord)u64;
cts->u.val = (ULong)res;
cts->szB = sizeof(UWord);
TRACE_D3("<%lx>", res);
break;
}
case DW_FORM_ref_udata: {
ULong u64 = get_ULEB128(c);
UWord res = cc->cu_start_offset + (UWord)u64;
cts->u.val = (ULong)res;
cts->szB = sizeof(UWord);
TRACE_D3("<%lx>", res);
break;
}
case DW_FORM_flag: {
UChar u8 = get_UChar(c);
TRACE_D3("%u", (UInt)u8);
cts->u.val = (ULong)u8;
cts->szB = 1;
break;
}
case DW_FORM_flag_present:
TRACE_D3("1");
cts->u.val = 1;
cts->szB = 1;
break;
case DW_FORM_block1: {
ULong u64b;
ULong u64 = (ULong)get_UChar(c);
DiCursor block = get_DiCursor_from_Cursor(c);
TRACE_D3("%llu byte block: ", u64);
for (u64b = u64; u64b > 0; u64b--) {
UChar u8 = get_UChar(c);
TRACE_D3("%x ", (UInt)u8);
}
cts->u.cur = block;
cts->szB = - (Long)u64;
break;
}
case DW_FORM_block2: {
ULong u64b;
ULong u64 = (ULong)get_UShort(c);
DiCursor block = get_DiCursor_from_Cursor(c);
TRACE_D3("%llu byte block: ", u64);
for (u64b = u64; u64b > 0; u64b--) {
UChar u8 = get_UChar(c);
TRACE_D3("%x ", (UInt)u8);
}
cts->u.cur = block;
cts->szB = - (Long)u64;
break;
}
case DW_FORM_block4: {
ULong u64b;
ULong u64 = (ULong)get_UInt(c);
DiCursor block = get_DiCursor_from_Cursor(c);
TRACE_D3("%llu byte block: ", u64);
for (u64b = u64; u64b > 0; u64b--) {
UChar u8 = get_UChar(c);
TRACE_D3("%x ", (UInt)u8);
}
cts->u.cur = block;
cts->szB = - (Long)u64;
break;
}
case DW_FORM_exprloc:
case DW_FORM_block: {
ULong u64b;
ULong u64 = (ULong)get_ULEB128(c);
DiCursor block = get_DiCursor_from_Cursor(c);
TRACE_D3("%llu byte block: ", u64);
for (u64b = u64; u64b > 0; u64b--) {
UChar u8 = get_UChar(c);
TRACE_D3("%x ", (UInt)u8);
}
cts->u.cur = block;
cts->szB = - (Long)u64;
break;
}
case DW_FORM_ref_sig8: {
ULong u64b;
ULong signature = get_ULong (c);
ULong work = signature;
TRACE_D3("8 byte signature: ");
for (u64b = 8; u64b > 0; u64b--) {
UChar u8 = work & 0xff;
TRACE_D3("%x ", (UInt)u8);
work >>= 8;
}
/* Due to the way that the hash table is constructed, the
resulting DIE offset here is already "cooked". See
cook_die_using_form. */
cts->u.val = lookup_signatured_type (cc->signature_types, signature,
c->barf);
cts->szB = sizeof(UWord);
break;
}
case DW_FORM_indirect:
get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
return;
case DW_FORM_GNU_ref_alt:
cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
cts->szB = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
TRACE_D3("0x%lx", (UWord)cts->u.val);
if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
if (/* the following is surely impossible, but ... */
!ML_(sli_is_valid)(cc->escn_debug_info_alt))
cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
"but no alternate .debug_info");
else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
/* Hmm. Offset is nonsensical for this object's .debug_info
section. Be safe and reject it. */
cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
"outside alternate .debug_info");
}
break;
case DW_FORM_GNU_strp_alt: {
/* this is an offset into alternate .debug_str */
SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
"but no alternate .debug_str");
else if (uw >= cc->escn_debug_str_alt.szB)
cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
"points outside alternate .debug_str");
/* FIXME: check the entire string lies inside debug_str,
not just the first byte of it. */
DiCursor str
= ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
if (TD3) {
HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
ML_(dinfo_free)(tmp);
}
cts->u.cur = str;
cts->szB = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
break;
}
default:
VG_(printf)(
"get_Form_contents: unhandled %d (%s) at <%llx>\n",
form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
c->barf("get_Form_contents: unhandled DW_FORM");
}
}
/* Number of bytes occupied by a "Dwarfish UWord" (a section offset):
   sizeof(ULong) in 64-bit DWARF, sizeof(UInt) otherwise. */
static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
{
   return is_dw64 ? sizeof(ULong) : sizeof(UInt);
}
#define VARSZ_FORM 0xffffffff
/* If the form is a fixed length form, return the nr of bytes for this form.
   If the form is a variable length form, return VARSZ_FORM.

   cc    per-CU constants; the DWARF version and the 32/64-bit DWARF
         flag decide the size of offset-sized forms.
   form  the DW_FORM being asked about.

   An unrecognised form is reported through cc->barf. */
static
UInt get_Form_szB (CUConst* cc, DW_FORM form )
{
   // !!! keep switch in sync with get_Form_contents : the nr of bytes
   // read from a cursor by get_Form_contents must be returned by
   // the below switch.
   // The consistency is verified in trace_DIE.
   switch (form) {
      /* Fixed-size scalars. */
      case DW_FORM_data1: return 1;
      case DW_FORM_data2: return 2;
      case DW_FORM_data4: return 4;
      case DW_FORM_data8: return 8;

      /* Offset-sized forms: 4 or 8 bytes depending on 32/64-bit DWARF. */
      case DW_FORM_sec_offset:
      case DW_FORM_strp:
      case DW_FORM_GNU_ref_alt:
      case DW_FORM_GNU_strp_alt:
         return sizeof_Dwarfish_UWord (cc->is_dw64);

      case DW_FORM_addr: // See hack in get_Form_contents
         return sizeof(UWord);
      case DW_FORM_ref_addr: // See hack in get_Form_contents
         /* address-sized in DWARF 2, offset-sized from DWARF 3 on */
         if (cc->version == 2)
            return sizeof(UWord);
         else
            return sizeof_Dwarfish_UWord (cc->is_dw64);

      /* CU-relative references with a fixed-size encoding. */
      case DW_FORM_ref1: return 1;
      case DW_FORM_ref2: return 2;
      case DW_FORM_ref4: return 4;
      case DW_FORM_ref8: return 8;

      case DW_FORM_flag:
         return 1;
      case DW_FORM_flag_present:
         return 0; // !!! special case, no data.

      case DW_FORM_ref_sig8:
         /* get_Form_contents consumes exactly one 8-byte signature
            for this form, so the skip size must be 8 as well. */
         return 8;

      /* LEB128 values, NUL-terminated strings, length-prefixed blocks
         and forms whose real form is itself stored inline: the size
         can only be found by decoding. */
      case DW_FORM_sdata:
      case DW_FORM_udata:
      case DW_FORM_string:
      case DW_FORM_ref_udata:
      case DW_FORM_block1:
      case DW_FORM_block2:
      case DW_FORM_block4:
      case DW_FORM_exprloc:
      case DW_FORM_block:
      case DW_FORM_indirect:
         return VARSZ_FORM;

      default:
         VG_(printf)(
            "get_Form_szB: unhandled %d (%s)\n",
            form, ML_(pp_DW_FORM)(form));
         cc->barf("get_Form_szB: unhandled DW_FORM");
   }
}
/* Advance 'c_die' past the attribute values of one DIE whose layout
   is described by 'abbv', without interpreting them.
   If the DIE has a DW_AT_sibling attribute that decodes to a scalar,
   *sibling is set to that value; otherwise *sibling is left alone.

   Attributes with a known fixed size are stepped over using the
   precomputed skip_szB / next_nf fields of the abbreviation entry.
   DW_AT_sibling and variable-size forms have to be decoded with
   get_Form_contents.  The walk ends when the index returns to 0. */
static
void skip_DIE (UWord  *sibling,
               Cursor* c_die,
               g_abbv *abbv,
               CUConst* cc)
{
   FormContents cts;
   UInt         nf_i = 0;

   do {
      Bool is_sibling = abbv->nf[nf_i].at_name == DW_AT_sibling;

      if (is_sibling || abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
         /* Must really decode this one. */
         get_Form_contents( &cts, cc, c_die, False /*td3*/,
                            (DW_FORM)abbv->nf[nf_i].at_form );
         if (is_sibling && cts.szB > 0)
            *sibling = cts.u.val;
         nf_i++;
      } else {
         /* Fixed size: jump over the bytes and follow the link. */
         advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
         nf_i = abbv->nf[nf_i].next_nf;
      }
   } while (nf_i != 0);
}
/*------------------------------------------------------------*/
/*--- ---*/
/*--- Parsing of variable-related DIEs ---*/
/*--- ---*/
/*------------------------------------------------------------*/
/* A variable found while parsing variable-related DIEs, together
   with the address ranges over which it is in scope and the
   information needed to locate and describe it. */
typedef
   struct _TempVar {
      HChar*  name; /* in DebugInfo's .strpool */
      /* Represent ranges economically.  nRanges is the number of
         ranges.  Cases:
         0: .rngOneMin .rngOneMax .rngMany are all zero
         1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
         2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
            (NOTE(review): presumably this case means "2 or more" --
            confirm against the code that fills these fields in.)
         This is merely an optimisation to avoid having to allocate
         and free the XArray in the common case (98%) where there
         are zero or one address ranges. */
      UWord   nRanges;
      Addr    rngOneMin;
      Addr    rngOneMax;
      XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
      /* Do not free .rngMany, since many TempVars will have the same
         value.  Instead the associated storage is to be freed by
         deleting 'rangetree', which stores a single copy of each
         range. */
      /* --- */
      Int     level;
      UWord   typeR; /* a cuOff */
      GExpr*  gexpr; /* for this variable */
      GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
                        any */
      UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
      Int     fLine; /* declaring file line number, or zero */
      /* offset in .debug_info, so that abstract instances can be
         found to satisfy references from concrete instances. */
      UWord   dioff;
      UWord   absOri; /* so the absOri fields refer to dioff fields
                         in some other, related TempVar. */
   }
   TempVar;
/* Maximum depth of the scope stack kept in a D3VarParser.
   varstack_push barfs if a push would exceed this. */
#define N_D3_VAR_STACK 48
/* State carried by the variable parser from one DIE to the next. */
typedef
   struct {
      /* Contains the range stack: a stack of address ranges, one
         stack entry for each nested scope.
         Some scope entries are created by function definitions
         (DW_TAG_subprogram), and for those, we also note the GExpr
         derived from its DW_AT_frame_base attribute, if any.
         Consequently it should be possible to find, for any
         variable's DIE, the GExpr for the containing function's
         DW_AT_frame_base by scanning back through the stack to find
         the nearest entry associated with a function.  This somewhat
         elaborate scheme is provided so as to make it possible to
         obtain the correct DW_AT_frame_base expression even in the
         presence of nested functions (or to be more precise, in the
         presence of nested DW_TAG_subprogram DIEs).
      */
      Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
                     stack */
      /* The four arrays below are parallel: index i describes stack
         entry i.  Unused entries are kept at NULL / 0 / False / NULL
         (varstack_preen resets them on pop and varstack_push asserts
         it before filling one in). */
      XArray* ranges[N_D3_VAR_STACK]; /* XArray of AddrRange */
      Int     level[N_D3_VAR_STACK];  /* D3 DIE levels */
      Bool    isFunc[N_D3_VAR_STACK]; /* from DW_TAG_subprogram? */
      GExpr*  fbGX[N_D3_VAR_STACK];   /* if isFunc, contains the FB
                                         expr, else NULL */
      /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
         integer index to the index in di->fndnpool. */
      XArray* /* of UInt* */ fndn_ix_Table;
   }
   D3VarParser;
/* Debug printing: dump every entry of the parser's scope stack, from
   the bottom ([0]) up to the innermost ([sp]).  'str' is a caller
   supplied tag included in the heading. */
static void varstack_show ( D3VarParser* parser, const HChar* str ) {
   Word ent;

   VG_(printf)(" varstack (%s) {\n", str);

   for (ent = 0; ent <= parser->sp; ent++) {
      XArray* xa = parser->ranges[ent];
      Word    nRanges;
      Word    r;

      vg_assert(xa);
      VG_(printf)(" [%ld] (level %d)", ent, parser->level[ent]);

      /* Only function scopes may carry a frame-base expression. */
      if (!parser->isFunc[ent]) {
         vg_assert(parser->fbGX[i] == NULL);
      } else {
         VG_(printf)(" (fbGX=%p)", parser->fbGX[ent]);
      }
      VG_(printf)(": ");

      nRanges = VG_(sizeXA)( xa );
      if (nRanges == 0)
         VG_(printf)("** empty PC range array **");
      for (r = 0; r < nRanges; r++) {
         AddrRange* range = (AddrRange*) VG_(indexXA)( xa, r );
         vg_assert(range);
         VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
      }
      VG_(printf)("\n");
   }

   VG_(printf)(" }\n");
}
/* Pop every entry whose .level is greater than 'level' off the scope
   stack.  Each popped entry's range array is deleted and its slot is
   reset to the empty state.  If td3 is set and anything was popped,
   the resulting stack is printed. */
static
void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
{
   const Int sp_on_entry = parser->sp;

   vg_assert(parser->sp < N_D3_VAR_STACK);

   for (;;) {
      Int top;

      vg_assert(parser->sp >= -1);
      top = parser->sp;
      /* Stop at an empty stack or at the first entry shallow enough
         to keep. */
      if (top == -1 || parser->level[top] <= level)
         break;

      if (0)
         TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
      vg_assert(parser->ranges[parser->sp]);

      /* Who allocated this xa?  get_range_list() or
         unitary_range_list(). */
      VG_(deleteXA)( parser->ranges[top] );

      /* Leave the slot looking unused. */
      parser->fbGX[top]   = NULL;
      parser->isFunc[top] = False;
      parser->level[top]  = 0;
      parser->ranges[top] = NULL;

      parser->sp = top - 1;
   }

   /* sp only ever goes down here, so a different sp means we popped. */
   if (td3 && parser->sp != sp_on_entry)
      varstack_show( parser, "after preen" );
}
/* Push a new scope onto the parser's scope stack.

   cc      per-CU constants; only cc->barf is used here, to report
           stack overflow.
   parser  the parser whose stack is modified.
   td3     if set, the stack is printed after the push.
   ranges  address ranges of the new scope; must not be NULL.  The
           pointer is stored in the stack as-is, and varstack_preen
           deletes it when the entry is popped.
   level   DIE nesting level of the new scope.
   isFunc  whether the scope is a function.
   fbGX    the function's frame base expression; must be NULL unless
           isFunc is set. */
static void varstack_push ( CUConst* cc,
                            D3VarParser* parser,
                            Bool td3,
                            XArray* ranges, Int level,
                            Bool isFunc, GExpr* fbGX ) {
   if (0)
      TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d %p\n",
               parser->sp+1, level, ranges);

   /* First we need to zap everything >= 'level', as we are about to
      replace any previous entry at 'level', so .. */
   varstack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < N_D3_VAR_STACK);
   /* Stack full?  Give up rather than overrun the fixed-size arrays. */
   if (parser->sp == N_D3_VAR_STACK-1)
      cc->barf("varstack_push: N_D3_VAR_STACK is too low; "
               "increase and recompile");
   /* After the preen, whatever is on top must be strictly shallower
      than the entry being added. */
   if (parser->sp >= 0)
      vg_assert(parser->level[parser->sp] < level);
   parser->sp++;
   /* The slot we are about to fill must be in the reset state. */
   vg_assert(parser->ranges[parser->sp] == NULL);
   vg_assert(parser->level[parser->sp]  == 0);
   vg_assert(parser->isFunc[parser->sp] == False);
   vg_assert(parser->fbGX[parser->sp]   == NULL);
   vg_assert(ranges != NULL);
   if (!isFunc) vg_assert(fbGX == NULL);
   parser->ranges[parser->sp] = ranges;
   parser->level[parser->sp]  = level;
   parser->isFunc[parser->sp] = isFunc;
   parser->fbGX[parser->sp]   = fbGX;
   if (TD3)
      varstack_show( parser, "after push" );
}
/* 'cts' comes from a DW_AT_location-style attribute, so it denotes
   either an in-line location expression or a location list.  Work
   out which, and wrap it up as a GExpr (guarded expression).

   cts->szB < 0: in-line expression of -szB bytes at cts->u.cur.
   cts->szB > 0: cts->u.val is the offset of a location list in
                 .debug_loc; this needs the CU's svma to be known,
                 else we barf.
   cts->szB == 0 is a caller error and asserts. */
__attribute__((noinline))
static GExpr* get_GX ( CUConst* cc, Bool td3, const FormContents* cts )
{
   if (cts->szB < 0) {
      /* non-empty in-line location expression */
      return make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
   }

   if (cts->szB > 0) {
      /* location list */
      if (!cc->cu_svma_known)
         cc->barf("get_GX: location list, but CU svma is unknown");
      return make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
   }

   vg_assert(0); /* else caller is bogus */
   return NULL;
}
/* Read the directory table at 'c' and return it as an xarray of
   HChar* directory names, indexed by the dwarf dirname integer.

   Entry [0] is 'compdir' if that is non-NULL, else ".".  It basically
   means "the current directory of the compilation", whatever that
   means, according to the DWARF3 spec.

   Every other entry comes from the table.  Names starting with '/'
   are stored as they are.  Other names are stored as
   'compdir' ++ "/" ++ name when compdir is available and the result
   fits the scratch buffer, else unchanged.  Table-derived names are
   interned with ML_(addStr).

   Returns NULL (after reporting via ML_(symerr) and deleting the
   xarray) if the byte following the table is not NUL.

   FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
static
XArray* read_dirname_xa (struct _DebugInfo* di, const HChar *compdir,
                         Cursor *c,
                         Bool td3 )
{
#  define NBUF 4096
   static HChar buf[NBUF];       /* scratch space for joined paths */

   XArray*      dirname_xa;      /* xarray of HChar* dirname */
   const HChar* dirname     = (compdir == NULL) ? "." : compdir;
   UInt         compdir_len = (compdir == NULL) ? 0 : VG_(strlen)(compdir);

   dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
                            sizeof(HChar*) );
   /* entry [0] */
   VG_(addToXA) (dirname_xa, &dirname);

   TRACE_D3(" The Directory Table%s\n",
            peek_UChar(c) == 0 ? " is empty." : ":" );

   while (peek_UChar(c) != 0) {
      DiCursor cur      = get_AsciiZ(c);
      HChar*   data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
      Bool     join;

      TRACE_D3(" %s\n", data_str);

      /* Prefix with compdir only if: the name is not an absolute
         path, we actually have a compdir, and the result is short
         enough to build in buf. */
      join = data_str[0] != '/'
             && compdir
             && compdir_len
                + VG_(strlen)(data_str) + 5/*paranoia*/ < NBUF;

      if (join) {
         buf[0] = 0;
         VG_(strcat)(buf, compdir);
         VG_(strcat)(buf, "/");
         VG_(strcat)(buf, data_str);
         vg_assert(VG_(strlen)(buf) < NBUF);
         dirname = ML_(addStr)(di,buf,-1);
         if (0) VG_(printf)("rel path %s\n", buf);
      } else {
         /* just use 'data'. */
         dirname = ML_(addStr)(di,data_str,-1);
         if (0) VG_(printf)("abs path %s\n", data_str);
      }
      VG_(addToXA) (dirname_xa, &dirname);

      ML_(dinfo_free)(data_str);
   }

   TRACE_D3 ("\n");

   /* Consume the table's terminating NUL. */
   if (get_UChar (c) != 0) {
      ML_(symerr)(NULL, True,
                  "could not get NUL at end of DWARF directory table");
      VG_(deleteXA)(dirname_xa);
      return NULL;
   }

   return dirname_xa;
#  undef NBUF
}
/* Fill in 'fndn_ix_Table' (which must be empty on entry) from the
   line-number program header found at 'debug_line_offset' in
   .debug_line.

   Entry [0] is a dummy "<unknown_file>" entry.  After that there is
   one entry per file in the header's file name table, each being the
   value returned by ML_(addFnDn) for the file name and its directory.
   The directory is looked up in the header's directory table (see
   read_dirname_xa, to which 'compdir' is passed) and is NULL if the
   index is out of range or the directory table could not be read.

   Barfs via cc->barf if .debug_line is missing or too small, or if
   the line info version is not 2, 3 or 4. */
static
void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
                          HChar* compdir,
                          CUConst* cc, ULong debug_line_offset,
                          Bool td3 )
{
   Bool    is_dw64;
   Cursor  c;
   UShort  version;
   UChar   opcode_base;
   Word    n_to_skip;
   XArray* dirname_xa;   /* xarray of HChar* dirname */
   UInt    fndn_ix;

   vg_assert(fndn_ix_Table && cc && cc->barf);
   if (!ML_(sli_is_valid)(cc->escn_debug_line)
       || cc->escn_debug_line.szB <= debug_line_offset) {
      cc->barf("read_filename_table: .debug_line is missing?");
   }

   init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
                "Overrun whilst reading .debug_line section(1)" );

   /* Walk the header.  Most fields are read only to get past them. */

   /* unit_length */
   (void)get_Initial_Length( &is_dw64, &c,
            "read_filename_table: invalid initial-length field" );

   version = get_UShort( &c );
   if (version != 2 && version != 3 && version != 4)
      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
               "is currently supported.");

   (void)get_Dwarfish_UWord( &c, is_dw64 );  /* header_length */
   (void)get_UChar( &c );                    /* minimum_instruction_length */
   if (version >= 4) {
      (void)get_UChar( &c );                 /* maximum_operations_per_insn */
   }
   (void)get_UChar( &c );                    /* default_is_stmt */
   (void)get_UChar( &c );                    /* line_base */
   (void)get_UChar( &c );                    /* line_range */
   opcode_base = get_UChar( &c );

   /* skip over "standard_opcode_lengths": opcode_base-1 bytes */
   for (n_to_skip = (Word)opcode_base - 1; n_to_skip > 0; n_to_skip--)
      (void)get_UChar( &c );

   dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);

   /* Read and record the file names table */
   vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );

   /* Add a dummy index-zero entry.  DWARF3 numbers its files
      from 1, for some reason. */
   fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
   VG_(addToXA)( fndn_ix_Table, &fndn_ix );

   while (peek_UChar(&c) != 0) {
      DiCursor cur       = get_AsciiZ(&c);
      HChar*   str       = ML_(addStrFromCursor)( cc->di, cur );
      /* Index in dirname_xa, as read from dwarf info. */
      ULong    dir_xa_ix = get_ULEB128( &c );
      HChar*   dirname   = NULL;

      if (dirname_xa != NULL && dir_xa_ix < VG_(sizeXA) (dirname_xa))
         dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );

      fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
      TRACE_D3(" read_filename_table: %ld fndn_ix %d %s %s\n",
               VG_(sizeXA)(fndn_ix_Table), fndn_ix,
               dirname, str);
      VG_(addToXA)( fndn_ix_Table, &fndn_ix );

      (void)get_ULEB128( &c ); /* skip last mod time */
      (void)get_ULEB128( &c ); /* file size */
   }

   /* We're done!  The rest of it is not interesting. */
   if (dirname_xa != NULL)
      VG_(deleteXA)(dirname_xa);
}
/* setup_cu_svma is to be called when a CU DIE is found at level 0, to
   establish the CU's svma in 'cc'.

   have_lo / ip_lo: whether the DIE had a DW_AT_low_pc, and its value.

   The first call for a CU records the svma; any later call only
   asserts that it arrives at the same value. */
static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
{
   /* It may be that this DIE doesn't tell us the CU's SVMA, by way
      of not having a DW_AT_low_pc.  That's OK -- the CU doesn't
      *have* to have its SVMA specified.

      But as per last para D3 spec sec 3.1.1 ("Normal and Partial
      Compilation Unit Entries"), "If the base address (viz, the
      SVMA) is undefined, then any DWARF entry of structure defined
      in terms of the base address of that compilation unit is not
      valid.".  So that means, if whilst processing the children of
      this top level DIE (or their children, etc) we see a
      DW_AT_range, and cu_svma_known is False, then the DIE that
      contains it is (per the spec) invalid, and we can legitimately
      stop and complain.

      .. whereas The Reality is, simply assume the SVMA is zero if it
      isn't specified. */
   Addr cu_svma = have_lo ? ip_lo : 0;

   /* We have potentially more than one type of parser parsing the
      dwarf information.  At least currently, each parser establishes
      the cu_svma.  So, in case cu_svma_known, we check that the same
      result is obtained by the 2nd parsing of the cu.

      Alternatively, we could reset cu_svma_known after each parsing
      and then check that we only see a single DW_TAG_compile_unit
      DIE at level 0; DWARF3 only allows exactly one top level DIE
      per CU. */
   if (cc->cu_svma_known) {
      vg_assert (cu_svma == cc->cu_svma);
      return;
   }

   cc->cu_svma       = cu_svma;
   cc->cu_svma_known = True;
   if (0)
      TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
}
/* Print the DIE that starts at offset 'saved_die_c_offset' and is
   described by 'abbv', one attribute per line, then cross-check
   skip_DIE against it.

   dtag   the DIE's tag, for the heading.
   posn   cooked DIE offset; it is uncooked here to find out which
          section the DIE lives in.
   level  nesting level, for the heading.

   A fresh cursor is used for reading, so the caller's cursor is not
   disturbed. */
static void trace_DIE(
   DW_TAG dtag,
   UWord posn,
   Int level,
   UWord saved_die_c_offset,
   g_abbv *abbv,
   CUConst* cc)
{
   Cursor       c;             /* reads the DIE attribute by attribute */
   Cursor       check_skip;    /* same start point, fed to skip_DIE */
   FormContents cts;
   UWord        sibling       = 0;
   UWord        check_sibling = 0;
   UInt         nf_i;
   Bool         debug_types_flag;
   Bool         alt_flag;

   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   init_Cursor (&c,
                debug_types_flag ? cc->escn_debug_types :
                alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
                saved_die_c_offset, cc->barf,
                "Overrun trace_DIE");
   check_skip = c;

   VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
               level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
               debug_types_flag ? " (in .debug_types)" : "",
               alt_flag ? " (in alternate .debug_info)" : "");

   /* The (0, 0) name/form pair ends the attribute list. */
   for (nf_i = 0; ; nf_i++) {
      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
      if (attr == 0 && form == 0)
         break;

      VG_(printf)(" %18s: ", ML_(pp_DW_AT)(attr));
      /* Get the form contents, so as to print them */
      get_Form_contents( &cts, cc, &c, True, form );
      if (attr == DW_AT_sibling && cts.szB > 0)
         sibling = cts.u.val;
      VG_(printf)("\t\n");
   }

   /* Verify that skipping a DIE gives the same displacement as
      tracing (i.e. reading) a DIE.  If there is an inconsistency in
      the nr of bytes read by get_Form_contents and get_Form_szB, this
      should be detected by the below.  Using --trace-symtab=yes
      --read-var-info=yes will ensure all DIEs are systematically
      verified. */
   skip_DIE (&check_sibling, &check_skip, abbv, cc);
   vg_assert (check_sibling == sibling);
   vg_assert (get_position_of_Cursor (&check_skip)
              == get_position_of_Cursor (&c));
}
/* Common failure path for the DIE parsers: print the offending DIE
   using trace_DIE, print which parser gave up ('whichparser'), then
   barf via cc->barf.  Does not return.

   dtag, posn, level, saved_die_c_offset, abbv and cc are passed
   straight through to trace_DIE.  'c_die' is accepted but not used
   in the body. */
__attribute__((noreturn))
static void dump_bad_die_and_barf(
   const HChar *whichparser,
   DW_TAG dtag,
   UWord posn,
   Int level,
   Cursor* c_die,
   UWord saved_die_c_offset,
   g_abbv *abbv,
   CUConst* cc)
{
   trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
   VG_(printf)("%s:\n", whichparser);
   cc->barf("confused by the above DIE");
}
/* Print the source line number (in this file) at which a DIE parser
   gave up.  Kept out of line, and called only through goto_bad_DIE
   below. */
__attribute__((noinline))
static void bad_DIE_confusion(int linenr)
{
   VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
}
/* Report the current source line via bad_DIE_confusion, then jump to
   the label 'bad_DIE', which the enclosing function must provide.
   Wrapped in do/while(0) so that it behaves as a single statement. */
#define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
/* Parse one DIE on behalf of the variable/scope reader.

   The scope stack held in PARSER is first preened down to LEVEL-1.
   What happens after that depends on DTAG:

   - DW_TAG_compile_unit, DW_TAG_type_unit, DW_TAG_partial_unit:
     the low_pc/high_pc/ranges attributes are collected, the
     compilation directory and the file name table (DW_AT_stmt_list)
     are read, the CU's svma is set up when LEVEL is 0, and a range
     list describing the unit is pushed on the scope stack.

   - DW_TAG_lexical_block, DW_TAG_subprogram:
     the address range(s) of the scope are collected and, when they
     are usable, pushed on the scope stack together with an 'isFunc'
     marker and (for subprograms) the frame base expression.

   - DW_TAG_variable, DW_TAG_formal_parameter:
     name, type, location, origin and declaration coordinates are
     collected; if the result looks interesting a TempVar is
     allocated and appended to TEMPVARS.

   Any other tag is left alone (its attributes are not read here).

   Arguments:
     rangestree  (modified) set of range lists already cloned, used to
                 share structurally identical multi-range lists
     tempvars    (modified) receives the TempVar* of recorded variables
     gexprs      (modified) receives every GExpr* created here
     parser      (modified) scope stack and file name table
     dtag        tag of the DIE
     posn        position of the DIE; recorded as the TempVar's dioff
                 and used when reporting a bad DIE
     level       nesting level of the DIE
     c_die       cursor through which the DIE's attributes are read
     abbv        abbreviation describing the DIE's attribute/form list
     cc          per-CU constants
     td3         tracing flag, passed on to the helpers that take one

   A DIE with a combination of attributes that cannot be interpreted
   is reported through dump_bad_die_and_barf, which does not return. */
__attribute__((noinline))
static void parse_var_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3VarParser* parser,
   DW_TAG dtag,
   UWord posn,
   Int level,
   Cursor* c_die,
   g_abbv *abbv,
   CUConst* cc,
   Bool td3
)
{
   FormContents cts;
   UInt nf_i;

   /* Remember where the DIE's attributes start, so that the DIE can
      be re-read and printed if we have to give up on it. */
   UWord saved_die_c_offset = get_position_of_Cursor( c_die );

   varstack_preen( parser, td3, level-1 );

   if (dtag == DW_TAG_compile_unit
       || dtag == DW_TAG_type_unit
       || dtag == DW_TAG_partial_unit) {
      Bool have_lo      = False;
      Bool have_hi1     = False;
      Bool hiIsRelative = False;
      Bool have_range   = False;
      Addr ip_lo    = 0;
      Addr ip_hi1   = 0;
      Addr rangeoff = 0;
      HChar *compdir = NULL;
      nf_i = 0;
      while (True) {
         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
         nf_i++;
         /* A (0,0) pair terminates the attribute list. */
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_low_pc && cts.szB > 0) {
            ip_lo   = cts.u.val;
            have_lo = True;
         }
         if (attr == DW_AT_high_pc && cts.szB > 0) {
            ip_hi1   = cts.u.val;
            have_hi1 = True;
            /* Any form other than DW_FORM_addr is treated as an
               offset from low_pc; see the fixup after the loop. */
            if (form != DW_FORM_addr)
               hiIsRelative = True;
         }
         if (attr == DW_AT_ranges && cts.szB > 0) {
            rangeoff   = cts.u.val;
            have_range = True;
         }
         if (attr == DW_AT_comp_dir) {
            /* A negative szB is what marks the contents as a string
               reachable through cts.u.cur. */
            if (cts.szB >= 0)
               cc->barf("parse_var_DIE compdir: expecting indirect string");
            HChar *str = ML_(cur_read_strdup)( cts.u.cur,
                                               "parse_var_DIE.compdir" );
            compdir = ML_(addStr)(cc->di, str, -1);
            ML_(dinfo_free) (str);
         }
         if (attr == DW_AT_stmt_list && cts.szB > 0) {
            /* Uses whatever compdir has been seen so far (NULL if
               DW_AT_comp_dir has not been encountered yet). */
            read_filename_table( parser->fndn_ix_Table, compdir,
                                 cc, cts.u.val, td3 );
         }
      }
      if (have_lo && have_hi1 && hiIsRelative)
         ip_hi1 += ip_lo;

      /* Now, does this give us an opportunity to find this
         CU's svma? */
      if (level == 0)
         setup_cu_svma(cc, have_lo, ip_lo, td3);

      /* Do we have something that looks sane? */
      if (have_lo && have_hi1 && (!have_range)) {
         if (ip_lo < ip_hi1) {
            varstack_push( cc, parser, td3,
                           unitary_range_list(ip_lo, ip_hi1 - 1),
                           level,
                           False/*isFunc*/, NULL/*fbGX*/ );
         }
         else if (ip_lo == 0 && ip_hi1 == 0) {
            /* CU has no code, presumably?
               Such situations have been encountered for code
               compiled with -ffunction-sections -fdata-sections
               and linked with --gc-sections.  Completely
               eliminated CU gives such 0 lo/hi pc.  Similarly
               to a CU which has no lo/hi/range pc, we push
               an empty range list. */
            varstack_push( cc, parser, td3,
                           empty_range_list(),
                           level,
                           False/*isFunc*/, NULL/*fbGX*/ );
         }
      } else
      if ((!have_lo) && (!have_hi1) && have_range) {
         varstack_push( cc, parser, td3,
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if ((!have_lo) && (!have_hi1) && (!have_range)) {
         /* CU has no code, presumably? */
         varstack_push( cc, parser, td3,
                        empty_range_list(),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else
      if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
         /* broken DIE created by gcc-4.3.X ?  Ignore the
            apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
            instead. */
         varstack_push( cc, parser, td3,
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level,
                        False/*isFunc*/, NULL/*fbGX*/ );
      } else {
         if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
                            (Int)have_lo, (Int)have_hi1, (Int)have_range);
         goto_bad_DIE;
      }
   }

   if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
      Bool have_lo      = False;
      Bool have_hi1     = False;
      Bool have_range   = False;
      Bool hiIsRelative = False;
      Addr ip_lo    = 0;
      Addr ip_hi1   = 0;
      Addr rangeoff = 0;
      Bool isFunc = dtag == DW_TAG_subprogram;
      GExpr* fbGX = NULL;
      nf_i = 0;
      while (True) {
         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
         nf_i++;
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_low_pc && cts.szB > 0) {
            ip_lo   = cts.u.val;
            have_lo = True;
         }
         if (attr == DW_AT_high_pc && cts.szB > 0) {
            ip_hi1   = cts.u.val;
            have_hi1 = True;
            if (form != DW_FORM_addr)
               hiIsRelative = True;
         }
         if (attr == DW_AT_ranges && cts.szB > 0) {
            rangeoff   = cts.u.val;
            have_range = True;
         }
         if (isFunc
             && attr == DW_AT_frame_base
             && cts.szB != 0 /* either scalar or nonempty block */) {
            fbGX = get_GX( cc, False/*td3*/, &cts );
            vg_assert(fbGX);
            VG_(addToXA)(gexprs, &fbGX);
         }
      }
      if (have_lo && have_hi1 && hiIsRelative)
         ip_hi1 += ip_lo;

      /* Do we have something that looks sane? */
      if (dtag == DW_TAG_subprogram
          && (!have_lo) && (!have_hi1) && (!have_range)) {
         /* This is legit - ignore it.  Sec 3.3.3: "A subroutine entry
            representing a subroutine declaration that is not also a
            definition does not have code address or range
            attributes." */
      } else
      if (dtag == DW_TAG_lexical_block
          && (!have_lo) && (!have_hi1) && (!have_range)) {
         /* I believe this is legit, and means the lexical block
            contains no insns (whatever that might mean).  Ignore. */
      } else
      if (have_lo && have_hi1 && (!have_range)) {
         /* This scope supplies just a single address range.  An
            empty or inverted range (lo >= hi) is silently ignored. */
         if (ip_lo < ip_hi1)
            varstack_push( cc, parser, td3,
                           unitary_range_list(ip_lo, ip_hi1 - 1),
                           level, isFunc, fbGX );
      } else
      if ((!have_lo) && (!have_hi1) && have_range) {
         /* This scope supplies multiple address ranges via the use of
            a range list. */
         varstack_push( cc, parser, td3,
                        get_range_list( cc, td3,
                                        rangeoff, cc->cu_svma ),
                        level, isFunc, fbGX );
      } else
      if (have_lo && (!have_hi1) && (!have_range)) {
         /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
            Entries) says fairly clearly that a scope must have either
            _range or (_low_pc and _high_pc). */
         /* The spec is a bit ambiguous though.  Perhaps a single byte
            range is intended?  See sec 2.17 (Code Addresses And Ranges) */
         /* This case is here because icc9 produced this:
            <2><13bd>: DW_TAG_lexical_block
               DW_AT_decl_line   : 5229
               DW_AT_decl_column : 37
               DW_AT_decl_file   : 1
               DW_AT_low_pc      : 0x401b03
         */
         /* Ignore (seems safer than pushing a single byte range) */
      } else
         goto_bad_DIE;
   }

   if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
      HChar* name    = NULL;
      UWord  typeR   = D3_INVALID_CUOFF;
      Bool   global  = False;
      GExpr* gexpr   = NULL;
      UWord  abs_ori = (UWord)D3_INVALID_CUOFF;
      Int    lineNo  = 0;
      UInt   fndn_ix = 0;
      nf_i = 0;
      while (True) {
         DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
         DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
         nf_i++;
         if (attr == 0 && form == 0) break;
         get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
         if (attr == DW_AT_name && cts.szB < 0) {
            name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
         }
         if (attr == DW_AT_location
             && cts.szB != 0 /* either scalar or nonempty block */) {
            gexpr = get_GX( cc, False/*td3*/, &cts );
            vg_assert(gexpr);
            VG_(addToXA)(gexprs, &gexpr);
         }
         if (attr == DW_AT_type && cts.szB > 0) {
            typeR = cook_die_using_form( cc, cts.u.val, form );
         }
         if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
            global = True;
         }
         if (attr == DW_AT_abstract_origin && cts.szB > 0) {
            abs_ori = (UWord)cts.u.val;
         }
         /* DW_AT_declaration is deliberately not acted upon. */
         if (attr == DW_AT_decl_line && cts.szB > 0) {
            lineNo = (Int)cts.u.val;
         }
         if (attr == DW_AT_decl_file && cts.szB > 0) {
            Int ftabIx = (Int)cts.u.val;
            /* Only indices 1 .. size-1 are looked up; anything else
               leaves fndn_ix at its current value. */
            if (ftabIx >= 1
                && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
               fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
            }
            if (0) VG_(printf)("XXX filename fndn_ix = %d %s\n", fndn_ix,
                               ML_(fndn_ix2filename) (cc->di, fndn_ix));
         }
      }
      if (!global && dtag == DW_TAG_variable && level == 1) {
         /* Case of a static variable.  It is better to declare
            it global as the variable is not really related to
            a PC range, as its address can be used by program
            counters outside of the ranges where it is visible. */
         global = True;
      }

      /* We'll collect it if one of the following three
         conditions holds:
         (1) has location and type    -> completed
         (2) has type only            -> is an abstract instance
         (3) has location and abs_ori -> is a concrete instance
         Name, fndn_ix and line number are all optional frills.
      */
      if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
           /* 2 */ || (typeR != D3_INVALID_CUOFF)
           /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {

         /* Add this variable to the list of interesting looking
            variables.  Crucially, note along with it the address
            range(s) associated with the variable, which for locals
            will be the address ranges at the top of the varparser's
            stack. */
         GExpr*   fbGX = NULL;
         Word     i, nRanges;
         XArray*  /* of AddrRange */ xa;
         TempVar* tv;
         /* Stack can't be empty; we put a dummy entry on it for the
            entire address range before starting with the DIEs for
            this CU. */
         vg_assert(parser->sp >= 0);

         /* If this is a local variable (non-global), try to find
            the GExpr for the DW_AT_frame_base of the containing
            function.  It should have been pushed on the stack at the
            time we encountered its DW_TAG_subprogram DIE, so the way
            to find it is to scan back down the stack looking for it.
            If there isn't an enclosing stack entry marked 'isFunc'
            then we must be seeing variable or formal param DIEs
            outside of a function, so we deem the Dwarf to be
            malformed if that happens.  Note that the fbGX may be NULL
            if the containing DW_TAG_subprogram didn't supply a
            DW_AT_frame_base -- that's OK, but there must actually be
            a containing DW_TAG_subprogram. */
         if (!global) {
            Bool found = False;
            for (i = parser->sp; i >= 0; i--) {
               if (parser->isFunc[i]) {
                  fbGX = parser->fbGX[i];
                  found = True;
                  break;
               }
            }
            if (!found) {
               if (0 && VG_(clo_verbosity) >= 0) {
                  VG_(message)(Vg_DebugMsg,
                     "warning: parse_var_DIE: non-global variable "
                     "outside DW_TAG_subprogram\n");
               }
               /* goto_bad_DIE; */
               /* This seems to happen a lot.  Just ignore it -- if,
                  when we come to evaluation of the location (guarded)
                  expression, it requires a frame base value, and
                  there's no expression for that, then evaluation as a
                  whole will fail.  Harmless - a bit of a waste of
                  cycles but nothing more. */
            }
         }

         /* re "global ? 0 : parser->sp" (twice), if the var is
            marked 'global' then we must put it at the global scope,
            as only the global scope (level 0) covers the entire PC
            address space.  It is asserted elsewhere that level 0
            always covers the entire address space. */
         xa = parser->ranges[global ? 0 : parser->sp];
         nRanges = VG_(sizeXA)(xa);
         vg_assert(nRanges >= 0);

         tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
         tv->name    = name;
         tv->level   = global ? 0 : parser->sp;
         tv->typeR   = typeR;
         tv->gexpr   = gexpr;
         tv->fbGX    = fbGX;
         tv->fndn_ix = fndn_ix;
         tv->fLine   = lineNo;
         tv->dioff   = posn;
         tv->absOri  = abs_ori;

         /* See explanation on definition of type TempVar for the
            reason for this elaboration. */
         tv->nRanges   = nRanges;
         tv->rngOneMin = 0;
         tv->rngOneMax = 0;
         tv->rngMany   = NULL;
         if (nRanges == 1) {
            AddrRange* range = VG_(indexXA)(xa, 0);
            tv->rngOneMin = range->aMin;
            tv->rngOneMax = range->aMax;
         }
         else if (nRanges > 1) {
            /* See if we already have a range list which is
               structurally identical.  If so, use that; if not, clone
               this one, and add it to our collection. */
            UWord keyW, valW;
            if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
               XArray* old = (XArray*)keyW;
               /* Core code: use vg_assert, like the rest of this
                  function, rather than the tool-side tl_assert. */
               vg_assert(valW == 0);
               vg_assert(old != xa);
               tv->rngMany = old;
            } else {
               XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
               tv->rngMany = cloned;
               VG_(addToFM)( rangestree, (UWord)cloned, 0 );
            }
         }

         VG_(addToXA)( tempvars, &tv );

         TRACE_D3(" Recording this variable, with %ld PC range(s)\n",
                  VG_(sizeXA)(xa) );
         /* collect stats on how effective the ->ranges special
            casing is */
         if (0) {
            static Int ntot=0, ngt=0;
            ntot++;
            if (tv->rngMany) ngt++;
            if (0 == (ntot % 100000))
               VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
         }

      }

      /* Here are some other weird cases seen in the wild:

            We have a variable with a name and a type, but no
            location.  I guess that's a sign that it has been
            optimised away.  Ignore it.  Here's an example:

            static Int lc_compar(void* n1, void* n2) {
               MC_Chunk* mc1 = *(MC_Chunk**)n1;
               MC_Chunk* mc2 = *(MC_Chunk**)n2;
               return (mc1->data < mc2->data ? -1 : 1);
            }

            Both mc1 and mc2 are like this
            <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
                DW_AT_name        : mc1
                DW_AT_decl_file   : 1
                DW_AT_decl_line   : 216
                DW_AT_type        : <5d3>

            whereas n1 and n2 do have locations specified.

            ---------------------------------------------

            We see a DW_TAG_formal_parameter with a type, but
            no name and no location.  It's probably part of a function type
            construction, thusly, hence ignore it:
         <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
             DW_AT_sibling     : <2c9>
             DW_AT_prototyped  : 1
             DW_AT_type        : <114>
         <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
             DW_AT_type        : <13e>
         <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
             DW_AT_type        : <133>

            ---------------------------------------------

            Is very minimal, like this:
            <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
                DW_AT_abstract_origin: <7ba>
            What that signifies I have no idea.  Ignore.

            ----------------------------------------------

            Is very minimal, like this:
            <200f>: DW_TAG_formal_parameter
                DW_AT_abstract_ori: <1f4c>
                DW_AT_location    : 13440
            What that signifies I have no idea.  Ignore.
            It might be significant, though: the variable at least
            has a location and so might exist somewhere.
            Maybe we should handle this.

            ---------------------------------------------

            <22407>: DW_TAG_variable
              DW_AT_name        : (indirect string, offset: 0x6579):
                                  vgPlain_trampoline_stuff_start
              DW_AT_decl_file   : 29
              DW_AT_decl_line   : 56
              DW_AT_external    : 1
              DW_AT_declaration : 1

            Nameless and typeless variable that has a location?  Who
            knows.  Not me.
            <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
                 DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
                                     (DW_OP_addr: 3813c7c0)

            No, really.  Check it out.  gcc is quite simply borked.
            <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
            // followed by no attributes, and the next DIE is a sibling,
            // not a child
      */
   }
   return;

  bad_DIE:
   dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
                         c_die, saved_die_c_offset,
                         abbv,
                         cc);
   /*NOTREACHED*/
}
/* State for parsing DIEs when looking for inlined-function
   information (presumably the state threaded through parse_inl_DIE --
   confirm against its signature). */
typedef
struct {
/* The fndn_ix file name/dirname table. Is a mapping from dwarf
integer index to the index in di->fndnpool. */
XArray* /* of UInt* */ fndn_ix_Table;
UWord sibling; // sibling of the last read DIE (if it has a sibling).
}
D3InlParser;
/* Return the name of the function designated by absori.

   absori is a 'cooked' reference to a DIE, that is, it may refer
   either to cc->escn_debug_info or to cc->escn_debug_info_alt;
   get_inlFnName uncooks it itself.

   The result is a (permanent) string in DebugInfo's .strchunks.  If
   the DIE carries neither a DW_AT_name nor a usable
   DW_AT_specification, "AbsOriFnNameNotFound" is returned.

   LIMITATION: absori must point into the CU described by cc.  If it
   points into another CU, "UnknownInlinedFun" is returned.

   The obstacles to retrieving the name when absori lies in another
   CU are these: the DIE reading code cannot properly extract data
   from another CU, because the abbv code found in the other CU
   cannot be translated into an abbreviation.  Reading data from the
   alternate debug info is problematic as well: the string reference
   also lives in the alternate file, yet when reading the alt DIE the
   string form is a 'local' string, which cannot be read in the
   current CU but must be read in the alt CU.
   See bug 338803 comment#3 and its attachment for a failed attempt
   to handle these problems (failed because with that patch only one
   alt abbrev hash table is kept, whereas all abbreviations of all
   CUs referenced by an absori must be handled, be it a reference to
   an alt CU or to a previous or following CU).

   NOTE(review): absori is declared Int, while the recursive call
   below passes a UWord cooked reference -- confirm that no
   significant bits can be lost on 64-bit targets. */
static HChar* get_inlFnName (Int absori, CUConst* cc, Bool td3)
{
   Cursor       die_cur;
   g_abbv*      abbv;
   ULong        abbv_code;
   ULong        atag;
   UInt         has_children;
   UInt         ix;
   UWord        posn;
   Bool         in_types;
   Bool         in_alt;
   Bool         in_this_cu;
   FormContents cts;
   HChar*       result = NULL;

   posn = uncook_die( cc, absori, &in_types, &in_alt );
   if (in_types)
      cc->barf("get_inlFnName: uncooked absori in type debug info");

   /* LIMITATION: the DIE must live in the CU being read.  It does so
      only if it is on the same side of the normal/alternate debug
      info divide as this CU and its position falls inside the CU's
      extent. */
   in_this_cu = in_alt == cc->is_alt_info
                && posn >= cc->cu_start_offset
                && posn < cc->cu_start_offset + cc->unit_length;
   if (!in_this_cu) {
      /* Complain at most once per run, and only when verbose. */
      static Bool reported = False;
      if (!reported && VG_(clo_verbosity) > 1) {
         VG_(message)(Vg_DebugMsg,
                      "Warning: cross-CU LIMITATION: some inlined fn names\n"
                      "might be shown as UnknownInlinedFun\n");
         reported = True;
      }
      TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
      return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
   }

   /* Position a cursor on the DIE and fetch its abbreviation. */
   init_Cursor (&die_cur, cc->escn_debug_info, posn, cc->barf,
                "Overrun get_inlFnName absori");
   abbv_code = get_ULEB128( &die_cur );
   abbv      = get_abbv ( cc, abbv_code);
   atag      = abbv->atag;
   TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
            posn, abbv_code, ML_(pp_DW_TAG)( atag ) );

   /* Sanity checks on what we landed on. */
   if (atag == 0)
      cc->barf("get_inlFnName: invalid zero tag on DIE");

   has_children = abbv->has_children;
   if (has_children != DW_children_no && has_children != DW_children_yes)
      cc->barf("get_inlFnName: invalid has_children value");

   if (atag != DW_TAG_subprogram)
      cc->barf("get_inlFnName: absori not a subprogram");

   /* Walk the attribute list; a (0,0) name/form pair ends it. */
   for (ix = 0; ; ix++) {
      DW_AT   attr = (DW_AT)  abbv->nf[ix].at_name;
      DW_FORM form = (DW_FORM)abbv->nf[ix].at_form;

      if (attr == 0 && form == 0)
         break;

      get_Form_contents( &cts, cc, &die_cur, False/*td3*/, form );

      if (attr == DW_AT_name) {
         HChar *tmp_name;
         if (cts.szB >= 0)
            cc->barf("get_inlFnName: expecting indirect string");
         /* Copy the string out, intern it, drop the temporary. */
         tmp_name = ML_(cur_read_strdup)( cts.u.cur,
                                          "get_inlFnName.1" );
         result = ML_(addStr)(cc->di, tmp_name, -1);
         ML_(dinfo_free) (tmp_name);
         /* A name on the DIE itself takes priority over anything
            obtained through DW_AT_specification, so stop here. */
         break;
      }
      else if (attr == DW_AT_specification) {
         UWord spec_die;
         if (cts.szB == 0)
            cc->barf("get_inlFnName: AT specification missing");

         /* The recursive call uncooks its argument, so the reference
            has to be cooked first in order to designate the correct
            section (e.g. the alt info). */
         spec_die = cook_die_using_form(cc, (UWord)cts.u.val, form);

         /* hoping that there is no loop */
         result = get_inlFnName (spec_die, cc, td3);

         /* It is unclear whether a DIE can have both
            DW_AT_specification and DW_AT_name, so keep scanning: a
            DW_AT_name found later overrides the name obtained
            here. */
      }
   }

   if (result == NULL) {
      TRACE_D3("AbsOriFnNameNotFound");
      result = ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
   }
   return result;
}
/* Returns True if the children (if any) of the current DIE are interesting
to parse. Returns False otherwise.
If the current DIE has a sibling, the uninteresting children may be
skippable (provided the DIE has a DW_AT_sibling attribute). */
__attribute__((noinline))
static Bool parse_inl_DIE (