blob: 1f02503f26110deaef4783852aaf9871fc189053 [file] [log] [blame]
/*--------------------------------------------------------------------*/
/*--- Format-neutral storage of and querying of info acquired from ---*/
/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info. ---*/
/*--- priv_storage.h ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2000-2013 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
/*
Stabs reader greatly improved by Nick Nethercote, Apr 02.
This module was also extensively hacked on by Jeremy Fitzhardinge
and Tom Hughes.
*/
/* See comment at top of debuginfo.c for explanation of
the _svma / _avma / _image / _bias naming scheme.
*/
/* Note this is not freestanding; needs pub_core_xarray.h and
priv_tytypes.h to be included before it. */
#ifndef __PRIV_STORAGE_H
#define __PRIV_STORAGE_H
#include "pub_core_basics.h" // Addr
#include "pub_core_xarray.h" // XArray
#include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
#include "priv_d3basics.h" // GExpr et al.
#include "priv_image.h" // DiCursor
/* --------------------- SYMBOLS --------------------- */
/* A structure to hold an ELF/MachO symbol (very crudely). Usually
the symbol only has one name, which is stored in ::pri_name, and
::sec_names is NULL. If there are other names, these are stored in
::sec_names, which is a NULL terminated vector holding the names.
The vector is allocated in VG_AR_DINFO, the names themselves live
in DebugInfo::strpool.
From the point of view of ELF, the primary vs secondary distinction
is artificial: they are all just names associated with the address,
none of which has higher precedence than any other. However, from
the point of view of mapping an address to a name to display to the
user, we need to choose one "preferred" name, and so that might as
well be installed as the pri_name, whilst all others can live in
sec_names[]. This has the convenient side effect that, in the
common case where there is only one name for the address,
sec_names[] does not need to be allocated.
*/
/* DiSym: one symbol table entry.  Holds the symbol's addresses, its
   primary name, an optional vector of secondary names, its size and
   two classification flags. */
typedef
struct {
SymAVMAs avmas; /* Symbol Actual VMAs: lowest address of entity,
+ platform specific fields, to access with
the macros defined in pub_core_debuginfo.h */
const HChar* pri_name; /* primary name, never NULL */
const HChar** sec_names; /* NULL, or a NULL term'd array of other names */
// XXX: this could be shrunk (on 32-bit platforms) by using 30
// bits for the size and 1 bit each for isText and isIFunc. If you
// do this, make sure that all assignments to the latter two use
// 0 or 1 (or True or False), and that a positive number larger
// than 1 is never used to represent True.
UInt size; /* size in bytes */
Bool isText; /* NOTE(review): presumably "symbol refers to code
                rather than data" -- confirm against the readers */
Bool isIFunc; /* symbol is an indirect function? */
}
DiSym;
/* --------------------- SRCLOCS --------------------- */
/* A line number and the size of its code range are stored as two
   bit-fields whose widths add up to 32: LINENO_BITS for the line
   number, and the remaining LOC_SIZE_BITS for the size in bytes. */
#define LINENO_BITS 20
#define LOC_SIZE_BITS (32 - LINENO_BITS)
/* Largest line number that fits in LINENO_BITS bits. */
#define MAX_LINENO ((1 << LINENO_BITS) - 1)
/* Largest size that fits in LOC_SIZE_BITS bits.  Unlikely to have any
   lines with instruction ranges > 4096 bytes. */
#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1)
/* Line count at which overflow happens, due to line numbers being
   stored as shorts in `struct nlist' in a.out.h. */
#define LINENO_OVERFLOW (1 << (sizeof(short) * 8))
/* Number used to detect line number overflows; if one line is
   60000-odd smaller than the previous, it was probably an overflow. */
#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000)
/* Filename and Dirname pair. FnDn are stored in di->fndnpool
and are allocated using VG_(allocFixedEltDedupPA).
The filename/dirname strings are themselves stored in di->strpool. */
/* FnDn: one (filename, dirname) pair.  The two strings are not owned
   by this struct; they live in di->strpool. */
typedef
struct {
const HChar* filename; /* source filename */
const HChar* dirname; /* source directory name; may be NULL */
} FnDn;
/* A structure to hold addr-to-source info for a single line. There
can be a lot of these, hence the dense packing. */
/* DiLoc: one address-to-line record.  'size' and 'lineno' are
   bit-fields of LOC_SIZE_BITS and LINENO_BITS bits, which together
   make 32 bits, so that they are intended to share "Word 2". */
typedef
struct {
/* Word 1 */
Addr addr; /* lowest address for this line */
/* Word 2 */
UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
UInt lineno:LINENO_BITS; /* source line number, or zero */
}
DiLoc;
/* The inlining level is stored in the bits left over from a 32-bit
   quantity once LINENO_BITS have been used for the caller's line
   number; MAX_LEVEL is the largest level that fits. */
#define LEVEL_BITS (32 - LINENO_BITS)
#define MAX_LEVEL ((1 << LEVEL_BITS) - 1)
/* A structure to hold addr-to-inlined fn info. There
can be a lot of these, hence the dense packing.
Only caller source filename and lineno are stored.
Handling dirname should be done using fndn_ix technique
similar to ML_(addLineInfo). */
typedef
struct {
/* Word 1 */
Addr addr_lo; /* lowest address for inlined fn */
/* Word 2 */
Addr addr_hi; /* first address following the inlined fn, that is,
                 an exclusive upper bound */
/* Word 3 */
const HChar* inlinedfn; /* inlined function name */
/* Word 4 and 5 */
UInt fndn_ix; /* index in di->fndnpool of caller source
dirname/filename */
UInt lineno:LINENO_BITS; /* caller line number */
UShort level:LEVEL_BITS; /* level of inlining */
}
DiInlLoc;
/* --------------------- CF INFO --------------------- */
/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
address range [base .. base+len-1].
On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
some point and {e,r}ip is in the range [base .. base+len-1], it
tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
current frame and also ra, the return address of the current frame.
First off, calculate CFA, the Canonical Frame Address, thusly:
cfa = case cfa_how of
CFIC_IA_SPREL -> {e,r}sp + cfa_off
CFIC_IA_BPREL -> {e,r}bp + cfa_off
CFIC_EXPR -> expr whose index is in cfa_off
Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
this frame's {e,r}ra value can be calculated like this:
old_{e,r}sp/{e,r}bp/ra
= case {e,r}sp/{e,r}bp/ra_how of
CFIR_UNKNOWN -> we don't know, sorry
CFIR_SAME -> same as it was before (sp/fp only)
CFIR_CFAREL -> cfa + sp/bp/ra_off
CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
CFIR_EXPR -> expr whose index is in sp/bp/ra_off
On ARM it's pretty much the same, except we have more registers to
keep track of:
cfa = case cfa_how of
CFIC_ARM_R13REL -> r13 + cfa_off
CFIC_ARM_R12REL -> r12 + cfa_off
CFIC_ARM_R11REL -> r11 + cfa_off
CFIC_ARM_R7REL -> r7 + cfa_off
CFIC_EXPR -> expr whose index is in cfa_off
old_r14/r13/r12/r11/r7/ra
= case r14/r13/r12/r11/r7/ra_how of
CFIR_UNKNOWN -> we don't know, sorry
CFIR_SAME -> same as it was before (r14/r13/r12/r11/r7 only)
CFIR_CFAREL -> cfa + r14/r13/r12/r11/r7/ra_off
CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
CFIR_EXPR -> expr whose index is in r14/r13/r12/r11/r7/ra_off
On ARM64:
cfa = case cfa_how of
CFIC_ARM64_SPREL -> sp + cfa_off
CFIC_ARM64_X29REL -> x29 + cfa_off
CFIC_EXPR -> expr whose index is in cfa_off
old_sp/x30/x29/ra
= case sp/x30/x29/ra_how of
CFIR_UNKNOWN -> we don't know, sorry
CFIR_SAME -> same as it was before
CFIR_CFAREL -> cfa + sp/x30/x29/ra_off
CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
CFIR_EXPR -> expr whose index is in sp/x30/x29/ra_off
On s390x we have a similar logic as x86 or amd64. We need the stack pointer
(r15), the frame pointer r11 (like BP) and together with the instruction
address in the PSW we can calculate the previous values:
cfa = case cfa_how of
CFIC_IA_SPREL -> r15 + cfa_off
CFIC_IA_BPREL -> r11 + cfa_off
CFIC_EXPR -> expr whose index is in cfa_off
old_sp/fp/ra
= case sp/fp/ra_how of
CFIR_UNKNOWN -> we don't know, sorry
CFIR_SAME -> same as it was before (sp/fp only)
CFIR_CFAREL -> cfa + sp/fp/ra_off
CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
CFIR_EXPR -> expr whose index is in sp/fp/ra_off
*/
/* CFIC_ values: stored in DiCfSI_m::cfa_how, they select how the CFA
   itself is computed.  Values 1 .. 9. */
#define CFIC_IA_SPREL ((UChar)1)
#define CFIC_IA_BPREL ((UChar)2)
#define CFIC_ARM_R13REL ((UChar)3)
#define CFIC_ARM_R12REL ((UChar)4)
#define CFIC_ARM_R11REL ((UChar)5)
#define CFIC_ARM_R7REL ((UChar)6)
#define CFIC_ARM64_SPREL ((UChar)7)
#define CFIC_ARM64_X29REL ((UChar)8)
#define CFIC_EXPR ((UChar)9) /* all targets */
/* CFIR_ values: stored in the other DiCfSI_m::*_how fields, they
   select how a register of the calling frame is recovered.  Values
   64 .. 68, so they do not overlap the CFIC_ range. */
#define CFIR_UNKNOWN ((UChar)64)
#define CFIR_SAME ((UChar)65)
#define CFIR_CFAREL ((UChar)66)
#define CFIR_MEMCFAREL ((UChar)67)
#define CFIR_EXPR ((UChar)68)
/* Definition of the DiCfSI_m DiCfSI machine dependent part.
These are highly duplicated, and are stored in a pool.
Exactly one of the variants below is compiled, selected by the VGA_
architecture symbol.  Every variant has cfa_how/cfa_off and
ra_how/ra_off; the remaining how/off pairs are per-architecture. */
#if defined(VGA_x86) || defined(VGA_amd64)
typedef
struct {
UChar cfa_how; /* a CFIC_IA value */
UChar ra_how; /* a CFIR_ value */
UChar sp_how; /* a CFIR_ value */
UChar bp_how; /* a CFIR_ value */
Int cfa_off;
Int ra_off;
Int sp_off;
Int bp_off;
}
DiCfSI_m;
#elif defined(VGA_arm)
typedef
struct {
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
UChar r14_how; /* a CFIR_ value */
UChar r13_how; /* a CFIR_ value */
UChar r12_how; /* a CFIR_ value */
UChar r11_how; /* a CFIR_ value */
UChar r7_how; /* a CFIR_ value */
Int cfa_off;
Int ra_off;
Int r14_off;
Int r13_off;
Int r12_off;
Int r11_off;
Int r7_off;
// If you add additional fields, don't forget to update the
// initialisation of this in readexidx.c accordingly.
}
DiCfSI_m;
#elif defined(VGA_arm64)
typedef
struct {
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
UChar sp_how; /* a CFIR_ value */ /*dw31=SP*/
UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
Int cfa_off;
Int ra_off;
Int sp_off;
Int x30_off;
Int x29_off;
}
DiCfSI_m;
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
/* Just have a struct with the common fields in, so that code that
processes the common fields doesn't have to be ifdef'd against
VGP_/VGA_ symbols. These are not used in any way on ppc32/64-linux
at the moment. */
typedef
struct {
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
Int cfa_off;
Int ra_off;
}
DiCfSI_m;
#elif defined(VGA_s390x)
typedef
struct {
UChar cfa_how; /* a CFIC_ value */
UChar sp_how; /* a CFIR_ value */
UChar ra_how; /* a CFIR_ value */
UChar fp_how; /* a CFIR_ value */
Int cfa_off;
Int sp_off;
Int ra_off;
Int fp_off;
}
DiCfSI_m;
#elif defined(VGA_mips32) || defined(VGA_mips64)
typedef
struct {
UChar cfa_how; /* a CFIC_ value */
UChar ra_how; /* a CFIR_ value */
UChar sp_how; /* a CFIR_ value */
UChar fp_how; /* a CFIR_ value */
Int cfa_off;
Int ra_off;
Int sp_off;
Int fp_off;
}
DiCfSI_m;
#else
/* No DiCfSI_m layout is defined for this architecture: stop the build. */
# error "Unknown arch"
#endif
/* DiCfSI: CFA summary for the code address range
   [base .. base+len-1].  The machine dependent part is not stored
   inline; cfsi_m_ix identifies it. */
typedef
struct {
Addr base; /* first address of the range */
UInt len; /* length of the range, in bytes */
UInt cfsi_m_ix; /* index of the DiCfSI_m in the DebugInfo's
                   cfsi_m_pool */
}
DiCfSI;
/* Unary operator codes, as held in the Unop variant of a CfiExpr.
   The values are consecutive, starting at 0x231. */
typedef enum {
   Cunop_Abs = 0x231,
   Cunop_Neg = 0x232,
   Cunop_Not = 0x233
} CfiUnop;
/* Binary operator codes, as held in the Binop variant of a CfiExpr.
   The values are consecutive, starting at 0x321. */
typedef enum {
   Cbinop_Add = 0x321,
   Cbinop_Sub = 0x322,
   Cbinop_And = 0x323,
   Cbinop_Mul = 0x324,
   Cbinop_Shl = 0x325,
   Cbinop_Shr = 0x326,
   Cbinop_Eq  = 0x327,
   Cbinop_Ge  = 0x328,
   Cbinop_Gt  = 0x329,
   Cbinop_Le  = 0x32A,
   Cbinop_Lt  = 0x32B,
   Cbinop_Ne  = 0x32C
} CfiBinop;
/* Register codes, as held in the CfiReg variant of a CfiExpr.  The
   values are consecutive, starting at 0x213 with Creg_INVALID. */
typedef enum {
   Creg_INVALID   = 0x213,
   Creg_IA_SP     = 0x214,
   Creg_IA_BP     = 0x215,
   Creg_IA_IP     = 0x216,
   Creg_ARM_R13   = 0x217,
   Creg_ARM_R12   = 0x218,
   Creg_ARM_R15   = 0x219,
   Creg_ARM_R14   = 0x21A,
   Creg_ARM_R7    = 0x21B,
   Creg_ARM64_X30 = 0x21C,
   Creg_S390_R14  = 0x21D,
   Creg_MIPS_RA   = 0x21E
} CfiReg;
/* Discriminator for the union inside a CfiExpr: one tag per variant.
   The values are consecutive, starting at 0x123. */
typedef enum {
   Cex_Undef  = 0x123,
   Cex_Deref  = 0x124,
   Cex_Const  = 0x125,
   Cex_Unop   = 0x126,
   Cex_Binop  = 0x127,
   Cex_CfiReg = 0x128,
   Cex_DwReg  = 0x129
} CfiExprTag;
/* CfiExpr: one node of a CFI expression, as a tagged union.  'tag'
   says which member of 'Cex' is meaningful.  Operands are not
   pointers but Int indexes (ixAddr, ix, ixL, ixR).
   NOTE(review): presumably these index other CfiExpr nodes in the
   same XArray (see DebugInfo's cfsi_exprs) -- confirm against the
   implementation. */
typedef
struct {
CfiExprTag tag;
union {
struct {
} Undef; /* Cex_Undef: no payload */
struct {
Int ixAddr; /* index of the address operand */
} Deref;
struct {
UWord con; /* the constant */
} Const;
struct {
CfiUnop op;
Int ix; /* index of the operand */
} Unop;
struct {
CfiBinop op;
Int ixL; /* index of the left operand */
Int ixR; /* index of the right operand */
} Binop;
struct {
CfiReg reg;
} CfiReg;
struct {
Int reg; /* NOTE(review): presumably a DWARF register number
            -- confirm */
} DwReg;
}
Cex;
}
CfiExpr;
/* Constructors, one per CfiExprTag variant.  Each takes the
   destination XArray followed by that variant's operand(s), and
   returns an Int.
   NOTE(review): presumably the new node is appended to 'dst' and the
   returned Int is its index there -- confirm against the
   implementation. */
extern Int ML_(CfiExpr_Undef) ( XArray* dst );
extern Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
extern Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
extern Int ML_(CfiExpr_Unop) ( XArray* dst, CfiUnop op, Int ix );
extern Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
extern Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
extern Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );
/* NOTE(review): presumably prints the expression found at index 'ix'
   of 'src' -- confirm against the implementation. */
extern void ML_(ppCfiExpr)( const XArray* src, Int ix );
/* ---------------- FPO INFO (Windows PE) -------------- */
/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
a primitive CFI */
/* FPO_DATA: one FramePointerOmitted record.  The six trailing
   bit-fields total 3+1+1+1+2 = 8 bits, so the fixed-width members
   add up to the 16 bytes noted below. */
typedef
struct _FPO_DATA { /* 16 bytes */
UInt ulOffStart; /* offset of 1st byte of function code */
UInt cbProcSize; /* # bytes in function */
UInt cdwLocals; /* # bytes/4 in locals */
UShort cdwParams; /* # bytes/4 in params */
UChar cbProlog; /* # bytes in prolog */
UChar cbRegs :3; /* # regs saved */
UChar fHasSEH:1; /* Structured Exception Handling */
UChar fUseBP :1; /* EBP has been used */
UChar reserved:1;
UChar cbFrame:2; /* frame type */
}
FPO_DATA;
/* NOTE(review): presumably the values held in FPO_DATA::cbFrame --
   confirm against the PDB reader. */
#define PDB_FRAME_FPO 0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS 2
/* --------------------- VARIABLES --------------------- */
/* DiAddrRange: an address range together with the variables that
   are in scope over that range. */
typedef
struct {
Addr aMin; /* lower bound of the range */
Addr aMax; /* upper bound of the range
              NOTE(review): presumably inclusive -- confirm */
XArray* /* of DiVariable */ vars;
}
DiAddrRange;
/* DiVariable: what is recorded about one variable: its name, a
   reference to its type, two guarded expressions and the place where
   it was declared. */
typedef
struct {
const HChar* name; /* in DebugInfo.strpool */
UWord typeR; /* a cuOff */
GExpr* gexpr; /* on DebugInfo.gexprs list */
/* NOTE(review): struct _DebugInfo has no field called 'gexprs';
   presumably admin_gexprs is meant -- confirm. */
GExpr* fbGX; /* SHARED. */
UInt fndn_ix; /* where declared; may be zero. index
in DebugInfo.fndnpool */
Int lineNo; /* where declared; may be zero. */
}
DiVariable;
/* Comparison function taking a key and an element, both as untyped
   pointers, and returning a Word.
   NOTE(review): presumably 'elemV' is a DiAddrRange* and the result
   is negative/zero/positive according to whether the key lies
   below/inside/above its [aMin, aMax] range -- confirm against the
   implementation. */
Word
ML_(cmp_for_DiAddrRange_range) ( const void* keyV, const void* elemV );
/* --------------------- DEBUGINFO --------------------- */
/* This is the top-level data type. It's a structure which contains
information pertaining to one mapped ELF object. This type is
exported only abstractly - in pub_tool_debuginfo.h. */
/* First though, here's an auxiliary data structure. It is only ever
used as part of a struct _DebugInfo. We use it to record
observations about mappings and permission changes to the
associated file, so as to decide when to read debug info. It's
essentially an ultra-trivial finite state machine which, when it
reaches an accept state, signals that we should now read debug info
from the object into the associated struct _DebugInfo. The accept
state is arrived at when have_rx_map and have_rw_map both become
true. The initial state is one in which we have no observations,
so have_rx_map and have_rw_map are both false.
This all started as a rather ad-hoc solution, but was further
expanded to handle weird object layouts, e.g. more than one rw
or rx mapping for one binary.
The normal sequence of events is one of
start --> r-x mapping --> rw- mapping --> accept
start --> rw- mapping --> r-x mapping --> accept
that is, take the first r-x and rw- mapping we see, and we're done.
On MacOSX >= 10.7, 32-bit, there appears to be a new variant:
start --> r-- mapping --> rw- mapping
--> upgrade r-- mapping to r-x mapping --> accept
where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
Hence we need to also track this possibility.
From perusal of dyld sources, it appears that this scheme could
also be used for 64 bit libraries, although that doesn't seem to happen
in practice. dyld uses this scheme when the text section requires
relocation, which only appears to be the case for 32 bit objects.
*/
/* DebugInfoMapping: one observed mapping of the object's file, as
   stored in _DebugInfoFSM::maps. */
typedef struct
{
Addr avma; /* address at which the file was mapped */
SizeT size; /* length of the mapping */
OffT foff; /* file offset of the mapping */
Bool rx, rw, ro; /* memory access flags for this mapping */
} DebugInfoMapping;
/* State of the "is it time to read debug info yet?" state machine
   for one object.  'maps' accumulates the mappings seen so far; the
   three have_*_map flags summarise which kinds have been seen.  The
   accept state is reached when have_rx_map and have_rw_map are both
   true. */
struct _DebugInfoFSM
{
HChar* filename; /* in mallocville (VG_AR_DINFO) */
HChar* dbgname; /* in mallocville (VG_AR_DINFO) */
XArray* maps; /* XArray of DebugInfoMapping structs */
Bool have_rx_map; /* did we see a r?x mapping yet for the file? */
Bool have_rw_map; /* did we see a rw? mapping yet for the file? */
Bool have_ro_map; /* did we see a r-- mapping yet for the file? */
};
/* To do with the string table in struct _DebugInfo (::strpool) */
#define SEGINFO_STRPOOLSIZE (64*1024)
/* We may encounter more than one .eh_frame section in an object --
unusual but apparently allowed by ELF. See
http://sourceware.org/bugzilla/show_bug.cgi?id=12675
N_EHFRAME_SECTS is the dimension of the ehframe_avma[] and
ehframe_size[] arrays in struct _DebugInfo, and so the upper bound
on n_ehframe.
*/
#define N_EHFRAME_SECTS 2
/* So, the main structure for holding debug info for one object. */
struct _DebugInfo {
/* Admin stuff */
struct _DebugInfo* next; /* list of DebugInfos */
Bool mark; /* marked for deletion? */
/* An abstract handle, which can be used by entities outside of
m_debuginfo to (in an abstract datatype sense) refer to this
struct _DebugInfo. A .handle of zero is invalid; valid handles
are 1 and above. The same handle is never issued twice (in any
given run of Valgrind), so a handle becomes invalid when the
associated struct _DebugInfo is discarded, and remains invalid
forever thereafter. The .handle field is set as soon as this
structure is allocated. */
ULong handle;
/* Used for debugging only - indicate what stuff to dump whilst
reading stuff into the seginfo. Are computed as early in the
lifetime of the DebugInfo as possible -- at the point when it is
created. Use these when deciding what to spew out; do not use
the global VG_(clo_blah) flags. */
Bool trace_symtab; /* symbols, our style */
Bool trace_cfi; /* dwarf frame unwind, our style */
Bool ddump_syms; /* mimic /usr/bin/readelf --syms */
Bool ddump_line; /* mimic /usr/bin/readelf --debug-dump=line */
Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */
/* The "decide when it is time to read debuginfo" state machine.
This structure must get filled in before we can start reading
anything from the ELF/MachO file. This structure is filled in
by VG_(di_notify_mmap) and its immediate helpers. */
struct _DebugInfoFSM fsm;
/* Once the ::fsm has reached an accept state -- typically, when
both a rw? and r?x mapping for .filename have been observed --
we can go on to read the symbol tables and debug info.
.have_dinfo changes from False to True when the debug info has
been completely read in and postprocessed (canonicalised) and is
now suitable for querying. */
/* If have_dinfo is False, then all fields below this point are
invalid and should not be consulted. */
Bool have_dinfo; /* initially False */
/* All the rest of the fields in this structure are filled in once
we have committed to reading the symbols and debug info (that
is, at the point where .have_dinfo is set to True). */
/* The file's soname. */
HChar* soname;
/* Description of some important mapped segments. The presence or
absence of the mapping is denoted by the _present field, since
in some obscure circumstances (to do with data/sdata/bss) it is
possible for the mapping to be present but have zero size.
Certainly text_ is mandatory on all platforms; not sure about
the rest though.
--------------------------------------------------------
Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)
or the normal case, which is the AND of the following:
(0) size of at least one rx mapping > 0
(1) no two DebugInfos with some rx mapping of size > 0
have overlapping rx mappings
(2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
[avma,+size) of one rx mapping; that is, the former
is a subrange or equal to the latter.
(3) all DiCfSI in the cfsi array all have ranges that fall within
[avma,+size) of that rx mapping.
(4) all DiCfSI in the cfsi array are non-overlapping
The cumulative effect of these restrictions is to ensure that
all the DiCfSI records in the entire system are non overlapping.
Hence any address falls into either exactly one DiCfSI record,
or none. Hence it is safe to cache the results of searches for
DiCfSI records. This is the whole point of these restrictions.
The caching of DiCfSI searches is done in VG_(use_CF_info). The
cache is flushed after any change to debugInfo_list. DiCfSI
searches are cached because they are central to stack unwinding
on amd64-linux.
Where are these invariants imposed and checked?
They are checked after a successful read of debuginfo into
a DebugInfo*, in check_CFSI_related_invariants.
(1) is not really imposed anywhere. We simply assume that the
kernel will not map the text segments from two different objects
into the same space. Sounds reasonable.
(2) follows from (4) and (3). It is ensured by canonicaliseCFI.
(3) is ensured by ML_(addDiCfSI).
(4) is ensured by canonicaliseCFI.
--------------------------------------------------------
Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:
The _debug_{svma,bias} fields were added as part of a fix to
#185816. The problem encompassed in that bug report was that it
wasn't correct to apply the bias values deduced for a
primary object to its associated debuginfo object, because the
debuginfo object (or the primary) could have been prelinked to a
different SVMA. Hence debuginfo and primary objects need to
have their own biases.
------ JRS: (referring to r9329): ------
Let me see if I understand the workings correctly. Initially
the _debug_ values are set to the same values as the "normal"
ones, as there's a bunch of bits of code like this (in
readelf.c)
di->text_svma = svma;
...
di->text_bias = rx_bias;
di->text_debug_svma = svma;
di->text_debug_bias = rx_bias;
If a debuginfo object subsequently shows up then the
_debug_svma/bias are set for the debuginfo object. Result is
that if there's no debuginfo object then the values are the same
as the primary-object values, and if there is a debuginfo object
then they will (or at least may) be different.
Then when we need to actually bias something, we'll have to
decide whether to use the primary bias or the debuginfo bias.
And the strategy is to use the primary bias for ELF symbols but
the debuginfo bias for anything pulled out of Dwarf.
------ THH: ------
Correct - the debug_svma and bias values apply to any address
read from the debug data regardless of where that debug data is
stored and the other values are used for addresses from other
places (primarily the symbol table).
------ JRS: ------
Ok; so this was my only area of concern. Are there any
corner-case scenarios where this wouldn't be right? It sounds
like we're assuming the ELF symbols come from the primary object
and, if there is a debug object, then all the Dwarf comes from
there. But what if (eg) both symbols and Dwarf come from the
debug object? Is that even possible or allowable?
------ THH: ------
You may have a point...
The current logic is to try and take any one set of data from
either the base object or the debug object. There are four sets
of data we consider:
- Symbol Table
- Stabs
- DWARF1
- DWARF2
If we see the primary section for a given set in the base object
then we ignore all sections relating to that set in the debug
object.
Now in principle if we saw a secondary section (like debug_line
say) in the base object, but not the main section (debug_info in
this case) then we would take debug_info from the debug object
but would use the debug_line from the base object unless we saw
a replacement copy in the debug object. That's probably unlikely
however.
A bigger issue might be, as you say, the symbol table as we will
pick that up from the debug object if it isn't in the base. The
dynamic symbol table will always have to be in the base object
though so we will have to be careful when processing symbols to
know which table we are reading in that case.
What we probably need to do is tell read_elf_symtab which object
the symbols it is being asked to read came from.
(A followup patch to deal with this was committed in r9469).
*/
/* .text */
Bool text_present;
Addr text_avma;
Addr text_svma;
SizeT text_size;
PtrdiffT text_bias;
Addr text_debug_svma;
PtrdiffT text_debug_bias;
/* .data */
Bool data_present;
Addr data_svma;
Addr data_avma;
SizeT data_size;
PtrdiffT data_bias;
Addr data_debug_svma;
PtrdiffT data_debug_bias;
/* .sdata */
Bool sdata_present;
Addr sdata_svma;
Addr sdata_avma;
SizeT sdata_size;
PtrdiffT sdata_bias;
Addr sdata_debug_svma;
PtrdiffT sdata_debug_bias;
/* .rodata */
Bool rodata_present;
Addr rodata_svma;
Addr rodata_avma;
SizeT rodata_size;
PtrdiffT rodata_bias;
Addr rodata_debug_svma;
PtrdiffT rodata_debug_bias;
/* .bss */
Bool bss_present;
Addr bss_svma;
Addr bss_avma;
SizeT bss_size;
PtrdiffT bss_bias;
Addr bss_debug_svma;
PtrdiffT bss_debug_bias;
/* .sbss */
Bool sbss_present;
Addr sbss_svma;
Addr sbss_avma;
SizeT sbss_size;
PtrdiffT sbss_bias;
Addr sbss_debug_svma;
PtrdiffT sbss_debug_bias;
/* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
Bool exidx_present;
Addr exidx_avma;
Addr exidx_svma;
SizeT exidx_size;
PtrdiffT exidx_bias;
/* .ARM.extab -- sometimes present on arm32, containing unwind info. */
Bool extab_present;
Addr extab_avma;
Addr extab_svma;
SizeT extab_size;
PtrdiffT extab_bias;
/* .plt */
Bool plt_present;
Addr plt_avma;
SizeT plt_size;
/* .got */
Bool got_present;
Addr got_avma;
SizeT got_size;
/* .got.plt */
Bool gotplt_present;
Addr gotplt_avma;
SizeT gotplt_size;
/* .opd -- needed on ppc64be-linux for finding symbols */
Bool opd_present;
Addr opd_avma;
SizeT opd_size;
/* .ehframe -- needed on amd64-linux for stack unwinding. We might
see more than one, hence the arrays. */
UInt n_ehframe; /* 0 .. N_EHFRAME_SECTS */
Addr ehframe_avma[N_EHFRAME_SECTS];
SizeT ehframe_size[N_EHFRAME_SECTS];
/* Sorted tables of stuff we snarfed from the file. This is the
eventual product of reading the debug info. All this stuff
lives in VG_AR_DINFO. */
/* An expandable array of symbols. */
DiSym* symtab;
UWord symtab_used;
UWord symtab_size;
/* Two expandable arrays, storing locations and their filename/dirname. */
DiLoc* loctab;
UInt sizeof_fndn_ix; /* Similar use as sizeof_cfsi_m_ix below. */
void* loctab_fndn_ix; /* loctab[i] filename/dirname is identified by
loctab_fndn_ix[i] (an index in di->fndnpool)
0 means filename/dirname unknown.
The void* is an UChar* or UShort* or UInt*
depending on sizeof_fndn_ix. */
UWord loctab_used;
UWord loctab_size;
/* An expandable array of inlined fn info.
maxinl_codesz is the biggest inlined piece of code
in inltab (i.e. the max of 'addr_hi - addr_lo'). */
DiInlLoc* inltab;
UWord inltab_used;
UWord inltab_size;
SizeT maxinl_codesz;
/* A set of expandable arrays to store CFI summary info records.
The machine specific information (i.e. the DiCfSI_m struct)
are stored in cfsi_m_pool, as these are highly duplicated.
The DiCfSI_m are allocated in cfsi_m_pool and identified using
a (we hope) small integer : often one byte is enough, sometimes
2 bytes are needed.
cfsi_base contains the bases of the code address ranges.
cfsi_size is the size of the cfsi_base array.
The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
Following elements are not used (yet).
For each base in cfsi_base, an index into cfsi_m_pool is stored
in cfsi_m_ix array. The size of cfsi_m_ix is equal to
cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].
cfsi_base[i] gives the base address of a code range covered by
some CF Info. The corresponding CF Info is identified by an index
in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
cfsi_base[i] is given
by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
by ((UInt*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.
The end of the code range starting at cfsi_base[i] is given by
cfsi_base[i+1]-1 (or cfsi_maxavma for cfsi_base[cfsi_used-1]).
Some code ranges between cfsi_minavma and cfsi_maxavma might not
be covered by cfi information. Such not covered ranges are stored by
a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.
A variable size representation has been chosen for the elements of
cfsi_m_ix as in many cases, one byte is good enough. For big
objects, 2 bytes are needed. No object has yet been found where
4 bytes are needed (but the code is ready to handle this case).
Not covered ranges ('cfi holes') are stored explicitly in
cfsi_base/cfsi_m_ix as this is more memory efficient than storing
a length for each covered range : on x86 or amd64, we typically have
a hole every 8 covered ranges. On arm64, we have very few holes
(1 every 50 or 100 ranges).
The cfsi information is read and prepared in the cfsi_rd array.
Once all the information has been read, the cfsi_base and cfsi_m_ix
arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
This is all done by ML_(finish_CFSI_arrays).
Also includes summary address bounds, showing the min and max address
covered by any of the records, as an aid to fast searching. And, if the
records require any expression nodes, they are stored in
cfsi_exprs. */
Addr* cfsi_base;
UInt sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
The void* is an UChar* or UShort* or UInt*
depending on sizeof_cfsi_m_ix. */
DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
UWord cfsi_used;
UWord cfsi_size;
DedupPoolAlloc *cfsi_m_pool;
Addr cfsi_minavma;
Addr cfsi_maxavma;
XArray* cfsi_exprs; /* XArray of CfiExpr */
/* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
data. Non-expandable array, hence .size == .used. */
FPO_DATA* fpo;
UWord fpo_size;
Addr fpo_minavma;
Addr fpo_maxavma;
Addr fpo_base_avma;
/* Pool of strings -- the string table. Pointers
into this are stable (the memory is not reallocated). */
DedupPoolAlloc *strpool;
/* Pool of FnDn -- filename and dirname.
Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
DedupPoolAlloc *fndnpool;
/* Variable scope information, as harvested from Dwarf3 files.
In short it's an
array of (array of PC address ranges and variables)
The outer array indexes over scopes, with Entry 0 containing
information on variables which exist for any value of the program
counter (PC) -- that is, the outermost scope. Entries 1, 2, 3,
etc contain information on increasingly deeply nested variables.
Each inner array is an array of (an address range, and a set
of variables that are in scope over that address range).
The address ranges may not overlap.
Since Entry 0 in the outer array holds information on variables
that exist for any value of the PC (that is, global vars), it
follows that Entry 0's inner array can only have one address
range pair, one that covers the entire address space.
*/
XArray* /* of OSet of DiAddrRange */varinfo;
/* These are arrays of the relevant typed objects, held here
partially for the purposes of visiting each object exactly once
when we need to delete them. */
/* An array of TyEnts. These are needed to make sense of any types
in the .varinfo. Also, when deleting this DebugInfo, we must
first traverse this array and throw away malloc'd stuff hanging
off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
XArray* /* of TyEnt */ admin_tyents;
/* An array of guarded DWARF3 expressions. */
XArray* admin_gexprs;
/* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
This helps performance a lot during ML_(addLineInfo) etc., which can
easily be invoked hundreds of thousands of times. */
DebugInfoMapping* last_rx_map;
};
/* --------------------- functions --------------------- */
/* ------ Adding ------ */
/* Add a symbol to si's symbol table. The contents of 'sym' are
copied. It is assumed (and checked) that 'sym' only contains one
name, so there is no auxiliary ::sec_names vector to duplicate.
IOW, the copy is a shallow copy, and there are assertions in place
to ensure that's OK. */
extern void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym );
/* Add a filename/dirname pair to a DebugInfo and return its index
   in the fndnpool fixed pool.  'dirname' may be NULL.  Other
   functions declared in this header treat index 0 as meaning "unknown
   filename/dirname pair". */
extern UInt ML_(addFnDn) (struct _DebugInfo* di,
const HChar* filename,
const HChar* dirname); /* NULL is allowable */
/* Return the filename of the fndn pair identified by fndn_ix, where
   fndn_ix is an index in di->fndnpool as allocated by ML_(addFnDn).
   Returns "???" if fndn_ix is 0. */
extern const HChar* ML_(fndn_ix2filename) (const DebugInfo* di,
UInt fndn_ix);
/* Return the dirname of the fndn pair identified by fndn_ix, where
   fndn_ix is an index in di->fndnpool as allocated by ML_(addFnDn).
   Returns "" if fndn_ix is 0 or fndn->dirname is NULL. */
extern const HChar* ML_(fndn_ix2dirname) (const DebugInfo* di,
UInt fndn_ix);
/* Return the fndn_ix for the LineInfo at position 'locno' in
   di->loctab.  Returns 0 if the filename/dirname are unknown. */
extern UInt ML_(fndn_ix) (const DebugInfo* di, Word locno);
/* Add a line-number record to a DebugInfo.
   fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
   Give a 0 index for an unknown filename/dirname pair.
   NOTE(review): 'this', 'next' and 'entry' are not described at this
   declaration.  'this'/'next' presumably delimit the address range
   the record covers and 'lineno' is the source line -- confirm
   against the definition. */
extern
void ML_(addLineInfo) ( struct _DebugInfo* di,
UInt fndn_ix,
Addr this, Addr next, Int lineno, Int entry);
/* Add a call inlined record to a DebugInfo.
   A call to the below means that inlinedfn code has been
   inlined, resulting in code from [addr_lo, addr_hi[.
   Note that addr_hi is excluded, i.e. is not part of the inlined code.
   fndn_ix and lineno identify the location of the call that caused
   this inlining.
   fndn_ix is an index in di->fndnpool, allocated using ML_(addFnDn).
   Give a 0 index for an unknown filename/dirname pair.
   In case of nested inlining, a small level indicates the call
   is closer to main than a call with a higher level. */
extern
void ML_(addInlInfo) ( struct _DebugInfo* di,
Addr addr_lo, Addr addr_hi,
const HChar* inlinedfn,
UInt fndn_ix,
Int lineno, UShort level);
/* Add a CFI summary record to a DebugInfo.  The supplied DiCfSI_m
   is copied.
   NOTE(review): 'base' and 'len' presumably give the start address
   and length of the range the record applies to -- confirm against
   the definition. */
extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
Addr base, UInt len, DiCfSI_m* cfsi_m );
/* Given a position 'pos' in the di->cfsi_base/cfsi_m_ix arrays,
   return the corresponding cfsi_m*.  Returns NULL if the position
   corresponds to a cfsi hole, so callers must check the result
   before dereferencing it. */
DiCfSI_m* ML_(get_cfsi_m) (const DebugInfo* di, UInt pos);
/* Add the string 'str' to the string table of a DebugInfo.  'len'
   is the length of the string; if len==-1, ML_(addStr) will itself
   measure the length of the string.
   NOTE(review): the returned pointer is presumably the copy held in
   di's string table -- confirm against the definition. */
extern const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );
/* Add a string to the string table of a DebugInfo, by copying the
   string from the given DiCursor 'c'.  Measures the length of the
   string itself, so no length argument is needed. */
extern const HChar* ML_(addStrFromCursor)( DebugInfo* di, DiCursor c );
/* Add a variable to a DebugInfo.
   NOTE(review): no contract is written at this declaration; the
   per-parameter remarks below are the only stated facts.  'level'
   presumably selects the scope entry in di->varinfo, and aMin/aMax
   the address range over which the variable is in scope -- confirm
   against the definition. */
extern void ML_(addVar)( struct _DebugInfo* di,
Int level,
Addr aMin,
Addr aMax,
const HChar* name,
UWord typeR, /* a cuOff */
GExpr* gexpr,
GExpr* fbGX, /* SHARED. */
UInt fndn_ix, /* where decl'd - may be zero */
Int lineNo, /* where decl'd - may be zero */
Bool show );
/* Note: fndn_ix identifies a filename/dirname pair similarly to
   ML_(addInlInfo) and ML_(addLineInfo). */
/* Canonicalise the tables held by 'di', in preparation for use.
   Call this once, after finishing adding entries to these tables
   (i.e. after the ML_(add...) calls above) and before querying
   them. */
extern void ML_(canonicaliseTables) ( struct _DebugInfo* di );
/* Canonicalise the call-frame-info table held by 'di', in preparation
   for use.  This is called by ML_(canonicaliseTables) but can also be
   called on its own to sort just this table. */
extern void ML_(canonicaliseCFI) ( struct _DebugInfo* di );
/* ML_(finish_CFSI_arrays) fills in di's cfsi_base and cfsi_m_ix
   arrays from the cfsi_rd array.  cfsi_rd is then freed, so it must
   not be used after this call. */
extern void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );
/* ------ Searching ------ */
/* Find a symbol-table index containing the specified pointer 'ptr',
   or -1 if not found.  Binary search.
   NOTE(review): 'match_anywhere_in_sym' and 'findText' are not
   described at this declaration.  Their names suggest "accept a hit
   anywhere inside the symbol, not only at its start" and "look for a
   text rather than a data symbol" -- confirm against the
   definition. */
extern Word ML_(search_one_symtab) ( const DebugInfo* di, Addr ptr,
Bool match_anywhere_in_sym,
Bool findText );
/* Find a location-table index containing the specified pointer
   'ptr', or -1 if not found.  Binary search. */
extern Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr );
/* Find a CFI-table index containing the specified pointer 'ptr', or
   -1 if not found.  Binary search. */
extern Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr );
/* Find an FPO-table index containing the specified pointer 'ptr', or
   -1 if not found.  Binary search. */
extern Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );
/* Helper for the most commonly needed search: find the rx mapping
   containing the specified address range [lo, hi].  The range must
   fall entirely within the mapping to be considered to be within it.
   Asserts if lo > hi; caller must ensure this doesn't happen.
   The last mapping matched and returned is cached in
   di->last_rx_map.
   NOTE(review): the result when no mapping contains the range is not
   stated at this declaration (presumably NULL) -- confirm against
   the definition. */
extern DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di,
Addr lo, Addr hi );
/* ------ Misc ------ */
/* Show a non-fatal debug info reading error, described by 'msg'.
   Use vg_panic instead if the error is terminal.  Errors flagged
   'serious' are always shown; non-serious ones are shown only at
   verbosity level 2 and above. */
extern
void ML_(symerr) ( const DebugInfo* di, Bool serious, const HChar* msg );
/* Print a symbol.
   NOTE(review): 'idx' is presumably the symbol's index in its table,
   printed alongside the symbol -- confirm against the definition. */
extern void ML_(ppSym) ( Int idx, const DiSym* sym );
/* Print a call-frame-info summary 'si_m'.  'exprs' is an XArray of
   CfiExpr.
   NOTE(review): 'base' and 'len' presumably give the address range
   the summary applies to, and 'exprs' the expressions that 'si_m'
   refers to -- confirm against the definition. */
extern void ML_(ppDiCfSI) ( const XArray* /* of CfiExpr */ exprs,
Addr base, UInt len,
const DiCfSI_m* si_m );
/* Tracing support for the debuginfo readers.  Both macros expect a
   variable named 'di' with a 'trace_symtab' field to be in scope at
   the point of use.
   TRACE_SYMTAB_ENABLED is true iff di->trace_symtab is set.
   TRACE_SYMTAB(format, args...) forwards its arguments to VG_(printf),
   but only when tracing is enabled.  The body is wrapped in
   do { } while (0) so that the macro expands to exactly one statement:
   it can then be used in an unbraced if/else without capturing the
   'else' or causing a syntax error.  Invoke it as a statement, with a
   trailing semicolon. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)
#endif /* ndef __PRIV_STORAGE_H */
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/