
/*--------------------------------------------------------------------*/
/*--- Format-neutral storage of and querying of info acquired from ---*/
/*--- ELF/XCOFF stabs/dwarf1/dwarf2 debug info.                    ---*/
/*---                                               priv_storage.h ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward 
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
/*
   Stabs reader greatly improved by Nick Nethercote, Apr 02.
   This module was also extensively hacked on by Jeremy Fitzhardinge
   and Tom Hughes.
*/
/* See comment at top of debuginfo.c for explanation of
   the _svma / _avma / _image / _bias naming scheme.
*/
/* Note this is not freestanding; needs pub_core_xarray.h and
   priv_tytypes.h to be included before it. */

#ifndef __PRIV_STORAGE_H
#define __PRIV_STORAGE_H

#include "pub_core_basics.h"   // Addr
#include "pub_core_xarray.h"   // XArray
#include "pub_core_deduppoolalloc.h" // DedupPoolAlloc
#include "priv_d3basics.h"     // GExpr et al.
#include "priv_image.h"        // DiCursor

/* --------------------- SYMBOLS --------------------- */

/* A structure to hold an ELF/MachO symbol (very crudely).  Usually
   the symbol only has one name, which is stored in ::pri_name, and
   ::sec_names is NULL.  If there are other names, these are stored in
   ::sec_names, which is a NULL terminated vector holding the names.
   The vector is allocated in VG_AR_DINFO, the names themselves live
   in DebugInfo::strpool.

   From the point of view of ELF, the primary vs secondary distinction
   is artificial: they are all just names associated with the address,
   none of which has higher precedence than any other.  However, from
   the point of view of mapping an address to a name to display to the
   user, we need to choose one "preferred" name, and so that might as
   well be installed as the pri_name, whilst all others can live in
   sec_names[].  This has the convenient side effect that, in the
   common case where there is only one name for the address,
   sec_names[] does not need to be allocated.
*/
typedef
   struct {
      /* Symbol Actual VMAs: the lowest address of the entity, plus
         platform specific fields.  Access these through the macros
         defined in pub_core_debuginfo.h. */
      SymAVMAs avmas;

      /* The primary name.  This is never NULL. */
      const HChar* pri_name;

      /* Either NULL, or a NULL terminated array holding the other
         names. */
      const HChar** sec_names;

      /* XXX: on 32-bit platforms this could be shrunk by giving size
         30 bits and isText/isIFunc 1 bit each.  Anyone doing so must
         ensure that every assignment to the latter two uses 0 or 1
         (or True or False); a positive number larger than 1 must
         never be used to represent True. */

      /* Size of the symbol, in bytes. */
      UInt size;

      Bool isText;

      /* Is the symbol an indirect function? */
      Bool isIFunc;
   }
   DiSym;

/* --------------------- SRCLOCS --------------------- */

/* `struct nlist' in a.out.h stores line numbers as shorts, so the
   line count overflows when it reaches this value. */
#define LINENO_OVERFLOW (1 << (8 * sizeof(short)))

/* A 32 bit quantity is shared between a line number (LINENO_BITS
   wide) and a size (the remaining LOC_SIZE_BITS).  MAX_LINENO is the
   largest line number that fits. */
#define LINENO_BITS     20
#define LOC_SIZE_BITS   (32 - LINENO_BITS)
#define MAX_LINENO      ((1 << LINENO_BITS) - 1)

/* Largest size that fits in LOC_SIZE_BITS.  It is unlikely that any
   line has an instruction range of more than 4096 bytes. */
#define MAX_LOC_SIZE    ((1 << LOC_SIZE_BITS) - 1)

/* Used to detect line number overflows: a line number which is
   60000-odd smaller than the previous one was probably caused by an
   overflow. */
#define OVERFLOW_DIFFERENCE  (LINENO_OVERFLOW - 5000)

/* A (filename, dirname) pair.  FnDn records live in di->fndnpool and
   are allocated with VG_(allocFixedEltDedupPA); the strings they
   point at are kept in di->strpool. */
typedef
   struct {
      /* Name of the source file. */
      const HChar* filename;
      /* Name of the source directory. */
      const HChar* dirname;
   } FnDn;

/* A structure to hold addr-to-source info for a single line.  There
  can be a lot of these, hence the dense packing: size and lineno are
  bit-fields of LOC_SIZE_BITS and LINENO_BITS bits respectively, and
  those two widths add up to 32 by construction.  The largest values
  the fields can hold are therefore MAX_LOC_SIZE and MAX_LINENO.
  Do not reorder or retype the fields without checking the resulting
  layout. */
typedef
   struct {
      /* Word 1 */
      Addr   addr;               /* lowest address for this line */
      /* Word 2 */
      UShort size:LOC_SIZE_BITS; /* # bytes; we catch overflows of this */
      UInt   lineno:LINENO_BITS; /* source line number, or zero */
   }
   DiLoc;

/* Width of the DiInlLoc 'level' bit-field -- whatever is left of 32
   bits once LINENO_BITS have been given to the line number -- and the
   largest inlining level that fits in it. */
#define LEVEL_BITS  (32 - LINENO_BITS)
#define MAX_LEVEL     ((1 << LEVEL_BITS) - 1)

/* A structure to hold addr-to-inlined fn info.  There
   can be a lot of these, hence the dense packing.
   Only caller source filename and lineno are stored.
   Handling dirname should be done using fndn_ix technique
   similar to  ML_(addLineInfo).
   lineno and level are bit-fields (LINENO_BITS and LEVEL_BITS wide,
   32 bits in total); do not reorder or retype them without checking
   the resulting layout. */
typedef
   struct {
      /* Word 1 */
      Addr   addr_lo;            /* lowest address for inlined fn */
      /* Word 2 */
      Addr   addr_hi;            /* highest address following the inlined fn */
      /* Word 3 */
      const HChar* inlinedfn;    /* inlined function name */
      /* Word 4 and 5 */
      UInt   fndn_ix;            /* index in di->fndnpool of caller source
                                    dirname/filename */
      UInt   lineno:LINENO_BITS; /* caller line number */
      UShort level:LEVEL_BITS;   /* level of inlining */
   }
   DiInlLoc;

/* --------------------- CF INFO --------------------- */

/* DiCfSI: a structure to summarise DWARF2/3 CFA info for the code
   address range [base .. base+len-1].

   On x86 and amd64 ("IA"), if you know ({e,r}sp, {e,r}bp, {e,r}ip) at
   some point and {e,r}ip is in the range [base .. base+len-1], it
   tells you how to calculate ({e,r}sp, {e,r}bp) for the caller of the
   current frame and also ra, the return address of the current frame.

   First off, calculate CFA, the Canonical Frame Address, thusly:

     cfa = case cfa_how of
              CFIC_IA_SPREL -> {e,r}sp + cfa_off
              CFIC_IA_BPREL -> {e,r}bp + cfa_off
              CFIC_EXPR     -> expr whose index is in cfa_off

   Once that is done, the previous frame's {e,r}sp/{e,r}bp values and
   this frame's {e,r}ra value can be calculated like this:

     old_{e,r}sp/{e,r}bp/ra
         = case {e,r}sp/{e,r}bp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/bp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/bp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/bp/ra_off

   On ARM it's pretty much the same, except we have more registers to
   keep track of:

     cfa = case cfa_how of
              CFIC_ARM_R13REL -> r13 + cfa_off
              CFIC_ARM_R12REL -> r12 + cfa_off
              CFIC_ARM_R11REL -> r11 + cfa_off
              CFIC_ARM_R7REL  -> r7  + cfa_off
              CFIC_EXPR       -> expr whose index is in cfa_off

     old_r14/r13/r12/r11/r7/ra
         = case r14/r13/r12/r11/r7/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (r14/r13/r12/r11/r7 only)
              CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
              CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
              CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off

   On ARM64:

     cfa = case cfa_how of
              CFIC_ARM64_SPREL  -> sp + cfa_off
              CFIC_ARM64_X29REL -> x29 + cfa_off
              CFIC_EXPR         -> expr whose index is in cfa_off

     old_sp/x30/x29/ra
         = case sp/x30/x29/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before
              CFIR_CFAREL    -> cfa + sp/x30/x29/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/x30/x29/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/x30/x29/ra_off

   On s390x we have a similar logic as x86 or amd64. We need the stack pointer
   (r15), the frame pointer r11 (like BP) and together with the instruction
   address in the PSW we can calculate the previous values:
     cfa = case cfa_how of
              CFIC_IA_SPREL -> r15 + cfa_off
              CFIC_IA_BPREL -> r11 + cfa_off
              CFIC_EXPR     -> expr whose index is in cfa_off

     old_sp/fp/ra
         = case sp/fp/ra_how of
              CFIR_UNKNOWN   -> we don't know, sorry
              CFIR_SAME      -> same as it was before (sp/fp only)
              CFIR_CFAREL    -> cfa + sp/fp/ra_off
              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
*/

/* CFIC_*: values for a cfa_how field, selecting how the CFA is
   computed. */

/* x86/amd64-style ("IA") targets */
#define CFIC_IA_SPREL      ((UChar)1)
#define CFIC_IA_BPREL      ((UChar)2)

/* arm */
#define CFIC_ARM_R13REL    ((UChar)3)
#define CFIC_ARM_R12REL    ((UChar)4)
#define CFIC_ARM_R11REL    ((UChar)5)
#define CFIC_ARM_R7REL     ((UChar)6)

/* arm64 */
#define CFIC_ARM64_SPREL   ((UChar)7)
#define CFIC_ARM64_X29REL  ((UChar)8)

/* all targets */
#define CFIC_EXPR          ((UChar)9)

/* CFIR_*: values for the per-register xx_how fields, selecting how
   the previous value of that register is recovered. */
#define CFIR_UNKNOWN       ((UChar)64)
#define CFIR_SAME          ((UChar)65)
#define CFIR_CFAREL        ((UChar)66)
#define CFIR_MEMCFAREL     ((UChar)67)
#define CFIR_EXPR          ((UChar)68)

/* Definition of the DiCfSI_m DiCfSI machine dependent part.
   These are highly duplicated, and are stored in a pool.

   Exactly one of the variants below is compiled, selected by the
   VGA_ architecture symbol; an unrecognised architecture is a
   compile-time error.  Every variant has the same shape: a group of
   one-byte xx_how selectors (cfa_how takes a CFIC_ value, all the
   others a CFIR_ value), followed by one Int xx_off per selector.
   All variants provide at least cfa_how/ra_how/cfa_off/ra_off. */
#if defined(VGA_x86) || defined(VGA_amd64)
/* x86 / amd64: tracks ra, sp and bp. */
typedef
   struct {
      UChar cfa_how; /* a CFIC_IA value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar bp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   bp_off;
   }
   DiCfSI_m;
#elif defined(VGA_arm)
/* arm: tracks ra, r14, r13, r12, r11 and r7. */
typedef
   struct {
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar r14_how; /* a CFIR_ value */
      UChar r13_how; /* a CFIR_ value */
      UChar r12_how; /* a CFIR_ value */
      UChar r11_how; /* a CFIR_ value */
      UChar r7_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   r14_off;
      Int   r13_off;
      Int   r12_off;
      Int   r11_off;
      Int   r7_off;
      // If you add additional fields, don't forget to update the
      // initialisation of this in readexidx.c accordingly.
   }
   DiCfSI_m;
#elif defined(VGA_arm64)
/* arm64: tracks ra, sp, x30 and x29. */
typedef
   struct {
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */ /*dw31=SP*/
      UChar x30_how; /* a CFIR_ value */ /*dw30=LR*/
      UChar x29_how; /* a CFIR_ value */ /*dw29=FP*/
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   x30_off;
      Int   x29_off;
   }
   DiCfSI_m;
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
/* Just have a struct with the common fields in, so that code that
   processes the common fields doesn't have to be ifdef'd against
   VGP_/VGA_ symbols.  These are not used in any way on ppc32/64-linux
   at the moment. */
typedef
   struct {
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
   }
   DiCfSI_m;
#elif defined(VGA_s390x)
/* s390x: tracks sp, ra and fp.  Note the field order differs from the
   other variants (sp comes before ra). */
typedef
   struct {
      UChar cfa_how; /* a CFIC_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar fp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   sp_off;
      Int   ra_off;
      Int   fp_off;
   }
   DiCfSI_m;
#elif defined(VGA_mips32) || defined(VGA_mips64)
/* mips32 / mips64: tracks ra, sp and fp. */
typedef
   struct {
      UChar cfa_how; /* a CFIC_ value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar fp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   fp_off;
   }
   DiCfSI_m;
#elif defined(VGA_tilegx)
/* tilegx: tracks ra, sp and fp. */
typedef
   struct {
      UChar cfa_how; /* a CFIC_IA value */
      UChar ra_how;  /* a CFIR_ value */
      UChar sp_how;  /* a CFIR_ value */
      UChar fp_how;  /* a CFIR_ value */
      Int   cfa_off;
      Int   ra_off;
      Int   sp_off;
      Int   fp_off;
   }
   DiCfSI_m;
#else
#  error "Unknown arch"
#endif

/* The machine independent part of a CFI summary: a code address range
   together with a reference to its machine dependent part. */
typedef
   struct {
      /* First address of the range. */
      Addr base;
      /* Length of the range; it covers [base .. base+len-1]. */
      UInt len;
      /* Identifies the associated DiCfSI_m -- presumably an index
         into the pool in which the DiCfSI_m records are stored. */
      UInt cfsi_m_ix;
   }
   DiCfSI;

/* Unary operators, as carried by the Unop variant of a CfiExpr.  The
   numbering starts at an arbitrary-looking base (presumably so that a
   value of the wrong enumeration is easy to recognise). */
typedef
   enum {
      Cunop_Abs = 0x231,
      Cunop_Neg = 0x232,
      Cunop_Not = 0x233
   }
   CfiUnop;

/* Binary operators, as carried by the Binop variant of a CfiExpr. */
typedef
   enum {
      /* arithmetic / bitwise / shifts */
      Cbinop_Add = 0x321,
      Cbinop_Sub = 0x322,
      Cbinop_And = 0x323,
      Cbinop_Mul = 0x324,
      Cbinop_Shl = 0x325,
      Cbinop_Shr = 0x326,
      /* comparisons */
      Cbinop_Eq  = 0x327,
      Cbinop_Ge  = 0x328,
      Cbinop_Gt  = 0x329,
      Cbinop_Le  = 0x32A,
      Cbinop_Lt  = 0x32B,
      Cbinop_Ne  = 0x32C
   }
   CfiBinop;

/* Registers that a CfiExpr can refer to by name (its CfiReg variant),
   grouped by target.  Enumerators are numbered consecutively from
   Creg_INVALID, so the order below determines their values. */
typedef
   enum {
      Creg_INVALID=0x213,

      /* x86 / amd64 */
      Creg_IA_SP, Creg_IA_BP, Creg_IA_IP,

      /* arm */
      Creg_ARM_R13, Creg_ARM_R12, Creg_ARM_R15, Creg_ARM_R14, Creg_ARM_R7,

      /* arm64 */
      Creg_ARM64_X30,

      /* s390 */
      Creg_S390_IA, Creg_S390_SP, Creg_S390_FP, Creg_S390_LR,

      /* mips */
      Creg_MIPS_RA,

      /* tilegx */
      Creg_TILEGX_IP, Creg_TILEGX_SP, Creg_TILEGX_BP, Creg_TILEGX_LR
   }
   CfiReg;

/* Discriminant for CfiExpr: there is one tag per variant of the
   CfiExpr Cex union. */
typedef
   enum {
      Cex_Undef  = 0x123,
      Cex_Deref  = 0x124,
      Cex_Const  = 0x125,
      Cex_Unop   = 0x126,
      Cex_Binop  = 0x127,
      Cex_CfiReg = 0x128,
      Cex_DwReg  = 0x129
   }
   CfiExprTag;

/* One node of a CFI expression.  .tag says which variant of .Cex is
   in use (presumably Cex_<X> selects Cex.<X>).  Operands are not
   embedded but referred to through Int fields (ixAddr, ix, ixL, ixR)
   -- presumably indices of other CfiExpr nodes held in the same
   XArray; TODO confirm against the constructors. */
typedef
   struct {
      CfiExprTag tag;
      union {
         /* no payload */
         struct { } Undef;
         /* ixAddr refers to the address operand */
         struct { Int ixAddr; } Deref;
         /* a literal value */
         struct { UWord con; } Const;
         /* operator plus one operand */
         struct { CfiUnop op; Int ix; } Unop;
         /* operator plus left and right operands */
         struct { CfiBinop op; Int ixL; Int ixR; } Binop;
         /* a register identified by a CfiReg value */
         struct { CfiReg reg; } CfiReg;
         /* a register identified by an Int (going by the variant's
            name, presumably a DWARF register number) */
         struct { Int reg; } DwReg;
      }
      Cex;
   }
   CfiExpr;

/* One function per CfiExpr variant.  Each takes the XArray 'dst'
   plus the fields of that variant, and returns an Int -- presumably
   the index in 'dst' of the node that was added; TODO confirm against
   the definitions. */
Int ML_(CfiExpr_Undef) ( XArray* dst );
Int ML_(CfiExpr_Deref) ( XArray* dst, Int ixAddr );
Int ML_(CfiExpr_Const) ( XArray* dst, UWord con );
Int ML_(CfiExpr_Unop)  ( XArray* dst, CfiUnop op, Int ix );
Int ML_(CfiExpr_Binop) ( XArray* dst, CfiBinop op, Int ixL, Int ixR );
Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg );
Int ML_(CfiExpr_DwReg) ( XArray* dst, Int reg );

/* Takes an XArray 'src' and an Int 'ix'; presumably prints the
   expression found at index 'ix' of 'src'.  'src' is not modified
   through this pointer. */
void ML_(ppCfiExpr)( const XArray* src, Int ix );

/* ---------------- FPO INFO (Windows PE) -------------- */

/* for apps using Wine: MSVC++ PDB FramePointerOmitted: somewhat like
   a primitive CFI.

   The record is annotated as being 16 bytes: three UInts, one UShort,
   one UChar, and a final byte made of the bit-fields cbRegs (3),
   fHasSEH (1), fUseBP (1), reserved (1) and cbFrame (2).  Presumably
   this mirrors the layout used in PDB files, so do not reorder,
   resize or retype any field. */
typedef
   struct _FPO_DATA {  /* 16 bytes */
      UInt   ulOffStart; /* offset of 1st byte of function code */
      UInt   cbProcSize; /* # bytes in function */
      UInt   cdwLocals;  /* # bytes/4 in locals */
      UShort cdwParams;  /* # bytes/4 in params */
      UChar  cbProlog;   /* # bytes in prolog */
      UChar  cbRegs :3;  /* # regs saved */
      UChar  fHasSEH:1;  /* Structured Exception Handling */
      UChar  fUseBP :1;  /* EBP has been used */
      UChar  reserved:1;
      UChar  cbFrame:2;  /* frame type */
   }
   FPO_DATA;

/* Frame type codes -- presumably the values taken by
   FPO_DATA.cbFrame; TODO confirm against the PDB reader. */
#define PDB_FRAME_FPO  0
#define PDB_FRAME_TRAP 1
#define PDB_FRAME_TSS  2

/* --------------------- VARIABLES --------------------- */

/* An address range together with the variables associated with it. */
typedef
   struct {
      /* Bounds of the range (presumably both inclusive -- TODO
         confirm against ML_(cmp_for_DiAddrRange_range)). */
      Addr aMin;
      Addr aMax;
      /* XArray of DiVariable. */
      XArray* vars;
   }
   DiAddrRange;

/* Description of one variable. */
typedef
   struct {
      /* The name; the string lives in DebugInfo.strpool. */
      const HChar* name;
      /* A cuOff. */
      UWord typeR;
      /* Lives on the DebugInfo.gexprs list. */
      const GExpr* gexpr;
      /* SHARED. */
      const GExpr* fbGX;
      /* Where declared: an index in DebugInfo.fndnpool.  May be
         zero. */
      UInt fndn_ix;
      /* Where declared: the line number.  May be zero. */
      Int lineNo;
   }
   DiVariable;

/* Comparison function taking two untyped pointers and returning a
   Word.  Going by its name and parameter names, 'keyV' is presumably
   a search key and 'elemV' a DiAddrRange element being compared
   against it; TODO confirm against the definition. */
Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
                                      const void* elemV );

/* --------------------- DEBUGINFO --------------------- */

/* This is the top-level data type.  It's a structure which contains
   information pertaining to one mapped ELF object.  This type is
   exported only abstractly - in pub_tool_debuginfo.h. */

/* First though, here's an auxiliary data structure.  It is only ever
   used as part of a struct _DebugInfo.  We use it to record
   observations about mappings and permission changes to the
   associated file, so as to decide when to read debug info.  It's
   essentially an ultra-trivial finite state machine which, when it
   reaches an accept state, signals that we should now read debug info
   from the object into the associated struct _DebugInfo.  The accept
   state is arrived at when have_rx_map and have_rw_map both become
   true.  The initial state is one in which we have no observations,
   so have_rx_map and have_rw_map are both false.

   This all started as a rather ad-hoc solution, but was further
   expanded to handle weird object layouts, e.g. more than one rw
   or rx mapping for one binary.

   The normal sequence of events is one of

   start  -->  r-x mapping  -->  rw- mapping  -->  accept
   start  -->  rw- mapping  -->  r-x mapping  -->  accept

   that is, take the first r-x and rw- mapping we see, and we're done.

   On MacOSX >= 10.7, 32-bit, there appears to be a new variant:

   start  -->  r-- mapping  -->  rw- mapping  
          -->  upgrade r-- mapping to r-x mapping  -->  accept

   where the upgrade is done by a call to mach_vm_protect (OSX 10.7)
   or kernelrpc_mach_vm_protect_trap (OSX 10.9 and possibly 10.8).
   Hence we need to also track this possibility.

   From perusal of dyld sources, it appears that this scheme could
   also be used for 64 bit libraries, although that doesn't seem to happen
   in practice.  dyld uses this scheme when the text section requires
   relocation, which only appears to be the case for 32 bit objects.
*/

/* Records one mapping of the file: where it was mapped, how long it
   is, which part of the file it covers, and its access flags. */
typedef struct
{
   Addr  avma;   /* address of the mapping                      */
   SizeT size;   /* length of the mapping                       */
   OffT  foff;   /* file offset of the mapping                  */
   /* memory access flags for this mapping */
   Bool  rx;
   Bool  rw;
   Bool  ro;
} DebugInfoMapping;

/* State of the "is it time to read debug info yet?" machine for one
   file: its names, the mappings observed so far, and which kinds of
   mapping have been seen. */
struct _DebugInfoFSM {
   /* Both strings are in mallocville (VG_AR_DINFO). */
   HChar*  filename;
   HChar*  dbgname;

   /* XArray of DebugInfoMapping structs. */
   XArray* maps;

   /* Has a mapping of the given kind been seen yet for the file? */
   Bool    have_rx_map;   /* r?x */
   Bool    have_rw_map;   /* rw? */
   Bool    have_ro_map;   /* r-- */
};


/* Size constant associated with the string table in struct
   _DebugInfo (::strpool). */
#define SEGINFO_STRPOOLSIZE  (64 * 1024)


/* How many .eh_frame sections we are prepared to record per object.
   Seeing more than one is unusual but apparently allowed by ELF; see
   http://sourceware.org/bugzilla/show_bug.cgi?id=12675
*/
#define N_EHFRAME_SECTS  2


/* So, the main structure for holding debug info for one object. */

struct _DebugInfo {

   /* Admin stuff */

   struct _DebugInfo* next;   /* list of DebugInfos */
   Bool               mark;   /* marked for deletion? */

   /* An abstract handle, which can be used by entities outside of
      m_debuginfo to (in an abstract datatype sense) refer to this
      struct _DebugInfo.  A .handle of zero is invalid; valid handles
      are 1 and above.  The same handle is never issued twice (in any
      given run of Valgrind), so a handle becomes invalid when the
      associated struct _DebugInfo is discarded, and remains invalid
      forever thereafter.  The .handle field is set as soon as this
      structure is allocated. */
   ULong handle;

   /* Used for debugging only - indicate what stuff to dump whilst
      reading stuff into the seginfo.  Are computed as early in the
      lifetime of the DebugInfo as possible -- at the point when it is
      created.  Use these when deciding what to spew out; do not use
      the global VG_(clo_blah) flags. */

   Bool trace_symtab; /* symbols, our style */
   Bool trace_cfi;    /* dwarf frame unwind, our style */
   Bool ddump_syms;   /* mimic /usr/bin/readelf --syms */
   Bool ddump_line;   /* mimic /usr/bin/readelf --debug-dump=line */
   Bool ddump_frames; /* mimic /usr/bin/readelf --debug-dump=frames */

   /* The "decide when it is time to read debuginfo" state machine.
      This structure must get filled in before we can start reading
      anything from the ELF/MachO file.  This structure is filled in
      by VG_(di_notify_mmap) and its immediate helpers. */
   struct _DebugInfoFSM fsm;

   /* Once the ::fsm has reached an accept state -- typically, when
      both a rw? and r?x mapping for .filename have been observed --
      we can go on to read the symbol tables and debug info.
      .have_dinfo changes from False to True when the debug info has
      been completely read in and postprocessed (canonicalised) and is
      now suitable for querying. */
   /* If have_dinfo is False, then all fields below this point are
      invalid and should not be consulted. */
   Bool  have_dinfo; /* initially False */

   /* All the rest of the fields in this structure are filled in once
      we have committed to reading the symbols and debug info (that
      is, at the point where .have_dinfo is set to True). */

   /* The file's soname. */
   HChar* soname;

   /* Description of some important mapped segments.  The presence or
      absence of the mapping is denoted by the _present field, since
      in some obscure circumstances (to do with data/sdata/bss) it is
      possible for the mapping to be present but have zero size.
      Certainly text_ is mandatory on all platforms; not sure about
      the rest though. 

      --------------------------------------------------------

      Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS: we require that
 
      either (size of all rx maps == 0 && cfsi == NULL) (the degenerate case)

      or the normal case, which is the AND of the following:
      (0) size of at least one rx mapping > 0
      (1) no two DebugInfos with some rx mapping of size > 0 
          have overlapping rx mappings
      (2) [cfsi_minavma,cfsi_maxavma] does not extend beyond
          [avma,+size) of one rx mapping; that is, the former
          is a subrange or equal to the latter.
      (3) all DiCfSI in the cfsi array all have ranges that fall within
          [avma,+size) of that rx mapping.
      (4) all DiCfSI in the cfsi array are non-overlapping

      The cumulative effect of these restrictions is to ensure that
      all the DiCfSI records in the entire system are non overlapping.
      Hence any address falls into either exactly one DiCfSI record,
      or none.  Hence it is safe to cache the results of searches for
      DiCfSI records.  This is the whole point of these restrictions.
      The caching of DiCfSI searches is done in VG_(use_CF_info).  The
      cache is flushed after any change to debugInfo_list.  DiCfSI
      searches are cached because they are central to stack unwinding
      on amd64-linux.

      Where are these invariants imposed and checked?

      They are checked after a successful read of debuginfo into
      a DebugInfo*, in check_CFSI_related_invariants.

      (1) is not really imposed anywhere.  We simply assume that the
      kernel will not map the text segments from two different objects
      into the same space.  Sounds reasonable.

      (2) follows from (4) and (3).  It is ensured by canonicaliseCFI.
      (3) is ensured by ML_(addDiCfSI).
      (4) is ensured by canonicaliseCFI.

      --------------------------------------------------------

      Comment_on_DEBUG_SVMA_and_DEBUG_BIAS_fields:

      The _debug_{svma,bias} fields were added as part of a fix to
      #185816.  The problem encompassed in that bug report was that it
      wasn't correct to apply the bias values deduced for a
      primary object to its associated debuginfo object, because the
      debuginfo object (or the primary) could have been prelinked to a
      different SVMA.  Hence debuginfo and primary objects need to
      have their own biases.

      ------ JRS: (referring to r9329): ------
      Let me see if I understand the workings correctly.  Initially
      the _debug_ values are set to the same values as the "normal"
      ones, as there's a bunch of bits of code like this (in
      readelf.c)

         di->text_svma = svma;
         ...
         di->text_bias = rx_bias;
         di->text_debug_svma = svma;
         di->text_debug_bias = rx_bias;

      If a debuginfo object subsequently shows up then the
      _debug_svma/bias are set for the debuginfo object.  Result is
      that if there's no debuginfo object then the values are the same
      as the primary-object values, and if there is a debuginfo object
      then they will (or at least may) be different.

      Then when we need to actually bias something, we'll have to
      decide whether to use the primary bias or the debuginfo bias.
      And the strategy is to use the primary bias for ELF symbols but
      the debuginfo bias for anything pulled out of Dwarf.

      ------ THH: ------
      Correct - the debug_svma and bias values apply to any address
      read from the debug data regardless of where that debug data is
      stored and the other values are used for addresses from other
      places (primarily the symbol table).

      ------ JRS: ------ 
      Ok; so this was my only area of concern.  Are there any
      corner-case scenarios where this wouldn't be right?  It sounds
      like we're assuming the ELF symbols come from the primary object
      and, if there is a debug object, then all the Dwarf comes from
      there.  But what if (eg) both symbols and Dwarf come from the
      debug object?  Is that even possible or allowable?

      ------ THH: ------
      You may have a point...

      The current logic is to try and take any one set of data from
      either the base object or the debug object. There are four sets
      of data we consider:

         - Symbol Table
         - Stabs
         - DWARF1
         - DWARF2

      If we see the primary section for a given set in the base object
      then we ignore all sections relating to that set in the debug
      object.

      Now in principle if we saw a secondary section (like debug_line
      say) in the base object, but not the main section (debug_info in
      this case) then we would take debug_info from the debug object
      but would use the debug_line from the base object unless we saw
      a replacement copy in the debug object. That's probably unlikely
      however.

      A bigger issue might be, as you say, the symbol table as we will
      pick that up from the debug object if it isn't in the base. The
      dynamic symbol table will always have to be in the base object
      though so we will have to be careful when processing symbols to
      know which table we are reading in that case.

      What we probably need to do is tell read_elf_symtab which object
      the symbols it is being asked to read came from.

      (A followup patch to deal with this was committed in r9469).
   */
   /* .text */
   Bool     text_present;
   Addr     text_avma;
   Addr     text_svma;
   SizeT    text_size;
   PtrdiffT text_bias;
   Addr     text_debug_svma;
   PtrdiffT text_debug_bias;
   /* .data */
   Bool     data_present;
   Addr     data_svma;
   Addr     data_avma;
   SizeT    data_size;
   PtrdiffT data_bias;
   Addr     data_debug_svma;
   PtrdiffT data_debug_bias;
   /* .sdata */
   Bool     sdata_present;
   Addr     sdata_svma;
   Addr     sdata_avma;
   SizeT    sdata_size;
   PtrdiffT sdata_bias;
   Addr     sdata_debug_svma;
   PtrdiffT sdata_debug_bias;
   /* .rodata */
   Bool     rodata_present;
   Addr     rodata_svma;
   Addr     rodata_avma;
   SizeT    rodata_size;
   PtrdiffT rodata_bias;
   Addr     rodata_debug_svma;
   PtrdiffT rodata_debug_bias;
   /* .bss */
   Bool     bss_present;
   Addr     bss_svma;
   Addr     bss_avma;
   SizeT    bss_size;
   PtrdiffT bss_bias;
   Addr     bss_debug_svma;
   PtrdiffT bss_debug_bias;
   /* .sbss */
   Bool     sbss_present;
   Addr     sbss_svma;
   Addr     sbss_avma;
   SizeT    sbss_size;
   PtrdiffT sbss_bias;
   Addr     sbss_debug_svma;
   PtrdiffT sbss_debug_bias;
   /* .ARM.exidx -- sometimes present on arm32, containing unwind info. */
   Bool     exidx_present;
   Addr     exidx_avma;
   Addr     exidx_svma;
   SizeT    exidx_size;
   PtrdiffT exidx_bias;
   /* .ARM.extab -- sometimes present on arm32, containing unwind info. */
   Bool     extab_present;
   Addr     extab_avma;
   Addr     extab_svma;
   SizeT    extab_size;
   PtrdiffT extab_bias;
   /* .plt */
   Bool   plt_present;
   Addr	  plt_avma;
   SizeT  plt_size;
   /* .got */
   Bool   got_present;
   Addr   got_avma;
   SizeT  got_size;
   /* .got.plt */
   Bool   gotplt_present;
   Addr   gotplt_avma;
   SizeT  gotplt_size;
   /* .opd -- needed on ppc64be-linux for finding symbols */
   Bool   opd_present;
   Addr   opd_avma;
   SizeT  opd_size;
   /* .ehframe -- needed on amd64-linux for stack unwinding.  We might
      see more than one, hence the arrays. */
   UInt   n_ehframe;  /* 0 .. N_EHFRAME_SECTS */
   Addr   ehframe_avma[N_EHFRAME_SECTS];
   SizeT  ehframe_size[N_EHFRAME_SECTS];

   /* Sorted tables of stuff we snarfed from the file.  This is the
      eventual product of reading the debug info.  All this stuff
      lives in VG_AR_DINFO. */

   /* An expandable array of symbols. */
   DiSym*  symtab;
   UWord   symtab_used;
   UWord   symtab_size;
   /* Two expandable arrays, storing locations and their filename/dirname. */
   DiLoc*  loctab;
   UInt    sizeof_fndn_ix;  /* Similar use as sizeof_cfsi_m_ix below. */
   void*   loctab_fndn_ix;  /* loctab[i] filename/dirname is identified by
                               loctab_fndn_ix[i] (an index in di->fndnpool)
                               0 means filename/dirname unknown.
                               The void* is an UChar* or UShort* or UInt*
                               depending on sizeof_fndn_ix. */
   UWord   loctab_used;
   UWord   loctab_size;
   /* An expandable array of inlined fn info.
      maxinl_codesz is the biggest inlined piece of code
      in inltab (i.e. the max of 'addr_hi - addr_lo'). */
   DiInlLoc* inltab;
   UWord   inltab_used;
   UWord   inltab_size;
   SizeT   maxinl_codesz;

   /* A set of expandable arrays to store CFI summary info records.
      The machine specific information (i.e. the DiCfSI_m struct)
      are stored in cfsi_m_pool, as these are highly duplicated.
      The DiCfSI_m are allocated in cfsi_m_pool and identified using
      a (we hope) small integer : often one byte is enough, sometimes
      2 bytes are needed.

      cfsi_base contains the bases of the code address ranges.
      cfsi_size is the size of the cfsi_base array.
      The elements cfsi_base[0] till cfsi_base[cfsi_used-1] are used.
      Following elements are not used (yet).

      For each base in cfsi_base, an index into cfsi_m_pool is stored
      in cfsi_m_ix array. The size of cfsi_m_ix is equal to
      cfsi_size*sizeof_cfsi_m_ix. The used portion of cfsi_m_ix is
      cfsi_m_ix[0] till cfsi_m_ix[(cfsi_used-1)*sizeof_cfsi_m_ix].

      cfsi_base[i] gives the base address of a code range covered by
      some CF Info. The corresponding CF Info is identified by an index
      in cfsi_m_pool. The DiCfSI_m index in cfsi_m_pool corresponding to
      cfsi_base[i] is given
        by ((UChar*) cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 1
        by ((UShort*)cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 2
        by ((UInt*)  cfsi_m_ix)[i] if sizeof_cfsi_m_ix == 4.

      The end of the code range starting at cfsi_base[i] is given by
      cfsi_base[i+1]-1 (or cfsi_maxavma for  cfsi_base[cfsi_used-1]).
      Some code ranges between cfsi_minavma and cfsi_maxavma might not
      be covered by cfi information. Such not covered ranges are stored by
      a base in cfsi_base and a corresponding 0 index in cfsi_m_ix.

      A variable size representation has been chosen for the elements of
      cfsi_m_ix as in many cases, one byte is good enough. For big
      objects, 2 bytes are needed. No object has yet been found where
      4 bytes are needed (but the code is ready to handle this case).
      Not covered ranges ('cfi holes') are stored explicitly in
      cfsi_base/cfsi_m_ix as this is more memory efficient than storing
      a length for each covered range : on x86 or amd64, we typically have
      a hole every 8 covered ranges. On arm64, we have very few holes
      (1 every 50 or 100 ranges).
      
      The cfsi information is read and prepared in the cfsi_rd array.
      Once all the information has been read, the cfsi_base and cfsi_m_ix
      arrays will be filled in from cfsi_rd. cfsi_rd will then be freed.
      This is all done by ML_(finish_CFSI_arrays).

      Also includes summary address bounds, showing the min and max address
      covered by any of the records, as an aid to fast searching.  And, if the
      records require any expression nodes, they are stored in
      cfsi_exprs. */
   Addr* cfsi_base;
   UInt  sizeof_cfsi_m_ix; /* size in byte of indexes stored in cfsi_m_ix. */
   void* cfsi_m_ix; /* Each index occupies sizeof_cfsi_m_ix bytes.
                       The void* is an UChar* or UShort* or UInt*
                       depending on sizeof_cfsi_m_ix.  */

   DiCfSI* cfsi_rd; /* Only used during reading, NULL once info is read. */
                                   
   UWord   cfsi_used;
   UWord   cfsi_size;

   DedupPoolAlloc *cfsi_m_pool;
   Addr    cfsi_minavma;
   Addr    cfsi_maxavma;
   XArray* cfsi_exprs; /* XArray of CfiExpr */

   /* Optimized code under Wine x86: MSVC++ PDB FramePointerOmitted
      data.  Non-expandable array, hence .size == .used. */
   FPO_DATA* fpo;
   UWord     fpo_size;
   Addr      fpo_minavma;
   Addr      fpo_maxavma;
   Addr      fpo_base_avma;

   /* Pool of strings -- the string table.  Pointers
      into this are stable (the memory is not reallocated). */
   DedupPoolAlloc *strpool;

   /* Pool of FnDn -- filename and dirname.
      Elements in the pool are allocated using VG_(allocFixedEltDedupPA). */
   DedupPoolAlloc *fndnpool;

   /* Variable scope information, as harvested from Dwarf3 files.

      In short it's an

         array of (array of PC address ranges and variables)

      The outer array indexes over scopes, with Entry 0 containing
      information on variables which exist for any value of the program
      counter (PC) -- that is, the outermost scope.  Entries 1, 2, 3,
      etc contain information on increasingly deeply nested variables.

      Each inner array is an array of (an address range, and a set
      of variables that are in scope over that address range).  

      The address ranges may not overlap.
 
      Since Entry 0 in the outer array holds information on variables
      that exist for any value of the PC (that is, global vars), it
      follows that Entry 0's inner array can only have one address
      range pair, one that covers the entire address space.
   */
   XArray* /* of OSet of DiAddrRange */varinfo;

   /* These are arrays of the relevant typed objects, held here
      partially for the purposes of visiting each object exactly once
      when we need to delete them. */

   /* An array of TyEnts.  These are needed to make sense of any types
      in the .varinfo.  Also, when deleting this DebugInfo, we must
      first traverse this array and throw away malloc'd stuff hanging
      off it -- by calling ML_(TyEnt__make_EMPTY) on each entry. */
   XArray* /* of TyEnt */ admin_tyents;

   /* An array of guarded DWARF3 expressions. */
   XArray* admin_gexprs;

   /* Cached last rx mapping matched and returned by ML_(find_rx_mapping).
      This helps performance a lot during ML_(addLineInfo) etc., which can
      easily be invoked hundreds of thousands of times. */
   DebugInfoMapping* last_rx_map;
};

/* --------------------- functions --------------------- */

/* ------ Adding ------ */

/* Append a copy of 'sym' to the symbol table of 'di'.  The copy is
   shallow: 'sym' is assumed (and this is checked by assertions) to
   contain only one name, so there is no auxiliary ::sec_names vector
   that would need to be duplicated. */
extern void ML_(addSym) ( struct _DebugInfo* di,
                          DiSym* sym );

/* Register the pair (filename, dirname) with 'di'.  The value
   returned is the pair's index in the fndnpool fixed pool.
   'dirname' is allowed to be NULL. */
extern UInt ML_(addFnDn) ( struct _DebugInfo* di,
                           const HChar* filename,
                           const HChar* dirname );

/* Map fndn_ix to the filename of the fndn pair it identifies.
   An fndn_ix of 0 gives "???". */
extern
const HChar* ML_(fndn_ix2filename) ( const DebugInfo* di, UInt fndn_ix );

/* Map fndn_ix to the dirname of the fndn pair it identifies.
   The result is "" when fndn_ix is 0 or when fndn->dirname is NULL. */
extern
const HChar* ML_(fndn_ix2dirname) ( const DebugInfo* di, UInt fndn_ix );

/* Look up the fndn_ix of the LineInfo entry 'locno' in di->loctab.
   The result is 0 when the filename/dirname are unknown. */
extern UInt ML_(fndn_ix) ( const DebugInfo* di,
                           Word locno );

/* Record one line-number entry in 'di'.
   fndn_ix is an index in di->fndnpool, as allocated by ML_(addFnDn);
   pass 0 for an unknown filename/dirname pair. */
extern void ML_(addLineInfo) ( struct _DebugInfo* di,
                               UInt fndn_ix,
                               Addr this,
                               Addr next,
                               Int lineno,
                               Int entry );

/* Record in 'di' that code of 'inlinedfn' has been inlined, the
   resulting code occupying the range [addr_lo, addr_hi[ .  addr_hi
   itself is excluded, i.e. it is not part of the inlined code.
   fndn_ix and lineno identify the location of the call that caused
   this inlining.  fndn_ix is an index in di->fndnpool, as allocated
   by ML_(addFnDn); pass 0 for an unknown filename/dirname pair.
   In case of nested inlining, a call with a smaller 'level' is
   closer to main than a call with a higher level. */
extern void ML_(addInlInfo) ( struct _DebugInfo* di,
                              Addr addr_lo,
                              Addr addr_hi,
                              const HChar* inlinedfn,
                              UInt fndn_ix,
                              Int lineno,
                              UShort level );

/* Record a CFI summary entry in 'di'.  The DiCfSI_m passed in is
   copied. */
extern void ML_(addDiCfSI) ( struct _DebugInfo* di,
                             Addr base,
                             UInt len,
                             DiCfSI_m* cfsi_m );

/* Given 'pos', a position in the di->cfsi_base/cfsi_m_ix arrays,
   return the corresponding cfsi_m*.  NULL is returned when the
   position corresponds to a cfsi hole. */
extern DiCfSI_m* ML_(get_cfsi_m) ( const DebugInfo* di, UInt pos );

/* Add 'str' to the string table of 'di'.  When len == -1,
   ML_(addStr) measures the length of the string itself. */
extern
const HChar* ML_(addStr) ( DebugInfo* di, const HChar* str, Int len );

/* Add a string to the string table of 'di', the string being copied
   from the DiCursor 'c'.  The length of the string is measured by
   the function itself. */
extern
const HChar* ML_(addStrFromCursor) ( DebugInfo* di, DiCursor c );

/* Add a variable to 'di' (presumably to the scope information held in
   di->varinfo -- confirm against the definition).
   fndn_ix identifies a filename/dirname pair in the same way as it
   does for ML_(addInlInfo) and ML_(addLineInfo). */
extern
void ML_(addVar) ( struct _DebugInfo* di,
                   Int level,
                   Addr aMin,
                   Addr aMax,
                   const HChar* name,
                   UWord typeR,        /* a cuOff */
                   const GExpr* gexpr,
                   const GExpr* fbGX,  /* SHARED. */
                   UInt fndn_ix,       /* where decl'd - may be zero */
                   Int lineNo,         /* where decl'd - may be zero */
                   Bool show );

/* Put the tables held by 'di' into canonical form, ready for use.
   To be called once no more entries will be added to these tables. */
extern
void ML_(canonicaliseTables) ( struct _DebugInfo* di );

/* Put the call-frame-info table held by 'di' into canonical form,
   ready for use.  ML_(canonicaliseTables) calls this, but it can
   also be called on its own to sort just this table. */
extern
void ML_(canonicaliseCFI) ( struct _DebugInfo* di );

/* Fill in the cfsi_base and cfsi_m_ix arrays of 'di' from its
   cfsi_rd array.  cfsi_rd is freed afterwards. */
extern
void ML_(finish_CFSI_arrays) ( struct _DebugInfo* di );

/* ------ Searching ------ */

/* Binary search of the symbol table of 'di' for an entry containing
   'ptr'.  Returns the index of that entry, or -1 if not found. */
extern
Word ML_(search_one_symtab) ( const DebugInfo* di,
                              Addr ptr,
                              Bool match_anywhere_in_sym,
                              Bool findText );

/* Binary search of the location table of 'di' for an entry
   containing 'ptr'.  Returns the index of that entry, or -1 if not
   found. */
extern
Word ML_(search_one_loctab) ( const DebugInfo* di, Addr ptr );

/* Binary search of the CFI table of 'di' for an entry containing
   'ptr'.  Returns the index of that entry, or -1 if not found. */
extern
Word ML_(search_one_cfitab) ( const DebugInfo* di, Addr ptr );

/* Binary search of the FPO table of 'di' for an entry containing
   'ptr'.  Returns the index of that entry, or -1 if not found. */
extern
Word ML_(search_one_fpotab) ( const DebugInfo* di, Addr ptr );

/* Helper for the most frequently needed kind of search: find the rx
   mapping of 'di' that contains the address range lo .. hi.  A
   mapping only counts as containing the range if the range falls
   entirely within it.  The caller must ensure lo <= hi; the function
   asserts if lo > hi. */
extern
DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di, Addr lo, Addr hi );

/* ------ Misc ------ */

/* Report a non-fatal error encountered while reading debug info.
   (For fatal errors, use VG_(core_panic) instead.)  Errors flagged
   as 'serious' are always shown; those that are not are shown only
   at verbosity level 2 and above. */
extern void ML_(symerr) ( const DebugInfo* di,
                          Bool serious,
                          const HChar* msg );

/* Print the symbol 'sym'. */
extern
void ML_(ppSym) ( Int idx, const DiSym* sym );

/* Print a summary of call-frame-info.  'exprs' is an XArray of
   CfiExpr. */
extern
void ML_(ppDiCfSI) ( const XArray* exprs,
                     Addr base,
                     UInt len,
                     const DiCfSI_m* si_m );


/* Tracing of symbol-table reading.  Both macros expect a variable
   named 'di', with a trace_symtab field, to be in scope at the point
   of use.

   TRACE_SYMTAB(format, args...) calls VG_(printf) with the given
   arguments when TRACE_SYMTAB_ENABLED is true, and does nothing
   otherwise.  It is wrapped in do { ... } while (0) so that it
   expands to exactly one statement and takes the usual trailing ';'.
   A bare 'if (...) { ... }' expansion either fails to compile or
   captures the following 'else' when the macro is used as the
   unbraced body of an if/else. */
#define TRACE_SYMTAB_ENABLED (di->trace_symtab)
#define TRACE_SYMTAB(format, args...) \
   do { \
      if (TRACE_SYMTAB_ENABLED) { VG_(printf)(format, ## args); } \
   } while (0)


#endif /* ndef __PRIV_STORAGE_H */

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
