/*--------------------------------------------------------------------*/
/*--- LibHB: a library for implementing and checking ---*/
/*--- the happens-before relationship in concurrent programs. ---*/
/*--- libhb_main.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of LibHB, a library for implementing and checking
the happens-before relationship in concurrent programs.
Copyright (C) 2008-2013 OpenWorks Ltd
info@open-works.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_sparsewa.h"
#include "pub_tool_xarray.h"
#include "pub_tool_oset.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_aspacemgr.h"
#include "pub_tool_execontext.h"
#include "pub_tool_errormgr.h"
#include "pub_tool_options.h" // VG_(clo_stats)
#include "hg_basics.h"
#include "hg_wordset.h"
#include "hg_lock_n_thread.h"
#include "hg_errors.h"
#include "libhb.h"
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// Debugging #defines //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* Check the sanity of shadow values in the core memory state
machine. Change #if 0 to #if 1 to enable this. */
#if 0
# define CHECK_MSM 1
#else
# define CHECK_MSM 0
#endif
/* Check sanity (reference counts, etc) in the conflicting access
machinery. Change #if 0 to #if 1 to enable this. */
#if 0
# define CHECK_CEM 1
#else
# define CHECK_CEM 0
#endif
/* Check sanity in the compressed shadow memory machinery,
particularly in its caching innards. Unfortunately there's no
almost-zero-cost way to make them selectable at run time. Hence
set the #if 0 to #if 1 and rebuild if you want them. */
#if 0
# define CHECK_ZSM 1 /* do sanity-check CacheLine stuff */
# define inline __attribute__((noinline))
/* probably want to ditch -fomit-frame-pointer too */
#else
# define CHECK_ZSM 0 /* don't sanity-check CacheLine stuff */
#endif
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// data decls: VtsID //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* VtsIDs: Unique small-integer IDs for VTSs. VtsIDs can't exceed 30
bits, since they have to be packed into the lowest 30 bits of an
SVal. */
typedef UInt VtsID;
#define VtsID_INVALID 0xFFFFFFFF
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// data decls: SVal //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
typedef ULong SVal;
/* This value has special significance to the implementation, and callers
may not store it in the shadow memory. */
#define SVal_INVALID (3ULL << 62)
/* This is the default value for shadow memory. Initially the shadow
memory contains no accessible areas and so all reads produce this
value. TODO: make this caller-definable. */
#define SVal_NOACCESS (2ULL << 62)
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// data decls: ScalarTS //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* Scalar Timestamp. We have to store a lot of these, so there is
some effort to make them as small as possible. Logically they are
a pair, (Thr*, ULong), but that takes 16 bytes on a 64-bit target.
We pack it into 64 bits by representing the Thr* using a ThrID, a
small integer (18 bits), and a 46 bit integer for the timestamp
number. The 46/18 split is arbitrary, but has the effect that
Helgrind can only handle programs that create 2^18 or fewer threads
over their entire lifetime, and have no more than 2^46 timestamp
ticks (synchronisation operations on the same thread).
This doesn't seem like much of a limitation. 2^46 ticks is
7.06e+13, and if each tick (optimistically) takes the machine 1000
cycles to process, then the minimum time to process that many ticks
at a clock rate of 5 GHz is 162.9 days. And that's doing nothing
but VTS ticks, which isn't realistic.
NB1: SCALARTS_N_THRBITS must be 29 or lower. The obvious limit is
32 since a ThrID is a UInt. 29 comes from the fact that
'Thr_n_RCEC', which records information about old accesses, packs
not only a ThrID but also 2+1 other bits (access size and
writeness) in a UInt, hence limiting size to 32-(2+1) == 29.
NB2: thrid values are issued upwards from 1024, and values less
than that aren't valid. This isn't per se necessary (any order
will do, so long as they are unique), but it does help ensure they
are less likely to get confused with the various other kinds of
small-integer thread ids drifting around (eg, TId). See also NB5.
NB3: this probably also relies on the fact that Thr's are never
deallocated -- they exist forever. Hence the 1-1 mapping from
Thr's to thrid values (set up in Thr__new) persists forever.
NB4: temp_max_sized_VTS is allocated at startup and never freed.
It is a maximum sized VTS, so has (1 << SCALARTS_N_THRBITS)
ScalarTSs. So we can't make SCALARTS_N_THRBITS too large without
making the memory use for this go sky-high. With
SCALARTS_N_THRBITS at 18, it occupies 2MB of memory, which seems
like an OK tradeoff. If more than 256k threads need to be
supported, we could change SCALARTS_N_THRBITS to 20, which would
facilitate supporting 1 million threads at the cost of 8MB storage
for temp_max_sized_VTS.
NB5: the conflicting-map mechanism (Thr_n_RCEC, specifically) uses
ThrID == 0 to denote an empty Thr_n_RCEC record. So ThrID == 0
must never be a valid ThrID. Given NB2 that's OK.
*/
#define SCALARTS_N_THRBITS 18 /* valid range: 11 to 29 inclusive */
#define SCALARTS_N_TYMBITS (64 - SCALARTS_N_THRBITS)
typedef
struct {
ThrID thrid : SCALARTS_N_THRBITS;
ULong tym : SCALARTS_N_TYMBITS;
}
ScalarTS;
#define ThrID_MAX_VALID ((1 << SCALARTS_N_THRBITS) - 1)
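/* Illustrative sketch (not part of the original source): what the
   packing above means in practice. With SCALARTS_N_THRBITS == 18 the
   thrid field can hold values up to ThrID_MAX_VALID == 262143, and
   the tym field up to 2^46 - 1 ticks. For example:

      ScalarTS st;
      st.thrid = 1024;   // first valid thrid (see NB2 above)
      st.tym   = 1;      // first tick for that thread
      // sizeof(ScalarTS) == 8 on targets where the two bitfields pack
      // into a single 64-bit word, which is the whole point of the
      // 46/18 split.
*/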
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// data decls: Filter //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// baseline: 5, 9
#define FI_LINE_SZB_LOG2 5
#define FI_NUM_LINES_LOG2 10
#define FI_LINE_SZB (1 << FI_LINE_SZB_LOG2)
#define FI_NUM_LINES (1 << FI_NUM_LINES_LOG2)
#define FI_TAG_MASK (~(Addr)(FI_LINE_SZB - 1))
#define FI_GET_TAG(_a) ((_a) & FI_TAG_MASK)
#define FI_GET_LINENO(_a) ( ((_a) >> FI_LINE_SZB_LOG2) \
& (Addr)(FI_NUM_LINES-1) )
/* In the lines, each 8 bytes are treated individually, and are mapped
to a UShort. Regardless of endianness of the underlying machine,
bits 1 and 0 pertain to the lowest address and bits 15 and 14 to
the highest address.
Of each bit pair, the higher numbered bit is set if a R has been
seen, so the actual layout is:
15 14 ... 01 00
R W for addr+7 ... R W for addr+0
So a mask for the R-bits is 0xAAAA and for the W bits is 0x5555.
*/
/* tags are separated from lines. tags are Addrs and are
the base address of the line. */
typedef
struct {
UShort u16s[FI_LINE_SZB / 8]; /* each UShort covers 8 bytes */
}
FiLine;
typedef
struct {
Addr tags[FI_NUM_LINES];
FiLine lines[FI_NUM_LINES];
}
Filter;
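/* Illustrative sketch (not part of the original source): locating the
   filter state for an address 'a', following the layout described
   above. 'fi' is assumed to be a Filter* the caller already holds.

      UWord   lineno = FI_GET_LINENO(a);      // which of the 1024 lines
      Addr    atag   = FI_GET_TAG(a);         // 32-byte-aligned base
      if (fi->tags[lineno] == atag) {
         FiLine* line = &fi->lines[lineno];
         UWord   loff = (a - atag) >> 3;      // which UShort (0 .. 3)
         UWord   toff = a & 7;                // byte within those 8
         UShort  wbit = 1 << (2*toff + 0);    // W seen for this byte
         UShort  rbit = 1 << (2*toff + 1);    // R seen for this byte
         // (line->u16s[loff] & 0xAAAA) are the R bits,
         // (line->u16s[loff] & 0x5555) are the W bits.
      }
*/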
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// data decls: Thr, ULong_n_EC //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// Records stacks for H1 history mechanism (DRD-style)
typedef
struct { ULong ull; ExeContext* ec; }
ULong_n_EC;
/* How many of the above records to collect for each thread? Older
ones are dumped when we run out of space. 62.5k requires 1MB per
thread, since each ULong_n_EC record is 16 bytes long. When more
than N_KWs_N_STACKs_PER_THREAD are present, the older half are
deleted to make space. Hence in the worst case we will be able to
produce a stack at least for the last N_KWs_N_STACKs_PER_THREAD / 2
Kw transitions (segments in this thread). For the current setting
that gives a guaranteed stack for at least the last 31.25k
segments. */
#define N_KWs_N_STACKs_PER_THREAD 62500
struct _Thr {
/* Current VTSs for this thread. They change as we go along. viR
is the VTS to be used for reads, viW for writes. Usually they
are the same, but can differ when we deal with reader-writer
locks. It is always the case that
VtsID__cmpLEQ(viW,viR) == True
that is, viW must be the same, or lagging behind, viR. */
VtsID viR;
VtsID viW;
/* Is initially False, and is set to True after the thread really
has done a low-level exit. When True, we expect to never see
any more memory references done by this thread. */
Bool llexit_done;
/* Is initially False, and is set to True after the thread has been
joined with (reaped by some other thread). After this point, we
do not expect to see any uses of .viR or .viW, so it is safe to
set them to VtsID_INVALID. */
Bool joinedwith_done;
/* A small integer giving a unique identity to this Thr. See
comments on the definition of ScalarTS for details. */
ThrID thrid : SCALARTS_N_THRBITS;
/* A filter that removes references for which we believe that
msmcread/msmcwrite will not change the state, nor report a
race. */
Filter* filter;
/* A pointer back to the top level Thread structure. There is a
1-1 mapping between Thread and Thr structures -- each Thr points
at its corresponding Thread, and vice versa. Really, Thr and
Thread should be merged into a single structure. */
Thread* hgthread;
/* The ULongs (scalar Kws) in this accumulate in strictly
increasing order, without duplicates. This is important because
we need to be able to find a given scalar Kw in this array
later, by binary search. */
XArray* /* ULong_n_EC */ local_Kws_n_stacks;
};
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// data decls: SO //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// (UInt) `echo "Synchronisation object" | md5sum`
#define SO_MAGIC 0x56b3c5b0U
struct _SO {
struct _SO* admin_prev;
struct _SO* admin_next;
VtsID viR; /* r-clock of sender */
VtsID viW; /* w-clock of sender */
UInt magic;
};
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// Forward declarations //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* fwds for
Globals needed by other parts of the library. These are set
once at startup and then never changed. */
static void (*main_get_stacktrace)( Thr*, Addr*, UWord ) = NULL;
static ExeContext* (*main_get_EC)( Thr* ) = NULL;
/* misc fn and data fwdses */
static void VtsID__rcinc ( VtsID ii );
static void VtsID__rcdec ( VtsID ii );
static inline Bool SVal__isC ( SVal s );
static inline VtsID SVal__unC_Rmin ( SVal s );
static inline VtsID SVal__unC_Wmin ( SVal s );
static inline SVal SVal__mkC ( VtsID rmini, VtsID wmini );
/* A doubly-linked list of all the SOs. */
SO* admin_SO;
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION BEGIN compressed shadow memory //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
#ifndef __HB_ZSM_H
#define __HB_ZSM_H
/* Initialise the library. Once initialised, it will (or may) call
rcinc and rcdec in response to all the calls below, in order to
allow the user to do reference counting on the SVals stored herein.
It is important to understand, however, that due to internal
caching, the reference counts are in general inaccurate, and can be
both above or below the true reference count for an item. In
particular, the library may indicate that the reference count for
an item is zero, when in fact it is not.
To make the reference counting exact and therefore non-pointless,
call zsm_flush_cache. Immediately after it returns, the reference
counts for all items, as deduced by the caller by observing calls
to rcinc and rcdec, will be correct, and so any items with a zero
reference count may be freed (or at least considered to be
unreferenced by this library).
*/
static void zsm_init ( void(*rcinc)(SVal), void(*rcdec)(SVal) );
static void zsm_sset_range ( Addr, SizeT, SVal );
static void zsm_scopy_range ( Addr, Addr, SizeT );
static void zsm_flush_cache ( void );
#endif /* ! __HB_ZSM_H */
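/* Illustrative sketch (not part of the original source): how a caller
   of this interface might keep reference counts on SVals, per the
   comment above. 'my_refs', 'my_rcinc' and 'my_rcdec' are hypothetical.

      static ULong my_refs = 0;                        // toy counter
      static void my_rcinc ( SVal sv ) { my_refs++; }  // a ref was added
      static void my_rcdec ( SVal sv ) { my_refs--; }  // a ref was dropped

      zsm_init( my_rcinc, my_rcdec );
      // ... zsm_sset_range / zsm_scopy_range calls happen here ...
      zsm_flush_cache();  // after this returns, the observed counts are exact
*/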
/* Round a up to the next multiple of N. N must be a power of 2 */
#define ROUNDUP(a, N) ((a + N - 1) & ~(N-1))
/* Round a down to the next multiple of N. N must be a power of 2 */
#define ROUNDDN(a, N) ((a) & ~(N-1))
/* ------ User-supplied RC functions ------ */
static void(*rcinc)(SVal) = NULL;
static void(*rcdec)(SVal) = NULL;
/* ------ CacheLine ------ */
#define N_LINE_BITS 6 /* must be >= 3 */
#define N_LINE_ARANGE (1 << N_LINE_BITS)
#define N_LINE_TREES (N_LINE_ARANGE >> 3)
typedef
struct {
UShort descrs[N_LINE_TREES];
SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
}
CacheLine;
#define TREE_DESCR_16_0 (1<<0)
#define TREE_DESCR_32_0 (1<<1)
#define TREE_DESCR_16_1 (1<<2)
#define TREE_DESCR_64 (1<<3)
#define TREE_DESCR_16_2 (1<<4)
#define TREE_DESCR_32_1 (1<<5)
#define TREE_DESCR_16_3 (1<<6)
#define TREE_DESCR_8_0 (1<<7)
#define TREE_DESCR_8_1 (1<<8)
#define TREE_DESCR_8_2 (1<<9)
#define TREE_DESCR_8_3 (1<<10)
#define TREE_DESCR_8_4 (1<<11)
#define TREE_DESCR_8_5 (1<<12)
#define TREE_DESCR_8_6 (1<<13)
#define TREE_DESCR_8_7 (1<<14)
#define TREE_DESCR_DTY (1<<15)
typedef
struct {
SVal dict[4]; /* can represent up to 4 diff values in the line */
UChar ix2s[N_LINE_ARANGE/4]; /* array of N_LINE_ARANGE 2-bit
dict indexes */
/* if dict[0] == SVal_INVALID then dict[1] is the index of the
LineF to use, and dict[2..] are also SVal_INVALID. */
}
LineZ; /* compressed rep for a cache line */
typedef
struct {
Bool inUse;
SVal w64s[N_LINE_ARANGE];
}
LineF; /* full rep for a cache line */
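/* Illustrative sketch (not part of the original source): recovering the
   shadow value for byte 'i' (0 .. N_LINE_ARANGE-1) of a LineZ that holds
   a direct Z rep, i.e. dict[0] != SVal_INVALID. read_twobit_array is
   defined further below.

      UWord ix = read_twobit_array( lineZ->ix2s, i );   // 0 .. 3
      SVal  sv = lineZ->dict[ix];
      // If instead dict[0] == SVal_INVALID, dict[1] holds the index of
      // the in-use LineF in the owning SecMap's .linesF array, and the
      // 64 uncompressed SVals live in that LineF's w64s[].
*/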
/* Shadow memory.
Primary map is a WordFM Addr SecMap*.
SecMaps cover some page-size-ish section of address space and hold
a compressed representation.
CacheLine-sized chunks of SecMaps are copied into a Cache, being
decompressed when moved into the cache and recompressed on the
way out. Because of this, the cache must operate as a writeback
cache, not a writethrough one.
Each SecMap must hold a power-of-2 number of CacheLines. Hence
N_SECMAP_BITS must be >= N_LINE_BITS.
*/
#define N_SECMAP_BITS 13
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
// # CacheLines held by a SecMap
#define N_SECMAP_ZLINES (N_SECMAP_ARANGE / N_LINE_ARANGE)
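/* For example (not in the original source): with N_SECMAP_BITS == 13 and
   N_LINE_BITS == 6, a SecMap covers 8192 bytes, a CacheLine covers 64
   bytes, and so N_SECMAP_ZLINES == 128. */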
/* The data in the SecMap is held in the array of LineZs. Each LineZ
either carries the required data directly, in a compressed
representation, or it holds (in .dict[0]) an index to the LineF in
.linesF that holds the full representation.
Currently-unused LineF's have their .inUse bit set to zero.
Since each in-use LineF is referred to by exactly one LineZ,
the number of .linesZ[] that refer to .linesF should equal
the number of .linesF[] that have .inUse == True.
RC obligations: the RCs presented to the user include exactly
the values in:
* direct Z reps, that is, ones for which .dict[0] != SVal_INVALID
* F reps that are in use (.inUse == True)
Hence the following actions at the following transitions are required:
F rep: .inUse==True -> .inUse==False -- rcdec_LineF
F rep: .inUse==False -> .inUse==True -- rcinc_LineF
Z rep: .dict[0] from other to SVal_INVALID -- rcdec_LineZ
Z rep: .dict[0] from SVal_INVALID to other -- rcinc_LineZ
*/
typedef
struct {
UInt magic;
LineZ linesZ[N_SECMAP_ZLINES];
LineF* linesF;
UInt linesF_size;
}
SecMap;
#define SecMap_MAGIC 0x571e58cbU
__attribute__((unused))
static inline Bool is_sane_SecMap ( SecMap* sm ) {
return sm != NULL && sm->magic == SecMap_MAGIC;
}
/* ------ Cache ------ */
#define N_WAY_BITS 16
#define N_WAY_NENT (1 << N_WAY_BITS)
/* Each tag is the address of the associated CacheLine, rounded down
to a CacheLine address boundary. A CacheLine size must be a power
of 2 and must be 8 or more. Hence an easy way to initialise the
cache so it is empty is to set all the tag values to any value % 8
!= 0, eg 1. This means all queries in the cache initially miss.
It does however require us to detect, and not write back, any line
with a bogus tag. */
typedef
struct {
CacheLine lyns0[N_WAY_NENT];
Addr tags0[N_WAY_NENT];
}
Cache;
static inline Bool is_valid_scache_tag ( Addr tag ) {
/* a valid tag should be naturally aligned to the start of
a CacheLine. */
return 0 == (tag & (N_LINE_ARANGE - 1));
}
/* --------- Primary data structures --------- */
/* Shadow memory primary map */
static WordFM* map_shmem = NULL; /* WordFM Addr SecMap* */
static Cache cache_shmem;
static UWord stats__secmaps_search = 0; // # SM finds
static UWord stats__secmaps_search_slow = 0; // # SM lookupFMs
static UWord stats__secmaps_allocd = 0; // # SecMaps issued
static UWord stats__secmap_ga_space_covered = 0; // # ga bytes covered
static UWord stats__secmap_linesZ_allocd = 0; // # LineZ's issued
static UWord stats__secmap_linesZ_bytes = 0; // .. using this much storage
static UWord stats__secmap_linesF_allocd = 0; // # LineF's issued
static UWord stats__secmap_linesF_bytes = 0; // .. using this much storage
static UWord stats__secmap_iterator_steppings = 0; // # calls to stepSMIter
static UWord stats__cache_Z_fetches = 0; // # Z lines fetched
static UWord stats__cache_Z_wbacks = 0; // # Z lines written back
static UWord stats__cache_F_fetches = 0; // # F lines fetched
static UWord stats__cache_F_wbacks = 0; // # F lines written back
static UWord stats__cache_invals = 0; // # cache invals
static UWord stats__cache_flushes = 0; // # cache flushes
static UWord stats__cache_totrefs = 0; // # total accesses
static UWord stats__cache_totmisses = 0; // # misses
static ULong stats__cache_make_New_arange = 0; // total arange made New
static ULong stats__cache_make_New_inZrep = 0; // arange New'd on Z reps
static UWord stats__cline_normalises = 0; // # calls to cacheline_normalise
static UWord stats__cline_cread64s = 0; // # calls to s_m_read64
static UWord stats__cline_cread32s = 0; // # calls to s_m_read32
static UWord stats__cline_cread16s = 0; // # calls to s_m_read16
static UWord stats__cline_cread08s = 0; // # calls to s_m_read8
static UWord stats__cline_cwrite64s = 0; // # calls to s_m_write64
static UWord stats__cline_cwrite32s = 0; // # calls to s_m_write32
static UWord stats__cline_cwrite16s = 0; // # calls to s_m_write16
static UWord stats__cline_cwrite08s = 0; // # calls to s_m_write8
static UWord stats__cline_sread08s = 0; // # calls to s_m_get8
static UWord stats__cline_swrite08s = 0; // # calls to s_m_set8
static UWord stats__cline_swrite16s = 0; // # calls to s_m_set16
static UWord stats__cline_swrite32s = 0; // # calls to s_m_set32
static UWord stats__cline_swrite64s = 0; // # calls to s_m_set64
static UWord stats__cline_scopy08s = 0; // # calls to s_m_copy8
static UWord stats__cline_64to32splits = 0; // # 64-bit accesses split
static UWord stats__cline_32to16splits = 0; // # 32-bit accesses split
static UWord stats__cline_16to8splits = 0; // # 16-bit accesses split
static UWord stats__cline_64to32pulldown = 0; // # calls to pulldown_to_32
static UWord stats__cline_32to16pulldown = 0; // # calls to pulldown_to_16
static UWord stats__cline_16to8pulldown = 0; // # calls to pulldown_to_8
static UWord stats__vts__tick = 0; // # calls to VTS__tick
static UWord stats__vts__join = 0; // # calls to VTS__join
static UWord stats__vts__cmpLEQ = 0; // # calls to VTS__cmpLEQ
static UWord stats__vts__cmp_structural = 0; // # calls to VTS__cmp_structural
// # calls to VTS__cmp_structural w/ slow case
static UWord stats__vts__cmp_structural_slow = 0;
// # calls to VTS__indexAt_SLOW
static UWord stats__vts__indexat_slow = 0;
// # calls to vts_set__find__or__clone_and_add
static UWord stats__vts_set__focaa = 0;
// # calls to vts_set__find__or__clone_and_add that lead to an
// allocation
static UWord stats__vts_set__focaa_a = 0;
static inline Addr shmem__round_to_SecMap_base ( Addr a ) {
return a & ~(N_SECMAP_ARANGE - 1);
}
static inline UWord shmem__get_SecMap_offset ( Addr a ) {
return a & (N_SECMAP_ARANGE - 1);
}
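/* For example (not in the original source): with N_SECMAP_ARANGE == 8192,
   for a == 0x400123 we get shmem__round_to_SecMap_base(a) == 0x400000 and
   shmem__get_SecMap_offset(a) == 0x123. */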
/*----------------------------------------------------------------*/
/*--- map_shmem :: WordFM Addr SecMap ---*/
/*--- shadow memory (low level handlers) (shmem__* fns) ---*/
/*----------------------------------------------------------------*/
/*--------------- SecMap allocation --------------- */
static HChar* shmem__bigchunk_next = NULL;
static HChar* shmem__bigchunk_end1 = NULL;
static void* shmem__bigchunk_alloc ( SizeT n )
{
const SizeT sHMEM__BIGCHUNK_SIZE = 4096 * 256 * 4;
tl_assert(n > 0);
n = VG_ROUNDUP(n, 16);
tl_assert(shmem__bigchunk_next <= shmem__bigchunk_end1);
tl_assert(shmem__bigchunk_end1 - shmem__bigchunk_next
<= (SSizeT)sHMEM__BIGCHUNK_SIZE);
if (shmem__bigchunk_next + n > shmem__bigchunk_end1) {
if (0)
VG_(printf)("XXXXX bigchunk: abandoning %d bytes\n",
(Int)(shmem__bigchunk_end1 - shmem__bigchunk_next));
shmem__bigchunk_next = VG_(am_shadow_alloc)( sHMEM__BIGCHUNK_SIZE );
if (shmem__bigchunk_next == NULL)
VG_(out_of_memory_NORETURN)(
"helgrind:shmem__bigchunk_alloc", sHMEM__BIGCHUNK_SIZE );
shmem__bigchunk_end1 = shmem__bigchunk_next + sHMEM__BIGCHUNK_SIZE;
}
tl_assert(shmem__bigchunk_next);
tl_assert( 0 == (((Addr)shmem__bigchunk_next) & (16-1)) );
tl_assert(shmem__bigchunk_next + n <= shmem__bigchunk_end1);
shmem__bigchunk_next += n;
return shmem__bigchunk_next - n;
}
static SecMap* shmem__alloc_SecMap ( void )
{
Word i, j;
SecMap* sm = shmem__bigchunk_alloc( sizeof(SecMap) );
if (0) VG_(printf)("alloc_SecMap %p\n",sm);
tl_assert(sm);
sm->magic = SecMap_MAGIC;
for (i = 0; i < N_SECMAP_ZLINES; i++) {
sm->linesZ[i].dict[0] = SVal_NOACCESS;
sm->linesZ[i].dict[1] = SVal_INVALID;
sm->linesZ[i].dict[2] = SVal_INVALID;
sm->linesZ[i].dict[3] = SVal_INVALID;
for (j = 0; j < N_LINE_ARANGE/4; j++)
sm->linesZ[i].ix2s[j] = 0; /* all reference dict[0] */
}
sm->linesF = NULL;
sm->linesF_size = 0;
stats__secmaps_allocd++;
stats__secmap_ga_space_covered += N_SECMAP_ARANGE;
stats__secmap_linesZ_allocd += N_SECMAP_ZLINES;
stats__secmap_linesZ_bytes += N_SECMAP_ZLINES * sizeof(LineZ);
return sm;
}
typedef struct { Addr gaKey; SecMap* sm; } SMCacheEnt;
static SMCacheEnt smCache[3] = { {1,NULL}, {1,NULL}, {1,NULL} };
static SecMap* shmem__find_SecMap ( Addr ga )
{
SecMap* sm = NULL;
Addr gaKey = shmem__round_to_SecMap_base(ga);
// Cache
stats__secmaps_search++;
if (LIKELY(gaKey == smCache[0].gaKey))
return smCache[0].sm;
if (LIKELY(gaKey == smCache[1].gaKey)) {
SMCacheEnt tmp = smCache[0];
smCache[0] = smCache[1];
smCache[1] = tmp;
return smCache[0].sm;
}
if (gaKey == smCache[2].gaKey) {
SMCacheEnt tmp = smCache[1];
smCache[1] = smCache[2];
smCache[2] = tmp;
return smCache[1].sm;
}
// end Cache
stats__secmaps_search_slow++;
if (VG_(lookupFM)( map_shmem,
NULL/*keyP*/, (UWord*)&sm, (UWord)gaKey )) {
tl_assert(sm != NULL);
smCache[2] = smCache[1];
smCache[1] = smCache[0];
smCache[0].gaKey = gaKey;
smCache[0].sm = sm;
} else {
tl_assert(sm == NULL);
}
return sm;
}
static SecMap* shmem__find_or_alloc_SecMap ( Addr ga )
{
SecMap* sm = shmem__find_SecMap ( ga );
if (LIKELY(sm)) {
return sm;
} else {
/* create a new one */
Addr gaKey = shmem__round_to_SecMap_base(ga);
sm = shmem__alloc_SecMap();
tl_assert(sm);
VG_(addToFM)( map_shmem, (UWord)gaKey, (UWord)sm );
return sm;
}
}
/* ------------ LineF and LineZ related ------------ */
static void rcinc_LineF ( LineF* lineF ) {
UWord i;
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_ARANGE; i++)
rcinc(lineF->w64s[i]);
}
static void rcdec_LineF ( LineF* lineF ) {
UWord i;
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_ARANGE; i++)
rcdec(lineF->w64s[i]);
}
static void rcinc_LineZ ( LineZ* lineZ ) {
tl_assert(lineZ->dict[0] != SVal_INVALID);
rcinc(lineZ->dict[0]);
if (lineZ->dict[1] != SVal_INVALID) rcinc(lineZ->dict[1]);
if (lineZ->dict[2] != SVal_INVALID) rcinc(lineZ->dict[2]);
if (lineZ->dict[3] != SVal_INVALID) rcinc(lineZ->dict[3]);
}
static void rcdec_LineZ ( LineZ* lineZ ) {
tl_assert(lineZ->dict[0] != SVal_INVALID);
rcdec(lineZ->dict[0]);
if (lineZ->dict[1] != SVal_INVALID) rcdec(lineZ->dict[1]);
if (lineZ->dict[2] != SVal_INVALID) rcdec(lineZ->dict[2]);
if (lineZ->dict[3] != SVal_INVALID) rcdec(lineZ->dict[3]);
}
inline
static void write_twobit_array ( UChar* arr, UWord ix, UWord b2 ) {
Word bix, shft, mask, prep;
tl_assert(ix >= 0);
bix = ix >> 2;
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
mask = 3 << shft;
prep = b2 << shft;
arr[bix] = (arr[bix] & ~mask) | prep;
}
inline
static UWord read_twobit_array ( UChar* arr, UWord ix ) {
Word bix, shft;
tl_assert(ix >= 0);
bix = ix >> 2;
shft = 2 * (ix & 3); /* 0, 2, 4 or 6 */
return (arr[bix] >> shft) & 3;
}
/* Given address 'tag', find either the Z or F line containing relevant
data, so it can be read into the cache.
*/
static void find_ZF_for_reading ( /*OUT*/LineZ** zp,
/*OUT*/LineF** fp, Addr tag ) {
LineZ* lineZ;
LineF* lineF;
UWord zix;
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
UWord smoff = shmem__get_SecMap_offset(tag);
/* since smoff is derived from a valid tag, it should be
cacheline-aligned. */
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
zix = smoff >> N_LINE_BITS;
tl_assert(zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
lineF = NULL;
if (lineZ->dict[0] == SVal_INVALID) {
UInt fix = (UInt)lineZ->dict[1];
tl_assert(sm->linesF);
tl_assert(sm->linesF_size > 0);
tl_assert(fix >= 0 && fix < sm->linesF_size);
lineF = &sm->linesF[fix];
tl_assert(lineF->inUse);
lineZ = NULL;
}
*zp = lineZ;
*fp = lineF;
}
/* Given address 'tag', return the relevant SecMap and the index of
the LineZ within it, in the expectation that the line is to be
overwritten. Regardless of whether 'tag' is currently associated
with a Z or F representation, do an rcdec on the current
representation, in recognition of the fact that the contents are
just about to be overwritten. */
static __attribute__((noinline))
void find_Z_for_writing ( /*OUT*/SecMap** smp,
/*OUT*/Word* zixp,
Addr tag ) {
LineZ* lineZ;
LineF* lineF;
UWord zix;
SecMap* sm = shmem__find_or_alloc_SecMap(tag);
UWord smoff = shmem__get_SecMap_offset(tag);
/* since smoff is derived from a valid tag, it should be
cacheline-aligned. */
tl_assert(0 == (smoff & (N_LINE_ARANGE - 1)));
zix = smoff >> N_LINE_BITS;
tl_assert(zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
lineF = NULL;
/* re RCs, we are freeing up this LineZ/LineF so that new data can
be parked in it. Hence have to rcdec it accordingly. */
/* If lineZ has an associated lineF, free it up. */
if (lineZ->dict[0] == SVal_INVALID) {
UInt fix = (UInt)lineZ->dict[1];
tl_assert(sm->linesF);
tl_assert(sm->linesF_size > 0);
tl_assert(fix >= 0 && fix < sm->linesF_size);
lineF = &sm->linesF[fix];
tl_assert(lineF->inUse);
rcdec_LineF(lineF);
lineF->inUse = False;
} else {
rcdec_LineZ(lineZ);
}
*smp = sm;
*zixp = zix;
}
static __attribute__((noinline))
void alloc_F_for_writing ( /*MOD*/SecMap* sm, /*OUT*/Word* fixp ) {
UInt i, new_size;
LineF* nyu;
if (sm->linesF) {
tl_assert(sm->linesF_size > 0);
} else {
tl_assert(sm->linesF_size == 0);
}
if (sm->linesF) {
for (i = 0; i < sm->linesF_size; i++) {
if (!sm->linesF[i].inUse) {
*fixp = (Word)i;
return;
}
}
}
/* No free F line found. Expand existing array and try again. */
new_size = sm->linesF_size==0 ? 1 : 2 * sm->linesF_size;
nyu = HG_(zalloc)( "libhb.aFfw.1 (LineF storage)",
new_size * sizeof(LineF) );
stats__secmap_linesF_allocd += (new_size - sm->linesF_size);
stats__secmap_linesF_bytes += (new_size - sm->linesF_size)
* sizeof(LineF);
if (0)
VG_(printf)("SM %p: expand F array from %d to %d\n",
sm, (Int)sm->linesF_size, new_size);
for (i = 0; i < new_size; i++)
nyu[i].inUse = False;
if (sm->linesF) {
for (i = 0; i < sm->linesF_size; i++) {
tl_assert(sm->linesF[i].inUse);
nyu[i] = sm->linesF[i];
}
VG_(memset)(sm->linesF, 0, sm->linesF_size * sizeof(LineF) );
HG_(free)(sm->linesF);
}
sm->linesF = nyu;
sm->linesF_size = new_size;
for (i = 0; i < sm->linesF_size; i++) {
if (!sm->linesF[i].inUse) {
*fixp = (Word)i;
return;
}
}
/*NOTREACHED*/
tl_assert(0);
}
/* ------------ CacheLine and implicit-tree related ------------ */
__attribute__((unused))
static void pp_CacheLine ( CacheLine* cl ) {
Word i;
if (!cl) {
VG_(printf)("%s","pp_CacheLine(NULL)\n");
return;
}
for (i = 0; i < N_LINE_TREES; i++)
VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
for (i = 0; i < N_LINE_ARANGE; i++)
VG_(printf)(" sval: %08lx\n", (UWord)cl->svals[i]);
}
static UChar descr_to_validbits ( UShort descr )
{
/* a.k.a Party Time for gcc's constant folder */
# define DESCR(b8_7, b8_6, b8_5, b8_4, b8_3, b8_2, b8_1, b8_0, \
b16_3, b32_1, b16_2, b64, b16_1, b32_0, b16_0) \
( (UShort) ( ( (b8_7) << 14) | ( (b8_6) << 13) | \
( (b8_5) << 12) | ( (b8_4) << 11) | \
( (b8_3) << 10) | ( (b8_2) << 9) | \
( (b8_1) << 8) | ( (b8_0) << 7) | \
( (b16_3) << 6) | ( (b32_1) << 5) | \
( (b16_2) << 4) | ( (b64) << 3) | \
( (b16_1) << 2) | ( (b32_0) << 1) | \
( (b16_0) << 0) ) )
# define BYTE(bit7, bit6, bit5, bit4, bit3, bit2, bit1, bit0) \
( (UChar) ( ( (bit7) << 7) | ( (bit6) << 6) | \
( (bit5) << 5) | ( (bit4) << 4) | \
( (bit3) << 3) | ( (bit2) << 2) | \
( (bit1) << 1) | ( (bit0) << 0) ) )
/* these should all get folded out at compile time */
tl_assert(DESCR(1,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_7);
tl_assert(DESCR(0,0,0,0,0,0,0,1, 0,0,0, 0, 0,0,0) == TREE_DESCR_8_0);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 1,0,0, 0, 0,0,0) == TREE_DESCR_16_3);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,0,0) == TREE_DESCR_32_1);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,1, 0, 0,0,0) == TREE_DESCR_16_2);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0) == TREE_DESCR_64);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 1,0,0) == TREE_DESCR_16_1);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,1,0) == TREE_DESCR_32_0);
tl_assert(DESCR(0,0,0,0,0,0,0,0, 0,0,0, 0, 0,0,1) == TREE_DESCR_16_0);
switch (descr) {
/*
+--------------------------------- TREE_DESCR_8_7
| +------------------- TREE_DESCR_8_0
| | +---------------- TREE_DESCR_16_3
| | | +-------------- TREE_DESCR_32_1
| | | | +------------ TREE_DESCR_16_2
| | | | | +--------- TREE_DESCR_64
| | | | | | +------ TREE_DESCR_16_1
| | | | | | | +---- TREE_DESCR_32_0
| | | | | | | | +-- TREE_DESCR_16_0
| | | | | | | | |
| | | | | | | | | GRANULARITY, 7 -> 0 */
case DESCR(1,1,1,1,1,1,1,1, 0,0,0, 0, 0,0,0): /* 8 8 8 8 8 8 8 8 */
return BYTE(1,1,1,1,1,1,1,1);
case DESCR(1,1,0,0,1,1,1,1, 0,0,1, 0, 0,0,0): /* 8 8 16 8 8 8 8 */
return BYTE(1,1,0,1,1,1,1,1);
case DESCR(0,0,1,1,1,1,1,1, 1,0,0, 0, 0,0,0): /* 16 8 8 8 8 8 8 */
return BYTE(0,1,1,1,1,1,1,1);
case DESCR(0,0,0,0,1,1,1,1, 1,0,1, 0, 0,0,0): /* 16 16 8 8 8 8 */
return BYTE(0,1,0,1,1,1,1,1);
case DESCR(1,1,1,1,1,1,0,0, 0,0,0, 0, 0,0,1): /* 8 8 8 8 8 8 16 */
return BYTE(1,1,1,1,1,1,0,1);
case DESCR(1,1,0,0,1,1,0,0, 0,0,1, 0, 0,0,1): /* 8 8 16 8 8 16 */
return BYTE(1,1,0,1,1,1,0,1);
case DESCR(0,0,1,1,1,1,0,0, 1,0,0, 0, 0,0,1): /* 16 8 8 8 8 16 */
return BYTE(0,1,1,1,1,1,0,1);
case DESCR(0,0,0,0,1,1,0,0, 1,0,1, 0, 0,0,1): /* 16 16 8 8 16 */
return BYTE(0,1,0,1,1,1,0,1);
case DESCR(1,1,1,1,0,0,1,1, 0,0,0, 0, 1,0,0): /* 8 8 8 8 16 8 8 */
return BYTE(1,1,1,1,0,1,1,1);
case DESCR(1,1,0,0,0,0,1,1, 0,0,1, 0, 1,0,0): /* 8 8 16 16 8 8 */
return BYTE(1,1,0,1,0,1,1,1);
case DESCR(0,0,1,1,0,0,1,1, 1,0,0, 0, 1,0,0): /* 16 8 8 16 8 8 */
return BYTE(0,1,1,1,0,1,1,1);
case DESCR(0,0,0,0,0,0,1,1, 1,0,1, 0, 1,0,0): /* 16 16 16 8 8 */
return BYTE(0,1,0,1,0,1,1,1);
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 1,0,1): /* 8 8 8 8 16 16 */
return BYTE(1,1,1,1,0,1,0,1);
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 1,0,1): /* 8 8 16 16 16 */
return BYTE(1,1,0,1,0,1,0,1);
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 1,0,1): /* 16 8 8 16 16 */
return BYTE(0,1,1,1,0,1,0,1);
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 1,0,1): /* 16 16 16 16 */
return BYTE(0,1,0,1,0,1,0,1);
case DESCR(0,0,0,0,1,1,1,1, 0,1,0, 0, 0,0,0): /* 32 8 8 8 8 */
return BYTE(0,0,0,1,1,1,1,1);
case DESCR(0,0,0,0,1,1,0,0, 0,1,0, 0, 0,0,1): /* 32 8 8 16 */
return BYTE(0,0,0,1,1,1,0,1);
case DESCR(0,0,0,0,0,0,1,1, 0,1,0, 0, 1,0,0): /* 32 16 8 8 */
return BYTE(0,0,0,1,0,1,1,1);
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 1,0,1): /* 32 16 16 */
return BYTE(0,0,0,1,0,1,0,1);
case DESCR(1,1,1,1,0,0,0,0, 0,0,0, 0, 0,1,0): /* 8 8 8 8 32 */
return BYTE(1,1,1,1,0,0,0,1);
case DESCR(1,1,0,0,0,0,0,0, 0,0,1, 0, 0,1,0): /* 8 8 16 32 */
return BYTE(1,1,0,1,0,0,0,1);
case DESCR(0,0,1,1,0,0,0,0, 1,0,0, 0, 0,1,0): /* 16 8 8 32 */
return BYTE(0,1,1,1,0,0,0,1);
case DESCR(0,0,0,0,0,0,0,0, 1,0,1, 0, 0,1,0): /* 16 16 32 */
return BYTE(0,1,0,1,0,0,0,1);
case DESCR(0,0,0,0,0,0,0,0, 0,1,0, 0, 0,1,0): /* 32 32 */
return BYTE(0,0,0,1,0,0,0,1);
case DESCR(0,0,0,0,0,0,0,0, 0,0,0, 1, 0,0,0): /* 64 */
return BYTE(0,0,0,0,0,0,0,1);
default: return BYTE(0,0,0,0,0,0,0,0);
/* INVALID - any valid descr produces at least one
valid bit in tree[0..7]*/
}
/* NOTREACHED*/
tl_assert(0);
# undef DESCR
# undef BYTE
}
__attribute__((unused))
static Bool is_sane_Descr ( UShort descr ) {
return descr_to_validbits(descr) != 0;
}
static void sprintf_Descr ( /*OUT*/HChar* dst, UShort descr ) {
VG_(sprintf)(dst,
"%d%d%d%d%d%d%d%d %d%d%d %d %d%d%d",
(Int)((descr & TREE_DESCR_8_7) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_6) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_5) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_4) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_3) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_2) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_1) ? 1 : 0),
(Int)((descr & TREE_DESCR_8_0) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_3) ? 1 : 0),
(Int)((descr & TREE_DESCR_32_1) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_2) ? 1 : 0),
(Int)((descr & TREE_DESCR_64) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_1) ? 1 : 0),
(Int)((descr & TREE_DESCR_32_0) ? 1 : 0),
(Int)((descr & TREE_DESCR_16_0) ? 1 : 0)
);
}
static void sprintf_Byte ( /*OUT*/HChar* dst, UChar byte ) {
VG_(sprintf)(dst, "%d%d%d%d%d%d%d%d",
(Int)((byte & 128) ? 1 : 0),
(Int)((byte & 64) ? 1 : 0),
(Int)((byte & 32) ? 1 : 0),
(Int)((byte & 16) ? 1 : 0),
(Int)((byte & 8) ? 1 : 0),
(Int)((byte & 4) ? 1 : 0),
(Int)((byte & 2) ? 1 : 0),
(Int)((byte & 1) ? 1 : 0)
);
}
static Bool is_sane_Descr_and_Tree ( UShort descr, SVal* tree ) {
Word i;
UChar validbits = descr_to_validbits(descr);
HChar buf[128], buf2[128]; // large enough
if (validbits == 0)
goto bad;
for (i = 0; i < 8; i++) {
if (validbits & (1<<i)) {
if (tree[i] == SVal_INVALID)
goto bad;
} else {
if (tree[i] != SVal_INVALID)
goto bad;
}
}
return True;
bad:
sprintf_Descr( buf, descr );
sprintf_Byte( buf2, validbits );
VG_(printf)("%s","is_sane_Descr_and_Tree: bad tree {\n");
VG_(printf)(" validbits 0x%02lx %s\n", (UWord)validbits, buf2);
VG_(printf)(" descr 0x%04lx %s\n", (UWord)descr, buf);
for (i = 0; i < 8; i++)
VG_(printf)(" [%ld] 0x%016llx\n", i, tree[i]);
VG_(printf)("%s","}\n");
return 0;
}
static Bool is_sane_CacheLine ( CacheLine* cl )
{
Word tno, cloff;
if (!cl) goto bad;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
UShort descr = cl->descrs[tno];
SVal* tree = &cl->svals[cloff];
if (!is_sane_Descr_and_Tree(descr, tree))
goto bad;
}
tl_assert(cloff == N_LINE_ARANGE);
return True;
bad:
pp_CacheLine(cl);
return False;
}
static UShort normalise_tree ( /*MOD*/SVal* tree )
{
UShort descr;
/* pre: incoming tree[0..7] does not have any invalid shvals, in
particular no zeroes. */
if (UNLIKELY(tree[7] == SVal_INVALID || tree[6] == SVal_INVALID
|| tree[5] == SVal_INVALID || tree[4] == SVal_INVALID
|| tree[3] == SVal_INVALID || tree[2] == SVal_INVALID
|| tree[1] == SVal_INVALID || tree[0] == SVal_INVALID))
tl_assert(0);
descr = TREE_DESCR_8_7 | TREE_DESCR_8_6 | TREE_DESCR_8_5
| TREE_DESCR_8_4 | TREE_DESCR_8_3 | TREE_DESCR_8_2
| TREE_DESCR_8_1 | TREE_DESCR_8_0;
/* build 16-bit layer */
if (tree[1] == tree[0]) {
tree[1] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_1 | TREE_DESCR_8_0);
descr |= TREE_DESCR_16_0;
}
if (tree[3] == tree[2]) {
tree[3] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_3 | TREE_DESCR_8_2);
descr |= TREE_DESCR_16_1;
}
if (tree[5] == tree[4]) {
tree[5] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_5 | TREE_DESCR_8_4);
descr |= TREE_DESCR_16_2;
}
if (tree[7] == tree[6]) {
tree[7] = SVal_INVALID;
descr &= ~(TREE_DESCR_8_7 | TREE_DESCR_8_6);
descr |= TREE_DESCR_16_3;
}
/* build 32-bit layer */
if (tree[2] == tree[0]
&& (descr & TREE_DESCR_16_1) && (descr & TREE_DESCR_16_0)) {
tree[2] = SVal_INVALID; /* [3,1] must already be SVal_INVALID */
descr &= ~(TREE_DESCR_16_1 | TREE_DESCR_16_0);
descr |= TREE_DESCR_32_0;
}
if (tree[6] == tree[4]
&& (descr & TREE_DESCR_16_3) && (descr & TREE_DESCR_16_2)) {
tree[6] = SVal_INVALID; /* [7,5] must already be SVal_INVALID */
descr &= ~(TREE_DESCR_16_3 | TREE_DESCR_16_2);
descr |= TREE_DESCR_32_1;
}
/* build 64-bit layer */
if (tree[4] == tree[0]
&& (descr & TREE_DESCR_32_1) && (descr & TREE_DESCR_32_0)) {
tree[4] = SVal_INVALID; /* [7,6,5,3,2,1] must already be SVal_INVALID */
descr &= ~(TREE_DESCR_32_1 | TREE_DESCR_32_0);
descr |= TREE_DESCR_64;
}
return descr;
}
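/* Worked example (not in the original source): if all eight leaves
   tree[0..7] hold the same SVal, the three passes above merge pairs,
   then pairs-of-pairs, then the whole tree, leaving tree[1..7] as
   SVal_INVALID and returning just TREE_DESCR_64. If the leaves are
   A A B B C C D D (A..D distinct), only the 16-bit layer merges, and
   the result is TREE_DESCR_16_3 | TREE_DESCR_16_2 | TREE_DESCR_16_1
   | TREE_DESCR_16_0. */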
/* This takes a cacheline where all the data is at the leaves
(w8[..]) and builds a correctly normalised tree. */
static void normalise_CacheLine ( /*MOD*/CacheLine* cl )
{
Word tno, cloff;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
SVal* tree = &cl->svals[cloff];
cl->descrs[tno] = normalise_tree( tree );
}
tl_assert(cloff == N_LINE_ARANGE);
if (CHECK_ZSM)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
stats__cline_normalises++;
}
typedef struct { UChar count; SVal sval; } CountedSVal;
static
void sequentialise_CacheLine ( /*OUT*/CountedSVal* dst,
/*OUT*/Word* dstUsedP,
Word nDst, CacheLine* src )
{
Word tno, cloff, dstUsed;
tl_assert(nDst == N_LINE_ARANGE);
dstUsed = 0;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
UShort descr = src->descrs[tno];
SVal* tree = &src->svals[cloff];
/* sequentialise the tree described by (descr,tree). */
# define PUT(_n,_v) \
do { dst[dstUsed ].count = (_n); \
dst[dstUsed++].sval = (_v); \
} while (0)
/* byte 0 */
if (descr & TREE_DESCR_64) PUT(8, tree[0]); else
if (descr & TREE_DESCR_32_0) PUT(4, tree[0]); else
if (descr & TREE_DESCR_16_0) PUT(2, tree[0]); else
if (descr & TREE_DESCR_8_0) PUT(1, tree[0]);
/* byte 1 */
if (descr & TREE_DESCR_8_1) PUT(1, tree[1]);
/* byte 2 */
if (descr & TREE_DESCR_16_1) PUT(2, tree[2]); else
if (descr & TREE_DESCR_8_2) PUT(1, tree[2]);
/* byte 3 */
if (descr & TREE_DESCR_8_3) PUT(1, tree[3]);
/* byte 4 */
if (descr & TREE_DESCR_32_1) PUT(4, tree[4]); else
if (descr & TREE_DESCR_16_2) PUT(2, tree[4]); else
if (descr & TREE_DESCR_8_4) PUT(1, tree[4]);
/* byte 5 */
if (descr & TREE_DESCR_8_5) PUT(1, tree[5]);
/* byte 6 */
if (descr & TREE_DESCR_16_3) PUT(2, tree[6]); else
if (descr & TREE_DESCR_8_6) PUT(1, tree[6]);
/* byte 7 */
if (descr & TREE_DESCR_8_7) PUT(1, tree[7]);
# undef PUT
/* END sequentialise the tree described by (descr,tree). */
}
tl_assert(cloff == N_LINE_ARANGE);
tl_assert(dstUsed <= nDst);
*dstUsedP = dstUsed;
}
/* Write the cacheline 'wix' to backing store. Where it ends up
is determined by its tag field. */
static __attribute__((noinline)) void cacheline_wback ( UWord wix )
{
Word i, j, k, m;
Addr tag;
SecMap* sm;
CacheLine* cl;
LineZ* lineZ;
LineF* lineF;
Word zix, fix, csvalsUsed;
CountedSVal csvals[N_LINE_ARANGE];
SVal sv;
if (0)
VG_(printf)("scache wback line %d\n", (Int)wix);
tl_assert(wix >= 0 && wix < N_WAY_NENT);
tag = cache_shmem.tags0[wix];
cl = &cache_shmem.lyns0[wix];
/* The cache line may have been invalidated; if so, ignore it. */
if (!is_valid_scache_tag(tag))
return;
/* Where are we going to put it? */
sm = NULL;
lineZ = NULL;
lineF = NULL;
zix = fix = -1;
/* find the Z line to write in and rcdec it or the associated F
line. */
find_Z_for_writing( &sm, &zix, tag );
tl_assert(sm);
tl_assert(zix >= 0 && zix < N_SECMAP_ZLINES);
lineZ = &sm->linesZ[zix];
/* Generate the data to be stored */
if (CHECK_ZSM)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
csvalsUsed = -1;
sequentialise_CacheLine( csvals, &csvalsUsed,
N_LINE_ARANGE, cl );
tl_assert(csvalsUsed >= 1 && csvalsUsed <= N_LINE_ARANGE);
if (0) VG_(printf)("%lu ", csvalsUsed);
lineZ->dict[0] = lineZ->dict[1]
= lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
/* i indexes actual shadow values, k is cursor in csvals */
i = 0;
for (k = 0; k < csvalsUsed; k++) {
sv = csvals[k].sval;
if (CHECK_ZSM)
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
/* do we already have it? */
if (sv == lineZ->dict[0]) { j = 0; goto dict_ok; }
if (sv == lineZ->dict[1]) { j = 1; goto dict_ok; }
if (sv == lineZ->dict[2]) { j = 2; goto dict_ok; }
if (sv == lineZ->dict[3]) { j = 3; goto dict_ok; }
/* no. look for a free slot. */
if (CHECK_ZSM)
tl_assert(sv != SVal_INVALID);
if (lineZ->dict[0]
== SVal_INVALID) { lineZ->dict[0] = sv; j = 0; goto dict_ok; }
if (lineZ->dict[1]
== SVal_INVALID) { lineZ->dict[1] = sv; j = 1; goto dict_ok; }
if (lineZ->dict[2]
== SVal_INVALID) { lineZ->dict[2] = sv; j = 2; goto dict_ok; }
if (lineZ->dict[3]
== SVal_INVALID) { lineZ->dict[3] = sv; j = 3; goto dict_ok; }
break; /* we'll have to use the f rep */
dict_ok:
m = csvals[k].count;
if (m == 8) {
write_twobit_array( lineZ->ix2s, i+0, j );
write_twobit_array( lineZ->ix2s, i+1, j );
write_twobit_array( lineZ->ix2s, i+2, j );
write_twobit_array( lineZ->ix2s, i+3, j );
write_twobit_array( lineZ->ix2s, i+4, j );
write_twobit_array( lineZ->ix2s, i+5, j );
write_twobit_array( lineZ->ix2s, i+6, j );
write_twobit_array( lineZ->ix2s, i+7, j );
i += 8;
}
else if (m == 4) {
write_twobit_array( lineZ->ix2s, i+0, j );
write_twobit_array( lineZ->ix2s, i+1, j );
write_twobit_array( lineZ->ix2s, i+2, j );
write_twobit_array( lineZ->ix2s, i+3, j );
i += 4;
}
else if (m == 1) {
write_twobit_array( lineZ->ix2s, i+0, j );
i += 1;
}
else if (m == 2) {
write_twobit_array( lineZ->ix2s, i+0, j );
write_twobit_array( lineZ->ix2s, i+1, j );
i += 2;
}
else {
tl_assert(0); /* 8 4 2 or 1 are the only legitimate values for m */
}
}
if (LIKELY(i == N_LINE_ARANGE)) {
/* Construction of the compressed representation was
successful. */
rcinc_LineZ(lineZ);
stats__cache_Z_wbacks++;
} else {
/* Cannot use the compressed(z) representation. Use the full(f)
rep instead. */
tl_assert(i >= 0 && i < N_LINE_ARANGE);
alloc_F_for_writing( sm, &fix );
tl_assert(sm->linesF);
tl_assert(sm->linesF_size > 0);
tl_assert(fix >= 0 && fix < (Word)sm->linesF_size);
lineF = &sm->linesF[fix];
tl_assert(!lineF->inUse);
lineZ->dict[0] = lineZ->dict[2] = lineZ->dict[3] = SVal_INVALID;
lineZ->dict[1] = (SVal)fix;
lineF->inUse = True;
i = 0;
for (k = 0; k < csvalsUsed; k++) {
if (CHECK_ZSM)
tl_assert(csvals[k].count >= 1 && csvals[k].count <= 8);
sv = csvals[k].sval;
if (CHECK_ZSM)
tl_assert(sv != SVal_INVALID);
for (m = csvals[k].count; m > 0; m--) {
lineF->w64s[i] = sv;
i++;
}
}
tl_assert(i == N_LINE_ARANGE);
rcinc_LineF(lineF);
stats__cache_F_wbacks++;
}
}
/* Fetch the cacheline 'wix' from the backing store. The tag
associated with 'wix' is assumed to have already been filled in;
hence that is used to determine where in the backing store to read
from. */
static __attribute__((noinline)) void cacheline_fetch ( UWord wix )
{
Word i;
Addr tag;
CacheLine* cl;
LineZ* lineZ;
LineF* lineF;
if (0)
VG_(printf)("scache fetch line %d\n", (Int)wix);
tl_assert(wix >= 0 && wix < N_WAY_NENT);
tag = cache_shmem.tags0[wix];
cl = &cache_shmem.lyns0[wix];
/* reject nonsense requests */
tl_assert(is_valid_scache_tag(tag));
lineZ = NULL;
lineF = NULL;
find_ZF_for_reading( &lineZ, &lineF, tag );
tl_assert( (lineZ && !lineF) || (!lineZ && lineF) );
/* expand the data into the bottom layer of the tree, then get
cacheline_normalise to build the descriptor array. */
if (lineF) {
tl_assert(lineF->inUse);
for (i = 0; i < N_LINE_ARANGE; i++) {
cl->svals[i] = lineF->w64s[i];
}
stats__cache_F_fetches++;
} else {
for (i = 0; i < N_LINE_ARANGE; i++) {
SVal sv;
UWord ix = read_twobit_array( lineZ->ix2s, i );
/* correct, but expensive: tl_assert(ix >= 0 && ix <= 3); */
sv = lineZ->dict[ix];
tl_assert(sv != SVal_INVALID);
cl->svals[i] = sv;
}
stats__cache_Z_fetches++;
}
normalise_CacheLine( cl );
}
static void shmem__invalidate_scache ( void ) {
Word wix;
if (0) VG_(printf)("%s","scache inval\n");
tl_assert(!is_valid_scache_tag(1));
for (wix = 0; wix < N_WAY_NENT; wix++) {
cache_shmem.tags0[wix] = 1/*INVALID*/;
}
stats__cache_invals++;
}
static void shmem__flush_and_invalidate_scache ( void ) {
Word wix;
Addr tag;
if (0) VG_(printf)("%s","scache flush and invalidate\n");
tl_assert(!is_valid_scache_tag(1));
for (wix = 0; wix < N_WAY_NENT; wix++) {
tag = cache_shmem.tags0[wix];
if (tag == 1/*INVALID*/) {
/* already invalid; nothing to do */
} else {
tl_assert(is_valid_scache_tag(tag));
cacheline_wback( wix );
}
cache_shmem.tags0[wix] = 1/*INVALID*/;
}
stats__cache_flushes++;
stats__cache_invals++;
}
static inline Bool aligned16 ( Addr a ) {
return 0 == (a & 1);
}
static inline Bool aligned32 ( Addr a ) {
return 0 == (a & 3);
}
static inline Bool aligned64 ( Addr a ) {
return 0 == (a & 7);
}
static inline UWord get_cacheline_offset ( Addr a ) {
return (UWord)(a & (N_LINE_ARANGE - 1));
}
static inline Addr cacheline_ROUNDUP ( Addr a ) {
return ROUNDUP(a, N_LINE_ARANGE);
}
static inline Addr cacheline_ROUNDDN ( Addr a ) {
return ROUNDDN(a, N_LINE_ARANGE);
}
static inline UWord get_treeno ( Addr a ) {
return get_cacheline_offset(a) >> 3;
}
static inline UWord get_tree_offset ( Addr a ) {
return a & 7;
}
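/* Worked example (not in the original source): with N_LINE_BITS == 6 a
   CacheLine covers 64 bytes, so for a == 0x5013:
      get_cacheline_offset(a) == 0x13  (byte 19 within the line)
      get_treeno(a)           == 2     (third 8-byte tree)
      get_tree_offset(a)      == 3     (byte 3 within that tree)        */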
static __attribute__((noinline))
CacheLine* get_cacheline_MISS ( Addr a ); /* fwds */
static inline CacheLine* get_cacheline ( Addr a )
{
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[4] 0000 */
Addr tag = a & ~(N_LINE_ARANGE - 1);
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
stats__cache_totrefs++;
if (LIKELY(tag == cache_shmem.tags0[wix])) {
return &cache_shmem.lyns0[wix];
} else {
return get_cacheline_MISS( a );
}
}
static __attribute__((noinline))
CacheLine* get_cacheline_MISS ( Addr a )
{
/* tag is 'a' with the in-line offset masked out,
eg a[31]..a[4] 0000 */
CacheLine* cl;
Addr* tag_old_p;
Addr tag = a & ~(N_LINE_ARANGE - 1);
UWord wix = (a >> N_LINE_BITS) & (N_WAY_NENT - 1);
tl_assert(tag != cache_shmem.tags0[wix]);
/* Dump the old line into the backing store. */
stats__cache_totmisses++;
cl = &cache_shmem.lyns0[wix];
tag_old_p = &cache_shmem.tags0[wix];
if (is_valid_scache_tag( *tag_old_p )) {
/* EXPENSIVE and REDUNDANT: callee does it */
if (CHECK_ZSM)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
cacheline_wback( wix );
}
/* and reload the new one */
*tag_old_p = tag;
cacheline_fetch( wix );
if (CHECK_ZSM)
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
return cl;
}
static UShort pulldown_to_32 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
stats__cline_64to32pulldown++;
switch (toff) {
case 0: case 4:
tl_assert(descr & TREE_DESCR_64);
tree[4] = tree[0];
descr &= ~TREE_DESCR_64;
descr |= (TREE_DESCR_32_1 | TREE_DESCR_32_0);
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pulldown_to_16 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
stats__cline_32to16pulldown++;
switch (toff) {
case 0: case 2:
if (!(descr & TREE_DESCR_32_0)) {
descr = pulldown_to_32(tree, 0, descr);
}
tl_assert(descr & TREE_DESCR_32_0);
tree[2] = tree[0];
descr &= ~TREE_DESCR_32_0;
descr |= (TREE_DESCR_16_1 | TREE_DESCR_16_0);
break;
case 4: case 6:
if (!(descr & TREE_DESCR_32_1)) {
descr = pulldown_to_32(tree, 4, descr);
}
tl_assert(descr & TREE_DESCR_32_1);
tree[6] = tree[4];
descr &= ~TREE_DESCR_32_1;
descr |= (TREE_DESCR_16_3 | TREE_DESCR_16_2);
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pulldown_to_8 ( /*MOD*/SVal* tree, UWord toff, UShort descr ) {
stats__cline_16to8pulldown++;
switch (toff) {
case 0: case 1:
if (!(descr & TREE_DESCR_16_0)) {
descr = pulldown_to_16(tree, 0, descr);
}
tl_assert(descr & TREE_DESCR_16_0);
tree[1] = tree[0];
descr &= ~TREE_DESCR_16_0;
descr |= (TREE_DESCR_8_1 | TREE_DESCR_8_0);
break;
case 2: case 3:
if (!(descr & TREE_DESCR_16_1)) {
descr = pulldown_to_16(tree, 2, descr);
}
tl_assert(descr & TREE_DESCR_16_1);
tree[3] = tree[2];
descr &= ~TREE_DESCR_16_1;
descr |= (TREE_DESCR_8_3 | TREE_DESCR_8_2);
break;
case 4: case 5:
if (!(descr & TREE_DESCR_16_2)) {
descr = pulldown_to_16(tree, 4, descr);
}
tl_assert(descr & TREE_DESCR_16_2);
tree[5] = tree[4];
descr &= ~TREE_DESCR_16_2;
descr |= (TREE_DESCR_8_5 | TREE_DESCR_8_4);
break;
case 6: case 7:
if (!(descr & TREE_DESCR_16_3)) {
descr = pulldown_to_16(tree, 6, descr);
}
tl_assert(descr & TREE_DESCR_16_3);
tree[7] = tree[6];
descr &= ~TREE_DESCR_16_3;
descr |= (TREE_DESCR_8_7 | TREE_DESCR_8_6);
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pullup_descr_to_16 ( UShort descr, UWord toff ) {
UShort mask;
switch (toff) {
case 0:
mask = TREE_DESCR_8_1 | TREE_DESCR_8_0;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_0;
break;
case 2:
mask = TREE_DESCR_8_3 | TREE_DESCR_8_2;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_1;
break;
case 4:
mask = TREE_DESCR_8_5 | TREE_DESCR_8_4;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_2;
break;
case 6:
mask = TREE_DESCR_8_7 | TREE_DESCR_8_6;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_16_3;
break;
default:
tl_assert(0);
}
return descr;
}
static UShort pullup_descr_to_32 ( UShort descr, UWord toff ) {
UShort mask;
switch (toff) {
case 0:
if (!(descr & TREE_DESCR_16_0))
descr = pullup_descr_to_16(descr, 0);
if (!(descr & TREE_DESCR_16_1))
descr = pullup_descr_to_16(descr, 2);
mask = TREE_DESCR_16_1 | TREE_DESCR_16_0;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_32_0;
break;
case 4:
if (!(descr & TREE_DESCR_16_2))
descr = pullup_descr_to_16(descr, 4);
if (!(descr & TREE_DESCR_16_3))
descr = pullup_descr_to_16(descr, 6);
mask = TREE_DESCR_16_3 | TREE_DESCR_16_2;
tl_assert( (descr & mask) == mask );
descr &= ~mask;
descr |= TREE_DESCR_32_1;
break;
default:
tl_assert(0);
}
return descr;
}
static Bool valid_value_is_above_me_32 ( UShort descr, UWord toff ) {
switch (toff) {
case 0: case 4:
return 0 != (descr & TREE_DESCR_64);
default:
tl_assert(0);
}
}
static Bool valid_value_is_below_me_16 ( UShort descr, UWord toff ) {
switch (toff) {
case 0:
return 0 != (descr & (TREE_DESCR_8_1 | TREE_DESCR_8_0));
case 2:
return 0 != (descr & (TREE_DESCR_8_3 | TREE_DESCR_8_2));
case 4:
return 0 != (descr & (TREE_DESCR_8_5 | TREE_DESCR_8_4));
case 6:
return 0 != (descr & (TREE_DESCR_8_7 | TREE_DESCR_8_6));
default:
tl_assert(0);
}
}
/* ------------ Cache management ------------ */
static void zsm_flush_cache ( void )
{
shmem__flush_and_invalidate_scache();
}
static void zsm_init ( void(*p_rcinc)(SVal), void(*p_rcdec)(SVal) )
{
tl_assert( sizeof(UWord) == sizeof(Addr) );
rcinc = p_rcinc;
rcdec = p_rcdec;
tl_assert(map_shmem == NULL);
map_shmem = VG_(newFM)( HG_(zalloc), "libhb.zsm_init.1 (map_shmem)",
HG_(free),
NULL/*unboxed UWord cmp*/);
shmem__invalidate_scache();
/* a SecMap must contain an integral number of CacheLines */
tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
/* also ... a CacheLine holds an integral number of trees */
tl_assert(0 == (N_LINE_ARANGE % 8));
}
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION END compressed shadow memory //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION BEGIN vts primitives //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/* There's a 1-1 mapping between Thr and ThrIDs -- the latter merely
being compact stand-ins for Thr*'s. Use these functions to map
between them. */
static ThrID Thr__to_ThrID ( Thr* thr ); /* fwds */
static Thr* Thr__from_ThrID ( ThrID thrid ); /* fwds */
__attribute__((noreturn))
static void scalarts_limitations_fail_NORETURN ( Bool due_to_nThrs )
{
if (due_to_nThrs) {
const HChar* s =
"\n"
"Helgrind: cannot continue, run aborted: too many threads.\n"
"Sorry. Helgrind can only handle programs that create\n"
"%'llu or fewer threads over their entire lifetime.\n"
"\n";
VG_(umsg)(s, (ULong)(ThrID_MAX_VALID - 1024));
} else {
const HChar* s =
"\n"
"Helgrind: cannot continue, run aborted: too many\n"
"synchronisation events. Sorry. Helgrind can only handle\n"
"programs which perform %'llu or fewer\n"
"inter-thread synchronisation events (locks, unlocks, etc).\n"
"\n";
VG_(umsg)(s, (1ULL << SCALARTS_N_TYMBITS) - 1);
}
VG_(exit)(1);
/*NOTREACHED*/
tl_assert(0); /*wtf?!*/
}
/* The dead thread (ThrID, actually) tables. A thread may only be
listed here if we have been notified thereof by libhb_async_exit.
New entries are added at the end. The order isn't important, but
the ThrID values must be unique.
verydead_thread_table_not_pruned lists the identity of the threads
that died since the previous round of pruning.
Once pruning is done, these ThrIDs are added to verydead_thread_table.
We don't actually need to keep the set of threads that have ever died --
only the threads that have died since the previous round of
pruning. But it's useful for sanity check purposes to keep the
entire set, so we do. */
static XArray* /* of ThrID */ verydead_thread_table_not_pruned = NULL;
static XArray* /* of ThrID */ verydead_thread_table = NULL;
/* Arbitrary total ordering on ThrIDs. */
static Int cmp__ThrID ( const void* v1, const void* v2 ) {
ThrID id1 = *(const ThrID*)v1;
ThrID id2 = *(const ThrID*)v2;
if (id1 < id2) return -1;
if (id1 > id2) return 1;
return 0;
}
static void verydead_thread_tables_init ( void )
{
tl_assert(!verydead_thread_table);
tl_assert(!verydead_thread_table_not_pruned);
verydead_thread_table
= VG_(newXA)( HG_(zalloc),
"libhb.verydead_thread_table_init.1",
HG_(free), sizeof(ThrID) );
VG_(setCmpFnXA)(verydead_thread_table, cmp__ThrID);
verydead_thread_table_not_pruned
= VG_(newXA)( HG_(zalloc),
"libhb.verydead_thread_table_init.2",
HG_(free), sizeof(ThrID) );
VG_(setCmpFnXA)(verydead_thread_table_not_pruned, cmp__ThrID);
}
static void verydead_thread_table_sort_and_check (XArray* thrids)
{
UWord i;
VG_(sortXA)( thrids );
/* Sanity check: check for unique, strictly increasing ThrID values. */
UWord nBT = VG_(sizeXA)( thrids );
if (nBT > 0) {
ThrID thrid1, thrid2;
thrid2 = *(ThrID*)VG_(indexXA)( thrids, 0 );
for (i = 1; i < nBT; i++) {
thrid1 = thrid2;
thrid2 = *(ThrID*)VG_(indexXA)( thrids, i );
tl_assert(thrid1 < thrid2);
}
}
/* Ok, so the dead thread table thrids has unique and in-order keys. */
}
/* A VTS contains .ts, its vector clock, and also .id, a field to hold
a backlink for the caller's convenience. Since we have no idea
what to set that to in the library, it always gets set to
VtsID_INVALID. */
typedef
struct {
VtsID id;
UInt usedTS;
UInt sizeTS;
ScalarTS ts[0];
}
VTS;
/* Allocate a VTS capable of storing 'sizeTS' entries. */
static VTS* VTS__new ( const HChar* who, UInt sizeTS );
/* Make a clone of 'vts', sizing the new array to exactly match the
number of ScalarTSs present. */
static VTS* VTS__clone ( const HChar* who, VTS* vts );
/* Make a clone of 'vts' with the thrids in 'thridsToDel' removed. The new
array is sized exactly to hold the number of required elements.
'thridsToDel' is an array of ThrIDs to be omitted in the clone, and
must be in strictly increasing order. */
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel );
/* Delete this VTS in its entirety. */
static void VTS__delete ( VTS* vts );
/* Create a new singleton VTS in 'out'. Caller must have
pre-allocated 'out' sufficiently big to hold the result in all
possible cases. */
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym );
/* Create in 'out' a VTS which is the same as 'vts' except with
vts[me]++, so to speak. Caller must have pre-allocated 'out'
sufficiently big to hold the result in all possible cases. */
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts );
/* Create in 'out' a VTS which is the join (max) of 'a' and
'b'. Caller must have pre-allocated 'out' sufficiently big to hold
the result in all possible cases. */
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b );
/* Compute the partial ordering relation of the two args. Although we
could be completely general and return an enumeration value (EQ,
LT, GT, UN), in fact we only need LEQ, and so we may as well
hardwire that fact.
   Returns zero iff LEQ(A,B), or a valid ThrID if not (zero is an
   invalid ThrID).  In the latter case, the returned ThrID identifies
   a point at which LEQ fails to hold.  There may be more than one
   such point, but we only care about seeing one of them, not all of
   them.  This rather strange convention is used because sometimes we
   want to know the actual index at which they first differ. */
static UInt VTS__cmpLEQ ( VTS* a, VTS* b );
/* Compute an arbitrary structural (total) ordering on the two args,
based on their VCs, so they can be looked up in a table, tree, etc.
Returns -1, 0 or 1. */
static Word VTS__cmp_structural ( VTS* a, VTS* b );
/* Debugging only. Display the given VTS. */
static void VTS__show ( const VTS* vts );
/* Debugging only. Return vts[index], so to speak. */
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx );
/* Notify the VTS machinery that a thread has been declared
comprehensively dead: that is, it has done an async exit AND it has
been joined with. This should ensure that its local clocks (.viR
and .viW) will never again change, and so all mentions of this
thread from all VTSs in the system may be removed. */
static void VTS__declare_thread_very_dead ( Thr* idx );
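/* A minimal usage sketch (illustrative only, not part of the library;
   the function name and the "example." allocation tags are
   hypothetical).  It assumes the caller pre-allocates the 'out' VTSs
   large enough for the worst case, as the comments above require. */
#if 0
static void example__vts_usage ( Thr* thrA, Thr* thrB )
{
   VTS* a = VTS__new( "example.a", 1 );
   VTS* b = VTS__new( "example.b", 1 );
   VTS__singleton( a, thrA, 1 );
   VTS__singleton( b, thrB, 1 );
   /* The join of a and b needs at most usedTS(a) + usedTS(b) slots. */
   VTS* j = VTS__new( "example.join", a->usedTS + b->usedTS );
   VTS__join( j, a, b );
   /* Ticking thrA needs at most one slot more than 'j' uses. */
   VTS* t = VTS__new( "example.tick", j->usedTS + 1 );
   VTS__tick( t, thrA, j );
   tl_assert( VTS__cmpLEQ( j, t ) == 0 );  /* j <= t must hold */
   VTS__delete(a); VTS__delete(b); VTS__delete(j); VTS__delete(t);
}
#endif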
/*--------------- to do with Vector Timestamps ---------------*/
static Bool is_sane_VTS ( VTS* vts )
{
UWord i, n;
ScalarTS *st1, *st2;
if (!vts) return False;
if (vts->usedTS > vts->sizeTS) return False;
n = vts->usedTS;
if (n == 1) {
st1 = &vts->ts[0];
if (st1->tym == 0)
return False;
}
else
if (n >= 2) {
for (i = 0; i < n-1; i++) {
st1 = &vts->ts[i];
st2 = &vts->ts[i+1];
if (st1->thrid >= st2->thrid)
return False;
if (st1->tym == 0 || st2->tym == 0)
return False;
}
}
return True;
}
/* Create a new, empty VTS.
*/
static VTS* VTS__new ( const HChar* who, UInt sizeTS )
{
VTS* vts = HG_(zalloc)(who, sizeof(VTS) + (sizeTS+1) * sizeof(ScalarTS));
tl_assert(vts->usedTS == 0);
vts->sizeTS = sizeTS;
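   /* Write a guard word just past the last usable entry; VTS__clone,
      VTS__subtract and VTS__delete assert below that it is still
      intact. */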
*(ULong*)(&vts->ts[sizeTS]) = 0x0ddC0ffeeBadF00dULL;
return vts;
}
/* Clone this VTS.
*/
static VTS* VTS__clone ( const HChar* who, VTS* vts )
{
tl_assert(vts);
tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
UInt nTS = vts->usedTS;
VTS* clone = VTS__new(who, nTS);
clone->id = vts->id;
clone->sizeTS = nTS;
clone->usedTS = nTS;
UInt i;
for (i = 0; i < nTS; i++) {
clone->ts[i] = vts->ts[i];
}
tl_assert( *(ULong*)(&clone->ts[clone->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
return clone;
}
/* Make a clone of a VTS with specified ThrIDs removed. 'thridsToDel'
must be in strictly increasing order. We could obviously do this
much more efficiently (in linear time) if necessary.
*/
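/* For example (illustrative values): subtracting thridsToDel = [1026]
   from a VTS holding {(1024,5),(1026,2),(1030,1)} yields
   {(1024,5),(1030,1)}. */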
static VTS* VTS__subtract ( const HChar* who, VTS* vts, XArray* thridsToDel )
{
UInt i, j;
tl_assert(vts);
tl_assert(thridsToDel);
tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
UInt nTS = vts->usedTS;
/* Figure out how many ScalarTSs will remain in the output. */
UInt nReq = nTS;
for (i = 0; i < nTS; i++) {
ThrID thrid = vts->ts[i].thrid;
if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
nReq--;
}
tl_assert(nReq <= nTS);
/* Copy the ones that will remain. */
VTS* res = VTS__new(who, nReq);
j = 0;
for (i = 0; i < nTS; i++) {
ThrID thrid = vts->ts[i].thrid;
if (VG_(lookupXA)(thridsToDel, &thrid, NULL, NULL))
continue;
res->ts[j++] = vts->ts[i];
}
tl_assert(j == nReq);
tl_assert(j == res->sizeTS);
res->usedTS = j;
tl_assert( *(ULong*)(&res->ts[j]) == 0x0ddC0ffeeBadF00dULL);
return res;
}
/* Delete this VTS in its entirety.
*/
static void VTS__delete ( VTS* vts )
{
tl_assert(vts);
tl_assert(vts->usedTS <= vts->sizeTS);
tl_assert( *(ULong*)(&vts->ts[vts->sizeTS]) == 0x0ddC0ffeeBadF00dULL);
HG_(free)(vts);
}
/* Create a new singleton VTS.
*/
static void VTS__singleton ( /*OUT*/VTS* out, Thr* thr, ULong tym )
{
tl_assert(thr);
tl_assert(tym >= 1);
tl_assert(out);
tl_assert(out->usedTS == 0);
tl_assert(out->sizeTS >= 1);
UInt hi = out->usedTS++;
out->ts[hi].thrid = Thr__to_ThrID(thr);
out->ts[hi].tym = tym;
}
/* Create in 'out' a VTS which is 'vts' with vts[me]++, so to speak.
   'vts' itself is not modified.
*/
static void VTS__tick ( /*OUT*/VTS* out, Thr* me, VTS* vts )
{
UInt i, n;
ThrID me_thrid;
Bool found = False;
stats__vts__tick++;
tl_assert(out);
tl_assert(out->usedTS == 0);
if (vts->usedTS >= ThrID_MAX_VALID)
scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
tl_assert(out->sizeTS >= 1 + vts->usedTS);
tl_assert(me);
me_thrid = Thr__to_ThrID(me);
tl_assert(is_sane_VTS(vts));
n = vts->usedTS;
/* Copy all entries which precede 'me'. */
for (i = 0; i < n; i++) {
ScalarTS* here = &vts->ts[i];
if (UNLIKELY(here->thrid >= me_thrid))
break;
UInt hi = out->usedTS++;
out->ts[hi] = *here;
}
/* 'i' now indicates the next entry to copy, if any.
There are 3 possibilities:
(a) there is no next entry (we used them all up already):
add (me_thrid,1) to the output, and quit
(b) there is a next entry, and its thrid > me_thrid:
add (me_thrid,1) to the output, then copy the remaining entries
(c) there is a next entry, and its thrid == me_thrid:
copy it to the output but increment its timestamp value.
Then copy the remaining entries. (c) is the common case.
*/
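   /* Worked example (illustrative values): with vts = {(1024,5),(1030,2)}
      and me_thrid == 1030, case (c) applies and out = {(1024,5),(1030,3)};
      with me_thrid == 1027, case (b) applies and
      out = {(1024,5),(1027,1),(1030,2)}. */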
tl_assert(i >= 0 && i <= n);
if (i == n) { /* case (a) */
UInt hi = out->usedTS++;
out->ts[hi].thrid = me_thrid;
out->ts[hi].tym = 1;
} else {
/* cases (b) and (c) */
ScalarTS* here = &vts->ts[i];
if (me_thrid == here->thrid) { /* case (c) */
if (UNLIKELY(here->tym >= (1ULL << SCALARTS_N_TYMBITS) - 2ULL)) {
/* We're hosed. We have to stop. */
scalarts_limitations_fail_NORETURN( False/*!due_to_nThrs*/ );
}
UInt hi = out->usedTS++;
out->ts[hi].thrid = here->thrid;
out->ts[hi].tym = here->tym + 1;
i++;
found = True;
} else { /* case (b) */
UInt hi = out->usedTS++;
out->ts[hi].thrid = me_thrid;
out->ts[hi].tym = 1;
}
/* And copy any remaining entries. */
for (/*keepgoing*/; i < n; i++) {
ScalarTS* here2 = &vts->ts[i];
UInt hi = out->usedTS++;
out->ts[hi] = *here2;
}
}
tl_assert(is_sane_VTS(out));
tl_assert(out->usedTS == vts->usedTS + (found ? 0 : 1));
tl_assert(out->usedTS <= out->sizeTS);
}
/* Create in 'out' a VTS which is the join (max) of 'a' and 'b'.
   Neither arg is modified.
*/
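/* For example (illustrative values): the join of {(1024,5),(1027,1)}
   and {(1027,4),(1030,2)} is {(1024,5),(1027,4),(1030,2)} -- a
   pointwise max, with absent entries treated as zero. */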
static void VTS__join ( /*OUT*/VTS* out, VTS* a, VTS* b )
{
UInt ia, ib, useda, usedb;
ULong tyma, tymb, tymMax;
ThrID thrid;
UInt ncommon = 0;
stats__vts__join++;
tl_assert(a);
tl_assert(b);
useda = a->usedTS;
usedb = b->usedTS;
tl_assert(out);
tl_assert(out->usedTS == 0);
/* overly conservative test, but doing better involves comparing
the two VTSs, which we don't want to do at this point. */
if (useda + usedb >= ThrID_MAX_VALID)
scalarts_limitations_fail_NORETURN( True/*due_to_nThrs*/ );
tl_assert(out->sizeTS >= useda + usedb);
ia = ib = 0;
while (1) {
/* This logic is to enumerate triples (thrid, tyma, tymb) drawn
from a and b in order, where thrid is the next ThrID
occurring in either a or b, and tyma/b are the relevant
scalar timestamps, taking into account implicit zeroes. */
tl_assert(ia >= 0 && ia <= useda);
tl_assert(ib >= 0 && ib <= usedb);
if (ia == useda && ib == usedb) {
/* both empty - done */
break;
} else if (ia == useda && ib != usedb) {
/* a empty, use up b */
ScalarTS* tmpb = &b->ts[ib];
thrid = tmpb->thrid;
tyma = 0;
tymb = tmpb->tym;
ib++;
} else if (ia != useda && ib == usedb) {
/* b empty, use up a */
ScalarTS* tmpa = &a->ts[ia];
thrid = tmpa->thrid;
tyma = tmpa->tym;
tymb = 0;
ia++;
} else {
/* both not empty; extract lowest-ThrID'd triple */
ScalarTS* tmpa = &a->ts[ia];
ScalarTS* tmpb = &b->ts[ib];
if (tmpa->thrid < tmpb->thrid) {
/* a has the lowest unconsidered ThrID */
thrid = tmpa->thrid;
tyma = tmpa->tym;
tymb = 0;
ia++;
} else if (tmpa->thrid > tmpb->thrid) {
/* b has the lowest unconsidered ThrID */
thrid = tmpb->thrid;
tyma = 0;
tymb = tmpb->tym;
ib++;
} else {
/* they both next mention the same ThrID */
tl_assert(tmpa->thrid == tmpb->thrid);
thrid = tmpa->thrid; /* == tmpb->thrid */
tyma = tmpa->tym;
tymb = tmpb->tym;
ia++;
ib++;
ncommon++;
}
}
/* having laboriously determined (thr, tyma, tymb), do something
useful with it. */
tymMax = tyma > tymb ? tyma : tymb;
if (tymMax > 0) {
UInt hi = out->usedTS++;
out->ts[hi].thrid = thrid;
out->ts[hi].tym = tymMax;
}
}
tl_assert(is_sane_VTS(out));
tl_assert(out->usedTS <= out->sizeTS);
tl_assert(out->usedTS == useda + usedb - ncommon);
}
/* Determine if 'a' <= 'b', in the partial ordering. Returns zero if
they are, or the first ThrID for which they are not (no valid ThrID
has the value zero). This rather strange convention is used
because sometimes we want to know the actual index at which they
first differ. */
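/* For example (illustrative values): with a = {(1024,2),(1027,1)} and
   b = {(1024,3),(1027,1)}, VTS__cmpLEQ(a,b) returns 0 (a <= b), while
   VTS__cmpLEQ(b,a) returns 1024, since b's entry for ThrID 1024 (3)
   exceeds a's (2). */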
static UInt/*ThrID*/ VTS__cmpLEQ ( VTS* a, VTS* b )
{
Word ia, ib, useda, usedb;
ULong tyma, tymb;
stats__vts__cmpLEQ++;
tl_assert(a);
tl_assert(b);
useda = a->usedTS;
usedb = b->usedTS;
ia = ib = 0;
while (1) {
      /* This logic is to enumerate pairs (tyma, tymb) drawn from a
         and b in order, where tyma/tymb are the relevant scalar
         timestamps, taking into account implicit zeroes. */
ThrID thrid;
tl_assert(ia >= 0 && ia <= useda);
tl_assert(ib >= 0 && ib <= usedb);
if (ia == useda && ib == usedb) {
/* both empty - done */
break;
} else if (ia == useda && ib != usedb) {
/* a empty, use up b */
ScalarTS* tmpb = &b->ts[ib];
tyma = 0;
tymb = tmpb->tym;
thrid = tmpb->thrid;
ib++;
} else if (ia != useda && ib == usedb) {
/* b empty, use up a */
ScalarTS* tmpa = &a->ts[ia];
tyma = tmpa->tym;
thrid = tmpa->thrid;
tymb = 0;
ia++;
} else {
/* both not empty; extract lowest-ThrID'd triple */
ScalarTS* tmpa = &a->ts[ia];
ScalarTS* tmpb = &b->ts[ib];
if (tmpa->thrid < tmpb->thrid) {
/* a has the lowest unconsidered ThrID */
tyma = tmpa->tym;
thrid = tmpa->thrid;
tymb = 0;
ia++;
}
else
if (tmpa->thrid > tmpb->thrid) {
/* b has the lowest unconsidered ThrID */
tyma = 0;
tymb = tmpb->tym;
thrid = tmpb->thrid;
ib++;
} else {
/* they both next mention the same ThrID */
tl_assert(tmpa->thrid == tmpb->thrid);
tyma = tmpa->tym;
thrid = tmpa->thrid;
tymb = tmpb->tym;
ia++;
ib++;
}
}
/* having laboriously determined (tyma, tymb), do something
useful with it. */
if (tyma > tymb) {
/* not LEQ at this index. Quit, since the answer is
determined already. */
tl_assert(thrid >= 1024);
return thrid;
}
}
return 0; /* all points are LEQ => return an invalid ThrID */
}
/* Compute an arbitrary structural (total) ordering on the two args,
based on their VCs, so they can be looked up in a table, tree, etc.
   Returns -1, 0 or 1.  (really just 'deriving Ord' :-)  This can be
   performance critical so there is some effort expended to make it
   as fast as possible.
*/
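/* Note on the ordering (descriptive only): a VTS with fewer used
   entries always orders before a longer one, regardless of contents;
   for equal-length VTSs the first differing ScalarTS decides,
   comparing .tym before .thrid. */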
static Word VTS__cmp_structural ( VTS* a, VTS* b )
{
   /* We just need to generate an arbitrary total ordering based on
      a->ts and b->ts.  Preferably do it in a way which detects likely
      differences relatively quickly. */
Word i;
Word useda = 0, usedb = 0;
ScalarTS *ctsa = NULL, *ctsb = NULL;
stats__vts__cmp_structural++;
tl_assert(a);
tl_assert(b);
ctsa = &a->ts[0]; useda = a->usedTS;
ctsb = &b->ts[0]; usedb = b->usedTS;
if (LIKELY(useda == usedb)) {
ScalarTS *tmpa = NULL, *tmpb = NULL;
stats__vts__cmp_structural_slow++;
/* Same length vectors. Find the first difference, if any, as
fast as possible. */
for (i = 0; i < useda; i++) {
tmpa = &ctsa[i];
tmpb = &ctsb[i];
if (LIKELY(tmpa->tym == tmpb->tym
&& tmpa->thrid == tmpb->thrid))
continue;
else
break;
}
if (UNLIKELY(i == useda)) {
/* They're identical. */
return 0;
} else {
tl_assert(i >= 0 && i < useda);
if (tmpa->tym < tmpb->tym) return -1;
if (tmpa->tym > tmpb->tym) return 1;
if (tmpa->thrid < tmpb->thrid) return -1;
if (tmpa->thrid > tmpb->thrid) return 1;
/* we just established them as non-identical, hence: */
}
/*NOTREACHED*/
tl_assert(0);
}
if (useda < usedb) return -1;
if (useda > usedb) return 1;
/*NOTREACHED*/
tl_assert(0);
}
/* Debugging only. Display the given VTS.
*/
static void VTS__show ( const VTS* vts )
{
Word i, n;
tl_assert(vts);
VG_(printf)("[");
n = vts->usedTS;
for (i = 0; i < n; i++) {
const ScalarTS *st = &vts->ts[i];
VG_(printf)(i < n-1 ? "%u:%llu " : "%u:%llu", st->thrid, (ULong)st->tym);
}
VG_(printf)("]");
}
/* Debugging only. Return vts[index], so to speak.
*/
static ULong VTS__indexAt_SLOW ( VTS* vts, Thr* idx )
{
UWord i, n;
ThrID idx_thrid = Thr__to_ThrID(idx);
stats__vts__indexat_slow++;
tl_assert(vts);
n = vts->usedTS;
for (i = 0; i < n; i++) {
ScalarTS* st = &vts->ts[i];
if (st->thrid == idx_thrid)
return st->tym;
}
return 0;
}
/* See comment on prototype above.
*/
static void VTS__declare_thread_very_dead ( Thr* thr )
{
if (0) VG_(printf)("VTQ: tae %p\n", thr);
tl_assert(thr->llexit_done);
tl_assert(thr->joinedwith_done);
ThrID nyu;
nyu = Thr__to_ThrID(thr);
VG_(addToXA)( verydead_thread_table_not_pruned, &nyu );
/* We can only get here if we're assured that we'll never again
need to look at this thread's ::viR or ::viW. Set them to
VtsID_INVALID, partly so as to avoid holding on to the VTSs, but
mostly so that we don't wind up pruning them (as that would be
nonsensical: the only interesting ScalarTS entry for a dead
thread is its own index, and the pruning will remove that.). */
VtsID__rcdec(thr->viR);
VtsID__rcdec(thr->viW);
thr->viR = VtsID_INVALID;
thr->viW = VtsID_INVALID;
}
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION END vts primitives //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// //
// SECTION BEGIN main library //
// //
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////
// //
// VTS set //
// //
/////////////////////////////////////////////////////////
static WordFM* /* WordFM VTS* void */ vts_set = NULL;
static void vts_set_init ( void )
{
tl_assert(!vts_set);
vts_set = VG_(newFM)( HG_(zalloc), "libhb.vts_set_init.1",
HG_(free),
(Word(*)(UWord,UWord))VTS__cmp_structural );
}
/* Given a VTS, look in vts_set to see if we already have a
structurally identical one. If yes, return the pair (True, pointer
to the existing one). If no, clone this one, add the clone to the
set, and return (False, pointer to the clone). */
static Bool vts_set__find__or__clone_and_add ( /*OUT*/VTS** res, VTS* cand )
{
UWord keyW, valW;
stats__vts_set__focaa++;
tl_assert(cand->id == VtsID_INVALID);
/* lookup cand (by value) */
if (VG_(lookupFM)( vts_set, &keyW, &valW, (UWord)cand )) {
/* found it */
tl_assert(valW == 0);
/* if this fails, cand (by ref) was already present (!) */
tl_assert(keyW != (UWord)cand);
*res = (VTS*)keyW;
return True;
} else {
/* not present. Clone, add and return address of clone. */
stats__vts_set__focaa_a++;
VTS* clone = VTS__clone( "libhb.vts_set_focaa.1", cand );
tl_assert(clone != cand);
VG_(addToFM)( vts_set, (UWord)clone, 0/*val is unused*/ );
*res = clone;
return False;
}
}
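/* A minimal interning sketch (illustrative only; 'example__intern' is
   hypothetical): build a candidate VTS (whose .id must still be
   VtsID_INVALID, as asserted above), look it up by value, and discard
   the candidate -- the set only ever stores clones, never the
   candidate itself. */
#if 0
static VTS* example__intern ( VTS* cand )
{
   VTS* canonical = NULL;
   Bool present = vts_set__find__or__clone_and_add( &canonical, cand );
   if (present) {
      /* A structurally identical VTS was already in the set. */
   } else {
      /* 'canonical' is a freshly added clone of 'cand'. */
   }
   VTS__delete( cand );
   return canonical;
}
#endif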
/////////////////////////////////////////////////////////
// //
// VTS table //
// //