blob: 13c912acae27dc01d87b6ad85f8b2b07aba25ab1 [file] [log] [blame]
From 84f0606557aa8e88332c23eb10da15a9c047a287 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Wed, 1 Dec 2021 17:55:14 +0100
Subject: [PATCH] Revert "tsan: new runtime (v3)"
This reverts commit 66d4ce7e26a5ab00f7e4946b6e1bac8f805010fa.
Chromium tests started failing:
https://bugs.chromium.org/p/chromium/issues/detail?id=1275581
---
.../sanitizer_thread_registry.h | 2 -
compiler-rt/lib/tsan/CMakeLists.txt | 1 +
compiler-rt/lib/tsan/check_analyze.sh | 12 +-
compiler-rt/lib/tsan/go/build.bat | 1 -
compiler-rt/lib/tsan/go/buildgo.sh | 1 -
compiler-rt/lib/tsan/go/tsan_go.cpp | 2 +-
compiler-rt/lib/tsan/rtl/tsan_defs.h | 23 +-
compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h | 9 -
compiler-rt/lib/tsan/rtl/tsan_flags.cpp | 6 +
compiler-rt/lib/tsan/rtl/tsan_flags.inc | 10 +-
.../lib/tsan/rtl/tsan_interceptors_posix.cpp | 3 +-
.../lib/tsan/rtl/tsan_interface_atomic.cpp | 87 +-
.../lib/tsan/rtl/tsan_interface_java.cpp | 4 +-
compiler-rt/lib/tsan/rtl/tsan_mman.cpp | 32 +-
compiler-rt/lib/tsan/rtl/tsan_mman.h | 2 -
compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp | 54 +-
compiler-rt/lib/tsan/rtl/tsan_mutexset.h | 11 +-
compiler-rt/lib/tsan/rtl/tsan_platform.h | 173 +++-
.../lib/tsan/rtl/tsan_platform_linux.cpp | 48 +-
.../lib/tsan/rtl/tsan_platform_mac.cpp | 9 +-
.../lib/tsan/rtl/tsan_platform_posix.cpp | 16 +-
.../lib/tsan/rtl/tsan_platform_windows.cpp | 3 +
compiler-rt/lib/tsan/rtl/tsan_rtl.cpp | 626 ++++---------
compiler-rt/lib/tsan/rtl/tsan_rtl.h | 325 ++++---
compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp | 860 ++++++++----------
compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp | 642 +++++++------
compiler-rt/lib/tsan/rtl/tsan_rtl_proc.cpp | 1 +
compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp | 367 +++++---
compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp | 191 ++--
compiler-rt/lib/tsan/rtl/tsan_shadow.h | 315 ++++---
compiler-rt/lib/tsan/rtl/tsan_sync.cpp | 82 +-
compiler-rt/lib/tsan/rtl/tsan_sync.h | 48 +-
compiler-rt/lib/tsan/rtl/tsan_trace.h | 73 +-
.../lib/tsan/rtl/tsan_update_shadow_word.inc | 59 ++
.../lib/tsan/tests/unit/tsan_flags_test.cpp | 4 +-
.../lib/tsan/tests/unit/tsan_shadow_test.cpp | 92 +-
.../lib/tsan/tests/unit/tsan_stack_test.cpp | 4 +-
.../lib/tsan/tests/unit/tsan_sync_test.cpp | 17 +-
.../lib/tsan/tests/unit/tsan_trace_test.cpp | 175 +---
compiler-rt/test/tsan/bench_threads.cpp | 5 +
compiler-rt/test/tsan/free_race2.c | 2 +-
compiler-rt/test/tsan/memcmp_race.cpp | 2 +-
compiler-rt/test/tsan/memcpy_race.cpp | 10 +-
compiler-rt/test/tsan/mutexset7.cpp | 6 +-
44 files changed, 2132 insertions(+), 2283 deletions(-)
create mode 100644 compiler-rt/lib/tsan/rtl/tsan_update_shadow_word.inc
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
index 89e5fefa3408..a259b324220f 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_thread_registry.h
@@ -104,8 +104,6 @@ class MUTEX ThreadRegistry {
return threads_.empty() ? nullptr : threads_[tid];
}
- u32 NumThreadsLocked() const { return threads_.size(); }
-
u32 CreateThread(uptr user_id, bool detached, u32 parent_tid, void *arg);
typedef void (*ThreadCallback)(ThreadContextBase *tctx, void *arg);
diff --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt
index aede54f689aa..c3284a5dc422 100644
--- a/compiler-rt/lib/tsan/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/CMakeLists.txt
@@ -119,6 +119,7 @@ set(TSAN_HEADERS
rtl/tsan_symbolize.h
rtl/tsan_sync.h
rtl/tsan_trace.h
+ rtl/tsan_update_shadow_word.inc
rtl/tsan_vector_clock.h
)
diff --git a/compiler-rt/lib/tsan/check_analyze.sh b/compiler-rt/lib/tsan/check_analyze.sh
index f507ba0172f3..3bd817c13697 100755
--- a/compiler-rt/lib/tsan/check_analyze.sh
+++ b/compiler-rt/lib/tsan/check_analyze.sh
@@ -34,27 +34,21 @@ check() {
fi
}
-# All hot functions must contain no PUSH/POP
-# and no CALLs (everything is tail-called).
for f in write1 write2 write4 write8; do
check $f rsp 1
- check $f push 0
- check $f pop 0
- check $f call 0
+ check $f push 2
done
for f in read1 read2 read4 read8; do
check $f rsp 1
- check $f push 0
- check $f pop 0
- check $f call 0
+ check $f push 3
done
for f in func_entry func_exit; do
check $f rsp 0
check $f push 0
check $f pop 0
- check $f call 0
+ check $f call 1 # TraceSwitch()
done
echo LGTM
diff --git a/compiler-rt/lib/tsan/go/build.bat b/compiler-rt/lib/tsan/go/build.bat
index e83410044314..496e127d9581 100644
--- a/compiler-rt/lib/tsan/go/build.bat
+++ b/compiler-rt/lib/tsan/go/build.bat
@@ -14,7 +14,6 @@ type ^
..\rtl\tsan_suppressions.cpp ^
..\rtl\tsan_sync.cpp ^
..\rtl\tsan_stack_trace.cpp ^
- ..\rtl\tsan_vector_clock.cpp ^
..\..\sanitizer_common\sanitizer_allocator.cpp ^
..\..\sanitizer_common\sanitizer_common.cpp ^
..\..\sanitizer_common\sanitizer_flags.cpp ^
diff --git a/compiler-rt/lib/tsan/go/buildgo.sh b/compiler-rt/lib/tsan/go/buildgo.sh
index ab0db57b2783..8f6ffd4d34c5 100755
--- a/compiler-rt/lib/tsan/go/buildgo.sh
+++ b/compiler-rt/lib/tsan/go/buildgo.sh
@@ -19,7 +19,6 @@ SRCS="
../rtl/tsan_stack_trace.cpp
../rtl/tsan_suppressions.cpp
../rtl/tsan_sync.cpp
- ../rtl/tsan_vector_clock.cpp
../../sanitizer_common/sanitizer_allocator.cpp
../../sanitizer_common/sanitizer_common.cpp
../../sanitizer_common/sanitizer_common_libcdep.cpp
diff --git a/compiler-rt/lib/tsan/go/tsan_go.cpp b/compiler-rt/lib/tsan/go/tsan_go.cpp
index c689a51fb5e1..104c5b325aee 100644
--- a/compiler-rt/lib/tsan/go/tsan_go.cpp
+++ b/compiler-rt/lib/tsan/go/tsan_go.cpp
@@ -214,7 +214,7 @@ void __tsan_malloc(ThreadState *thr, uptr pc, uptr p, uptr sz) {
}
void __tsan_free(uptr p, uptr sz) {
- ctx->metamap.FreeRange(get_cur_proc(), p, sz, false);
+ ctx->metamap.FreeRange(get_cur_proc(), p, sz);
}
void __tsan_go_start(ThreadState *parent, ThreadState **pthr, void *pc) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
index d9f20d14a92a..4712c2be1813 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -63,13 +63,6 @@ enum class Epoch : u16 {};
constexpr uptr kEpochBits = 14;
constexpr Epoch kEpochZero = static_cast<Epoch>(0);
constexpr Epoch kEpochOver = static_cast<Epoch>(1 << kEpochBits);
-constexpr Epoch kEpochLast = static_cast<Epoch>((1 << kEpochBits) - 1);
-
-inline Epoch EpochInc(Epoch epoch) {
- return static_cast<Epoch>(static_cast<u16>(epoch) + 1);
-}
-
-inline bool EpochOverflow(Epoch epoch) { return epoch == kEpochOver; }
const int kClkBits = 42;
const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
@@ -114,7 +107,7 @@ const uptr kShadowCnt = 4;
const uptr kShadowCell = 8;
// Single shadow value.
-enum class RawShadow : u32 {};
+typedef u64 RawShadow;
const uptr kShadowSize = sizeof(RawShadow);
// Shadow memory is kShadowMultiplier times larger than user memory.
@@ -191,13 +184,10 @@ MD5Hash md5_hash(const void *data, uptr size);
struct Processor;
struct ThreadState;
class ThreadContext;
-struct TidSlot;
struct Context;
struct ReportStack;
class ReportDesc;
class RegionAlloc;
-struct Trace;
-struct TracePart;
typedef uptr AccessType;
@@ -208,8 +198,6 @@ enum : AccessType {
kAccessVptr = 1 << 2, // read or write of an object virtual table pointer
kAccessFree = 1 << 3, // synthetic memory access during memory freeing
kAccessExternalPC = 1 << 4, // access PC can have kExternalPCBit set
- kAccessCheckOnly = 1 << 5, // check for races, but don't store
- kAccessNoRodata = 1 << 6, // don't check for .rodata marker
};
// Descriptor of user's memory block.
@@ -231,8 +219,9 @@ enum ExternalTag : uptr {
// as 16-bit values, see tsan_defs.h.
};
-enum {
- MutexTypeReport = MutexLastCommon,
+enum MutexType {
+ MutexTypeTrace = MutexLastCommon,
+ MutexTypeReport,
MutexTypeSyncVar,
MutexTypeAnnotations,
MutexTypeAtExit,
@@ -240,10 +229,6 @@ enum {
MutexTypeRacy,
MutexTypeGlobalProc,
MutexTypeInternalAlloc,
- MutexTypeTrace,
- MutexTypeSlot,
- MutexTypeSlots,
- MutexTypeMultiSlot,
};
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h
index 7a39a39d51de..9e15f74a0615 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_dense_alloc.h
@@ -104,15 +104,6 @@ class DenseSlabAlloc {
return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T);
}
- template <typename Func>
- void ForEach(Func func) {
- SpinMutexLock lock(&mtx_);
- uptr fillpos = atomic_load_relaxed(&fillpos_);
- for (uptr l1 = 0; l1 < fillpos; l1++) {
- for (IndexT l2 = l1 == 0 ? 1 : 0; l2 < kL2Size; l2++) func(&map_[l1][l2]);
- }
- }
-
private:
T *map_[kL1Size];
SpinMutex mtx_;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp
index 54bed9f9a6be..ee89862d17bd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp
@@ -110,6 +110,12 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) {
if (common_flags()->help) parser.PrintFlagDescriptions();
+ if (f->history_size < 0 || f->history_size > 7) {
+ Printf("ThreadSanitizer: incorrect value for history_size"
+ " (must be [0..7])\n");
+ Die();
+ }
+
if (f->io_sync < 0 || f->io_sync > 2) {
Printf("ThreadSanitizer: incorrect value for io_sync"
" (must be [0..2])\n");
diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.inc b/compiler-rt/lib/tsan/rtl/tsan_flags.inc
index 3df180ec68cc..7954a4307fa1 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_flags.inc
+++ b/compiler-rt/lib/tsan/rtl/tsan_flags.inc
@@ -59,10 +59,14 @@ TSAN_FLAG(bool, stop_on_start, false,
"Stops on start until __tsan_resume() is called (for debugging).")
TSAN_FLAG(bool, running_on_valgrind, false,
"Controls whether RunningOnValgrind() returns true or false.")
+// There are a lot of goroutines in Go, so we use smaller history.
TSAN_FLAG(
- uptr, history_size, 0,
- "Per-thread history size,"
- " controls how many extra previous memory accesses are remembered per thread.")
+ int, history_size, SANITIZER_GO ? 1 : 3,
+ "Per-thread history size, controls how many previous memory accesses "
+ "are remembered per thread. Possible values are [0..7]. "
+ "history_size=0 amounts to 32K memory accesses. Each next value doubles "
+ "the amount of memory accesses, up to history_size=7 that amounts to "
+ "4M memory accesses. The default value is 2 (128K memory accesses).")
TSAN_FLAG(int, io_sync, 1,
"Controls level of synchronization implied by IO operations. "
"0 - no synchronization "
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 280db4ae28e5..73df011b4212 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -1981,7 +1981,6 @@ static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) {
static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
int sig, __sanitizer_siginfo *info,
void *uctx) {
- CHECK(thr->slot);
__sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions;
if (acquire)
Acquire(thr, 0, (uptr)&sigactions[sig]);
@@ -2269,7 +2268,7 @@ struct dl_iterate_phdr_data {
};
static bool IsAppNotRodata(uptr addr) {
- return IsAppMem(addr) && *MemToShadow(addr) != Shadow::kRodata;
+ return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata;
}
static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
index f794a2fcdd0d..24ba3bb1f65d 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
@@ -235,9 +235,8 @@ static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
T v = NoTsanAtomicLoad(a, mo);
SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a);
if (s) {
- SlotLocker locker(thr);
- ReadLock lock(&s->mtx);
- thr->clock.Acquire(s->clock);
+ ReadLock l(&s->mtx);
+ AcquireImpl(thr, pc, &s->clock);
// Re-read under sync mutex because we need a consistent snapshot
// of the value and the clock we acquire.
v = NoTsanAtomicLoad(a, mo);
@@ -271,14 +270,14 @@ static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
NoTsanAtomicStore(a, v, mo);
return;
}
- SlotLocker locker(thr);
- {
- auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- Lock lock(&s->mtx);
- thr->clock.ReleaseStore(&s->clock);
- NoTsanAtomicStore(a, v, mo);
- }
- IncrementEpoch(thr);
+ __sync_synchronize();
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ Lock l(&s->mtx);
+ thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+ ReleaseStoreImpl(thr, pc, &s->clock);
+ NoTsanAtomicStore(a, v, mo);
}
template <typename T, T (*F)(volatile T *v, T op)>
@@ -286,21 +285,18 @@ static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
if (LIKELY(mo == mo_relaxed))
return F(a, v);
- SlotLocker locker(thr);
- {
- auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- RWLock lock(&s->mtx, IsReleaseOrder(mo));
- if (IsAcqRelOrder(mo))
- thr->clock.ReleaseAcquire(&s->clock);
- else if (IsReleaseOrder(mo))
- thr->clock.Release(&s->clock);
- else if (IsAcquireOrder(mo))
- thr->clock.Acquire(s->clock);
- v = F(a, v);
- }
- if (IsReleaseOrder(mo))
- IncrementEpoch(thr);
- return v;
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ Lock l(&s->mtx);
+ thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+ if (IsAcqRelOrder(mo))
+ AcquireReleaseImpl(thr, pc, &s->clock);
+ else if (IsReleaseOrder(mo))
+ ReleaseImpl(thr, pc, &s->clock);
+ else if (IsAcquireOrder(mo))
+ AcquireImpl(thr, pc, &s->clock);
+ return F(a, v);
}
template<typename T>
@@ -420,28 +416,27 @@ static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v,
*c = pr;
return false;
}
- SlotLocker locker(thr);
+
bool release = IsReleaseOrder(mo);
- bool success;
- {
- auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- RWLock lock(&s->mtx, release);
- T cc = *c;
- T pr = func_cas(a, cc, v);
- success = pr == cc;
- if (!success) {
- *c = pr;
- mo = fmo;
- }
- if (success && IsAcqRelOrder(mo))
- thr->clock.ReleaseAcquire(&s->clock);
- else if (success && IsReleaseOrder(mo))
- thr->clock.Release(&s->clock);
- else if (IsAcquireOrder(mo))
- thr->clock.Acquire(s->clock);
+ SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+ RWLock l(&s->mtx, release);
+ T cc = *c;
+ T pr = func_cas(a, cc, v);
+ bool success = pr == cc;
+ if (!success) {
+ *c = pr;
+ mo = fmo;
}
- if (success && release)
- IncrementEpoch(thr);
+ thr->fast_state.IncrementEpoch();
+ // Can't increment epoch w/o writing to the trace as well.
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+
+ if (success && IsAcqRelOrder(mo))
+ AcquireReleaseImpl(thr, pc, &s->clock);
+ else if (success && IsReleaseOrder(mo))
+ ReleaseImpl(thr, pc, &s->clock);
+ else if (IsAcquireOrder(mo))
+ AcquireImpl(thr, pc, &s->clock);
return success;
}
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
index 7c15a1638826..c090c1f08cbe 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_java.cpp
@@ -106,7 +106,7 @@ void __tsan_java_free(jptr ptr, jptr size) {
DCHECK_GE(ptr, jctx->heap_begin);
DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
- ctx->metamap.FreeRange(thr->proc(), ptr, size, false);
+ ctx->metamap.FreeRange(thr->proc(), ptr, size);
}
void __tsan_java_move(jptr src, jptr dst, jptr size) {
@@ -133,7 +133,7 @@ void __tsan_java_move(jptr src, jptr dst, jptr size) {
// support that anymore as it contains addresses of accesses.
RawShadow *d = MemToShadow(dst);
RawShadow *dend = MemToShadow(dst + size);
- ShadowSet(d, dend, Shadow::kEmpty);
+ internal_memset(d, 0, (dend - d) * sizeof(*d));
}
jptr __tsan_java_find(jptr *from_ptr, jptr to) {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
index 18022d012bbc..a31bebcb6ba9 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
@@ -125,6 +125,7 @@ ScopedGlobalProcessor::~ScopedGlobalProcessor() {
}
void AllocatorLock() NO_THREAD_SAFETY_ANALYSIS {
+ global_proc()->mtx.Lock();
global_proc()->internal_alloc_mtx.Lock();
InternalAllocatorLock();
}
@@ -132,13 +133,6 @@ void AllocatorLock() NO_THREAD_SAFETY_ANALYSIS {
void AllocatorUnlock() NO_THREAD_SAFETY_ANALYSIS {
InternalAllocatorUnlock();
global_proc()->internal_alloc_mtx.Unlock();
-}
-
-void GlobalProcessorLock() NO_THREAD_SAFETY_ANALYSIS {
- global_proc()->mtx.Lock();
-}
-
-void GlobalProcessorUnlock() NO_THREAD_SAFETY_ANALYSIS {
global_proc()->mtx.Unlock();
}
@@ -251,17 +245,8 @@ void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) {
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p);
- // Note: this can run before thread initialization/after finalization.
- // As a result this is not necessarily synchronized with DoReset,
- // which iterates over and resets all sync objects,
- // but it is fine to create new MBlocks in this context.
ctx->metamap.AllocBlock(thr, pc, p, sz);
- // If this runs before thread initialization/after finalization
- // and we don't have trace initialized, we can't imitate writes.
- // In such case just reset the shadow range, it is fine since
- // it affects only a small fraction of special objects.
- if (write && thr->ignore_reads_and_writes == 0 &&
- atomic_load_relaxed(&thr->trace_pos))
+ if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
else
MemoryResetRange(thr, pc, (uptr)p, sz);
@@ -269,16 +254,9 @@ void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
CHECK_NE(p, (void*)0);
- if (!thr->slot) {
- // Very early/late in thread lifetime, or during fork.
- UNUSED uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, false);
- DPrintf("#%d: free(0x%zx, %zu) (no slot)\n", thr->tid, p, sz);
- return;
- }
- SlotLocker locker(thr);
- uptr sz = ctx->metamap.FreeBlock(thr->proc(), p, true);
+ uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz);
- if (write && thr->ignore_reads_and_writes == 0)
+ if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
MemoryRangeFreed(thr, pc, (uptr)p, sz);
}
@@ -443,6 +421,8 @@ uptr __sanitizer_get_allocated_size(const void *p) {
void __tsan_on_thread_idle() {
ThreadState *thr = cur_thread();
+ thr->clock.ResetCached(&thr->proc()->clock_cache);
+ thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
allocator()->SwallowCache(&thr->proc()->alloc_cache);
internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache);
ctx->metamap.OnProcIdle(thr->proc());
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.h b/compiler-rt/lib/tsan/rtl/tsan_mman.h
index 2095f28c0253..db8488eabbe2 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.h
@@ -26,8 +26,6 @@ void AllocatorProcFinish(Processor *proc);
void AllocatorPrintStats();
void AllocatorLock();
void AllocatorUnlock();
-void GlobalProcessorLock();
-void GlobalProcessorUnlock();
// For user allocations.
void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
index 3a75b80ac30f..735179686ba9 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.cpp
@@ -19,7 +19,57 @@ namespace __tsan {
MutexSet::MutexSet() {
}
-void MutexSet::Reset() { internal_memset(this, 0, sizeof(*this)); }
+void MutexSet::Add(u64 id, bool write, u64 epoch) {
+ // Look up existing mutex with the same id.
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].id == id) {
+ descs_[i].count++;
+ descs_[i].epoch = epoch;
+ return;
+ }
+ }
+ // On overflow, find the oldest mutex and drop it.
+ if (size_ == kMaxSize) {
+ u64 minepoch = (u64)-1;
+ u64 mini = (u64)-1;
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].epoch < minepoch) {
+ minepoch = descs_[i].epoch;
+ mini = i;
+ }
+ }
+ RemovePos(mini);
+ CHECK_EQ(size_, kMaxSize - 1);
+ }
+ // Add new mutex descriptor.
+ descs_[size_].addr = 0;
+ descs_[size_].stack_id = kInvalidStackID;
+ descs_[size_].id = id;
+ descs_[size_].write = write;
+ descs_[size_].epoch = epoch;
+ descs_[size_].seq = seq_++;
+ descs_[size_].count = 1;
+ size_++;
+}
+
+void MutexSet::Del(u64 id, bool write) {
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].id == id) {
+ if (--descs_[i].count == 0)
+ RemovePos(i);
+ return;
+ }
+ }
+}
+
+void MutexSet::Remove(u64 id) {
+ for (uptr i = 0; i < size_; i++) {
+ if (descs_[i].id == id) {
+ RemovePos(i);
+ return;
+ }
+ }
+}
void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
// Look up existing mutex with the same id.
@@ -43,7 +93,9 @@ void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
// Add new mutex descriptor.
descs_[size_].addr = addr;
descs_[size_].stack_id = stack_id;
+ descs_[size_].id = 0;
descs_[size_].write = write;
+ descs_[size_].epoch = 0;
descs_[size_].seq = seq_++;
descs_[size_].count = 1;
size_++;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h
index aabd361e6afd..93776a664135 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mutexset.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_mutexset.h
@@ -25,6 +25,8 @@ class MutexSet {
struct Desc {
uptr addr;
StackID stack_id;
+ u64 id;
+ u64 epoch;
u32 seq;
u32 count;
bool write;
@@ -38,7 +40,10 @@ class MutexSet {
};
MutexSet();
- void Reset();
+ // The 'id' is obtained from SyncVar::GetId().
+ void Add(u64 id, bool write, u64 epoch);
+ void Del(u64 id, bool write);
+ void Remove(u64 id); // Removes the mutex completely (if it's destroyed).
void AddAddr(uptr addr, StackID stack_id, bool write);
void DelAddr(uptr addr, bool destroy = false);
uptr Size() const;
@@ -77,7 +82,9 @@ class DynamicMutexSet {
// in different goroutine).
#if SANITIZER_GO
MutexSet::MutexSet() {}
-void MutexSet::Reset() {}
+void MutexSet::Add(u64 id, bool write, u64 epoch) {}
+void MutexSet::Del(u64 id, bool write) {}
+void MutexSet::Remove(u64 id) {}
void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {}
void MutexSet::DelAddr(uptr addr, bool destroy) {}
uptr MutexSet::Size() const { return 0; }
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform.h b/compiler-rt/lib/tsan/rtl/tsan_platform.h
index e28bac2457aa..7ff0acace8f6 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform.h
@@ -18,8 +18,8 @@
# error "Only 64-bit is supported"
#endif
-#include "sanitizer_common/sanitizer_common.h"
#include "tsan_defs.h"
+#include "tsan_trace.h"
namespace __tsan {
@@ -45,7 +45,9 @@ C/C++ on linux/x86_64 and freebsd/x86_64
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
4000 0000 0000 - 5500 0000 0000: -
5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels
-5680 0000 0000 - 7d00 0000 0000: -
+5680 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 7b00 0000 0000: -
7b00 0000 0000 - 7c00 0000 0000: heap
7c00 0000 0000 - 7e80 0000 0000: -
7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
@@ -65,6 +67,8 @@ C/C++ on netbsd/amd64 can reuse the same mapping:
struct Mapping48AddressSpace {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x340000000000ull;
+ static const uptr kTraceMemBeg = 0x600000000000ull;
+ static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
static const uptr kShadowEnd = 0x200000000000ull;
static const uptr kHeapMemBeg = 0x7b0000000000ull;
@@ -85,12 +89,14 @@ struct Mapping48AddressSpace {
C/C++ on linux/mips64 (40-bit VMA)
0000 0000 00 - 0100 0000 00: - (4 GB)
0100 0000 00 - 0200 0000 00: main binary (4 GB)
-0200 0000 00 - 1200 0000 00: - (120 GB)
-1200 0000 00 - 4000 0000 00: shadow (128 GB)
+0200 0000 00 - 2000 0000 00: - (120 GB)
+2000 0000 00 - 4000 0000 00: shadow (128 GB)
4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects) (64 GB)
5000 0000 00 - aa00 0000 00: - (360 GB)
aa00 0000 00 - ab00 0000 00: main binary (PIE) (4 GB)
-ab00 0000 00 - fe00 0000 00: - (332 GB)
+ab00 0000 00 - b000 0000 00: - (20 GB)
+b000 0000 00 - b200 0000 00: traces (8 GB)
+b200 0000 00 - fe00 0000 00: - (304 GB)
fe00 0000 00 - ff00 0000 00: heap (4 GB)
ff00 0000 00 - ff80 0000 00: - (2 GB)
ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB)
@@ -98,7 +104,9 @@ ff80 0000 00 - ffff ffff ff: modules and main thread stack (<2 GB)
struct MappingMips64_40 {
static const uptr kMetaShadowBeg = 0x4000000000ull;
static const uptr kMetaShadowEnd = 0x5000000000ull;
- static const uptr kShadowBeg = 0x1200000000ull;
+ static const uptr kTraceMemBeg = 0xb000000000ull;
+ static const uptr kTraceMemEnd = 0xb200000000ull;
+ static const uptr kShadowBeg = 0x2000000000ull;
static const uptr kShadowEnd = 0x4000000000ull;
static const uptr kHeapMemBeg = 0xfe00000000ull;
static const uptr kHeapMemEnd = 0xff00000000ull;
@@ -123,7 +131,9 @@ C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
0400 0000 00 - 0c00 0000 00: shadow memory (32 GB)
0c00 0000 00 - 0d00 0000 00: - (4 GB)
0d00 0000 00 - 0e00 0000 00: metainfo (4 GB)
-0e00 0000 00 - 1000 0000 00: -
+0e00 0000 00 - 0f00 0000 00: - (4 GB)
+0f00 0000 00 - 0fc0 0000 00: traces (3 GB)
+0fc0 0000 00 - 1000 0000 00: -
*/
struct MappingAppleAarch64 {
static const uptr kLoAppMemBeg = 0x0100000000ull;
@@ -134,11 +144,13 @@ struct MappingAppleAarch64 {
static const uptr kShadowEnd = 0x0c00000000ull;
static const uptr kMetaShadowBeg = 0x0d00000000ull;
static const uptr kMetaShadowEnd = 0x0e00000000ull;
+ static const uptr kTraceMemBeg = 0x0f00000000ull;
+ static const uptr kTraceMemEnd = 0x0fc0000000ull;
static const uptr kHiAppMemBeg = 0x0fc0000000ull;
static const uptr kHiAppMemEnd = 0x0fc0000000ull;
static const uptr kShadowMsk = 0x0ull;
static const uptr kShadowXor = 0x0ull;
- static const uptr kShadowAdd = 0x0200000000ull;
+ static const uptr kShadowAdd = 0x0ull;
static const uptr kVdsoBeg = 0x7000000000000000ull;
static const uptr kMidAppMemBeg = 0;
static const uptr kMidAppMemEnd = 0;
@@ -147,25 +159,29 @@ struct MappingAppleAarch64 {
/*
C/C++ on linux/aarch64 (39-bit VMA)
0000 0010 00 - 0100 0000 00: main binary
-0100 0000 00 - 0400 0000 00: -
-0400 0000 00 - 2000 0000 00: shadow memory
+0100 0000 00 - 0800 0000 00: -
+0800 0000 00 - 2000 0000 00: shadow memory
2000 0000 00 - 3100 0000 00: -
3100 0000 00 - 3400 0000 00: metainfo
3400 0000 00 - 5500 0000 00: -
5500 0000 00 - 5600 0000 00: main binary (PIE)
-5600 0000 00 - 7c00 0000 00: -
+5600 0000 00 - 6000 0000 00: -
+6000 0000 00 - 6200 0000 00: traces
+6200 0000 00 - 7c00 0000 00: -
7c00 0000 00 - 7d00 0000 00: heap
7d00 0000 00 - 7fff ffff ff: modules and main thread stack
*/
struct MappingAarch64_39 {
static const uptr kLoAppMemBeg = 0x0000001000ull;
static const uptr kLoAppMemEnd = 0x0100000000ull;
- static const uptr kShadowBeg = 0x0400000000ull;
+ static const uptr kShadowBeg = 0x0800000000ull;
static const uptr kShadowEnd = 0x2000000000ull;
static const uptr kMetaShadowBeg = 0x3100000000ull;
static const uptr kMetaShadowEnd = 0x3400000000ull;
static const uptr kMidAppMemBeg = 0x5500000000ull;
- static const uptr kMidAppMemEnd = 0x5600000000ull;
+ static const uptr kMidAppMemEnd = 0x5600000000ull;
+ static const uptr kTraceMemBeg = 0x6000000000ull;
+ static const uptr kTraceMemEnd = 0x6200000000ull;
static const uptr kHeapMemBeg = 0x7c00000000ull;
static const uptr kHeapMemEnd = 0x7d00000000ull;
static const uptr kHiAppMemBeg = 0x7e00000000ull;
@@ -179,13 +195,15 @@ struct MappingAarch64_39 {
/*
C/C++ on linux/aarch64 (42-bit VMA)
00000 0010 00 - 01000 0000 00: main binary
-01000 0000 00 - 08000 0000 00: -
-08000 0000 00 - 20000 0000 00: shadow memory
+01000 0000 00 - 10000 0000 00: -
+10000 0000 00 - 20000 0000 00: shadow memory
20000 0000 00 - 26000 0000 00: -
26000 0000 00 - 28000 0000 00: metainfo
28000 0000 00 - 2aa00 0000 00: -
2aa00 0000 00 - 2ab00 0000 00: main binary (PIE)
-2ab00 0000 00 - 3e000 0000 00: -
+2ab00 0000 00 - 36200 0000 00: -
+36200 0000 00 - 36400 0000 00: traces
+36400 0000 00 - 3e000 0000 00: -
3e000 0000 00 - 3f000 0000 00: heap
3f000 0000 00 - 3ffff ffff ff: modules and main thread stack
*/
@@ -193,12 +211,14 @@ struct MappingAarch64_42 {
static const uptr kBroken = kBrokenReverseMapping;
static const uptr kLoAppMemBeg = 0x00000001000ull;
static const uptr kLoAppMemEnd = 0x01000000000ull;
- static const uptr kShadowBeg = 0x08000000000ull;
+ static const uptr kShadowBeg = 0x10000000000ull;
static const uptr kShadowEnd = 0x20000000000ull;
static const uptr kMetaShadowBeg = 0x26000000000ull;
static const uptr kMetaShadowEnd = 0x28000000000ull;
static const uptr kMidAppMemBeg = 0x2aa00000000ull;
- static const uptr kMidAppMemEnd = 0x2ab00000000ull;
+ static const uptr kMidAppMemEnd = 0x2ab00000000ull;
+ static const uptr kTraceMemBeg = 0x36200000000ull;
+ static const uptr kTraceMemEnd = 0x36400000000ull;
static const uptr kHeapMemBeg = 0x3e000000000ull;
static const uptr kHeapMemEnd = 0x3f000000000ull;
static const uptr kHiAppMemBeg = 0x3f000000000ull;
@@ -212,12 +232,14 @@ struct MappingAarch64_42 {
struct MappingAarch64_48 {
static const uptr kLoAppMemBeg = 0x0000000001000ull;
static const uptr kLoAppMemEnd = 0x0000200000000ull;
- static const uptr kShadowBeg = 0x0001000000000ull;
+ static const uptr kShadowBeg = 0x0002000000000ull;
static const uptr kShadowEnd = 0x0004000000000ull;
static const uptr kMetaShadowBeg = 0x0005000000000ull;
static const uptr kMetaShadowEnd = 0x0006000000000ull;
static const uptr kMidAppMemBeg = 0x0aaaa00000000ull;
- static const uptr kMidAppMemEnd = 0x0aaaf00000000ull;
+ static const uptr kMidAppMemEnd = 0x0aaaf00000000ull;
+ static const uptr kTraceMemBeg = 0x0f06000000000ull;
+ static const uptr kTraceMemEnd = 0x0f06200000000ull;
static const uptr kHeapMemBeg = 0x0ffff00000000ull;
static const uptr kHeapMemEnd = 0x0ffff00000000ull;
static const uptr kHiAppMemBeg = 0x0ffff00000000ull;
@@ -235,7 +257,9 @@ C/C++ on linux/powerpc64 (44-bit VMA)
0001 0000 0000 - 0b00 0000 0000: shadow
0b00 0000 0000 - 0b00 0000 0000: -
0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects)
-0d00 0000 0000 - 0f00 0000 0000: -
+0d00 0000 0000 - 0d00 0000 0000: -
+0d00 0000 0000 - 0f00 0000 0000: traces
+0f00 0000 0000 - 0f00 0000 0000: -
0f00 0000 0000 - 0f50 0000 0000: heap
0f50 0000 0000 - 0f60 0000 0000: -
0f60 0000 0000 - 1000 0000 0000: modules and main thread stack
@@ -245,6 +269,8 @@ struct MappingPPC64_44 {
kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity;
static const uptr kMetaShadowBeg = 0x0b0000000000ull;
static const uptr kMetaShadowEnd = 0x0d0000000000ull;
+ static const uptr kTraceMemBeg = 0x0d0000000000ull;
+ static const uptr kTraceMemEnd = 0x0f0000000000ull;
static const uptr kShadowBeg = 0x000100000000ull;
static const uptr kShadowEnd = 0x0b0000000000ull;
static const uptr kLoAppMemBeg = 0x000000000100ull;
@@ -269,7 +295,8 @@ C/C++ on linux/powerpc64 (46-bit VMA)
1000 0000 0000 - 1000 0000 0000: -
1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
2000 0000 0000 - 2000 0000 0000: -
-1200 0000 0000 - 3d00 0000 0000: -
+2000 0000 0000 - 2200 0000 0000: traces
+2200 0000 0000 - 3d00 0000 0000: -
3d00 0000 0000 - 3e00 0000 0000: heap
3e00 0000 0000 - 3e80 0000 0000: -
3e80 0000 0000 - 4000 0000 0000: modules and main thread stack
@@ -277,6 +304,8 @@ C/C++ on linux/powerpc64 (46-bit VMA)
struct MappingPPC64_46 {
static const uptr kMetaShadowBeg = 0x100000000000ull;
static const uptr kMetaShadowEnd = 0x200000000000ull;
+ static const uptr kTraceMemBeg = 0x200000000000ull;
+ static const uptr kTraceMemEnd = 0x220000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
static const uptr kShadowEnd = 0x100000000000ull;
static const uptr kHeapMemBeg = 0x3d0000000000ull;
@@ -300,7 +329,9 @@ C/C++ on linux/powerpc64 (47-bit VMA)
0100 0000 0000 - 1000 0000 0000: shadow
1000 0000 0000 - 1000 0000 0000: -
1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
-2000 0000 0000 - 7d00 0000 0000: -
+2000 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2200 0000 0000: traces
+2200 0000 0000 - 7d00 0000 0000: -
7d00 0000 0000 - 7e00 0000 0000: heap
7e00 0000 0000 - 7e80 0000 0000: -
7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
@@ -308,6 +339,8 @@ C/C++ on linux/powerpc64 (47-bit VMA)
struct MappingPPC64_47 {
static const uptr kMetaShadowBeg = 0x100000000000ull;
static const uptr kMetaShadowEnd = 0x200000000000ull;
+ static const uptr kTraceMemBeg = 0x200000000000ull;
+ static const uptr kTraceMemEnd = 0x220000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
static const uptr kShadowEnd = 0x100000000000ull;
static const uptr kHeapMemBeg = 0x7d0000000000ull;
@@ -329,17 +362,21 @@ C/C++ on linux/s390x
While the kernel provides a 64-bit address space, we have to restrict ourselves
to 48 bits due to how e.g. SyncVar::GetId() works.
0000 0000 1000 - 0e00 0000 0000: binary, modules, stacks - 14 TiB
-0e00 0000 0000 - 2000 0000 0000: -
-2000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
+0e00 0000 0000 - 4000 0000 0000: -
+4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
8000 0000 0000 - 9000 0000 0000: -
9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app)
-9800 0000 0000 - be00 0000 0000: -
+9800 0000 0000 - a000 0000 0000: -
+a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
+b000 0000 0000 - be00 0000 0000: -
be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator)
*/
struct MappingS390x {
static const uptr kMetaShadowBeg = 0x900000000000ull;
static const uptr kMetaShadowEnd = 0x980000000000ull;
- static const uptr kShadowBeg = 0x200000000000ull;
+ static const uptr kTraceMemBeg = 0xa00000000000ull;
+ static const uptr kTraceMemEnd = 0xb00000000000ull;
+ static const uptr kShadowBeg = 0x400000000000ull;
static const uptr kShadowEnd = 0x800000000000ull;
static const uptr kHeapMemBeg = 0xbe0000000000ull;
static const uptr kHeapMemEnd = 0xc00000000000ull;
@@ -363,12 +400,16 @@ struct MappingS390x {
2000 0000 0000 - 2380 0000 0000: shadow
2380 0000 0000 - 3000 0000 0000: -
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 8000 0000 0000: -
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGo48 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
+ static const uptr kTraceMemBeg = 0x600000000000ull;
+ static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x238000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -391,7 +432,7 @@ struct MappingGo48 {
00c0 0000 0000 - 00e0 0000 0000: heap
00e0 0000 0000 - 0100 0000 0000: -
0100 0000 0000 - 0500 0000 0000: shadow
-0500 0000 0000 - 0700 0000 0000: -
+0500 0000 0000 - 0700 0000 0000: traces
0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects)
07d0 0000 0000 - 8000 0000 0000: -
*/
@@ -399,6 +440,8 @@ struct MappingGo48 {
struct MappingGoWindows {
static const uptr kMetaShadowBeg = 0x070000000000ull;
static const uptr kMetaShadowEnd = 0x077000000000ull;
+ static const uptr kTraceMemBeg = 0x050000000000ull;
+ static const uptr kTraceMemEnd = 0x070000000000ull;
static const uptr kShadowBeg = 0x010000000000ull;
static const uptr kShadowEnd = 0x050000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -423,12 +466,16 @@ struct MappingGoWindows {
2000 0000 0000 - 2380 0000 0000: shadow
2380 0000 0000 - 2400 0000 0000: -
2400 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects)
-3400 0000 0000 - 4000 0000 0000: -
+3400 0000 0000 - 3600 0000 0000: -
+3600 0000 0000 - 3800 0000 0000: traces
+3800 0000 0000 - 4000 0000 0000: -
*/
struct MappingGoPPC64_46 {
static const uptr kMetaShadowBeg = 0x240000000000ull;
static const uptr kMetaShadowEnd = 0x340000000000ull;
+ static const uptr kTraceMemBeg = 0x360000000000ull;
+ static const uptr kTraceMemEnd = 0x380000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x238000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -453,12 +500,16 @@ struct MappingGoPPC64_46 {
2000 0000 0000 - 3000 0000 0000: shadow
3000 0000 0000 - 3000 0000 0000: -
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 8000 0000 0000: -
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGoPPC64_47 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
+ static const uptr kTraceMemBeg = 0x600000000000ull;
+ static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x300000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -483,11 +534,15 @@ struct MappingGoPPC64_47 {
2000 0000 0000 - 3000 0000 0000: shadow
3000 0000 0000 - 3000 0000 0000: -
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-4000 0000 0000 - 8000 0000 0000: -
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGoAarch64 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
+ static const uptr kTraceMemBeg = 0x600000000000ull;
+ static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x300000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -513,11 +568,15 @@ Go on linux/mips64 (47-bit VMA)
2000 0000 0000 - 3000 0000 0000: shadow
3000 0000 0000 - 3000 0000 0000: -
3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
-3200 0000 0000 - 8000 0000 0000: -
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
*/
struct MappingGoMips64_47 {
static const uptr kMetaShadowBeg = 0x300000000000ull;
static const uptr kMetaShadowEnd = 0x400000000000ull;
+ static const uptr kTraceMemBeg = 0x600000000000ull;
+ static const uptr kTraceMemEnd = 0x620000000000ull;
static const uptr kShadowBeg = 0x200000000000ull;
static const uptr kShadowEnd = 0x300000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -541,10 +600,14 @@ Go on linux/s390x
4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
8000 0000 0000 - 9000 0000 0000: -
9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app)
+9800 0000 0000 - a000 0000 0000: -
+a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
*/
struct MappingGoS390x {
static const uptr kMetaShadowBeg = 0x900000000000ull;
static const uptr kMetaShadowEnd = 0x980000000000ull;
+ static const uptr kTraceMemBeg = 0xa00000000000ull;
+ static const uptr kTraceMemEnd = 0xb00000000000ull;
static const uptr kShadowBeg = 0x400000000000ull;
static const uptr kShadowEnd = 0x800000000000ull;
static const uptr kLoAppMemBeg = 0x000000001000ull;
@@ -652,6 +715,8 @@ enum MappingType {
kShadowEnd,
kMetaShadowBeg,
kMetaShadowEnd,
+ kTraceMemBeg,
+ kTraceMemEnd,
kVdsoBeg,
};
@@ -685,6 +750,10 @@ struct MappingField {
return Mapping::kMetaShadowBeg;
case kMetaShadowEnd:
return Mapping::kMetaShadowEnd;
+ case kTraceMemBeg:
+ return Mapping::kTraceMemBeg;
+ case kTraceMemEnd:
+ return Mapping::kTraceMemEnd;
}
Die();
}
@@ -723,6 +792,11 @@ uptr MetaShadowBeg(void) { return SelectMapping<MappingField>(kMetaShadowBeg); }
ALWAYS_INLINE
uptr MetaShadowEnd(void) { return SelectMapping<MappingField>(kMetaShadowEnd); }
+ALWAYS_INLINE
+uptr TraceMemBeg(void) { return SelectMapping<MappingField>(kTraceMemBeg); }
+ALWAYS_INLINE
+uptr TraceMemEnd(void) { return SelectMapping<MappingField>(kTraceMemEnd); }
+
struct IsAppMemImpl {
template <typename Mapping>
static bool Apply(uptr mem) {
@@ -860,10 +934,43 @@ inline uptr RestoreAddr(uptr addr) {
return SelectMapping<RestoreAddrImpl>(addr);
}
+// Per-thread trace slot size. The extra 64K is there to catch shadow stack
+// overflow as a page fault; the sum is rounded up to 64K for Windows mmaps.
+const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace)
+ + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1);
+
+struct GetThreadTraceImpl {  // Computes the start of thread tid's trace slot.
+ template <typename Mapping>
+ static uptr Apply(uptr tid) {
+ uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize;  // slot base
+ DCHECK_LT(p, Mapping::kTraceMemEnd);  // only the slot start is range-checked
+ return p;
+ }
+};
+
+ALWAYS_INLINE  // NOTE(review): int tid becomes uptr in Apply; negative would wrap.
+uptr GetThreadTrace(int tid) { return SelectMapping<GetThreadTraceImpl>(tid); }
+
+struct GetThreadTraceHeaderImpl {  // Computes where tid's trace header starts.
+ template <typename Mapping>
+ static uptr Apply(uptr tid) {
+ uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize +
+ kTraceSize * sizeof(Event);  // skip the slot's kTraceSize events
+ DCHECK_LT(p, Mapping::kTraceMemEnd);  // only the header start is checked
+ return p;
+ }
+};
+
+ALWAYS_INLINE  // Dispatches to GetThreadTraceHeaderImpl via SelectMapping.
+uptr GetThreadTraceHeader(int tid) {
+ return SelectMapping<GetThreadTraceHeaderImpl>(tid);
+}
+
void InitializePlatform();
void InitializePlatformEarly();
void CheckAndProtect();
void InitializeShadowMemoryPlatform();
+void FlushShadowMemory();
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns);
int ExtractResolvFDs(void *state, int *fds, int nfd);
int ExtractRecvmsgFDs(void *msg, int *fds, int nfd);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
index 17dbdff8a539..73ec14892d28 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cpp
@@ -94,6 +94,7 @@ enum {
MemMeta,
MemFile,
MemMmap,
+ MemTrace,
MemHeap,
MemOther,
MemCount,
@@ -111,6 +112,8 @@ void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) {
mem[file ? MemFile : MemMmap] += rss;
else if (p >= HeapMemBeg() && p < HeapMemEnd())
mem[MemHeap] += rss;
+ else if (p >= TraceMemBeg() && p < TraceMemEnd())
+ mem[MemTrace] += rss;
else
mem[MemOther] += rss;
}
@@ -123,33 +126,42 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
StackDepotStats stacks = StackDepotGetStats();
uptr nthread, nlive;
ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
- uptr trace_mem;
- {
- Lock l(&ctx->slot_mtx);
- trace_mem = ctx->trace_part_total_allocated * sizeof(TracePart);
- }
uptr internal_stats[AllocatorStatCount];
internal_allocator()->GetStats(internal_stats);
// All these are allocated from the common mmap region.
- mem[MemMmap] -= meta.mem_block + meta.sync_obj + trace_mem +
- stacks.allocated + internal_stats[AllocatorStatMapped];
+ mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated +
+ internal_stats[AllocatorStatMapped];
if (s64(mem[MemMmap]) < 0)
mem[MemMmap] = 0;
internal_snprintf(
buf, buf_size,
- "==%zu== %llus [%zu]: RSS %zd MB: shadow:%zd meta:%zd file:%zd"
- " mmap:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
- " trace:%zu stacks=%zd threads=%zu/%zu\n",
- internal_getpid(), uptime_ns / (1000 * 1000 * 1000), ctx->global_epoch,
- mem[MemTotal] >> 20, mem[MemShadow] >> 20, mem[MemMeta] >> 20,
- mem[MemFile] >> 20, mem[MemMmap] >> 20, mem[MemHeap] >> 20,
+ "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
+ " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
+ " stacks=%zd[%zd] nthr=%zd/%zd\n",
+ uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20,
+ mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20,
+ mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20,
mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20,
- meta.mem_block >> 20, meta.sync_obj >> 20, trace_mem >> 20,
- stacks.allocated >> 20, nlive, nthread);
+ meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20,
+ stacks.n_uniq_ids, nlive, nthread);
+}
+
+# if SANITIZER_LINUX
+void FlushShadowMemoryCallback(  // Handed to StopTheWorld by FlushShadowMemory.
+ const SuspendedThreadsList &suspended_threads_list,
+ void *argument) {  // Neither parameter is used in the body.
+ ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd());  // the whole shadow range
+}
+#endif
+
+void FlushShadowMemory() {  // Does nothing unless SANITIZER_LINUX is set.
+#if SANITIZER_LINUX
+ StopTheWorld(FlushShadowMemoryCallback, 0);  // callback releases shadow pages
+#endif
}
#if !SANITIZER_GO
-// Mark shadow for .rodata sections with the special Shadow::kRodata marker.
+// Mark shadow for .rodata sections with the special kShadowRodata marker.
// Accesses to .rodata can't race, so this saves time, memory and trace space.
static void MapRodata() {
// First create temp file.
@@ -170,13 +182,13 @@ static void MapRodata() {
return;
internal_unlink(name); // Unlink it now, so that we can reuse the buffer.
fd_t fd = openrv;
- // Fill the file with Shadow::kRodata.
+ // Fill the file with kShadowRodata.
const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow);
InternalMmapVector<RawShadow> marker(kMarkerSize);
// volatile to prevent insertion of memset
for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize;
p++)
- *p = Shadow::kRodata;
+ *p = kShadowRodata;
internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow));
// Map the file into memory.
uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
index 10e072559860..97ef9f7dfaab 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
@@ -112,6 +112,9 @@ void cur_thread_finalize() {
}
#endif
+void FlushShadowMemory() {  // Empty body in tsan_platform_mac.cpp.
+}
+
static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
vm_address_t address = start;
vm_address_t end_address = end;
@@ -139,10 +142,12 @@ static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
uptr shadow_res, shadow_dirty;
uptr meta_res, meta_dirty;
+ uptr trace_res, trace_dirty;
RegionMemUsage(ShadowBeg(), ShadowEnd(), &shadow_res, &shadow_dirty);
RegionMemUsage(MetaShadowBeg(), MetaShadowEnd(), &meta_res, &meta_dirty);
+ RegionMemUsage(TraceMemBeg(), TraceMemEnd(), &trace_res, &trace_dirty);
-# if !SANITIZER_GO
+#if !SANITIZER_GO
uptr low_res, low_dirty;
uptr high_res, high_dirty;
uptr heap_res, heap_dirty;
@@ -161,6 +166,7 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
buf, buf_size,
"shadow (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
"meta (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+ "traces (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
# if !SANITIZER_GO
"low app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
"high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
@@ -173,6 +179,7 @@ void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
"------------------------------\n",
ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024,
MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024,
+ TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024,
# if !SANITIZER_GO
LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024,
HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
index 763a533de525..763ac444377e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_posix.cpp
@@ -113,20 +113,24 @@ void CheckAndProtect() {
# if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS
ProtectRange(HeapMemEnd(), ShadowBeg());
ProtectRange(ShadowEnd(), MetaShadowBeg());
- ProtectRange(MetaShadowEnd(), HeapMemBeg());
-# else
+ ProtectRange(MetaShadowEnd(), TraceMemBeg());
+#else
ProtectRange(LoAppMemEnd(), ShadowBeg());
ProtectRange(ShadowEnd(), MetaShadowBeg());
if (MidAppMemBeg()) {
ProtectRange(MetaShadowEnd(), MidAppMemBeg());
- ProtectRange(MidAppMemEnd(), HeapMemBeg());
+ ProtectRange(MidAppMemEnd(), TraceMemBeg());
} else {
- ProtectRange(MetaShadowEnd(), HeapMemBeg());
+ ProtectRange(MetaShadowEnd(), TraceMemBeg());
}
+ // Memory for traces is mapped lazily in MapThreadTrace.
+ // Protect the whole range for now, so that user does not map something here.
+ ProtectRange(TraceMemBeg(), TraceMemEnd());
+ ProtectRange(TraceMemEnd(), HeapMemBeg());
ProtectRange(HeapEnd(), HiAppMemBeg());
-# endif
+#endif
-# if defined(__s390x__)
+#if defined(__s390x__)
// Protect the rest of the address space.
const uptr user_addr_max_l4 = 0x0020000000000000ull;
const uptr user_addr_max_l5 = 0xfffffffffffff000ull;
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
index eb8f354742f4..fea893768c79 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_windows.cpp
@@ -20,6 +20,9 @@
namespace __tsan {
+void FlushShadowMemory() {  // Empty body in tsan_platform_windows.cpp.
+}
+
void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {}
void InitializePlatformEarly() {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index 507f93e6a4cc..6ff52e34a2c6 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -57,348 +57,110 @@ Context *ctx;
bool OnFinalize(bool failed);
void OnInitialize();
#else
+#include <dlfcn.h>
SANITIZER_WEAK_CXX_DEFAULT_IMPL
bool OnFinalize(bool failed) {
-# if !SANITIZER_GO
+#if !SANITIZER_GO
if (on_finalize)
return on_finalize(failed);
-# endif
+#endif
return failed;
}
-
SANITIZER_WEAK_CXX_DEFAULT_IMPL
void OnInitialize() {
-# if !SANITIZER_GO
+#if !SANITIZER_GO
if (on_initialize)
on_initialize();
-# endif
-}
#endif
-
-static TracePart* TracePartAlloc(ThreadState* thr) {
- TracePart* part = nullptr;
- {
- Lock lock(&ctx->slot_mtx);
- uptr max_parts = Trace::kMinParts + flags()->history_size;
- Trace* trace = &thr->tctx->trace;
- if (trace->parts_allocated == max_parts ||
- ctx->trace_part_finished_excess) {
- part = ctx->trace_part_recycle.PopFront();
- DPrintf("#%d: TracePartAlloc: part=%p\n", thr->tid, part);
- if (part && part->trace) {
- Trace* trace1 = part->trace;
- Lock trace_lock(&trace1->mtx);
- part->trace = nullptr;
- TracePart* part1 = trace1->parts.PopFront();
- CHECK_EQ(part, part1);
- if (trace1->parts_allocated > trace1->parts.Size()) {
- ctx->trace_part_finished_excess +=
- trace1->parts_allocated - trace1->parts.Size();
- trace1->parts_allocated = trace1->parts.Size();
- }
- }
- }
- if (trace->parts_allocated < max_parts) {
- trace->parts_allocated++;
- if (ctx->trace_part_finished_excess)
- ctx->trace_part_finished_excess--;
- }
- if (!part)
- ctx->trace_part_total_allocated++;
- else if (ctx->trace_part_recycle_finished)
- ctx->trace_part_recycle_finished--;
- }
- if (!part)
- part = new (MmapOrDie(sizeof(*part), "TracePart")) TracePart();
- return part;
-}
-
-static void TracePartFree(TracePart* part) REQUIRES(ctx->slot_mtx) {
- DCHECK(part->trace);
- part->trace = nullptr;
- ctx->trace_part_recycle.PushFront(part);
-}
-
-void TraceResetForTesting() {
- Lock lock(&ctx->slot_mtx);
- while (auto* part = ctx->trace_part_recycle.PopFront()) {
- if (auto trace = part->trace)
- CHECK_EQ(trace->parts.PopFront(), part);
- UnmapOrDie(part, sizeof(*part));
- }
- ctx->trace_part_total_allocated = 0;
- ctx->trace_part_recycle_finished = 0;
- ctx->trace_part_finished_excess = 0;
-}
-
-static void DoResetImpl(uptr epoch) {
- ThreadRegistryLock lock0(&ctx->thread_registry);
- Lock lock1(&ctx->slot_mtx);
- CHECK_EQ(ctx->global_epoch, epoch);
- ctx->global_epoch++;
- CHECK(!ctx->resetting);
- ctx->resetting = true;
- for (u32 i = ctx->thread_registry.NumThreadsLocked(); i--;) {
- ThreadContext* tctx = (ThreadContext*)ctx->thread_registry.GetThreadLocked(
- static_cast<Tid>(i));
- // Potentially we could purge all ThreadStatusDead threads from the
- // registry. Since we reset all shadow, they can't race with anything
- // anymore. However, their tid's can still be stored in some aux places
- // (e.g. tid of thread that created something).
- auto trace = &tctx->trace;
- Lock lock(&trace->mtx);
- bool attached = tctx->thr && tctx->thr->slot;
- auto parts = &trace->parts;
- bool local = false;
- while (!parts->Empty()) {
- auto part = parts->Front();
- local = local || part == trace->local_head;
- if (local)
- CHECK(!ctx->trace_part_recycle.Queued(part));
- else
- ctx->trace_part_recycle.Remove(part);
- if (attached && parts->Size() == 1) {
- // The thread is running and this is the last/current part.
- // Set the trace position to the end of the current part
- // to force the thread to call SwitchTracePart and re-attach
- // to a new slot and allocate a new trace part.
- // Note: the thread is concurrently modifying the position as well,
- // so this is only best-effort. The thread can only modify position
- // within this part, because switching parts is protected by
- // slot/trace mutexes that we hold here.
- atomic_store_relaxed(
- &tctx->thr->trace_pos,
- reinterpret_cast<uptr>(&part->events[TracePart::kSize]));
- break;
- }
- parts->Remove(part);
- TracePartFree(part);
- }
- CHECK_LE(parts->Size(), 1);
- trace->local_head = parts->Front();
- if (tctx->thr && !tctx->thr->slot) {
- atomic_store_relaxed(&tctx->thr->trace_pos, 0);
- tctx->thr->trace_prev_pc = 0;
- }
- if (trace->parts_allocated > trace->parts.Size()) {
- ctx->trace_part_finished_excess +=
- trace->parts_allocated - trace->parts.Size();
- trace->parts_allocated = trace->parts.Size();
- }
- }
- while (ctx->slot_queue.PopFront()) {
- }
- for (auto& slot : ctx->slots) {
- slot.SetEpoch(kEpochZero);
- slot.journal.Reset();
- slot.thr = nullptr;
- ctx->slot_queue.PushBack(&slot);
- }
-
- DPrintf("Resetting shadow...\n");
- if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(),
- "shadow")) {
- Printf("failed to reset shadow memory\n");
- Die();
- }
- DPrintf("Resetting meta shadow...\n");
- ctx->metamap.ResetClocks();
- ctx->resetting = false;
-}
-
-// Clang does not understand locking all slots in the loop:
-// error: expecting mutex 'slot.mtx' to be held at start of each loop
-void DoReset(ThreadState* thr, uptr epoch) NO_THREAD_SAFETY_ANALYSIS {
- {
- Lock l(&ctx->multi_slot_mtx);
- for (auto& slot : ctx->slots) {
- slot.mtx.Lock();
- if (UNLIKELY(epoch == 0))
- epoch = ctx->global_epoch;
- if (UNLIKELY(epoch != ctx->global_epoch)) {
- // Epoch can't change once we've locked the first slot.
- CHECK_EQ(slot.sid, 0);
- slot.mtx.Unlock();
- return;
- }
- }
- }
- DPrintf("#%d: DoReset epoch=%lu\n", thr ? thr->tid : -1, epoch);
- DoResetImpl(epoch);
- for (auto& slot : ctx->slots) slot.mtx.Unlock();
}
+#endif
-void FlushShadowMemory() { DoReset(nullptr, 0); }
-
-static TidSlot* FindSlotAndLock(ThreadState* thr)
- ACQUIRE(thr->slot->mtx) NO_THREAD_SAFETY_ANALYSIS {
- CHECK(!thr->slot);
- TidSlot* slot = nullptr;
- for (;;) {
- uptr epoch;
- {
- Lock lock(&ctx->slot_mtx);
- epoch = ctx->global_epoch;
- if (slot) {
- // This is an exhausted slot from the previous iteration.
- if (ctx->slot_queue.Queued(slot))
- ctx->slot_queue.Remove(slot);
- thr->slot_locked = false;
- slot->mtx.Unlock();
- }
- for (;;) {
- slot = ctx->slot_queue.PopFront();
- if (!slot)
- break;
- if (slot->epoch() != kEpochLast) {
- ctx->slot_queue.PushBack(slot);
- break;
- }
- }
- }
- if (!slot) {
- DoReset(thr, epoch);
- continue;
+static ThreadContextBase *CreateThreadContext(Tid tid) {
+ // Map the thread's trace events and Trace header when its context is created.
+ char name[50];
+ internal_snprintf(name, sizeof(name), "trace %u", tid);
+ MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name);
+ const uptr hdr = GetThreadTraceHeader(tid);
+ internal_snprintf(name, sizeof(name), "trace header %u", tid);
+ MapThreadTrace(hdr, sizeof(Trace), name);
+ new((void*)hdr) Trace();
+ // We are going to use only a small part of the trace with the default
+ // value of history_size. However, the constructor writes to the whole trace.
+ // Release the unused part.
+ uptr hdr_end = hdr + sizeof(Trace);
+ hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts());
+ hdr_end = RoundUp(hdr_end, GetPageSizeCached());
+ if (hdr_end < hdr + sizeof(Trace)) {
+ ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace));
+ uptr unused = hdr + sizeof(Trace) - hdr_end;
+ if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) {
+ Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end,
+ hdr_end + unused);  // print the range end, not the range length
+ CHECK("unable to mprotect" && 0);
+ }
- slot->mtx.Lock();
- CHECK(!thr->slot_locked);
- thr->slot_locked = true;
- if (slot->thr) {
- DPrintf("#%d: preempting sid=%d tid=%d\n", thr->tid, (u32)slot->sid,
- slot->thr->tid);
- slot->SetEpoch(slot->thr->fast_state.epoch());
- slot->thr = nullptr;
- }
- if (slot->epoch() != kEpochLast)
- return slot;
}
+ return New<ThreadContext>(tid);
}
-void SlotAttachAndLock(ThreadState* thr) {
- TidSlot* slot = FindSlotAndLock(thr);
- DPrintf("#%d: SlotAttach: slot=%u\n", thr->tid, static_cast<int>(slot->sid));
- CHECK(!slot->thr);
- CHECK(!thr->slot);
- slot->thr = thr;
- thr->slot = slot;
- Epoch epoch = EpochInc(slot->epoch());
- CHECK(!EpochOverflow(epoch));
- slot->SetEpoch(epoch);
- thr->fast_state.SetSid(slot->sid);
- thr->fast_state.SetEpoch(epoch);
- if (thr->slot_epoch != ctx->global_epoch) {
- thr->slot_epoch = ctx->global_epoch;
- thr->clock.Reset();
#if !SANITIZER_GO
- thr->last_sleep_stack_id = kInvalidStackID;
- thr->last_sleep_clock.Reset();
+static const u32 kThreadQuarantineSize = 16;
+#else
+static const u32 kThreadQuarantineSize = 64;
#endif
- }
- thr->clock.Set(slot->sid, epoch);
- slot->journal.PushBack({thr->tid, epoch});
-}
-
-static void SlotDetachImpl(ThreadState* thr, bool exiting) {
- TidSlot* slot = thr->slot;
- thr->slot = nullptr;
- if (thr != slot->thr) {
- slot = nullptr; // we don't own the slot anymore
- if (thr->slot_epoch != ctx->global_epoch) {
- TracePart* part = nullptr;
- auto* trace = &thr->tctx->trace;
- {
- Lock l(&trace->mtx);
- auto* parts = &trace->parts;
- // The trace can be completely empty in an unlikely event
- // the thread is preempted right after it acquired the slot
- // in ThreadStart and did not trace any events yet.
- CHECK_LE(parts->Size(), 1);
- part = parts->PopFront();
- thr->tctx->trace.local_head = nullptr;
- atomic_store_relaxed(&thr->trace_pos, 0);
- thr->trace_prev_pc = 0;
- }
- if (part) {
- Lock l(&ctx->slot_mtx);
- TracePartFree(part);
- }
- }
- return;
- }
- CHECK(exiting || thr->fast_state.epoch() == kEpochLast);
- slot->SetEpoch(thr->fast_state.epoch());
- slot->thr = nullptr;
-}
-
-void SlotDetach(ThreadState* thr) {
- Lock lock(&thr->slot->mtx);
- SlotDetachImpl(thr, true);
-}
-
-void SlotLock(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS {
- DCHECK(!thr->slot_locked);
- TidSlot* slot = thr->slot;
- slot->mtx.Lock();
- thr->slot_locked = true;
- if (LIKELY(thr == slot->thr && thr->fast_state.epoch() != kEpochLast))
- return;
- SlotDetachImpl(thr, false);
- thr->slot_locked = false;
- slot->mtx.Unlock();
- SlotAttachAndLock(thr);
-}
-
-void SlotUnlock(ThreadState* thr) {
- DCHECK(thr->slot_locked);
- thr->slot_locked = false;
- thr->slot->mtx.Unlock();
-}
Context::Context()
: initialized(),
report_mtx(MutexTypeReport),
nreported(),
- thread_registry([](Tid tid) -> ThreadContextBase* {
- return new (Alloc(sizeof(ThreadContext))) ThreadContext(tid);
- }),
+ thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize,
+ kMaxTidReuse),
racy_mtx(MutexTypeRacy),
racy_stacks(),
racy_addresses(),
fired_suppressions_mtx(MutexTypeFired),
- clock_alloc(LINKER_INITIALIZED, "clock allocator"),
- slot_mtx(MutexTypeSlots),
- multi_slot_mtx(MutexTypeMultiSlot),
- resetting() {
+ clock_alloc(LINKER_INITIALIZED, "clock allocator") {
fired_suppressions.reserve(8);
- for (uptr i = 0; i < ARRAY_SIZE(slots); i++) {
- TidSlot* slot = &slots[i];
- slot->sid = static_cast<Sid>(i);
- slot_queue.PushBack(slot);
- }
- global_epoch = 1;
}
-TidSlot::TidSlot() : mtx(MutexTypeSlot) {}
-
// The objects are allocated in TLS, so one may rely on zero-initialization.
-ThreadState::ThreadState(Tid tid)
- // Do not touch these, rely on zero initialization,
- // they may be accessed before the ctor.
- // ignore_reads_and_writes()
- // ignore_interceptors()
- : tid(tid) {
+ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
+ unsigned reuse_count, uptr stk_addr, uptr stk_size,
+ uptr tls_addr, uptr tls_size)
+ : fast_state(tid, epoch)
+ // Do not touch these, rely on zero initialization,
+ // they may be accessed before the ctor.
+ // , ignore_reads_and_writes()
+ // , ignore_interceptors()
+ ,
+ clock(tid, reuse_count)
+#if !SANITIZER_GO
+ ,
+ jmp_bufs()
+#endif
+ ,
+ tid(tid),
+ unique_id(unique_id),
+ stk_addr(stk_addr),
+ stk_size(stk_size),
+ tls_addr(tls_addr),
+ tls_size(tls_size)
+#if !SANITIZER_GO
+ ,
+ last_sleep_clock(tid)
+#endif
+{
CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
#if !SANITIZER_GO
// C/C++ uses fixed size shadow stack.
const int kInitStackSize = kShadowStackSize;
- shadow_stack = static_cast<uptr*>(
+ shadow_stack = static_cast<uptr *>(
MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
kInitStackSize * sizeof(uptr));
#else
// Go uses malloc-allocated shadow stack with dynamic size.
const int kInitStackSize = 8;
- shadow_stack = static_cast<uptr*>(Alloc(kInitStackSize * sizeof(uptr)));
+ shadow_stack = static_cast<uptr *>(Alloc(kInitStackSize * sizeof(uptr)));
#endif
shadow_stack_pos = shadow_stack;
shadow_stack_end = shadow_stack + kInitStackSize;
@@ -516,8 +278,7 @@ void UnmapShadow(ThreadState *thr, uptr addr, uptr size) {
if (size == 0) return;
DontNeedShadowFor(addr, size);
ScopedGlobalProcessor sgp;
- SlotLocker locker(thr, true);
- ctx->metamap.ResetRange(thr->proc(), addr, size, true);
+ ctx->metamap.ResetRange(thr->proc(), addr, size);
}
#endif
@@ -563,6 +324,18 @@ void MapShadow(uptr addr, uptr size) {
addr + size, meta_begin, meta_end);
}
+void MapThreadTrace(uptr addr, uptr size, const char *name) {  // Maps or dies.
+ DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size);
+ CHECK_GE(addr, TraceMemBeg());  // [addr, addr+size) must be in the trace region
+ CHECK_LE(addr + size, TraceMemEnd());
+ CHECK_EQ(addr, addr & ~((64 << 10) - 1)); // Windows wants 64K alignment
+ if (!MmapFixedSuperNoReserve(addr, size, name)) {  // presumably name labels it
+ Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n",
+ addr, size);
+ Die();
+ }
+}
+
#if !SANITIZER_GO
static void OnStackUnwind(const SignalContext &sig, const void *,
BufferedStackTrace *stack) {
@@ -581,11 +354,8 @@ void CheckUnwind() {
// since we are going to die soon.
ScopedIgnoreInterceptors ignore;
#if !SANITIZER_GO
- ThreadState* thr = cur_thread();
- thr->nomalloc = false;
- thr->ignore_sync++;
- thr->ignore_reads_and_writes++;
- atomic_store_relaxed(&thr->in_signal_handler, 0);
+ cur_thread()->ignore_sync++;
+ cur_thread()->ignore_reads_and_writes++;
#endif
PrintCurrentStackSlow(StackTrace::GetCurrentPc());
}
@@ -640,22 +410,22 @@ void Initialize(ThreadState *thr) {
Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
#endif
- VPrintf(1, "***** Running under ThreadSanitizer v3 (pid %d) *****\n",
+ VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
(int)internal_getpid());
// Initialize thread 0.
- Tid tid = ThreadCreate(nullptr, 0, 0, true);
+ Tid tid = ThreadCreate(thr, 0, 0, true);
CHECK_EQ(tid, kMainTid);
ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
#if TSAN_CONTAINS_UBSAN
__ubsan::InitAsPlugin();
#endif
+ ctx->initialized = true;
#if !SANITIZER_GO
Symbolizer::LateInitialize();
InitializeMemoryProfiler();
#endif
- ctx->initialized = true;
if (flags()->stop_on_start) {
Printf("ThreadSanitizer is suspended at startup (pid %d)."
@@ -681,6 +451,7 @@ void MaybeSpawnBackgroundThread() {
#endif
}
+
int Finalize(ThreadState *thr) {
bool failed = false;
@@ -688,12 +459,12 @@ int Finalize(ThreadState *thr) {
DumpProcessMap();
if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
- internal_usleep(u64(flags()->atexit_sleep_ms) * 1000);
+ SleepForMillis(flags()->atexit_sleep_ms);
- {
- // Wait for pending reports.
- ScopedErrorReportLock lock;
- }
+ // Wait for pending reports.
+ ctx->report_mtx.Lock();
+ { ScopedErrorReportLock l; }
+ ctx->report_mtx.Unlock();
#if !SANITIZER_GO
if (Verbosity()) AllocatorPrintStats();
@@ -720,14 +491,8 @@ int Finalize(ThreadState *thr) {
#if !SANITIZER_GO
void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
- GlobalProcessorLock();
- // Detaching from the slot makes OnUserFree skip writing to the shadow.
- // The slot will be locked so any attempts to use it will deadlock anyway.
- SlotDetach(thr);
- ctx->multi_slot_mtx.Lock();
- for (auto& slot : ctx->slots) slot.mtx.Lock();
ctx->thread_registry.Lock();
- ctx->slot_mtx.Lock();
+ ctx->report_mtx.Lock();
ScopedErrorReportLock::Lock();
AllocatorLock();
// Suppress all reports in the pthread_atfork callbacks.
@@ -747,30 +512,30 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
__tsan_test_only_on_fork();
}
-static void ForkAfter(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS {
+void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports--; // Enabled in ForkBefore.
thr->ignore_interceptors--;
thr->ignore_reads_and_writes--;
AllocatorUnlock();
ScopedErrorReportLock::Unlock();
- ctx->slot_mtx.Unlock();
+ ctx->report_mtx.Unlock();
ctx->thread_registry.Unlock();
- for (auto& slot : ctx->slots) slot.mtx.Unlock();
- ctx->multi_slot_mtx.Unlock();
- SlotAttachAndLock(thr);
- SlotUnlock(thr);
- GlobalProcessorUnlock();
}
-void ForkParentAfter(ThreadState* thr, uptr pc) { ForkAfter(thr); }
+void ForkChildAfter(ThreadState *thr, uptr pc,
+ bool start_thread) NO_THREAD_SAFETY_ANALYSIS {
+ thr->suppress_reports--; // Enabled in ForkBefore.
+ thr->ignore_interceptors--;
+ thr->ignore_reads_and_writes--;
+ AllocatorUnlock();
+ ScopedErrorReportLock::Unlock();
+ ctx->report_mtx.Unlock();
+ ctx->thread_registry.Unlock();
-void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
- ForkAfter(thr);
- u32 nthread = ThreadCount(thr);
- VPrintf(1,
- "ThreadSanitizer: forked new process with pid %d,"
- " parent had %d threads\n",
- (int)internal_getpid(), (int)nthread);
+ uptr nthread = 0;
+ ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */);
+ VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
+ " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
if (nthread == 1) {
if (start_thread)
StartBackgroundThread();
@@ -780,7 +545,6 @@ void ForkChildAfter(ThreadState* thr, uptr pc, bool start_thread) {
// ignores for everything in the hope that we will exec soon.
ctx->after_multithreaded_fork = true;
thr->ignore_interceptors++;
- thr->suppress_reports++;
ThreadIgnoreBegin(thr, pc);
ThreadIgnoreSyncBegin(thr, pc);
}
@@ -802,10 +566,8 @@ void GrowShadowStack(ThreadState *thr) {
#endif
StackID CurrentStackId(ThreadState *thr, uptr pc) {
-#if !SANITIZER_GO
if (!thr->is_inited) // May happen during bootstrap.
return kInvalidStackID;
-#endif
if (pc != 0) {
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -823,72 +585,53 @@ StackID CurrentStackId(ThreadState *thr, uptr pc) {
return id;
}
-static bool TraceSkipGap(ThreadState* thr) {
+namespace v3 {
+
+NOINLINE
+void TraceSwitchPart(ThreadState *thr) {
Trace *trace = &thr->tctx->trace;
Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0);
auto *part = trace->parts.Back();
- DPrintf("#%d: TraceSwitchPart enter trace=%p parts=%p-%p pos=%p\n", thr->tid,
- trace, trace->parts.Front(), part, pos);
- if (!part)
- return false;
- // We can get here when we still have space in the current trace part.
- // The fast-path check in TraceAcquire has false positives in the middle of
- // the part. Check if we are indeed at the end of the current part or not,
- // and fill any gaps with NopEvent's.
- Event* end = &part->events[TracePart::kSize];
- DCHECK_GE(pos, &part->events[0]);
- DCHECK_LE(pos, end);
- if (pos + 1 < end) {
- if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
- TracePart::kAlignment)
+ DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos);
+ if (part) {
+ // We can get here when we still have space in the current trace part.
+ // The fast-path check in TraceAcquire has false positives in the middle of
+ // the part. Check if we are indeed at the end of the current part or not,
+ // and fill any gaps with NopEvent's.
+ Event *end = &part->events[TracePart::kSize];
+ DCHECK_GE(pos, &part->events[0]);
+ DCHECK_LE(pos, end);
+ if (pos + 1 < end) {
+ if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
+ TracePart::kAlignment)
+ *pos++ = NopEvent;
*pos++ = NopEvent;
- *pos++ = NopEvent;
- DCHECK_LE(pos + 2, end);
- atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
- return true;
+ DCHECK_LE(pos + 2, end);
+ atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
+ // Ensure we setup trace so that the next TraceAcquire
+ // won't detect trace part end.
+ Event *ev;
+ CHECK(TraceAcquire(thr, &ev));
+ return;
+ }
+ // We are indeed at the end.
+ for (; pos < end; pos++) *pos = NopEvent;
}
- // We are indeed at the end.
- for (; pos < end; pos++) *pos = NopEvent;
- return false;
-}
-
-NOINLINE
-void TraceSwitchPart(ThreadState* thr) {
- if (TraceSkipGap(thr))
- return;
#if !SANITIZER_GO
if (ctx->after_multithreaded_fork) {
// We just need to survive till exec.
- TracePart* part = thr->tctx->trace.parts.Back();
- if (part) {
- atomic_store_relaxed(&thr->trace_pos,
- reinterpret_cast<uptr>(&part->events[0]));
- return;
- }
+ CHECK(part);
+ atomic_store_relaxed(&thr->trace_pos,
+ reinterpret_cast<uptr>(&part->events[0]));
+ return;
}
#endif
- TraceSwitchPartImpl(thr);
-}
-
-void TraceSwitchPartImpl(ThreadState* thr) {
- SlotLocker locker(thr, true);
- Trace* trace = &thr->tctx->trace;
- TracePart* part = TracePartAlloc(thr);
+ part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart();
part->trace = trace;
thr->trace_prev_pc = 0;
- TracePart* recycle = nullptr;
- // Keep roughly half of parts local to the thread
- // (not queued into the recycle queue).
- uptr local_parts = (Trace::kMinParts + flags()->history_size + 1) / 2;
{
Lock lock(&trace->mtx);
- if (trace->parts.Empty())
- trace->local_head = part;
- if (trace->parts.Size() >= local_parts) {
- recycle = trace->local_head;
- trace->local_head = trace->parts.Next(recycle);
- }
trace->parts.PushBack(part);
atomic_store_relaxed(&thr->trace_pos,
reinterpret_cast<uptr>(&part->events[0]));
@@ -896,45 +639,60 @@ void TraceSwitchPartImpl(ThreadState* thr) {
// Make this part self-sufficient by restoring the current stack
// and mutex set in the beginning of the trace.
TraceTime(thr);
- {
- // Pathologically large stacks may not fit into the part.
- // In these cases we log only fixed number of top frames.
- const uptr kMaxFrames = 1000;
- // Sanity check that kMaxFrames won't consume the whole part.
- static_assert(kMaxFrames < TracePart::kSize / 2, "kMaxFrames is too big");
- uptr* pos = Max(&thr->shadow_stack[0], thr->shadow_stack_pos - kMaxFrames);
- for (; pos < thr->shadow_stack_pos; pos++) {
- if (TryTraceFunc(thr, *pos))
- continue;
- CHECK(TraceSkipGap(thr));
- CHECK(TryTraceFunc(thr, *pos));
- }
- }
+ for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++)
+ CHECK(TryTraceFunc(thr, *pos));
for (uptr i = 0; i < thr->mset.Size(); i++) {
MutexSet::Desc d = thr->mset.Get(i);
- for (uptr i = 0; i < d.count; i++)
- TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
- d.addr, d.stack_id);
- }
- {
- Lock lock(&ctx->slot_mtx);
- ctx->slot_queue.Remove(thr->slot);
- ctx->slot_queue.PushBack(thr->slot);
- if (recycle)
- ctx->trace_part_recycle.PushBack(recycle);
+ TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
+ d.addr, d.stack_id);
}
- DPrintf("#%d: TraceSwitchPart exit parts=%p-%p pos=0x%zx\n", thr->tid,
- trace->parts.Front(), trace->parts.Back(),
- atomic_load_relaxed(&thr->trace_pos));
+}
+
+} // namespace v3
+
+void TraceSwitch(ThreadState *thr) {
+#if !SANITIZER_GO
+ if (ctx->after_multithreaded_fork)
+ return;
+#endif
+ thr->nomalloc++;
+ Trace *thr_trace = ThreadTrace(thr->tid);
+ Lock l(&thr_trace->mtx);
+ unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
+ TraceHeader *hdr = &thr_trace->headers[trace];
+ hdr->epoch0 = thr->fast_state.epoch();
+ ObtainCurrentStack(thr, 0, &hdr->stack0);
+ hdr->mset0 = thr->mset;
+ thr->nomalloc--;
+}
+
+Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); }
+
+uptr TraceTopPC(ThreadState *thr) {
+ Event *events = (Event*)GetThreadTrace(thr->tid);
+ uptr pc = events[thr->fast_state.GetTracePos()];
+ return pc;
+}
+
+uptr TraceSize() {
+ return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
+}
+
+uptr TraceParts() {
+ return TraceSize() / kTracePartSize;
}
#if !SANITIZER_GO
-extern "C" void __tsan_trace_switch() {}
+extern "C" void __tsan_trace_switch() {
+ TraceSwitch(cur_thread());
+}
-extern "C" void __tsan_report_race() {}
+extern "C" void __tsan_report_race() {
+ ReportRace(cur_thread());
+}
#endif
-void ThreadIgnoreBegin(ThreadState* thr, uptr pc) {
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
thr->ignore_reads_and_writes++;
CHECK_GT(thr->ignore_reads_and_writes, 0);
@@ -994,6 +752,7 @@ void build_consistency_debug() {}
#else
void build_consistency_release() {}
#endif
+
} // namespace __tsan
#if SANITIZER_CHECK_DEADLOCKS
@@ -1001,30 +760,21 @@ namespace __sanitizer {
using namespace __tsan;
MutexMeta mutex_meta[] = {
{MutexInvalid, "Invalid", {}},
- {MutexThreadRegistry,
- "ThreadRegistry",
- {MutexTypeSlots, MutexTypeTrace, MutexTypeReport}},
- {MutexTypeReport, "Report", {MutexTypeTrace}},
- {MutexTypeSyncVar, "SyncVar", {MutexTypeReport, MutexTypeTrace}},
+ {MutexThreadRegistry, "ThreadRegistry", {}},
+ {MutexTypeTrace, "Trace", {}},
+ {MutexTypeReport,
+ "Report",
+ {MutexTypeSyncVar, MutexTypeGlobalProc, MutexTypeTrace}},
+ {MutexTypeSyncVar, "SyncVar", {MutexTypeTrace}},
{MutexTypeAnnotations, "Annotations", {}},
- {MutexTypeAtExit, "AtExit", {}},
+ {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}},
{MutexTypeFired, "Fired", {MutexLeaf}},
{MutexTypeRacy, "Racy", {MutexLeaf}},
- {MutexTypeGlobalProc,
- "GlobalProc",
- {MutexTypeSlot, MutexTypeSlots, MutexTypeMultiSlot}},
+ {MutexTypeGlobalProc, "GlobalProc", {}},
{MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
- {MutexTypeTrace, "Trace", {}},
- {MutexTypeSlot,
- "Slot",
- {MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry,
- MutexTypeSlots}},
- {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}},
- {MutexTypeMultiSlot, "MultiSlot", {MutexTypeSlot, MutexTypeSlots}},
{},
};
void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
-
} // namespace __sanitizer
#endif
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 3175847a880a..c71b27e1cbf5 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -38,7 +38,6 @@
#include "tsan_defs.h"
#include "tsan_flags.h"
#include "tsan_ignoreset.h"
-#include "tsan_ilist.h"
#include "tsan_mman.h"
#include "tsan_mutexset.h"
#include "tsan_platform.h"
@@ -47,7 +46,6 @@
#include "tsan_stack_trace.h"
#include "tsan_sync.h"
#include "tsan_trace.h"
-#include "tsan_vector_clock.h"
#if SANITIZER_WORDSIZE != 64
# error "ThreadSanitizer is supported only on 64-bit platforms"
@@ -118,6 +116,7 @@ struct Processor {
#endif
DenseSlabAllocCache block_cache;
DenseSlabAllocCache sync_cache;
+ DenseSlabAllocCache clock_cache;
DDPhysicalThread *dd_pt;
};
@@ -131,56 +130,30 @@ struct ScopedGlobalProcessor {
};
#endif
-struct TidEpoch {
- Tid tid;
- Epoch epoch;
-};
-
-struct TidSlot {
- Mutex mtx;
- Sid sid;
- atomic_uint32_t raw_epoch;
- ThreadState *thr;
- Vector<TidEpoch> journal;
- INode node;
-
- Epoch epoch() const {
- return static_cast<Epoch>(atomic_load(&raw_epoch, memory_order_relaxed));
- }
-
- void SetEpoch(Epoch v) {
- atomic_store(&raw_epoch, static_cast<u32>(v), memory_order_relaxed);
- }
-
- TidSlot();
-} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
-
// This struct is stored in TLS.
struct ThreadState {
FastState fast_state;
- int ignore_sync;
-#if !SANITIZER_GO
- int ignore_interceptors;
-#endif
- uptr *shadow_stack_pos;
-
- // Current position in tctx->trace.Back()->events (Event*).
- atomic_uintptr_t trace_pos;
- // PC of the last memory access, used to compute PC deltas in the trace.
- uptr trace_prev_pc;
-
+ // Synch epoch represents the threads's epoch before the last synchronization
+ // action. It allows to reduce number of shadow state updates.
+ // For example, fast_synch_epoch=100, last write to addr X was at epoch=150,
+ // if we are processing write to X from the same thread at epoch=200,
+ // we do nothing, because both writes happen in the same 'synch epoch'.
+ // That is, if another memory access does not race with the former write,
+ // it does not race with the latter as well.
+ // QUESTION: can we can squeeze this into ThreadState::Fast?
+ // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are
+ // taken by epoch between synchs.
+ // This way we can save one load from tls.
+ u64 fast_synch_epoch;
// Technically `current` should be a separate THREADLOCAL variable;
// but it is placed here in order to share cache line with previous fields.
ThreadState* current;
-
- atomic_sint32_t pending_signals;
-
- VectorClock clock;
-
// This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read.
// We do not distinguish beteween ignoring reads and writes
// for better performance.
int ignore_reads_and_writes;
+ atomic_sint32_t pending_signals;
+ int ignore_sync;
int suppress_reports;
// Go does not support ignores.
#if !SANITIZER_GO
@@ -189,27 +162,31 @@ struct ThreadState {
#endif
uptr *shadow_stack;
uptr *shadow_stack_end;
+ uptr *shadow_stack_pos;
+ RawShadow *racy_shadow_addr;
+ RawShadow racy_state[2];
+ MutexSet mset;
+ ThreadClock clock;
#if !SANITIZER_GO
Vector<JmpBuf> jmp_bufs;
- int in_symbolizer;
+ int ignore_interceptors;
+#endif
+ const Tid tid;
+ const int unique_id;
+ bool in_symbolizer;
bool in_ignored_lib;
bool is_inited;
-#endif
- MutexSet mset;
bool is_dead;
- const Tid tid;
- uptr stk_addr;
- uptr stk_size;
- uptr tls_addr;
- uptr tls_size;
+ bool is_freeing;
+ bool is_vptr_access;
+ const uptr stk_addr;
+ const uptr stk_size;
+ const uptr tls_addr;
+ const uptr tls_size;
ThreadContext *tctx;
DDLogicalThread *dd_lt;
- TidSlot *slot;
- uptr slot_epoch;
- bool slot_locked;
-
// Current wired Processor, or nullptr. Required to handle any events.
Processor *proc1;
#if !SANITIZER_GO
@@ -223,7 +200,7 @@ struct ThreadState {
#if !SANITIZER_GO
StackID last_sleep_stack_id;
- VectorClock last_sleep_clock;
+ ThreadClock last_sleep_clock;
#endif
// Set in regions of runtime that must be signal-safe and fork-safe.
@@ -232,7 +209,16 @@ struct ThreadState {
const ReportDesc *current_report;
- explicit ThreadState(Tid tid);
+ // Current position in tctx->trace.Back()->events (Event*).
+ atomic_uintptr_t trace_pos;
+ // PC of the last memory access, used to compute PC deltas in the trace.
+ uptr trace_prev_pc;
+ Sid sid;
+ Epoch epoch;
+
+ explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
+ unsigned reuse_count, uptr stk_addr, uptr stk_size,
+ uptr tls_addr, uptr tls_size);
} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
#if !SANITIZER_GO
@@ -266,9 +252,14 @@ class ThreadContext final : public ThreadContextBase {
~ThreadContext();
ThreadState *thr;
StackID creation_stack_id;
- VectorClock *sync;
- uptr sync_epoch;
- Trace trace;
+ SyncClock sync;
+ // Epoch at which the thread had started.
+ // If we see an event from the thread stamped by an older epoch,
+ // the event is from a dead thread that shared tid with this thread.
+ u64 epoch0;
+ u64 epoch1;
+
+ v3::Trace trace;
// Override superclass callbacks.
void OnDead() override;
@@ -328,21 +319,7 @@ struct Context {
Flags flags;
fd_t memprof_fd;
- // The last slot index (kFreeSid) is used to denote freed memory.
- TidSlot slots[kThreadSlotCount - 1];
-
- // Protects global_epoch, slot_queue, trace_part_recycle.
Mutex slot_mtx;
- // Prevents lock order inversions when we lock more than 1 slot.
- Mutex multi_slot_mtx;
- uptr global_epoch; // guarded by slot_mtx and by all slot mutexes
- bool resetting; // global reset is in progress
- IList<TidSlot, &TidSlot::node> slot_queue GUARDED_BY(slot_mtx);
- IList<TraceHeader, &TraceHeader::global, TracePart> trace_part_recycle
- GUARDED_BY(slot_mtx);
- uptr trace_part_total_allocated GUARDED_BY(slot_mtx);
- uptr trace_part_recycle_finished GUARDED_BY(slot_mtx);
- uptr trace_part_finished_excess GUARDED_BY(slot_mtx);
};
extern Context *ctx; // The one and the only global runtime context.
@@ -371,13 +348,14 @@ uptr TagFromShadowStackFrame(uptr pc);
class ScopedReportBase {
public:
- void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, Tid tid,
- StackTrace stack, const MutexSet *mset);
+ void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack,
+ const MutexSet *mset);
void AddStack(StackTrace stack, bool suppressable = false);
void AddThread(const ThreadContext *tctx, bool suppressable = false);
- void AddThread(Tid tid, bool suppressable = false);
+ void AddThread(Tid unique_tid, bool suppressable = false);
void AddUniqueTid(Tid unique_tid);
- int AddMutex(uptr addr, StackID creation_stack_id);
+ void AddMutex(const SyncVar *s);
+ u64 AddMutex(u64 id);
void AddLocation(uptr addr, uptr size);
void AddSleep(StackID stack_id);
void SetCount(int count);
@@ -394,6 +372,8 @@ class ScopedReportBase {
// at best it will cause deadlocks on internal mutexes.
ScopedIgnoreInterceptors ignore_interceptors_;
+ void AddDeadMutex(u64 id);
+
ScopedReportBase(const ScopedReportBase &) = delete;
void operator=(const ScopedReportBase &) = delete;
};
@@ -409,6 +389,8 @@ class ScopedReport : public ScopedReportBase {
bool ShouldReport(ThreadState *thr, ReportType typ);
ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
+void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
+ MutexSet *mset, uptr *tag = nullptr);
// The stack could look like:
// <start> | <main> | <foo> | tag | <bar>
@@ -456,8 +438,7 @@ void ForkBefore(ThreadState *thr, uptr pc);
void ForkParentAfter(ThreadState *thr, uptr pc);
void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread);
-void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
- AccessType typ);
+void ReportRace(ThreadState *thr);
bool OutputReport(ThreadState *thr, const ScopedReport &srep);
bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
bool IsExpectedReport(uptr addr, uptr size);
@@ -487,28 +468,55 @@ int Finalize(ThreadState *thr);
void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write);
void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write);
-void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
- AccessType typ);
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+ int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic);
+void MemoryAccessImpl(ThreadState *thr, uptr addr,
+ int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
+ u64 *shadow_mem, Shadow cur);
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
+ uptr size, bool is_write);
void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ);
-// This creates 2 non-inlined specialized versions of MemoryAccessRange.
-template <bool is_read>
-void MemoryAccessRangeT(ThreadState *thr, uptr pc, uptr addr, uptr size);
+
+const int kSizeLog1 = 0;
+const int kSizeLog2 = 1;
+const int kSizeLog4 = 2;
+const int kSizeLog8 = 3;
ALWAYS_INLINE
-void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
- bool is_write) {
- if (size == 0)
- return;
- if (is_write)
- MemoryAccessRangeT<false>(thr, pc, addr, size);
- else
- MemoryAccessRangeT<true>(thr, pc, addr, size);
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ) {
+ int size_log;
+ switch (size) {
+ case 1:
+ size_log = kSizeLog1;
+ break;
+ case 2:
+ size_log = kSizeLog2;
+ break;
+ case 4:
+ size_log = kSizeLog4;
+ break;
+ default:
+ DCHECK_EQ(size, 8);
+ size_log = kSizeLog8;
+ break;
+ }
+ bool is_write = !(typ & kAccessRead);
+ bool is_atomic = typ & kAccessAtomic;
+ if (typ & kAccessVptr)
+ thr->is_vptr_access = true;
+ if (typ & kAccessFree)
+ thr->is_freeing = true;
+ MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic);
+ if (typ & kAccessVptr)
+ thr->is_vptr_access = false;
+ if (typ & kAccessFree)
+ thr->is_freeing = false;
}
-void ShadowSet(RawShadow *p, RawShadow *end, RawShadow v);
-void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
uptr size);
@@ -518,6 +526,9 @@ void ThreadIgnoreEnd(ThreadState *thr);
void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
void ThreadIgnoreSyncEnd(ThreadState *thr);
+void FuncEntry(ThreadState *thr, uptr pc);
+void FuncExit(ThreadState *thr);
+
Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
ThreadType thread_type);
@@ -563,7 +574,11 @@ void Release(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr);
void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
void AfterSleep(ThreadState *thr, uptr pc);
-void IncrementEpoch(ThreadState *thr);
+void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
// The hacky call uses custom calling convention and an assembly thunk.
// It is considerably faster that a normal call for the caller
@@ -586,19 +601,43 @@ void IncrementEpoch(ThreadState *thr);
#define HACKY_CALL(f) f()
#endif
+void TraceSwitch(ThreadState *thr);
+uptr TraceTopPC(ThreadState *thr);
+uptr TraceSize();
+uptr TraceParts();
+Trace *ThreadTrace(Tid tid);
+
+extern "C" void __tsan_trace_switch();
+void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
+ EventType typ, u64 addr) {
+ if (!kCollectHistory)
+ return;
+ // TraceSwitch accesses shadow_stack, but it's called infrequently,
+ // so we check it here proactively.
+ DCHECK(thr->shadow_stack);
+ DCHECK_GE((int)typ, 0);
+ DCHECK_LE((int)typ, 7);
+ DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
+ u64 pos = fs.GetTracePos();
+ if (UNLIKELY((pos % kTracePartSize) == 0)) {
+#if !SANITIZER_GO
+ HACKY_CALL(__tsan_trace_switch);
+#else
+ TraceSwitch(thr);
+#endif
+ }
+ Event *trace = (Event*)GetThreadTrace(fs.tid());
+ Event *evp = &trace[pos];
+ Event ev = (u64)addr | ((u64)typ << kEventPCBits);
+ *evp = ev;
+}
+
#if !SANITIZER_GO
uptr ALWAYS_INLINE HeapEnd() {
return HeapMemEnd() + PrimaryAllocator::AdditionalSize();
}
#endif
-void SlotAttachAndLock(ThreadState *thr) ACQUIRE(thr->slot->mtx);
-void SlotDetach(ThreadState *thr);
-void SlotLock(ThreadState *thr) ACQUIRE(thr->slot->mtx);
-void SlotUnlock(ThreadState *thr) RELEASE(thr->slot->mtx);
-void DoReset(ThreadState *thr, uptr epoch);
-void FlushShadowMemory();
-
ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags);
void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber);
void FiberSwitch(ThreadState *thr, uptr pc, ThreadState *fiber, unsigned flags);
@@ -609,53 +648,6 @@ enum FiberSwitchFlags {
FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync
};
-class SlotPairLocker {
- public:
- SlotPairLocker(ThreadState *thr, Sid sid);
- ~SlotPairLocker();
-
- private:
- ThreadState *thr_;
- TidSlot *slot_;
-};
-
-class SlotLocker {
- public:
- ALWAYS_INLINE
- SlotLocker(ThreadState *thr, bool recursive = false)
- : thr_(thr), locked_(recursive ? thr->slot_locked : false) {
- if (!locked_)
- SlotLock(thr_);
- }
-
- ALWAYS_INLINE
- ~SlotLocker() {
- if (!locked_)
- SlotUnlock(thr_);
- }
-
- private:
- ThreadState *thr_;
- bool locked_;
-};
-
-class SlotUnlocker {
- public:
- SlotUnlocker(ThreadState *thr) : thr_(thr), locked_(thr->slot_locked) {
- if (locked_)
- SlotUnlock(thr_);
- }
-
- ~SlotUnlocker() {
- if (locked_)
- SlotLock(thr_);
- }
-
- private:
- ThreadState *thr_;
- bool locked_;
-};
-
ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) {
if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals)))
ProcessPendingSignalsImpl(thr);
@@ -674,19 +666,16 @@ void LazyInitialize(ThreadState *thr) {
#endif
}
-void TraceResetForTesting();
+namespace v3 {
+
void TraceSwitchPart(ThreadState *thr);
-void TraceSwitchPartImpl(ThreadState *thr);
-bool RestoreStack(EventType type, Sid sid, Epoch epoch, uptr addr, uptr size,
- AccessType typ, Tid *ptid, VarSizeStackTrace *pstk,
+bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
+ uptr size, AccessType typ, VarSizeStackTrace *pstk,
MutexSet *pmset, uptr *ptag);
template <typename EventT>
ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr,
EventT **ev) {
- // TraceSwitchPart accesses shadow_stack, but it's called infrequently,
- // so we check it here proactively.
- DCHECK(thr->shadow_stack);
Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
#if SANITIZER_DEBUG
// TraceSwitch acquires these mutexes,
@@ -757,16 +746,20 @@ void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
void TraceMutexUnlock(ThreadState *thr, uptr addr);
void TraceTime(ThreadState *thr);
-void TraceRestartFuncExit(ThreadState *thr);
-void TraceRestartFuncEntry(ThreadState *thr, uptr pc);
+} // namespace v3
void GrowShadowStack(ThreadState *thr);
ALWAYS_INLINE
void FuncEntry(ThreadState *thr, uptr pc) {
- DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.sid(), (void *)pc);
- if (UNLIKELY(!TryTraceFunc(thr, pc)))
- return TraceRestartFuncEntry(thr, pc);
+ DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc);
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
+ }
+
+ // Shadow stack maintenance can be replaced with
+ // stack unwinding during trace switch (which presumably must be faster).
DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -780,9 +773,12 @@ void FuncEntry(ThreadState *thr, uptr pc) {
ALWAYS_INLINE
void FuncExit(ThreadState *thr) {
- DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.sid());
- if (UNLIKELY(!TryTraceFunc(thr, 0)))
- return TraceRestartFuncExit(thr);
+ DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
+ }
+
DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
#if !SANITIZER_GO
DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
@@ -794,6 +790,7 @@ void FuncExit(ThreadState *thr) {
extern void (*on_initialize)(void);
extern int (*on_finalize)(int);
#endif
+
} // namespace __tsan
#endif // TSAN_RTL_H
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
index 76e269e2ed2a..7365fdaa3038 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp
@@ -15,13 +15,15 @@
namespace __tsan {
-ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
+namespace v3 {
+
+ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
uptr addr, uptr size,
AccessType typ) {
DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
if (!kCollectHistory)
return true;
- EventAccess* ev;
+ EventAccess *ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
@@ -38,27 +40,25 @@ ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
TraceRelease(thr, ev);
return true;
}
- auto* evex = reinterpret_cast<EventAccessExt*>(ev);
+ auto *evex = reinterpret_cast<EventAccessExt *>(ev);
evex->is_access = 0;
evex->is_func = 0;
evex->type = EventType::kAccessExt;
evex->is_read = !!(typ & kAccessRead);
evex->is_atomic = !!(typ & kAccessAtomic);
evex->size_log = size_log;
- // Note: this is important, see comment in EventAccessExt.
- evex->_ = 0;
evex->addr = CompressAddr(addr);
evex->pc = pc;
TraceRelease(thr, evex);
return true;
}
-ALWAYS_INLINE
-bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
- AccessType typ) {
+ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
+ uptr addr, uptr size,
+ AccessType typ) {
if (!kCollectHistory)
return true;
- EventAccessRange* ev;
+ EventAccessRange *ev;
if (UNLIKELY(!TraceAcquire(thr, &ev)))
return false;
thr->trace_prev_pc = pc;
@@ -75,7 +75,7 @@ bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
return true;
}
-void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
+void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
AccessType typ) {
if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
return;
@@ -84,7 +84,7 @@ void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
DCHECK(res);
}
-void TraceFunc(ThreadState* thr, uptr pc) {
+void TraceFunc(ThreadState *thr, uptr pc) {
if (LIKELY(TryTraceFunc(thr, pc)))
return;
TraceSwitchPart(thr);
@@ -92,17 +92,7 @@ void TraceFunc(ThreadState* thr, uptr pc) {
DCHECK(res);
}
-NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
- TraceSwitchPart(thr);
- FuncEntry(thr, pc);
-}
-
-NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
- TraceSwitchPart(thr);
- FuncExit(thr);
-}
-
-void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
+void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
StackID stk) {
DCHECK(type == EventType::kLock || type == EventType::kRLock);
if (!kCollectHistory)
@@ -119,7 +109,7 @@ void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
TraceEvent(thr, ev);
}
-void TraceMutexUnlock(ThreadState* thr, uptr addr) {
+void TraceMutexUnlock(ThreadState *thr, uptr addr) {
if (!kCollectHistory)
return;
EventUnlock ev;
@@ -131,485 +121,396 @@ void TraceMutexUnlock(ThreadState* thr, uptr addr) {
TraceEvent(thr, ev);
}
-void TraceTime(ThreadState* thr) {
+void TraceTime(ThreadState *thr) {
if (!kCollectHistory)
return;
- FastState fast_state = thr->fast_state;
EventTime ev;
ev.is_access = 0;
ev.is_func = 0;
ev.type = EventType::kTime;
- ev.sid = static_cast<u64>(fast_state.sid());
- ev.epoch = static_cast<u64>(fast_state.epoch());
+ ev.sid = static_cast<u64>(thr->sid);
+ ev.epoch = static_cast<u64>(thr->epoch);
ev._ = 0;
TraceEvent(thr, ev);
}
-ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) {
- return static_cast<RawShadow>(
- atomic_load((atomic_uint32_t*)p, memory_order_relaxed));
-}
+} // namespace v3
-ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) {
- atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed);
+ALWAYS_INLINE
+Shadow LoadShadow(u64 *p) {
+ u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed);
+ return Shadow(raw);
}
-NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
- Shadow old,
- AccessType typ) NO_THREAD_SAFETY_ANALYSIS {
- // For the free shadow markers the first element (that contains kFreeSid)
- // triggers the race, but the second element contains info about the freeing
- // thread, take it.
- if (old.sid() == kFreeSid)
- old = Shadow(LoadShadow(&shadow_mem[1]));
- // This prevents trapping on this address in future.
- for (uptr i = 0; i < kShadowCnt; i++)
- StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
- // See the comment in MemoryRangeFreed as to why the slot is locked
- // for free memory accesses. ReportRace must not be called with
- // the slot locked because of the fork. But MemoryRangeFreed is not
- // called during fork because fork sets ignore_reads_and_writes,
- // so simply unlocking the slot should be fine.
- if (typ & kAccessFree)
- SlotUnlock(thr);
- ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
- if (typ & kAccessFree)
- SlotLock(thr);
+ALWAYS_INLINE
+void StoreShadow(u64 *sp, u64 s) {
+ atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed);
}
-#if !TSAN_VECTORIZE
ALWAYS_INLINE
-bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
- AccessType typ) {
- for (uptr i = 0; i < kShadowCnt; i++) {
- auto old = LoadShadow(&s[i]);
- if (!(typ & kAccessRead)) {
- if (old == cur.raw())
- return true;
- continue;
- }
- auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
- static_cast<u32>(Shadow::kRodata));
- if (masked == cur.raw())
- return true;
- if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
- if (old == Shadow::kRodata)
- return true;
- }
- }
- return false;
+void StoreIfNotYetStored(u64 *sp, u64 *s) {
+ StoreShadow(sp, *s);
+ *s = 0;
}
+extern "C" void __tsan_report_race();
+
ALWAYS_INLINE
-bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
- int unused0, int unused1, AccessType typ) {
- bool stored = false;
- for (uptr idx = 0; idx < kShadowCnt; idx++) {
- RawShadow* sp = &shadow_mem[idx];
- Shadow old(LoadShadow(sp));
- if (LIKELY(old.raw() == Shadow::kEmpty)) {
- if (!(typ & kAccessCheckOnly) && !stored)
- StoreShadow(sp, cur.raw());
- return false;
- }
- if (LIKELY(!(cur.access() & old.access())))
- continue;
- if (LIKELY(cur.sid() == old.sid())) {
- if (!(typ & kAccessCheckOnly) &&
- LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
- StoreShadow(sp, cur.raw());
- stored = true;
- }
- continue;
- }
- if (LIKELY(old.IsBothReadsOrAtomic(typ)))
- continue;
- if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
- continue;
- DoReportRace(thr, shadow_mem, cur, old, typ);
- return true;
- }
- // We did not find any races and had already stored
- // the current access info, so we are done.
- if (LIKELY(stored))
- return false;
- // Choose a random candidate slot and replace it.
- uptr index =
- atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
- StoreShadow(&shadow_mem[index], cur.raw());
- return false;
+void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) {
+ thr->racy_state[0] = cur.raw();
+ thr->racy_state[1] = old.raw();
+ thr->racy_shadow_addr = shadow_mem;
+#if !SANITIZER_GO
+ HACKY_CALL(__tsan_report_race);
+#else
+ ReportRace(thr);
+#endif
}
-# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0
-
-#else /* !TSAN_VECTORIZE */
+static inline bool HappensBefore(Shadow old, ThreadState *thr) {
+ return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
+}
ALWAYS_INLINE
-bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
- m128 access, AccessType typ) {
- // Note: we could check if there is a larger access of the same type,
- // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
- // and now do smaller reads/writes, these can also be considered as "same
- // access". However, it will make the check more expensive, so it's unclear
- // if it's worth it. But this would conserve trace space, so it's useful
- // besides potential speed up.
- if (!(typ & kAccessRead)) {
- const m128 same = _mm_cmpeq_epi32(shadow, access);
- return _mm_movemask_epi8(same);
+void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog,
+ bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem,
+ Shadow cur) {
+ // This potentially can live in an MMX/SSE scratch register.
+ // The required intrinsics are:
+ // __m128i _mm_move_epi64(__m128i*);
+ // _mm_storel_epi64(u64*, __m128i);
+ u64 store_word = cur.raw();
+ bool stored = false;
+
+ // scan all the shadow values and dispatch to 4 categories:
+ // same, replace, candidate and race (see comments below).
+ // we consider only 3 cases regarding access sizes:
+ // equal, intersect and not intersect. initially I considered
+ // larger and smaller as well, it allowed to replace some
+ // 'candidates' with 'same' or 'replace', but I think
+ // it's just not worth it (performance- and complexity-wise).
+
+ Shadow old(0);
+
+ // It release mode we manually unroll the loop,
+ // because empirically gcc generates better code this way.
+ // However, we can't afford unrolling in debug mode, because the function
+ // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
+ // threads, which is not enough for the unrolled loop.
+#if SANITIZER_DEBUG
+ for (int idx = 0; idx < 4; idx++) {
+# include "tsan_update_shadow_word.inc"
+ }
+#else
+ int idx = 0;
+# include "tsan_update_shadow_word.inc"
+ idx = 1;
+ if (stored) {
+# include "tsan_update_shadow_word.inc"
+ } else {
+# include "tsan_update_shadow_word.inc"
}
- // For reads we need to reset read bit in the shadow,
- // because we need to match read with both reads and writes.
- // Shadow::kRodata has only read bit set, so it does what we want.
- // We also abuse it for rodata check to save few cycles
- // since we already loaded Shadow::kRodata into a register.
- // Reads from rodata can't race.
- // Measurements show that they can be 10-20% of all memory accesses.
- // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
- // (thread epochs start from 1). So the same read bit mask
- // serves as rodata indicator.
- const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
- const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
- m128 same = _mm_cmpeq_epi32(masked_shadow, access);
- // Range memory accesses check Shadow::kRodata before calling this,
- // Shadow::kRodatas is not possible for free memory access
- // and Go does not use Shadow::kRodata.
- if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
- const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
- same = _mm_or_si128(ro, same);
+ idx = 2;
+ if (stored) {
+# include "tsan_update_shadow_word.inc"
+ } else {
+# include "tsan_update_shadow_word.inc"
}
- return _mm_movemask_epi8(same);
-}
+ idx = 3;
+ if (stored) {
+# include "tsan_update_shadow_word.inc"
+ } else {
+# include "tsan_update_shadow_word.inc"
+ }
+#endif
-NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
- u32 race_mask, m128 shadow, AccessType typ) {
- // race_mask points which of the shadow elements raced with the current
- // access. Extract that element.
- CHECK_NE(race_mask, 0);
- u32 old;
- // Note: _mm_extract_epi32 index must be a constant value.
- switch (__builtin_ffs(race_mask) / 4) {
- case 0:
- old = _mm_extract_epi32(shadow, 0);
- break;
- case 1:
- old = _mm_extract_epi32(shadow, 1);
- break;
- case 2:
- old = _mm_extract_epi32(shadow, 2);
- break;
- case 3:
- old = _mm_extract_epi32(shadow, 3);
- break;
+ // we did not find any races and had already stored
+ // the current access info, so we are done
+ if (LIKELY(stored))
+ return;
+ // choose a random candidate slot and replace it
+ StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
+ return;
+RACE:
+ HandleRace(thr, shadow_mem, cur, old);
+ return;
+}
+
+void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ AccessType typ) {
+ DCHECK(!(typ & kAccessAtomic));
+ const bool kAccessIsWrite = !(typ & kAccessRead);
+ const bool kIsAtomic = false;
+ while (size) {
+ int size1 = 1;
+ int kAccessSizeLog = kSizeLog1;
+ if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
+ size1 = 8;
+ kAccessSizeLog = kSizeLog8;
+ } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
+ size1 = 4;
+ kAccessSizeLog = kSizeLog4;
+ } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
+ size1 = 2;
+ kAccessSizeLog = kSizeLog2;
+ }
+ MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
+ addr += size1;
+ size -= size1;
}
- Shadow prev(static_cast<RawShadow>(old));
- // For the free shadow markers the first element (that contains kFreeSid)
- // triggers the race, but the second element contains info about the freeing
- // thread, take it.
- if (prev.sid() == kFreeSid)
- prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
- DoReportRace(thr, shadow_mem, cur, prev, typ);
}
ALWAYS_INLINE
-bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
- m128 shadow, m128 access, AccessType typ) {
- // Note: empty/zero slots don't intersect with any access.
- const m128 zero = _mm_setzero_si128();
- const m128 mask_access = _mm_set1_epi32(0x000000ff);
- const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
- const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
- const m128 access_and = _mm_and_si128(access, shadow);
- const m128 access_xor = _mm_xor_si128(access, shadow);
- const m128 intersect = _mm_and_si128(access_and, mask_access);
- const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
- const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
- const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
- const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
- const m128 no_race =
- _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
- const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
- if (UNLIKELY(race_mask))
- goto SHARED;
-
-STORE : {
- if (typ & kAccessCheckOnly)
- return false;
- // We could also replace different sid's if access is the same,
- // rw weaker and happens before. However, just checking access below
- // is not enough because we also need to check that !both_read_or_atomic
- // (reads from different sids can be concurrent).
- // Theoretically we could replace smaller accesses with larger accesses,
- // but it's unclear if it's worth doing.
- const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
- const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
- const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
- const m128 access_read_atomic =
- _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
- const m128 rw_weaker =
- _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
- const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
- const int rewrite_mask = _mm_movemask_epi8(rewrite);
- int index = __builtin_ffs(rewrite_mask);
- if (UNLIKELY(index == 0)) {
- const m128 empty = _mm_cmpeq_epi32(shadow, zero);
- const int empty_mask = _mm_movemask_epi8(empty);
- index = __builtin_ffs(empty_mask);
- if (UNLIKELY(index == 0))
- index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
+bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+ Shadow cur(a);
+ for (uptr i = 0; i < kShadowCnt; i++) {
+ Shadow old(LoadShadow(&s[i]));
+ if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
+ old.TidWithIgnore() == cur.TidWithIgnore() &&
+ old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() &&
+ old.IsRead() <= cur.IsRead())
+ return true;
}
- StoreShadow(&shadow_mem[index / 4], cur.raw());
- // We could zero other slots determined by rewrite_mask.
- // That would help other threads to evict better slots,
- // but it's unclear if it's worth it.
return false;
}
-SHARED:
- m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
- // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
- // indexes must be constants.
-# define LOAD_EPOCH(idx) \
- if (LIKELY(race_mask & (1 << (idx * 4)))) { \
- u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1); \
- u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid))); \
- thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
- }
- LOAD_EPOCH(0);
- LOAD_EPOCH(1);
- LOAD_EPOCH(2);
- LOAD_EPOCH(3);
-# undef LOAD_EPOCH
- const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
- const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
- const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
- const int concurrent_mask = _mm_movemask_epi8(concurrent);
- if (LIKELY(concurrent_mask == 0))
- goto STORE;
-
- DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
- return true;
+#if TSAN_VECTORIZE
+# define SHUF(v0, v1, i0, i1, i2, i3) \
+ _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
+ _mm_castsi128_ps(v1), \
+ (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+ALWAYS_INLINE
+bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+ // This is an optimized version of ContainsSameAccessSlow.
+ // load current access into access[0:63]
+ const m128 access = _mm_cvtsi64_si128(a);
+ // duplicate high part of access in addr0:
+ // addr0[0:31] = access[32:63]
+ // addr0[32:63] = access[32:63]
+ // addr0[64:95] = access[32:63]
+ // addr0[96:127] = access[32:63]
+ const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
+ // load 4 shadow slots
+ const m128 shadow0 = _mm_load_si128((__m128i *)s);
+ const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
+ // load high parts of 4 shadow slots into addr_vect:
+ // addr_vect[0:31] = shadow0[32:63]
+ // addr_vect[32:63] = shadow0[96:127]
+ // addr_vect[64:95] = shadow1[32:63]
+ // addr_vect[96:127] = shadow1[96:127]
+ m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
+ if (!is_write) {
+ // set IsRead bit in addr_vect
+ const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
+ const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
+ addr_vect = _mm_or_si128(addr_vect, rw_mask);
+ }
+ // addr0 == addr_vect?
+ const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
+ // epoch1[0:63] = sync_epoch
+ const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
+ // epoch[0:31] = sync_epoch[0:31]
+ // epoch[32:63] = sync_epoch[0:31]
+ // epoch[64:95] = sync_epoch[0:31]
+ // epoch[96:127] = sync_epoch[0:31]
+ const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
+ // load low parts of shadow cell epochs into epoch_vect:
+ // epoch_vect[0:31] = shadow0[0:31]
+ // epoch_vect[32:63] = shadow0[64:95]
+ // epoch_vect[64:95] = shadow1[0:31]
+ // epoch_vect[96:127] = shadow1[64:95]
+ const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
+ // epoch_vect >= sync_epoch?
+ const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
+ // addr_res & epoch_res
+ const m128 res = _mm_and_si128(addr_res, epoch_res);
+ // mask[0] = res[7]
+ // mask[1] = res[15]
+ // ...
+ // mask[15] = res[127]
+ const int mask = _mm_movemask_epi8(res);
+ return mask != 0;
}
-
-# define LOAD_CURRENT_SHADOW(cur, shadow_mem) \
- const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
- const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif
-char* DumpShadow(char* buf, RawShadow raw) {
- if (raw == Shadow::kEmpty) {
- internal_snprintf(buf, 64, "0");
- return buf;
- }
- Shadow s(raw);
- AccessType typ;
- s.GetAccess(nullptr, nullptr, &typ);
- internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
- static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
- s.access(), static_cast<u32>(typ));
- return buf;
+ALWAYS_INLINE
+bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+#if TSAN_VECTORIZE
+ bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
+ // NOTE: this check can fail if the shadow is concurrently mutated
+ // by other threads. But it still can be useful if you modify
+ // ContainsSameAccessFast and want to ensure that it's not completely broken.
+ // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
+ return res;
+#else
+ return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+#endif
}
-// TryTrace* and TraceRestart* functions allow to turn memory access and func
-// entry/exit callbacks into leaf functions with all associated performance
-// benefits. These hottest callbacks do only 2 slow path calls: report a race
-// and trace part switching. Race reporting is easy to turn into a tail call, we
-// just always return from the runtime after reporting a race. But trace part
-// switching is harder because it needs to be in the middle of callbacks. To
-// turn it into a tail call we immidiately return after TraceRestart* functions,
-// but TraceRestart* functions themselves recurse into the callback after
-// switching trace part. As the result the hottest callbacks contain only tail
-// calls, which effectively makes them leaf functions (can use all registers,
-// no frame setup, etc).
-NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
- uptr size, AccessType typ) {
- TraceSwitchPart(thr);
- MemoryAccess(thr, pc, addr, size, typ);
-}
+ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+ int kAccessSizeLog, bool kAccessIsWrite,
+ bool kIsAtomic) {
+ RawShadow *shadow_mem = MemToShadow(addr);
+ DPrintf2(
+ "#%d: MemoryAccess: @%p %p size=%d"
+ " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
+ (int)thr->fast_state.tid(), (void *)pc, (void *)addr,
+ (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
+ (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2],
+ (uptr)shadow_mem[3]);
+#if SANITIZER_DEBUG
+ if (!IsAppMem(addr)) {
+ Printf("Access to non app mem %zx\n", addr);
+ DCHECK(IsAppMem(addr));
+ }
+ if (!IsShadowMem(shadow_mem)) {
+ Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+ DCHECK(IsShadowMem(shadow_mem));
+ }
+#endif
-ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
- uptr size, AccessType typ) {
- RawShadow* shadow_mem = MemToShadow(addr);
- UNUSED char memBuf[4][64];
- DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
- static_cast<int>(thr->fast_state.sid()),
- static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
- static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
- DumpShadow(memBuf[1], shadow_mem[1]),
- DumpShadow(memBuf[2], shadow_mem[2]),
- DumpShadow(memBuf[3], shadow_mem[3]));
+ if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
+ // Access to .rodata section, no races here.
+ // Measurements show that it can be 10-20% of all memory accesses.
+ return;
+ }
FastState fast_state = thr->fast_state;
- Shadow cur(fast_state, addr, size, typ);
-
- LOAD_CURRENT_SHADOW(cur, shadow_mem);
- if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
+ if (UNLIKELY(fast_state.GetIgnoreBit())) {
return;
- if (UNLIKELY(fast_state.GetIgnoreBit()))
- return;
- if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
- return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
- CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
-}
+ }
-NOINLINE
-void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
- uptr size, AccessType typ) {
- TraceSwitchPart(thr);
- UnalignedMemoryAccess(thr, pc, addr, size, typ);
-}
+ Shadow cur(fast_state);
+ cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
+ cur.SetWrite(kAccessIsWrite);
+ cur.SetAtomic(kIsAtomic);
-ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
- uptr addr, uptr size,
- AccessType typ) {
- DCHECK_LE(size, 8);
- FastState fast_state = thr->fast_state;
- if (UNLIKELY(fast_state.GetIgnoreBit()))
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
+ kAccessIsWrite))) {
return;
- RawShadow* shadow_mem = MemToShadow(addr);
- bool traced = false;
- uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
- {
- Shadow cur(fast_state, addr, size1, typ);
- LOAD_CURRENT_SHADOW(cur, shadow_mem);
- if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
- goto SECOND;
- if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
- return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
- traced = true;
- if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
- return;
}
-SECOND:
- uptr size2 = size - size1;
- if (LIKELY(size2 == 0))
- return;
- shadow_mem += kShadowCnt;
- Shadow cur(fast_state, 0, size2, typ);
- LOAD_CURRENT_SHADOW(cur, shadow_mem);
- if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
- return;
- if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
- return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
- CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
+
+ if (kCollectHistory) {
+ fast_state.IncrementEpoch();
+ thr->fast_state = fast_state;
+ TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+ cur.IncrementEpoch();
+ }
+
+ MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+ shadow_mem, cur);
}
-void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
- DCHECK_LE(p, end);
- DCHECK(IsShadowMem(p));
- DCHECK(IsShadowMem(end));
- UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
- DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
- DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
-#if !TSAN_VECTORIZE
- for (; p < end; p += kShadowCnt) {
- p[0] = v;
- for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
+// Called by MemoryAccessRange in tsan_rtl_thread.cpp
+ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr,
+ int kAccessSizeLog,
+ bool kAccessIsWrite, bool kIsAtomic,
+ u64 *shadow_mem, Shadow cur) {
+ if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
+ kAccessIsWrite))) {
+ return;
}
-#else
- m128 vv = _mm_setr_epi32(
- static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
- static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
- m128* vp = reinterpret_cast<m128*>(p);
- m128* vend = reinterpret_cast<m128*>(end);
- for (; vp < vend; vp++) _mm_store_si128(vp, vv);
-#endif
+
+ MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+ shadow_mem, cur);
}
-static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
+static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ u64 val) {
+ (void)thr;
+ (void)pc;
if (size == 0)
return;
- DCHECK_EQ(addr % kShadowCell, 0);
- DCHECK_EQ(size % kShadowCell, 0);
+ // FIXME: fix me.
+ uptr offset = addr % kShadowCell;
+ if (offset) {
+ offset = kShadowCell - offset;
+ if (size <= offset)
+ return;
+ addr += offset;
+ size -= offset;
+ }
+ DCHECK_EQ(addr % 8, 0);
// If a user passes some insane arguments (memset(0)),
// let it just crash as usual.
if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
return;
- RawShadow* begin = MemToShadow(addr);
- RawShadow* end = begin + size / kShadowCell * kShadowCnt;
// Don't want to touch lots of shadow memory.
// If a program maps 10MB stack, there is no need reset the whole range.
+ size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
// UnmapOrDie/MmapFixedNoReserve does not work on Windows.
- if (SANITIZER_WINDOWS ||
- size <= common_flags()->clear_shadow_mmap_threshold) {
- ShadowSet(begin, end, val);
- return;
- }
- // The region is big, reset only beginning and end.
- const uptr kPageSize = GetPageSizeCached();
- // Set at least first kPageSize/2 to page boundary.
- RawShadow* mid1 =
- Min(end, reinterpret_cast<RawShadow*>(RoundUp(
- reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
- ShadowSet(begin, mid1, val);
- // Reset middle part.
- RawShadow* mid2 = RoundDown(end, kPageSize);
- if (mid2 > mid1) {
- if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
+ if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
+ RawShadow *p = MemToShadow(addr);
+ CHECK(IsShadowMem(p));
+ CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
+ // FIXME: may overwrite a part outside the region
+ for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
+ p[i++] = val;
+ for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0;
+ }
+ } else {
+ // The region is big, reset only beginning and end.
+ const uptr kPageSize = GetPageSizeCached();
+ RawShadow *begin = MemToShadow(addr);
+ RawShadow *end = begin + size / kShadowCell * kShadowCnt;
+ RawShadow *p = begin;
+ // Set at least first kPageSize/2 to page boundary.
+ while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
+ *p++ = val;
+ for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
+ }
+ // Reset middle part.
+ RawShadow *p1 = p;
+ p = RoundDown(end, kPageSize);
+ if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
Die();
+ // Set the ending.
+ while (p < end) {
+ *p++ = val;
+ for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
+ }
}
- // Set the ending.
- ShadowSet(mid2, end, val);
}
-void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
- uptr addr1 = RoundDown(addr, kShadowCell);
- uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
- MemoryRangeSet(addr1, size1, Shadow::kEmpty);
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+ MemoryRangeSet(thr, pc, addr, size, 0);
}
-void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
- // Callers must lock the slot to ensure synchronization with the reset.
- // The problem with "freed" memory is that it's not "monotonic"
- // with respect to bug detection: freed memory is bad to access,
- // but then if the heap block is reallocated later, it's good to access.
- // As the result a garbage "freed" shadow can lead to a false positive
- // if it happens to match a real free in the thread trace,
- // but the heap block was reallocated before the current memory access,
- // so it's still good to access. It's not the case with data races.
- DCHECK(thr->slot_locked);
- DCHECK_EQ(addr % kShadowCell, 0);
- size = RoundUp(size, kShadowCell);
- // Processing more than 1k (2k of shadow) is expensive,
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+ // Processing more than 1k (4k of shadow) is expensive,
// can cause excessive memory consumption (user does not necessary touch
// the whole range) and most likely unnecessary.
- size = Min<uptr>(size, 1024);
- const AccessType typ =
- kAccessWrite | kAccessFree | kAccessCheckOnly | kAccessNoRodata;
- TraceMemoryAccessRange(thr, pc, addr, size, typ);
- RawShadow* shadow_mem = MemToShadow(addr);
- Shadow cur(thr->fast_state, 0, kShadowCell, typ);
-#if TSAN_VECTORIZE
- const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
- const m128 freed = _mm_setr_epi32(
- static_cast<u32>(Shadow::FreedMarker()),
- static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
- for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
- const m128 shadow = _mm_load_si128((m128*)shadow_mem);
- if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
- return;
- _mm_store_si128((m128*)shadow_mem, freed);
+ if (size > 1024)
+ size = 1024;
+ CHECK_EQ(thr->is_freeing, false);
+ thr->is_freeing = true;
+ MemoryAccessRange(thr, pc, addr, size, true);
+ thr->is_freeing = false;
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
}
-#else
- for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
- if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
- return;
- StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
- StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
- StoreShadow(&shadow_mem[2], Shadow::kEmpty);
- StoreShadow(&shadow_mem[3], Shadow::kEmpty);
+ Shadow s(thr->fast_state);
+ s.ClearIgnoreBit();
+ s.MarkAsFreed();
+ s.SetWrite(true);
+ s.SetAddr0AndSizeLog(0, 3);
+ MemoryRangeSet(thr, pc, addr, size, s.raw());
+}
+
+void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+ if (kCollectHistory) {
+ thr->fast_state.IncrementEpoch();
+ TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
}
-#endif
-}
-
-void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
- DCHECK_EQ(addr % kShadowCell, 0);
- size = RoundUp(size, kShadowCell);
- TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
- Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
- MemoryRangeSet(addr, size, cur.raw());
+ Shadow s(thr->fast_state);
+ s.ClearIgnoreBit();
+ s.SetWrite(true);
+ s.SetAddr0AndSizeLog(0, 3);
+ MemoryRangeSet(thr, pc, addr, size, s.raw());
}
-void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
+void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
uptr size) {
if (thr->ignore_reads_and_writes == 0)
MemoryRangeImitateWrite(thr, pc, addr, size);
@@ -617,29 +518,14 @@ void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
MemoryResetRange(thr, pc, addr, size);
}
-ALWAYS_INLINE
-bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
- AccessType typ) {
- LOAD_CURRENT_SHADOW(cur, shadow_mem);
- if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
- return false;
- return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
-}
-
-template <bool is_read>
-NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
- uptr size) {
- TraceSwitchPart(thr);
- MemoryAccessRangeT<is_read>(thr, pc, addr, size);
-}
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+ bool is_write) {
+ if (size == 0)
+ return;
-template <bool is_read>
-void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
- const AccessType typ =
- (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
- RawShadow* shadow_mem = MemToShadow(addr);
- DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
- (void*)pc, (void*)addr, (int)size, is_read);
+ RawShadow *shadow_mem = MemToShadow(addr);
+ DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid,
+ (void *)pc, (void *)addr, (int)size, is_write);
#if SANITIZER_DEBUG
if (!IsAppMem(addr)) {
@@ -651,57 +537,65 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
DCHECK(IsAppMem(addr + size - 1));