Speed up and slightly simplify Mutex
Eliminate the separate sequentially consistent contention counter load
in Mutex::ExclusiveUnlock by putting the contention counter in the
Mutex's state word.
Replace some CHECK_GE checks with CHECK_GT. We were checking quantities
intended to be non-negative against >= 0 just before decrementing them.
Remove a pointless volatile declaration.
Introduce constants for the first FUTEX_WAKE argument. Remove all uses
of -1 as that argument, everywhere in ART. It appears to work, but the
documentation says it's wrong.
This does not yet address the ReaderWriterMutex issue, which is handled
in a different way in a separate CL.
Benchmark runs with and without this CL weakly suggest a tiny, not
statistically significant, improvement in both time and space with this
CL.
Bug: 111835365
Test: Build and boot AOSP. TreeHugger.
Change-Id: Ie53c65f2ce774a8cb4d224e2c1b3a110eb880f0c
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 98e23b4..22b76d2 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -209,7 +209,7 @@
if (done && (cur_state - 1) == 0) { // Weak CAS may fail spuriously.
if (num_contenders_.load(std::memory_order_seq_cst) > 0) {
// Wake any exclusive waiters as there are now no readers.
- futex(state_.Address(), FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0);
+ futex(state_.Address(), FUTEX_WAKE_PRIVATE, kWakeAll, nullptr, nullptr, 0);
}
}
} else {
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index e965447..e9fa46c 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -321,8 +321,7 @@
Mutex::Mutex(const char* name, LockLevel level, bool recursive)
: BaseMutex(name, level), exclusive_owner_(0), recursion_count_(0), recursive_(recursive) {
#if ART_USE_FUTEXES
- DCHECK_EQ(0, state_.load(std::memory_order_relaxed));
- DCHECK_EQ(0, num_contenders_.load(std::memory_order_relaxed));
+ DCHECK_EQ(0, state_and_contenders_.load(std::memory_order_relaxed));
#else
CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
#endif
@@ -337,18 +336,14 @@
Mutex::~Mutex() {
bool safe_to_call_abort = Locks::IsSafeToCallAbortRacy();
#if ART_USE_FUTEXES
- if (state_.load(std::memory_order_relaxed) != 0) {
+ if (state_and_contenders_.load(std::memory_order_relaxed) != 0) {
LOG(safe_to_call_abort ? FATAL : WARNING)
- << "destroying mutex with owner: " << GetExclusiveOwnerTid();
+ << "destroying mutex with owner or contenders. Owner:" << GetExclusiveOwnerTid();
} else {
if (GetExclusiveOwnerTid() != 0) {
LOG(safe_to_call_abort ? FATAL : WARNING)
<< "unexpectedly found an owner on unlocked mutex " << name_;
}
- if (num_contenders_.load(std::memory_order_seq_cst) != 0) {
- LOG(safe_to_call_abort ? FATAL : WARNING)
- << "unexpectedly found a contender on mutex " << name_;
- }
}
#else
// We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
@@ -371,28 +366,34 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_.load(std::memory_order_relaxed);
- if (LIKELY(cur_state == 0)) {
- // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
- done = state_.CompareAndSetWeakAcquire(0 /* cur_state */, 1 /* new state */);
+ int32_t cur_state = state_and_contenders_.load(std::memory_order_relaxed);
+ if (LIKELY((cur_state & kHeldMask) == 0) /* lock not held */) {
+ done = state_and_contenders_.CompareAndSetWeakAcquire(cur_state, cur_state | kHeldMask);
} else {
// Failed to acquire, hang up.
ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
- num_contenders_++;
+ // Increment contender count. We can't create enough threads for this to overflow.
+ increment_contenders();
+ // Make cur_state again reflect the expected value of state_and_contenders.
+ cur_state += kContenderIncrement;
if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
self->CheckEmptyCheckpointFromMutex();
}
- if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, 1, nullptr, nullptr, 0) != 0) {
+ if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state,
+ nullptr, nullptr, 0) != 0) {
+ // We only went to sleep after incrementing contenders and checking that the lock
+ // is still held by someone else.
// EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
// We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
if ((errno != EAGAIN) && (errno != EINTR)) {
PLOG(FATAL) << "futex wait failed for " << name_;
}
}
- num_contenders_--;
+ decrement_contenders();
}
} while (!done);
- DCHECK_EQ(state_.load(std::memory_order_relaxed), 1);
+ // Confirm that lock is now held.
+ DCHECK_NE(state_and_contenders_.load(std::memory_order_relaxed) & kHeldMask, 0);
#else
CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
#endif
@@ -417,15 +418,15 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_.load(std::memory_order_relaxed);
- if (cur_state == 0) {
- // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
- done = state_.CompareAndSetWeakAcquire(0 /* cur_state */, 1 /* new state */);
+ int32_t cur_state = state_and_contenders_.load(std::memory_order_relaxed);
+ if ((cur_state & kHeldMask) == 0) {
+ // Change state to held and impose load/store ordering appropriate for lock acquisition.
+ done = state_and_contenders_.CompareAndSetWeakAcquire(cur_state, cur_state | kHeldMask);
} else {
return false;
}
} while (!done);
- DCHECK_EQ(state_.load(std::memory_order_relaxed), 1);
+ DCHECK_NE(state_and_contenders_.load(std::memory_order_relaxed) & kHeldMask, 0);
#else
int result = pthread_mutex_trylock(&mutex_);
if (result == EBUSY) {
@@ -474,20 +475,22 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_.load(std::memory_order_relaxed);
- if (LIKELY(cur_state == 1)) {
+ int32_t cur_state = state_and_contenders_.load(std::memory_order_relaxed);
+ if (LIKELY((cur_state & kHeldMask) != 0)) {
// We're no longer the owner.
exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
- // Change state to 0 and impose load/store ordering appropriate for lock release.
- // Note, the relaxed loads below mustn't reorder before the CompareAndSet.
- // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
- // a status bit into the state on contention.
- done = state_.CompareAndSetWeakSequentiallyConsistent(cur_state, 0 /* new state */);
- if (LIKELY(done)) { // Spurious fail?
- // Wake a contender.
- if (UNLIKELY(num_contenders_.load(std::memory_order_seq_cst) > 0)) {
- futex(state_.Address(), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
+ // Change state to not held and impose load/store ordering appropriate for lock release.
+ uint32_t new_state = cur_state & ~kHeldMask; // Same number of contenders.
+ done = state_and_contenders_.CompareAndSetWeakRelease(cur_state, new_state);
+ if (LIKELY(done)) { // Spurious fail or waiters changed?
+ if (UNLIKELY(new_state != 0) /* have contenders */) {
+ futex(state_and_contenders_.Address(), FUTEX_WAKE_PRIVATE, kWakeOne,
+ nullptr, nullptr, 0);
}
+ // We only do a futex wait after incrementing contenders and verifying the lock was
+ // still held. If we didn't see waiters, then there couldn't have been any futexes
+ // waiting on this lock when we did the CAS. New arrivals after that cannot wait for us,
+ // since the futex wait call would see the lock available and immediately return.
}
} else {
// Logging acquires the logging lock, avoid infinite recursion in that case.
@@ -528,8 +531,8 @@
#if ART_USE_FUTEXES
// Wake up all the waiters so they will respond to the emtpy checkpoint.
DCHECK(should_respond_to_empty_checkpoint_request_);
- if (UNLIKELY(num_contenders_.load(std::memory_order_relaxed) > 0)) {
- futex(state_.Address(), FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0);
+ if (UNLIKELY(get_contenders() != 0)) {
+ futex(state_and_contenders_.Address(), FUTEX_WAKE_PRIVATE, kWakeAll, nullptr, nullptr, 0);
}
#else
LOG(FATAL) << "Non futex case isn't supported.";
@@ -619,7 +622,7 @@
if (LIKELY(done)) { // Weak CAS may fail spuriously.
// Wake any waiters.
if (UNLIKELY(num_contenders_.load(std::memory_order_seq_cst) > 0)) {
- futex(state_.Address(), FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0);
+ futex(state_.Address(), FUTEX_WAKE_PRIVATE, kWakeAll, nullptr, nullptr, 0);
}
}
} else {
@@ -774,7 +777,7 @@
// Wake up all the waiters so they will respond to the emtpy checkpoint.
DCHECK(should_respond_to_empty_checkpoint_request_);
if (UNLIKELY(num_contenders_.load(std::memory_order_relaxed) > 0)) {
- futex(state_.Address(), FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0);
+ futex(state_.Address(), FUTEX_WAKE_PRIVATE, kWakeAll, nullptr, nullptr, 0);
}
#else
LOG(FATAL) << "Non futex case isn't supported.";
@@ -839,7 +842,7 @@
FUTEX_REQUEUE_PRIVATE,
/* Threads to wake */ 0,
/* Threads to requeue*/ reinterpret_cast<const timespec*>(count),
- guard_.state_.Address(),
+ guard_.state_and_contenders_.Address(),
0) != -1;
if (!done && errno != EAGAIN && errno != EINTR) {
PLOG(FATAL) << "futex requeue failed for " << name_;
@@ -871,7 +874,7 @@
#if ART_USE_FUTEXES
num_waiters_++;
// Ensure the Mutex is contended so that requeued threads are awoken.
- guard_.num_contenders_++;
+ guard_.increment_contenders();
guard_.recursion_count_ = 1;
int32_t cur_sequence = sequence_.load(std::memory_order_relaxed);
guard_.ExclusiveUnlock(self);
@@ -896,11 +899,11 @@
}
}
guard_.ExclusiveLock(self);
- CHECK_GE(num_waiters_, 0);
+ CHECK_GT(num_waiters_, 0);
num_waiters_--;
// We awoke and so no longer require awakes from the guard_'s unlock.
- CHECK_GE(guard_.num_contenders_.load(std::memory_order_relaxed), 0);
- guard_.num_contenders_--;
+ CHECK_GT(guard_.get_contenders(), 0);
+ guard_.decrement_contenders();
#else
pid_t old_owner = guard_.GetExclusiveOwnerTid();
guard_.exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
@@ -922,7 +925,7 @@
InitTimeSpec(false, CLOCK_REALTIME, ms, ns, &rel_ts);
num_waiters_++;
// Ensure the Mutex is contended so that requeued threads are awoken.
- guard_.num_contenders_++;
+ guard_.increment_contenders();
guard_.recursion_count_ = 1;
int32_t cur_sequence = sequence_.load(std::memory_order_relaxed);
guard_.ExclusiveUnlock(self);
@@ -937,11 +940,11 @@
}
}
guard_.ExclusiveLock(self);
- CHECK_GE(num_waiters_, 0);
+ CHECK_GT(num_waiters_, 0);
num_waiters_--;
// We awoke and so no longer require awakes from the guard_'s unlock.
- CHECK_GE(guard_.num_contenders_.load(std::memory_order_relaxed), 0);
- guard_.num_contenders_--;
+ CHECK_GT(guard_.get_contenders(), 0);
+ guard_.decrement_contenders();
#else
#if !defined(__APPLE__)
int clock = CLOCK_MONOTONIC;
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 555053b..48359fe 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -17,6 +17,7 @@
#ifndef ART_RUNTIME_BASE_MUTEX_H_
#define ART_RUNTIME_BASE_MUTEX_H_
+#include <limits.h> // for INT_MAX
#include <pthread.h>
#include <stdint.h>
#include <unistd.h> // for pid_t
@@ -59,6 +60,9 @@
#ifdef ART_USE_FUTEXES
// To enable lock contention logging, set this to true.
constexpr bool kLogLockContentions = false;
+// FUTEX_WAKE first argument:
+constexpr int kWakeOne = 1;
+constexpr int kWakeAll = INT_MAX;
#else
// Keep this false as lock contention logging is supported only with
// futex.
@@ -191,8 +195,9 @@
AssertNotHeldExclusive(self);
}
- // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
- // than the owner.
+ // Id associated with exclusive owner. No memory ordering semantics if called from a thread
+ // other than the owner. GetTid() == GetExclusiveOwnerTid() is a reliable way to determine
+ // whether we hold the lock; any other information may be invalidated before we return.
pid_t GetExclusiveOwnerTid() const;
// Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
@@ -209,12 +214,33 @@
private:
#if ART_USE_FUTEXES
- // 0 is unheld, 1 is held.
- AtomicInteger state_;
+ // Low order bit: 0 is unheld, 1 is held.
+ // High order bits: Number of waiting contenders.
+ AtomicInteger state_and_contenders_;
+
+ static constexpr int32_t kHeldMask = 1;
+
+ static constexpr int32_t kContenderShift = 1;
+
+ static constexpr int32_t kContenderIncrement = 1 << kContenderShift;
+
+ void increment_contenders() {
+ state_and_contenders_.fetch_add(kContenderIncrement);
+ }
+
+ void decrement_contenders() {
+ state_and_contenders_.fetch_sub(kContenderIncrement);
+ }
+
+ int32_t get_contenders() {
+ // Result is guaranteed to include any contention added by this thread; otherwise approximate.
+ // Treat contenders as unsigned because we're paranoid about overflow; should never matter.
+ return static_cast<uint32_t>(state_and_contenders_.load(std::memory_order_relaxed))
+ >> kContenderShift;
+ }
+
// Exclusive owner.
Atomic<pid_t> exclusive_owner_;
- // Number of waiting contenders.
- AtomicInteger num_contenders_;
#else
pthread_mutex_t mutex_;
Atomic<pid_t> exclusive_owner_; // Guarded by mutex_. Asynchronous reads are OK.
@@ -421,7 +447,7 @@
AtomicInteger sequence_;
// Number of threads that have come into to wait, not the length of the waiters on the futex as
// waiters may have been requeued onto guard_. Guarded by guard_.
- volatile int32_t num_waiters_;
+ int32_t num_waiters_;
void RequeueWaiters(int32_t count);
#else
diff --git a/runtime/thread.cc b/runtime/thread.cc
index ffdea8a..03fbd20 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -16,6 +16,7 @@
#include "thread.h"
+#include <limits.h> // for INT_MAX
#include <pthread.h>
#include <signal.h>
#include <sys/resource.h>
@@ -1504,7 +1505,7 @@
done = pending_threads->CompareAndSetWeakRelaxed(cur_val, cur_val - 1);
#if ART_USE_FUTEXES
if (done && (cur_val - 1) == 0) { // Weak CAS may fail spuriously.
- futex(pending_threads->Address(), FUTEX_WAKE_PRIVATE, -1, nullptr, nullptr, 0);
+ futex(pending_threads->Address(), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0);
}
#endif
} while (!done);