Merge "Avoid race in single thread suspension." into lmp-dev
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 7779547..f856ddc 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -41,6 +41,7 @@
 ReaderWriterMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
+Mutex* Locks::thread_list_suspend_thread_lock_ = nullptr;
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
@@ -149,7 +150,8 @@
     for (int i = kLockLevelCount - 1; i >= 0; --i) {
       if (i != level_) {
         BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-        if (held_mutex != NULL) {
+        // We expect waits to happen while holding the thread list suspend thread lock.
+        if (held_mutex != NULL && i != kThreadListSuspendThreadLock) {
           LOG(ERROR) << "Holding \"" << held_mutex->name_ << "\" "
                      << "(level " << LockLevel(i) << ") while performing wait on "
                      << "\"" << name_ << "\" (level " << level_ << ")";
@@ -841,6 +843,7 @@
     DCHECK(logging_lock_ != nullptr);
     DCHECK(mutator_lock_ != nullptr);
     DCHECK(thread_list_lock_ != nullptr);
+    DCHECK(thread_list_suspend_thread_lock_ != nullptr);
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
     DCHECK(profiler_lock_ != nullptr);
@@ -848,13 +851,18 @@
     DCHECK(intern_table_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
-    LockLevel current_lock_level = kMutatorLock;
-    DCHECK(mutator_lock_ == nullptr);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
+    LockLevel current_lock_level = kThreadListSuspendThreadLock;
+    DCHECK(thread_list_suspend_thread_lock_ == nullptr);
+    thread_list_suspend_thread_lock_ =
+        new Mutex("thread list suspend thread by .. lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
-        DCHECK_LT(new_level, current_lock_level); \
-        current_lock_level = new_level;
+      DCHECK_LT(new_level, current_lock_level); \
+      current_lock_level = new_level;
+
+    UPDATE_CURRENT_LOCK_LEVEL(kMutatorLock);
+    DCHECK(mutator_lock_ == nullptr);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kHeapBitmapLock);
     DCHECK(heap_bitmap_lock_ == nullptr);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 8d2cd07..7adaa1b 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -93,6 +93,7 @@
   kRuntimeShutdownLock,
   kHeapBitmapLock,
   kMutatorLock,
+  kThreadListSuspendThreadLock,
   kZygoteCreationLock,
 
   kLockLevelCount  // Must come last.
@@ -474,6 +475,15 @@
  public:
   static void Init();
 
+  // There's a potential race for two threads to try to suspend each other and for both of them
+  // to succeed and get blocked becoming runnable. This lock ensures that only one thread is
+  // requesting suspension of another at any time. As the the thread list suspend thread logic
+  // transitions to runnable, if the current thread were tried to be suspended then this thread
+  // would block holding this lock until it could safely request thread suspension of the other
+  // thread without that thread having a suspension request against this thread. This avoids a
+  // potential deadlock cycle.
+  static Mutex* thread_list_suspend_thread_lock_;
+
   // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
   // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
   // a share on the mutator_lock_. The garbage collector may also execute with shared access but
@@ -532,7 +542,7 @@
   // else                                          |  .. running ..
   //   Goto x                                      |  .. running ..
   //  .. running ..                                |  .. running ..
-  static ReaderWriterMutex* mutator_lock_;
+  static ReaderWriterMutex* mutator_lock_ ACQUIRED_AFTER(thread_list_suspend_thread_lock_);
 
   // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
   static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c95be01..c7739fe 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2249,15 +2249,18 @@
 }
 
 JDWP::JdwpError Dbg::SuspendThread(JDWP::ObjectId thread_id, bool request_suspension) {
-  ScopedLocalRef<jobject> peer(Thread::Current()->GetJniEnv(), NULL);
+  Thread* self = Thread::Current();
+  ScopedLocalRef<jobject> peer(self->GetJniEnv(), NULL);
   {
-    ScopedObjectAccess soa(Thread::Current());
+    ScopedObjectAccess soa(self);
     peer.reset(soa.AddLocalReference<jobject>(gRegistry->Get<mirror::Object*>(thread_id)));
   }
   if (peer.get() == NULL) {
     return JDWP::ERR_THREAD_NOT_ALIVE;
   }
-  // Suspend thread to build stack trace.
+  // Suspend thread to build stack trace. Take suspend thread lock to avoid races with threads
+  // trying to suspend this one.
+  MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
   bool timed_out;
   Thread* thread = ThreadList::SuspendThreadByPeer(peer.get(), request_suspension, true,
                                                    &timed_out);
@@ -3143,7 +3146,7 @@
   ScopedThreadSuspension(Thread* self, JDWP::ObjectId thread_id)
       LOCKS_EXCLUDED(Locks::thread_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
-      thread_(NULL),
+      thread_(nullptr),
       error_(JDWP::ERR_NONE),
       self_suspend_(false),
       other_suspend_(false) {
@@ -3159,10 +3162,15 @@
         soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
         jobject thread_peer = gRegistry->GetJObject(thread_id);
         bool timed_out;
-        Thread* suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
-                                                                   &timed_out);
+        Thread* suspended_thread;
+        {
+          // Take suspend thread lock to avoid races with threads trying to suspend this one.
+          MutexLock mu(soa.Self(), *Locks::thread_list_suspend_thread_lock_);
+          suspended_thread = ThreadList::SuspendThreadByPeer(thread_peer, true, true,
+                                                             &timed_out);
+        }
         CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kWaitingForDebuggerSuspension);
-        if (suspended_thread == NULL) {
+        if (suspended_thread == nullptr) {
           // Thread terminated from under us while suspending.
           error_ = JDWP::ERR_INVALID_THREAD;
         } else {
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index da481e4..a6873ef 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -681,6 +681,8 @@
     Thread* owner;
     {
       ScopedThreadStateChange tsc(self, kBlocked);
+      // Take suspend thread lock to avoid races with threads trying to suspend this one.
+      MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
       owner = thread_list->SuspendThreadByThreadId(owner_thread_id, false, &timed_out);
     }
     if (owner != nullptr) {
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index cf31064..5f718ba 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -35,7 +35,12 @@
     // Suspend thread to build stack trace.
     soa.Self()->TransitionFromRunnableToSuspended(kNative);
     bool timed_out;
-    Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+    Thread* thread;
+    {
+      // Take suspend thread lock to avoid races with threads trying to suspend this one.
+      MutexLock mu(soa.Self(), *Locks::thread_list_suspend_thread_lock_);
+      thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+    }
     if (thread != nullptr) {
       // Must be runnable to create returned array.
       CHECK_EQ(soa.Self()->TransitionFromSuspendedToRunnable(), kNative);
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index bae67f2..8f83f96 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -116,18 +116,25 @@
 
 static void Thread_nativeSetName(JNIEnv* env, jobject peer, jstring java_name) {
   ScopedUtfChars name(env, java_name);
+  Thread* self;
   {
     ScopedObjectAccess soa(env);
     if (soa.Decode<mirror::Object*>(peer) == soa.Self()->GetPeer()) {
       soa.Self()->SetThreadName(name.c_str());
       return;
     }
+    self = soa.Self();
   }
   // Suspend thread to avoid it from killing itself while we set its name. We don't just hold the
   // thread list lock to avoid this, as setting the thread name causes mutator to lock/unlock
   // in the DDMS send code.
   bool timed_out;
-  Thread* thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+  // Take suspend thread lock to avoid races with threads trying to suspend this one.
+  Thread* thread;
+  {
+    MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
+    thread = ThreadList::SuspendThreadByPeer(peer, true, false, &timed_out);
+  }
   if (thread != NULL) {
     {
       ScopedObjectAccess soa(env);
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index e17e60a..45ef9ae 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -61,7 +61,12 @@
     }
 
     // Suspend thread to build stack trace.
-    Thread* thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
+    Thread* thread;
+    {
+      // Take suspend thread lock to avoid races with threads trying to suspend this one.
+      MutexLock mu(self, *Locks::thread_list_suspend_thread_lock_);
+      thread = thread_list->SuspendThreadByThreadId(thin_lock_id, false, &timed_out);
+    }
     if (thread != nullptr) {
       {
         ScopedObjectAccess soa(env);
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 38f1307..a5caa07 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -57,26 +57,24 @@
 }
 
 inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
-#ifdef NDEBUG
-  UNUSED(check_locks);  // Keep GCC happy about unused parameters.
-#else
-  CHECK_EQ(0u, tls32_.no_thread_suspension) << tlsPtr_.last_no_thread_suspension_cause;
-  if (check_locks) {
-    bool bad_mutexes_held = false;
-    for (int i = kLockLevelCount - 1; i >= 0; --i) {
-      // We expect no locks except the mutator_lock_.
-      if (i != kMutatorLock) {
-        BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
-        if (held_mutex != NULL) {
-          LOG(ERROR) << "holding \"" << held_mutex->GetName()
-                  << "\" at point where thread suspension is expected";
-          bad_mutexes_held = true;
+  if (kIsDebugBuild) {
+    CHECK_EQ(0u, tls32_.no_thread_suspension) << tlsPtr_.last_no_thread_suspension_cause;
+    if (check_locks) {
+      bool bad_mutexes_held = false;
+      for (int i = kLockLevelCount - 1; i >= 0; --i) {
+        // We expect no locks except the mutator_lock_ or thread list suspend thread lock.
+        if (i != kMutatorLock && i != kThreadListSuspendThreadLock) {
+          BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
+          if (held_mutex != NULL) {
+            LOG(ERROR) << "holding \"" << held_mutex->GetName()
+                      << "\" at point where thread suspension is expected";
+            bad_mutexes_held = true;
+          }
         }
       }
+      CHECK(!bad_mutexes_held);
     }
-    CHECK(!bad_mutexes_held);
   }
-#endif
 }
 
 inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b649b62..ff1a079 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -170,16 +170,7 @@
 // individual thread requires polling. delay_us is the requested sleep and total_delay_us
 // accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
 // subsequently sleeps increase delay_us from 1ms to 500ms by doubling.
-static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us,
-                               bool holding_locks) {
-  if (!holding_locks) {
-    for (int i = kLockLevelCount - 1; i >= 0; --i) {
-      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-      if (held_mutex != NULL) {
-        LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
-      }
-    }
-  }
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
   useconds_t new_delay_us = (*delay_us) * 2;
   CHECK_GE(new_delay_us, *delay_us);
   if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
@@ -244,7 +235,7 @@
       useconds_t total_delay_us = 0;
       do {
         useconds_t delay_us = 100;
-        ThreadSuspendSleep(self, &delay_us, &total_delay_us, true);
+        ThreadSuspendSleep(self, &delay_us, &total_delay_us);
       } while (!thread->IsSuspended());
       // Shouldn't need to wait for longer than 1000 microseconds.
       constexpr useconds_t kLongWaitThresholdUS = 1000;
@@ -444,6 +435,11 @@
   while (true) {
     Thread* thread;
     {
+      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
+      // is requesting another suspend, to avoid deadlock, by requiring this function be called
+      // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
+      // than request thread suspension, to avoid potential cycles in threads requesting each other
+      // suspend.
       ScopedObjectAccess soa(self);
       MutexLock mu(self, *Locks::thread_list_lock_);
       thread = Thread::FromManagedThread(soa, peer);
@@ -483,7 +479,7 @@
       }
       // Release locks and come out of runnable state.
     }
-    ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
   }
 }
 
@@ -502,9 +498,14 @@
   CHECK_NE(thread_id, kInvalidThreadId);
   while (true) {
     {
-      Thread* thread = NULL;
+      // Note: this will transition to runnable and potentially suspend. We ensure only one thread
+      // is requesting another suspend, to avoid deadlock, by requiring this function be called
+      // holding Locks::thread_list_suspend_thread_lock_. Its important this thread suspend rather
+      // than request thread suspension, to avoid potential cycles in threads requesting each other
+      // suspend.
       ScopedObjectAccess soa(self);
       MutexLock mu(self, *Locks::thread_list_lock_);
+      Thread* thread = nullptr;
       for (const auto& it : list_) {
         if (it->GetThreadId() == thread_id) {
           thread = it;
@@ -550,7 +551,7 @@
       }
       // Release locks and come out of runnable state.
     }
-    ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
   }
 }
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index d46987a..1b67ac0 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -68,6 +68,7 @@
   // is set to true.
   static Thread* SuspendThreadByPeer(jobject peer, bool request_suspension, bool debug_suspension,
                                      bool* timed_out)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_suspend_thread_lock_)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
@@ -77,6 +78,7 @@
   // the thread terminating. Note that as thread ids are recycled this may not suspend the expected
   // thread, that may be terminating. If the suspension times out then *timeout is set to true.
   Thread* SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension, bool* timed_out)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_suspend_thread_lock_)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);