Fix races in thread list Unregister.

First race:
We were releasing the thin_lock_id in Unregister before the thread
had been removed from the thread list. This could cause problems in
SuspendThreadByThreadId since there was a small window of time during
which two threads could share the same thread id. This race caused an
occasional check failure in SuspendThreadByThreadId.
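
To make the window concrete, here is a standalone sketch, not the
runtime's code: IdAllocator below is a hypothetical stand-in for
ThreadList's thin-lock id bitmap, and the trailing comment contrasts
the pre-fix and post-fix orderings in Unregister.

    #include <bitset>
    #include <cstdint>
    #include <mutex>

    // Hypothetical stand-in for ThreadList's thin-lock id bitmap.
    class IdAllocator {
     public:
      uint32_t Alloc() {
        std::lock_guard<std::mutex> lock(mu_);
        for (uint32_t i = 0; i < kMaxIds; ++i) {
          if (!used_[i]) {
            used_[i] = true;
            return i + 1;  // Thin lock ids are 1-based; 0 means unowned.
          }
        }
        return 0;  // Out of ids (the real runtime would abort).
      }
      void Release(uint32_t id) {
        std::lock_guard<std::mutex> lock(mu_);
        used_[id - 1] = false;
      }
     private:
      static constexpr uint32_t kMaxIds = 4096;
      std::mutex mu_;
      std::bitset<kMaxIds> used_;
    };

    // Buggy ordering (pre-fix): Release(id) ran while the dying thread
    // was still in list_, so a starting thread could Alloc() the same
    // id and a scan by id could match two live threads.
    //
    // Fixed ordering (this change): remove and delete the Thread under
    // the thread list locks first, then Release(id), so an id never
    // refers to two live threads at once.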

Second race:
We were setting thin_lock_thread_id_ to 0 in Unregister before
waiting for the thread to no longer be suspended. This caused another
race in SuspendThreadByThreadId where we modified the thread's
suspend count and busy waited, but didn't find the thread on the next
iteration. This meant that we returned null even though we had
modified the suspend count, so the count never got decremented: the
caller didn't know that it had been increased. Removing the
self->thin_lock_thread_id_ = 0 in ThreadList::Unregister fixes this
race.
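
Condensed, the fixed retry loop has the shape sketched below.
FakeThread, g_threads, and SuspendById are hypothetical stand-ins for
Thread, list_, and SuspendThreadByThreadId; the single g_list_lock
stands in for the thread_list_lock_ / thread_suspend_count_lock_
pair, and the timeout and debug-suspension handling are omitted.

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <mutex>

    struct FakeThread {
      int suspend_count = 0;
      bool suspended = false;
    };

    std::mutex g_list_lock;
    std::map<uint32_t, FakeThread*> g_threads;  // Lookup by thin lock id.

    FakeThread* SuspendById(uint32_t id) {
      // Tracking the suspended thread's pointer (instead of the old
      // did_suspend_request bool) lets each iteration assert that the
      // thread it finds is the one it already suspended.
      FakeThread* suspended_thread = nullptr;
      while (true) {
        {
          std::lock_guard<std::mutex> lock(g_list_lock);
          auto it = g_threads.find(id);
          FakeThread* thread = (it != g_threads.end()) ? it->second : nullptr;
          if (thread == nullptr) {
            // Pre-fix, Unregister zeroed thin_lock_thread_id_ early, so
            // this lookup could fail after the +1 below, leaking the
            // suspension. Post-fix the thread stays findable until it
            // is deleted, which the new CHECK in the runtime enforces.
            assert(suspended_thread == nullptr);
            return nullptr;
          }
          if (suspended_thread == nullptr) {
            ++thread->suspend_count;  // ModifySuspendCount(self, +1, ...)
            suspended_thread = thread;
          } else {
            assert(suspended_thread == thread);  // Mirrors the new CHECK_EQ.
          }
          if (thread->suspended) {
            return thread;  // Caller owes the matching -1.
          }
        }
        // The real code drops its locks here and backs off via
        // ThreadSuspendSleep before retrying.
      }
    }

The invariant the asserts capture: once the loop has charged a +1 to
some thread, every later iteration must find that same thread, so
null is only returned when no suspend count was modified.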

Added additional checks and logging to prevent these issues from
resurfacing, plus other miscellaneous cleanup.

Added thread names to threads in ThreadStress.

Bug: 11319866

Change-Id: I48e3a0700193b72079e450be1e924a2f88cf52e2
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index af93a56..ef9a9ce 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -633,8 +633,7 @@
     ScopedThreadStateChange tsc(self, kBlocked);
     if (lock_word == obj->GetLockWord()) {  // If lock word hasn't changed.
       bool timed_out;
-      Thread* owner = thread_list->SuspendThreadByThreadId(lock_word.ThinLockOwner(), false,
-                                                           &timed_out);
+      Thread* owner = thread_list->SuspendThreadByThreadId(owner_thread_id, false, &timed_out);
       if (owner != nullptr) {
         // We succeeded in suspending the thread, check the lock's status didn't change.
         lock_word = obj->GetLockWord();
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d816ca1..297fa45 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -586,11 +586,11 @@
   if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
     return false;  // Fail, already a checkpoint pending.
   }
-  CHECK(checkpoint_function_ == NULL);
+  CHECK(checkpoint_function_ == nullptr);
   checkpoint_function_ = function;
   // Checkpoint function installed, now install the flag bit.
   // We must be runnable to request a checkpoint.
-  old_state_and_flags.as_struct.state = kRunnable;
+  DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
   union StateAndFlags new_state_and_flags = old_state_and_flags;
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
   int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
@@ -2120,12 +2120,11 @@
     opeer_ = visitor(opeer_, arg);
   }
   if (exception_ != nullptr) {
-    exception_ = reinterpret_cast<mirror::Throwable*>(visitor(exception_, arg));
+    exception_ = down_cast<mirror::Throwable*>(visitor(exception_, arg));
   }
   throw_location_.VisitRoots(visitor, arg);
   if (class_loader_override_ != nullptr) {
-    class_loader_override_ = reinterpret_cast<mirror::ClassLoader*>(
-        visitor(class_loader_override_, arg));
+    class_loader_override_ = down_cast<mirror::ClassLoader*>(visitor(class_loader_override_, arg));
   }
   jni_env_->locals.VisitRoots(visitor, arg);
   jni_env_->monitors.VisitRoots(visitor, arg);
@@ -2144,7 +2143,7 @@
       frame.this_object_ = visitor(frame.this_object_, arg);
     }
     DCHECK(frame.method_ != nullptr);
-    frame.method_ = reinterpret_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
+    frame.method_ = down_cast<mirror::ArtMethod*>(visitor(frame.method_, arg));
   }
 }
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index dd3f11c..aed8c77 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -162,6 +162,35 @@
 }
 #endif
 
+// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
+// individual thread requires polling. delay_us is the requested sleep and total_delay_us
+// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
+// subsequent sleeps increase delay_us from 1ms to 500ms by doubling.
+static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us,
+                               bool holding_locks) {
+  if (!holding_locks) {
+    for (int i = kLockLevelCount - 1; i >= 0; --i) {
+      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
+      if (held_mutex != nullptr) {
+        LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
+      }
+    }
+  }
+  useconds_t new_delay_us = (*delay_us) * 2;
+  CHECK_GE(new_delay_us, *delay_us);
+  if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
+    *delay_us = new_delay_us;
+  }
+  if (*delay_us == 0) {
+    sched_yield();
+    // Default to 1 millisecond (note that this gets multiplied by 2 before the first sleep).
+    *delay_us = 500;
+  } else {
+    usleep(*delay_us);
+    *total_delay_us += *delay_us;
+  }
+}
+
 size_t ThreadList::RunCheckpoint(Closure* checkpoint_function) {
   Thread* self = Thread::Current();
   if (kIsDebugBuild) {
@@ -208,17 +237,15 @@
   for (const auto& thread : suspended_count_modified_threads) {
     if (!thread->IsSuspended()) {
       // Wait until the thread is suspended.
-      uint64_t start = NanoTime();
+      useconds_t total_delay_us = 0;
       do {
-        // Sleep for 100us.
-        usleep(100);
+        useconds_t delay_us = 100;
+        ThreadSuspendSleep(self, &delay_us, &total_delay_us, true);
       } while (!thread->IsSuspended());
-      uint64_t end = NanoTime();
-      // Shouldn't need to wait for longer than 1 millisecond.
-      const uint64_t threshold = 1;
-      if (NsToMs(end - start) > threshold) {
-        LOG(INFO) << "Warning: waited longer than " << threshold
-                  << " ms for thread suspend\n";
+      // Shouldn't need to wait for longer than 1000 microseconds.
+      constexpr useconds_t kLongWaitThresholdUS = 1000;
+      if (UNLIKELY(total_delay_us > kLongWaitThresholdUS)) {
+        LOG(WARNING) << "Waited " << total_delay_us << " us for thread suspend!";
       }
     }
     // We know for sure that the thread is suspended at this point.
@@ -354,34 +381,6 @@
   }
 }
 
-// Unlike suspending all threads where we can wait to acquire the mutator_lock_, suspending an
-// individual thread requires polling. delay_us is the requested sleep and total_delay_us
-// accumulates the total time spent sleeping for timeouts. The first sleep is just a yield,
-// subsequently sleeps increase delay_us from 1ms to 500ms by doubling.
-static void ThreadSuspendSleep(Thread* self, useconds_t* delay_us, useconds_t* total_delay_us) {
-  for (int i = kLockLevelCount - 1; i >= 0; --i) {
-    BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-    if (held_mutex != NULL) {
-      LOG(FATAL) << "Holding " << held_mutex->GetName() << " while sleeping for thread suspension";
-    }
-  }
-  {
-    useconds_t new_delay_us = (*delay_us) * 2;
-    CHECK_GE(new_delay_us, *delay_us);
-    if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
-      *delay_us = new_delay_us;
-    }
-  }
-  if ((*delay_us) == 0) {
-    sched_yield();
-    // Default to 1 milliseconds (note that this gets multiplied by 2 before the first sleep).
-    (*delay_us) = 500;
-  } else {
-    usleep(*delay_us);
-    (*total_delay_us) += (*delay_us);
-  }
-}
-
 Thread* ThreadList::SuspendThreadByPeer(jobject peer, bool request_suspension,
                                         bool debug_suspension, bool* timed_out) {
   static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
@@ -432,7 +431,7 @@
       }
       // Release locks and come out of runnable state.
     }
-    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
   }
 }
 
@@ -445,13 +444,13 @@
   static const useconds_t kTimeoutUs = 30 * 1000000;  // 30s.
   useconds_t total_delay_us = 0;
   useconds_t delay_us = 0;
-  bool did_suspend_request = false;
   *timed_out = false;
+  Thread* suspended_thread = nullptr;
   Thread* self = Thread::Current();
   CHECK_NE(thread_id, kInvalidThreadId);
   while (true) {
-    Thread* thread = NULL;
     {
+      Thread* thread = nullptr;
       ScopedObjectAccess soa(self);
       MutexLock mu(self, *Locks::thread_list_lock_);
       for (const auto& it : list_) {
@@ -460,17 +459,20 @@
           break;
         }
       }
-      if (thread == NULL) {
+      if (thread == nullptr) {
+        CHECK(suspended_thread == nullptr) << "Suspended thread " << suspended_thread
+            << " no longer in thread list";
         // There's a race in inflating a lock and the owner giving up ownership and then dying.
         ThreadSuspendByThreadIdWarning(WARNING, "No such thread id for suspend", thread_id);
         return NULL;
       }
       {
         MutexLock mu(self, *Locks::thread_suspend_count_lock_);
-        if (!did_suspend_request) {
+        if (suspended_thread == nullptr) {
           thread->ModifySuspendCount(self, +1, debug_suspension);
-          did_suspend_request = true;
+          suspended_thread = thread;
         } else {
+          CHECK_EQ(suspended_thread, thread);
           // If the caller isn't requesting suspension, a suspension should have already occurred.
           CHECK_GT(thread->GetSuspendCount(), 0);
         }
@@ -487,7 +489,7 @@
         }
         if (total_delay_us >= kTimeoutUs) {
           ThreadSuspendByThreadIdWarning(WARNING, "Thread suspension timed out", thread_id);
-          if (did_suspend_request) {
+          if (suspended_thread != nullptr) {
             thread->ModifySuspendCount(soa.Self(), -1, debug_suspension);
           }
           *timed_out = true;
@@ -496,7 +498,7 @@
       }
       // Release locks and come out of runnable state.
     }
-    ThreadSuspendSleep(self, &delay_us, &total_delay_us);
+    ThreadSuspendSleep(self, &delay_us, &total_delay_us, false);
   }
 }
 
@@ -719,9 +721,7 @@
   self->Destroy();
 
   uint32_t thin_lock_id = self->thin_lock_thread_id_;
-  self->thin_lock_thread_id_ = 0;
-  ReleaseThreadId(self, thin_lock_id);
-  while (self != NULL) {
+  while (self != nullptr) {
     // Remove and delete the Thread* while holding the thread_list_lock_ and
     // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
     // Note: deliberately not using MutexLock that could hold a stale self pointer.
@@ -732,10 +732,14 @@
     if (!self->IsSuspended()) {
       list_.remove(self);
       delete self;
-      self = NULL;
+      self = nullptr;
     }
     Locks::thread_list_lock_->ExclusiveUnlock(self);
   }
+  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
+  // temporarily have multiple threads with the same thread id. When this occurs, it causes
+  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
+  ReleaseThreadId(nullptr, thin_lock_id);
 
   // Clear the TLS data, so that the underlying native thread is recognizably detached.
   // (It may wish to reattach later.)
diff --git a/test/ThreadStress/ThreadStress.java b/test/ThreadStress/ThreadStress.java
index 8d8135d..795c790 100644
--- a/test/ThreadStress/ThreadStress.java
+++ b/test/ThreadStress/ThreadStress.java
@@ -128,13 +128,13 @@
         Thread[] runners = new Thread[numberOfThreads];
         for (int r = 0; r < runners.length; r++) {
             final ThreadStress ts = threadStresses[r];
-            runners[r] = new Thread() {
+            runners[r] = new Thread("Runner thread " + r) {
                 final ThreadStress threadStress = ts;
                 public void run() {
                     int id = threadStress.id;
-                    System.out.println("Starting runner for " + id);
+                    System.out.println("Starting worker for " + id);
                     while (threadStress.nextOperation < operationsPerThread) {
-                        Thread thread = new Thread(ts);
+                        Thread thread = new Thread(ts, "Worker thread " + id);
                         thread.start();
                         try {
                             thread.join();
@@ -144,14 +144,14 @@
                                            + (operationsPerThread - threadStress.nextOperation)
                                            + " operations remaining.");
                     }
-                    System.out.println("Finishing runner for " + id);
+                    System.out.println("Finishing worker for " + id);
                 }
             };
         }
 
         // The notifier thread is a daemon that just loops forever to wake
         // up threads in Operation.WAIT
-        Thread notifier = new Thread() {
+        Thread notifier = new Thread("Notifier") {
             public void run() {
                 while (true) {
                     synchronized (lock) {