Fix a deadlock between thread flip and suspend request.

See 31683379#9 for the deadlock scenario.

Make ModifySuspendCount(+1) retry if the thread flip function is set.

Bug: 31683379
Bug: 12687968
Test: test-art, N9 libartd boot, Ritz EAAC with CC.
Test: 129-GetThreadId with gcstress and CC.
Change-Id: Id5cdfcd90a08a2ff497f9f0e2842fa4c613549bc
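
For context, the retry pattern this change introduces can be summarized with the minimal,
self-contained sketch below. This is not ART code: the mutex, atomic flag, and counter are
illustrative stand-ins for thread_suspend_count_lock_, tlsPtr_.flip_function, and the suspend
count, and the sleep interval mirrors the NanoSleep(100000) in the patch.

  #include <atomic>
  #include <chrono>
  #include <mutex>
  #include <thread>

  std::mutex suspend_count_lock;         // stand-in for thread_suspend_count_lock_
  std::atomic<bool> flip_pending{true};  // stand-in for tlsPtr_.flip_function != nullptr
  int suspend_count = 0;                 // guarded by suspend_count_lock

  // Mirrors ModifySuspendCountInternal(): refuse to raise the suspend count
  // while the target thread still has a pending flip function.
  bool ModifySuspendCountInternal(int delta) {
    if (delta > 0 && flip_pending.load()) {
      return false;  // caller must retry
    }
    suspend_count += delta;
    return true;
  }

  // Mirrors the new inline ModifySuspendCount(): drop the lock, sleep, and retry,
  // so the target thread can take the lock and finish its flip in the meantime.
  void ModifySuspendCount(int delta) {
    std::unique_lock<std::mutex> lock(suspend_count_lock);
    while (!ModifySuspendCountInternal(delta)) {
      lock.unlock();
      std::this_thread::sleep_for(std::chrono::microseconds(100));
      lock.lock();
    }
  }

  int main() {
    std::thread target([] {
      // The target eventually runs its flip function and clears the flag.
      std::this_thread::sleep_for(std::chrono::milliseconds(1));
      flip_pending.store(false);
    });
    ModifySuspendCount(+1);  // retries instead of blocking while holding the lock
    target.join();
    return 0;
  }
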
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index bb6eb79..21b84dd 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -314,6 +314,37 @@
   }
 }
 
+inline bool Thread::ModifySuspendCount(Thread* self,
+                                       int delta,
+                                       AtomicInteger* suspend_barrier,
+                                       bool for_debugger) {
+  if (delta > 0 && ((kUseReadBarrier && this != self) || suspend_barrier != nullptr)) {
+    // When delta > 0 (requesting a suspend), ModifySuspendCountInternal() may fail if either
+    // active_suspend_barriers is full or we are in the middle of a thread flip. Retry in a loop.
+    while (true) {
+      if (LIKELY(ModifySuspendCountInternal(self, delta, suspend_barrier, for_debugger))) {
+        return true;
+      } else {
+        // Failure means the list of active_suspend_barriers is full or we are in the middle of a
+        // thread flip. We should release the thread_suspend_count_lock_ (to avoid deadlock) and
+        // wait until the target thread has executed Thread::PassActiveSuspendBarriers() or the
+        // flip function. Note that we cannot simply wait for the thread to change to a suspended
+        // state, because it might first need to run a checkpoint function or resume from
+        // resume_cond_, both of which also need thread_suspend_count_lock_.
+        //
+        // The list of active_suspend_barriers is very unlikely to be full, since more than
+        // kMaxSuspendBarriers threads would need to execute SuspendAllInternal() simultaneously
+        // while the target thread stays in kRunnable.
+        Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
+        NanoSleep(100000);
+        Locks::thread_suspend_count_lock_->ExclusiveLock(self);
+      }
+    }
+  } else {
+    return ModifySuspendCountInternal(self, delta, suspend_barrier, for_debugger);
+  }
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_THREAD_INL_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d0ea2d7..268699b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -987,8 +987,10 @@
   LOG(FATAL) << ss.str();
 }
 
-bool Thread::ModifySuspendCount(Thread* self, int delta, AtomicInteger* suspend_barrier,
-                                bool for_debugger) {
+bool Thread::ModifySuspendCountInternal(Thread* self,
+                                        int delta,
+                                        AtomicInteger* suspend_barrier,
+                                        bool for_debugger) {
   if (kIsDebugBuild) {
     DCHECK(delta == -1 || delta == +1 || delta == -tls32_.debug_suspend_count)
           << delta << " " << tls32_.debug_suspend_count << " " << this;
@@ -1003,6 +1005,12 @@
     return false;
   }
 
+  if (kUseReadBarrier && delta > 0 && this != self && tlsPtr_.flip_function != nullptr) {
+    // Force retry of a suspend request if the target thread is in the middle of a thread flip,
+    // to avoid a deadlock. b/31683379.
+    return false;
+  }
+
   uint16_t flags = kSuspendRequest;
   if (delta > 0 && suspend_barrier != nullptr) {
     uint32_t available_barrier = kMaxSuspendBarriers;
diff --git a/runtime/thread.h b/runtime/thread.h
index 55f1489..76ddbb8 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -226,7 +226,13 @@
         (state_and_flags.as_struct.flags & kSuspendRequest) != 0;
   }
 
-  bool ModifySuspendCount(Thread* self, int delta, AtomicInteger* suspend_barrier, bool for_debugger)
+  // If delta > 0 and either this != self (under kUseReadBarrier) or suspend_barrier is not
+  // null, this function may temporarily release thread_suspend_count_lock_ internally.
+  ALWAYS_INLINE
+  bool ModifySuspendCount(Thread* self,
+                          int delta,
+                          AtomicInteger* suspend_barrier,
+                          bool for_debugger)
       REQUIRES(Locks::thread_suspend_count_lock_);
 
   bool RequestCheckpoint(Closure* function)
@@ -1218,6 +1224,12 @@
     is_sensitive_thread_hook_ = is_sensitive_thread_hook;
   }
 
+  bool ModifySuspendCountInternal(Thread* self,
+                                  int delta,
+                                  AtomicInteger* suspend_barrier,
+                                  bool for_debugger)
+      REQUIRES(Locks::thread_suspend_count_lock_);
+
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows an atomic CAS to
   // change from being Suspended to Runnable without a suspend request occurring.
   union PACKED(4) StateAndFlags {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 17c6c2e..21ecb15 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -584,24 +584,7 @@
         continue;
       }
       VLOG(threads) << "requesting thread suspend: " << *thread;
-      while (true) {
-        if (LIKELY(thread->ModifySuspendCount(self, +1, &pending_threads, debug_suspend))) {
-          break;
-        } else {
-          // Failure means the list of active_suspend_barriers is full, we should release the
-          // thread_suspend_count_lock_ (to avoid deadlock) and wait till the target thread has
-          // executed Thread::PassActiveSuspendBarriers(). Note that we could not simply wait for
-          // the thread to change to a suspended state, because it might need to run checkpoint
-          // function before the state change, which also needs thread_suspend_count_lock_.
-
-          // This is very unlikely to happen since more than kMaxSuspendBarriers threads need to
-          // execute SuspendAllInternal() simultaneously, and target thread stays in kRunnable
-          // in the mean time.
-          Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
-          NanoSleep(100000);
-          Locks::thread_suspend_count_lock_->ExclusiveLock(self);
-        }
-      }
+      thread->ModifySuspendCount(self, +1, &pending_threads, debug_suspend);
 
       // Must install the pending_threads counter first, then check thread->IsSuspended() and clear
       // the counter. Otherwise there's a race with Thread::TransitionFromRunnableToSuspended()