8270085: Suspend during block transition may deadlock if lock held

Co-authored-by: Robbin Ehn <rehn@openjdk.org>
Co-authored-by: Patricio Chilano Mateo <pchilanomate@openjdk.org>
Reviewed-by: dcubed, dholmes, coleenp
diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp
index 7d2f36a..6ef0251 100644
--- a/src/hotspot/share/prims/whitebox.cpp
+++ b/src/hotspot/share/prims/whitebox.cpp
@@ -2076,7 +2076,32 @@
   }
 WB_END
 
-//Some convenience methods to deal with objects from java
+static volatile int _emulated_lock = 0;
+
+WB_ENTRY(void, WB_LockAndBlock(JNIEnv* env, jobject wb, jboolean suspender))
+  JavaThread* self = JavaThread::current();
+
+  {
+    // Before trying to acquire the lock transition into a safepoint safe state.
+    // Otherwise if either suspender or suspendee blocks for a safepoint
+    // in ~ThreadBlockInVM the other one could loop forever trying to acquire
+    // the lock without allowing the safepoint to progress.
+    ThreadBlockInVM tbivm(self);
+
+    // We will deadlock here if we are 'suspender' and 'suspendee'
+    // suspended in ~ThreadBlockInVM. This verifies we only suspend
+    // at the right place.
+    while (Atomic::cmpxchg(&_emulated_lock, 0, 1) != 0) {}
+    assert(_emulated_lock == 1, "Must be locked");
+
+    // Sleep much longer in suspendee to force situation where
+    // 'suspender' is waiting above to acquire lock.
+    os::naked_short_sleep(suspender ? 1 : 10);
+  }
+  Atomic::store(&_emulated_lock, 0);
+WB_END
+
+// Some convenience methods to deal with objects from java
 int WhiteBox::offset_for_field(const char* field_name, oop object,
     Symbol* signature_symbol) {
   assert(field_name != NULL && strlen(field_name) > 0, "Field name not valid");
@@ -2566,6 +2591,7 @@
   {CC"clearInlineCaches0",  CC"(Z)V",                 (void*)&WB_ClearInlineCaches },
   {CC"handshakeWalkStack", CC"(Ljava/lang/Thread;Z)I", (void*)&WB_HandshakeWalkStack },
   {CC"asyncHandshakeWalkStack", CC"(Ljava/lang/Thread;)V", (void*)&WB_AsyncHandshakeWalkStack },
+  {CC"lockAndBlock", CC"(Z)V",                        (void*)&WB_LockAndBlock},
   {CC"checkThreadObjOfTerminatingThread", CC"(Ljava/lang/Thread;)V", (void*)&WB_CheckThreadObjOfTerminatingThread },
   {CC"verifyFrames",                CC"(ZZ)V",            (void*)&WB_VerifyFrames },
   {CC"addCompilerDirective",    CC"(Ljava/lang/String;)I",
diff --git a/src/hotspot/share/runtime/handshake.cpp b/src/hotspot/share/runtime/handshake.cpp
index d6b2163..8b76e9e 100644
--- a/src/hotspot/share/runtime/handshake.cpp
+++ b/src/hotspot/share/runtime/handshake.cpp
@@ -77,6 +77,7 @@
   int32_t pending_threads()        { return Atomic::load(&_pending_threads); }
   const char* name()               { return _handshake_cl->name(); }
   bool is_async()                  { return _handshake_cl->is_async(); }
+  bool is_suspend()                { return _handshake_cl->is_suspend(); }
 };
 
 class AsyncHandshakeOperation : public HandshakeOperation {
@@ -376,6 +377,7 @@
     // Check for pending handshakes to avoid possible deadlocks where our
     // target is trying to handshake us.
     if (SafepointMechanism::should_process(self)) {
+      // Will not suspend here.
       ThreadBlockInVM tbivm(self);
     }
     hsy.process();
@@ -425,11 +427,19 @@
   return _queue.contains(mo);
 }
 
-HandshakeOperation* HandshakeState::get_op_for_self() {
+static bool no_suspend_filter(HandshakeOperation* op) {
+  return !op->is_suspend();
+}
+
+HandshakeOperation* HandshakeState::get_op_for_self(bool allow_suspend) {
   assert(_handshakee == Thread::current(), "Must be called by self");
   assert(_lock.owned_by_self(), "Lock must be held");
-  return _queue.peek();
-};
+  if (allow_suspend) {
+    return _queue.peek();
+  } else {
+    return _queue.peek(no_suspend_filter);
+  }
+}
 
 static bool non_self_queue_filter(HandshakeOperation* op) {
   return !op->is_async();
@@ -441,6 +451,11 @@
   return _queue.contains(non_self_queue_filter);
 }
 
+bool HandshakeState::has_a_non_suspend_operation() {
+  MutexLocker ml(&_lock, Mutex::_no_safepoint_check_flag);
+  return _queue.contains(no_suspend_filter);
+}
+
 HandshakeOperation* HandshakeState::get_op() {
   assert(_handshakee != Thread::current(), "Must not be called by self");
   assert(_lock.owned_by_self(), "Lock must be held");
@@ -454,25 +469,22 @@
   assert(ret == op, "Popped op must match requested op");
 };
 
-bool HandshakeState::process_by_self() {
+bool HandshakeState::process_by_self(bool allow_suspend) {
   assert(Thread::current() == _handshakee, "should call from _handshakee");
   assert(!_handshakee->is_terminated(), "should not be a terminated thread");
   assert(_handshakee->thread_state() != _thread_blocked, "should not be in a blocked state");
   assert(_handshakee->thread_state() != _thread_in_native, "should not be in native");
-  ThreadInVMForHandshake tivm(_handshakee);
-  {
-    // Handshakes cannot safely safepoint.
-    // The exception to this rule is the asynchronous suspension handshake.
-    // It by-passes the NSV by manually doing the transition.
-    NoSafepointVerifier nsv;
-    return process_self_inner();
-  }
-}
 
-bool HandshakeState::process_self_inner() {
+  ThreadInVMForHandshake tivm(_handshakee);
+  // Handshakes cannot safely safepoint.
+  // The exception to this rule is the asynchronous suspension handshake.
+  // It by-passes the NSV by manually doing the transition.
+  NoSafepointVerifier nsv;
+
   while (has_operation()) {
     MutexLocker ml(&_lock, Mutex::_no_safepoint_check_flag);
-    HandshakeOperation* op = get_op_for_self();
+
+    HandshakeOperation* op = get_op_for_self(allow_suspend);
     if (op != NULL) {
       assert(op->_target == NULL || op->_target == Thread::current(), "Wrong thread");
       bool async = op->is_async();
@@ -621,6 +633,7 @@
     assert(current == Thread::current(), "Must be self executed.");
     current->handshake_state()->do_self_suspend();
   }
+  virtual bool is_suspend() { return true; }
 };
 
 bool HandshakeState::suspend_with_handshake() {
@@ -667,8 +680,15 @@
 };
 
 bool HandshakeState::suspend() {
+  JavaThread* self = JavaThread::current();
   SuspendThreadHandshake st;
   Handshake::execute(&st, _handshakee);
+  if (_handshakee == self) {
+    // If target is the current thread we need to call this to do the
+    // actual suspend since Handshake::execute() above only installed
+    // the asynchronous handshake.
+    SafepointMechanism::process_if_requested(self);
+  }
   return st.did_suspend();
 }
 
diff --git a/src/hotspot/share/runtime/handshake.hpp b/src/hotspot/share/runtime/handshake.hpp
index 747399b..b0145fe 100644
--- a/src/hotspot/share/runtime/handshake.hpp
+++ b/src/hotspot/share/runtime/handshake.hpp
@@ -49,6 +49,7 @@
   virtual ~HandshakeClosure()                      {}
   const char* name() const                         { return _name; }
   virtual bool is_async()                          { return false; }
+  virtual bool is_suspend()                        { return false; }
   virtual void do_thread(Thread* thread) = 0;
 };
 
@@ -92,15 +93,8 @@
   bool possibly_can_process_handshake();
   bool can_process_handshake();
 
-  // Returns false if the JavaThread finished all its handshake operations.
-  // If the method returns true there is still potential work to be done,
-  // but we need to check for a safepoint before.
-  // (This is due to a suspension handshake which put the JavaThread in blocked
-  // state so a safepoint may be in-progress.)
-  bool process_self_inner();
-
   bool have_non_self_executable_operation();
-  HandshakeOperation* get_op_for_self();
+  HandshakeOperation* get_op_for_self(bool allow_suspend);
   HandshakeOperation* get_op();
   void remove_op(HandshakeOperation* op);
 
@@ -123,10 +117,14 @@
   bool has_operation() {
     return !_queue.is_empty();
   }
+  bool has_a_non_suspend_operation();
 
   bool operation_pending(HandshakeOperation* op);
 
-  bool process_by_self();
+  // If the method returns true we need to check for a possible safepoint.
+  // This is due to a suspension handshake which put the JavaThread in blocked
+  // state so a safepoint may be in-progress.
+  bool process_by_self(bool allow_suspend);
 
   enum ProcessResult {
     _no_operation = 0,
diff --git a/src/hotspot/share/runtime/interfaceSupport.inline.hpp b/src/hotspot/share/runtime/interfaceSupport.inline.hpp
index 17e0469..1e345b0 100644
--- a/src/hotspot/share/runtime/interfaceSupport.inline.hpp
+++ b/src/hotspot/share/runtime/interfaceSupport.inline.hpp
@@ -243,8 +243,10 @@
 class ThreadBlockInVMPreprocess : public ThreadStateTransition {
  private:
   PRE_PROC& _pr;
+  bool _allow_suspend;
  public:
-  ThreadBlockInVMPreprocess(JavaThread* thread, PRE_PROC& pr) : ThreadStateTransition(thread), _pr(pr) {
+  ThreadBlockInVMPreprocess(JavaThread* thread, PRE_PROC& pr, bool allow_suspend = true)
+    : ThreadStateTransition(thread), _pr(pr), _allow_suspend(allow_suspend) {
     assert(thread->thread_state() == _thread_in_vm, "coming from wrong thread state");
     thread->check_possible_safepoint();
     // Once we are blocked vm expects stack to be walkable
@@ -257,9 +259,9 @@
     // Change to transition state and ensure it is seen by the VM thread.
     _thread->set_thread_state_fence(_thread_blocked_trans);
 
-    if (SafepointMechanism::should_process(_thread)) {
+    if (SafepointMechanism::should_process(_thread, _allow_suspend)) {
       _pr(_thread);
-      SafepointMechanism::process_if_requested(_thread);
+      SafepointMechanism::process_if_requested(_thread, _allow_suspend);
     }
 
     _thread->set_thread_state(_thread_in_vm);
@@ -289,7 +291,7 @@
   ThreadBlockInVMPreprocess<InFlightMutexRelease> _tbivmpp;
  public:
   ThreadBlockInVM(JavaThread* thread, Mutex** in_flight_mutex_addr = NULL)
-    : _ifmr(in_flight_mutex_addr), _tbivmpp(thread, _ifmr) {}
+    : _ifmr(in_flight_mutex_addr), _tbivmpp(thread, _ifmr, /* allow_suspend= */ false) {}
 };
 
 // Debug class instantiated in JRT_ENTRY macro.
diff --git a/src/hotspot/share/runtime/safepointMechanism.cpp b/src/hotspot/share/runtime/safepointMechanism.cpp
index 886698c..20e163a 100644
--- a/src/hotspot/share/runtime/safepointMechanism.cpp
+++ b/src/hotspot/share/runtime/safepointMechanism.cpp
@@ -76,29 +76,6 @@
   }
 }
 
-void SafepointMechanism::process(JavaThread *thread) {
-  bool need_rechecking;
-  do {
-    if (global_poll()) {
-      // Any load in ::block() must not pass the global poll load.
-      // Otherwise we might load an old safepoint counter (for example).
-      OrderAccess::loadload();
-      SafepointSynchronize::block(thread);
-    }
-
-    // The call to on_safepoint fixes the thread's oops and the first few frames.
-    //
-    // The call has been carefully placed here to cater to a few situations:
-    // 1) After we exit from block after a global poll
-    // 2) After a thread races with the disarming of the global poll and transitions from native/blocked
-    // 3) Before the handshake code is run
-    StackWatermarkSet::on_safepoint(thread);
-
-    need_rechecking = thread->handshake_state()->has_operation() && thread->handshake_state()->process_by_self();
-
-  } while (need_rechecking);
-}
-
 uintptr_t SafepointMechanism::compute_poll_word(bool armed, uintptr_t stack_watermark) {
   if (armed) {
     log_debug(stackbarrier)("Computed armed for tid %d", Thread::current()->osthread()->thread_id());
@@ -134,12 +111,31 @@
   }
 }
 
-void SafepointMechanism::process_if_requested_slow(JavaThread *thread) {
+void SafepointMechanism::process(JavaThread *thread, bool allow_suspend) {
   // Read global poll and has_handshake after local poll
   OrderAccess::loadload();
 
   // local poll already checked, if used.
-  process(thread);
+  bool need_rechecking;
+  do {
+    if (global_poll()) {
+      // Any load in ::block() must not pass the global poll load.
+      // Otherwise we might load an old safepoint counter (for example).
+      OrderAccess::loadload();
+      SafepointSynchronize::block(thread);
+    }
+
+    // The call to on_safepoint fixes the thread's oops and the first few frames.
+    //
+    // The call has been carefully placed here to cater to a few situations:
+    // 1) After we exit from block after a global poll
+    // 2) After a thread races with the disarming of the global poll and transitions from native/blocked
+    // 3) Before the handshake code is run
+    StackWatermarkSet::on_safepoint(thread);
+
+    need_rechecking = thread->handshake_state()->has_operation() && thread->handshake_state()->process_by_self(allow_suspend);
+  } while (need_rechecking);
+
   update_poll_values(thread);
   OrderAccess::cross_modify_fence();
 }
diff --git a/src/hotspot/share/runtime/safepointMechanism.hpp b/src/hotspot/share/runtime/safepointMechanism.hpp
index 309a8e7..69e1b84 100644
--- a/src/hotspot/share/runtime/safepointMechanism.hpp
+++ b/src/hotspot/share/runtime/safepointMechanism.hpp
@@ -50,8 +50,9 @@
 
   static inline bool global_poll();
 
-  static void process(JavaThread *thread);
-  static void process_if_requested_slow(JavaThread *thread);
+  static void process(JavaThread *thread, bool allow_suspend);
+
+  static inline bool should_process_no_suspend(JavaThread* thread);
 
   static void default_initialize();
 
@@ -60,7 +61,7 @@
   static uintptr_t compute_poll_word(bool armed, uintptr_t stack_watermark);
 
   const static intptr_t _poll_bit = 1;
-public:
+ public:
   static inline bool local_poll_armed(JavaThread* thread);
   static intptr_t poll_bit() { return _poll_bit; }
 
@@ -79,10 +80,10 @@
   };
 
   // Call this method to see if this thread should block for a safepoint or process handshake.
-  static inline bool should_process(JavaThread* thread);
+  static inline bool should_process(JavaThread* thread, bool allow_suspend = true);
 
   // Processes a pending requested operation.
-  static inline void process_if_requested(JavaThread* thread);
+  static inline void process_if_requested(JavaThread* thread, bool allow_suspend = true);
   static inline void process_if_requested_with_exit_check(JavaThread* thread, bool check_asyncs);
   // Compute what the poll values should be and install them.
   static void update_poll_values(JavaThread* thread);
diff --git a/src/hotspot/share/runtime/safepointMechanism.inline.hpp b/src/hotspot/share/runtime/safepointMechanism.inline.hpp
index 4a94400..965eea2 100644
--- a/src/hotspot/share/runtime/safepointMechanism.inline.hpp
+++ b/src/hotspot/share/runtime/safepointMechanism.inline.hpp
@@ -28,6 +28,7 @@
 #include "runtime/safepointMechanism.hpp"
 
 #include "runtime/atomic.hpp"
+#include "runtime/handshake.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.inline.hpp"
 
@@ -61,11 +62,29 @@
   return (SafepointSynchronize::_state != SafepointSynchronize::_not_synchronized);
 }
 
-bool SafepointMechanism::should_process(JavaThread* thread) {
-  return local_poll_armed(thread);
+bool SafepointMechanism::should_process_no_suspend(JavaThread* thread) {
+  if (global_poll() || thread->handshake_state()->has_a_non_suspend_operation()) {
+    return true;
+  } else {
+    // We ignore suspend requests if any and just check before returning if we need
+    // to fix the thread's oops and first few frames due to a possible safepoint.
+    StackWatermarkSet::on_safepoint(thread);
+    update_poll_values(thread);
+    OrderAccess::cross_modify_fence();
+    return false;
+  }
 }
 
-void SafepointMechanism::process_if_requested(JavaThread* thread) {
+bool SafepointMechanism::should_process(JavaThread* thread, bool allow_suspend) {
+  if (!local_poll_armed(thread)) {
+    return false;
+  } else if (allow_suspend) {
+    return true;
+  }
+  return should_process_no_suspend(thread);
+}
+
+void SafepointMechanism::process_if_requested(JavaThread* thread, bool allow_suspend) {
 
   // Macos/aarch64 should be in the right state for safepoint (e.g.
   // deoptimization needs WXWrite).  Crashes caused by the wrong state rarely
@@ -77,7 +96,7 @@
 #endif
 
   if (local_poll_armed(thread)) {
-    process_if_requested_slow(thread);
+    process(thread, allow_suspend);
   }
 }
 
diff --git a/test/hotspot/jtreg/runtime/handshake/SuspendBlocked.java b/test/hotspot/jtreg/runtime/handshake/SuspendBlocked.java
new file mode 100644
index 0000000..370b68d
--- /dev/null
+++ b/test/hotspot/jtreg/runtime/handshake/SuspendBlocked.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test SuspendBlocked
+ * @bug 8270085
+ * @library /test/lib
+ * @build SuspendBlocked
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI SuspendBlocked
+ */
+
+import jdk.test.lib.Asserts;
+import sun.hotspot.WhiteBox;
+
+public class SuspendBlocked {
+
+    public static void main(String... args) throws Exception {
+        Thread suspend_thread = new Thread(() -> run_loop());
+        suspend_thread.start();
+        WhiteBox wb = WhiteBox.getWhiteBox();
+        for (int i = 0; i < 100; i++) {
+            suspend_thread.suspend();
+            wb.lockAndBlock(/* suspender= */ true);
+            suspend_thread.resume();
+            Thread.sleep(1);
+        }
+        suspend_thread.join();
+    }
+
+    public static void run_loop() {
+        WhiteBox wb = WhiteBox.getWhiteBox();
+        for (int i = 0; i < 100; i++) {
+            wb.lockAndBlock(/* suspender= */ false);
+        }
+    }
+}
diff --git a/test/lib/sun/hotspot/WhiteBox.java b/test/lib/sun/hotspot/WhiteBox.java
index 0f7f8a9..1dd5e2c 100644
--- a/test/lib/sun/hotspot/WhiteBox.java
+++ b/test/lib/sun/hotspot/WhiteBox.java
@@ -607,6 +607,8 @@
   public native int handshakeWalkStack(Thread t, boolean all_threads);
   public native void asyncHandshakeWalkStack(Thread t);
 
+  public native void lockAndBlock(boolean suspender);
+
   // Returns true on linux if library has the noexecstack flag set.
   public native boolean checkLibSpecifiesNoexecstack(String libfilename);