Prevent concurrent GC stack-walks during deoptimization

We could end up modifying the instrumentation stack while GC or
thread-flip stack walks are in progress. This could cause crashes or
check failures because the stack would be in an inconsistent state.

To fix this, we changed the deoptimization process to block GC stack
walks while the instrumentation stack is being modified.

We also standardized openjdkjvmti.so deoptimization to use a single
entrypoint.

Bug: 72608560
Test: ./test.py --host
Test: ./tools/parallel_run.py

Change-Id: I942ff891930d22a579345265897916cf84842a1c
diff --git a/openjdkjvmti/deopt_manager.cc b/openjdkjvmti/deopt_manager.cc
index ec29f2c..a7feba8 100644
--- a/openjdkjvmti/deopt_manager.cc
+++ b/openjdkjvmti/deopt_manager.cc
@@ -42,6 +42,7 @@
 #include "dex/dex_file_annotations.h"
 #include "dex/modifiers.h"
 #include "events-inl.h"
+#include "gc/collector_type.h"
 #include "gc/heap.h"
 #include "gc/scoped_gc_critical_section.h"
 #include "jit/jit.h"
@@ -49,6 +50,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "nativehelper/scoped_local_ref.h"
+#include "read_barrier_config.h"
 #include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "scoped_thread_state_change.h"
@@ -478,6 +480,11 @@
 }
 
 void DeoptManager::DeoptimizeThread(art::Thread* target) {
+  // We might or might not be running on the target thread (self) so get Thread::Current
+  // directly.
+  art::gc::ScopedGCCriticalSection sgccs(art::Thread::Current(),
+                                         art::gc::GcCause::kGcCauseDebugger,
+                                         art::gc::CollectorType::kCollectorTypeDebugger);
   art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(target);
 }
 
diff --git a/openjdkjvmti/deopt_manager.h b/openjdkjvmti/deopt_manager.h
index 73a64be..9789c5e 100644
--- a/openjdkjvmti/deopt_manager.h
+++ b/openjdkjvmti/deopt_manager.h
@@ -112,7 +112,9 @@
       REQUIRES(!deoptimization_status_lock_, !art::Roles::uninterruptible_)
       REQUIRES_SHARED(art::Locks::mutator_lock_);
 
-  void DeoptimizeThread(art::Thread* target) REQUIRES_SHARED(art::Locks::mutator_lock_);
+  void DeoptimizeThread(art::Thread* target)
+      REQUIRES(!art::Locks::thread_list_lock_)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
   void DeoptimizeAllThreads() REQUIRES_SHARED(art::Locks::mutator_lock_);
 
   void FinishSetup()
diff --git a/openjdkjvmti/ti_method.cc b/openjdkjvmti/ti_method.cc
index e88539f..bdbd7bc 100644
--- a/openjdkjvmti/ti_method.cc
+++ b/openjdkjvmti/ti_method.cc
@@ -38,6 +38,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/mutex-inl.h"
+#include "deopt_manager.h"
 #include "dex/code_item_accessors-inl.h"
 #include "dex/dex_file_annotations.h"
 #include "dex/dex_file_types.h"
@@ -52,10 +53,12 @@
 #include "mirror/object_array-inl.h"
 #include "nativehelper/scoped_local_ref.h"
 #include "oat_file.h"
+#include "obj_ptr.h"
 #include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "thread-current-inl.h"
+#include "thread.h"
 #include "thread_list.h"
 #include "ti_stack.h"
 #include "ti_thread.h"
@@ -528,48 +531,51 @@
 
   void Run(art::Thread* self) override REQUIRES(art::Locks::mutator_lock_) {
     art::Locks::mutator_lock_->AssertSharedHeld(art::Thread::Current());
-    art::ScopedAssertNoThreadSuspension sants("CommonLocalVariableClosure::Run");
-    std::unique_ptr<art::Context> context(art::Context::Create());
-    FindFrameAtDepthVisitor visitor(self, context.get(), depth_);
-    visitor.WalkStack();
-    if (!visitor.FoundFrame()) {
-      // Must have been a bad depth.
-      result_ = ERR(NO_MORE_FRAMES);
-      return;
-    }
-    art::ArtMethod* method = visitor.GetMethod();
-    // Native and 'art' proxy methods don't have registers.
-    if (method->IsNative() || method->IsProxyMethod()) {
-      // TODO It might be useful to fake up support for get at least on proxy frames.
-      result_ = ERR(OPAQUE_FRAME);
-      return;
-    } else if (method->DexInstructionData().RegistersSize() <= slot_) {
-      result_ = ERR(INVALID_SLOT);
-      return;
-    }
-    bool needs_instrument = !visitor.IsShadowFrame();
-    uint32_t pc = visitor.GetDexPc(/*abort_on_failure=*/ false);
-    if (pc == art::dex::kDexNoIndex) {
-      // Cannot figure out current PC.
-      result_ = ERR(OPAQUE_FRAME);
-      return;
-    }
-    std::string descriptor;
-    art::Primitive::Type slot_type = art::Primitive::kPrimVoid;
-    jvmtiError err = GetSlotType(method, pc, &descriptor, &slot_type);
-    if (err != OK) {
-      result_ = err;
-      return;
-    }
+    bool needs_instrument;
+    {
+      art::ScopedAssertNoThreadSuspension sants("CommonLocalVariableClosure::Run");
+      std::unique_ptr<art::Context> context(art::Context::Create());
+      FindFrameAtDepthVisitor visitor(self, context.get(), depth_);
+      visitor.WalkStack();
+      if (!visitor.FoundFrame()) {
+        // Must have been a bad depth.
+        result_ = ERR(NO_MORE_FRAMES);
+        return;
+      }
+      art::ArtMethod* method = visitor.GetMethod();
+      // Native and 'art' proxy methods don't have registers.
+      if (method->IsNative() || method->IsProxyMethod()) {
+        // TODO It might be useful to fake up support for get at least on proxy frames.
+        result_ = ERR(OPAQUE_FRAME);
+        return;
+      } else if (method->DexInstructionData().RegistersSize() <= slot_) {
+        result_ = ERR(INVALID_SLOT);
+        return;
+      }
+      needs_instrument = !visitor.IsShadowFrame();
+      uint32_t pc = visitor.GetDexPc(/*abort_on_failure=*/false);
+      if (pc == art::dex::kDexNoIndex) {
+        // Cannot figure out current PC.
+        result_ = ERR(OPAQUE_FRAME);
+        return;
+      }
+      std::string descriptor;
+      art::Primitive::Type slot_type = art::Primitive::kPrimVoid;
+      jvmtiError err = GetSlotType(method, pc, &descriptor, &slot_type);
+      if (err != OK) {
+        result_ = err;
+        return;
+      }
 
-    err = GetTypeError(method, slot_type, descriptor);
-    if (err != OK) {
-      result_ = err;
-      return;
+      err = GetTypeError(method, slot_type, descriptor);
+      if (err != OK) {
+        result_ = err;
+        return;
+      }
+      result_ = Execute(method, visitor);
     }
-    result_ = Execute(method, visitor);
     if (needs_instrument) {
-      art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(self);
+      DeoptManager::Get()->DeoptimizeThread(self);
     }
   }
 
@@ -637,9 +643,14 @@
 
   jvmtiError GetResult() override REQUIRES_SHARED(art::Locks::mutator_lock_) {
     if (result_ == OK && type_ == art::Primitive::kPrimNot) {
-      val_->l = obj_val_.IsNull()
-          ? nullptr
-          : art::Thread::Current()->GetJniEnv()->AddLocalReference<jobject>(obj_val_.Read());
+      if (obj_val_ == nullptr) {
+        val_->l = nullptr;
+      } else {
+        art::JNIEnvExt* jni = art::Thread::Current()->GetJniEnv();
+        val_->l = static_cast<JNIEnv*>(jni)->NewLocalRef(obj_val_);
+        jni->DeleteGlobalRef(obj_val_);
+        obj_val_ = nullptr;
+      }
     }
     return CommonLocalVariableClosure::GetResult();
   }
@@ -678,8 +689,10 @@
                              &ptr_val)) {
           return ERR(OPAQUE_FRAME);
         }
-        obj_val_ = art::GcRoot<art::mirror::Object>(
-            reinterpret_cast<art::mirror::Object*>(ptr_val));
+        art::JNIEnvExt* jni = art::Thread::Current()->GetJniEnv();
+        art::ObjPtr<art::mirror::Object> obj(reinterpret_cast<art::mirror::Object*>(ptr_val));
+        ScopedLocalRef<jobject> local(jni, jni->AddLocalReference<jobject>(obj));
+        obj_val_ = jni->NewGlobalRef(local.get());
         break;
       }
       case art::Primitive::kPrimInt:
@@ -716,7 +729,9 @@
  private:
   art::Primitive::Type type_;
   jvalue* val_;
-  art::GcRoot<art::mirror::Object> obj_val_;
+  // A global reference to the return value. We use the global reference to safely transfer the
+  // value between threads.
+  jobject obj_val_;
 };
 
 jvmtiError MethodUtil::GetLocalVariableGeneric(jvmtiEnv* env ATTRIBUTE_UNUSED,
@@ -737,12 +752,9 @@
     art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return err;
   }
-  art::ScopedAssertNoThreadSuspension sants("Performing GetLocalVariable");
   GetLocalVariableClosure c(depth, slot, type, val);
-  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.  We
-  // need to avoid suspending as we wait for the checkpoint to occur since we are (potentially)
-  // transfering a GcRoot across threads.
-  if (!target->RequestSynchronousCheckpoint(&c, art::ThreadState::kRunnable)) {
+  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
+  if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
     return c.GetResult();
diff --git a/openjdkjvmti/ti_stack.cc b/openjdkjvmti/ti_stack.cc
index 6ad4e2a..62204c9 100644
--- a/openjdkjvmti/ti_stack.cc
+++ b/openjdkjvmti/ti_stack.cc
@@ -45,6 +45,7 @@
 #include "base/bit_utils.h"
 #include "base/enums.h"
 #include "base/mutex.h"
+#include "deopt_manager.h"
 #include "dex/code_item_accessors-inl.h"
 #include "dex/dex_file.h"
 #include "dex/dex_file_annotations.h"
@@ -1020,18 +1021,22 @@
   // NB This does a SuspendCheck (during thread state change) so we need to make
   // sure we don't have the 'suspend_lock' locked here.
   art::ScopedObjectAccess soa(self);
-  art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+  art::Locks::thread_list_lock_->ExclusiveLock(self);
   jvmtiError err = ERR(INTERNAL);
   if (!ThreadUtil::GetAliveNativeThread(thread, soa, &target, &err)) {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return err;
   }
   if (target != self) {
     // TODO This is part of the spec but we could easily avoid needing to do it.
     // We would just put all the logic into a sync-checkpoint.
-    art::MutexLock tscl_mu(self, *art::Locks::thread_suspend_count_lock_);
+    art::Locks::thread_suspend_count_lock_->ExclusiveLock(self);
     if (target->GetUserCodeSuspendCount() == 0) {
+      art::Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
+      art::Locks::thread_list_lock_->ExclusiveUnlock(self);
       return ERR(THREAD_NOT_SUSPENDED);
     }
+    art::Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
   }
   // We hold the user_code_suspension_lock_ so the target thread is staying
   // suspended until we are done (unless it's 'self' in which case we don't care
@@ -1043,10 +1048,12 @@
   FindFrameAtDepthVisitor visitor(target, context.get(), depth);
   visitor.WalkStack();
   if (!visitor.FoundFrame()) {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return ERR(NO_MORE_FRAMES);
   }
   art::ArtMethod* method = visitor.GetMethod();
   if (method->IsNative()) {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return ERR(OPAQUE_FRAME);
   }
   // From here we are sure to succeed.
@@ -1068,8 +1075,12 @@
   }
   // Make sure can we will go to the interpreter and use the shadow frames.
   if (needs_instrument) {
-    art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(
-        target);
+    art::FunctionClosure fc([](art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      DeoptManager::Get()->DeoptimizeThread(self);
+    });
+    target->RequestSynchronousCheckpoint(&fc);
+  } else {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
   }
   return OK;
 }
@@ -1083,17 +1094,21 @@
   // NB This does a SuspendCheck (during thread state change) so we need to make
   // sure we don't have the 'suspend_lock' locked here.
   art::ScopedObjectAccess soa(self);
-  art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+  art::Locks::thread_list_lock_->ExclusiveLock(self);
   jvmtiError err = ERR(INTERNAL);
   if (!ThreadUtil::GetAliveNativeThread(thread, soa, &target, &err)) {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return err;
   }
   {
-    art::MutexLock tscl_mu(self, *art::Locks::thread_suspend_count_lock_);
+    art::Locks::thread_suspend_count_lock_->ExclusiveLock(self);
     if (target == self || target->GetUserCodeSuspendCount() == 0) {
       // We cannot be the current thread for this function.
+      art::Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
+      art::Locks::thread_list_lock_->ExclusiveUnlock(self);
       return ERR(THREAD_NOT_SUSPENDED);
     }
+    art::Locks::thread_suspend_count_lock_->ExclusiveUnlock(self);
   }
   JvmtiGlobalTLSData* tls_data = ThreadUtil::GetGlobalTLSData(target);
   constexpr art::StackVisitor::StackWalkKind kWalkKind =
@@ -1108,6 +1123,7 @@
         << "Frame at depth " << tls_data->disable_pop_frame_depth << " was "
         << "marked as un-poppable by the jvmti plugin. See b/117615146 for "
         << "more information.";
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return ERR(OPAQUE_FRAME);
   }
   // We hold the user_code_suspension_lock_ so the target thread is staying
@@ -1120,12 +1136,14 @@
 
   if (!final_frame.FoundFrame() || !penultimate_frame.FoundFrame()) {
     // Cannot do it if there is only one frame!
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return ERR(NO_MORE_FRAMES);
   }
 
   art::ArtMethod* called_method = final_frame.GetMethod();
   art::ArtMethod* calling_method = penultimate_frame.GetMethod();
   if (calling_method->IsNative() || called_method->IsNative()) {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return ERR(OPAQUE_FRAME);
   }
   // From here we are sure to succeed.
@@ -1149,7 +1167,12 @@
   // early return for the final frame will force everything to the interpreter
   // so we only need to instrument if it was not present.
   if (created_final_frame) {
-    DeoptManager::Get()->DeoptimizeThread(target);
+    art::FunctionClosure fc([](art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      DeoptManager::Get()->DeoptimizeThread(self);
+    });
+    target->RequestSynchronousCheckpoint(&fc);
+  } else {
+    art::Locks::thread_list_lock_->ExclusiveUnlock(self);
   }
   return OK;
 }
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 15fdfb3..6c50a20 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -37,6 +37,7 @@
 #include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "base/mutex.h"
+#include "deopt_manager.h"
 #include "events-inl.h"
 #include "gc/system_weak.h"
 #include "gc/collector_type.h"
@@ -1101,7 +1102,7 @@
 
     void Run(art::Thread* me) override REQUIRES_SHARED(art::Locks::mutator_lock_) {
       // Make sure the thread is prepared to notice the exception.
-      art::Runtime::Current()->GetInstrumentation()->InstrumentThreadStack(me);
+      DeoptManager::Get()->DeoptimizeThread(me);
       me->SetAsyncException(exception_.Get());
       // Wake up the thread if it is sleeping.
       me->Notify();
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index 0a2a50c..b04d4da 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -35,6 +35,17 @@
   virtual void Run(Thread* self) = 0;
 };
 
+class FunctionClosure : public Closure {
+ public:
+  explicit FunctionClosure(std::function<void(Thread*)>&& f) : func_(std::move(f)) {}
+  void Run(Thread* self) override {
+    func_(self);
+  }
+
+ private:
+  std::function<void(Thread*)> func_;
+};
+
 class Task : public Closure {
  public:
   // Called after Closure::Run has been called.