Revert^2 "Perform SetEvent under the user_code_suspend_count_lock_"

If we needed to condition-wait for a GC to finish we would trigger an
overly restrictive deadlock check. This passed testing because the Wait
is normally not executed. To prevent this type of issue from occurring
in the future, we added a way for code to assert that condition-waiting
is allowed without actually going to sleep.
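
For illustration, a minimal sketch (a hypothetical helper, not part of
this change) of the pattern the new ConditionVariable::CheckSafeToWait
enables at wait sites such as the ones updated in heap.cc below:

    #include "base/mutex.h"

    // Hypothetical wait site: assert up front that sleeping here would
    // pass the lock-level deadlock check, even on runs where the Wait()
    // below is never reached. CheckSafeToWait is a no-op unless
    // kDebugLocking is enabled. 'cond' must be guarded by 'lock'.
    void WaitForFlagToClear(art::Thread* self,
                            art::Mutex& lock,
                            art::ConditionVariable& cond,
                            const bool& flag) {
      art::MutexLock mu(self, lock);
      cond.CheckSafeToWait(self);
      while (flag) {
        cond.Wait(self);  // Only taken on the rare slow path.
      }
    }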

This reverts commit eaa4831142fa58f176ddad52c0d5e9c288e26b81.

Reason for revert: Relaxed too-strict check in CheckSafeToWait.

Test: ./test.py --host
Bug: 130150240

Change-Id: I8359e595cdd73f49cb68c8c70d755cab0e563ac7
diff --git a/openjdkjvmti/events.cc b/openjdkjvmti/events.cc
index 90a550a..16abf41 100644
--- a/openjdkjvmti/events.cc
+++ b/openjdkjvmti/events.cc
@@ -38,6 +38,7 @@
 #include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "art_method-inl.h"
+#include "base/mutex.h"
 #include "deopt_manager.h"
 #include "dex/dex_file_types.h"
 #include "gc/allocation_listener.h"
@@ -1268,57 +1269,53 @@
     return ERR(MUST_POSSESS_CAPABILITY);
   }
 
-  art::Thread* art_thread = nullptr;
-  if (thread != nullptr) {
-    if (!IsThreadControllable(event)) {
-      return ERR(ILLEGAL_ARGUMENT);
-    }
-    art::ScopedObjectAccess soa(art::Thread::Current());
-    art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
-    jvmtiError err = ERR(INTERNAL);
-    if (!ThreadUtil::GetAliveNativeThread(thread, soa, &art_thread, &err)) {
-      return err;
-    } else if (art_thread->IsStillStarting()) {
-      return ERR(THREAD_NOT_ALIVE);
-    }
-    art::ThreadState state = art_thread->GetState();
-    if (state == art::ThreadState::kStarting || state == art::ThreadState::kTerminated) {
-      return ERR(THREAD_NOT_ALIVE);
-    }
+  if (thread != nullptr && !IsThreadControllable(event)) {
+    return ERR(ILLEGAL_ARGUMENT);
   }
 
-  // TODO We use art_thread simply as a global unique identifier here. It is not safe to actually
-  // use it without holding the thread_list_lock_.
-
+  art::Thread* self = art::Thread::Current();
+  art::Thread* target = nullptr;
+  ScopedNoUserCodeSuspension snucs(self);
   bool old_state;
   bool new_state;
-
   {
-    // Change the event masks atomically.
-    art::Thread* self = art::Thread::Current();
-    art::WriterMutexLock mu(self, envs_lock_);
-    art::WriterMutexLock mu_env_info(self, env->event_info_mutex_);
+    // From now on we know we cannot get suspended by user-code.
+    // NB This does a SuspendCheck (during thread state change) so we need to
+    // make sure we don't have the 'suspend_lock' locked here.
+    art::ScopedObjectAccess soa(self);
+    art::WriterMutexLock el_mu(self, envs_lock_);
+    art::MutexLock tll_mu(self, *art::Locks::thread_list_lock_);
+    jvmtiError err = ERR(INTERNAL);
+    if (thread != nullptr) {
+      if (!ThreadUtil::GetAliveNativeThread(thread, soa, &target, &err)) {
+        return err;
+      } else if (target->IsStillStarting() ||
+                target->GetState() == art::ThreadState::kStarting) {
+        target->Dump(LOG_STREAM(WARNING) << "Is not alive: ");
+        return ERR(THREAD_NOT_ALIVE);
+      }
+    }
+
+    art::WriterMutexLock ei_mu(self, env->event_info_mutex_);
     old_state = global_mask.Test(event);
     if (mode == JVMTI_ENABLE) {
-      env->event_masks.EnableEvent(env, art_thread, event);
+      env->event_masks.EnableEvent(env, target, event);
       global_mask.Set(event);
       new_state = true;
     } else {
       DCHECK_EQ(mode, JVMTI_DISABLE);
 
-      // TODO Replace art_thread with a uintptr_t or something to indicate we cannot read from it.
-      env->event_masks.DisableEvent(env, art_thread, event);
+      env->event_masks.DisableEvent(env, target, event);
       RecalculateGlobalEventMaskLocked(event);
       new_state = global_mask.Test(event);
     }
   }
-
-  // Handle any special work required for the event type.
+  // Handle any special work required for the event type. We still hold the
+  // user_code_suspension_lock_ so there won't be any interleaving here.
   if (new_state != old_state) {
     return HandleEventType(event, thread, mode == JVMTI_ENABLE);
   }
-
-  return ERR(NONE);
+  return OK;
 }
 
 void EventHandler::HandleBreakpointEventsChanged(bool added) {
@@ -1341,7 +1338,7 @@
 }
 
 EventHandler::EventHandler()
-  : envs_lock_("JVMTI Environment List Lock", art::LockLevel::kTopLockLevel),
+  : envs_lock_("JVMTI Environment List Lock", art::LockLevel::kPostMutatorTopLockLevel),
     frame_pop_enabled(false) {
   alloc_listener_.reset(new JvmtiAllocationListener(this));
   ddm_listener_.reset(new JvmtiDdmChunkListener(this));
diff --git a/openjdkjvmti/events.h b/openjdkjvmti/events.h
index e48772c..2c3c7a0 100644
--- a/openjdkjvmti/events.h
+++ b/openjdkjvmti/events.h
@@ -21,6 +21,7 @@
 #include <vector>
 
 #include <android-base/logging.h>
+#include <android-base/thread_annotations.h>
 
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -323,9 +324,9 @@
   // need to be able to remove arbitrary elements from it.
   std::list<ArtJvmTiEnv*> envs GUARDED_BY(envs_lock_);
 
-  // Top level lock. Nothing at all should be held when we lock this.
-  mutable art::ReaderWriterMutex envs_lock_
-      ACQUIRED_BEFORE(art::Locks::instrument_entrypoints_lock_);
+  // Close to top level lock. Nothing should be held when we lock this (except for mutator_lock_
+  // which is needed when setting new events).
+  mutable art::ReaderWriterMutex envs_lock_ ACQUIRED_AFTER(art::Locks::mutator_lock_);
 
   // A union of all enabled events, anywhere.
   EventMask global_mask;
diff --git a/runtime/base/locks.cc b/runtime/base/locks.cc
index a7922a2..4349be0 100644
--- a/runtime/base/locks.cc
+++ b/runtime/base/locks.cc
@@ -158,9 +158,9 @@
     DCHECK(runtime_thread_pool_lock_ != nullptr);
   } else {
     // Create global locks in level order from highest lock level to lowest.
-    LockLevel current_lock_level = kInstrumentEntrypointsLock;
-    DCHECK(instrument_entrypoints_lock_ == nullptr);
-    instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level);
+    LockLevel current_lock_level = kUserCodeSuspensionLock;
+    DCHECK(user_code_suspension_lock_ == nullptr);
+    user_code_suspension_lock_ = new Mutex("user code suspension lock", current_lock_level);
 
     #define UPDATE_CURRENT_LOCK_LEVEL(new_level) \
       if ((new_level) >= current_lock_level) { \
@@ -171,9 +171,9 @@
       } \
       current_lock_level = new_level;
 
-    UPDATE_CURRENT_LOCK_LEVEL(kUserCodeSuspensionLock);
-    DCHECK(user_code_suspension_lock_ == nullptr);
-    user_code_suspension_lock_ = new Mutex("user code suspension lock", current_lock_level);
+    UPDATE_CURRENT_LOCK_LEVEL(kInstrumentEntrypointsLock);
+    DCHECK(instrument_entrypoints_lock_ == nullptr);
+    instrument_entrypoints_lock_ = new Mutex("instrument entrypoint lock", current_lock_level);
 
     UPDATE_CURRENT_LOCK_LEVEL(kMutatorLock);
     DCHECK(mutator_lock_ == nullptr);
diff --git a/runtime/base/locks.h b/runtime/base/locks.h
index b7d8e31..b15fd32 100644
--- a/runtime/base/locks.h
+++ b/runtime/base/locks.h
@@ -121,9 +121,14 @@
   kRuntimeShutdownLock,
   kTraceLock,
   kHeapBitmapLock,
+
+  // This is a generic lock level for a top-level lock that is meant to be acquired while
+  // already holding the mutator_lock_.
+  kPostMutatorTopLockLevel,
+
   kMutatorLock,
-  kUserCodeSuspensionLock,
   kInstrumentEntrypointsLock,
+  kUserCodeSuspensionLock,
   kZygoteCreationLock,
 
   // The highest valid lock level. Use this if there is code that should only be called with no
@@ -174,13 +179,13 @@
   // Check if the given mutex is in expected_mutexes_on_weak_ref_access_.
   static bool IsExpectedOnWeakRefAccess(BaseMutex* mutex);
 
-  // Guards allocation entrypoint instrumenting.
-  static Mutex* instrument_entrypoints_lock_;
-
   // Guards code that deals with user-code suspension. This mutex must be held when suspending or
   // resuming threads with SuspendReason::kForUserCode. It may be held by a suspended thread, but
   // only if the suspension is not due to SuspendReason::kForUserCode.
-  static Mutex* user_code_suspension_lock_ ACQUIRED_AFTER(instrument_entrypoints_lock_);
+  static Mutex* user_code_suspension_lock_;
+
+  // Guards allocation entrypoint instrumenting.
+  static Mutex* instrument_entrypoints_lock_ ACQUIRED_AFTER(user_code_suspension_lock_);
 
   // A barrier is used to synchronize the GC/Debugger thread with mutator threads. When GC/Debugger
   // thread wants to suspend all mutator threads, it needs to wait for all mutator threads to pass
@@ -217,7 +222,7 @@
   //    state is changed                           |  .. running ..
   //  - if the CAS operation fails then goto x     |  .. running ..
   //  .. running ..                                |  .. running ..
-  static MutatorMutex* mutator_lock_ ACQUIRED_AFTER(user_code_suspension_lock_);
+  static MutatorMutex* mutator_lock_ ACQUIRED_AFTER(instrument_entrypoints_lock_);
 
   // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
   static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 7aec661..3b35f5b 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -190,13 +190,14 @@
     for (int i = kLockLevelCount - 1; i >= 0; --i) {
       if (i != level_) {
         BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
-        // We allow the thread to wait even if the user_code_suspension_lock_ is held so long as we
-        // are some thread's resume_cond_ (level_ == kThreadSuspendCountLock). This just means that
-        // gc or some other internal process is suspending the thread while it is trying to suspend
-        // some other thread. So long as the current thread is not being suspended by a
-        // SuspendReason::kForUserCode (which needs the user_code_suspension_lock_ to clear) this is
-        // fine.
-        if (held_mutex == Locks::user_code_suspension_lock_ && level_ == kThreadSuspendCountLock) {
+        // We allow the thread to wait even if the user_code_suspension_lock_ is held. This just
+        // means that the GC or some other internal process is suspending the thread while it is
+        // trying to suspend some other thread. So long as the current thread is not being
+        // suspended by a SuspendReason::kForUserCode (which needs the user_code_suspension_lock_
+        // to clear) this is fine. This is needed because user_code_suspension_lock_ is how
+        // untrusted code interacts with suspension: one holds the lock to prevent user-code
+        // suspension. Since that is only initiated from user-supplied native code, this is safe.
+        if (held_mutex == Locks::user_code_suspension_lock_) {
           // No thread safety analysis is fine since we have both the user_code_suspension_lock_
           // from the line above and the ThreadSuspendCountLock since it is our level_. We use this
           // lambda to avoid having to annotate the whole function as NO_THREAD_SAFETY_ANALYSIS.
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 39fd8c8..b966fd4 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -401,6 +401,12 @@
   // TODO: remove this.
   void WaitHoldingLocks(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
 
+  void CheckSafeToWait(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+    if (kDebugLocking) {
+      guard_.CheckSafeToWait(self);
+    }
+  }
+
  private:
   const char* const name_;
   // The Mutex being used by waiters. It is an error to mix condition variables between different
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 09c83a0..3066c7a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -35,6 +35,7 @@
 #include "base/histogram-inl.h"
 #include "base/logging.h"  // For VLOG.
 #include "base/memory_tool.h"
+#include "base/mutex.h"
 #include "base/os.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -841,6 +842,7 @@
   }
   ScopedThreadStateChange tsc(self, kWaitingForGcThreadFlip);
   MutexLock mu(self, *thread_flip_lock_);
+  thread_flip_cond_->CheckSafeToWait(self);
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
   if (thread_flip_running_) {
@@ -886,6 +888,7 @@
   CHECK(kUseReadBarrier);
   ScopedThreadStateChange tsc(self, kWaitingForGcThreadFlip);
   MutexLock mu(self, *thread_flip_lock_);
+  thread_flip_cond_->CheckSafeToWait(self);
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
   CHECK(!thread_flip_running_);
@@ -3278,6 +3281,7 @@
 }
 
 collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) {
+  gc_complete_cond_->CheckSafeToWait(self);
   collector::GcType last_gc_type = collector::kGcTypeNone;
   GcCause last_gc_cause = kGcCauseNone;
   uint64_t wait_start = NanoTime();