Dump java backtrace when contending lock too long time.

It's easy to cause problems when time-consuming work is executed inside
a synchronized method/block. Previously, we used the "dvm_lock_sample"
entry in the event log to help us look into such cases.

But that is always after the fact: "dvm_lock_sample" is printed only after
the owner thread has exited the critical section, so we cannot see details
such as which statement got stuck or was the most time-consuming.

This implementation provides much more information about what the
contending thread is doing. When a thread contends for a lock for a long
time, we print the backtrace of the thread holding the lock.
Since many threads may be waiting on the same lock, those threads may
request the backtrace to be printed several times, which is helpful for
finding and locating problems.

This mechanism is only suitable for very long wait times, not for lots of
short wait times.
This feature is heavyweight and is not enabled by default. It can be
turned on with "-XX:MonitorTimeoutEnable" in AndroidRuntime::startVm.

Bug:140968481
Test: Manually check contention log messages.

Signed-off-by: wangguibo <wangguibo@xiaomi.com>
Change-Id: I29784d11355ae3c581723254848a87cd80e71456
Merged-In: I29784d11355ae3c581723254848a87cd80e71456
(cherry picked from commit 0d290721a8d63d5e0d38a64d3ecf922a91becb10)
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 0b8c781..6574ec0 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -31,11 +31,18 @@
 #include "mutex-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
+#include "thread.h"
+#include "thread_list.h"
 
 namespace art {
 
 using android::base::StringPrintf;
 
+static constexpr uint64_t kIntervalMillis = 50;
+static constexpr int kMonitorTimeoutTryMax = 5;
+
+static const char* kLastDumpStackTime = "LastDumpStackTime";
+
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
   Atomic<const BaseMutex*> all_mutexes_guard;
@@ -45,6 +52,13 @@
 };
 static struct AllMutexData gAllMutexData[kAllMutexDataSize];
 
+struct DumpStackLastTimeTLSData : public art::TLSData {
+  explicit DumpStackLastTimeTLSData(uint64_t last_dump_time_ms) {
+    last_dump_time_ms_ = last_dump_time_ms;
+  }
+  uint64_t last_dump_time_ms_;
+};
+
 #if ART_USE_FUTEXES
 static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, const timespec& rhs) {
   const int32_t one_sec = 1000 * 1000 * 1000;  // one second in nanoseconds.
@@ -443,15 +457,28 @@
           if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
             self->CheckEmptyCheckpointFromMutex();
           }
+
+          uint64_t wait_start_ms = enable_monitor_timeout_ ? MilliTime() : 0;
+          uint64_t try_times = 0;
           do {
+            timespec timeout_ts;
+            timeout_ts.tv_sec = 0;
+            timeout_ts.tv_nsec = Runtime::Current()->GetMonitorTimeoutNs();
             if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state,
-                      nullptr, nullptr, 0) != 0) {
+                      enable_monitor_timeout_ ? &timeout_ts : nullptr , nullptr, 0) != 0) {
               // We only went to sleep after incrementing and contenders and checking that the
               // lock is still held by someone else.  EAGAIN and EINTR both indicate a spurious
               // failure, try again from the beginning.  We don't use TEMP_FAILURE_RETRY so we can
               // intentionally retry to acquire the lock.
               if ((errno != EAGAIN) && (errno != EINTR)) {
-                PLOG(FATAL) << "futex wait failed for " << name_;
+                if (errno == ETIMEDOUT) {
+                  try_times++;
+                  if (try_times <= kMonitorTimeoutTryMax) {
+                    DumpStack(self, wait_start_ms, try_times);
+                  }
+                } else {
+                  PLOG(FATAL) << "futex wait failed for " << name_;
+                }
               }
             }
             SleepIfRuntimeDeleted(self);
@@ -481,6 +508,57 @@
   }
 }
 
+void Mutex::DumpStack(Thread* self, uint64_t wait_start_ms, uint64_t try_times) {
+  ScopedObjectAccess soa(self);
+  Locks::thread_list_lock_->ExclusiveLock(self);
+  std::string owner_stack_dump;
+  pid_t owner_tid = GetExclusiveOwnerTid();
+  Thread *owner = Runtime::Current()->GetThreadList()->FindThreadByTid(owner_tid);
+  if (owner != nullptr) {
+    if (IsDumpFrequent(owner, try_times)) {
+      Locks::thread_list_lock_->ExclusiveUnlock(self);
+      LOG(WARNING) << "Contention with tid " << owner_tid << ", monitor id " << monitor_id_;
+      return;
+    }
+    struct CollectStackTrace : public Closure {
+      void Run(art::Thread* thread) override
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+        if (IsDumpFrequent(thread)) {
+          return;
+        }
+        thread->SetCustomTLS(kLastDumpStackTime, new DumpStackLastTimeTLSData(MilliTime()));
+        thread->DumpJavaStack(oss);
+      }
+      std::ostringstream oss;
+    };
+    CollectStackTrace owner_trace;
+    owner->RequestSynchronousCheckpoint(&owner_trace);
+    owner_stack_dump = owner_trace.oss.str();
+    uint64_t wait_ms = MilliTime() - wait_start_ms;
+    LOG(WARNING) << "Monitor contention with tid " << owner_tid << ", wait time: " << wait_ms
+                 << "ms, monitor id: " << monitor_id_
+                 << "\nPerfMonitor owner thread(" << owner_tid << ") stack is:\n"
+                 << owner_stack_dump;
+  } else {
+    Locks::thread_list_lock_->ExclusiveUnlock(self);
+  }
+}
+
+bool Mutex::IsDumpFrequent(Thread* thread, uint64_t try_times) {
+  uint64_t last_dump_time_ms = 0;
+  DumpStackLastTimeTLSData* tls_data =
+      reinterpret_cast<DumpStackLastTimeTLSData*>(thread->GetCustomTLS(kLastDumpStackTime));
+  if (tls_data != nullptr) {
+     last_dump_time_ms = tls_data->last_dump_time_ms_;
+  }
+  uint64_t interval = MilliTime() - last_dump_time_ms;
+  if (interval < kIntervalMillis * try_times) {
+    return true;
+  } else {
+    return false;
+  }
+}
+
 bool Mutex::ExclusiveTryLock(Thread* self) {
   DCHECK(self == nullptr || self == Thread::Current());
   if (kDebugLocking && !recursive_) {
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index e4a7e1c..8f2a8ea 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -221,6 +221,18 @@
 
   void Dump(std::ostream& os) const override;
 
+  void DumpStack(Thread *self, uint64_t wait_start_ms, uint64_t try_times = 1);
+
+  static bool IsDumpFrequent(Thread *self, uint64_t try_times = 1);
+
+  void setEnableMonitorTimeout() {
+    enable_monitor_timeout_ = true;
+  }
+
+  void setMonitorId(uint32_t monitorId) {
+    monitor_id_ = monitorId;
+  }
+
   // For negative capabilities in clang annotations.
   const Mutex& operator!() const { return *this; }
 
@@ -275,6 +287,10 @@
   unsigned int recursion_count_;
   const bool recursive_;  // Can the lock be recursively held?
 
+  bool enable_monitor_timeout_ = false;
+
+  uint32_t monitor_id_;
+
   friend class ConditionVariable;
   DISALLOW_COPY_AND_ASSIGN(Mutex);
 };
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 295e76c..2f59022 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -42,6 +42,7 @@
 #include "thread_list.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
+#include <android-base/properties.h>
 
 static_assert(ART_USE_FUTEXES);
 
@@ -116,6 +117,11 @@
   // with the owner unlocking the thin-lock.
   CHECK(owner == nullptr || owner == self || owner->IsSuspended());
   // The identity hash code is set for the life time of the monitor.
+
+  bool monitor_timeout_enabled = Runtime::Current()->IsMonitorTimeoutEnabled();
+  if (monitor_timeout_enabled) {
+    MaybeEnableTimeout();
+  }
 }
 
 Monitor::Monitor(Thread* self,
@@ -144,6 +150,11 @@
   // with the owner unlocking the thin-lock.
   CHECK(owner == nullptr || owner == self || owner->IsSuspended());
   // The identity hash code is set for the life time of the monitor.
+
+  bool monitor_timeout_enabled = Runtime::Current()->IsMonitorTimeoutEnabled();
+  if (monitor_timeout_enabled) {
+    MaybeEnableTimeout();
+  }
 }
 
 int32_t Monitor::GetHashCode() {
@@ -1711,4 +1722,13 @@
   }
 }
 
+void Monitor::MaybeEnableTimeout() {
+  std::string current_package = Runtime::Current()->GetProcessPackageName();
+  bool enabled_for_app = android::base::GetBoolProperty("debug.art.monitor.app", false);
+  if (current_package == "android" || enabled_for_app) {
+    monitor_lock_.setEnableMonitorTimeout();
+    monitor_lock_.setMonitorId(monitor_id_);
+  }
+}
+
 }  // namespace art
diff --git a/runtime/monitor.h b/runtime/monitor.h
index c0a0a4f..99e071e 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -61,6 +61,12 @@
   // a lock word. See Runtime::max_spins_before_thin_lock_inflation_.
   constexpr static size_t kDefaultMaxSpinsBeforeThinLockInflation = 50;
 
+  static constexpr int kDefaultMonitorTimeoutMs = 500;
+
+  static constexpr int kMonitorTimeoutMinMs = 200;
+
+  static constexpr int kMonitorTimeoutMaxMs = 1000;  // 1 second
+
   ~Monitor();
 
   static void Init(uint32_t lock_profiling_threshold, uint32_t stack_dump_lock_profiling_threshold);
@@ -413,6 +419,8 @@
   void CheckLockOwnerRequest(Thread* self)
       REQUIRES(monitor_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void MaybeEnableTimeout() REQUIRES(Locks::mutator_lock_);
+
   // The denser encoded version of this monitor as stored in the lock word.
   MonitorId monitor_id_;
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 1cb8e9f..1bd905d 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -383,6 +383,13 @@
       .Define("-XX:ThreadSuspendTimeout=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::ThreadSuspendTimeout)
+      .Define("-XX:MonitorTimeoutEnable=_")
+          .WithType<bool>()
+          .WithValueMap({{"false", false}, {"true", true}})
+          .IntoKey(M::MonitorTimeoutEnable)
+      .Define("-XX:MonitorTimeout=_")  // in ms
+          .WithType<int>()
+          .IntoKey(M::MonitorTimeout)
       .Define("-XX:GlobalRefAllocStackTraceLimit=_")  // Number of free slots to enable tracing.
           .WithType<unsigned int>()
           .IntoKey(M::GlobalRefAllocStackTraceLimit)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 572e071..5343150 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -284,6 +284,8 @@
       async_exceptions_thrown_(false),
       non_standard_exits_enabled_(false),
       is_java_debuggable_(false),
+      monitor_timeout_enable_(false),
+      monitor_timeout_ns_(0),
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
       oat_file_manager_(nullptr),
@@ -1047,13 +1049,15 @@
 
   DCHECK(!IsZygote());
 
-  if (is_system_server && profile_system_server) {
+  if (is_system_server) {
     // Set the system server package name to "android".
     // This is used to tell the difference between samples provided by system server
     // and samples generated by other apps when processing boot image profiles.
     SetProcessPackageName("android");
-    jit_options_->SetWaitForJitNotificationsToSaveProfile(false);
-    VLOG(profiler) << "Enabling system server profiles";
+    if (profile_system_server) {
+      jit_options_->SetWaitForJitNotificationsToSaveProfile(false);
+      VLOG(profiler) << "Enabling system server profiles";
+    }
   }
 
   // Create the thread pools.
@@ -1412,6 +1416,14 @@
   thread_list_ = new ThreadList(runtime_options.GetOrDefault(Opt::ThreadSuspendTimeout));
   intern_table_ = new InternTable;
 
+  monitor_timeout_enable_ = runtime_options.GetOrDefault(Opt::MonitorTimeoutEnable);
+  int monitor_timeout_ms = runtime_options.GetOrDefault(Opt::MonitorTimeout);
+  if (monitor_timeout_ms < Monitor::kMonitorTimeoutMinMs ||
+      monitor_timeout_ms >= Monitor::kMonitorTimeoutMaxMs) {
+    LOG(ERROR) << "Improper monitor timeout could cause crash!";
+  }
+  monitor_timeout_ns_ = MsToNs(monitor_timeout_ms);
+
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
 
   target_sdk_version_ = runtime_options.GetOrDefault(Opt::TargetSdkVersion);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6f11916..88f7bc0 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -981,6 +981,13 @@
     return perfetto_javaheapprof_enabled_;
   }
 
+  bool IsMonitorTimeoutEnabled() const {
+    return monitor_timeout_enable_;
+  }
+
+  uint64_t GetMonitorTimeoutNs() const {
+    return monitor_timeout_ns_;
+  }
   // Return true if we should load oat files as executable or not.
   bool GetOatFilesExecutable() const;
 
@@ -1239,6 +1246,9 @@
   // Whether Java code needs to be debuggable.
   bool is_java_debuggable_;
 
+  bool monitor_timeout_enable_;
+  uint64_t monitor_timeout_ns_;
+
   // Whether or not this application can be profiled by the shell user,
   // even when running on a device that is running in user mode.
   bool is_profileable_from_shell_ = false;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index ef5bed7..3f0e3cc 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -66,6 +66,8 @@
                                           LongGCLogThreshold,             gc::Heap::kDefaultLongGCLogThreshold)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           ThreadSuspendTimeout,           ThreadList::kDefaultThreadSuspendTimeout)
+RUNTIME_OPTIONS_KEY (bool,                MonitorTimeoutEnable,           false)
+RUNTIME_OPTIONS_KEY (int,                 MonitorTimeout,                 Monitor::kDefaultMonitorTimeoutMs)
 RUNTIME_OPTIONS_KEY (Unit,                DumpGCPerformanceOnShutdown)
 RUNTIME_OPTIONS_KEY (Unit,                DumpRegionInfoBeforeGC)
 RUNTIME_OPTIONS_KEY (Unit,                DumpRegionInfoAfterGC)
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index fb4f6d3..e0d62d0 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -118,15 +118,6 @@
   return find(list_.begin(), list_.end(), thread) != list_.end();
 }
 
-bool ThreadList::Contains(pid_t tid) {
-  for (const auto& thread : list_) {
-    if (thread->GetTid() == tid) {
-      return true;
-    }
-  }
-  return false;
-}
-
 pid_t ThreadList::GetLockOwner() {
   return Locks::thread_list_lock_->GetExclusiveOwnerTid();
 }
@@ -179,12 +170,12 @@
     char* end;
     pid_t tid = strtol(e->d_name, &end, 10);
     if (!*end) {
-      bool contains;
+      Thread* thread;
       {
         MutexLock mu(self, *Locks::thread_list_lock_);
-        contains = Contains(tid);
+        thread = FindThreadByTid(tid);
       }
-      if (!contains) {
+      if (thread == nullptr) {
         DumpUnattachedThread(os, tid, dump_native_stack);
       }
     }
@@ -1110,6 +1101,15 @@
   return nullptr;
 }
 
+Thread* ThreadList::FindThreadByTid(int tid) {
+  for (const auto& thread : list_) {
+    if (thread->GetTid() == tid) {
+      return thread;
+    }
+  }
+  return nullptr;
+}
+
 void ThreadList::WaitForOtherNonDaemonThreadsToExit(bool check_no_birth) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   Thread* self = Thread::Current();
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 1dcdf62..87a4c8d 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -104,6 +104,9 @@
   // Find an existing thread (or self) by its thread id (not tid).
   Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_);
 
+  // Find an existing thread (or self) by its tid (not thread id).
+  Thread* FindThreadByTid(int tid) REQUIRES(Locks::thread_list_lock_);
+
   // Does the thread list still contain the given thread, or one at the same address?
   // Used by Monitor to provide (mostly accurate) debugging information.
   bool Contains(Thread* thread) REQUIRES(Locks::thread_list_lock_);
@@ -191,7 +194,6 @@
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);
 
-  bool Contains(pid_t tid) REQUIRES(Locks::thread_list_lock_);
   size_t RunCheckpoint(Closure* checkpoint_function, bool includeSuspended)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);