Add systrace output to runtime shutdown

Help debug why shutdown of the `am` command is slow.

Findings:
~200ms is spent deleting the thread list on N5.

Bug: 26351700
Change-Id: Icb59390a5267ae8bade4ced12252743c48206bb1
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 01f3bbd..a96a24e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -214,6 +214,7 @@
 }
 
 Runtime::~Runtime() {
+  ATRACE_BEGIN("Runtime shutdown");
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
@@ -228,45 +229,55 @@
   Thread* self = Thread::Current();
   const bool attach_shutdown_thread = self == nullptr;
   if (attach_shutdown_thread) {
+    ATRACE_BEGIN("Attach shutdown thread");
     CHECK(AttachCurrentThread("Shutdown thread", false, nullptr, false));
+    ATRACE_END();
     self = Thread::Current();
   } else {
     LOG(WARNING) << "Current thread not detached in Runtime shutdown";
   }
 
   {
+    ATRACE_BEGIN("Wait for shutdown cond");
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     shutting_down_started_ = true;
     while (threads_being_born_ > 0) {
       shutdown_cond_->Wait(self);
     }
     shutting_down_ = true;
+    ATRACE_END();
   }
   // Shutdown and wait for the daemons.
   CHECK(self != nullptr);
   if (IsFinishedStarting()) {
+    ATRACE_BEGIN("Waiting for Daemons");
     self->ClearException();
     self->GetJniEnv()->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
                                             WellKnownClasses::java_lang_Daemons_stop);
+    ATRACE_END();
   }
 
   Trace::Shutdown();
 
   if (attach_shutdown_thread) {
+    ATRACE_BEGIN("Detach shutdown thread");
     DetachCurrentThread();
+    ATRACE_END();
     self = nullptr;
   }
 
   // Make sure to let the GC complete if it is running.
   heap_->WaitForGcToComplete(gc::kGcCauseBackground, self);
   heap_->DeleteThreadPool();
-  if (jit_.get() != nullptr) {
+  if (jit_ != nullptr) {
+    ATRACE_BEGIN("Delete jit");
     VLOG(jit) << "Deleting jit thread pool";
     // Delete thread pool before the thread list since we don't want to wait forever on the
     // JIT compiler threads.
     jit_->DeleteThreadPool();
     // Similarly, stop the profile saver thread before deleting the thread list.
     jit_->StopProfileSaver();
+    ATRACE_END();
   }
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
@@ -274,11 +285,13 @@
   delete signal_catcher_;
 
   // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
+  ATRACE_BEGIN("Delete thread list");
   delete thread_list_;
+  ATRACE_END();
 
   // Delete the JIT after thread list to ensure that there is no remaining threads which could be
   // accessing the instrumentation when we delete it.
-  if (jit_.get() != nullptr) {
+  if (jit_ != nullptr) {
     VLOG(jit) << "Deleting jit";
     jit_.reset(nullptr);
   }
@@ -286,6 +299,7 @@
   // Shutdown the fault manager if it was initialized.
   fault_manager.Shutdown();
 
+  ATRACE_BEGIN("Delete state");
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
@@ -302,10 +316,12 @@
   low_4gb_arena_pool_.reset();
   arena_pool_.reset();
   MemMap::Shutdown();
+  ATRACE_END();
 
   // TODO: acquire a static mutex on Runtime to avoid racing.
   CHECK(instance_ == nullptr || instance_ == this);
   instance_ = nullptr;
+  ATRACE_END();
 }
 
 struct AbortState {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index a390908..77f780f 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -69,6 +69,7 @@
 }
 
 ThreadList::~ThreadList() {
+  ATRACE_BEGIN(__FUNCTION__);
   // Detach the current thread if necessary. If we failed to start, there might not be any threads.
   // We need to detach the current thread here in case there's another thread waiting to join with
   // us.
@@ -79,19 +80,27 @@
     contains = Contains(self);
   }
   if (contains) {
+    ATRACE_BEGIN("DetachCurrentThread");
     Runtime::Current()->DetachCurrentThread();
+    ATRACE_END();
   }
+  ATRACE_BEGIN("WaitForOtherNonDaemonThreadsToExit");
   WaitForOtherNonDaemonThreadsToExit();
+  ATRACE_END();
   // Disable GC and wait for GC to complete in case there are still daemon threads doing
   // allocations.
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   heap->DisableGCForShutdown();
   // In case a GC is in progress, wait for it to finish.
+  ATRACE_BEGIN("WaitForGcToComplete");
   heap->WaitForGcToComplete(gc::kGcCauseBackground, Thread::Current());
-
+  ATRACE_END();
   // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
   //       Thread::Init.
+  ATRACE_BEGIN("SuspendAllDaemonThreads");
   SuspendAllDaemonThreads();
+  ATRACE_END();
+  ATRACE_END();
 }
 
 bool ThreadList::Contains(Thread* thread) {