Add extra traces to Thread startup

Help diagnose what takes time during thread creation.

Bug: 116052292
Test: test-art-host
Change-Id: I738b0f987bdef077bca148c3cf06ef126f11022a
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8bec2d9..f459f9c 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -160,6 +160,7 @@
 }
 
 void Thread::InitTlsEntryPoints() {
+  ScopedTrace trace("InitTlsEntryPoints");
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
   uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
   uintptr_t* end = reinterpret_cast<uintptr_t*>(
@@ -903,6 +904,8 @@
   tlsPtr_.pthread_self = pthread_self();
   CHECK(is_started_);
 
+  ScopedTrace trace("Thread::Init");
+
   SetUpAlternateSignalStack();
   if (!InitStackHwm()) {
     return false;
@@ -912,7 +915,10 @@
   RemoveSuspendTrigger();
   InitCardTable();
   InitTid();
-  interpreter::InitInterpreterTls(this);
+  {
+    ScopedTrace trace2("InitInterpreterTls");
+    interpreter::InitInterpreterTls(this);
+  }
 
 #ifdef ART_TARGET_ANDROID
   __get_tls()[TLS_SLOT_ART_THREAD_SELF] = this;
@@ -936,6 +942,7 @@
     }
   }
 
+  ScopedTrace trace3("ThreadList::Register");
   thread_list->Register(this);
   return true;
 }
@@ -943,6 +950,7 @@
 template <typename PeerAction>
 Thread* Thread::Attach(const char* thread_name, bool as_daemon, PeerAction peer_action) {
   Runtime* runtime = Runtime::Current();
+  ScopedTrace trace("Thread::Attach");
   if (runtime == nullptr) {
     LOG(ERROR) << "Thread attaching to non-existent runtime: " <<
         ((thread_name != nullptr) ? thread_name : "(Unnamed)");
@@ -950,6 +958,7 @@
   }
   Thread* self;
   {
+    ScopedTrace trace2("Thread birth");
     MutexLock mu(nullptr, *Locks::runtime_shutdown_lock_);
     if (runtime->IsShuttingDownLocked()) {
       LOG(WARNING) << "Thread attaching while runtime is shutting down: " <<
@@ -1251,6 +1260,7 @@
 }
 
 bool Thread::InitStackHwm() {
+  ScopedTrace trace("InitStackHwm");
   void* read_stack_base;
   size_t read_stack_size;
   size_t read_guard_size;