Fix sampling profiler to use thread's cpu clock.

The sampling profiler was using the sampling thread's CPU clock to
measure CPU time instead of the sampled thread's CPU clock.

Change-Id: Ief1f82e07e0353192c61521f67dec7a761905f64
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 48e595f..c4077de 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -496,6 +496,19 @@
   name.assign(*name_);
 }
 
+uint64_t Thread::GetCpuMicroTime() const {
+#if defined(HAVE_POSIX_CLOCKS)
+  clockid_t cpu_clock_id;
+  pthread_getcpuclockid(pthread_self_, &cpu_clock_id);
+  timespec now;
+  clock_gettime(cpu_clock_id, &now);
+  return static_cast<uint64_t>(now.tv_sec) * 1000000LL + now.tv_nsec / 1000LL;
+#else
+  UNIMPLEMENTED(WARNING);
+  return -1;
+#endif
+}
+
 void Thread::AtomicSetFlag(ThreadFlag flag) {
   android_atomic_or(flag, &state_and_flags_.as_int);
 }
diff --git a/runtime/thread.h b/runtime/thread.h
index b9b93dd..f16695d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -267,6 +267,9 @@
   // Sets the thread's name.
   void SetThreadName(const char* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Returns the thread-specific CPU-time clock in microseconds or -1 if unavailable.
+  uint64_t GetCpuMicroTime() const;
+
   mirror::Object* GetPeer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(jpeer_ == NULL);
     return opeer_;
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 13e2bf6..d435129 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -174,7 +174,7 @@
 
 static void MeasureClockOverhead(Trace* trace) {
   if (trace->UseThreadCpuClock()) {
-    ThreadCpuMicroTime();
+    Thread::Current()->GetCpuMicroTime();
   }
   if (trace->UseWallClock()) {
     MicroTime();
@@ -182,7 +182,8 @@
 }
 
 static uint32_t GetClockOverhead(Trace* trace) {
-  uint64_t start = ThreadCpuMicroTime();
+  Thread* self = Thread::Current();
+  uint64_t start = self->GetCpuMicroTime();
 
   for (int i = 4000; i > 0; i--) {
     MeasureClockOverhead(trace);
@@ -195,7 +196,7 @@
     MeasureClockOverhead(trace);
   }
 
-  uint64_t elapsed = ThreadCpuMicroTime() - start;
+  uint64_t elapsed = self->GetCpuMicroTime() - start;
   return uint32_t (elapsed / 32);
 }
 
@@ -582,11 +583,11 @@
     uint32_t thread_clock_diff = 0;
     if (UNLIKELY(it == thread_clock_base_map_.end())) {
       // First event, the diff is 0, record the base time in the map.
-      uint64_t time = ThreadCpuMicroTime();
+      uint64_t time = thread->GetCpuMicroTime();
       thread_clock_base_map_.Put(thread, time);
     } else {
       uint64_t thread_clock_base = it->second;
-      thread_clock_diff = ThreadCpuMicroTime() - thread_clock_base;
+      thread_clock_diff = thread->GetCpuMicroTime() - thread_clock_base;
     }
     Append4LE(ptr, thread_clock_diff);
     ptr += 4;
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 6856bb7..4c17914 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -164,17 +164,6 @@
 #endif
 }
 
-uint64_t ThreadCpuMicroTime() {
-#if defined(HAVE_POSIX_CLOCKS)
-  timespec now;
-  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
-  return static_cast<uint64_t>(now.tv_sec) * 1000000LL + now.tv_nsec / 1000LL;
-#else
-  UNIMPLEMENTED(WARNING);
-  return -1;
-#endif
-}
-
 uint64_t ThreadCpuNanoTime() {
 #if defined(HAVE_POSIX_CLOCKS)
   timespec now;
diff --git a/runtime/utils.h b/runtime/utils.h
index 9e724d0..bd81114 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -285,9 +285,6 @@
 // Returns the monotonic time since some unspecified starting point in nanoseconds.
 uint64_t NanoTime();
 
-// Returns the thread-specific CPU-time clock in microseconds or -1 if unavailable.
-uint64_t ThreadCpuMicroTime();
-
 // Returns the thread-specific CPU-time clock in nanoseconds or -1 if unavailable.
 uint64_t ThreadCpuNanoTime();