Add support for sample profiling in traceview.

Instead of method instrumentation, allow traceview to periodically
sample stack traces of threads. Disabled by default until we add
gui support for this new mode.

(cherry picked from commit 676f8a527fb62abd39663d55c7d9208f5ca03093)

Change-Id: Id8942d4186fc628ee7bd8d809d7239dec310054b
diff --git a/vm/Profile.cpp b/vm/Profile.cpp
index dfb50b3..8492cd5 100644
--- a/vm/Profile.cpp
+++ b/vm/Profile.cpp
@@ -118,11 +118,12 @@
  * We use this clock when we can because it enables us to track the time that
  * a thread spends running and not blocked.
  */
-static inline u8 getThreadCpuTimeInUsec()
+static inline u8 getThreadCpuTimeInUsec(Thread* thread)
 {
+    clockid_t cid;
     struct timespec tm;
-
-    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &tm);
+    pthread_getcpuclockid(thread->handle, &cid);
+    clock_gettime(cid, &tm);
     if (!(tm.tv_nsec >= 0 && tm.tv_nsec < 1*1000*1000*1000)) {
         ALOGE("bad nsec: %ld", tm.tv_nsec);
         dvmAbort();
@@ -137,7 +138,7 @@
 static inline u8 getStopwatchClock()
 {
 #if defined(HAVE_POSIX_CLOCKS)
-    return getThreadCpuTimeInUsec();
+    return getThreadCpuTimeInUsec(dvmThreadSelf());
 #else
     return getWallTimeInUsec();
 #endif
@@ -171,6 +172,131 @@
 }
 
 /*
+ * Gets a thread's stack trace as an array of method pointers of length pCount.
+ * The returned array must be freed by the caller.
+ */
+static const Method** getStackTrace(Thread* thread, size_t* pCount)
+{
+    void* fp = thread->interpSave.curFrame;
+    assert(thread == dvmThreadSelf() || dvmIsSuspended(thread));
+
+    /* Compute the stack depth. */
+    size_t stackDepth = 0;
+    while (fp != NULL) {
+        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
+
+        if (!dvmIsBreakFrame((u4*) fp))
+            stackDepth++;
+
+        assert(fp != saveArea->prevFrame);
+        fp = saveArea->prevFrame;
+    }
+    *pCount = stackDepth;
+
+    /*
+     * Allocate memory for stack trace. This must be freed later, either by
+     * freeThreadStackTraceSamples when tracing stops or by freeThread.
+     */
+    const Method** stackTrace = (const Method**) malloc(sizeof(Method*) *
+                                                        stackDepth);
+    if (stackTrace == NULL)
+        return NULL;
+
+    /* Walk the stack a second time, filling in the stack trace. */
+    const Method** ptr = stackTrace;
+    fp = thread->interpSave.curFrame;
+    while (fp != NULL) {
+        const StackSaveArea* saveArea = SAVEAREA_FROM_FP(fp);
+        const Method* method = saveArea->method;
+
+        if (!dvmIsBreakFrame((u4*) fp)) {
+            *ptr++ = method;
+            stackDepth--;
+        }
+        assert(fp != saveArea->prevFrame);
+        fp = saveArea->prevFrame;
+    }
+    assert(stackDepth == 0);
+
+    return stackTrace;
+}
+/*
+ * Get a sample of the stack trace for a thread.
+ */
+static void getSample(Thread* thread)
+{
+    /* Get old and new stack trace for thread. */
+    size_t newLength = 0;
+    const Method** newStackTrace = getStackTrace(thread, &newLength);
+    size_t oldLength = thread->stackTraceSampleLength;
+    const Method** oldStackTrace = thread->stackTraceSample;
+
+    /* Read time clocks to use for all events in this trace. */
+    u4 cpuClockDiff = 0;
+    u4 wallClockDiff = 0;
+    dvmMethodTraceReadClocks(thread, &cpuClockDiff, &wallClockDiff);
+    if (oldStackTrace == NULL) {
+        /*
+         * If there's no previous stack trace sample, log an entry event for
+         * every method in the trace.
+         */
+        for (int i = newLength - 1; i >= 0; --i) {
+            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
+                              cpuClockDiff, wallClockDiff);
+        }
+    } else {
+        /*
+         * If there's a previous stack trace, diff the traces and emit entry
+         * and exit events accordingly.
+         */
+        int diffIndexOld = oldLength - 1;
+        int diffIndexNew = newLength - 1;
+        /* Iterate bottom-up until there's a difference between traces. */
+        while (diffIndexOld >= 0 && diffIndexNew >= 0 &&
+               oldStackTrace[diffIndexOld] == newStackTrace[diffIndexNew]) {
+            diffIndexOld--;
+            diffIndexNew--;
+        }
+        /* Iterate top-down over old trace until diff, emitting exit events. */
+        for (int i = 0; i <= diffIndexOld; ++i) {
+            dvmMethodTraceAdd(thread, oldStackTrace[i], METHOD_TRACE_EXIT,
+                              cpuClockDiff, wallClockDiff);
+        }
+        /* Iterate bottom-up over new trace from diff, emitting entry events. */
+        for (int i = diffIndexNew; i >= 0; --i) {
+            dvmMethodTraceAdd(thread, newStackTrace[i], METHOD_TRACE_ENTER,
+                              cpuClockDiff, wallClockDiff);
+        }
+    }
+
+    /* Free the old stack trace and update the thread's stack trace sample. */
+    free(oldStackTrace);
+    thread->stackTraceSample = newStackTrace;
+    thread->stackTraceSampleLength = newLength;
+}
+
+/*
+ * Entry point for sampling thread.
+ */
+static void* runSamplingThread(void* arg)
+{
+    while (gDvm.methodTrace.traceEnabled) {
+        dvmSuspendAllThreads(SUSPEND_FOR_SAMPLING);
+
+        dvmLockThreadList(dvmThreadSelf());
+        for (Thread *thread = gDvm.threadList; thread != NULL; thread = thread->next) {
+            getSample(thread);
+        }
+        dvmUnlockThreadList();
+
+        dvmResumeAllThreads(SUSPEND_FOR_SAMPLING);
+
+        usleep(gDvm.methodTrace.samplingIntervalUs);
+    }
+    return NULL;
+}
+
+/*
  * Boot-time init.
  */
 bool dvmProfilingStartup()
@@ -282,6 +408,21 @@
 }
 
 /*
+ * Free and reset the "stackTraceSample" field in all threads.
+ */
+static void freeThreadStackTraceSamples()
+{
+    Thread* thread;
+
+    dvmLockThreadList(NULL);
+    for (thread = gDvm.threadList; thread != NULL; thread = thread->next) {
+        free(thread->stackTraceSample);
+        thread->stackTraceSample = NULL;
+    }
+    dvmUnlockThreadList();
+}
+
+/*
  * Dump the thread list to the specified file.
  */
 static void dumpThreadList(FILE* fp) {
@@ -428,6 +569,10 @@
         state->recordSize = TRACE_REC_SIZE_SINGLE_CLOCK;
     }
 
+    /* TODO: Control sampling through gui. */
+    state->samplingEnabled = false;
+    state->samplingIntervalUs = 1000;
+
     /*
      * Output the header.
      */
@@ -452,7 +597,17 @@
      * following to take a Thread* argument, and set the appropriate
      * interpBreak flags only on the target thread.
      */
-    updateActiveProfilers(kSubModeMethodTrace, true);
+    if (state->samplingEnabled) {
+        updateActiveProfilers(kSubModeSampleTrace, true);
+        /* Start the sampling thread. */
+        if (!dvmCreateInternalThread(&state->samplingThreadHandle,
+                "Sampling Thread", &runSamplingThread, NULL)) {
+            dvmThrowInternalError("failed to create sampling thread");
+            goto fail;
+        }
+    } else {
+        updateActiveProfilers(kSubModeMethodTrace, true);
+    }
 
     dvmUnlockMutex(&state->startStopLock);
     return;
@@ -500,7 +655,7 @@
 {
 #if defined(HAVE_POSIX_CLOCKS)
     if (useThreadCpuClock()) {
-        getThreadCpuTimeInUsec();
+        getThreadCpuTimeInUsec(dvmThreadSelf());
     }
 #endif
     if (useWallClock()) {
@@ -565,7 +720,11 @@
         dvmUnlockMutex(&state->startStopLock);
         return;
     } else {
-        updateActiveProfilers(kSubModeMethodTrace, false);
+        if (state->samplingEnabled) {
+            updateActiveProfilers(kSubModeSampleTrace, false);
+        } else {
+            updateActiveProfilers(kSubModeMethodTrace, false);
+        }
     }
 
     /* compute elapsed time */
@@ -719,9 +878,42 @@
     fclose(state->traceFile);
     state->traceFile = NULL;
 
+    /* free and clear sampling traces held by all threads */
+    if (state->samplingEnabled) {
+        freeThreadStackTraceSamples();
+    }
+
     /* wake any threads that were waiting for profiling to complete */
     dvmBroadcastCond(&state->threadExitCond);
     dvmUnlockMutex(&state->startStopLock);
+
+    /* make sure the sampling thread has stopped */
+    if (state->samplingEnabled &&
+        pthread_join(state->samplingThreadHandle, NULL) != 0) {
+        ALOGW("Sampling thread join failed");
+    }
+}
+
+/*
+ * Read clocks and generate time diffs for method trace events.
+ */
+void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
+                              u4* wallClockDiff)
+{
+#if defined(HAVE_POSIX_CLOCKS)
+    if (useThreadCpuClock()) {
+        if (!self->cpuClockBaseSet) {
+            /* Initialize per-thread CPU clock base time on first use. */
+            self->cpuClockBase = getThreadCpuTimeInUsec(self);
+            self->cpuClockBaseSet = true;
+        } else {
+            *cpuClockDiff = getThreadCpuTimeInUsec(self) - self->cpuClockBase;
+        }
+    }
+#endif
+    if (useWallClock()) {
+        *wallClockDiff = getWallTimeInUsec() - gDvm.methodTrace.startWhen;
+    }
 }
 
 /*
@@ -730,7 +922,8 @@
  * Multiple threads may be banging on this all at once.  We use atomic ops
  * rather than mutexes for speed.
  */
-void dvmMethodTraceAdd(Thread* self, const Method* method, int action)
+void dvmMethodTraceAdd(Thread* self, const Method* method, int action,
+                       u4 cpuClockDiff, u4 wallClockDiff)
 {
     MethodTraceState* state = &gDvm.methodTrace;
     u4 methodVal;
@@ -739,21 +932,6 @@
 
     assert(method != NULL);
 
-#if defined(HAVE_POSIX_CLOCKS)
-    /*
-     * We can only access the per-thread CPU clock from within the
-     * thread, so we have to initialize the base time on the first use.
-     * (Looks like pthread_getcpuclockid(thread, &id) will do what we
-     * want, but it doesn't appear to be defined on the device.)
-     */
-    if (!self->cpuClockBaseSet) {
-        self->cpuClockBase = getThreadCpuTimeInUsec();
-        self->cpuClockBaseSet = true;
-        //ALOGI("thread base id=%d 0x%llx",
-        //    self->threadId, self->cpuClockBase);
-    }
-#endif
-
     /*
      * Advance "curOffset" atomically.
      */
@@ -784,7 +962,6 @@
 
 #if defined(HAVE_POSIX_CLOCKS)
     if (useThreadCpuClock()) {
-        u4 cpuClockDiff = (u4) (getThreadCpuTimeInUsec() - self->cpuClockBase);
         *ptr++ = (u1) cpuClockDiff;
         *ptr++ = (u1) (cpuClockDiff >> 8);
         *ptr++ = (u1) (cpuClockDiff >> 16);
@@ -793,7 +970,6 @@
 #endif
 
     if (useWallClock()) {
-        u4 wallClockDiff = (u4) (getWallTimeInUsec() - state->startWhen);
         *ptr++ = (u1) wallClockDiff;
         *ptr++ = (u1) (wallClockDiff >> 8);
         *ptr++ = (u1) (wallClockDiff >> 16);
@@ -809,7 +985,11 @@
 void dvmFastMethodTraceEnter(const Method* method, Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
-        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER);
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
+        dvmMethodTraceAdd(self, method, METHOD_TRACE_ENTER, cpuClockDiff,
+                          wallClockDiff);
     }
 }
 
@@ -821,8 +1001,11 @@
 void dvmFastMethodTraceExit(Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
         dvmMethodTraceAdd(self, self->interpSave.method,
-                          METHOD_TRACE_EXIT);
+                          METHOD_TRACE_EXIT, cpuClockDiff, wallClockDiff);
     }
 }
 
@@ -834,7 +1017,11 @@
 void dvmFastNativeMethodTraceExit(const Method* method, Thread* self)
 {
     if (self->interpBreak.ctl.subMode & kSubModeMethodTrace) {
-        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT);
+        u4 cpuClockDiff = 0;
+        u4 wallClockDiff = 0;
+        dvmMethodTraceReadClocks(self, &cpuClockDiff, &wallClockDiff);
+        dvmMethodTraceAdd(self, method, METHOD_TRACE_EXIT, cpuClockDiff,
+                          wallClockDiff);
     }
 }
 
diff --git a/vm/Profile.h b/vm/Profile.h
index 6c9e1c7..0ec8705 100644
--- a/vm/Profile.h
+++ b/vm/Profile.h
@@ -52,6 +52,10 @@
 
     int     traceVersion;
     size_t  recordSize;
+
+    bool    samplingEnabled;
+    int     samplingIntervalUs;
+    pthread_t       samplingThreadHandle;
 };
 
 /*
@@ -112,27 +116,45 @@
  */
 #define TRACE_METHOD_ENTER(_self, _method)                                  \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_ENTER);          \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_ENTER,           \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_ENTER);              \
     } while(0);
 #define TRACE_METHOD_EXIT(_self, _method)                                   \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_EXIT);           \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_EXIT,            \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_EXIT);               \
     } while(0);
 #define TRACE_METHOD_UNROLL(_self, _method)                                 \
     do {                                                                    \
-        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace)           \
-            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_UNROLL);         \
+        if (_self->interpBreak.ctl.subMode & kSubModeMethodTrace) {         \
+            u4 cpuClockDiff = 0;                                            \
+            u4 wallClockDiff = 0;                                           \
+            dvmMethodTraceReadClocks(_self, &cpuClockDiff, &wallClockDiff); \
+            dvmMethodTraceAdd(_self, _method, METHOD_TRACE_UNROLL,          \
+                              cpuClockDiff, wallClockDiff);                 \
+        }                                                                   \
         if (_self->interpBreak.ctl.subMode & kSubModeEmulatorTrace)         \
             dvmEmitEmulatorTrace(_method, METHOD_TRACE_UNROLL);             \
     } while(0);
 
-void dvmMethodTraceAdd(struct Thread* self, const Method* method, int action);
+void dvmMethodTraceReadClocks(Thread* self, u4* cpuClockDiff,
+                              u4* wallClockDiff);
+void dvmMethodTraceAdd(struct Thread* self, const Method* method, int action,
+                       u4 cpuClockDiff, u4 wallClockDiff);
 void dvmEmitEmulatorTrace(const Method* method, int action);
 
 void dvmMethodTraceGCBegin(void);
diff --git a/vm/Thread.cpp b/vm/Thread.cpp
index 205167e..1ebfca7 100644
--- a/vm/Thread.cpp
+++ b/vm/Thread.cpp
@@ -997,6 +997,7 @@
 #if defined(WITH_SELF_VERIFICATION)
     dvmSelfVerificationShadowSpaceFree(thread);
 #endif
+    free(thread->stackTraceSample);
     free(thread);
 }
 
diff --git a/vm/Thread.h b/vm/Thread.h
index 8deef6e..19bd49c 100644
--- a/vm/Thread.h
+++ b/vm/Thread.h
@@ -287,6 +287,10 @@
     bool        cpuClockBaseSet;
     u8          cpuClockBase;
 
+    /* previous stack trace sample and length (used by sampling profiler) */
+    const Method** stackTraceSample;
+    size_t stackTraceSampleLength;
+
     /* memory allocation profiling state */
     AllocProfState allocProf;
 
@@ -358,6 +362,7 @@
     SUSPEND_FOR_DEX_OPT,
     SUSPEND_FOR_VERIFY,
     SUSPEND_FOR_HPROF,
+    SUSPEND_FOR_SAMPLING,
 #if defined(WITH_JIT)
     SUSPEND_FOR_TBL_RESIZE,  // jit-table resize
     SUSPEND_FOR_IC_PATCH,    // polymorphic callsite inline-cache patch
diff --git a/vm/interp/Interp.cpp b/vm/interp/Interp.cpp
index fa77523..2b13bd8 100644
--- a/vm/interp/Interp.cpp
+++ b/vm/interp/Interp.cpp
@@ -1661,7 +1661,11 @@
         dvmEnableSubMode(thread, kSubModeInstCounting);
     }
     if (dvmIsMethodTraceActive()) {
-        dvmEnableSubMode(thread, kSubModeMethodTrace);
+        if (gDvm.methodTrace.samplingEnabled) {
+            dvmEnableSubMode(thread, kSubModeSampleTrace);
+        } else {
+            dvmEnableSubMode(thread, kSubModeMethodTrace);
+        }
     }
     if (gDvm.emulatorTraceEnableCount > 0) {
         dvmEnableSubMode(thread, kSubModeEmulatorTrace);
diff --git a/vm/interp/InterpState.h b/vm/interp/InterpState.h
index cc0a13f..6ef4472 100644
--- a/vm/interp/InterpState.h
+++ b/vm/interp/InterpState.h
@@ -63,6 +63,7 @@
     kSubModeCallbackPending   = 0x0020,
     kSubModeCountedStep       = 0x0040,
     kSubModeCheckAlways       = 0x0080,
+    kSubModeSampleTrace       = 0x0100,
     kSubModeJitTraceBuild     = 0x4000,
     kSubModeJitSV             = 0x8000,
     kSubModeDebugProfile   = (kSubModeMethodTrace |