Enhance the jit profiler to print more statistics and be more verbose.

Specifically, the per-trace invocation percentage is printed now.

D/dalvikvm(  671): TRACEPROFILE 0x42b6eed8   15449863  2.13% [0(+2), 253] Ljava/nio/Buffer;position;()I
D/dalvikvm(  671): TRACEPROFILE 0x42b7b3ac   15410023  2.13% [0x4(+3), 450] Lorg/apache/xml/serializer/CharInfo;shouldMapTextChar;(I)Z
D/dalvikvm(  671): TRACEPROFILE 0x42b7b2cc   15346582  2.12% [0(+2), 449] Lorg/apache/xml/serializer/CharInfo;shouldMapTextChar;(I)Z
D/dalvikvm(  671): TRACEPROFILE 0x42b7b40c   15013650  2.07% [0xb4(+2), 1527] Lorg/apache/xml/serializer/ToStream;characters;([CII)V
D/dalvikvm(  671): TRACEPROFILE 0x42b7b4f0   14907957  2.06% [0xc7(+2), 1537] Lorg/apache/xml/serializer/ToStream;characters;([CII)V
D/dalvikvm(  671): TRACEPROFILE 0x42b7b308   14898588  2.06% [0xad(+3), 1525] Lorg/apache/xml/serializer/ToStream;characters;([CII)V
D/dalvikvm(  671): TRACEPROFILE 0x42b7b52c   14798762  2.04% [0xc4(+2), 1523] Lorg/apache/xml/serializer/ToStream;characters;([CII)V
D/dalvikvm(  671): TRACEPROFILE 0x42b7b570   14763770  2.04% [0xf3(+2), 1577] Lorg/apache/xml/serializer/ToStream;characters;([CII)V

And the top 10 traces will be recompiled in verbose mode to facilitate code
quality analysis.
diff --git a/vm/Globals.h b/vm/Globals.h
index 928002b..7bc2d31 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -714,7 +714,7 @@
     pthread_mutex_t    compilerICPatchLock;
     pthread_cond_t     compilerQueueActivity;
     pthread_cond_t     compilerQueueEmpty;
-    int                compilerQueueLength;
+    volatile int       compilerQueueLength;
     int                compilerHighWater;
     int                compilerWorkEnqueueIndex;
     int                compilerWorkDequeueIndex;
diff --git a/vm/compiler/Compiler.c b/vm/compiler/Compiler.c
index 8297131..a353b86 100644
--- a/vm/compiler/Compiler.c
+++ b/vm/compiler/Compiler.c
@@ -532,6 +532,12 @@
 {
     void *threadReturn;
 
+    if (gDvm.verboseShutdown) {
+        dvmCompilerDumpStats();
+        while (gDvmJit.compilerQueueLength)
+          sleep(5);
+    }
+
     if (gDvmJit.compilerHandle) {
 
         gDvmJit.haltCompilerThread = true;
@@ -546,9 +552,6 @@
             LOGD("Compiler thread has shut down\n");
     }
 
-    if (gDvm.verboseShutdown)
-        dvmCompilerDumpStats();
-
     dvmDestroyMutex(&gDvmJit.tableLock);
     dvmDestroyMutex(&gDvmJit.compilerLock);
     dvmDestroyMutex(&gDvmJit.compilerICPatchLock);
diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h
index ccc91dd..3f60a5e 100644
--- a/vm/compiler/Compiler.h
+++ b/vm/compiler/Compiler.h
@@ -160,6 +160,7 @@
 struct BasicBlock;
 struct SSARepresentation;
 struct GrowableList;
+struct JitEntry;
 
 void dvmInitializeSSAConversion(struct CompilationUnit *cUnit);
 int dvmConvertSSARegToDalvik(struct CompilationUnit *cUnit, int ssaReg);
@@ -179,6 +180,7 @@
 void dvmCompilerDataFlowAnalysisDispatcher(struct CompilationUnit *cUnit,
                 void (*func)(struct CompilationUnit *, struct BasicBlock *));
 void dvmCompilerStateRefresh(void);
-JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc);
+JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
+                                            const struct JitEntry *desc);
 
 #endif /* _DALVIK_VM_COMPILER */
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index 6d901b9..09783f2 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -1704,11 +1704,13 @@
 }
 
 /* Dumps profile info for a single trace */
-static int dumpTraceProfile(JitEntry *p)
+static int dumpTraceProfile(JitEntry *p, bool silent, bool reset,
+                            unsigned long sum)
 {
     ChainCellCounts* pCellCounts;
     char* traceBase;
     u4* pExecutionCount;
+    u4 executionCount;
     u2* pCellOffset;
     JitTraceDescription *desc;
     const Method* method;
@@ -1716,15 +1718,24 @@
     traceBase = getTraceBase(p);
 
     if (p->codeAddress == NULL) {
-        LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase);
+        if (!silent)
+            LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase);
         return 0;
     }
     if (p->codeAddress == gDvmJit.interpretTemplate) {
-        LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY  0 0", (int)traceBase);
+        if (!silent)
+            LOGD("TRACEPROFILE 0x%08x 0 INTERPRET_ONLY  0 0", (int)traceBase);
         return 0;
     }
 
     pExecutionCount = (u4*) (traceBase);
+    executionCount = *pExecutionCount;
+    if (reset) {
+        *pExecutionCount =0;
+    }
+    if (silent) {
+        return executionCount;
+    }
     pCellOffset = (u2*) (traceBase + 4);
     pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset);
     desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts));
@@ -1746,22 +1757,24 @@
                        method->accessFlags,
                        addrToLineCb, NULL, &addrToLine);
 
-    LOGD("TRACEPROFILE 0x%08x % 10d [%#x(+%d), %d] %s%s;%s",
+    LOGD("TRACEPROFILE 0x%08x % 10d %5.2f%% [%#x(+%d), %d] %s%s;%s",
          (int)traceBase,
-         *pExecutionCount,
+         executionCount,
+         ((float ) executionCount) / sum * 100.0,
          desc->trace[0].frag.startOffset,
          desc->trace[0].frag.numInsts,
          addrToLine.lineNum,
          method->clazz->descriptor, method->name, methodDesc);
     free(methodDesc);
 
-    return *pExecutionCount;
+    return executionCount;
 }
 
 /* Create a copy of the trace descriptor of an existing compilation */
-JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc)
+JitTraceDescription *dvmCopyTraceDescriptor(const u2 *pc,
+                                            const JitEntry *knownEntry)
 {
-    JitEntry *jitEntry = dvmFindJitEntry(pc);
+    const JitEntry *jitEntry = knownEntry ? knownEntry : dvmFindJitEntry(pc);
     if (jitEntry == NULL) return NULL;
 
     /* Find out the startint point */
@@ -1810,7 +1823,7 @@
 {
     JitEntry *sortedEntries;
     int numTraces = 0;
-    unsigned long counts = 0;
+    unsigned long sum = 0;
     unsigned int i;
 
     /* Make sure that the table is not changing */
@@ -1825,16 +1838,40 @@
     qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
           sortTraceProfileCount);
 
-    /* Dump the sorted entries */
+    /* Analyze the sorted entries */
     for (i=0; i < gDvmJit.jitTableSize; i++) {
         if (sortedEntries[i].dPC != 0) {
-            counts += dumpTraceProfile(&sortedEntries[i]);
+            sum += dumpTraceProfile(&sortedEntries[i],
+                                       true /* silent */,
+                                       false /* reset */,
+                                       0);
             numTraces++;
         }
     }
     if (numTraces == 0)
         numTraces = 1;
-    LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces));
+    if (sum == 0) {
+        sum = 1;
+    }
+
+    LOGD("JIT: Average execution count -> %d",(int)(sum / numTraces));
+
+    /* Dump the sorted entries. The count of each trace will be reset to 0. */
+    for (i=0; i < gDvmJit.jitTableSize; i++) {
+        if (sortedEntries[i].dPC != 0) {
+            dumpTraceProfile(&sortedEntries[i],
+                             false /* silent */,
+                             true /* reset */,
+                             sum);
+        }
+    }
+
+    for (i=0; i < gDvmJit.jitTableSize && i < 10; i++) {
+        JitTraceDescription* desc =
+            dvmCopyTraceDescriptor(NULL, &sortedEntries[i]);
+        dvmCompilerWorkEnqueue(sortedEntries[i].dPC,
+                               kWorkOrderTraceDebug, desc);
+    }
 
     free(sortedEntries);
 done:
diff --git a/vm/interp/Jit.c b/vm/interp/Jit.c
index ec39c8a..198e1a2 100644
--- a/vm/interp/Jit.c
+++ b/vm/interp/Jit.c
@@ -218,7 +218,7 @@
 static void selfVerificationSpinLoop(ShadowSpace *shadowSpace)
 {
     const u2 *startPC = shadowSpace->startPC;
-    JitTraceDescription* desc = dvmCopyTraceDescriptor(startPC);
+    JitTraceDescription* desc = dvmCopyTraceDescriptor(startPC, NULL);
     if (desc) {
         dvmCompilerWorkEnqueue(startPC, kWorkOrderTraceDebug, desc);
         /*