Add a mode for concurrently marking and sweeping.

When enabled, the mutator threads will be resumed while tracing
proceeds from the roots.  After the trace completes the mutator
threads are suspended, the roots are remarked, and marked objects
are scanned for updates and re-marked.

There are two limitations to this implementation.  For the sake
of expediency, mutators are not permitted to allocate during the
concurrent marking phase.  This will be addressed in a subsequent
change.  As there is no write barrier, all objects, rather than
just those objects assigned to during the concurrent phase, are
scanned for updates.  This will be addressed after a write barrier
is implemented.

Change-Id: I82dba23b58a1cf985589ed21ec0cffb5ebf48aae
diff --git a/vm/Globals.h b/vm/Globals.h
index 8bb29af..30df209 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -122,6 +122,7 @@
     bool        preVerify;
     bool        postVerify;
     bool        generateRegisterMaps;
+    bool        concurrentMarkSweep;
 
     int         assertionCtrlCount;
     AssertionControl*   assertionCtrl;
diff --git a/vm/Init.c b/vm/Init.c
index 8b8f15c..e29687d 100644
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -117,6 +117,7 @@
     dvmFprintf(stderr, "  -Xgc:[no]overwritefree\n");
     dvmFprintf(stderr, "  -Xgc:[no]preverify\n");
     dvmFprintf(stderr, "  -Xgc:[no]postverify\n");
+    dvmFprintf(stderr, "  -Xgc:[no]concurrent\n");
     dvmFprintf(stderr, "  -Xgenregmap\n");
     dvmFprintf(stderr, "  -Xcheckdexsum\n");
 #if defined(WITH_JIT)
@@ -983,6 +984,10 @@
                 gDvm.postVerify = true;
             else if (strcmp(argv[i] + 5, "nopostverify") == 0)
                 gDvm.postVerify = false;
+            else if (strcmp(argv[i] + 5, "concurrent") == 0)
+                gDvm.concurrentMarkSweep = true;
+            else if (strcmp(argv[i] + 5, "noconcurrent") == 0)
+                gDvm.concurrentMarkSweep = false;
             else {
                 dvmFprintf(stderr, "Bad value for -Xgc");
                 return -1;
@@ -1591,7 +1596,7 @@
     /*
      * Stop our internal threads.
      */
-    dvmHeapWorkerShutdown();
+    dvmGcThreadShutdown();
 
     if (gDvm.jdwpState != NULL)
         dvmJdwpShutdown(gDvm.jdwpState);
diff --git a/vm/alloc/Alloc.c b/vm/alloc/Alloc.c
index e6b509d..61a1350 100644
--- a/vm/alloc/Alloc.c
+++ b/vm/alloc/Alloc.c
@@ -47,8 +47,16 @@
     if (!dvmHeapWorkerStartup()) {
         return false;
     }
-    dvmHeapStartupAfterZygote();
-    return true;
+    return dvmHeapStartupAfterZygote();
+}
+
+/*
+ * Shutdown the threads internal to the garbage collector.
+ */
+void dvmGcThreadShutdown(void)
+{
+    dvmHeapWorkerShutdown();
+    dvmHeapThreadShutdown();
 }
 
 /*
diff --git a/vm/alloc/Alloc.h b/vm/alloc/Alloc.h
index 1200788..8244ce7 100644
--- a/vm/alloc/Alloc.h
+++ b/vm/alloc/Alloc.h
@@ -28,6 +28,7 @@
 bool dvmCreateStockExceptions(void);
 bool dvmGcStartupAfterZygote(void);
 void dvmGcShutdown(void);
+void dvmGcThreadShutdown(void);
 
 /*
  * Do any last-minute preparation before we call fork() for the first time.
diff --git a/vm/alloc/Heap.c b/vm/alloc/Heap.c
index bc852ca..ecd7cc8 100644
--- a/vm/alloc/Heap.c
+++ b/vm/alloc/Heap.c
@@ -38,6 +38,7 @@
 
 static const char* GcReasonStr[] = {
     [GC_FOR_MALLOC] = "GC_FOR_MALLOC",
+    [GC_CONCURRENT] = "GC_CONCURRENT",
     [GC_EXPLICIT] = "GC_EXPLICIT",
     [GC_EXTERNAL_ALLOC] = "GC_EXTERNAL_ALLOC",
     [GC_HPROF_DUMP_HEAP] = "GC_HPROF_DUMP_HEAP"
@@ -91,12 +92,13 @@
     return true;
 }
 
-void dvmHeapStartupAfterZygote()
+bool dvmHeapStartupAfterZygote(void)
 {
     /* Update our idea of the last GC start time so that we
      * don't use the last time that Zygote happened to GC.
      */
     gDvm.gcHeap->gcStartTime = dvmGetRelativeTimeUsec();
+    return dvmHeapSourceStartupAfterZygote();
 }
 
 void dvmHeapShutdown()
@@ -127,6 +129,14 @@
 }
 
 /*
+ * Shutdown any threads internal to the heap.
+ */
+void dvmHeapThreadShutdown(void)
+{
+    dvmHeapSourceThreadShutdown();
+}
+
+/*
  * We've been asked to allocate something we can't, e.g. an array so
  * large that (length * elementWidth) is larger than 2^31.
  *
@@ -613,6 +623,7 @@
     int numFreed;
     size_t sizeFreed;
     GcMode gcMode;
+    int oldThreadPriority = kInvalidPriority;
 
     /* The heap lock must be held.
      */
@@ -635,32 +646,37 @@
 
     dvmSuspendAllThreads(SUSPEND_FOR_GC);
 
-    /* Get the priority (the "nice" value) of the current thread.  The
-     * getpriority() call can legitimately return -1, so we have to
-     * explicitly test errno.
+    /*
+     * If we are not marking concurrently raise the priority of the
+     * thread performing the garbage collection.
      */
-    errno = 0;
-    int oldThreadPriority = kInvalidPriority;
-    int priorityResult = getpriority(PRIO_PROCESS, 0);
-    if (errno != 0) {
-        LOGI_HEAP("getpriority(self) failed: %s\n", strerror(errno));
-    } else if (priorityResult > ANDROID_PRIORITY_NORMAL) {
-        /* Current value is numerically greater than "normal", which
-         * in backward UNIX terms means lower priority.
+    if (reason != GC_CONCURRENT) {
+        /* Get the priority (the "nice" value) of the current thread.  The
+         * getpriority() call can legitimately return -1, so we have to
+         * explicitly test errno.
          */
+        errno = 0;
+        int priorityResult = getpriority(PRIO_PROCESS, 0);
+        if (errno != 0) {
+            LOGI_HEAP("getpriority(self) failed: %s\n", strerror(errno));
+        } else if (priorityResult > ANDROID_PRIORITY_NORMAL) {
+            /* Current value is numerically greater than "normal", which
+             * in backward UNIX terms means lower priority.
+             */
 
-        if (priorityResult >= ANDROID_PRIORITY_BACKGROUND) {
-            set_sched_policy(dvmGetSysThreadId(), SP_FOREGROUND);
-        }
+            if (priorityResult >= ANDROID_PRIORITY_BACKGROUND) {
+                set_sched_policy(dvmGetSysThreadId(), SP_FOREGROUND);
+            }
 
-        if (setpriority(PRIO_PROCESS, 0, ANDROID_PRIORITY_NORMAL) != 0) {
-            LOGI_HEAP("Unable to elevate priority from %d to %d\n",
-                priorityResult, ANDROID_PRIORITY_NORMAL);
-        } else {
-            /* priority elevated; save value so we can restore it later */
-            LOGD_HEAP("Elevating priority from %d to %d\n",
-                priorityResult, ANDROID_PRIORITY_NORMAL);
-            oldThreadPriority = priorityResult;
+            if (setpriority(PRIO_PROCESS, 0, ANDROID_PRIORITY_NORMAL) != 0) {
+                LOGI_HEAP("Unable to elevate priority from %d to %d\n",
+                          priorityResult, ANDROID_PRIORITY_NORMAL);
+            } else {
+                /* priority elevated; save value so we can restore it later */
+                LOGD_HEAP("Elevating priority from %d to %d\n",
+                          priorityResult, ANDROID_PRIORITY_NORMAL);
+                oldThreadPriority = priorityResult;
+            }
         }
     }
 
@@ -760,6 +776,14 @@
     gcHeap->weakReferences = NULL;
     gcHeap->phantomReferences = NULL;
 
+    if (reason == GC_CONCURRENT) {
+        /*
+         * We are performing a concurrent collection.  Resume all
+         * threads for the duration of the recursive mark.
+         */
+        dvmResumeAllThreads(SUSPEND_FOR_GC);
+    }
+
     /* Recursively mark any objects that marked objects point to strongly.
      * If we're not collecting soft references, soft-reachable
      * objects will also be marked.
@@ -767,6 +791,24 @@
     LOGD_HEAP("Recursing...");
     dvmHeapScanMarkedObjects();
 
+    if (reason == GC_CONCURRENT) {
+        /*
+         * We are performing a concurrent collection.  Perform the
+         * final thread suspension.
+         */
+        dvmSuspendAllThreads(SUSPEND_FOR_GC);
+        /*
+         * As no barrier intercepts root updates, we conservatively
+         * assume all roots may be gray and re-mark them.
+         */
+        dvmHeapMarkRootSet();
+        /*
+         * Recursively mark gray objects pointed to by the roots or by
+         * heap objects dirtied during the concurrent mark.
+         */
+        dvmMarkDirtyObjects();
+    }
+
     /* All strongly-reachable objects have now been marked.
      */
     LOGD_HEAP("Handling soft references...");
@@ -870,16 +912,18 @@
 #endif
 
     dvmResumeAllThreads(SUSPEND_FOR_GC);
-    if (oldThreadPriority != kInvalidPriority) {
-        if (setpriority(PRIO_PROCESS, 0, oldThreadPriority) != 0) {
-            LOGW_HEAP("Unable to reset priority to %d: %s\n",
-                oldThreadPriority, strerror(errno));
-        } else {
-            LOGD_HEAP("Reset priority to %d\n", oldThreadPriority);
-        }
+    if (reason != GC_CONCURRENT) {
+        if (oldThreadPriority != kInvalidPriority) {
+            if (setpriority(PRIO_PROCESS, 0, oldThreadPriority) != 0) {
+                LOGW_HEAP("Unable to reset priority to %d: %s\n",
+                          oldThreadPriority, strerror(errno));
+            } else {
+                LOGD_HEAP("Reset priority to %d\n", oldThreadPriority);
+            }
 
-        if (oldThreadPriority >= ANDROID_PRIORITY_BACKGROUND) {
-            set_sched_policy(dvmGetSysThreadId(), SP_BACKGROUND);
+            if (oldThreadPriority >= ANDROID_PRIORITY_BACKGROUND) {
+                set_sched_policy(dvmGetSysThreadId(), SP_BACKGROUND);
+            }
         }
     }
     gcElapsedTime = (dvmGetRelativeTimeUsec() - gcHeap->gcStartTime) / 1000;
diff --git a/vm/alloc/Heap.h b/vm/alloc/Heap.h
index 0b0bc47..aeaab2b 100644
--- a/vm/alloc/Heap.h
+++ b/vm/alloc/Heap.h
@@ -31,7 +31,7 @@
  * This needs to be called before the first allocation or GC that
  * happens after forking.
  */
-void dvmHeapStartupAfterZygote(void);
+bool dvmHeapStartupAfterZygote(void);
 
 /*
  * Tear down the GC heap.
@@ -41,6 +41,12 @@
  */
 void dvmHeapShutdown(void);
 
+/*
+ * Stops any threads internal to the garbage collector.  Called before
+ * the heap itself is shutdown.
+ */
+void dvmHeapThreadShutdown(void);
+
 #if 0       // needs to be in Alloc.h so debug code can find it.
 /*
  * Returns a number of bytes greater than or
@@ -62,6 +68,8 @@
 typedef enum {
     /* Not enough space for an "ordinary" Object to be allocated. */
     GC_FOR_MALLOC,
+    /* Automatic GC triggered by exceeding a heap occupancy threshold. */
+    GC_CONCURRENT,
     /* Explicit GC via Runtime.gc(), VMRuntime.gc(), or SIGUSR1. */
     GC_EXPLICIT,
     /* GC to try to reduce heap footprint to allow more non-GC'ed memory. */
diff --git a/vm/alloc/HeapSource.c b/vm/alloc/HeapSource.c
index 662f99d..66249a1 100644
--- a/vm/alloc/HeapSource.c
+++ b/vm/alloc/HeapSource.c
@@ -43,6 +43,12 @@
 #define HEAP_IDEAL_FREE             (2 * 1024 * 1024)
 #define HEAP_MIN_FREE               (HEAP_IDEAL_FREE / 4)
 
+/*
+ * When the number of bytes allocated since the previous GC exceeds
+ * this threshold a concurrent garbage collection is triggered.
+ */
+#define OCCUPANCY_THRESHOLD (256 << 10)
+
 #define HS_BOILERPLATE() \
     do { \
         assert(gDvm.gcHeap != NULL); \
@@ -110,6 +116,12 @@
      */
     size_t bytesAllocated;
 
+    /*
+     * The number of bytes allocated after the previous garbage
+     * collection.
+     */
+    size_t prevBytesAllocated;
+
     /* Number of objects currently allocated from this mspace.
      */
     size_t objectsAllocated;
@@ -192,6 +204,15 @@
      * The mark bitmap.
      */
     HeapBitmap markBits;
+
+    /*
+     * State for the GC daemon.
+     */
+    bool hasGcThread;
+    pthread_t gcThread;
+    bool gcThreadShutdown;
+    pthread_mutex_t gcThreadMutex;
+    pthread_cond_t gcThreadCond;
 };
 
 #define hs2heap(hs_) (&((hs_)->heaps[0]))
@@ -282,8 +303,7 @@
     assert(heap->bytesAllocated < mspace_footprint(heap->msp));
 }
 
-static inline void
-countFree(Heap *heap, const void *ptr, bool isObj)
+static void countFree(Heap *heap, const void *ptr, bool isObj)
 {
     HeapSource *hs;
     size_t delta;
@@ -292,6 +312,7 @@
     assert(delta > 0);
     if (delta < heap->bytesAllocated) {
         heap->bytesAllocated -= delta;
+        heap->prevBytesAllocated = heap->bytesAllocated;
     } else {
         heap->bytesAllocated = 0;
     }
@@ -400,6 +421,47 @@
 }
 
 /*
+ * The garbage collection daemon.  Initiates a concurrent collection
+ * when signaled.
+ */
+static void *gcDaemonThread(void* arg)
+{
+    dvmChangeStatus(NULL, THREAD_VMWAIT);
+    dvmLockMutex(&gHs->gcThreadMutex);
+    while (gHs->gcThreadShutdown != true) {
+        dvmWaitCond(&gHs->gcThreadCond, &gHs->gcThreadMutex);
+        dvmLockHeap();
+        dvmChangeStatus(NULL, THREAD_RUNNING);
+        dvmCollectGarbageInternal(false, GC_CONCURRENT);
+        dvmChangeStatus(NULL, THREAD_VMWAIT);
+        dvmUnlockHeap();
+    }
+    dvmChangeStatus(NULL, THREAD_RUNNING);
+    return NULL;
+}
+
+static bool gcDaemonStartup(void)
+{
+    dvmInitMutex(&gHs->gcThreadMutex);
+    pthread_cond_init(&gHs->gcThreadCond, NULL);
+    gHs->gcThreadShutdown = false;
+    gHs->hasGcThread = dvmCreateInternalThread(&gHs->gcThread, "GC",
+                                               gcDaemonThread, NULL);
+    return gHs->hasGcThread;
+}
+
+static void gcDaemonShutdown(void)
+{
+    if (gDvm.concurrentMarkSweep) {
+        dvmLockMutex(&gHs->gcThreadMutex);
+        gHs->gcThreadShutdown = true;
+        dvmSignalCond(&gHs->gcThreadCond);
+        pthread_join(gHs->gcThread, NULL);
+        dvmUnlockMutex(&gHs->gcThreadMutex);
+    }
+}
+
+/*
  * Initializes the heap source; must be called before any other
  * dvmHeapSource*() functions.  Returns a GcHeap structure
  * allocated from the heap source.
@@ -472,6 +534,7 @@
     hs->softLimit = INT_MAX;    // no soft limit at first
     hs->numHeaps = 0;
     hs->sawZygote = gDvm.zygote;
+    hs->hasGcThread = false;
     hs->heapBase = base;
     hs->heapLength = length;
     if (!addNewHeap(hs, msp, absoluteMaxSize)) {
@@ -502,6 +565,11 @@
     return NULL;
 }
 
+bool dvmHeapSourceStartupAfterZygote(void)
+{
+    return gDvm.concurrentMarkSweep ? gcDaemonStartup() : true;
+}
+
 /*
  * This is called while in zygote mode, right before we fork() for the
  * first time.  We create a heap for all future zygote process allocations,
@@ -529,6 +597,11 @@
     return true;
 }
 
+void dvmHeapSourceThreadShutdown(void)
+{
+    gcDaemonShutdown();
+}
+
 /*
  * Tears down the entire GcHeap structure and all of the substructures
  * attached to it.  This call has the side effect of setting the given
@@ -650,7 +723,7 @@
 /*
  * Get the bitmap representing all live objects.
  */
-HeapBitmap *dvmHeapSourceGetLiveBits()
+HeapBitmap *dvmHeapSourceGetLiveBits(void)
 {
     HS_BOILERPLATE();
 
@@ -724,6 +797,12 @@
         ptr = mspace_calloc(heap->msp, 1, n);
         if (ptr != NULL) {
             countAllocation(heap, ptr, true);
+            size_t allocated = heap->bytesAllocated - heap->prevBytesAllocated;
+            if (allocated > OCCUPANCY_THRESHOLD) {
+                if (hs->hasGcThread == true) {
+                    dvmSignalCond(&gHs->gcThreadCond);
+                }
+            }
         }
     } else {
         /* This allocation would push us over the soft limit;
diff --git a/vm/alloc/HeapSource.h b/vm/alloc/HeapSource.h
index 465e7c9..fb8fdf1 100644
--- a/vm/alloc/HeapSource.h
+++ b/vm/alloc/HeapSource.h
@@ -36,6 +36,14 @@
 
 /*
  * If the HeapSource was created while in zygote mode, this
+ * will create a new heap for post-zygote allocations.
+ * Having a separate heap should maximize the number of pages
+ * that a given app_process shares with the zygote process.
+ */
+bool dvmHeapSourceStartupAfterZygote(void);
+
+/*
+ * If the HeapSource was created while in zygote mode, this
  * will create an additional zygote heap before the first fork().
  * Having a separate heap should reduce the number of shared
  * pages subsequently touched by the zygote process.
@@ -43,6 +51,12 @@
 bool dvmHeapSourceStartupBeforeFork(void);
 
 /*
+ * Shutdown any threads internal to the heap source.  This should be
+ * called before the heap source itself is shutdown.
+ */
+void dvmHeapSourceThreadShutdown(void);
+
+/*
  * Tears down the heap source and frees any resources associated with it.
  */
 void dvmHeapSourceShutdown(GcHeap **gcHeap);
@@ -57,7 +71,7 @@
 /*
  * Get the bitmap representing all live objects.
  */
-HeapBitmap *dvmHeapSourceGetLiveBits();
+HeapBitmap *dvmHeapSourceGetLiveBits(void);
 
 /*
  * Returns the requested value. If the per-heap stats are requested, fill
diff --git a/vm/alloc/MarkSweep.c b/vm/alloc/MarkSweep.c
index 81b2572..bef1a8b 100644
--- a/vm/alloc/MarkSweep.c
+++ b/vm/alloc/MarkSweep.c
@@ -20,6 +20,7 @@
 #include "alloc/HeapInternal.h"
 #include "alloc/HeapSource.h"
 #include "alloc/MarkSweep.h"
+#include "alloc/Visit.h"
 #include <limits.h>     // for ULONG_MAX
 #include <sys/mman.h>   // for madvise(), mmap()
 #include <cutils/ashmem.h>
@@ -453,7 +454,6 @@
     markObject((Object *)obj->obj.clazz, ctx);
     /* Scan the instance fields. */
     scanInstanceFields((const Object *)obj, ctx);
-
     if (IS_CLASS_FLAG_SET(obj->obj.clazz, CLASS_ISREFERENCE)) {
         delayReferenceReferent((Object *)obj, ctx);
     }
@@ -547,6 +547,53 @@
     LOG_SCAN("done with marked objects\n");
 }
 
+static void dirtyObjectVisitor(void *ptr, void *arg)
+{
+    markObject(*(Object **)ptr, (GcMarkContext *)arg);
+}
+
+/*
+ * Callback applied to each gray object to blacken it.
+ */
+static bool dirtyObjectCallback(size_t numPtrs, void **ptrs,
+                                const void *finger, void *arg)
+{
+    GcMarkContext *ctx;
+    size_t i;
+
+    ctx = (GcMarkContext *)arg;
+    for (i = 0; i < numPtrs; ++i) {
+        dvmVisitObject(dirtyObjectVisitor, ptrs[i], ctx);
+    }
+    return true;
+}
+
+/*
+ * Re-mark dirtied objects.  Iterates through all blackened objects
+ * looking for references to white objects.
+ */
+void dvmMarkDirtyObjects(void)
+{
+    HeapBitmap markBits[HEAP_SOURCE_MAX_HEAP_COUNT];
+    HeapBitmap liveBits[HEAP_SOURCE_MAX_HEAP_COUNT];
+    GcMarkContext *ctx;
+    size_t numBitmaps;
+    size_t i;
+
+    ctx = &gDvm.gcHeap->markContext;
+    /*
+     * Reset the finger to the maximum value to ensure that gray
+     * objects are always pushed onto the mark stack.
+     */
+    assert(ctx->finger == (void *)ULONG_MAX);
+    numBitmaps = dvmHeapSourceGetNumHeaps();
+    dvmHeapSourceGetObjectBitmaps(liveBits, markBits, numBitmaps);
+    for (i = 0; i < numBitmaps; i++) {
+        dvmHeapBitmapWalk(&markBits[i], dirtyObjectCallback, ctx);
+    }
+    processMarkStack(ctx);
+}
+
 /** Clear the referent field.
  */
 static void clearReference(Object *reference)
diff --git a/vm/alloc/MarkSweep.h b/vm/alloc/MarkSweep.h
index 5e5a0eb..d344aae 100644
--- a/vm/alloc/MarkSweep.h
+++ b/vm/alloc/MarkSweep.h
@@ -47,6 +47,7 @@
 bool dvmHeapBeginMarkStep(GcMode mode);
 void dvmHeapMarkRootSet(void);
 void dvmHeapScanMarkedObjects(void);
+void dvmMarkDirtyObjects(void);
 void dvmHandleSoftRefs(Object **list);
 void dvmClearWhiteRefs(Object **list);
 void dvmHeapScheduleFinalizations(void);