Dalvik SMP update

Fix some SMP correctness issues in the VM:

 - Correct AtomicCache implementation, inserting barriers where needed
   and correcting existing usage.
 - Emphasize that String startup isn't expected to be reentrant, and
   use atomic ops to ensure that we explode if anybody tries it.
 - Use 64-bit quasiatomic ops to manage the "last activity" timer in
   JDWP.  (Also, provide some documented but unimplemented behavior.)
 - Updated the volatile operations in sun.misc.Unsafe to include
   appropriate barriers.

(This does not purport to correct all SMP issues, just some of the
more obvious ones.)

Change-Id: I06957ebcf2724fe7a228b30d00194b9b4808fae0
diff --git a/vm/AtomicCache.c b/vm/AtomicCache.c
index b4f9862..bf8639e 100644
--- a/vm/AtomicCache.c
+++ b/vm/AtomicCache.c
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Mutex-free cache.  Each entry has two 32-bit keys, one 32-bit value,
  * and a 32-bit version.
@@ -75,7 +76,6 @@
 }
 
 
-
 /*
  * Update a cache entry.
  *
@@ -91,18 +91,19 @@
     )
 {
     /*
-     * The fields don't match, so we need to update them.  There is a
-     * risk that another thread is also trying to update them, so we
-     * grab an ownership flag to lock out other threads.
+     * The fields don't match, so we want to update them.  There is a risk
+     * that another thread is also trying to update them, so we grab an
+     * ownership flag to lock out other threads.
      *
      * If the lock flag was already set in "firstVersion", somebody else
-     * was in mid-update.  (This means that using "firstVersion" as the
-     * "before" argument to the CAS would succeed when it shouldn't and
-     * vice-versa -- we could also just pass in
-     * (firstVersion & ~ATOMIC_LOCK_FLAG) as the first argument.)
+     * was in mid-update, and we don't want to continue here.  (This means
+     * that using "firstVersion" as the "before" argument to the CAS would
+     * succeed when it shouldn't and vice-versa -- we could also just pass
+     * in (firstVersion & ~ATOMIC_LOCK_FLAG) as the first argument.)
      *
-     * NOTE: we don't really deal with the situation where we overflow
-     * the version counter (at 2^31).  Probably not a real concern.
+     * NOTE: we don't deal with the situation where we overflow the version
+     * counter and trample the ATOMIC_LOCK_FLAG (at 2^31).  Probably not
+     * a real concern.
      */
     if ((firstVersion & ATOMIC_LOCK_FLAG) != 0 ||
         android_atomic_release_cas(
@@ -129,7 +130,11 @@
         pCache->misses++;
 #endif
 
-    /* volatile incr */
+    /*
+     * We have the write lock, but somebody could be reading this entry
+     * while we work.  We use memory barriers to ensure that the state
+     * is always consistent when the version number is even.
+     */
     pEntry->version++;
     ANDROID_MEMBAR_FULL();
 
@@ -137,15 +142,15 @@
     pEntry->key2 = key2;
     pEntry->value = value;
 
-    /* volatile incr */
-    pEntry->version++;
     ANDROID_MEMBAR_FULL();
+    pEntry->version++;
 
     /*
      * Clear the lock flag.  Nobody else should have been able to modify
      * pEntry->version, so if this fails the world is broken.
      */
     firstVersion += 2;
+    assert((firstVersion & 0x01) == 0);
     if (android_atomic_release_cas(
             firstVersion | ATOMIC_LOCK_FLAG, firstVersion,
             (volatile s4*) &pEntry->version) != 0)
diff --git a/vm/AtomicCache.h b/vm/AtomicCache.h
index 1d59a47..66d222e 100644
--- a/vm/AtomicCache.h
+++ b/vm/AtomicCache.h
@@ -63,17 +63,16 @@
  * Do a cache lookup.  We need to be able to read and write entries
  * atomically.  There are a couple of ways to do this:
  *  (1) Have a global lock.  A mutex is too heavy, so instead we would use
- *      an atomic flag.  If the flag is set, we could sit and spin, but
- *      if we're a high-priority thread that may cause a lockup.  Better
- *      to just ignore the cache and do the full computation.
+ *      an atomic flag.  If the flag is set, we could sit and spin,
+ *      but if we're a high-priority thread that may cause a lockup.
+ *      Better to just ignore the cache and do the full computation.
  *  (2) Have a "version" that gets incremented atomically when a write
  *      begins and again when it completes.  Compare the version before
- *      and after doing reads.  So long as "version" is volatile the
- *      compiler will do the right thing, allowing us to skip atomic
- *      ops in the common read case.  The table updates are expensive,
- *      requiring two volatile writes and (for correctness on
- *      multiprocessor systems) memory barriers.  We also need some
- *      sort of lock to ensure that nobody else tries to start an
+ *      and after doing reads.  So long as we have memory barriers in the
+ *      right place the compiler and CPU will do the right thing, allowing
+ *      us to skip atomic ops in the common read case.  The table updates
+ *      are expensive, requiring two writes with barriers.  We also need
+ *      some sort of lock to ensure that nobody else tries to start an
  *      update while we're in the middle of one.
  *
  * We expect a 95+% hit rate for the things we use this for, so #2 is
@@ -103,8 +102,8 @@
     hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1);           \
     pEntry = (_cache)->entries + hash;                                      \
                                                                             \
-    /* volatile read */                                                     \
     firstVersion = pEntry->version;                                         \
+    ANDROID_MEMBAR_FULL();                                                  \
                                                                             \
     if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) {       \
         /*                                                                  \
@@ -113,7 +112,8 @@
          * We're also hosed if "firstVersion" was odd, indicating that      \
          * an update was in progress before we got here.                    \
          */                                                                 \
-        value = pEntry->value;    /* must grab before next check */         \
+        value = pEntry->value;                                              \
+        ANDROID_MEMBAR_FULL();                                              \
                                                                             \
         if ((firstVersion & 0x01) != 0 || firstVersion != pEntry->version)  \
         {                                                                   \
diff --git a/vm/Globals.h b/vm/Globals.h
index 29bba41..4ebfd5e 100644
--- a/vm/Globals.h
+++ b/vm/Globals.h
@@ -235,7 +235,7 @@
     int         offJavaLangClass_pd;
 
     /* field offsets - String */
-    volatile int javaLangStringReady;   /* 0=not init, 1=ready, -1=initing */
+    int         javaLangStringReady;    /* 0=not init, 1=ready, -1=initing */
     int         offJavaLangString_value;
     int         offJavaLangString_count;
     int         offJavaLangString_offset;
diff --git a/vm/UtfString.c b/vm/UtfString.c
index da67fae..80687d9 100644
--- a/vm/UtfString.c
+++ b/vm/UtfString.c
@@ -36,6 +36,10 @@
  * String implements java/lang/CharSequence, but CharSequence doesn't exist)
  * we can try to create an exception string internally before anything has
  * really tried to use String.  In that case we basically self-destruct.
+ *
+ * We're expecting to be essentially single-threaded at this point.
+ * We employ atomics to ensure everything is observed correctly, and also
+ * to guarantee that we do detect a problem if our assumption is wrong.
  */
 static bool stringStartup()
 {
@@ -44,9 +48,12 @@
         assert(false);
         return false;
     }
-    assert(gDvm.javaLangStringReady == 0);
 
-    gDvm.javaLangStringReady = -1;
+    if (android_atomic_acquire_cas(0, -1, &gDvm.javaLangStringReady) != 0) {
+        LOGE("ERROR: initial string-ready state not 0 (%d)\n",
+            gDvm.javaLangStringReady);
+        return false;
+    }
 
     if (gDvm.classJavaLangString == NULL)
         gDvm.classJavaLangString =
@@ -94,7 +101,7 @@
     if (badValue)
         return false;
 
-    gDvm.javaLangStringReady = 1;
+    android_atomic_release_store(1, &gDvm.javaLangStringReady);
 
     return true;
 }
diff --git a/vm/jdwp/JdwpHandler.c b/vm/jdwp/JdwpHandler.c
index 8dd4cc6..69fe510 100644
--- a/vm/jdwp/JdwpHandler.c
+++ b/vm/jdwp/JdwpHandler.c
@@ -2106,15 +2106,14 @@
     JdwpError result = ERR_NONE;
     int i, respLen;
 
-    /*
-     * Activity from a debugger, not merely ddms.  Mark us as having an
-     * active debugger session, and zero out the last-activity timestamp.
-     */
     if (pHeader->cmdSet != kJDWPDdmCmdSet) {
+        /*
+         * Activity from a debugger, not merely ddms.  Mark us as having an
+         * active debugger session, and zero out the last-activity timestamp
+         * so waitForDebugger() doesn't return if we stall for a bit here.
+         */
         dvmDbgActive();
-
-        state->lastActivitySec = 0;
-        ANDROID_MEMBAR_FULL();
+        dvmQuasiAtomicSwap64(0, &state->lastActivityWhen);
     }
 
     /*
@@ -2192,12 +2191,7 @@
      * the initial setup.  Only update if this is a non-DDMS packet.
      */
     if (pHeader->cmdSet != kJDWPDdmCmdSet) {
-        long lastSec, lastMsec;
-
-        dvmJdwpGetNowMsec(&lastSec, &lastMsec);
-        state->lastActivityMsec = lastMsec;
-        ANDROID_MEMBAR_FULL();      // updating a 64-bit value
-        state->lastActivitySec = lastSec;
+        dvmQuasiAtomicSwap64(dvmJdwpGetNowMsec(), &state->lastActivityWhen);
     }
 
     /* tell the VM that GC is okay again */
diff --git a/vm/jdwp/JdwpMain.c b/vm/jdwp/JdwpMain.c
index 96747ad..1688e5e 100644
--- a/vm/jdwp/JdwpMain.c
+++ b/vm/jdwp/JdwpMain.c
@@ -349,59 +349,40 @@
     return state->debugThreadHandle;
 }
 
-#if 0
+
 /*
- * Wait until the debugger attaches.  Returns immediately if the debugger
- * is already attached.
+ * Support routines for waitForDebugger().
  *
- * If we return the instant the debugger connects, we run the risk of
- * executing code before the debugger has had a chance to configure
- * breakpoints or issue suspend calls.  It would be nice to just sit in
- * the suspended state, but most debuggers don't expect any threads to be
- * suspended when they attach.
+ * We can't have a trivial "waitForDebugger" function that returns the
+ * instant the debugger connects, because we run the risk of executing code
+ * before the debugger has had a chance to configure breakpoints or issue
+ * suspend calls.  It would be nice to just sit in the suspended state, but
+ * most debuggers don't expect any threads to be suspended when they attach.
  *
- * There's no event we can post to tell the debugger "we've stopped, and
- * we like it that way".  We could send a fake breakpoint, which should
+ * There's no JDWP event we can post to tell the debugger, "we've stopped,
+ * and we like it that way".  We could send a fake breakpoint, which should
  * cause the debugger to immediately send a resume, but the debugger might
  * send the resume immediately or might throw an exception of its own upon
  * receiving a breakpoint event that it didn't ask for.
  *
  * What we really want is a "wait until the debugger is done configuring
- * stuff" event.  We can get close with a "wait until the debugger has
- * been idle for a brief period", and we can do a mild approximation with
- * "just sleep for a second after it connects".
- *
- * We should be in THREAD_VMWAIT here, so we're not allowed to do anything
- * with objects because a GC could be in progress.
- *
- * NOTE: this trips as soon as something connects to the socket.  This
- * is no longer appropriate -- we don't want to return when DDMS connects.
- * We could fix this by polling for the first debugger packet, but we have
- * to watch out for disconnects.  If we're going to do polling, it's
- * probably best to do it at a higher level.
+ * stuff" event.  We can approximate this with a "wait until the debugger
+ * has been idle for a brief period".
  */
-void dvmJdwpWaitForDebugger(JdwpState* state)
-{
-    // no more
-}
-#endif
 
 /*
- * Get a notion of the current time, in milliseconds.  We leave it in
- * two 32-bit pieces.
+ * Get a notion of the current time, in milliseconds.
  */
-void dvmJdwpGetNowMsec(long* pSec, long* pMsec)
+s8 dvmJdwpGetNowMsec(void)
 {
 #ifdef HAVE_POSIX_CLOCKS
     struct timespec now;
     clock_gettime(CLOCK_MONOTONIC, &now);
-    *pSec = now.tv_sec;
-    *pMsec = now.tv_nsec / 1000000;
+    return now.tv_sec * 1000LL + now.tv_nsec / 1000000LL;
 #else
     struct timeval now;
     gettimeofday(&now, NULL);
-    *pSec = now.tv_sec;
-    *pMsec = now.tv_usec / 1000;
+    return now.tv_sec * 1000LL + now.tv_usec / 1000LL;
 #endif
 }
 
@@ -413,29 +394,23 @@
  */
 s8 dvmJdwpLastDebuggerActivity(JdwpState* state)
 {
-    long lastSec, lastMsec;
-    long nowSec, nowMsec;
+    if (!gDvm.debuggerActive) {
+        LOGD("dvmJdwpLastDebuggerActivity: no active debugger\n");
+        return -1;
+    }
 
-    /* these are volatile; lastSec becomes 0 during update */
-    lastSec = state->lastActivitySec;
-    lastMsec = state->lastActivityMsec;
+    s8 last = dvmQuasiAtomicRead64(&state->lastActivityWhen);
 
     /* initializing or in the middle of something? */
-    if (lastSec == 0 || state->lastActivitySec != lastSec) {
-        //LOGI("+++ last=busy\n");
+    if (last == 0) {
+        LOGV("+++ last=busy\n");
         return 0;
     }
 
-    /* get the current time *after* latching the "last" time */
-    dvmJdwpGetNowMsec(&nowSec, &nowMsec);
+    /* now get the current time */
+    s8 now = dvmJdwpGetNowMsec();
+    assert(now > last);
 
-    s8 last = (s8)lastSec * 1000 + lastMsec;
-    s8 now = (s8)nowSec * 1000 + nowMsec;
-
-    //LOGI("last is %ld.%ld --> %lld\n", lastSec, lastMsec, last);
-    //LOGI("now is  %ld.%ld --> %lld\n", nowSec, nowMsec, now);
-
-
-    //LOGI("+++ interval=%lld\n", now - last);
+    LOGV("+++ debugger interval=%lld\n", now - last);
     return now - last;
 }
diff --git a/vm/jdwp/JdwpPriv.h b/vm/jdwp/JdwpPriv.h
index 87c3fc7..85f9ec2 100644
--- a/vm/jdwp/JdwpPriv.h
+++ b/vm/jdwp/JdwpPriv.h
@@ -91,9 +91,8 @@
     pthread_mutex_t attachLock;
     pthread_cond_t  attachCond;
 
-    /* time of last debugger activity; "sec" zeroed while processing */
-    volatile long   lastActivitySec;
-    volatile long   lastActivityMsec;
+    /* time of last debugger activity, in milliseconds */
+    s8              lastActivityWhen;
 
     /* global counters and a mutex to protect them */
     u4              requestSerial;
@@ -130,7 +129,7 @@
 u4 dvmJdwpNextEventSerial(JdwpState* state);
 
 /* get current time, in msec */
-void dvmJdwpGetNowMsec(long* pSec, long* pMsec);
+s8 dvmJdwpGetNowMsec(void);
 
 
 /*
diff --git a/vm/native/sun_misc_Unsafe.c b/vm/native/sun_misc_Unsafe.c
index 5294368..b3c7141 100644
--- a/vm/native/sun_misc_Unsafe.c
+++ b/vm/native/sun_misc_Unsafe.c
@@ -84,8 +84,8 @@
     s4 newValue = args[5];
     volatile int32_t* address = (volatile int32_t*) (((u1*) obj) + offset);
 
-    // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-    int result = android_atomic_cmpxchg(expectedValue, newValue, address);
+    // Note: android_atomic_release_cas() returns 0 on success, not failure.
+    int result = android_atomic_release_cas(expectedValue, newValue, address);
 
     RETURN_BOOLEAN(result == 0);
 }
@@ -126,7 +126,7 @@
     int32_t* address = (int32_t*) (((u1*) obj) + offset);
 
     // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-    int result = android_atomic_cmpxchg((int32_t) expectedValue,
+    int result = android_atomic_release_cas((int32_t) expectedValue,
             (int32_t) newValue, address);
 
     RETURN_BOOLEAN(result == 0);
@@ -141,9 +141,10 @@
     // We ignore the this pointer in args[0].
     Object* obj = (Object*) args[1];
     s8 offset = GET_ARG_LONG(args, 2);
-    volatile s4* address = (volatile s4*) (((u1*) obj) + offset);
+    volatile int32_t* address = (volatile int32_t*) (((u1*) obj) + offset);
 
-    RETURN_INT(*address);
+    int32_t value = android_atomic_acquire_load(address);
+    RETURN_INT(value);
 }
 
 /*
@@ -156,9 +157,9 @@
     Object* obj = (Object*) args[1];
     s8 offset = GET_ARG_LONG(args, 2);
     s4 value = (s4) args[4];
-    volatile s4* address = (volatile s4*) (((u1*) obj) + offset);
+    volatile int32_t* address = (volatile int32_t*) (((u1*) obj) + offset);
 
-    *address = value;
+    android_atomic_release_store(value, address);
     RETURN_VOID();
 }
 
@@ -171,7 +172,7 @@
     // We ignore the this pointer in args[0].
     Object* obj = (Object*) args[1];
     s8 offset = GET_ARG_LONG(args, 2);
-    volatile s8* address = (volatile s8*) (((u1*) obj) + offset);
+    volatile int64_t* address = (volatile int64_t*) (((u1*) obj) + offset);
 
     RETURN_LONG(dvmQuasiAtomicRead64(address));
 }
@@ -186,7 +187,7 @@
     Object* obj = (Object*) args[1];
     s8 offset = GET_ARG_LONG(args, 2);
     s8 value = GET_ARG_LONG(args, 4);
-    volatile s8* address = (volatile s8*) (((u1*) obj) + offset);
+    volatile int64_t* address = (volatile int64_t*) (((u1*) obj) + offset);
 
     dvmQuasiAtomicSwap64(value, address);
     RETURN_VOID();
@@ -201,9 +202,9 @@
     // We ignore the this pointer in args[0].
     Object* obj = (Object*) args[1];
     s8 offset = GET_ARG_LONG(args, 2);
-    volatile Object** address = (volatile Object**) (((u1*) obj) + offset);
+    volatile int32_t* address = (volatile int32_t*) (((u1*) obj) + offset);
 
-    RETURN_PTR((void*) *address);
+    RETURN_PTR((Object*) android_atomic_acquire_load(address));
 }
 
 /*
@@ -217,9 +218,9 @@
     Object* obj = (Object*) args[1];
     s8 offset = GET_ARG_LONG(args, 2);
     Object* value = (Object*) args[4];
-    volatile Object** address = (volatile Object**) (((u1*) obj) + offset);
+    volatile int32_t* address = (volatile int32_t*) (((u1*) obj) + offset);
 
-    *address = value;
+    android_atomic_release_store((int32_t)value, address);
     RETURN_VOID();
 }