Atomic/SMP update.

Moved the quasi-atomic 64-bit operations in here.  They still need work.

Use the new libcutils atomic-inline header for our memory barriers.
Adjust makefiles to set SMP definition appropriately.
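
For reference, here is a minimal usage sketch of the quasi-atomic
64-bit helpers declared in vm/Atomic.h below.  It is illustration
only, not part of this patch; exampleAtomicAdd64 is a hypothetical
helper built on the declared API (cmpxchg returns 0 on success):

    #include <stdint.h>

    /* declared in vm/Atomic.h by this change */
    extern int64_t android_quasiatomic_read_64(volatile int64_t* addr);
    extern int android_quasiatomic_cmpxchg_64(int64_t oldvalue,
            int64_t newvalue, volatile int64_t* addr);

    /* hypothetical example: atomically add 'delta' to a 64-bit value */
    static int64_t exampleAtomicAdd64(volatile int64_t* addr, int64_t delta)
    {
        int64_t oldVal;
        do {
            oldVal = android_quasiatomic_read_64(addr);
            /* retry if another thread changed *addr under us;
             * cmpxchg returns 0 on success, nonzero on failure */
        } while (android_quasiatomic_cmpxchg_64(oldVal, oldVal + delta,
                     addr) != 0);
        return oldVal + delta;
    }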

Change-Id: Id2ab2123724bb0baeb32f862b5568392aba35a59
diff --git a/dexopt/Android.mk b/dexopt/Android.mk
index 8637073..eb486c8 100644
--- a/dexopt/Android.mk
+++ b/dexopt/Android.mk
@@ -35,6 +35,12 @@
 		libssl \
 		libdvm
 
+ifeq ($(TARGET_CPU_SMP),true)
+    LOCAL_CFLAGS += -DANDROID_SMP=1
+else
+    LOCAL_CFLAGS += -DANDROID_SMP=0
+endif
+
 LOCAL_MODULE := dexopt
 
 include $(BUILD_EXECUTABLE)
diff --git a/vm/Android.mk b/vm/Android.mk
index 0ef00f5..45f15e5 100644
--- a/vm/Android.mk
+++ b/vm/Android.mk
@@ -36,6 +36,13 @@
     WITH_JIT := false
 endif
 
+ifeq ($(TARGET_CPU_SMP),true)
+    target_smp_flag := -DANDROID_SMP=1
+else
+    target_smp_flag := -DANDROID_SMP=0
+endif
+host_smp_flag := -DANDROID_SMP=1
+
 # Build the installed version (libdvm.so) first
 include $(LOCAL_PATH)/ReconfigureDvm.mk
 
@@ -45,6 +52,7 @@
 endif
 LOCAL_MODULE_TAGS := user
 LOCAL_MODULE := libdvm
+LOCAL_CFLAGS += $(target_smp_flag)
 include $(BUILD_SHARED_LIBRARY)
 
 # If WITH_JIT is configured, build multiple versions of libdvm.so to facilitate
@@ -57,7 +65,7 @@
 
     # Enable assertions and JIT-tuning
     LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \
-				    -DWITH_JIT_TUNING -DJIT_STATS
+                    -DWITH_JIT_TUNING -DJIT_STATS $(target_smp_flag)
     LOCAL_MODULE := libdvm_assert
     include $(BUILD_SHARED_LIBRARY)
 
@@ -67,7 +75,7 @@
 
     # Enable assertions and JIT self-verification
     LOCAL_CFLAGS += -UNDEBUG -DDEBUG=1 -DLOG_NDEBUG=1 -DWITH_DALVIK_ASSERT \
-					-DWITH_SELF_VERIFICATION
+                    -DWITH_SELF_VERIFICATION $(target_smp_flag)
     LOCAL_MODULE := libdvm_sv
     include $(BUILD_SHARED_LIBRARY)
 
@@ -76,6 +84,7 @@
     WITH_JIT := false
     include $(LOCAL_PATH)/ReconfigureDvm.mk
 
+    LOCAL_CFLAGS += $(target_smp_flag)
     LOCAL_MODULE := libdvm_interp
     include $(BUILD_SHARED_LIBRARY)
 
@@ -117,6 +126,7 @@
             $(patsubst libffi, ,$(LOCAL_SHARED_LIBRARIES))
     endif
 
+    LOCAL_CFLAGS += $(host_smp_flag)
     LOCAL_MODULE := libdvm-host
 
     include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/vm/Atomic.c b/vm/Atomic.c
new file mode 100644
index 0000000..859a030
--- /dev/null
+++ b/vm/Atomic.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+
+#include <cutils/atomic.h>
+
+/*
+ * Quasi-atomic 64-bit operations, for platforms that lack the real thing.
+ *
+ * TODO: unify ARM/x86/sh implementations using the to-be-written
+ * spin lock implementation.  We don't want to rely on mutex innards,
+ * and it would be great if all platforms were running the same code.
+ */
+
+#if defined(HAVE_MACOSX_IPC)
+
+#include <libkern/OSAtomic.h>
+
+#if defined(__ppc__)        \
+    || defined(__PPC__)     \
+    || defined(__powerpc__) \
+    || defined(__powerpc)   \
+    || defined(__POWERPC__) \
+    || defined(_M_PPC)      \
+    || defined(__PPC)
+#define NEED_QUASIATOMICS 1
+#else
+
+int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
+        volatile int64_t* addr) {
+    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
+            (int64_t*)addr) == 0;
+}
+
+int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
+    int64_t oldValue;
+    do {
+        oldValue = *addr;
+    } while (android_quasiatomic_cmpxchg_64(oldValue, value, addr));
+    return oldValue;
+}
+
+int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
+    return OSAtomicAdd64Barrier(0, addr);
+}
+#endif
+
+#elif defined(__i386__) || defined(__x86_64__)
+#define NEED_QUASIATOMICS 1
+
+#elif __arm__
+// Most of the implementation is in atomic-android-arm.s.
+
+// On the device, we implement the 64-bit atomic operations through
+// mutex locking.  Normally this is bad, because we would have to
+// initialize a pthread_mutex_t before being able to use it, which
+// means doing an initialization check on every function call; that's
+// where really ugly things begin...
+//
+// BUT, as a special twist, we take advantage of the fact that in our
+// pthread library, a mutex is simply a volatile word whose value is
+// always initialized to 0.  In other words, simply declaring a static
+// mutex object initializes it!
+//
+// Another twist is that we use a small array of mutexes, indexed by
+// a hash of the target address, to spread lock contention.
+//
+
+#include <pthread.h>
+
+#define  SWAP_LOCK_COUNT  32U
+static pthread_mutex_t  _swap_locks[SWAP_LOCK_COUNT];
+
+#define  SWAP_LOCK(addr)   \
+   &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
+
+
+int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
+    int64_t oldValue;
+    pthread_mutex_t*  lock = SWAP_LOCK(addr);
+
+    pthread_mutex_lock(lock);
+
+    oldValue = *addr;
+    *addr    = value;
+
+    pthread_mutex_unlock(lock);
+    return oldValue;
+}
+
+int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
+        volatile int64_t* addr) {
+    int result;
+    pthread_mutex_t*  lock = SWAP_LOCK(addr);
+
+    pthread_mutex_lock(lock);
+
+    if (*addr == oldvalue) {
+        *addr  = newvalue;
+        result = 0;
+    } else {
+        result = 1;
+    }
+    pthread_mutex_unlock(lock);
+    return result;
+}
+
+int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
+    int64_t result;
+    pthread_mutex_t*  lock = SWAP_LOCK(addr);
+
+    pthread_mutex_lock(lock);
+    result = *addr;
+    pthread_mutex_unlock(lock);
+    return result;
+}
+
+/*****************************************************************************/
+#elif __sh__
+#define NEED_QUASIATOMICS 1
+
+#else
+#error "Unsupported atomic operations for this platform"
+#endif
+
+
+#if NEED_QUASIATOMICS
+
+/* Note that a spinlock is *not* a good idea in general,
+ * since it can introduce subtle issues.  For example, a
+ * real-time thread trying to acquire a spinlock already
+ * held by another thread will never yield, so the CPU
+ * spins endlessly!
+ *
+ * However, this code is only used on the Linux simulator
+ * so it's probably ok for us.
+ *
+ * The alternative is to use a pthread mutex, but
+ * these must be initialized before being used, and
+ * then you have the problem of lazily initializing
+ * a mutex without any other synchronization primitive.
+ */
+
+/* global spinlock for all 64-bit quasiatomic operations */
+static int32_t quasiatomic_spinlock = 0;
+
+int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
+        volatile int64_t* addr) {
+    int result;
+
+    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
+#ifdef HAVE_WIN32_THREADS
+        Sleep(0);
+#else
+        sched_yield();
+#endif
+    }
+
+    if (*addr == oldvalue) {
+        *addr = newvalue;
+        result = 0;
+    } else {
+        result = 1;
+    }
+
+    android_atomic_swap(0, &quasiatomic_spinlock);
+
+    return result;
+}
+
+int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
+    int64_t result;
+
+    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
+#ifdef HAVE_WIN32_THREADS
+        Sleep(0);
+#else
+        sched_yield();
+#endif
+    }
+
+    result = *addr;
+    android_atomic_swap(0, &quasiatomic_spinlock);
+
+    return result;
+}
+
+int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
+    int64_t result;
+
+    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
+#ifdef HAVE_WIN32_THREADS
+        Sleep(0);
+#else
+        sched_yield();
+#endif
+    }
+
+    result = *addr;
+    *addr = value;
+    android_atomic_swap(0, &quasiatomic_spinlock);
+
+    return result;
+}
+
+#endif
+
diff --git a/vm/Atomic.h b/vm/Atomic.h
index bc0203c..aa2f103 100644
--- a/vm/Atomic.h
+++ b/vm/Atomic.h
@@ -13,31 +13,26 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 /*
  * Atomic operations
  */
 #ifndef _DALVIK_ATOMIC
 #define _DALVIK_ATOMIC
 
-#include <utils/Atomic.h>       /* use common Android atomic ops */
+#include <cutils/atomic.h>          /* use common Android atomic ops */
+#include <cutils/atomic-inline.h>   /* and some uncommon ones */
 
 /*
- * Memory barrier.  Guarantee that register-resident variables
- * are flushed to memory, and guarantee that instructions before
- * the barrier do not get reordered to appear past it.
- *
- * 'asm volatile ("":::"memory")' is probably overkill, but it's correct.
- * There may be a way to do it that doesn't flush every single register.
- *
- * TODO: look into the wmb() family on Linux and equivalents on other systems.
+ * Full memory barrier.  Ensures compiler ordering and SMP behavior.
  */
-#define MEM_BARRIER()   do { asm volatile ("":::"memory"); } while (0)
+#define MEM_BARRIER()   android_membar_full()
 
 /*
- * Atomic compare-and-swap macro.
+ * 32-bit atomic compare-and-swap macro.  Performs a memory barrier
+ * before the swap (store-release).
  *
- * If *_addr equals "_old", replace it with "_new" and return 1.  Otherwise
- * return 0.  (e.g. x86 "cmpxchgl" instruction.)
+ * If *_addr equals "_old", replace it with "_new" and return nonzero.
  *
  * Underlying function is currently declared:
  * int android_atomic_cmpxchg(int32_t old, int32_t new, volatile int32_t* addr)
@@ -45,4 +40,23 @@
 #define ATOMIC_CMP_SWAP(_addr, _old, _new) \
             (android_atomic_cmpxchg((_old), (_new), (_addr)) == 0)
 
+
+/*
+ * NOTE: Two "quasiatomic" operations on the exact same memory address
+ * are guaranteed to operate atomically with respect to each other,
+ * but no guarantees are made about quasiatomic operations mixed with
+ * non-quasiatomic operations on the same address, nor about
+ * quasiatomic operations that are performed on partially-overlapping
+ * memory.
+ */
+
+/*
+ * TODO: rename android_quasiatomic_* to dvmQuasiatomic*.  Don't want to do
+ * that yet due to branch merge issues.
+ */
+int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr);
+int64_t android_quasiatomic_read_64(volatile int64_t* addr);
+int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
+        volatile int64_t* addr);
+
 #endif /*_DALVIK_ATOMIC*/
diff --git a/vm/Dvm.mk b/vm/Dvm.mk
index baf41c6..0a5a4fb 100644
--- a/vm/Dvm.mk
+++ b/vm/Dvm.mk
@@ -98,6 +98,7 @@
 
 LOCAL_SRC_FILES := \
 	AllocTracker.c \
+	Atomic.c \
 	AtomicCache.c \
 	CheckJni.c \
 	Ddm.c \
diff --git a/vm/Init.c b/vm/Init.c
index 6630395..e7052fc 100644
--- a/vm/Init.c
+++ b/vm/Init.c
@@ -192,6 +192,9 @@
 #if defined(WITH_SELF_VERIFICATION)
         " self_verification"
 #endif
+#if ANDROID_SMP != 0
+        " smp"
+#endif
     );
 #ifdef DVM_SHOW_EXCEPTION
     dvmFprintf(stderr, " show_exception=%d", DVM_SHOW_EXCEPTION);