Implement 64-bit atomic ops for ARMv6K+.
This implements the "quasi-atomic" 64-bit operations using LDREXD/STREXD
when those instructions are available (e.g. our ARMv7-A devices).
Also, folded in an expanded version of AtomicSpeed.c that has been
floating around my hard drive for a while.
(This time, without Condition Code Roulette.)
Change-Id: I90803794994373e2c620be423c081f0da4469fdf
diff --git a/vm/Atomic.c b/vm/Atomic.c
index 4cc425e..6a5381c 100644
--- a/vm/Atomic.c
+++ b/vm/Atomic.c
@@ -21,11 +21,9 @@
/*
* Quasi-atomic 64-bit operations, for platforms that lack the real thing.
*
- * TODO: unify ARM/x86/sh implementations using the to-be-written
+ * TODO: unify ARMv6/x86/sh implementations using the to-be-written
* spin lock implementation. We don't want to rely on mutex innards,
* and it would be great if all platforms were running the same code.
- *
- * TODO: provide ARMv7-A-specific implementation using LDREXD/STREXD.
*/
#if defined(HAVE_MACOSX_IPC)
@@ -54,7 +52,7 @@
int64_t oldValue;
do {
oldValue = *addr;
- } while (android_quasiatomic_cmpxchg_64(oldValue, value, addr));
+ } while (dvmQuasiAtomicCas64(oldValue, value, addr));
return oldValue;
}
@@ -68,7 +66,54 @@
#define NEED_QUASIATOMICS 1
#elif __arm__
-// Most of the implementation is in atomic-android-arm.s.
+#include <machine/cpu-features.h>
+
+#ifdef __ARM_HAVE_LDREXD
+int64_t dvmQuasiAtomicSwap64(int64_t newvalue, volatile int64_t* addr)
+{
+ int64_t prev;
+ int status;
+ do {
+ __asm__ __volatile__ ("@ dvmQuasiAtomicSwap64\n"
+ "ldrexd %0, %H0, [%3]\n"
+ "strexd %1, %4, %H4, [%3]"
+ : "=&r" (prev), "=&r" (status), "+m"(*addr)
+ : "r" (addr), "r" (newvalue)
+ : "cc");
+ } while (__builtin_expect(status != 0, 0));
+ return prev;
+}
+
+int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
+ volatile int64_t* addr)
+{
+ int64_t prev;
+ int status;
+ do {
+ __asm__ __volatile__ ("@ dvmQuasiAtomicCas64\n"
+ "ldrexd %0, %H0, [%3]\n"
+ "mov %1, #0\n"
+ "teq %0, %4\n"
+ "teqeq %H0, %H4\n"
+ "strexdeq %1, %5, %H5, [%3]"
+ : "=&r" (prev), "=&r" (status), "+m"(*addr)
+ : "r" (addr), "Ir" (oldvalue), "r" (newvalue)
+ : "cc");
+ } while (__builtin_expect(status != 0, 0));
+ return prev != oldvalue;
+}
+
+int64_t dvmQuasiAtomicRead64(volatile int64_t* addr)
+{
+ int64_t value;
+ __asm__ __volatile__ ("@ dvmQuasiAtomicRead64\n"
+ "ldrexd %0, %H0, [%1]"
+ : "=&r" (value)
+ : "r" (addr));
+ return value;
+}
+
+#else
// on the device, we implement the 64-bit atomic operations through
// mutex locking. normally, this is bad because we must initialize
@@ -137,6 +182,8 @@
return result;
}
+#endif /*__ARM_HAVE_LDREXD*/
+
/*****************************************************************************/
#elif __sh__
#define NEED_QUASIATOMICS 1
@@ -232,4 +279,4 @@
return result;
}
-#endif
+#endif /*NEED_QUASIATOMICS*/
diff --git a/vm/Dvm.mk b/vm/Dvm.mk
index 8f421d4..451ed8e 100644
--- a/vm/Dvm.mk
+++ b/vm/Dvm.mk
@@ -99,7 +99,7 @@
LOCAL_SRC_FILES := \
AllocTracker.c \
- Atomic.c \
+ Atomic.c.arm \
AtomicCache.c \
CheckJni.c \
Ddm.c \
@@ -195,7 +195,7 @@
reflect/Annotation.c \
reflect/Proxy.c \
reflect/Reflect.c \
- test/AtomicSpeed.c \
+ test/AtomicTest.c.arm \
test/TestHash.c \
test/TestIndirectRefTable.c
diff --git a/vm/test/AtomicSpeed.c b/vm/test/AtomicSpeed.c
deleted file mode 100644
index db64f36..0000000
--- a/vm/test/AtomicSpeed.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (C) 2009 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Atomic operation performance test.
- */
-#include "Dalvik.h"
-
-//#define TRIVIAL_COMPARE /* do something simple instead of an atomic op */
-
-/*
- * Perform operation. Returns elapsed time.
- */
-u8 dvmTestAtomicSpeedSub(int repeatCount)
-{
- static int value = 7;
- int* valuePtr = &value;
- u8 start, end;
- int i;
-
-#ifdef TRIVIAL_COMPARE
- /* init to arg value so compiler can't pre-determine result */
- int j = repeatCount;
-#endif
-
- assert((repeatCount % 10) == 0);
-
- start = dvmGetRelativeTimeNsec();
-
- for (i = repeatCount / 10; i != 0; i--) {
-#ifdef TRIVIAL_COMPARE
- // integer add (Dream: 3.4ns -- THUMB has 10 adds, ARM condenses)
- j += i; j += i; j += i; j += i; j += i;
- j += i; j += i; j += i; j += i; j += i;
-#else
- // succeed 10x (Dream: 155.9ns)
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
- (void)android_atomic_release_cas(7, 7, valuePtr);
-
- // fail 10x (Dream: 158.5ns)
- /*
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- (void)android_atomic_release_cas(6, 7, valuePtr);
- */
-#endif
- }
-
- end = dvmGetRelativeTimeNsec();
-
-#ifdef TRIVIAL_COMPARE
- /* use value so compiler can't eliminate it */
- dvmFprintf(stdout, "%d\n", j);
-#else
- dvmFprintf(stdout, ".");
- fflush(stdout); // not quite right if they intercepted fprintf
-#endif
- return end - start;
-}
-
-/*
- * Control loop.
- */
-bool dvmTestAtomicSpeed(void)
-{
- static const int kIterations = 10;
- static const int kRepeatCount = 5 * 1000 * 1000;
- static const int kDelay = 500 * 1000;
- u8 results[kIterations];
- int i;
-
- for (i = 0; i < kIterations; i++) {
- results[i] = dvmTestAtomicSpeedSub(kRepeatCount);
- usleep(kDelay);
- }
-
- dvmFprintf(stdout, "\n");
- dvmFprintf(stdout, "Atomic speed test results (%d per iteration):\n",
- kRepeatCount);
- for (i = 0; i < kIterations; i++) {
- dvmFprintf(stdout,
- " %2d: %.3fns\n", i, (double) results[i] / kRepeatCount);
- }
-
- return true;
-}
diff --git a/vm/test/AtomicTest.c b/vm/test/AtomicTest.c
new file mode 100644
index 0000000..ce018bb
--- /dev/null
+++ b/vm/test/AtomicTest.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This provides a handful of correctness and speed tests on our atomic
+ * operations.
+ *
+ * This doesn't really belong here, but we currently lack a better place
+ * for it, so this will do for now.
+ */
+#include "Dalvik.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <cutils/atomic.h>
+#ifdef __arm__
+# include <machine/cpu-features.h>
+#endif
+
+#define USE_ATOMIC 1
+#define THREAD_COUNT 10
+#define ITERATION_COUNT 500000
+
+#ifdef HAVE_ANDROID_OS
+/*#define TEST_BIONIC 1*/
+#endif
+
+
+#ifdef TEST_BIONIC
+extern int __atomic_cmpxchg(int old, int _new, volatile int *ptr);
+extern int __atomic_swap(int _new, volatile int *ptr);
+extern int __atomic_dec(volatile int *ptr);
+extern int __atomic_inc(volatile int *ptr);
+#endif
+
+static pthread_mutex_t waitLock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t waitCond = PTHREAD_COND_INITIALIZER;
+
+static volatile int threadsStarted = 0;
+
+/* results */
+static int incTest = 0;
+static int decTest = 0;
+static int addTest = 0;
+static int andTest = 0;
+static int orTest = 0;
+static int casTest = 0;
+static int failingCasTest = 0;
+static int swapTest = 0;
+static int64_t wideCasTest = 0x6600000077000000LL;
+
+/*
+ * Get a relative time value.
+ */
+static int64_t getRelativeTimeNsec(void)
+{
+#define HAVE_POSIX_CLOCKS
+#ifdef HAVE_POSIX_CLOCKS
+ struct timespec now;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ return (int64_t) now.tv_sec*1000000000LL + now.tv_nsec;
+#else
+ struct timeval now;
+ gettimeofday(&now, NULL);
+ return (int64_t) now.tv_sec*1000000000LL + now.tv_usec * 1000LL;
+#endif
+}
+
+
+/*
+ * Non-atomic implementations, for comparison.
+ *
+ * If these get inlined the compiler may figure out what we're up to and
+ * completely elide the operations.
+ */
+static void incr(void) __attribute__((noinline));
+static void decr(void) __attribute__((noinline));
+static void add(int addVal) __attribute__((noinline));
+static int compareAndSwap(int oldVal, int newVal, int* addr) __attribute__((noinline));
+static int compareAndSwapWide(int64_t oldVal, int64_t newVal, int64_t* addr) __attribute__((noinline));
+
+static void incr(void)
+{
+ incTest++;
+}
+static void decr(void)
+{
+ decTest--;
+}
+static void add(int32_t addVal)
+{
+ addTest += addVal;
+}
+static int compareAndSwap(int32_t oldVal, int32_t newVal, int32_t* addr)
+{
+ if (*addr == oldVal) {
+ *addr = newVal;
+ return 0;
+ }
+ return 1;
+}
+static int compareAndSwapWide(int64_t oldVal, int64_t newVal, int64_t* addr)
+{
+ if (*addr == oldVal) {
+ *addr = newVal;
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Exercise several of the atomic ops.
+ */
+static void doAtomicTest(int num)
+{
+ int addVal = (num & 0x01) + 1;
+
+ int i;
+ for (i = 0; i < ITERATION_COUNT; i++) {
+ if (USE_ATOMIC) {
+ android_atomic_inc(&incTest);
+ android_atomic_dec(&decTest);
+ android_atomic_add(addVal, &addTest);
+
+ int val;
+ do {
+ val = casTest;
+ } while (android_atomic_release_cas(val, val+3, &casTest) != 0);
+ do {
+ val = casTest;
+ } while (android_atomic_acquire_cas(val, val-1, &casTest) != 0);
+
+ int64_t wval;
+ do {
+ wval = dvmQuasiAtomicRead64(&wideCasTest);
+ } while (dvmQuasiAtomicCas64(wval,
+ wval + 0x0000002000000001LL, &wideCasTest) != 0);
+ do {
+ wval = dvmQuasiAtomicRead64(&wideCasTest);
+ } while (dvmQuasiAtomicCas64(wval,
+ wval - 0x0000002000000001LL, &wideCasTest) != 0);
+ } else {
+ incr();
+ decr();
+ add(addVal);
+
+ int val;
+ do {
+ val = casTest;
+ } while (compareAndSwap(val, val+3, &casTest) != 0);
+ do {
+ val = casTest;
+ } while (compareAndSwap(val, val-1, &casTest) != 0);
+
+ int64_t wval;
+ do {
+ wval = wideCasTest;
+ } while (compareAndSwapWide(wval,
+ wval + 0x0000002000000001LL, &wideCasTest) != 0);
+ do {
+ wval = wideCasTest;
+ } while (compareAndSwapWide(wval,
+ wval - 0x0000002000000001LL, &wideCasTest) != 0);
+ }
+ }
+}
+
+/*
+ * Entry point for multi-thread test.
+ */
+static void* atomicTest(void* arg)
+{
+ pthread_mutex_lock(&waitLock);
+ threadsStarted++;
+ pthread_cond_wait(&waitCond, &waitLock);
+ pthread_mutex_unlock(&waitLock);
+
+ doAtomicTest((int) arg);
+
+ return NULL;
+}
+
+/* lifted from a VM test */
+static int64_t testAtomicSpeedSub(int repeatCount)
+{
+ static int value = 7;
+ int* valuePtr = &value;
+ int64_t start, end;
+ int i;
+
+ start = getRelativeTimeNsec();
+
+ for (i = repeatCount / 10; i != 0; i--) {
+ if (USE_ATOMIC) {
+ // succeed 10x
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ (void) android_atomic_release_cas(7, 7, valuePtr);
+ } else {
+ // succeed 10x
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ compareAndSwap(7, 7, valuePtr);
+ }
+ }
+
+ end = getRelativeTimeNsec();
+
+ dvmFprintf(stdout, ".");
+ fflush(stdout);
+ return end - start;
+}
+
+static void testAtomicSpeed(void)
+{
+ static const int kIterations = 10;
+ static const int kRepeatCount = 5 * 1000 * 1000;
+ static const int kDelay = 50 * 1000;
+ int64_t results[kIterations];
+ int i;
+
+ for (i = 0; i < kIterations; i++) {
+ results[i] = testAtomicSpeedSub(kRepeatCount);
+ usleep(kDelay);
+ }
+
+ dvmFprintf(stdout, "\n");
+ dvmFprintf(stdout, "%s speed test results (%d per iteration):\n",
+ USE_ATOMIC ? "Atomic" : "Non-atomic", kRepeatCount);
+ for (i = 0; i < kIterations; i++) {
+ dvmFprintf(stdout,
+ " %2d: %.3fns\n", i, (double) results[i] / kRepeatCount);
+ }
+}
+
+/*
+ * Start tests, show results.
+ */
+bool dvmTestAtomicSpeed(void)
+{
+ pthread_t threads[THREAD_COUNT];
+ void *(*startRoutine)(void*) = atomicTest;
+ int64_t startWhen, endWhen;
+
+#if defined(__ARM_ARCH__)
+ dvmFprintf(stdout, "__ARM_ARCH__ is %d\n", __ARM_ARCH__);
+#endif
+#if defined(ANDROID_SMP)
+ dvmFprintf(stdout, "ANDROID_SMP is %d\n", ANDROID_SMP);
+#endif
+ dvmFprintf(stdout, "Creating threads\n");
+
+ int i;
+ for (i = 0; i < THREAD_COUNT; i++) {
+ void* arg = (void*) i;
+ if (pthread_create(&threads[i], NULL, startRoutine, arg) != 0) {
+ dvmFprintf(stderr, "thread create failed\n");
+ }
+ }
+
+ /* wait for all the threads to reach the starting line */
+ while (1) {
+ pthread_mutex_lock(&waitLock);
+ if (threadsStarted == THREAD_COUNT) {
+ dvmFprintf(stdout, "Starting test\n");
+ startWhen = getRelativeTimeNsec();
+ pthread_cond_broadcast(&waitCond);
+ pthread_mutex_unlock(&waitLock);
+ break;
+ }
+ pthread_mutex_unlock(&waitLock);
+ usleep(100000);
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ void* retval;
+ if (pthread_join(threads[i], &retval) != 0) {
+ dvmFprintf(stderr, "thread join (%d) failed\n", i);
+ }
+ }
+
+ endWhen = getRelativeTimeNsec();
+ dvmFprintf(stdout, "All threads stopped, time is %.6fms\n",
+ (endWhen - startWhen) / 1000000.0);
+
+ /*
+ * Show results; expecting:
+ *
+ * incTest = 5000000
+ * decTest = -5000000
+ * addTest = 7500000
+ * casTest = 10000000
+ * wideCasTest = 0x6600000077000000
+ */
+ dvmFprintf(stdout, "incTest = %d\n", incTest);
+ dvmFprintf(stdout, "decTest = %d\n", decTest);
+ dvmFprintf(stdout, "addTest = %d\n", addTest);
+ dvmFprintf(stdout, "casTest = %d\n", casTest);
+ dvmFprintf(stdout, "wideCasTest = 0x%llx\n", wideCasTest);
+
+ /* do again, serially (SMP check) */
+ startWhen = getRelativeTimeNsec();
+ for (i = 0; i < THREAD_COUNT; i++) {
+ doAtomicTest(i);
+ }
+ endWhen = getRelativeTimeNsec();
+ dvmFprintf(stdout, "Same iterations done serially: time is %.6fms\n",
+ (endWhen - startWhen) / 1000000.0);
+
+ /*
+ * Hard to do a meaningful thrash test on these, so just do a simple
+ * function test.
+ */
+ andTest = 0xffd7fa96;
+ orTest = 0x122221ff;
+ swapTest = 0x11111111;
+ android_atomic_and(0xfffdaf96, &andTest);
+ android_atomic_or(0xdeaaeb00, &orTest);
+ int oldSwap = android_atomic_acquire_swap(0x22222222, &swapTest);
+ int oldSwap2 = android_atomic_release_swap(0x33333333, &swapTest);
+ if (android_atomic_release_cas(failingCasTest+1, failingCasTest-1,
+ &failingCasTest) == 0)
+ dvmFprintf(stdout, "failing test did not fail!\n");
+
+ dvmFprintf(stdout, "andTest = 0x%x\n", andTest);
+ dvmFprintf(stdout, "orTest = 0x%x\n", orTest);
+ dvmFprintf(stdout, "acquire swapTest = 0x%x -> 0x%x\n", oldSwap, oldSwap2);
+ dvmFprintf(stdout, "release swapTest = 0x%x -> 0x%x\n", oldSwap2, swapTest);
+ dvmFprintf(stdout, "failingCasTest = %d\n", failingCasTest);
+
+#ifdef TEST_BIONIC
+ /*
+ * Quick function test on the bionic ops.
+ */
+ int prev;
+ int tester = 7;
+ prev = __atomic_inc(&tester);
+ __atomic_inc(&tester);
+ __atomic_inc(&tester);
+ dvmFprintf(stdout, "bionic 3 inc: %d -> %d\n", prev, tester);
+ prev = __atomic_dec(&tester);
+ __atomic_dec(&tester);
+ __atomic_dec(&tester);
+ dvmFprintf(stdout, "bionic 3 dec: %d -> %d\n", prev, tester);
+ prev = __atomic_swap(27, &tester);
+ dvmFprintf(stdout, "bionic swap: %d -> %d\n", prev, tester);
+ int swapok = __atomic_cmpxchg(27, 72, &tester);
+ dvmFprintf(stdout, "bionic cmpxchg: %d (%d)\n", tester, swapok);
+#endif
+
+ testAtomicSpeed();
+
+ return 0;
+}