Fix memory barriers (Issue 3338450)

Add extra memory barrier on volatile stores.

Change-Id: Id4a4750cdfc910eda2f0b44ead0af2a569b5735e
diff --git a/vm/Atomic.cpp b/vm/Atomic.cpp
index 4473c85..927a926 100644
--- a/vm/Atomic.cpp
+++ b/vm/Atomic.cpp
@@ -47,7 +47,9 @@
             (int64_t*)addr) == 0;
 }
 
-int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
+
+static inline int64_t dvmQuasiAtomicSwap64Body(int64_t value,
+                                               volatile int64_t* addr)
 {
     int64_t oldValue;
     do {
@@ -56,6 +58,21 @@
     return oldValue;
 }
 
+int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr)
+{
+    return dvmQuasiAtomicSwap64Body(value, addr);  /* adds no barrier */
+}
+
+int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
+{
+    int64_t oldValue;
+    ANDROID_MEMBAR_STORE();     /* store barrier ahead of the swap */
+    oldValue = dvmQuasiAtomicSwap64Body(value, addr);
+    /* TUNING: barriers can be avoided on some architectures */
+    ANDROID_MEMBAR_FULL();      /* full barrier after the swap */
+    return oldValue;            /* value previously held at addr */
+}
+
 int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr)
 {
     return OSAtomicAdd64Barrier(0, addr);
@@ -69,7 +86,8 @@
 #include <machine/cpu-features.h>
 
 #ifdef __ARM_HAVE_LDREXD
-int64_t dvmQuasiAtomicSwap64(int64_t newvalue, volatile int64_t* addr)
+static inline int64_t dvmQuasiAtomicSwap64Body(int64_t newvalue,
+                                               volatile int64_t* addr)
 {
     int64_t prev;
     int status;
@@ -84,6 +102,20 @@
     return prev;
 }
 
+int64_t dvmQuasiAtomicSwap64(int64_t newvalue, volatile int64_t* addr)
+{
+    return dvmQuasiAtomicSwap64Body(newvalue, addr);  /* adds no barrier */
+}
+
+int64_t dvmQuasiAtomicSwap64Sync(int64_t newvalue, volatile int64_t* addr)
+{
+    int64_t prev;
+    ANDROID_MEMBAR_STORE();     /* store barrier ahead of the swap */
+    prev = dvmQuasiAtomicSwap64Body(newvalue, addr);
+    ANDROID_MEMBAR_FULL();      /* full barrier after the swap */
+    return prev;                /* value previously held at addr */
+}
+
 int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
     volatile int64_t* addr)
 {
@@ -153,6 +185,12 @@
     return oldValue;
 }
 
+/* Delegates to dvmQuasiAtomicSwap64; the mutex handles the barrier. */
+int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
+{
+    return dvmQuasiAtomicSwap64(value, addr);
+}
+
 int dvmQuasiAtomicCas64(int64_t oldvalue, int64_t newvalue,
     volatile int64_t* addr)
 {
@@ -279,4 +317,10 @@
     return result;
 }
 
+/* Delegates to dvmQuasiAtomicSwap64; the syscall handles the barrier. */
+int64_t dvmQuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr)
+{
+    return dvmQuasiAtomicSwap64(value, addr);
+}
+
 #endif /*NEED_QUASIATOMICS*/
diff --git a/vm/Atomic.h b/vm/Atomic.h
index c7e965f..6f7100b 100644
--- a/vm/Atomic.h
+++ b/vm/Atomic.h
@@ -31,7 +31,7 @@
  * quasiatomic operations that are performed on partially-overlapping
  * memory.
  *
- * None of these provide a memory barrier.
+ * Only the "Sync" versions of these provide a memory barrier.
  */
 
 /*
@@ -41,6 +41,13 @@
 extern "C" int64_t dvmQuasiAtomicSwap64(int64_t value, volatile int64_t* addr);
 
 /*
+ * Swap the 64-bit value at "addr" with "value".  Returns the previous
+ * value.  Unlike dvmQuasiAtomicSwap64, also provides memory barriers.
+ */
+extern "C" int64_t dvmQuasiAtomicSwap64Sync(int64_t value,
+                                            volatile int64_t* addr);
+
+/*
  * Read the 64-bit value at "addr".
  */
 extern "C" int64_t dvmQuasiAtomicRead64(volatile const int64_t* addr);
diff --git a/vm/compiler/codegen/arm/CodegenDriver.cpp b/vm/compiler/codegen/arm/CodegenDriver.cpp
index 75b86a4..85ecb78 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.cpp
+++ b/vm/compiler/codegen/arm/CodegenDriver.cpp
@@ -384,11 +384,14 @@
                  NULL);/* null object? */
 
     if (isVolatile) {
-        dvmCompilerGenMemBarrier(cUnit, kSY);
+        dvmCompilerGenMemBarrier(cUnit, kST);
     }
     HEAP_ACCESS_SHADOW(true);
     storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
     HEAP_ACCESS_SHADOW(false);
+    if (isVolatile) {
+        dvmCompilerGenMemBarrier(cUnit, kSY);
+    }
     if (isObject) {
         /* NOTE: marking card based on object head */
         markCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
@@ -1757,6 +1760,9 @@
                 objHead = dvmCompilerAllocTemp(cUnit);
                 loadWordDisp(cUnit, tReg, OFFSETOF_MEMBER(Field, clazz), objHead);
             }
+            if (isVolatile) {
+                dvmCompilerGenMemBarrier(cUnit, kST);
+            }
             HEAP_ACCESS_SHADOW(true);
             storeWordDisp(cUnit, tReg, valOffset ,rlSrc.lowReg);
             dvmCompilerFreeTemp(cUnit, tReg);
diff --git a/vm/mterp/armv5te/OP_IPUT.S b/vm/mterp/armv5te/OP_IPUT.S
index 13836fb..72c05c0 100644
--- a/vm/mterp/armv5te/OP_IPUT.S
+++ b/vm/mterp/armv5te/OP_IPUT.S
@@ -1,4 +1,4 @@
-%default { "store":"str", "barrier":"@ no-op ", "sqnum":"0" }
+%default { "store":"str", "postbarrier":"@ no-op ", "prebarrier":"@ no-op ", "sqnum":"0" }
 %verify "executed"
 %verify "null object"
 %verify "field already resolved"
@@ -42,6 +42,7 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     $store  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    $postbarrier
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/armv5te/OP_IPUT_JUMBO.S b/vm/mterp/armv5te/OP_IPUT_JUMBO.S
index 2c11dac..21f154c 100644
--- a/vm/mterp/armv5te/OP_IPUT_JUMBO.S
+++ b/vm/mterp/armv5te/OP_IPUT_JUMBO.S
@@ -1,4 +1,4 @@
-%default { "store":"str", "barrier":"@ no-op ", "sqnum":"0" }
+%default { "store":"str", "postbarrier":"@ no-op ", "prebarrier":"@ no-op ", "sqnum":"0" }
 %verify "executed"
 %verify "null object"
 %verify "field already resolved"
@@ -52,6 +52,7 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                         @ releasing store
     $store  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    $postbarrier
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/armv5te/OP_IPUT_OBJECT.S b/vm/mterp/armv5te/OP_IPUT_OBJECT.S
index 7cab4c5..a514f04 100644
--- a/vm/mterp/armv5te/OP_IPUT_OBJECT.S
+++ b/vm/mterp/armv5te/OP_IPUT_OBJECT.S
@@ -1,4 +1,4 @@
-%default { "barrier":"@ no-op ", "sqnum":"0" }
+%default { "postbarrier":"@ no-op ", "prebarrier":"@ no-op ", "sqnum":"0" }
 %verify "executed"
 %verify "null object"
 %verify "field already resolved"
@@ -43,8 +43,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    $postbarrier
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/armv5te/OP_IPUT_OBJECT_JUMBO.S b/vm/mterp/armv5te/OP_IPUT_OBJECT_JUMBO.S
index 11b6d68..6d1e6a7 100644
--- a/vm/mterp/armv5te/OP_IPUT_OBJECT_JUMBO.S
+++ b/vm/mterp/armv5te/OP_IPUT_OBJECT_JUMBO.S
@@ -1,4 +1,4 @@
-%default { "barrier":"@ no-op ", "sqnum":"0" }
+%default { "postbarrier":"@ no-op ", "prebarrier":"@ no-op ", "sqnum":"0" }
 %verify "executed"
 %verify "null object"
 %verify "field already resolved"
@@ -50,8 +50,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    $postbarrier
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE.S b/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE.S
index b4d24e7..317c5b2 100644
--- a/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE.S
+++ b/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_IPUT_OBJECT.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_IPUT_OBJECT.S" {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE_JUMBO.S b/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE_JUMBO.S
index 0d27fe5..4ae11ae 100644
--- a/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE_JUMBO.S
+++ b/vm/mterp/armv5te/OP_IPUT_OBJECT_VOLATILE_JUMBO.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_IPUT_OBJECT_JUMBO.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_IPUT_OBJECT_JUMBO.S"  {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_IPUT_VOLATILE.S b/vm/mterp/armv5te/OP_IPUT_VOLATILE.S
index ba3f615..1a7a098 100644
--- a/vm/mterp/armv5te/OP_IPUT_VOLATILE.S
+++ b/vm/mterp/armv5te/OP_IPUT_VOLATILE.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_IPUT.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_IPUT.S" {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_IPUT_VOLATILE_JUMBO.S b/vm/mterp/armv5te/OP_IPUT_VOLATILE_JUMBO.S
index af7d9eb..0a7e2fe 100644
--- a/vm/mterp/armv5te/OP_IPUT_VOLATILE_JUMBO.S
+++ b/vm/mterp/armv5te/OP_IPUT_VOLATILE_JUMBO.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_IPUT_JUMBO.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_IPUT_JUMBO.S"  {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_IPUT_WIDE.S b/vm/mterp/armv5te/OP_IPUT_WIDE.S
index d9a6fc3..ec787f0 100644
--- a/vm/mterp/armv5te/OP_IPUT_WIDE.S
+++ b/vm/mterp/armv5te/OP_IPUT_WIDE.S
@@ -39,7 +39,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     $volatile
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
diff --git a/vm/mterp/armv5te/OP_IPUT_WIDE_JUMBO.S b/vm/mterp/armv5te/OP_IPUT_WIDE_JUMBO.S
index a371f1c..d5e557a 100644
--- a/vm/mterp/armv5te/OP_IPUT_WIDE_JUMBO.S
+++ b/vm/mterp/armv5te/OP_IPUT_WIDE_JUMBO.S
@@ -48,7 +48,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     $volatile
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
diff --git a/vm/mterp/armv5te/OP_SPUT.S b/vm/mterp/armv5te/OP_SPUT.S
index 52e6ee9..986f06a 100644
--- a/vm/mterp/armv5te/OP_SPUT.S
+++ b/vm/mterp/armv5te/OP_SPUT.S
@@ -1,4 +1,4 @@
-%default { "barrier":"@ no-op " }
+%default { "prebarrier":"@ no-op", "postbarrier":"@ no-op " }
 %verify "executed"
 %verify "field already resolved"
 %verify "field not yet resolved"
@@ -20,8 +20,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    $postbarrier
     GOTO_OPCODE(ip)                     @ jump to next instruction
 %break
 
diff --git a/vm/mterp/armv5te/OP_SPUT_JUMBO.S b/vm/mterp/armv5te/OP_SPUT_JUMBO.S
index 2367138..85dea34 100644
--- a/vm/mterp/armv5te/OP_SPUT_JUMBO.S
+++ b/vm/mterp/armv5te/OP_SPUT_JUMBO.S
@@ -1,4 +1,4 @@
-%default { "barrier":"@ no-op " }
+%default { "prebarrier":"@ no-op", "postbarrier":"@ no-op " }
 %verify "executed"
 %verify "field already resolved"
 %verify "field not yet resolved"
@@ -23,8 +23,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    $postbarrier
     GOTO_OPCODE(ip)                     @ jump to next instruction
 %break
 
diff --git a/vm/mterp/armv5te/OP_SPUT_OBJECT.S b/vm/mterp/armv5te/OP_SPUT_OBJECT.S
index 79ca04a..77938d0 100644
--- a/vm/mterp/armv5te/OP_SPUT_OBJECT.S
+++ b/vm/mterp/armv5te/OP_SPUT_OBJECT.S
@@ -1,4 +1,4 @@
-%default { "barrier":"@ no-op " }
+%default { "postbarrier":"@ no-op ", "prebarrier":"@ no-op " }
 %verify "executed"
 %verify "field already resolved"
 %verify "field not yet resolved"
@@ -22,13 +22,14 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     b       .L${opcode}_end
 %break
 
 
 .L${opcode}_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    $postbarrier
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/armv5te/OP_SPUT_OBJECT_JUMBO.S b/vm/mterp/armv5te/OP_SPUT_OBJECT_JUMBO.S
index 623c892..6e5ebe2 100644
--- a/vm/mterp/armv5te/OP_SPUT_OBJECT_JUMBO.S
+++ b/vm/mterp/armv5te/OP_SPUT_OBJECT_JUMBO.S
@@ -1,4 +1,4 @@
-%default { "barrier":"@ no-op " }
+%default { "postbarrier":"@ no-op ", "prebarrier":"@ no-op " }
 %verify "executed"
 %verify "field already resolved"
 %verify "field not yet resolved"
@@ -22,13 +22,14 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    $barrier                            @ releasing store
+    $prebarrier                        @ releasing store
     b       .L${opcode}_end
 %break
 
 
 .L${opcode}_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    $postbarrier
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE.S b/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE.S
index fe12b9e..a9d69bb 100644
--- a/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE.S
+++ b/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_SPUT_OBJECT.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_SPUT_OBJECT.S"  {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE_JUMBO.S b/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE_JUMBO.S
index 78fe07b..e8b2b1d 100644
--- a/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE_JUMBO.S
+++ b/vm/mterp/armv5te/OP_SPUT_OBJECT_VOLATILE_JUMBO.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_SPUT_OBJECT_JUMBO.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_SPUT_OBJECT_JUMBO.S"  {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_SPUT_VOLATILE.S b/vm/mterp/armv5te/OP_SPUT_VOLATILE.S
index cfb2b27..1b8dd25 100644
--- a/vm/mterp/armv5te/OP_SPUT_VOLATILE.S
+++ b/vm/mterp/armv5te/OP_SPUT_VOLATILE.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_SPUT.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_SPUT.S" {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_SPUT_VOLATILE_JUMBO.S b/vm/mterp/armv5te/OP_SPUT_VOLATILE_JUMBO.S
index 7049fc6..5127051 100644
--- a/vm/mterp/armv5te/OP_SPUT_VOLATILE_JUMBO.S
+++ b/vm/mterp/armv5te/OP_SPUT_VOLATILE_JUMBO.S
@@ -1,2 +1,2 @@
 %verify "executed"
-%include "armv5te/OP_SPUT_JUMBO.S" {"barrier":"SMP_DMB"}
+%include "armv5te/OP_SPUT_JUMBO.S" {"prebarrier":"SMP_DMB_ST", "postbarrier":"SMP_DMB"}
diff --git a/vm/mterp/armv5te/OP_SPUT_WIDE.S b/vm/mterp/armv5te/OP_SPUT_WIDE.S
index bbe5906..d0f65e6 100644
--- a/vm/mterp/armv5te/OP_SPUT_WIDE.S
+++ b/vm/mterp/armv5te/OP_SPUT_WIDE.S
@@ -21,7 +21,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if $volatile
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
diff --git a/vm/mterp/armv5te/OP_SPUT_WIDE_JUMBO.S b/vm/mterp/armv5te/OP_SPUT_WIDE_JUMBO.S
index 5e93b52..03ea9f9 100644
--- a/vm/mterp/armv5te/OP_SPUT_WIDE_JUMBO.S
+++ b/vm/mterp/armv5te/OP_SPUT_WIDE_JUMBO.S
@@ -23,7 +23,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if $volatile
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
diff --git a/vm/mterp/out/InterpAsm-armv5te-vfp.S b/vm/mterp/out/InterpAsm-armv5te-vfp.S
index 2a9ca5c..a0835f9 100644
--- a/vm/mterp/out/InterpAsm-armv5te-vfp.S
+++ b/vm/mterp/out/InterpAsm-armv5te-vfp.S
@@ -2888,8 +2888,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -2914,7 +2915,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -2943,7 +2944,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_end
 
 /* ------------------------------ */
@@ -2968,8 +2969,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -2995,8 +2997,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3022,8 +3025,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3049,8 +3053,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7102,8 +7107,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7237,7 +7243,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -7688,7 +7694,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_end
 
 
@@ -8484,8 +8490,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -8512,7 +8519,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -8541,7 +8548,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_JUMBO_end
 
 /* ------------------------------ */
@@ -8569,8 +8576,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8599,8 +8607,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8629,8 +8638,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8659,8 +8669,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -10724,8 +10735,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -10754,7 +10766,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -10785,7 +10797,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end
 
 
@@ -11520,8 +11532,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE */
@@ -11543,7 +11556,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     0
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -11567,8 +11580,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -11590,8 +11604,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE */
@@ -11611,8 +11626,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR */
@@ -11632,8 +11648,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT */
@@ -11653,8 +11670,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET */
@@ -11902,6 +11920,7 @@
 
 .LOP_SPUT_OBJECT_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12411,8 +12430,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_VOLATILE */
@@ -12532,7 +12552,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -12769,8 +12789,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12806,6 +12827,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13380,8 +13402,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_JUMBO */
@@ -13412,7 +13435,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     0
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -13445,8 +13468,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13477,8 +13501,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE_JUMBO */
@@ -13507,8 +13532,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR_JUMBO */
@@ -13537,8 +13563,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT_JUMBO */
@@ -13567,8 +13594,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_JUMBO */
@@ -13805,6 +13833,7 @@
 
 .LOP_SPUT_OBJECT_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14183,8 +14212,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_VOLATILE_JUMBO */
@@ -14215,7 +14245,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -14248,8 +14278,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14384,6 +14415,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/out/InterpAsm-armv5te.S b/vm/mterp/out/InterpAsm-armv5te.S
index f8ae9ac..5e4ccd4 100644
--- a/vm/mterp/out/InterpAsm-armv5te.S
+++ b/vm/mterp/out/InterpAsm-armv5te.S
@@ -2910,8 +2910,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -2936,7 +2937,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -2965,7 +2966,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_end
 
 /* ------------------------------ */
@@ -2990,8 +2991,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3017,8 +3019,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3044,8 +3047,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3071,8 +3075,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7424,8 +7429,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7559,7 +7565,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -8010,7 +8016,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_end
 
 
@@ -8806,8 +8812,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -8834,7 +8841,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -8863,7 +8870,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_JUMBO_end
 
 /* ------------------------------ */
@@ -8891,8 +8898,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8921,8 +8929,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8951,8 +8960,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8981,8 +8991,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -11046,8 +11057,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -11076,7 +11088,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -11107,7 +11119,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end
 
 
@@ -11978,8 +11990,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE */
@@ -12001,7 +12014,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     0
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -12025,8 +12038,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12048,8 +12062,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE */
@@ -12069,8 +12084,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR */
@@ -12090,8 +12106,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT */
@@ -12111,8 +12128,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET */
@@ -12360,6 +12378,7 @@
 
 .LOP_SPUT_OBJECT_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12869,8 +12888,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_VOLATILE */
@@ -12990,7 +13010,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -13227,8 +13247,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13264,6 +13285,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13838,8 +13860,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_JUMBO */
@@ -13870,7 +13893,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     0
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -13903,8 +13926,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13935,8 +13959,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE_JUMBO */
@@ -13965,8 +13990,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR_JUMBO */
@@ -13995,8 +14021,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT_JUMBO */
@@ -14025,8 +14052,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_JUMBO */
@@ -14263,6 +14291,7 @@
 
 .LOP_SPUT_OBJECT_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14641,8 +14670,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_VOLATILE_JUMBO */
@@ -14673,7 +14703,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -14706,8 +14736,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14842,6 +14873,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/out/InterpAsm-armv7-a-neon.S b/vm/mterp/out/InterpAsm-armv7-a-neon.S
index 0037563..3a01a83 100644
--- a/vm/mterp/out/InterpAsm-armv7-a-neon.S
+++ b/vm/mterp/out/InterpAsm-armv7-a-neon.S
@@ -2892,8 +2892,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -2918,7 +2919,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -2947,7 +2948,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_end
 
 /* ------------------------------ */
@@ -2972,8 +2973,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -2999,8 +3001,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3026,8 +3029,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3053,8 +3057,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7060,8 +7065,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7195,7 +7201,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -7642,7 +7648,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_end
 
 
@@ -8438,8 +8444,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -8466,7 +8473,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -8495,7 +8502,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_JUMBO_end
 
 /* ------------------------------ */
@@ -8523,8 +8530,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8553,8 +8561,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8583,8 +8592,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8613,8 +8623,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -10678,8 +10689,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -10708,7 +10720,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -10739,7 +10751,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end
 
 
@@ -11505,8 +11517,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -11528,8 +11541,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE */
@@ -11549,8 +11563,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR */
@@ -11570,8 +11585,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT */
@@ -11591,8 +11607,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET */
@@ -11840,6 +11857,7 @@
 
 .LOP_SPUT_OBJECT_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12349,8 +12367,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_VOLATILE */
@@ -12470,7 +12489,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -12707,8 +12726,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12744,6 +12764,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13318,8 +13339,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_JUMBO */
@@ -13350,7 +13372,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     0
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -13383,8 +13405,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13415,8 +13438,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE_JUMBO */
@@ -13445,8 +13469,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR_JUMBO */
@@ -13475,8 +13500,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT_JUMBO */
@@ -13505,8 +13531,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_JUMBO */
@@ -13743,6 +13770,7 @@
 
 .LOP_SPUT_OBJECT_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14121,8 +14149,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_VOLATILE_JUMBO */
@@ -14153,7 +14182,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -14186,8 +14215,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14322,6 +14352,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/mterp/out/InterpAsm-armv7-a.S b/vm/mterp/out/InterpAsm-armv7-a.S
index b65c659..e8a9501 100644
--- a/vm/mterp/out/InterpAsm-armv7-a.S
+++ b/vm/mterp/out/InterpAsm-armv7-a.S
@@ -2892,8 +2892,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -2918,7 +2919,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -2947,7 +2948,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_end
 
 /* ------------------------------ */
@@ -2972,8 +2973,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -2999,8 +3001,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3026,8 +3029,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -3053,8 +3057,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7060,8 +7065,9 @@
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[AA]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vAA
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -7195,7 +7201,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vAA/vAA+1
     .endif
@@ -7642,7 +7648,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_end
 
 
@@ -8438,8 +8444,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* ------------------------------ */
@@ -8466,7 +8473,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 0
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -8495,7 +8502,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     b       .LOP_SPUT_OBJECT_JUMBO_end
 
 /* ------------------------------ */
@@ -8523,8 +8530,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8553,8 +8561,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8583,8 +8592,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -8613,8 +8623,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -10678,8 +10689,9 @@
     FETCH_ADVANCE_INST(4)               @ advance rPC, load rINST
     GET_VREG(r1, r2)                    @ r1<- fp[BBBB]
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r1, [r0, #offStaticField_value] @ field<- vBBBB
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 
@@ -10708,7 +10720,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if 1
     add     r2, r2, #offStaticField_value @ r2<- pointer to data
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r2, #offStaticField_value] @ field<- vBBBB/vBBBB+1
     .endif
@@ -10739,7 +10751,7 @@
     ldr     r2, [rSELF, #offThread_cardTable]  @ r2<- card table base
     ldr     r9, [r0, #offField_clazz]   @ r9<- field->clazz
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     b       .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end
 
 
@@ -11505,8 +11517,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -11528,8 +11541,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE */
@@ -11549,8 +11563,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR */
@@ -11570,8 +11585,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT */
@@ -11591,8 +11607,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET */
@@ -11840,6 +11857,7 @@
 
 .LOP_SPUT_OBJECT_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12349,8 +12367,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_VOLATILE */
@@ -12470,7 +12489,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -12707,8 +12726,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(2)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -12744,6 +12764,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vAA
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13318,8 +13339,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_JUMBO */
@@ -13350,7 +13372,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     0
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -13383,8 +13405,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                         @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    @ no-op 
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -13415,8 +13438,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_BYTE_JUMBO */
@@ -13445,8 +13469,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_CHAR_JUMBO */
@@ -13475,8 +13500,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_SHORT_JUMBO */
@@ -13505,8 +13531,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    @ no-op                             @ releasing store
+    @ no-op                          @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    @ no-op 
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_SGET_JUMBO */
@@ -13743,6 +13770,7 @@
 
 .LOP_SPUT_OBJECT_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    @ no-op 
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14121,8 +14149,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                         @ releasing store
     str  r0, [r9, r3]                @ obj.field (8/16/32 bits)<- r0
+    SMP_DMB
     GOTO_OPCODE(ip)                     @ jump to next instruction
 
 /* continuation for OP_IPUT_WIDE_VOLATILE_JUMBO */
@@ -14153,7 +14182,7 @@
     GET_INST_OPCODE(r10)                @ extract opcode from rINST
     .if     1
     add     r2, r9, r3                  @ r2<- target address
-    bl      dvmQuasiAtomicSwap64        @ stores r0/r1 into addr r2
+    bl      dvmQuasiAtomicSwap64Sync    @ stores r0/r1 into addr r2
     .else
     strd    r0, [r9, r3]                @ obj.field (64 bits, aligned)<- r0/r1
     .endif
@@ -14186,8 +14215,9 @@
     beq     common_errNullObject        @ object was null
     FETCH_ADVANCE_INST(5)               @ advance rPC, load rINST
     GET_INST_OPCODE(ip)                 @ extract opcode from rINST
-    SMP_DMB                            @ releasing store
+    SMP_DMB_ST                        @ releasing store
     str     r0, [r9, r3]                @ obj.field (32 bits)<- r0
+    SMP_DMB
     cmp     r0, #0                      @ stored a null reference?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card if not
     GOTO_OPCODE(ip)                     @ jump to next instruction
@@ -14322,6 +14352,7 @@
 
 .LOP_SPUT_OBJECT_VOLATILE_JUMBO_end:
     str     r1, [r0, #offStaticField_value]  @ field<- vBBBB
+    SMP_DMB
     cmp     r1, #0                      @ stored a null object?
     strneb  r2, [r2, r9, lsr #GC_CARD_SHIFT]  @ mark card based on obj head
     GOTO_OPCODE(ip)                     @ jump to next instruction
diff --git a/vm/oo/ObjectInlines.h b/vm/oo/ObjectInlines.h
index 44ce807..eb2e962 100644
--- a/vm/oo/ObjectInlines.h
+++ b/vm/oo/ObjectInlines.h
@@ -183,9 +183,7 @@
 }
 INLINE void dvmSetFieldLongVolatile(Object* obj, int offset, s8 val) {
     s8* addr = (s8*)BYTE_OFFSET(obj, offset);
-    ANDROID_MEMBAR_STORE();
-    dvmQuasiAtomicSwap64(val, addr);
-    /* post-store barrier not required due to use of atomic op or mutex */
+    dvmQuasiAtomicSwap64Sync(val, addr);  /* callee issues the barriers */
 }
 INLINE void dvmSetFieldDoubleVolatile(Object* obj, int offset, double val) {
     union { s8 lval; double dval; } alias;
@@ -335,9 +333,7 @@
 }
 INLINE void dvmSetStaticFieldLongVolatile(StaticField* sfield, s8 val) {
     s8* addr = &sfield->value.j;
-    ANDROID_MEMBAR_STORE();
-    dvmQuasiAtomicSwap64(val, addr);
-    /* post-store barrier not required due to use of atomic op or mutex */
+    dvmQuasiAtomicSwap64Sync(val, addr);  /* callee issues the barriers */
 }
 INLINE void dvmSetStaticFieldDoubleVolatile(StaticField* sfield, double val) {
     union { s8 lval; double dval; } alias;