AArch64: Use LDXR/STXR instead of LDAXR/STLXR for bionic_atomic_cmpxchg()

The bionic_atomic_cmpxchg() API states that the cmpxchg() will be done without
explicit memory barriers. LDAXR/STLXR have acquire/release semantics, which
imply half barriers on the load and the store.

This patch optimises cmpxchg() by using LDXR/STXR and avoiding unnecessary half
barriers. It also fixes the clobber list for all the bionic_atomic_*() functions.

Change-Id: Iae9468965785cfeeec791d52f1e8cbc524adb682
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
diff --git a/libc/private/bionic_atomic_aarch64.h b/libc/private/bionic_atomic_aarch64.h
index c5a9e2e..6ed1700 100644
--- a/libc/private/bionic_atomic_aarch64.h
+++ b/libc/private/bionic_atomic_aarch64.h
@@ -29,15 +29,15 @@
   int32_t tmp, oldval;
   __asm__ __volatile__ (
       "// atomic_cmpxchg\n"
-      "1:  ldaxr %w1, [%3]\n"
+      "1:  ldxr %w1, [%3]\n"
       "    cmp %w1, %w4\n"
       "    b.ne 2f\n"
-      "    stlxr %w0, %w5, [%3]\n"
+      "    stxr %w0, %w5, [%3]\n"
       "    cbnz  %w0, 1b\n"
       "2:"
       : "=&r" (tmp), "=&r" (oldval), "+o"(*ptr)
       : "r" (ptr), "Ir" (old_value), "r" (new_value)
-      : "cc");
+      : "cc", "memory");
   return oldval != old_value;
 }
 
@@ -51,7 +51,7 @@
       "    cbnz %w1, 1b\n"
       : "=&r" (prev), "=&r" (status), "+o" (*ptr)
       : "r" (ptr), "r" (new_value)
-      : "cc");
+      : "cc", "memory");
   return prev;
 }
 
@@ -65,7 +65,7 @@
       "    cbnz %w2, 1b"
       : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr)
       : "r" (ptr)
-      : "cc");
+      : "cc", "memory");
   return prev;
 }