fix arm thread-pointer/atomic asm when compiling to thumb code

armv7/thumb2 provides a way to do atomics in thumb mode, but for armv6
we need a call to arm mode.

this commit is based on a patch by Stephen Thomas which fixed the
armv7 cases but not the armv6 ones.

all of this should be revisited if/when runtime selection of thread
pointer access and atomics is added.
diff --git a/arch/arm/atomic.h b/arch/arm/atomic.h
index d8f6484..d4ba73f 100644
--- a/arch/arm/atomic.h
+++ b/arch/arm/atomic.h
@@ -22,9 +22,8 @@
 	return a_ctz_l(y);
 }
 
-#if __ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__ \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ \
- || __ARM_ARCH >= 7
+#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 
 #if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ ||  __ARM_ARCH >= 7
 #define MEM_BARRIER "dmb ish"
@@ -39,6 +38,9 @@
 		"	" MEM_BARRIER "\n"
 		"1:	ldrex %0,%3\n"
 		"	subs %0,%0,%1\n"
+#ifdef __thumb__
+		"	itt eq\n"
+#endif
 		"	strexeq %0,%2,%3\n"
 		"	teqeq %0,#1\n"
 		"	beq 1b\n"
diff --git a/arch/arm/pthread_arch.h b/arch/arm/pthread_arch.h
index ec77a83..6d9dc3a 100644
--- a/arch/arm/pthread_arch.h
+++ b/arch/arm/pthread_arch.h
@@ -1,6 +1,5 @@
-#if __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__ \
- || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ \
- || __ARM_ARCH >= 7
+#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
+ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
 
 static inline __attribute__((const)) pthread_t __pthread_self()
 {