/* include/tst_atomic.h - platform/external/ltp - Git at Google */

 /*
  * Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation, either version 2 of the License, or
  * (at your option) any later version.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 /* The LTP library has some of its own atomic synchronisation primitives
  * contained in this file. Generally speaking these should not be used
  * directly in tests for synchronisation, instead use tst_checkpoint.h,
  * tst_fuzzy_sync.h or the POSIX library.
  *
  * Notes on compile and runtime memory barriers and atomics.
  *
  * Within the LTP library we have three concerns when accessing variables
  * shared by multiple threads or processes:
  *
  * (1) Removal or reordering of accesses by the compiler.
  * (2) Atomicity of addition.
  * (3) LOAD-STORE ordering between threads.
  *
  * The first (1) is the most likely to cause an error if not properly
  * handled. We avoid it by using volatile variables and statements which will
  * not be removed or reordered by the compiler during optimisation. This includes
  * the __atomic and __sync intrinsics and volatile asm statements marked with
  * "memory" as well as variables marked with volatile.
  *
  * On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
  * 32-bit integer will be atomic. However fetching and adding to a variable is
  * quite likely not; so for (2) we need to ensure we use atomic addition.
  *
  * Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
  * STOREs of any shared variables (including non-atomics) that are made
  * between calls to tst_fzsync_wait are completed (globally visible) before
  * tst_fzsync_wait completes. For this, runtime memory and instruction
  * barriers are required in addition to compile time.
  *
  * We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
  * simplicity. LTP tests tend to be syscall heavy so any performance gain from
  * using a weaker memory model is unlikely to result in a relatively large
  * performance improvement while at the same time being a potent source of
  * confusion.
  *
  * Likewise, for the fallback ASM, the simplest "definitely will work, always"
  * approach is preferred over anything more performant.
  *
  * Also see Documentation/memory-barriers.txt in the kernel tree and
  * https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
  * terminology may vary between sources.
  */

 #ifndef TST_ATOMIC_H__
 #define TST_ATOMIC_H__

 #include "config.h"

 #if HAVE_ATOMIC_MEMORY_MODEL == 1
 /*
  * Atomically add i to *v and return the updated value.
  *
  * Uses the compiler's __atomic builtin with sequentially consistent
  * ordering (__ATOMIC_SEQ_CST).
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	int updated = __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);

 	return updated;
 }

 /*
  * Atomically read *v with sequentially consistent ordering and return
  * the value that was read.
  */
 static inline int tst_atomic_load(int *v)
 {
 	int snapshot;

 	__atomic_load(v, &snapshot, __ATOMIC_SEQ_CST);

 	return snapshot;
 }

 /*
  * Atomically write i to *v with sequentially consistent ordering.
  */
 static inline void tst_atomic_store(int i, int *v)
 {
 	__atomic_store(v, &i, __ATOMIC_SEQ_CST);
 }

 #elif HAVE_SYNC_ADD_AND_FETCH == 1
 /*
  * Atomically add i to *v and return the updated value.
  *
  * Variant for compilers that only provide the older __sync builtins.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	int updated = __sync_add_and_fetch(v, i);

 	return updated;
 }

 /*
  * Read *v with a __sync_synchronize() barrier issued before and after
  * the plain load, and return the value that was read.
  */
 static inline int tst_atomic_load(int *v)
 {
 	int value;

 	__sync_synchronize();
 	value = *v;
 	__sync_synchronize();

 	return value;
 }

 /*
  * Write i to *v with a __sync_synchronize() barrier issued before and
  * after the plain store.
  */
 static inline void tst_atomic_store(int i, int *v)
 {
 	int *const slot = v;

 	__sync_synchronize();
 	*slot = i;
 	__sync_synchronize();
 }

 #elif defined(__i386__) || defined(__x86_64__)
 # define LTP_USE_GENERIC_LOAD_STORE_ASM 1

 /*
  * Atomically add i to *v and return the updated value (x86 fallback).
  *
  * LOCK XADD stores the sum in *v and leaves the previous contents of *v
  * in the register operand, so the new value is rebuilt as i + previous.
  * Taken from arch/x86/include/asm/cmpxchg.h in the kernel tree.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	int prev = i;

 	/*
 	 * __asm__/__volatile__ are used instead of the plain asm keyword so
 	 * the header also builds in strict ISO modes (-std=c99/-std=c11),
 	 * where asm is not a keyword.
 	 */
 	__asm__ __volatile__ ("lock; xaddl %0, %1\n"
 		: "+r" (prev), "+m" (*v) : : "memory", "cc");

 	/*
 	 * Sum in unsigned arithmetic so the returned value wraps the same
 	 * way the value stored by XADD does, rather than overflowing a
 	 * signed int.
 	 */
 	return (int)((unsigned int)i + (unsigned int)prev);
 }

 #elif defined(__powerpc__) || defined(__powerpc64__)
 /*
  * Atomically add i to *v and return the updated value (PowerPC fallback).
  *
  * A lwarx/stwcx. (load-reserve / store-conditional) loop performs the
  * addition and is retried through "bne- 1b" until the conditional store
  * succeeds. The loop is bracketed by sync instructions.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	int t;

 	/* taken from arch/powerpc/include/asm/atomic.h */
 	asm volatile(
 		"	sync\n"
 		"1:	lwarx	%0,0,%2		# atomic_add_return\n"
 		"	add %0,%1,%0\n"
 		"	stwcx.	%0,0,%2 \n"
 		"	bne-	1b\n"
 		"	sync\n"
 		: "=&r" (t)
 		: "r" (i), "r" (v)
 		: "cc", "memory");

 	/* t holds the sum that the loop stored to *v. */
 	return t;
 }

 /*
  * Read *v with a "sync" instruction issued before and after the plain
  * load. The "memory" clobber also makes each statement a compiler barrier.
  */
 static inline int tst_atomic_load(int *v)
 {
 	int ret;

 	asm volatile("sync\n" : : : "memory");
 	ret = *v;
 	asm volatile("sync\n" : : : "memory");

 	return ret;
 }

 /*
  * Write i to *v with a "sync" instruction issued before and after the
  * plain store. The "memory" clobber also makes each statement a compiler
  * barrier.
  */
 static inline void tst_atomic_store(int i, int *v)
 {
 	asm volatile("sync\n" : : : "memory");
 	*v = i;
 	asm volatile("sync\n" : : : "memory");
 }

 #elif defined(__s390__) || defined(__s390x__)
 # define LTP_USE_GENERIC_LOAD_STORE_ASM 1

 /*
  * Atomically add i to *v and return the updated value (s390 fallback).
  *
  * The asm loads *v into old_val, builds new_val = old_val + i in a
  * register and tries to install it with compare-and-swap (cs); "jl 0b"
  * repeats the attempt while the swap does not succeed.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	int old_val, new_val;

 	/* taken from arch/s390/include/asm/atomic.h */
 	asm volatile(
 		"	l	%0,%2\n"
 		"0:	lr	%1,%0\n"
 		"	ar	%1,%3\n"
 		"	cs	%0,%1,%2\n"
 		"	jl	0b"
 		: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
 		: "d" (i)
 		: "cc", "memory");

 	/* The result is recomputed in C from the value that was replaced. */
 	return old_val + i;
 }

 #elif defined(__arc__)

 /*
  * Memory barrier used by the ARC fallback functions.
  *
  * ARCv2 defines the SMP barriers, so "dmb 3" is issued there. When
  * __ARC700__ is defined only a compiler barrier (empty asm with a
  * "memory" clobber) is emitted.
  */
 #ifdef __ARC700__
 #define smp_mb()	asm volatile("" : : : "memory")
 #else
 #define smp_mb()	asm volatile("dmb 3\n" : : : "memory")
 #endif

 /*
  * Atomically add i to *v and return the updated value (ARC fallback).
  *
  * A llock/scond (load-locked / store-conditional) loop performs the
  * addition and is retried through "bnz 1b" until the conditional store
  * succeeds. smp_mb() is issued before and after the loop.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	unsigned int val;

 	smp_mb();

 	asm volatile(
 		"1:	llock   %[val], [%[ctr]]	\n"
 		"	add     %[val], %[val], %[i]	\n"
 		"	scond   %[val], [%[ctr]]	\n"
 		"	bnz     1b			\n"
 		: [val]	"=&r"	(val)
 		: [ctr]	"r"	(v),
 		  [i]	"ir"	(i)
 		: "cc", "memory");

 	smp_mb();

 	/* val is unsigned here and is converted to int on return. */
 	return val;
 }

 /*
  * Read *v with smp_mb() issued before and after the plain load, and
  * return the value that was read.
  */
 static inline int tst_atomic_load(int *v)
 {
 	int value;

 	smp_mb();
 	value = *v;
 	smp_mb();

 	return value;
 }

 /*
  * Write i to *v with smp_mb() issued before and after the plain store.
  */
 static inline void tst_atomic_store(int i, int *v)
 {
 	int *const slot = v;

 	smp_mb();
 	*slot = i;
 	smp_mb();
 }

 #elif defined (__aarch64__)
 /*
  * Atomically add i to *v and return the updated value (AArch64 fallback).
  *
  * After a prefetch hint (prfm), a ldaxr/stlxr exclusive load/store loop
  * performs the addition. stlxr writes its status to tmp and "cbnz"
  * retries while that status is non-zero. A "dmb ish" barrier follows
  * the loop.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	unsigned long tmp;
 	int result;

 	__asm__ __volatile__(
 "       prfm    pstl1strm, %2	\n"
 "1:     ldaxr	%w0, %2		\n"
 "       add	%w0, %w0, %w3	\n"
 "       stlxr	%w1, %w0, %2	\n"
 "       cbnz	%w1, 1b		\n"
 "       dmb ish			\n"
 	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
 	: "Ir" (i)
 	: "memory");

 	/* result holds the sum that the loop stored to *v. */
 	return result;
 }

 /* We are using load and store exclusive (ldaxr & stlxr) instructions to try
  * and help prevent the tst_atomic_load and, more likely, tst_atomic_store
  * functions from interfering with tst_atomic_add_return which takes advantage
  * of exclusivity. It is not clear if this is a good idea or not, but does
  * mean that all three functions are very similar.
  *
  * tst_atomic_load returns the value read from *v. Note that the loop
  * stores the value it just loaded back to *v (stlxr), so *v is written
  * as well as read; the loop repeats while the store status in tmp is
  * non-zero and is followed by a "dmb ish" barrier.
  */
 static inline int tst_atomic_load(int *v)
 {
 	int ret;
 	unsigned long tmp;

 	asm volatile("//atomic_load			\n"
 		"	prfm	pstl1strm,  %[v]	\n"
 		"1:	ldaxr	%w[ret], %[v]		\n"
 		"	stlxr   %w[tmp], %w[ret], %[v]  \n"
 		"	cbnz    %w[tmp], 1b		\n"
 		"	dmb ish				\n"
 		: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
 		: : "memory");

 	return ret;
 }

 /*
  * Write i to *v (AArch64 fallback).
  *
  * ldaxr takes the exclusive load of *v (the loaded value is discarded,
  * tmp is reused for the store status), stlxr then stores i. "cbnz"
  * retries while the store status is non-zero and a "dmb ish" barrier
  * follows the loop.
  */
 static inline void tst_atomic_store(int i, int *v)
 {
 	unsigned long tmp;

 	asm volatile("//atomic_store			\n"
 		"	prfm	pstl1strm, %[v]		\n"
 		"1:	ldaxr	%w[tmp], %[v]		\n"
 		"	stlxr   %w[tmp], %w[i], %[v]	\n"
 		"	cbnz    %w[tmp], 1b		\n"
 		"	dmb ish				\n"
 		: [tmp] "=&r" (tmp), [v] "+Q" (*v)
 		: [i] "r" (i)
 		: "memory");
 }

 #elif defined(__sparc__) && defined(__arch64__)
 # define LTP_USE_GENERIC_LOAD_STORE_ASM 1
 /*
  * Atomically add i to *v and return the updated value (SPARC64 fallback).
  *
  * The loop loads *v into ret, computes tmp = ret + i and attempts a
  * compare-and-swap (cas) of tmp into *v. It retries while the value
  * handed back in tmp differs from ret. The final add turns ret, the old
  * value, into the new value that is returned.
  */
 static inline int tst_atomic_add_return(int i, int *v)
 {
 	int ret, tmp;

 	/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
 	 * function removed because we are unlikely to have a large (>= 16?)
 	 * number of cores continuously trying to update one variable.
 	 */
 	asm volatile("/*atomic_add_return*/		\n"
 		"1:	ldsw	[%[v]], %[ret];		\n"
 		"	add	%[ret], %[i], %[tmp];	\n"
 		"	cas	[%[v]], %[ret], %[tmp];	\n"
 		"	cmp	%[ret], %[tmp];		\n"
 		"	bne,pn	%%icc, 1b;		\n"
 		"	nop;				\n"
 		"	add	%[ret], %[i], %[ret];	\n"
 		: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
 		: [i] "r" (i), [v] "r" (v)
 		: "memory", "cc");

 	return ret;
 }

 #else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
 # error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
         and an LTP implementation is missing for your architecture.
 #endif

 #ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
 /*
  * Generic load for architectures that define
  * LTP_USE_GENERIC_LOAD_STORE_ASM: read *v and return the value.
  *
  * The empty asm statements with a "memory" clobber are compile-time
  * barriers only; they emit no instructions, they just stop the compiler
  * from moving or removing the load.
  *
  * __asm__/__volatile__ are used instead of the plain asm keyword so the
  * header also builds in strict ISO modes (-std=c99/-std=c11), where asm
  * is not a keyword.
  */
 static inline int tst_atomic_load(int *v)
 {
 	int ret;

 	__asm__ __volatile__("" : : : "memory");
 	ret = *v;
 	__asm__ __volatile__("" : : : "memory");

 	return ret;
 }

 /*
  * Generic store for architectures that define
  * LTP_USE_GENERIC_LOAD_STORE_ASM: write i to *v.
  *
  * The empty asm statements with a "memory" clobber are compile-time
  * barriers only; they emit no instructions, they just stop the compiler
  * from moving or removing the store.
  *
  * __asm__/__volatile__ are used instead of the plain asm keyword so the
  * header also builds in strict ISO modes (-std=c99/-std=c11), where asm
  * is not a keyword.
  */
 static inline void tst_atomic_store(int i, int *v)
 {
 	__asm__ __volatile__("" : : : "memory");
 	*v = i;
 	__asm__ __volatile__("" : : : "memory");
 }
 #endif

 /*
  * Atomically increment *v by one and return the updated value.
  */
 static inline int tst_atomic_inc(int *v)
 {
 	const int step = 1;

 	return tst_atomic_add_return(step, v);
 }

 /*
  * Atomically decrement *v by one and return the updated value.
  */
 static inline int tst_atomic_dec(int *v)
 {
 	const int step = -1;

 	return tst_atomic_add_return(step, v);
 }

 #endif	/* TST_ATOMIC_H__ */
	/*
	* Copyright (c) 2016 Cyril Hrubis <chrubis@suse.cz>
	*
	* This program is free software: you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation, either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	*/
	/* The LTP library has some of its own atomic synchronisation primitives
	* contained in this file. Generally speaking these should not be used
	* directly in tests for synchronisation, instead use tst_checkpoint.h,
	* tst_fuzzy_sync.h or the POSIX library.
	*
	* Notes on compile and runtime memory barriers and atomics.
	*
	* Within the LTP library we have three concerns when accessing variables
	* shared by multiple threads or processes:
	*
	* (1) Removal or reordering of accesses by the compiler.
	* (2) Atomicity of addition.
	* (3) LOAD-STORE ordering between threads.
	*
	* The first (1) is the most likely to cause an error if not properly
	* handled. We avoid it by using volatile variables and statements which will
	* not be removed or reordered by the compiler during optimisation. This includes
	* the __atomic and __sync intrinsics and volatile asm statements marked with
	* "memory" as well as variables marked with volatile.
	*
	* On any platform Linux is likely to run on, a LOAD (fetch) or STORE of a
	* 32-bit integer will be atomic. However fetching and adding to a variable is
	* quite likely not; so for (2) we need to ensure we use atomic addition.
	*
	* Finally, for tst_fuzzy_sync at least, we need to ensure that LOADs and
	* STOREs of any shared variables (including non-atomics) that are made
	* between calls to tst_fzsync_wait are completed (globally visible) before
	* tst_fzsync_wait completes. For this, runtime memory and instruction
	* barriers are required in addition to compile time.
	*
	* We use full sequential ordering (__ATOMIC_SEQ_CST) for the sake of
	* simplicity. LTP tests tend to be syscall heavy so any performance gain from
	* using a weaker memory model is unlikely to result in a relatively large
	* performance improvement while at the same time being a potent source of
	* confusion.
	*
	* Likewise, for the fallback ASM, the simplest "definitely will work, always"
	* approach is preferred over anything more performant.
	*
	* Also see Documentation/memory-barriers.txt in the kernel tree and
	* https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
	* terminology may vary between sources.
	*/

	#ifndef TST_ATOMIC_H__
	#define TST_ATOMIC_H__

	#include "config.h"

	#if HAVE_ATOMIC_MEMORY_MODEL == 1
	/*
	 * Atomically add i to *v and return the updated value.
	 *
	 * Uses the compiler's __atomic builtin with sequentially consistent
	 * ordering (__ATOMIC_SEQ_CST).
	 */
	static inline int tst_atomic_add_return(int i, int *v)
	{
		int updated = __atomic_add_fetch(v, i, __ATOMIC_SEQ_CST);

		return updated;
	}

	/*
	 * Atomically read *v with sequentially consistent ordering and return
	 * the value that was read.
	 */
	static inline int tst_atomic_load(int *v)
	{
		int snapshot;

		__atomic_load(v, &snapshot, __ATOMIC_SEQ_CST);

		return snapshot;
	}

	/*
	 * Atomically write i to *v with sequentially consistent ordering.
	 */
	static inline void tst_atomic_store(int i, int *v)
	{
		__atomic_store(v, &i, __ATOMIC_SEQ_CST);
	}

	#elif HAVE_SYNC_ADD_AND_FETCH == 1
	/*
	 * Atomically add i to *v and return the updated value.
	 *
	 * Variant for compilers that only provide the older __sync builtins.
	 */
	static inline int tst_atomic_add_return(int i, int *v)
	{
		int updated = __sync_add_and_fetch(v, i);

		return updated;
	}

	/*
	 * Read *v with a __sync_synchronize() barrier issued before and after
	 * the plain load, and return the value that was read.
	 */
	static inline int tst_atomic_load(int *v)
	{
		int value;

		__sync_synchronize();
		value = *v;
		__sync_synchronize();

		return value;
	}

	/*
	 * Write i to *v with a __sync_synchronize() barrier issued before and
	 * after the plain store.
	 */
	static inline void tst_atomic_store(int i, int *v)
	{
		int *const slot = v;

		__sync_synchronize();
		*slot = i;
		__sync_synchronize();
	}

	#elif defined(__i386__) || defined(__x86_64__)
	# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

	/*
	 * Atomically add i to *v and return the updated value (x86 fallback).
	 *
	 * LOCK XADD stores the sum in *v and leaves the previous contents of *v
	 * in the register operand, so the new value is rebuilt as i + previous.
	 * Taken from arch/x86/include/asm/cmpxchg.h in the kernel tree.
	 */
	static inline int tst_atomic_add_return(int i, int *v)
	{
		int prev = i;

		/*
		 * __asm__/__volatile__ are used instead of the plain asm keyword
		 * so the header also builds in strict ISO modes
		 * (-std=c99/-std=c11), where asm is not a keyword.
		 */
		__asm__ __volatile__ ("lock; xaddl %0, %1\n"
			: "+r" (prev), "+m" (*v) : : "memory", "cc");

		/*
		 * Sum in unsigned arithmetic so the returned value wraps the same
		 * way the value stored by XADD does, rather than overflowing a
		 * signed int.
		 */
		return (int)((unsigned int)i + (unsigned int)prev);
	}

	#elif defined(__powerpc__) || defined(__powerpc64__)
	/*
	* Atomically add i to *v and return the updated value (PowerPC fallback).
	*
	* A lwarx/stwcx. (load-reserve / store-conditional) loop performs the
	* addition and is retried through "bne- 1b" until the conditional store
	* succeeds. The loop is bracketed by sync instructions.
	*/
	static inline int tst_atomic_add_return(int i, int *v)
	{
	int t;

	/* taken from arch/powerpc/include/asm/atomic.h */
	asm volatile(
	" sync\n"
	"1: lwarx %0,0,%2 # atomic_add_return\n"
	" add %0,%1,%0\n"
	" stwcx. %0,0,%2 \n"
	" bne- 1b\n"
	" sync\n"
	: "=&r" (t)
	: "r" (i), "r" (v)
	: "cc", "memory");

	/* t holds the sum that the loop stored to *v. */
	return t;
	}

	/*
	* Read *v with a "sync" instruction issued before and after the plain
	* load. The "memory" clobber also makes each statement a compiler barrier.
	*/
	static inline int tst_atomic_load(int *v)
	{
	int ret;

	asm volatile("sync\n" : : : "memory");
	ret = *v;
	asm volatile("sync\n" : : : "memory");

	return ret;
	}

	/*
	* Write i to *v with a "sync" instruction issued before and after the
	* plain store. The "memory" clobber also makes each statement a compiler
	* barrier.
	*/
	static inline void tst_atomic_store(int i, int *v)
	{
	asm volatile("sync\n" : : : "memory");
	*v = i;
	asm volatile("sync\n" : : : "memory");
	}

	#elif defined(__s390__) || defined(__s390x__)
	# define LTP_USE_GENERIC_LOAD_STORE_ASM 1

	/*
	* Atomically add i to *v and return the updated value (s390 fallback).
	*
	* The asm loads *v into old_val, builds new_val = old_val + i in a
	* register and tries to install it with compare-and-swap (cs); "jl 0b"
	* repeats the attempt while the swap does not succeed.
	*/
	static inline int tst_atomic_add_return(int i, int *v)
	{
	int old_val, new_val;

	/* taken from arch/s390/include/asm/atomic.h */
	asm volatile(
	" l %0,%2\n"
	"0: lr %1,%0\n"
	" ar %1,%3\n"
	" cs %0,%1,%2\n"
	" jl 0b"
	: "=&d" (old_val), "=&d" (new_val), "+Q" (*v)
	: "d" (i)
	: "cc", "memory");

	/* The result is recomputed in C from the value that was replaced. */
	return old_val + i;
	}

	#elif defined(__arc__)

	/* ARCv2 defines the smp barriers */
	/*
	* smp_mb(): memory barrier used by the ARC fallback functions. When
	* __ARC700__ is defined only a compiler barrier (empty asm with a
	* "memory" clobber) is emitted; otherwise "dmb 3" is issued.
	*/
	#ifdef __ARC700__
	#define smp_mb() asm volatile("" : : : "memory")
	#else
	#define smp_mb() asm volatile("dmb 3\n" : : : "memory")
	#endif

	/*
	* Atomically add i to *v and return the updated value (ARC fallback).
	*
	* A llock/scond (load-locked / store-conditional) loop performs the
	* addition and is retried through "bnz 1b" until the conditional store
	* succeeds. smp_mb() is issued before and after the loop.
	*/
	static inline int tst_atomic_add_return(int i, int *v)
	{
	unsigned int val;

	smp_mb();

	asm volatile(
	"1: llock %[val], [%[ctr]] \n"
	" add %[val], %[val], %[i] \n"
	" scond %[val], [%[ctr]] \n"
	" bnz 1b \n"
	: [val] "=&r" (val)
	: [ctr] "r" (v),
	[i] "ir" (i)
	: "cc", "memory");

	smp_mb();

	/* val is unsigned here and is converted to int on return. */
	return val;
	}

	/*
	 * Read *v with smp_mb() issued before and after the plain load, and
	 * return the value that was read.
	 */
	static inline int tst_atomic_load(int *v)
	{
		int value;

		smp_mb();
		value = *v;
		smp_mb();

		return value;
	}

	/*
	 * Write i to *v with smp_mb() issued before and after the plain store.
	 */
	static inline void tst_atomic_store(int i, int *v)
	{
		int *const slot = v;

		smp_mb();
		*slot = i;
		smp_mb();
	}

	#elif defined (__aarch64__)
	/*
	* Atomically add i to *v and return the updated value (AArch64 fallback).
	*
	* After a prefetch hint (prfm), a ldaxr/stlxr exclusive load/store loop
	* performs the addition. stlxr writes its status to tmp and "cbnz"
	* retries while that status is non-zero. A "dmb ish" barrier follows
	* the loop.
	*/
	static inline int tst_atomic_add_return(int i, int *v)
	{
	unsigned long tmp;
	int result;

	__asm__ __volatile__(
	" prfm pstl1strm, %2 \n"
	"1: ldaxr %w0, %2 \n"
	" add %w0, %w0, %w3 \n"
	" stlxr %w1, %w0, %2 \n"
	" cbnz %w1, 1b \n"
	" dmb ish \n"
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "Ir" (i)
	: "memory");

	/* result holds the sum that the loop stored to *v. */
	return result;
	}

	/* We are using load and store exclusive (ldaxr & stlxr) instructions to try
	* and help prevent the tst_atomic_load and, more likely, tst_atomic_store
	* functions from interfering with tst_atomic_add_return which takes advantage
	* of exclusivity. It is not clear if this is a good idea or not, but does
	* mean that all three functions are very similar.
	*
	* tst_atomic_load returns the value read from *v. Note that the loop
	* stores the value it just loaded back to *v (stlxr), so *v is written
	* as well as read; the loop repeats while the store status in tmp is
	* non-zero and is followed by a "dmb ish" barrier.
	*/
	static inline int tst_atomic_load(int *v)
	{
	int ret;
	unsigned long tmp;

	asm volatile("//atomic_load \n"
	" prfm pstl1strm, %[v] \n"
	"1: ldaxr %w[ret], %[v] \n"
	" stlxr %w[tmp], %w[ret], %[v] \n"
	" cbnz %w[tmp], 1b \n"
	" dmb ish \n"
	: [tmp] "=&r" (tmp), [ret] "=&r" (ret), [v] "+Q" (*v)
	: : "memory");

	return ret;
	}

	/*
	* Write i to *v (AArch64 fallback).
	*
	* ldaxr takes the exclusive load of *v (the loaded value is discarded,
	* tmp is reused for the store status), stlxr then stores i. "cbnz"
	* retries while the store status is non-zero and a "dmb ish" barrier
	* follows the loop.
	*/
	static inline void tst_atomic_store(int i, int *v)
	{
	unsigned long tmp;

	asm volatile("//atomic_store \n"
	" prfm pstl1strm, %[v] \n"
	"1: ldaxr %w[tmp], %[v] \n"
	" stlxr %w[tmp], %w[i], %[v] \n"
	" cbnz %w[tmp], 1b \n"
	" dmb ish \n"
	: [tmp] "=&r" (tmp), [v] "+Q" (*v)
	: [i] "r" (i)
	: "memory");
	}

	#elif defined(__sparc__) && defined(__arch64__)
	# define LTP_USE_GENERIC_LOAD_STORE_ASM 1
	/*
	 * Atomically add i to *v and return the updated value (SPARC64 fallback).
	 *
	 * The loop loads *v into ret, computes tmp = ret + i and attempts a
	 * compare-and-swap (cas) of tmp into *v. It retries while the value
	 * handed back in tmp differs from ret. The final add turns ret, the old
	 * value, into the new value that is returned.
	 */
	static inline int tst_atomic_add_return(int i, int *v)
	{
		int ret, tmp;

		/* Based on arch/sparc/lib/atomic_64.S with the exponential backoff
		 * function removed because we are unlikely to have a large (>= 16?)
		 * number of cores continuously trying to update one variable.
		 *
		 * The leading asm line is an assembler comment; its delimiters
		 * must be kept intact or the text is parsed as an instruction.
		 */
		asm volatile("/*atomic_add_return*/ \n"
			"1: ldsw [%[v]], %[ret]; \n"
			" add %[ret], %[i], %[tmp]; \n"
			" cas [%[v]], %[ret], %[tmp]; \n"
			" cmp %[ret], %[tmp]; \n"
			" bne,pn %%icc, 1b; \n"
			" nop; \n"
			" add %[ret], %[i], %[ret]; \n"
			: [ret] "=r&" (ret), [tmp] "=r&" (tmp)
			: [i] "r" (i), [v] "r" (v)
			: "memory", "cc");

		return ret;
	}

	#else /* HAVE_SYNC_ADD_AND_FETCH == 1 */
	# error Your compiler does not provide __atomic_add_fetch, __sync_add_and_fetch \
	and an LTP implementation is missing for your architecture.
	#endif

	#ifdef LTP_USE_GENERIC_LOAD_STORE_ASM
	/*
	 * Generic load for architectures that define
	 * LTP_USE_GENERIC_LOAD_STORE_ASM: read *v and return the value.
	 *
	 * The empty asm statements with a "memory" clobber are compile-time
	 * barriers only; they emit no instructions, they just stop the compiler
	 * from moving or removing the load.
	 *
	 * __asm__/__volatile__ are used instead of the plain asm keyword so the
	 * header also builds in strict ISO modes (-std=c99/-std=c11), where asm
	 * is not a keyword.
	 */
	static inline int tst_atomic_load(int *v)
	{
		int ret;

		__asm__ __volatile__("" : : : "memory");
		ret = *v;
		__asm__ __volatile__("" : : : "memory");

		return ret;
	}

	/*
	 * Generic store for architectures that define
	 * LTP_USE_GENERIC_LOAD_STORE_ASM: write i to *v.
	 *
	 * The empty asm statements with a "memory" clobber are compile-time
	 * barriers only; they emit no instructions, they just stop the compiler
	 * from moving or removing the store.
	 *
	 * __asm__/__volatile__ are used instead of the plain asm keyword so the
	 * header also builds in strict ISO modes (-std=c99/-std=c11), where asm
	 * is not a keyword.
	 */
	static inline void tst_atomic_store(int i, int *v)
	{
		__asm__ __volatile__("" : : : "memory");
		*v = i;
		__asm__ __volatile__("" : : : "memory");
	}
	#endif

	/*
	 * Atomically increment *v by one and return the updated value.
	 */
	static inline int tst_atomic_inc(int *v)
	{
		const int step = 1;

		return tst_atomic_add_return(step, v);
	}

	/*
	 * Atomically decrement *v by one and return the updated value.
	 */
	static inline int tst_atomic_dec(int *v)
	{
		const int step = -1;

		return tst_atomic_add_return(step, v);
	}

	#endif /* TST_ATOMIC_H__ */