/* ptw32_InterlockedCompareExchange.c - platform/external/pthreads - Git at Google */

 /*
  * ptw32_InterlockedCompareExchange.c
  *
  * Description:
  * This translation unit implements routines which are private to
  * the implementation and may be used throughout it.
  *
  * --------------------------------------------------------------------------
  *
  *      Pthreads-win32 - POSIX Threads Library for Win32
  *      Copyright(C) 1998 John E. Bossom
  *      Copyright(C) 1999,2005 Pthreads-win32 contributors
  *
  *      Contact Email: rpj@callisto.canberra.edu.au
  *
  *      The current list of contributors is contained
  *      in the file CONTRIBUTORS included with the source
  *      code distribution. The list can also be seen at the
  *      following World Wide Web location:
  *      http://sources.redhat.com/pthreads-win32/contributors.html
  *
  *      This library is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU Lesser General Public
  *      License as published by the Free Software Foundation; either
  *      version 2 of the License, or (at your option) any later version.
  *
  *      This library is distributed in the hope that it will be useful,
  *      but WITHOUT ANY WARRANTY; without even the implied warranty of
  *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  *      Lesser General Public License for more details.
  *
  *      You should have received a copy of the GNU Lesser General Public
  *      License along with this library in the file COPYING.LIB;
  *      if not, write to the Free Software Foundation, Inc.,
  *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
  */

 #include "pthread.h"
 #include "implement.h"


 /*
  * ptw32_InterlockedCompareExchange --
  *
  * Originally needed because W9x doesn't support InterlockedCompareExchange.
  * We now use this version wherever possible so we can inline it.
  *
  * Compares the value stored at 'location' with 'comparand' and, when they
  * are equal, stores 'value' at 'location'. Both steps are carried out by a
  * single CMPXCHG instruction.
  *
  * Parameters:
  *   location   address of the value to test and conditionally replace
  *   value      replacement written to 'location' when the test succeeds
  *   comparand  value that 'location' is expected to contain
  *
  * Returns:
  *   The value that was at 'location' before the operation (this equals
  *   'comparand' exactly when the store took place).
  *   When built for a processor/compiler combination that has no
  *   implementation below, nothing is exchanged and 0 is returned.
  */

 PTW32_INTERLOCKED_LONG WINAPI
 ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
                                   PTW32_INTERLOCKED_LONG value,
                                   PTW32_INTERLOCKED_LONG comparand)
 {

 #if defined(__WATCOMC__)
 /* Don't report that result is not assigned a value before being referenced */
 #pragma disable_message (200)
 #endif

   PTW32_INTERLOCKED_LONG result;

   /*
    * Using the LOCK prefix on uni-processor machines is significantly slower
    * and it is not necessary. The overhead of the conditional below is
    * negligible in comparison. Since an optimised DLL will inline this
    * routine, this will be faster than calling the system supplied
    * Interlocked routine, which appears to avoid the LOCK prefix on
    * uniprocessor systems. So one DLL works for all systems.
    *
    * The test on ptw32_smp_system lives inside each supported branch so that
    * the fallback branch always assigns 'result' and always reaches the
    * return statement.
    */

 /* *INDENT-OFF* */

 #if (defined(_M_IX86) || defined(_X86_)) && (defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)))
 #define HAVE_INLINABLE_INTERLOCKED_CMPXCHG

   if (ptw32_smp_system)
     {
       _asm {
         PUSH         ecx
         PUSH         edx
         MOV          ecx,dword ptr [location]
         MOV          edx,dword ptr [value]
         MOV          eax,dword ptr [comparand]
         LOCK CMPXCHG dword ptr [ecx],edx
         MOV          dword ptr [result], eax
         POP          edx
         POP          ecx
       }
     }
   else
     {
       _asm {
         PUSH         ecx
         PUSH         edx
         MOV          ecx,dword ptr [location]
         MOV          edx,dword ptr [value]
         MOV          eax,dword ptr [comparand]
         CMPXCHG      dword ptr [ecx],edx
         MOV          dword ptr [result], eax
         POP          edx
         POP          ecx
       }
     }

 #elif (defined(_M_IX86) || defined(_X86_)) && defined(__GNUC__)
 #define HAVE_INLINABLE_INTERLOCKED_CMPXCHG

   /*
    * '*location' is declared as a read-write ("+m") operand because CMPXCHG
    * may store to it, and "memory" is clobbered so the compiler does not
    * keep values cached in registers across the operation.
    */
   if (ptw32_smp_system)
     {
       __asm__ __volatile__
         (
          "lock\n\t"
          "cmpxchgl       %2,%1"      /* if (EAX == [location])  */
                                      /*   [location] = value    */
                                      /* else                    */
                                      /*   EAX = [location]      */
          :"=a" (result), "+m" (*location)
          :"r"  (value), "a" (comparand)
          :"memory", "cc");
     }
   else
     {
       __asm__ __volatile__
         (
          "cmpxchgl       %2,%1"      /* if (EAX == [location])  */
                                      /*   [location] = value    */
                                      /* else                    */
                                      /*   EAX = [location]      */
          :"=a" (result), "+m" (*location)
          :"r"  (value), "a" (comparand)
          :"memory", "cc");
     }

 #else

   /*
    * Currently unsupported processor or compiler: no exchange is
    * performed and a well-defined value is returned.
    */
   (void) location;
   (void) value;
   (void) comparand;
   result = 0;

 #endif

 /* *INDENT-ON* */

   return result;

 #if defined(__WATCOMC__)
 #pragma enable_message (200)
 #endif

 }

 /*
  * ptw32_InterlockedExchange --
  *
  * We now use this version wherever possible so we can inline it.
  *
  * Stores 'value' at 'location' and hands back what was stored there
  * before.
  *
  * Parameters:
  *   location   address of the value to replace
  *   value      new value written to 'location'
  *
  * Returns:
  *   The value that was at 'location' before the exchange.
  *   When built for a processor/compiler combination that has no
  *   implementation below, nothing is exchanged and 0 is returned.
  */

 LONG WINAPI
 ptw32_InterlockedExchange (LPLONG location,
                            LONG value)
 {

 #if defined(__WATCOMC__)
 /* Don't report that result is not assigned a value before being referenced */
 #pragma disable_message (200)
 #endif

   LONG result;

   /*
    * The XCHG instruction always locks the bus with or without the
    * LOCKED prefix. This makes it significantly slower than CMPXCHG on
    * uni-processor machines. The Windows InterlockedExchange function
    * is nearly 3 times faster than the XCHG instruction, so this routine
    * is not yet very useful for speeding up pthreads.
    *
    * The test on ptw32_smp_system lives inside each supported branch so that
    * the fallback branch always assigns 'result' and always reaches the
    * return statement.
    */

 /* *INDENT-OFF* */

 #if (defined(_M_IX86) || defined(_X86_)) && (defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)))
 #define HAVE_INLINABLE_INTERLOCKED_XCHG

   if (ptw32_smp_system)
     {
       _asm {
         PUSH         ecx
         MOV          ecx,dword ptr [location]
         MOV          eax,dword ptr [value]
         XCHG         dword ptr [ecx],eax
         MOV          dword ptr [result], eax
         POP          ecx
       }
     }
   else
     {
       /*
        * Faster version of XCHG for uni-processor systems because
        * it doesn't lock the bus. If an interrupt or context switch
        * occurs between the MOV and the CMPXCHG then the value in
        * 'location' may have changed, in which case we will loop
        * back to do the MOV again.
        *
        * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
        *
        * Tests show that this routine has almost identical timing
        * to Win32's InterlockedExchange(), which is much faster than
        * using the inlined 'xchg' instruction above, so it's probably
        * doing something similar to this (on UP systems).
        *
        * Can we do without the PUSH/POP instructions?
        */
       _asm {
         PUSH         ecx
         PUSH         edx
         MOV          ecx,dword ptr [location]
         MOV          edx,dword ptr [value]
 L1:     MOV          eax,dword ptr [ecx]
         CMPXCHG      dword ptr [ecx],edx
         JNZ          L1
         MOV          dword ptr [result], eax
         POP          edx
         POP          ecx
       }
     }

 #elif (defined(_M_IX86) || defined(_X86_)) && defined(__GNUC__)
 #define HAVE_INLINABLE_INTERLOCKED_XCHG

   /*
    * '*location' is declared as a read-write ("+m") operand because both
    * sequences store to it, and "memory" is clobbered so the compiler does
    * not keep values cached in registers across the operation.
    */
   if (ptw32_smp_system)
     {
       __asm__ __volatile__
         (
          "xchgl          %2,%1"
          :"=r" (result), "+m" (*location)
          :"0"  (value)
          :"memory");
     }
   else
     {
       /*
        * Faster version of XCHG for uni-processor systems because
        * it doesn't lock the bus. If an interrupt or context switch
        * occurs between the movl and the cmpxchgl then the value in
        * 'location' may have changed, in which case we will loop
        * back to do the movl again.
        *
        * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
        *
        * Tests show that this routine has almost identical timing
        * to Win32's InterlockedExchange(), which is much faster than
        * using an inlined 'xchg' instruction, so it's probably
        * doing something similar to this (on UP systems).
        */
       __asm__ __volatile__
         (
          "0:\n\t"
          "movl           %1,%%eax\n\t"
          "cmpxchgl       %2,%1\n\t"
          "jnz            0b"
          :"=&a" (result), "+m" (*location)
          :"r"  (value)
          :"memory", "cc");
     }

 #else

   /*
    * Currently unsupported processor or compiler: no exchange is
    * performed and a well-defined value is returned.
    */
   (void) location;
   (void) value;
   result = 0;

 #endif

 /* *INDENT-ON* */

   return result;

 #if defined(__WATCOMC__)
 #pragma enable_message (200)
 #endif

 }


 /*
  * When PTW32_BUILD_INLINED is defined, point the interlocked macros at the
  * routines in this file, but only for those routines whose inline-assembly
  * implementation was compiled in (signalled by the HAVE_INLINABLE_* macros).
  */
 #ifdef PTW32_BUILD_INLINED

 #  ifdef HAVE_INLINABLE_INTERLOCKED_CMPXCHG
 #    undef PTW32_INTERLOCKED_COMPARE_EXCHANGE
 #    define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange
 #  endif

 #  ifdef HAVE_INLINABLE_INTERLOCKED_XCHG
 #    undef PTW32_INTERLOCKED_EXCHANGE
 #    define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange
 #  endif

 #endif /* PTW32_BUILD_INLINED */
	/*
	* ptw32_InterlockedCompareExchange.c
	*
	* Description:
	* This translation unit implements routines which are private to
	* the implementation and may be used throughout it.
	*
	* --------------------------------------------------------------------------
	*
	* Pthreads-win32 - POSIX Threads Library for Win32
	* Copyright(C) 1998 John E. Bossom
	* Copyright(C) 1999,2005 Pthreads-win32 contributors
	*
	* Contact Email: rpj@callisto.canberra.edu.au
	*
	* The current list of contributors is contained
	* in the file CONTRIBUTORS included with the source
	* code distribution. The list can also be seen at the
	* following World Wide Web location:
	* http://sources.redhat.com/pthreads-win32/contributors.html
	*
	* This library is free software; you can redistribute it and/or
	* modify it under the terms of the GNU Lesser General Public
	* License as published by the Free Software Foundation; either
	* version 2 of the License, or (at your option) any later version.
	*
	* This library is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* Lesser General Public License for more details.
	*
	* You should have received a copy of the GNU Lesser General Public
	* License along with this library in the file COPYING.LIB;
	* if not, write to the Free Software Foundation, Inc.,
	* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
	*/

	#include "pthread.h"
	#include "implement.h"


	/*
	 * ptw32_InterlockedCompareExchange --
	 *
	 * Originally needed because W9x doesn't support InterlockedCompareExchange.
	 * We now use this version wherever possible so we can inline it.
	 *
	 * Compares the value stored at 'location' with 'comparand' and, when they
	 * are equal, stores 'value' at 'location'. Both steps are carried out by a
	 * single CMPXCHG instruction.
	 *
	 * Parameters:
	 *   location   address of the value to test and conditionally replace
	 *   value      replacement written to 'location' when the test succeeds
	 *   comparand  value that 'location' is expected to contain
	 *
	 * Returns:
	 *   The value that was at 'location' before the operation (this equals
	 *   'comparand' exactly when the store took place).
	 *   When built for a processor/compiler combination that has no
	 *   implementation below, nothing is exchanged and 0 is returned.
	 */

	PTW32_INTERLOCKED_LONG WINAPI
	ptw32_InterlockedCompareExchange (PTW32_INTERLOCKED_LPLONG location,
	                                  PTW32_INTERLOCKED_LONG value,
	                                  PTW32_INTERLOCKED_LONG comparand)
	{

	#if defined(__WATCOMC__)
	/* Don't report that result is not assigned a value before being referenced */
	#pragma disable_message (200)
	#endif

	  PTW32_INTERLOCKED_LONG result;

	  /*
	   * Using the LOCK prefix on uni-processor machines is significantly slower
	   * and it is not necessary. The overhead of the conditional below is
	   * negligible in comparison. Since an optimised DLL will inline this
	   * routine, this will be faster than calling the system supplied
	   * Interlocked routine, which appears to avoid the LOCK prefix on
	   * uniprocessor systems. So one DLL works for all systems.
	   *
	   * The test on ptw32_smp_system lives inside each supported branch so that
	   * the fallback branch always assigns 'result' and always reaches the
	   * return statement.
	   */

	/* *INDENT-OFF* */

	#if (defined(_M_IX86) || defined(_X86_)) && (defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)))
	#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG

	  if (ptw32_smp_system)
	    {
	      _asm {
	        PUSH         ecx
	        PUSH         edx
	        MOV          ecx,dword ptr [location]
	        MOV          edx,dword ptr [value]
	        MOV          eax,dword ptr [comparand]
	        LOCK CMPXCHG dword ptr [ecx],edx
	        MOV          dword ptr [result], eax
	        POP          edx
	        POP          ecx
	      }
	    }
	  else
	    {
	      _asm {
	        PUSH         ecx
	        PUSH         edx
	        MOV          ecx,dword ptr [location]
	        MOV          edx,dword ptr [value]
	        MOV          eax,dword ptr [comparand]
	        CMPXCHG      dword ptr [ecx],edx
	        MOV          dword ptr [result], eax
	        POP          edx
	        POP          ecx
	      }
	    }

	#elif (defined(_M_IX86) || defined(_X86_)) && defined(__GNUC__)
	#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG

	  /*
	   * '*location' is declared as a read-write ("+m") operand because CMPXCHG
	   * may store to it, and "memory" is clobbered so the compiler does not
	   * keep values cached in registers across the operation.
	   */
	  if (ptw32_smp_system)
	    {
	      __asm__ __volatile__
	        (
	         "lock\n\t"
	         "cmpxchgl       %2,%1"      /* if (EAX == [location])  */
	                                     /*   [location] = value    */
	                                     /* else                    */
	                                     /*   EAX = [location]      */
	         :"=a" (result), "+m" (*location)
	         :"r"  (value), "a" (comparand)
	         :"memory", "cc");
	    }
	  else
	    {
	      __asm__ __volatile__
	        (
	         "cmpxchgl       %2,%1"      /* if (EAX == [location])  */
	                                     /*   [location] = value    */
	                                     /* else                    */
	                                     /*   EAX = [location]      */
	         :"=a" (result), "+m" (*location)
	         :"r"  (value), "a" (comparand)
	         :"memory", "cc");
	    }

	#else

	  /*
	   * Currently unsupported processor or compiler: no exchange is
	   * performed and a well-defined value is returned.
	   */
	  (void) location;
	  (void) value;
	  (void) comparand;
	  result = 0;

	#endif

	/* *INDENT-ON* */

	  return result;

	#if defined(__WATCOMC__)
	#pragma enable_message (200)
	#endif

	}

	/*
	 * ptw32_InterlockedExchange --
	 *
	 * We now use this version wherever possible so we can inline it.
	 *
	 * Stores 'value' at 'location' and hands back what was stored there
	 * before.
	 *
	 * Parameters:
	 *   location   address of the value to replace
	 *   value      new value written to 'location'
	 *
	 * Returns:
	 *   The value that was at 'location' before the exchange.
	 *   When built for a processor/compiler combination that has no
	 *   implementation below, nothing is exchanged and 0 is returned.
	 */

	LONG WINAPI
	ptw32_InterlockedExchange (LPLONG location,
	                           LONG value)
	{

	#if defined(__WATCOMC__)
	/* Don't report that result is not assigned a value before being referenced */
	#pragma disable_message (200)
	#endif

	  LONG result;

	  /*
	   * The XCHG instruction always locks the bus with or without the
	   * LOCKED prefix. This makes it significantly slower than CMPXCHG on
	   * uni-processor machines. The Windows InterlockedExchange function
	   * is nearly 3 times faster than the XCHG instruction, so this routine
	   * is not yet very useful for speeding up pthreads.
	   *
	   * The test on ptw32_smp_system lives inside each supported branch so that
	   * the fallback branch always assigns 'result' and always reaches the
	   * return statement.
	   */

	/* *INDENT-OFF* */

	#if (defined(_M_IX86) || defined(_X86_)) && (defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32)))
	#define HAVE_INLINABLE_INTERLOCKED_XCHG

	  if (ptw32_smp_system)
	    {
	      _asm {
	        PUSH         ecx
	        MOV          ecx,dword ptr [location]
	        MOV          eax,dword ptr [value]
	        XCHG         dword ptr [ecx],eax
	        MOV          dword ptr [result], eax
	        POP          ecx
	      }
	    }
	  else
	    {
	      /*
	       * Faster version of XCHG for uni-processor systems because
	       * it doesn't lock the bus. If an interrupt or context switch
	       * occurs between the MOV and the CMPXCHG then the value in
	       * 'location' may have changed, in which case we will loop
	       * back to do the MOV again.
	       *
	       * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
	       *
	       * Tests show that this routine has almost identical timing
	       * to Win32's InterlockedExchange(), which is much faster than
	       * using the inlined 'xchg' instruction above, so it's probably
	       * doing something similar to this (on UP systems).
	       *
	       * Can we do without the PUSH/POP instructions?
	       */
	      _asm {
	        PUSH         ecx
	        PUSH         edx
	        MOV          ecx,dword ptr [location]
	        MOV          edx,dword ptr [value]
	L1:     MOV          eax,dword ptr [ecx]
	        CMPXCHG      dword ptr [ecx],edx
	        JNZ          L1
	        MOV          dword ptr [result], eax
	        POP          edx
	        POP          ecx
	      }
	    }

	#elif (defined(_M_IX86) || defined(_X86_)) && defined(__GNUC__)
	#define HAVE_INLINABLE_INTERLOCKED_XCHG

	  /*
	   * '*location' is declared as a read-write ("+m") operand because both
	   * sequences store to it, and "memory" is clobbered so the compiler does
	   * not keep values cached in registers across the operation.
	   */
	  if (ptw32_smp_system)
	    {
	      __asm__ __volatile__
	        (
	         "xchgl          %2,%1"
	         :"=r" (result), "+m" (*location)
	         :"0"  (value)
	         :"memory");
	    }
	  else
	    {
	      /*
	       * Faster version of XCHG for uni-processor systems because
	       * it doesn't lock the bus. If an interrupt or context switch
	       * occurs between the movl and the cmpxchgl then the value in
	       * 'location' may have changed, in which case we will loop
	       * back to do the movl again.
	       *
	       * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
	       *
	       * Tests show that this routine has almost identical timing
	       * to Win32's InterlockedExchange(), which is much faster than
	       * using an inlined 'xchg' instruction, so it's probably
	       * doing something similar to this (on UP systems).
	       */
	      __asm__ __volatile__
	        (
	         "0:\n\t"
	         "movl           %1,%%eax\n\t"
	         "cmpxchgl       %2,%1\n\t"
	         "jnz            0b"
	         :"=&a" (result), "+m" (*location)
	         :"r"  (value)
	         :"memory", "cc");
	    }

	#else

	  /*
	   * Currently unsupported processor or compiler: no exchange is
	   * performed and a well-defined value is returned.
	   */
	  (void) location;
	  (void) value;
	  result = 0;

	#endif

	/* *INDENT-ON* */

	  return result;

	#if defined(__WATCOMC__)
	#pragma enable_message (200)
	#endif

	}


	/*
	 * When PTW32_BUILD_INLINED is defined, point the interlocked macros at the
	 * routines in this file, but only for those routines whose inline-assembly
	 * implementation was compiled in (signalled by the HAVE_INLINABLE_* macros).
	 */
	#ifdef PTW32_BUILD_INLINED

	#  ifdef HAVE_INLINABLE_INTERLOCKED_CMPXCHG
	#    undef PTW32_INTERLOCKED_COMPARE_EXCHANGE
	#    define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange
	#  endif

	#  ifdef HAVE_INLINABLE_INTERLOCKED_XCHG
	#    undef PTW32_INTERLOCKED_EXCHANGE
	#    define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange
	#  endif

	#endif /* PTW32_BUILD_INLINED */