// test_common/harness/msvc9.c - platform/external/OpenCL-CTS - Git at Google

 //
 // Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #include "compat.h"

 #if defined ( _MSC_VER )

 #include <limits.h>
 #include <stdlib.h>

 #include <CL/cl.h>

 #include <windows.h>

 #if ! defined( __INTEL_COMPILER )

 ///////////////////////////////////////////////////////////////////
 //
 //      copysign, copysignf, copysignl, rint, rintf, rintl
 //
 ///////////////////////////////////////////////////////////////////

 /* Returns a float carrying the magnitude bits of x and the sign bit of y.
    The work is done on the raw 32-bit pattern; no floating-point
    arithmetic is performed. */
 float copysignf( float x, float y )
 {
     union{ cl_uint u; float f; } mag, sgn;
     cl_uint signOfY;

     mag.f = x;
     sgn.f = y;

     signOfY = sgn.u & 0x80000000U;   // keep only the sign bit of y
     mag.u  &= 0x7fffffffU;           // strip the sign bit of x
     mag.u  |= signOfY;

     return mag.f;
 }

 /* Returns a double carrying the magnitude bits of x and the sign bit of y.
    The work is done on the raw 64-bit pattern; no floating-point
    arithmetic is performed. */
 double copysign( double x, double y )
 {
     union{ cl_ulong u; double f; } mag, sgn;
     cl_ulong signOfY;

     mag.f = x;
     sgn.f = y;

     signOfY = sgn.u & 0x8000000000000000ULL;   // keep only the sign bit of y
     mag.u  &= 0x7fffffffffffffffULL;           // strip the sign bit of x
     mag.u  |= signOfY;

     return mag.f;
 }

 long double copysignl( long double x, long double y )
 {
     union
     {
         long double f;
         struct{ cl_ulong m; cl_ushort sexp; }u;
     }ux, uy;

     ux.f = x;
     uy.f = y;

     ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);

     return ux.f;
 }

 /* Rounds x to an integral float.  Adding and then subtracting 2^23
    (given the sign of x) leaves the float addition no room for fractional
    bits, so the addition's own rounding does the work.  The sign of x is
    re-applied at the end so that a zero result keeps it. */
 float rintf(float x)
 {
     const float two23 = 8388608.0f;   /* 0x1.0p23f */
     float bias;
     float sum;

     /* Inputs that fail this comparison (|x| >= 2^23, NaN) are returned as is. */
     if( !( fabsf(x) < two23 ) )
         return x;

     bias = copysignf( two23, x );
     sum = x + bias;
     sum = sum - bias;

     return copysignf( sum, x );
 }

 /* Rounds x to an integral double.  Adding and then subtracting 2^52
    (given the sign of x) leaves the double addition no room for fractional
    bits, so the addition's own rounding does the work.  The sign of x is
    re-applied at the end so that a zero result keeps it. */
 double rint(double x)
 {
     const double two52 = 4503599627370496.0;   /* 0x1.0p52 */
     double bias;
     double sum;

     /* Inputs that fail this comparison (|x| >= 2^52, NaN) are returned as is. */
     if( !( fabs(x) < two52 ) )
         return x;

     bias = copysign( two52, x );
     sum = x + bias;
     sum = sum - bias;

     return copysign( sum, x );
 }

 /* Rounds x to an integral long double with the add/subtract-magic trick
    used by rint() and rintf().

    The magic constant has to be 2^(p-1) for a p-bit significand.  Two
    layouts are handled: long double identical to double (p = 53) and the
    x87 extended format (p = 64).  The magnitude test is done in long
    double so that values just below the limit are not rounded up to it by
    a conversion to double. */
 long double rintl(long double x)
 {
     const long double limit = ( sizeof( long double ) == sizeof( double ) )
                                   ? 4503599627370496.0L      /* 0x1.0p52L */
                                   : 9223372036854775808.0L;  /* 0x1.0p63L */
     long double absx = fabsl(x);

     /* Zero, NaN and anything at or above the limit is returned untouched. */
     if( absx < limit && absx != 0.0L )
     {
         long double magic = ( x < 0.0L ) ? -limit : limit;
         long double rounded = x + magic;
         rounded -= magic;

         /* A zero result keeps the sign of x, e.g. rintl(-0.25L) is -0.0L. */
         if( rounded == 0.0L )
             rounded = ( x < 0.0L ) ? -0.0L : 0.0L;

         x = rounded;
     }

     return x;
 }

 #if _MSC_VER < 1800

 ///////////////////////////////////////////////////////////////////
 //
 //                   ilogb, ilogbf, ilogbl
 //
 ///////////////////////////////////////////////////////////////////
 #ifndef FP_ILOGB0
     #define FP_ILOGB0   INT_MIN
 #endif

 #ifndef FP_ILOGBNAN
     #define FP_ILOGBNAN INT_MIN
 #endif

 int ilogb (double x)
 {
     union{ double f; cl_ulong u;} u;
     u.f = x;

     cl_ulong absx = u.u & CL_LONG_MAX;
     if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
     {
         switch( absx )
         {
             case 0:
                 return FP_ILOGB0;
             case 0x7ff0000000000000ULL:
                 return INT_MAX;
             default:
                 if( absx > 0x7ff0000000000000ULL )
                     return FP_ILOGBNAN;

                 // subnormal
                 u.u = absx | 0x3ff0000000000000ULL;
                 u.f -= 1.0;
                 return (u.u >> 52) - (1023 + 1022);
         }
     }

     return (absx >> 52) - 1023;
 }


 /* Returns the unbiased binary exponent of x as an int.
      zero      -> FP_ILOGB0
      infinity  -> INT_MAX
      NaN       -> FP_ILOGBNAN
    Subnormal inputs are normalised first so that their true exponent is
    reported. */
 int ilogbf (float x)
 {
     union{ float f; cl_uint u;} u;
     cl_uint mag;

     u.f = x;
     mag = u.u & 0x7fffffff;

     // Normal numbers lie in [0x00800000, 0x7f800000); the unsigned
     // subtraction wraps for smaller values, so one comparison suffices.
     if( mag - 0x00800000U < 0x7f800000U - 0x00800000U )
         return (mag >> 23) - 127;

     if( 0 == mag )
         return FP_ILOGB0;

     if( 0x7f800000U == mag )
         return INT_MAX;

     if( mag > 0x7f800000 )
         return FP_ILOGBNAN;

     // subnormal: form 1.m, subtract 1.0f so the hardware normalises 0.m,
     // then undo the two exponent biases
     u.u = mag | 0x3f800000U;
     u.f -= 1.0f;
     return (u.u >> 23) - (127 + 126);
 }

 int ilogbl (long double x)
 {
     union
     {
         long double f;
         struct{ cl_ulong m; cl_ushort sexp; }u;
     } u;
     u.f = x;

     int exp = u.u.sexp & 0x7fff;
     if( 0 == exp )
     {
         if( 0 == u.u.m )
             return FP_ILOGB0;

         //subnormal
         u.u.sexp = 0x3fff;
         u.f -= 1.0f;
         exp = u.u.sexp & 0x7fff;

         return exp - (0x3fff + 0x3ffe);
     }
     else if( 0x7fff == exp )
     {
         if( u.u.m & CL_LONG_MAX )
             return FP_ILOGBNAN;

         return INT_MAX;
     }

     return exp - 0x3fff;
 }

 #endif // _MSC_VER < 1800

 ///////////////////////////////////////////////////////////////////
 //
 //                 fmax, fmin, fmaxf, fminf
 //
 ///////////////////////////////////////////////////////////////////

 /* Stores the raw bit pattern of the float fx into *ux. */
 static void GET_BITS_SP32(float fx, unsigned int* ux)
 {
     volatile union {unsigned int u; float f;} pun;

     pun.f = fx;
     *ux = pun.u;
 }
 /* static void GET_BITS_SP32(float fx, unsigned int* ux) */
 /* { */
 /*     volatile union {float f; unsigned int i;} _bitsy; */
 /*     _bitsy.f = (fx); */
 /*     *ux = _bitsy.i; */
 /* } */
 /* Stores into *fx the float whose raw bit pattern is ux. */
 static void PUT_BITS_SP32(unsigned int ux, float* fx)
 {
     volatile union {unsigned int u; float f;} pun;

     pun.u = ux;
     *fx = pun.f;
 }
 /* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
 /* { */
 /*     volatile union {float f; unsigned int i;} _bitsy; */
 /*     _bitsy.i = (ux); */
 /*     *fx = _bitsy.f; */
 /* } */
 /* Stores the raw bit pattern of the double dx into *lx. */
 static void GET_BITS_DP64(double dx, unsigned __int64* lx)
 {
     volatile union {unsigned __int64 l; double d;} pun;

     pun.d = dx;
     *lx = pun.l;
 }
 /* Stores into *dx the double whose raw bit pattern is lx. */
 static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
 {
     volatile union {unsigned __int64 l; double d;} pun;

     pun.l = lx;
     *dx = pun.d;
 }

 #if 0
 int SIGNBIT_DP64(double x )
 {
     int hx;
     _GET_HIGH_WORD(hx,x);
     return((hx>>31));
 }
 #endif

 /* fmax(x, y) returns the larger (more positive) of x and y.
    A NaN argument is treated as missing: when y is NaN, x is returned
    (so two NaNs yield x); when only x is NaN the comparison below is
    false and y is returned. */
 double fmax(double x, double y)
 {
     return ( isnan(y) || x >= y ) ? x : y;
 }


 /* fmin(x, y) returns the smaller (more negative) of x and y.
    A NaN argument is treated as missing: when y is NaN, x is returned
    (so two NaNs yield x); when only x is NaN the comparison below is
    false and y is returned. */
 double fmin(double x, double y)
 {
     return ( isnan(y) || x <= y ) ? x : y;
 }


 /* fmaxf(x, y) returns the larger (more positive) of x and y.
    A NaN argument is treated as missing: when y is NaN, x is returned
    (so two NaNs yield x); when only x is NaN the comparison below is
    false and y is returned. */
 float fmaxf( float x, float y )
 {
     return ( isnan(y) || x >= y ) ? x : y;
 }

 /* fminf(x, y) returns the smaller (more negative) of x and y.
    A NaN argument is treated as missing: when y is NaN, x is returned
    (so two NaNs yield x); when only x is NaN the comparison below is
    false and y is returned. */
 float fminf(float x, float y)
 {
     return ( isnan(y) || x <= y ) ? x : y;
 }

 /* Returns x * 2^n.

    The scaling is delegated to ldexpl(), which takes an int exponent, so
    n is clamped to the int range first.  Leaving the work to the library
    means zeros, infinities, NaNs, overflow (including its sign) and
    underflow are handled for whatever format long double actually has,
    rather than for one assumed bit layout. */
 long double scalblnl(long double x, long n)
 {
     int e;

     if( n > (long) INT_MAX )
         e = INT_MAX;
     else if( n < (long) INT_MIN )
         e = INT_MIN;
     else
         e = (int) n;

     return ldexpl( x, e );
 }

 ///////////////////////////////////////////////////////////////////
 //
 //                          log2
 //
 ///////////////////////////////////////////////////////////////////
 const static cl_double log_e_base2   = 1.4426950408889634074;
 const static cl_double log_10_base2  = 3.3219280948873623478;

 //double log10(double x);

 double log2(double x)
 {
     return 1.44269504088896340735992468100189214 * log(x);
 }

 long double log2l(long double x)
 {
     return 1.44269504088896340735992468100189214L * log(x);
 }

 /* Drops the fractional part of x (rounds toward zero) by a round trip
    through a 64-bit integer.  Inputs that fail the magnitude test
    (|x| >= 2^52, NaN) are returned unchanged.  The sign of x is re-applied
    so that a zero result keeps it. */
 double trunc(double x)
 {
     cl_long whole;

     if( !( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ ) )
         return x;

     whole = x;
     return copysign( (double) whole, x );
 }

 /* Drops the fractional part of x (rounds toward zero) by a round trip
    through a 32-bit integer.  Inputs that fail the magnitude test
    (|x| >= 2^23, NaN) are returned unchanged.  The sign of x is re-applied
    so that a zero result keeps it. */
 float  truncf(float x)
 {
     cl_int whole;

     if( !( fabsf(x) < 8388608.0f /* 0x1.0p23f */ ) )
         return x;

     whole = x;
     return copysignf( (float) whole, x );
 }

 /* Rounds x to the nearest integer, halfway cases away from zero, and
    returns it as a long.  Results that do not fit are clamped to LONG_MAX
    or LONG_MIN, so the final conversion is never out of range. */
 long lround(double x)
 {
     double absx = fabs(x);

     if( absx < 0.5 )
         return 0;

     // Below 2^52 there may be fractional bits: add one half to the
     // magnitude and truncate through a 64-bit integer.
     if( absx < 4503599627370496.0 /* 0x1.0p52 */)
     {
         long long rounded;

         absx += 0.5;
         rounded = (long long) absx;
         absx = (double) rounded;
         x = copysign( absx, x );
     }

     if( x >= (double) LONG_MAX )
         return LONG_MAX;

     // Written as a negated comparison so that NaN is caught here too.
     if( !( x > (double) LONG_MIN ) )
         return LONG_MIN;

     return (long) x;
 }

 /* Rounds x to the nearest integer, halfway cases away from zero, and
    returns it as a long.  Results that do not fit are clamped to LONG_MAX
    or LONG_MIN, so the final conversion is never out of range. */
 long lroundf(float x)
 {
     float absx = fabsf(x);

     if( absx < 0.5f )
         return 0;

     // Below 2^23 there may be fractional bits: add one half to the
     // magnitude and truncate through a 32-bit integer.
     if( absx < 8388608.0f /* 0x1.0p23f */ )
     {
         int rounded;

         absx += 0.5f;
         rounded = (int) absx;
         absx = (float) rounded;
         x = copysignf( absx, x );
     }

     if( x >= (float) LONG_MAX )
         return LONG_MAX;

     // Written as a negated comparison so that NaN is caught here too.
     if( !( x > (float) LONG_MIN ) )
         return LONG_MIN;

     return (long) x;
 }

 /* Rounds x to the nearest integral double, halfway cases away from zero.
    Magnitudes below one half give a zero carrying the sign of x; inputs
    that fail the 2^52 test (large values, NaN) are returned unchanged. */
 double round(double x)
 {
     const double mag = fabs(x);
     double shifted;
     cl_long whole;

     if( mag < 0.5 )
         return copysign( 0.0, x);

     if( !( mag < 4503599627370496.0 /* 0x1.0p52 */ ) )
         return x;

     // add one half to the magnitude, then truncate through an integer
     shifted = mag + 0.5;
     whole = shifted;
     shifted = whole;

     return copysign( shifted, x );
 }

 /* Rounds x to the nearest integral float, halfway cases away from zero.
    Magnitudes below one half give a zero carrying the sign of x; inputs
    that fail the 2^23 test (large values, NaN) are returned unchanged. */
 float  roundf(float x)
 {
     const float mag = fabsf(x);
     float shifted;
     cl_int whole;

     if( mag < 0.5f )
         return copysignf( 0.0f, x);

     if( !( mag < 8388608.0f /* 0x1.0p23f */ ) )
         return x;

     // add one half to the magnitude, then truncate through an integer
     shifted = mag + 0.5f;
     whole = shifted;
     shifted = whole;

     return copysignf( shifted, x );
 }

 long double roundl(long double x)
 {
     long double absx = fabsl(x);

     if( absx < 0.5L )
         return copysignl( 0.0L, x);

     if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
     {
         absx += 0.5L;
         cl_ulong rounded = absx;
         absx = rounded;
         x = copysignl( absx, x );
     }

     return x;
 }

 float cbrtf( float x )
 {
     float z = pow( fabs((double) x), 1.0 / 3.0 );
     return copysignf( z, x );
 }

 double cbrt( double x )
 {
     return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
 }

 /* Rounds x to an integral value with the add/subtract-2^52 trick used by
    rint() and returns it as a long.  Inputs outside the range of long are
    clamped to LONG_MAX or LONG_MIN, so the final conversion is never out
    of range. */
 long int lrint (double x)
 {
     double absx = fabs(x);

     if( x >= (double) LONG_MAX )
         return LONG_MAX;

     // Written as a negated comparison so that NaN is caught here too.
     if( !( x > (double) LONG_MIN ) )
         return LONG_MIN;

     if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
     {
         double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
         double rounded = x + magic;
         rounded -= magic;
         return (long int) rounded;
     }

     return (long int) x;
 }

 /* Rounds x to an integral value with the add/subtract-2^23 trick used by
    rintf() and returns it as a long.  Inputs outside the range of long are
    clamped to LONG_MAX or LONG_MIN, so the final conversion is never out
    of range. */
 long int lrintf (float x)
 {
     float absx = fabsf(x);

     if( x >= (float) LONG_MAX )
         return LONG_MAX;

     // Written as a negated comparison so that NaN is caught here too.
     if( !( x > (float) LONG_MIN ) )
         return LONG_MIN;

     if( absx < 8388608.0f /* 0x1.0p23f */ )
     {
         float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
         float rounded = x + magic;
         rounded -= magic;
         return (long int) rounded;
     }

     return (long int) x;
 }


 ///////////////////////////////////////////////////////////////////
 //
 //                  fenv functions
 //
 ///////////////////////////////////////////////////////////////////

 #if _MSC_VER < 1800
 /* Reads the status word with _statusfp(), translates each _SW_* flag
    that is set into the matching FE_* flag, and returns those that are
    also requested in excepts. */
 int fetestexcept(int excepts)
 {
     const unsigned int status = _statusfp();
     int raised = 0;

     if( status & _SW_INEXACT )    raised |= FE_INEXACT;
     if( status & _SW_UNDERFLOW )  raised |= FE_UNDERFLOW;
     if( status & _SW_OVERFLOW )   raised |= FE_OVERFLOW;
     if( status & _SW_ZERODIVIDE ) raised |= FE_DIVBYZERO;
     if( status & _SW_INVALID )    raised |= FE_INVALID;

     return excepts & raised;
 }

 /* Calls _clearfp() and reports success (0).
    NOTE(review): excepts is not consulted; _clearfp() is called
    unconditionally, so the clear is not limited to the requested flags. */
 int feclearexcept(int excepts)
 {
     (void) excepts;

     _clearfp();

     return 0;
 }
 #endif

 #endif // __INTEL_COMPILER

 #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300

 /* Returns the float whose bit pattern is 0x7fc00000.

    IEEE 754 single-precision layout, low bits first:
        unsigned int mantissa:  22;
        unsigned int quiet_nan:  1;
        unsigned int exponent:   8;
        unsigned int negative:   1;
    so this is a positive quiet NaN with an empty payload.

    The bits are reinterpreted through a union, as int2float() does,
    instead of dereferencing an int32_t object through a float pointer. */
 float make_nan()
 {
     union { int32_t i; float f; } u;

     u.i = 0x7fc00000;

     return u.f;
 }

 float nanf( const char* str)
 {
     cl_uint u = atoi( str );
     u |= 0x7fc00000U;
     return *( float*)(&u);
 }


 double nan( const char* str)
 {
     cl_ulong u = atoi( str );
     u |= 0x7ff8000000000000ULL;
     return *( double*)(&u);
 }

 // double check this implementatation
 long double nanl( const char* str)
 {
     union
     {
         long double f;
         struct { cl_ulong m; cl_ushort sexp; }u;
     }u;
     u.u.sexp = 0x7fff;
     u.u.m = 0x8000000000000000ULL | atoi( str );

     return u.f;
 }

 #endif

 ///////////////////////////////////////////////////////////////////
 //
 //                  misc functions
 //
 ///////////////////////////////////////////////////////////////////

 /*
 // This function is commented out because the Windows implementation should never call munmap.
 // If it is calling it, we have a bug. Please file a bugzilla.
 int munmap(void *addr, size_t len)
 {
 // FIXME: this is not correct.  munmap is like free()    http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html

     return (int)VirtualAlloc( (LPVOID)addr, len,
                   MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
 }
 */

 uint64_t ReadTime( void )
 {
     LARGE_INTEGER current;
     QueryPerformanceCounter(&current);
     return (uint64_t)current.QuadPart;
 }

 /* Converts the difference between two ReadTime()-style tick counts into
    a double: ticks are divided by the QueryPerformanceFrequency() value
    and multiplied by 1e9.  The frequency is queried on the first call and
    cached in a function-local static. */
 double SubtractTime( uint64_t endTime, uint64_t startTime )
 {
     static double ticksPerSecond = 0.0;
     double elapsedTicks;

     if( 0.0 == ticksPerSecond )
     {
         LARGE_INTEGER frequency;
         QueryPerformanceFrequency( &frequency );
         ticksPerSecond = (double) frequency.QuadPart;
     }

     elapsedTicks = (double)( endTime - startTime );

     return elapsedTicks / ticksPerSecond * 1e9;
 }

 /* Returns bit 63 of the bit pattern of x: 1 when the sign bit is set
    (including for -0.0), 0 otherwise. */
 int cf_signbit(double x)
 {
     union { cl_ulong bits; double value; } pun;

     pun.value = x;

     return pun.bits >> 63;
 }

 /* Returns bit 31 of the bit pattern of x: 1 when the sign bit is set
    (including for -0.0f), 0 otherwise. */
 int cf_signbitf(float x)
 {
     union { cl_uint bits; float value; } pun;

     pun.value = x;

     return pun.bits >> 31;
 }

 /* Reinterprets the 32 bits of ix as a float (no numeric conversion). */
 float int2float (int32_t ix)
 {
     union { int32_t bits; float value; } pun;

     pun.bits = ix;

     return pun.value;
 }

 /* Reinterprets the 32 bits of fx as an int32_t (no numeric conversion). */
 int32_t float2int (float   fx)
 {
     union { int32_t bits; float value; } pun;

     pun.value = fx;

     return pun.bits;
 }

 #if !defined(_WIN64)
 /** Returns the number of leading 0-bits in pattern, starting at the most
     significant bit position.  The highest set bit is located with
     _BitScanReverse(); when that call returns 0, the full width of int
     in bits is returned.
 */
 int __builtin_clz(unsigned int pattern)
 {
     unsigned long topBit;

     if( !_BitScanReverse( &topBit, pattern ) )
         return 8*sizeof(int);

     return 8*sizeof(int) - 1 - topBit;
 }
 #else
 /* Returns the number of leading 0-bits in pattern (32 for zero).
    Binary search: for each step of 16, 8, 4, 2, 1 bits, if anything is
    set at or above that bit position, shift it down and deduct the step
    from the count. */
 int __builtin_clz(unsigned int pattern)
 {
    int zeros = 31;
    int step;

    if (0u == pattern) {
        return 32;
    }

    for (step = 16; step >= 1; step >>= 1) {
        if (pattern >= (1u << step)) {
            pattern >>= step;
            zeros -= step;
        }
    }

    return zeros;
 }

 #endif // !defined(_WIN64)

 #include <intrin.h>
 #include <emmintrin.h>

 /* Passes usec, converted to milliseconds and rounded up, to Sleep().
    Always returns 0. */
 int usleep(int usec)
 {
     const int msec = ( usec + 999 ) / 1000;

     Sleep( msec );

     return 0;
 }

 /* Passes sec, converted to milliseconds, to Sleep().  Always returns 0. */
 unsigned int sleep( unsigned int sec )
 {
     const unsigned int msec = sec * 1000;

     Sleep( msec );

     return 0;
 }

 #endif // defined( _MSC_VER )
	//
	// Copyright (c) 2017 The Khronos Group Inc.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.
	//
	#include "compat.h"

	#if defined ( _MSC_VER )

	#include <limits.h>
	#include <stdlib.h>

	#include <CL/cl.h>

	#include <windows.h>

	#if ! defined( __INTEL_COMPILER )

	///////////////////////////////////////////////////////////////////
	//
	// rint, rintf
	//
	///////////////////////////////////////////////////////////////////

	float copysignf( float x, float y )
	{
	union{ cl_uint u; float f; }ux, uy;

	ux.f = x;
	uy.f = y;

	ux.u = (ux.u & 0x7fffffffU) \| (uy.u & 0x80000000U);

	return ux.f;
	}

	double copysign( double x, double y )
	{
	union{ cl_ulong u; double f; }ux, uy;

	ux.f = x;
	uy.f = y;

	ux.u = (ux.u & 0x7fffffffffffffffULL) \| (uy.u & 0x8000000000000000ULL);

	return ux.f;
	}

	long double copysignl( long double x, long double y )
	{
	union
	{
	long double f;
	struct{ cl_ulong m; cl_ushort sexp; }u;
	}ux, uy;

	ux.f = x;
	uy.f = y;

	ux.u.sexp = (ux.u.sexp & 0x7fff) \| (uy.u.sexp & 0x8000);

	return ux.f;
	}

	float rintf(float x)
	{
	float absx = fabsf(x);

	if( absx < 8388608.0f /* 0x1.0p23f */ )
	{
	float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
	float rounded = x + magic;
	rounded -= magic;
	x = copysignf( rounded, x );
	}

	return x;
	}

	double rint(double x)
	{
	double absx = fabs(x);

	if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
	{
	double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
	double rounded = x + magic;
	rounded -= magic;
	x = copysign( rounded, x );
	}

	return x;
	}

	long double rintl(long double x)
	{
	double absx = fabs(x);

	if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
	{
	long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
	long double rounded = x + magic;
	rounded -= magic;
	x = copysignl( rounded, x );
	}

	return x;
	}

	#if _MSC_VER < 1800

	///////////////////////////////////////////////////////////////////
	//
	// ilogb, ilogbf, ilogbl
	//
	///////////////////////////////////////////////////////////////////
	#ifndef FP_ILOGB0
	#define FP_ILOGB0 INT_MIN
	#endif

	#ifndef FP_ILOGBNAN
	#define FP_ILOGBNAN INT_MIN
	#endif

	int ilogb (double x)
	{
	union{ double f; cl_ulong u;} u;
	u.f = x;

	cl_ulong absx = u.u & CL_LONG_MAX;
	if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
	{
	switch( absx )
	{
	case 0:
	return FP_ILOGB0;
	case 0x7ff0000000000000ULL:
	return INT_MAX;
	default:
	if( absx > 0x7ff0000000000000ULL )
	return FP_ILOGBNAN;

	// subnormal
	u.u = absx \| 0x3ff0000000000000ULL;
	u.f -= 1.0;
	return (u.u >> 52) - (1023 + 1022);
	}
	}

	return (absx >> 52) - 1023;
	}


	int ilogbf (float x)
	{
	union{ float f; cl_uint u;} u;
	u.f = x;

	cl_uint absx = u.u & 0x7fffffff;
	if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
	{
	switch( absx )
	{
	case 0:
	return FP_ILOGB0;
	case 0x7f800000U:
	return INT_MAX;
	default:
	if( absx > 0x7f800000 )
	return FP_ILOGBNAN;

	// subnormal
	u.u = absx \| 0x3f800000U;
	u.f -= 1.0f;
	return (u.u >> 23) - (127 + 126);
	}
	}

	return (absx >> 23) - 127;
	}

	int ilogbl (long double x)
	{
	union
	{
	long double f;
	struct{ cl_ulong m; cl_ushort sexp; }u;
	} u;
	u.f = x;

	int exp = u.u.sexp & 0x7fff;
	if( 0 == exp )
	{
	if( 0 == u.u.m )
	return FP_ILOGB0;

	//subnormal
	u.u.sexp = 0x3fff;
	u.f -= 1.0f;
	exp = u.u.sexp & 0x7fff;

	return exp - (0x3fff + 0x3ffe);
	}
	else if( 0x7fff == exp )
	{
	if( u.u.m & CL_LONG_MAX )
	return FP_ILOGBNAN;

	return INT_MAX;
	}

	return exp - 0x3fff;
	}

	#endif // _MSC_VER < 1800

	///////////////////////////////////////////////////////////////////
	//
	// fmax, fmin, fmaxf, fminf
	//
	///////////////////////////////////////////////////////////////////

	static void GET_BITS_SP32(float fx, unsigned int* ux)
	{
	volatile union {float f; unsigned int u;} _bitsy;
	_bitsy.f = (fx);
	*ux = _bitsy.u;
	}
	/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
	/* { */
	/* volatile union {float f; unsigned int i;} _bitsy; */
	/* _bitsy.f = (fx); */
	/* *ux = _bitsy.i; */
	/* } */
	static void PUT_BITS_SP32(unsigned int ux, float* fx)
	{
	volatile union {float f; unsigned int u;} _bitsy;
	_bitsy.u = (ux);
	*fx = _bitsy.f;
	}
	/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
	/* { */
	/* volatile union {float f; unsigned int i;} _bitsy; */
	/* _bitsy.i = (ux); */
	/* *fx = _bitsy.f; */
	/* } */
	static void GET_BITS_DP64(double dx, unsigned __int64* lx)
	{
	volatile union {double d; unsigned __int64 l;} _bitsy;
	_bitsy.d = (dx);
	*lx = _bitsy.l;
	}
	static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
	{
	volatile union {double d; unsigned __int64 l;} _bitsy;
	_bitsy.l = (lx);
	*dx = _bitsy.d;
	}

	#if 0
	int SIGNBIT_DP64(double x )
	{
	int hx;
	_GET_HIGH_WORD(hx,x);
	return((hx>>31));
	}
	#endif

	/* fmax(x, y) returns the larger (more positive) of x and y.
	NaNs are treated as missing values: if one argument is NaN,
	the other argument is returned. If both arguments are NaN,
	the first argument is returned. */

	/* This works so long as the compiler knows that (x != x) means
	that x is NaN; gcc does. */
	double fmax(double x, double y)
	{
	if( isnan(y) )
	return x;

	return x >= y ? x : y;
	}


	/* fmin(x, y) returns the smaller (more negative) of x and y.
	NaNs are treated as missing values: if one argument is NaN,
	the other argument is returned. If both arguments are NaN,
	the first argument is returned. */

	double fmin(double x, double y)
	{
	if( isnan(y) )
	return x;

	return x <= y ? x : y;
	}


	float fmaxf( float x, float y )
	{
	if( isnan(y) )
	return x;

	return x >= y ? x : y;
	}

	/* fminf(x, y) returns the smaller (more negative) of x and y.
	NaNs are treated as missing values: if one argument is NaN,
	the other argument is returned. If both arguments are NaN,
	the first argument is returned. */

	float fminf(float x, float y)
	{
	if( isnan(y) )
	return x;

	return x <= y ? x : y;
	}

	/* Returns x * 2^n.

	   The scaling is delegated to ldexpl(), which takes an int exponent, so
	   n is clamped to the int range first.  Leaving the work to the library
	   means zeros, infinities, NaNs, overflow (including its sign) and
	   underflow are handled for whatever format long double actually has,
	   rather than for one assumed bit layout. */
	long double scalblnl(long double x, long n)
	{
	    int e;

	    if( n > (long) INT_MAX )
	        e = INT_MAX;
	    else if( n < (long) INT_MIN )
	        e = INT_MIN;
	    else
	        e = (int) n;

	    return ldexpl( x, e );
	}

	///////////////////////////////////////////////////////////////////
	//
	// log2
	//
	///////////////////////////////////////////////////////////////////
	const static cl_double log_e_base2 = 1.4426950408889634074;
	const static cl_double log_10_base2 = 3.3219280948873623478;

	//double log10(double x);

	double log2(double x)
	{
	return 1.44269504088896340735992468100189214 * log(x);
	}

	long double log2l(long double x)
	{
	return 1.44269504088896340735992468100189214L * log(x);
	}

	double trunc(double x)
	{
	double absx = fabs(x);

	if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
	{
	cl_long rounded = x;
	x = copysign( (double) rounded, x );
	}

	return x;
	}

	float truncf(float x)
	{
	float absx = fabsf(x);

	if( absx < 8388608.0f /* 0x1.0p23f */ )
	{
	cl_int rounded = x;
	x = copysignf( (float) rounded, x );
	}

	return x;
	}

	long lround(double x)
	{
	double absx = fabs(x);

	if( absx < 0.5 )
	return 0;

	if( absx < 4503599627370496.0 /* 0x1.0p52 */)
	{
	absx += 0.5;
	cl_long rounded = absx;
	absx = rounded;
	x = copysign( absx, x );
	}

	if( x >= (double) LONG_MAX )
	return LONG_MAX;

	return (long) x;
	}

	long lroundf(float x)
	{
	float absx = fabsf(x);

	if( absx < 0.5f )
	return 0;

	if( absx < 8388608.0f )
	{
	absx += 0.5f;
	cl_int rounded = absx;
	absx = rounded;
	x = copysignf( absx, x );
	}

	if( x >= (float) LONG_MAX )
	return LONG_MAX;

	return (long) x;
	}

	double round(double x)
	{
	double absx = fabs(x);

	if( absx < 0.5 )
	return copysign( 0.0, x);

	if( absx < 4503599627370496.0 /* 0x1.0p52 */)
	{
	absx += 0.5;
	cl_long rounded = absx;
	absx = rounded;
	x = copysign( absx, x );
	}

	return x;
	}

	float roundf(float x)
	{
	float absx = fabsf(x);

	if( absx < 0.5f )
	return copysignf( 0.0f, x);

	if( absx < 8388608.0f )
	{
	absx += 0.5f;
	cl_int rounded = absx;
	absx = rounded;
	x = copysignf( absx, x );
	}

	return x;
	}

	long double roundl(long double x)
	{
	long double absx = fabsl(x);

	if( absx < 0.5L )
	return copysignl( 0.0L, x);

	if( absx < 9223372036854775808.0L /0x1.0p63L/ )
	{
	absx += 0.5L;
	cl_ulong rounded = absx;
	absx = rounded;
	x = copysignl( absx, x );
	}

	return x;
	}

	float cbrtf( float x )
	{
	float z = pow( fabs((double) x), 1.0 / 3.0 );
	return copysignf( z, x );
	}

	double cbrt( double x )
	{
	return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
	}

	long int lrint (double x)
	{
	double absx = fabs(x);

	if( x >= (double) LONG_MAX )
	return LONG_MAX;

	if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
	{
	double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
	double rounded = x + magic;
	rounded -= magic;
	return (long int) rounded;
	}

	return (long int) x;
	}

	long int lrintf (float x)
	{
	float absx = fabsf(x);

	if( x >= (float) LONG_MAX )
	return LONG_MAX;

	if( absx < 8388608.0f /* 0x1.0p23f */ )
	{
	float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
	float rounded = x + magic;
	rounded -= magic;
	return (long int) rounded;
	}

	return (long int) x;
	}


	///////////////////////////////////////////////////////////////////
	//
	// fenv functions
	//
	///////////////////////////////////////////////////////////////////

	#if _MSC_VER < 1800
	/* Reads the status word with _statusfp(), translates each _SW_* flag
	   that is set into the matching FE_* flag, and returns those that are
	   also requested in excepts. */
	int fetestexcept(int excepts)
	{
	    unsigned int status = _statusfp();
	    return excepts & (
	        ((status & _SW_INEXACT) ? FE_INEXACT : 0)      |
	        ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)  |
	        ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)    |
	        ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
	        ((status & _SW_INVALID) ? FE_INVALID : 0)
	    );
	}

	int feclearexcept(int excepts)
	{
	_clearfp();
	return 0;
	}
	#endif

	#endif // __INTEL_COMPILER

	#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300

	/* Returns the float whose bit pattern is 0x7fc00000.

	   IEEE 754 single-precision layout, low bits first:
	       unsigned int mantissa:  22;
	       unsigned int quiet_nan:  1;
	       unsigned int exponent:   8;
	       unsigned int negative:   1;
	   so this is a positive quiet NaN with an empty payload.

	   The bits are reinterpreted through a union rather than by casting the
	   address of the integer, which is not a float value. */
	float make_nan()
	{
	    union { int32_t i; float f; } u;

	    u.i = 0x7fc00000;

	    return u.f;
	}

	float nanf( const char* str)
	{
	cl_uint u = atoi( str );
	u \|= 0x7fc00000U;
	return ( float)(&u);
	}


	double nan( const char* str)
	{
	cl_ulong u = atoi( str );
	u \|= 0x7ff8000000000000ULL;
	return ( double)(&u);
	}

	// double check this implementatation
	long double nanl( const char* str)
	{
	union
	{
	long double f;
	struct { cl_ulong m; cl_ushort sexp; }u;
	}u;
	u.u.sexp = 0x7fff;
	u.u.m = 0x8000000000000000ULL \| atoi( str );

	return u.f;
	}

	#endif

	///////////////////////////////////////////////////////////////////
	//
	// misc functions
	//
	///////////////////////////////////////////////////////////////////

	/*
	// This function is commented out because the Windows implementation should never call munmap.
	// If it is calling it, we have a bug. Please file a bugzilla.
	int munmap(void *addr, size_t len)
	{
	// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html

	return (int)VirtualAlloc( (LPVOID)addr, len,
	MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
	}
	*/

	uint64_t ReadTime( void )
	{
	LARGE_INTEGER current;
	QueryPerformanceCounter(&current);
	return (uint64_t)current.QuadPart;
	}

	double SubtractTime( uint64_t endTime, uint64_t startTime )
	{
	static double PerformanceFrequency = 0.0;

	if (PerformanceFrequency == 0.0) {
	LARGE_INTEGER frequency;
	QueryPerformanceFrequency(&frequency);
	PerformanceFrequency = (double) frequency.QuadPart;
	}

	return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
	}

	int cf_signbit(double x)
	{
	union
	{
	double f;
	cl_ulong u;
	}u;
	u.f = x;
	return u.u >> 63;
	}

	int cf_signbitf(float x)
	{
	union
	{
	float f;
	cl_uint u;
	}u;
	u.f = x;
	return u.u >> 31;
	}

	float int2float (int32_t ix)
	{
	union {
	float f;
	int32_t i;
	} u;
	u.i = ix;
	return u.f;
	}

	int32_t float2int (float fx)
	{
	union {
	float f;
	int32_t i;
	} u;
	u.f = fx;
	return u.i;
	}

	#if !defined(_WIN64)
	/** Returns the number of leading 0-bits in x,
	starting at the most significant bit position.
	If x is 0, the result is undefined.
	*/
	int __builtin_clz(unsigned int pattern)
	{
	#if 0
	int res;
	__asm {
	mov eax, pattern
	bsr eax, eax
	mov res, eax
	}
	return 31 - res;
	#endif
	unsigned long index;
	unsigned char res = _BitScanReverse( &index, pattern);
	if (res) {
	return 8*sizeof(int) - 1 - index;
	} else {
	return 8*sizeof(int);
	}
	}
	#else
	int __builtin_clz(unsigned int pattern)
	{
	int count;
	if (pattern == 0u) {
	return 32;
	}
	count = 31;
	if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
	if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; }
	if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; }
	if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; }
	if (pattern >= 1u<<1) { count -= 1; }
	return count;
	}

	#endif // !defined(_WIN64)

	#include <intrin.h>
	#include <emmintrin.h>

	int usleep(int usec)
	{
	Sleep((usec + 999) / 1000);
	return 0;
	}

	unsigned int sleep( unsigned int sec )
	{
	Sleep( sec * 1000 );
	return 0;
	}

	#endif // defined( _MSC_VER )