| #ifndef THC_NUMERICS_INC |
| #define THC_NUMERICS_INC |
| |
| #include <cuda.h> |
| #include <limits.h> |
| #include "THCHalf.h" |
| |
| /// Class for numeric limits of the particular data type, which |
| /// includes support for `half`. |
| /// Unfortunately since `half` does not have a constructor, these have |
| /// to be expressed as functions (either that or non-const statics). |
| template <typename T> |
| struct THCNumerics { |
| }; |
| |
| template <> |
| struct THCNumerics<unsigned char> { |
| static inline __host__ __device__ unsigned char min() { return 0; } |
| static inline __host__ __device__ unsigned char max() { return UCHAR_MAX; } |
| |
| static inline __host__ __device__ bool lt(unsigned char a, unsigned char b) { return a < b; } |
| static inline __host__ __device__ bool le(unsigned char a, unsigned char b) { return a <= b; } |
| static inline __host__ __device__ bool gt(unsigned char a, unsigned char b) { return a > b; } |
| static inline __host__ __device__ bool ge(unsigned char a, unsigned char b) { return a >= b; } |
| static inline __host__ __device__ bool eq(unsigned char a, unsigned char b) { return a == b; } |
| static inline __host__ __device__ bool ne(unsigned char a, unsigned char b) { return a != b; } |
| |
| static inline __host__ __device__ unsigned char add(unsigned char a, unsigned char b) { return a + b; } |
| static inline __host__ __device__ unsigned char mul(unsigned char a, unsigned char b) { return a * b; } |
| static inline __host__ __device__ unsigned char sub(unsigned char a, unsigned char b) { return a - b; } |
| static inline __host__ __device__ unsigned char div(unsigned char a, unsigned char b) { return a / b; } |
| static inline __host__ __device__ unsigned char abs(unsigned char a) { return abs(a); } |
| }; |
| |
| template <> |
| struct THCNumerics<char> { |
| static inline __host__ __device__ char min() { return CHAR_MIN; } |
| static inline __host__ __device__ char max() { return CHAR_MAX; } |
| |
| static inline __host__ __device__ bool lt(char a, char b) { return a < b; } |
| static inline __host__ __device__ bool le(char a, char b) { return a <= b; } |
| static inline __host__ __device__ bool gt(char a, char b) { return a > b; } |
| static inline __host__ __device__ bool ge(char a, char b) { return a >= b; } |
| static inline __host__ __device__ bool eq(char a, char b) { return a == b; } |
| static inline __host__ __device__ bool ne(char a, char b) { return a != b; } |
| |
| static inline __host__ __device__ char add(char a, char b) { return a + b; } |
| static inline __host__ __device__ char mul(char a, char b) { return a * b; } |
| static inline __host__ __device__ char sub(char a, char b) { return a - b; } |
| static inline __host__ __device__ char div(char a, char b) { return a / b; } |
| static inline __host__ __device__ char abs(char a) { return abs(a); } |
| }; |
| |
| template <> |
| struct THCNumerics<short> { |
| static inline __host__ __device__ short min() { return SHRT_MIN; } |
| static inline __host__ __device__ short max() { return SHRT_MAX; } |
| |
| static inline __host__ __device__ bool lt(short a, short b) { return a < b; } |
| static inline __host__ __device__ bool le(short a, short b) { return a <= b; } |
| static inline __host__ __device__ bool gt(short a, short b) { return a > b; } |
| static inline __host__ __device__ bool ge(short a, short b) { return a >= b; } |
| static inline __host__ __device__ bool eq(short a, short b) { return a == b; } |
| static inline __host__ __device__ bool ne(short a, short b) { return a != b; } |
| |
| static inline __host__ __device__ short add(short a, short b) { return a + b; } |
| static inline __host__ __device__ short mul(short a, short b) { return a * b; } |
| static inline __host__ __device__ short sub(short a, short b) { return a - b; } |
| static inline __host__ __device__ short div(short a, short b) { return a / b; } |
| static inline __host__ __device__ short abs(short a) { return abs(a); } |
| }; |
| |
| template <> |
| struct THCNumerics<int> { |
| static inline __host__ __device__ int min() { return INT_MIN; } |
| static inline __host__ __device__ int max() { return INT_MAX; } |
| |
| static inline __host__ __device__ bool lt(int a, int b) { return a < b; } |
| static inline __host__ __device__ bool le(int a, int b) { return a <= b; } |
| static inline __host__ __device__ bool gt(int a, int b) { return a > b; } |
| static inline __host__ __device__ bool ge(int a, int b) { return a >= b; } |
| static inline __host__ __device__ bool eq(int a, int b) { return a == b; } |
| static inline __host__ __device__ bool ne(int a, int b) { return a != b; } |
| |
| static inline __host__ __device__ int add(int a, int b) { return a + b; } |
| static inline __host__ __device__ int mul(int a, int b) { return a * b; } |
| static inline __host__ __device__ int sub(int a, int b) { return a - b; } |
| static inline __host__ __device__ int div(int a, int b) { return a / b; } |
| static inline __host__ __device__ int abs(int a) { return ::abs(a); } |
| }; |
| |
| template <> |
| struct THCNumerics<long> { |
| static inline __host__ __device__ long min() { return LONG_MIN; } |
| static inline __host__ __device__ long max() { return LONG_MAX; } |
| |
| static inline __host__ __device__ bool lt(long a, long b) { return a < b; } |
| static inline __host__ __device__ bool le(long a, long b) { return a <= b; } |
| static inline __host__ __device__ bool gt(long a, long b) { return a > b; } |
| static inline __host__ __device__ bool ge(long a, long b) { return a >= b; } |
| static inline __host__ __device__ bool eq(long a, long b) { return a == b; } |
| static inline __host__ __device__ bool ne(long a, long b) { return a != b; } |
| |
| static inline __host__ __device__ long add(long a, long b) { return a + b; } |
| static inline __host__ __device__ long mul(long a, long b) { return a * b; } |
| static inline __host__ __device__ long sub(long a, long b) { return a - b; } |
| static inline __host__ __device__ long div(long a, long b) { return a / b; }; |
| static inline __host__ __device__ long abs(long a) { return labs(a); } |
| }; |
| |
| #ifdef CUDA_HALF_TENSOR |
| template <> |
| struct THCNumerics<half> { |
| static inline __host__ __device__ half min() { half h; h.x = 0xfbff; return h; } |
| static inline __host__ __device__ half max() { half h; h.x = 0x7bff; return h; } |
| |
| static inline __host__ __device__ bool lt(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hlt(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return fa < fb; |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_half2float(a) < THC_half2float(b); |
| #endif |
| } |
| |
| static inline __host__ __device__ bool le(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hle(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return fa <= fb; |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_half2float(a) <= THC_half2float(b); |
| #endif |
| } |
| |
| static inline __host__ __device__ bool gt(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hgt(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return fa > fb; |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_half2float(a) > THC_half2float(b); |
| #endif |
| } |
| |
| static inline __host__ __device__ bool ge(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hge(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return fa >= fb; |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_half2float(a) >= THC_half2float(b); |
| #endif |
| } |
| |
| static inline __host__ __device__ bool eq(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __heq(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return fa == fb; |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_half2float(a) == THC_half2float(b); |
| #endif |
| } |
| |
| static inline __host__ __device__ bool ne(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hne(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return fa != fb; |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_half2float(a) != THC_half2float(b); |
| #endif |
| } |
| |
| static inline __host__ __device__ half exp(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hexp(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(expf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(expf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half log(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hlog(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(logf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(logf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half log1p(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(log1pf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(log1pf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half cos(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hcos(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(cosf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(cosf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half sin(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hsin(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(sinf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(sinf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half sqrt(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hsqrt(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(sqrtf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(sqrtf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half rsqrt(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hrsqrt(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(rsqrtf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(rsqrtf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half ceil(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hceil(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(ceilf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(ceilf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half floor(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return hfloor(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(floorf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(floorf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half trunc(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return htrunc(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(truncf(fa)); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(truncf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half neg(half a) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hneg(a); |
| #else |
| float fa = __half2float(a); |
| return __float2half(-fa); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(-(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half acos(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(acosf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(acosf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half cosh(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(coshf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(coshf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half asin(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(asinf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(asinf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half sinh(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(sinhf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(sinhf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half tan(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(tanf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(tanf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half atan(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(atanf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(atanf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half tanh(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(tanhf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(tanhf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half abs(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(fabs(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(fabs(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half round(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(roundf(fa)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(roundf(THC_half2float(a))); |
| #endif |
| } |
| |
| static inline __host__ __device__ half frac(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(fa - truncf(fa)); |
| #else // __CUDA_ARCH__ |
| float fa = THC_half2float(a); |
| return THC_float2half(fa - floorf(fa)); |
| #endif |
| } |
| |
| static inline __host__ __device__ half cinv(half a) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| return __float2half(1.0f / fa); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(1.0f / THC_half2float(a)); |
| #endif |
| } |
| |
| static inline __host__ __device__ half add(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hadd(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return __float2half( fa + fb ); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(THC_half2float(a) + THC_half2float(b)); |
| #endif |
| } |
| |
| static inline __host__ __device__ half div(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return __float2half( fa / fb ); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(THC_half2float(a) / THC_half2float(b)); |
| #endif |
| } |
| |
| static inline __host__ __device__ half mul(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hmul(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return __float2half( fa * fb ); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(THC_half2float(a) * THC_half2float(b)); |
| #endif |
| } |
| |
| static inline __host__ __device__ half sub(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| #ifdef CUDA_HALF_INSTRUCTIONS |
| return __hsub(a, b); |
| #else |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return __float2half( fa - fb ); |
| #endif |
| #else // __CUDA_ARCH__ |
| return THC_float2half(THC_half2float(a) - THC_half2float(b)); |
| #endif |
| } |
| |
| static inline __host__ __device__ half pow(half a, half b) { |
| #ifdef __CUDA_ARCH__ |
| float fa = __half2float(a); |
| float fb = __half2float(b); |
| return __float2half(powf(fa, fb)); |
| #else // __CUDA_ARCH__ |
| return THC_float2half(powf(THC_half2float(a), THC_half2float(b))); |
| #endif |
| } |
| |
| }; |
| #endif |
| |
| template <> |
| struct THCNumerics<float> { |
| static inline __host__ __device__ float min() { return -FLT_MAX; } |
| static inline __host__ __device__ float max() { return FLT_MAX; } |
| |
| static inline __host__ __device__ bool lt(float a, float b) { return a < b; } |
| static inline __host__ __device__ bool le(float a, float b) { return a <= b; } |
| static inline __host__ __device__ bool gt(float a, float b) { return a > b; } |
| static inline __host__ __device__ bool ge(float a, float b) { return a >= b; } |
| static inline __host__ __device__ bool eq(float a, float b) { return a == b; } |
| static inline __host__ __device__ bool ne(float a, float b) { return a != b; } |
| |
| static inline __host__ __device__ float exp (float a) { return expf(a); } |
| static inline __host__ __device__ float log (float a) { return logf(a); } |
| static inline __host__ __device__ float log1p(float a) { return log1pf(a); } |
| static inline __host__ __device__ float cos (float a) { return cosf(a); } |
| static inline __host__ __device__ float sin (float a) { return sinf(a); } |
| static inline __host__ __device__ float sqrt (float a) { return sqrtf(a); } |
| static inline __host__ __device__ float rsqrt(float a) { return rsqrtf(a); } |
| static inline __host__ __device__ float ceil (float a) { return ceilf(a); } |
| static inline __host__ __device__ float floor(float a) { return floorf(a); } |
| static inline __host__ __device__ float trunc(float a) { return truncf(a); } |
| static inline __host__ __device__ float neg (float a) { return -a; } |
| static inline __host__ __device__ float acos (float a) { return acosf(a); } |
| static inline __host__ __device__ float cosh (float a) { return coshf(a); } |
| static inline __host__ __device__ float acosh(float a) { return acoshf(a); } |
| static inline __host__ __device__ float asin (float a) { return asinf(a); } |
| static inline __host__ __device__ float sinh (float a) { return sinhf(a); } |
| static inline __host__ __device__ float asinh(float a) { return asinhf(a); } |
| static inline __host__ __device__ float tan (float a) { return tanf(a); } |
| static inline __host__ __device__ float atan (float a) { return atanf(a); } |
| static inline __host__ __device__ float tanh (float a) { return tanhf(a); } |
| static inline __host__ __device__ float abs (float a) { return fabs(a); } |
| static inline __host__ __device__ float round(float a) { return roundf(a); } |
| static inline __host__ __device__ float frac (float a) { return a - truncf(a); } |
| static inline __host__ __device__ float cinv (float a) { return 1.0f / a; } |
| static inline __host__ __device__ float add (float a, float b) { return a + b; } |
| static inline __host__ __device__ float div (float a, float b) { return a / b; } |
| static inline __host__ __device__ float mul (float a, float b) { return a * b; } |
| static inline __host__ __device__ float sub (float a, float b) { return a - b; } |
| static inline __host__ __device__ float pow (float a, float b) { return powf(a, b); } |
| }; |
| |
| template <> |
| struct THCNumerics<double> { |
| static inline __host__ __device__ double min() { return -DBL_MAX; } |
| static inline __host__ __device__ double max() { return DBL_MAX; } |
| |
| static inline __host__ __device__ bool lt(double a, double b) { return a < b; } |
| static inline __host__ __device__ bool le(double a, double b) { return a <= b; } |
| static inline __host__ __device__ bool gt(double a, double b) { return a > b; } |
| static inline __host__ __device__ bool ge(double a, double b) { return a >= b; } |
| static inline __host__ __device__ bool eq(double a, double b) { return a == b; } |
| static inline __host__ __device__ bool ne(double a, double b) { return a != b; } |
| |
| static inline __host__ __device__ double exp (double a) { return ::exp(a); } |
| static inline __host__ __device__ double log (double a) { return ::log(a); } |
| static inline __host__ __device__ double log1p(double a) { return ::log1p(a); } |
| static inline __host__ __device__ double cos (double a) { return ::cos(a); } |
| static inline __host__ __device__ double sin (double a) { return ::sin(a); } |
| static inline __host__ __device__ double sqrt (double a) { return ::sqrt(a); } |
| static inline __host__ __device__ double rsqrt(double a) { return ::rsqrt(a); } |
| static inline __host__ __device__ double ceil (double a) { return ::ceil(a); } |
| static inline __host__ __device__ double floor(double a) { return ::floor(a); } |
| static inline __host__ __device__ double trunc(double a) { return ::trunc(a); } |
| static inline __host__ __device__ double neg (double a) { return -a; } |
| static inline __host__ __device__ double acos (double a) { return ::acos(a); } |
| static inline __host__ __device__ double cosh (double a) { return ::cosh(a); } |
| static inline __host__ __device__ double acosh(double a) { return ::acosh(a); } |
| static inline __host__ __device__ double asin (double a) { return ::asin(a); } |
| static inline __host__ __device__ double sinh (double a) { return ::sinh(a); } |
| static inline __host__ __device__ double asinh(double a) { return ::asinh(a); } |
| static inline __host__ __device__ double tan (double a) { return ::tan(a); } |
| static inline __host__ __device__ double atan (double a) { return ::atan(a); } |
| static inline __host__ __device__ double tanh (double a) { return ::tanh(a); } |
| static inline __host__ __device__ double abs (double a) { return ::abs(a); } |
| static inline __host__ __device__ double round(double a) { return ::round(a); } |
| static inline __host__ __device__ double frac (double a) { return a - ::trunc(a); } |
| static inline __host__ __device__ double cinv (double a) { return 1.0 / a; } |
| static inline __host__ __device__ double add (double a, double b) { return a + b; } |
| static inline __host__ __device__ double div (double a, double b) { return a / b; } |
| static inline __host__ __device__ double mul (double a, double b) { return a * b; } |
| static inline __host__ __device__ double sub (double a, double b) { return a - b; } |
| static inline __host__ __device__ double pow (double a, double b) { return ::pow(a, b); } |
| }; |
| |
| /// `half` has some type conversion issues associated with it, since it |
| /// is a struct without a constructor/implicit conversion constructor. |
| /// We use this to convert scalar values to the given type that the |
| /// tensor expects. |
| template <typename In, typename Out> |
| struct ScalarConvert { |
| static __host__ __device__ Out to(const In v) { return (Out) v; } |
| }; |
| |
| #ifdef CUDA_HALF_TENSOR |
| template <typename Out> |
| struct ScalarConvert<half, Out> { |
| static __host__ __device__ Out to(const half v) { |
| #ifdef __CUDA_ARCH__ |
| return (Out) __half2float(v); |
| #else |
| return (Out) THC_half2float(v); |
| #endif |
| } |
| }; |
| |
| template <typename In> |
| struct ScalarConvert<In, half> { |
| static __host__ __device__ half to(const In v) { |
| #ifdef __CUDA_ARCH__ |
| return __float2half((float) v); |
| #else |
| return THC_float2half((float) v); |
| #endif |
| } |
| }; |
| |
| template <> |
| struct ScalarConvert<half, half> { |
| static __host__ __device__ half to(const half v) { |
| return v; |
| } |
| }; |
| #endif |
| |
| #endif // THC_NUMERICS_INC |