// blob: a02d19e6f8b221676e23e451c3f641414bff0705 [file] [log] [blame]
#include "THCTensorMath.h"
#include "THCGeneral.h"
#include "THCHalf.h"
#include "THCTensorCopy.h"
#include "THCApply.cuh"
#include "THCNumerics.cuh"
// Functor that adds a fixed constant to an element.
//
// The constant is captured in `val` at construction. Two call forms exist:
//   op(out, in) : stores (*in + val) into *out
//   op(v)       : adds val to *v in place
template <typename T>
struct TensorAddConstantOp {
  TensorAddConstantOp(T v) : val(v) {}

  // Out-of-place form.
  __device__ __forceinline__ void operator()(T* out, T* in) {
    const T src = *in;
    *out = src + val;
  }

  // In-place form.
  __device__ __forceinline__ void operator()(T* v) {
    T& elem = *v;
    elem += val;
  }

  // Constant added to every element this functor is applied to.
  const T val;
};
#ifdef CUDA_HALF_TENSOR
// half specialization of TensorAddConstantOp.
//
// With CUDA_HALF_INSTRUCTIONS defined, the constant is kept as a half (`val`)
// and added with __hadd. Otherwise the constant is converted to float once in
// the constructor (`fval`); each element is converted to float, added to, and
// converted back to half.
template <>
struct TensorAddConstantOp<half> {
#ifdef CUDA_HALF_INSTRUCTIONS
  TensorAddConstantOp(half v) : val(v) {}
  const half val;
#else
  TensorAddConstantOp(half v) : fval(THC_half2float(v)) {}
  const float fval;
#endif

  // Out-of-place form: *out receives (*in + constant).
  __device__ __forceinline__ void operator()(half* out, half* in) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half src = *in;
    *out = __hadd(src, val);
#else
    const float widened = __half2float(*in);
    *out = __float2half(widened + fval);
#endif
  }

  // In-place form: *v receives (*v + constant).
  __device__ __forceinline__ void operator()(half* v) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half cur = *v;
    *v = __hadd(cur, val);
#else
    float acc = __half2float(*v);
    acc += fval;
    *v = __float2half(acc);
#endif
  }
};
#endif // CUDA_HALF_TENSOR
// Functor that subtracts a fixed constant from an element.
//
// The constant is captured in `val` at construction. Two call forms exist:
//   op(out, in) : stores (*in - val) into *out
//   op(v)       : subtracts val from *v in place
template <typename T>
struct TensorSubConstantOp {
  TensorSubConstantOp(T v) : val(v) {}

  // Out-of-place form.
  __device__ __forceinline__ void operator()(T* out, T* in) {
    const T src = *in;
    *out = src - val;
  }

  // In-place form.
  __device__ __forceinline__ void operator()(T* v) {
    T& elem = *v;
    elem -= val;
  }

  // Constant subtracted from every element this functor is applied to.
  const T val;
};
#ifdef CUDA_HALF_TENSOR
// half specialization of TensorSubConstantOp.
//
// The constructor stores the NEGATED constant, so both call forms perform the
// subtraction as an addition. With CUDA_HALF_INSTRUCTIONS defined the negated
// constant is kept as a half (`val`) and combined with __hadd; otherwise it is
// kept as a float (`fval`) and the arithmetic is done in float.
template <>
struct TensorSubConstantOp<half> {
#ifdef CUDA_HALF_INSTRUCTIONS
  TensorSubConstantOp(half v)
    : val(THC_float2half(-(THC_half2float(v)))) {}
  const half val;
#else
  TensorSubConstantOp(half v)
    : fval(-(THC_half2float(v))) {}
  const float fval;
#endif

  // Out-of-place form: *out receives (*in + negated constant).
  __device__ __forceinline__ void operator()(half* out, half* in) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half src = *in;
    *out = __hadd(src, val);
#else
    const float widened = __half2float(*in);
    *out = __float2half(widened + fval);
#endif
  }

  // In-place form: *v receives (*v + negated constant).
  __device__ __forceinline__ void operator()(half* v) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half cur = *v;
    *v = __hadd(cur, val);
#else
    float acc = __half2float(*v);
    acc += fval;
    *v = __float2half(acc);
#endif
  }
};
#endif // CUDA_HALF_TENSOR
// Functor that multiplies an element by a fixed constant.
//
// The constant is captured in `val` at construction. Two call forms exist:
//   op(out, in) : stores (*in * val) into *out
//   op(v)       : multiplies *v by val in place
template <typename T>
struct TensorMulConstantOp {
  TensorMulConstantOp(T v) : val(v) {}

  // Out-of-place form.
  __device__ __forceinline__ void operator()(T* out, T* in) {
    const T src = *in;
    *out = src * val;
  }

  // In-place form.
  __device__ __forceinline__ void operator()(T* v) {
    T& elem = *v;
    elem *= val;
  }

  // Factor applied to every element this functor is applied to.
  const T val;
};
#ifdef CUDA_HALF_TENSOR
// half specialization of TensorMulConstantOp.
//
// With CUDA_HALF_INSTRUCTIONS defined, the factor is kept as a half (`val`)
// and applied with __hmul. Otherwise the factor is converted to float once in
// the constructor (`fval`); each element is converted to float, multiplied,
// and converted back to half.
template <>
struct TensorMulConstantOp<half> {
#ifdef CUDA_HALF_INSTRUCTIONS
  TensorMulConstantOp(half v) : val(v) {}
  const half val;
#else
  TensorMulConstantOp(half v) : fval(THC_half2float(v)) {}
  const float fval;
#endif

  // Out-of-place form: *out receives (*in * factor).
  __device__ __forceinline__ void operator()(half* out, half* in) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half src = *in;
    *out = __hmul(src, val);
#else
    const float widened = __half2float(*in);
    *out = __float2half(widened * fval);
#endif
  }

  // In-place form: *v receives (*v * factor).
  __device__ __forceinline__ void operator()(half* v) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half cur = *v;
    *v = __hmul(cur, val);
#else
    float acc = __half2float(*v);
    acc *= fval;
    *v = __float2half(acc);
#endif
  }
};
#endif // CUDA_HALF_TENSOR
// Functor that divides an element by a fixed constant.
//
// The divisor is captured in `val` at construction. Two call forms exist:
//   op(out, in) : stores (*in / val) into *out
//   op(v)       : divides *v by val in place
template <typename T>
struct TensorDivConstantOp {
  TensorDivConstantOp(T v) : val(v) {}

  // Out-of-place form.
  __device__ __forceinline__ void operator()(T* out, T* in) {
    const T src = *in;
    *out = src / val;
  }

  // In-place form.
  __device__ __forceinline__ void operator()(T* v) {
    T& elem = *v;
    elem /= val;
  }

  // Divisor applied to every element this functor is applied to.
  const T val;
};
// float specialization of TensorDivConstantOp.
//
// The constructor stores the reciprocal (1.f / v) in `val`; both call forms
// then multiply by that reciprocal instead of dividing per element.
template <>
struct TensorDivConstantOp<float> {
  TensorDivConstantOp(float v) : val(1.f / v) {}

  // Out-of-place form: *out receives (*in * reciprocal).
  __device__ __forceinline__ void operator()(float* out, float* in) {
    const float src = *in;
    *out = src * val;
  }

  // In-place form: *v is scaled by the reciprocal.
  __device__ __forceinline__ void operator()(float* v) {
    float& elem = *v;
    elem *= val;
  }

  // Reciprocal of the divisor passed to the constructor.
  const float val;
};
// double specialization of TensorDivConstantOp.
//
// The constructor stores the reciprocal (1. / v) in `val`; both call forms
// then multiply by that reciprocal instead of dividing per element.
template <>
struct TensorDivConstantOp<double> {
  TensorDivConstantOp(double v) : val(1. / v) {}

  // Out-of-place form: *out receives (*in * reciprocal).
  __device__ __forceinline__ void operator()(double* out, double* in) {
    const double src = *in;
    *out = src * val;
  }

  // In-place form: *v is scaled by the reciprocal.
  __device__ __forceinline__ void operator()(double* v) {
    double& elem = *v;
    elem *= val;
  }

  // Reciprocal of the divisor passed to the constructor.
  const double val;
};
#ifdef CUDA_HALF_TENSOR
// half specialization of TensorDivConstantOp.
//
// Division is carried out as multiplication by a value computed once in the
// constructor: ScalarInv<half>::to(v) kept as a half (`val`) when
// CUDA_HALF_INSTRUCTIONS is defined, or 1.f / THC_half2float(v) kept as a
// float (`fval`) otherwise.
template <>
struct TensorDivConstantOp<half> {
#ifdef CUDA_HALF_INSTRUCTIONS
  TensorDivConstantOp(half v) : val(ScalarInv<half>::to(v)) {}
  const half val;
#else
  TensorDivConstantOp(half v) : fval(1.f / THC_half2float(v)) {}
  const float fval;
#endif

  // Out-of-place form: *out receives (*in * stored multiplier).
  __device__ __forceinline__ void operator()(half* out, half* in) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half src = *in;
    *out = __hmul(src, val);
#else
    const float widened = __half2float(*in);
    *out = __float2half(widened * fval);
#endif
  }

  // In-place form: *v receives (*v * stored multiplier).
  __device__ __forceinline__ void operator()(half* v) {
#ifdef CUDA_HALF_INSTRUCTIONS
    const half cur = *v;
    *v = __hmul(cur, val);
#else
    float acc = __half2float(*v);
    acc *= fval;
    *v = __float2half(acc);
#endif
  }
};
#endif // CUDA_HALF_TENSOR
// Functor that zeroes elements according to their position relative to the
// k-th diagonal.
//
// An element's (row, col) is recovered from its pointer offset from `start`
// using the two strides; whichever of stride0 / stride1 is larger is treated
// as the outer dimension. With Upper nonzero, elements with col - row >= k are
// kept; otherwise elements with col - row <= k are kept. Every other element
// is replaced by ScalarConvert<int, T>::to(0).
template <typename T, int Upper>
struct TensorTriOp {
  TensorTriOp(T *start_, long stride0_, long stride1_, long k_)
    : start(start_), stride0(stride0_), stride1(stride1_), k(k_) {}

  // Returns nonzero when the element pointed to by `in` should be kept.
  __device__ __forceinline__ int mask(T *in) {
    const ptrdiff_t offset = in - start;

    // Pick which stride indexes the outer dimension, then split the offset
    // into an outer index and an inner index.
    const bool dim0Outer = stride0 > stride1;
    const long outerStride = dim0Outer ? stride0 : stride1;
    const long innerStride = dim0Outer ? stride1 : stride0;
    const long outerIdx = (long) (offset / outerStride);
    const long innerIdx = (long) ((offset % outerStride) / innerStride);

    // Dimension 0 is the row, dimension 1 is the column.
    const long row = dim0Outer ? outerIdx : innerIdx;
    const long col = dim0Outer ? innerIdx : outerIdx;

    const long diag = col - row;
    if (Upper) {
      return diag >= k;
    }
    return diag <= k;
  }

  // Out-of-place form: copies *in to *out when kept, otherwise writes zero.
  __device__ __forceinline__ void operator()(T* out, T* in) {
    if (mask(in)) {
      *out = *in;
    } else {
      *out = ScalarConvert<int, T>::to(0);
    }
  }

  // In-place form: leaves kept elements untouched, zeroes the rest.
  __device__ __forceinline__ void operator()(T* v) {
    if (mask(v)) {
      return;
    }
    *v = ScalarConvert<int, T>::to(0);
  }

  // Base pointer that element offsets are measured from.
  const T *start;
  // Strides for dimension 0 and dimension 1, and the diagonal offset k.
  const long stride0, stride1, k;
};
#include "generic/THCTensorMathPairwise.cu"
#include "THCGenerateAllTypes.h"