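// "Generic" THC source: this file is re-included once per scalar type (via
// files such as THCGenerateAllTypes.h), with scalar_t and Real bound to the
// current type. The guard below defines THC_GENERIC_FILE on the first pass
// and compiles the body on each per-type re-inclusion.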
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "THC/generic/THCTensorMathPointwise.cu"
#else
#include <ATen/MemoryOverlap.h>
#include <ATen/NamedTensorUtils.h>
#if !defined(THC_REAL_IS_BOOL)
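// Everything below is skipped for bool tensors, which get no pointwise math.

// Forwards named-tensor dimension names from src to the result tensor.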
static void propagate_names_if_named_tensor_enabled(THCTensor* result, THCTensor* src) {
  at::namedinference::propagate_names(result, src);
}
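// Generates an elementwise unary op. The expansion defines a device-side
// functor that applies CFUNC to each element, plus THCTensor_(NAME), which
// runs the functor in place when self_ == src and otherwise resizes self_
// to match src and writes CFUNC(src) into it.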
#define IMPLEMENT_CUDA_TENSOR_BASIC_FUNC_(NAME, CFUNC, REAL) \
  struct Tensor_##NAME##_##REAL##_Op { \
    __device__ __forceinline__ void operator()(scalar_t* out, scalar_t* in) const { \
      *out = CFUNC(*in); \
    } \
    \
    __device__ __forceinline__ void operator()(scalar_t* v) const { \
      *v = CFUNC(*v); \
    } \
  }; \
  \
  void THCTensor_(NAME)(THCState* state, THCTensor* self_, THCTensor* src) { \
    THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, self_, src)); \
    at::assert_no_internal_overlap(self_); \
    if (self_ == src) { \
      if (!THC_pointwiseApply1<scalar_t>(state, self_, Tensor_##NAME##_##REAL##_Op())) { \
        THArgCheck(false, 2, CUTORCH_DIM_WARNING); \
      } \
    } else { \
      THCTensor_(resizeAs)(state, self_, src); \
      \
      if (!THC_pointwiseApply2<scalar_t, scalar_t>(state, self_, src, Tensor_##NAME##_##REAL##_Op())) { \
        THArgCheck(false, 2, CUTORCH_DIM_WARNING); \
      } \
    } \
    \
    THCudaCheck(cudaGetLastError()); \
    propagate_names_if_named_tensor_enabled(self_, src); \
  }
#define IMPLEMENT_CUDA_TENSOR_BASIC_FUNC(NAME, CFUNC, REAL) \
  IMPLEMENT_CUDA_TENSOR_BASIC_FUNC_(NAME, CFUNC, REAL)
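// The extra level of indirection lets the REAL argument (the per-type Real
// macro) expand before it is token-pasted into the functor name.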
#if defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE) || defined(THC_REAL_IS_HALF)
IMPLEMENT_CUDA_TENSOR_BASIC_FUNC(sqrt, THCNumerics<scalar_t>::sqrt, Real)
#endif
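// For example, the float instantiation above should emit THCudaTensor_sqrt,
// computing elementwise sqrt through THCNumerics<float>::sqrt:
//
//   THCudaTensor_sqrt(state, result, input);  // result = sqrt(input), elementwise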
#undef IMPLEMENT_CUDA_TENSOR_BASIC_FUNC_
#undef IMPLEMENT_CUDA_TENSOR_BASIC_FUNC
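// Cross product of x and y along `dimension`, which is expected to hold the
// 3 vector components. Each tensor is narrowed to length 1 along that
// dimension so the pointwise kernel visits one element per cross product;
// TensorCrossOp then reaches the remaining two components through the
// original strides sx/sy/so.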
void THCTensor_(crossKernel)(THCState *state, THCTensor *self, THCTensor *x, THCTensor *y, int dimension)
{
  THCAssertSameGPU(THCTensor_(checkGPU)(state, 3, self, x, y));
  int64_t sx = THCTensor_(stride)(state, x, dimension);
  int64_t sy = THCTensor_(stride)(state, y, dimension);
  int64_t so = THCTensor_(stride)(state, self, dimension);
  THCTensor *nx = THCTensor_(newNarrow)(state, x, dimension, 0, 1);
  THCTensor *ny = THCTensor_(newNarrow)(state, y, dimension, 0, 1);
  THCTensor *nself = THCTensor_(newNarrow)(state, self, dimension, 0, 1);
  if (!THC_pointwiseApply3<scalar_t, scalar_t, scalar_t>(state, nself, nx, ny, TensorCrossOp<scalar_t>(sx, sy, so))) {
    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
  }
  THCTensor_(free)(state, nx);
  THCTensor_(free)(state, ny);
  THCTensor_(free)(state, nself);
}
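// Bumps the refcount on the raw THCTensor before reclaiming it into an
// intrusive_ptr, so the resulting at::Tensor shares ownership with the
// caller rather than stealing its reference.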
namespace {
c10::intrusive_ptr<at::TensorImpl, at::UndefinedTensorImpl> retainTensorImpl(THCTensor* self) {
  c10::raw::intrusive_ptr::incref(self);
  return c10::intrusive_ptr<at::TensorImpl, at::UndefinedTensorImpl>::reclaim(self);
}
} // namespace
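// Elementwise multiply, implemented by dispatching to ATen's mul_out on
// Tensor handles that share ownership of the raw THC tensors.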
void THCTensor_(cmul)(THCState *state, THCTensor *self_, THCTensor *src1, THCTensor *src2)
{
  auto out = at::Tensor(retainTensorImpl(self_));
  at::mul_out(out, at::Tensor(retainTensorImpl(src1)), at::Tensor(retainTensorImpl(src2)));
}
#endif // !defined(THC_REAL_IS_BOOL)
#endif // THC_GENERIC_FILE