#include "THCTensorMath.h"
#include "THCGeneral.h"
#include "THCBlas.h"
#include "THCTensorCopy.h"
#include "THCTensorRandom.h"
#include "THCApply.cuh"
#include "THCReduce.cuh"
#include "THCTensorMathReduce.cuh"
#include "THCTensorMathPointwise.cuh"
#include <thrust/device_ptr.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>
#include <thrust/inner_product.h>
#if CUDA_VERSION >= 7000
#include <thrust/system/cuda/execution_policy.h>
#endif
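
// Functor for the "tensor power" op: raises the fixed scalar base `val`
// to the power given by each element. The two-pointer overload writes to
// a separate output; the one-pointer overload transforms in place.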
struct TensorTPowOp {
  TensorTPowOp(float v) : val(v) {}

  __device__ __forceinline__ void operator()(float* out, float* in) {
    *out = powf(val, *in);
  }

  __device__ __forceinline__ void operator()(float* v) {
    *v = powf(val, *v);
  }

  const float val;
};
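
// self_[i] = value ^ src[i]. When self_ aliases src the in-place apply
// path is taken; otherwise self_ is first resized to match src.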
void THCudaTensor_tpow(THCState *state, THCudaTensor *self_, float value, THCudaTensor *src)
{
  THAssert(THCudaTensor_checkGPU(state, 2, self_, src));
  if (self_ == src) {
    if (!THC_pointwiseApply1(state, self_, TensorTPowOp(value))) {
      THArgCheck(false, 2, CUTORCH_DIM_WARNING);
    }
  } else {
    THCudaTensor_resizeAs(state, self_, src);
    if (!THC_pointwiseApply2(state, self_, src, TensorTPowOp(value))) {
      THArgCheck(false, 2, CUTORCH_DIM_WARNING);
    }
  }

  THCudaCheck(cudaGetLastError());
}
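
/* Usage sketch (hedged: assumes the caller owns a valid THCState* `state`
 * and an initialized CUDA tensor `t`):
 *
 *   THCudaTensor_tpow(state, t, 2.0f, t);  // t[i] becomes 2^t[i], in place
 */

// Functor for element-wise atan2: *out = atan2f(*a, *b).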
struct TensorATan2Op {
  __device__ __forceinline__ void operator()(float* out, float* a, float* b) {
    *out = atan2f(*a, *b);
  }
};
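
// self_[i] = atan2f(tx[i], ty[i]). tx and ty must hold the same number of
// elements; self_ is resized to the shape of tx before the apply.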
void THCudaTensor_atan2(THCState *state, THCudaTensor *self_, THCudaTensor *tx, THCudaTensor *ty)
{
  THAssert(THCudaTensor_checkGPU(state, 3, self_, tx, ty));
  THArgCheck(THCudaTensor_nElement(state, tx) ==
             THCudaTensor_nElement(state, ty), 3, "sizes do not match");
  THCudaTensor_resizeAs(state, self_, tx);

  if (!THC_pointwiseApply3(state, self_, tx, ty, TensorATan2Op())) {
    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
  }

  THCudaCheck(cudaGetLastError());
}
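
// Returns the p-norm of (self - src) as a host scalar:
// (sum_i |self[i] - src[i]|^value)^(1/value). Both tensors are made
// contiguous (and released again below), and the reduction runs as a
// thrust::inner_product with TensorDistOp (from THCTensorMathReduce.cuh)
// supplying the per-element |a - b|^value term.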
float THCudaTensor_dist(THCState *state, THCudaTensor *self, THCudaTensor *src, float value)
{
  THAssert(THCudaTensor_checkGPU(state, 2, self, src));
  self = THCudaTensor_newContiguous(state, self);
  ptrdiff_t size = THCudaTensor_nElement(state, self);
  src = THCudaTensor_newContiguous(state, src);
  thrust::device_ptr<float> self_data(THCudaTensor_data(state, self));
  thrust::device_ptr<float> src_data(THCudaTensor_data(state, src));
  float result = thrust::inner_product(
#if CUDA_VERSION >= 7000
    thrust::cuda::par.on(THCState_getCurrentStream(state)),
#endif
    self_data, self_data+size, src_data, (float) 0,
    thrust::plus<float>(), TensorDistOp<float>(value));

  THCudaTensor_free(state, src);
  THCudaTensor_free(state, self);

  return powf(result, 1.0f / value);
}
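
/* Usage sketch (hedged: `a` and `b` are assumed to be caller-created CUDA
 * tensors with the same number of elements):
 *
 *   float l2 = THCudaTensor_dist(state, a, b, 2.0f);  // Euclidean distance
 */

// Resizes r_ to `size` and fills it with samples drawn uniformly from the
// unit interval.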
void THCudaTensor_rand(THCState *state, THCudaTensor *r_, THLongStorage *size)
{
  THAssert(THCudaTensor_checkGPU(state, 1, r_));
  THCudaTensor_resize(state, r_, size, NULL);
  THCudaTensor_uniform(state, r_, 0, 1);
}
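
// Resizes r_ to `size` and fills it with samples from the standard normal
// distribution N(0, 1).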
void THCudaTensor_randn(THCState *state, THCudaTensor *r_, THLongStorage *size)
{
  THAssert(THCudaTensor_checkGPU(state, 1, r_));
  THCudaTensor_resize(state, r_, size, NULL);
  THCudaTensor_normal(state, r_, 0, 1);
}
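
/* Usage sketch (hedged: `sz` is assumed to be a caller-built THLongStorage
 * describing the desired shape, and `r` a caller-created CUDA tensor):
 *
 *   THCudaTensor_rand(state, r, sz);   // r[i] ~ Uniform(0, 1)
 *   THCudaTensor_randn(state, r, sz);  // r[i] ~ Normal(0, 1)
 */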