// THCTensorMath2.cu - platform/external/pytorch - Git at Google

 #include "THCTensorMath.h"
 #include "THCGeneral.h"
 #include "THCBlas.h"
 #include "THCTensorCopy.h"
 #include "THCTensorRandom.h"
 #include "THCApply.cuh"
 #include "THCReduce.cuh"
 #include "THCTensorMathReduce.cuh"
 #include "THCTensorMathPointwise.cuh"

 // Pointwise functor computing the two-argument arctangent of a pair of
 // float operands. It is device-only and is handed to THC_pointwiseApply3
 // by THCudaTensor_atan2.
 struct TensorATan2Op {
   // out: destination element that receives the result.
   // a:   element passed as the first argument of atan2f.
   // b:   element passed as the second argument of atan2f.
   __device__ __forceinline__ void operator()(float* out, float* a, float* b) {
     // Load both operands before storing, so the result is the same even if
     // `out` refers to the same element as one of the inputs.
     const float first = a[0];
     const float second = b[0];
     out[0] = atan2f(first, second);
   }
 };

 /*
  * Applies TensorATan2Op across (self_, tx, ty), i.e. the functor that stores
  * atan2f of its two input elements into its output element.
  *
  * state: THC state handle forwarded to every THC call below.
  * self_: result tensor; it is resized to match tx before the apply.
  * tx:    tensor supplying the functor's first input.
  * ty:    tensor supplying the functor's second input.
  *
  * Raises an argument error (argument 3, "sizes do not match") when tx and ty
  * hold a different number of elements, and an argument error (argument 2,
  * CUTORCH_DIM_WARNING) when THC_pointwiseApply3 reports failure.
  */
 void THCudaTensor_atan2(THCState *state, THCudaTensor *self_, THCudaTensor *tx, THCudaTensor *ty)
 {
   THCAssertSameGPU(THCudaTensor_checkGPU(state, 3, self_, tx, ty));

   // Only the element counts of the two inputs are compared here.
   const bool sameElementCount =
       (THCudaTensor_nElement(state, tx) == THCudaTensor_nElement(state, ty));
   THArgCheck(sameElementCount, 3, "sizes do not match");

   THCudaTensor_resizeAs(state, self_, tx);

   const bool applied =
       THC_pointwiseApply3(state, self_, tx, ty, TensorATan2Op());
   if (!applied) {
     THArgCheck(false, 2, CUTORCH_DIM_WARNING);
   }

   // Surface any error recorded by the CUDA runtime during the call above.
   THCudaCheck(cudaGetLastError());
 }
	#include "THCTensorMath.h"
	#include "THCGeneral.h"
	#include "THCBlas.h"
	#include "THCTensorCopy.h"
	#include "THCTensorRandom.h"
	#include "THCApply.cuh"
	#include "THCReduce.cuh"
	#include "THCTensorMathReduce.cuh"
	#include "THCTensorMathPointwise.cuh"

	// Pointwise functor computing the two-argument arctangent of a pair of
	// float operands. It is device-only and is handed to THC_pointwiseApply3
	// by THCudaTensor_atan2.
	//
	// NOTE(review): a TensorATan2Op definition already appears earlier in this
	// file; this copy looks like an extraction duplicate, and one of the two
	// should be removed to avoid a redefinition error.
	struct TensorATan2Op {
	  // out: destination element that receives the result.
	  // a:   element passed as the first argument of atan2f.
	  // b:   element passed as the second argument of atan2f.
	  __device__ __forceinline__ void operator()(float* out, float* a, float* b) {
	    // Dereference all three pointers: the result must be stored through
	    // `out`, and atan2f takes float values, not addresses.
	    *out = atan2f(*a, *b);
	  }
	};

	/*
	 * Applies TensorATan2Op across (self_, tx, ty), i.e. the functor that stores
	 * atan2f of its two input elements into its output element.
	 *
	 * state: THC state handle forwarded to every THC call below.
	 * self_: result tensor; it is resized to match tx before the apply.
	 * tx:    tensor supplying the functor's first input.
	 * ty:    tensor supplying the functor's second input.
	 *
	 * All four parameters are pointers, matching the other definition of this
	 * function in the file and the THC calls made in the body.
	 *
	 * Raises an argument error (argument 3, "sizes do not match") when tx and ty
	 * hold a different number of elements, and an argument error (argument 2,
	 * CUTORCH_DIM_WARNING) when THC_pointwiseApply3 reports failure.
	 *
	 * NOTE(review): THCudaTensor_atan2 is already defined earlier in this file;
	 * this copy looks like an extraction duplicate, and one of the two should be
	 * removed to avoid a redefinition error.
	 */
	void THCudaTensor_atan2(THCState *state, THCudaTensor *self_, THCudaTensor *tx, THCudaTensor *ty)
	{
	  THCAssertSameGPU(THCudaTensor_checkGPU(state, 3, self_, tx, ty));
	  THArgCheck(THCudaTensor_nElement(state, tx) ==
	             THCudaTensor_nElement(state, ty), 3, "sizes do not match");
	  THCudaTensor_resizeAs(state, self_, tx);

	  if (!THC_pointwiseApply3(state, self_, tx, ty, TensorATan2Op())) {
	    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
	  }

	  // Surface any error recorded by the CUDA runtime during the call above.
	  THCudaCheck(cudaGetLastError());
	}