| #include "caffe2/core/context_gpu.h" | 
 | #include "caffe2/operators/perplexity_op.h" | 
 | #include "caffe2/utils/math.h" | 
 | #include <thrust/device_vector.h> | 
 | #include <thrust/transform_reduce.h> | 
 | #include <thrust/system/cuda/execution_policy.h> | 
 |  | 
 | namespace caffe2 { | 
 |  | 
 | struct perplexity_function | 
 | { | 
 |   perplexity_function(float p) : pow(p) {} | 
 |   __host__ __device__ float operator()(float x) const | 
 |   { | 
 |     return powf(1.0f/x, pow); | 
 |   } | 
 |   float pow; | 
 | }; | 
 |  | 
 | template <> | 
 | bool PerplexityOp<float, CUDAContext>::RunOnDevice() { | 
 |   auto& X = Input(0); | 
 |  | 
 |   TORCH_DCHECK_EQ(X.dim(), 1); | 
 |   int N = X.dim32(0); | 
 |  | 
 |   auto* Y = Output(0, vector<int64_t>(), at::dtype<float>()); | 
 |   float* Ydata = Y->template mutable_data<float>(); | 
 |   const float* Xdata = X.data<float>(); | 
 |  | 
 |   float perplexity = thrust::transform_reduce( | 
 |       #if THRUST_VERSION >= 100800 | 
 |         thrust::cuda::par.on(context_.cuda_stream()), | 
 |       #endif  // THRUST_VERSION >= 100800 | 
 |       Xdata, Xdata + N, | 
 |       perplexity_function(1.0f/N), | 
 |       1.0f, | 
 |       thrust::multiplies<float>()); | 
 |  | 
 |   math::Set<float, CUDAContext>(1, perplexity, Ydata, &context_); | 
 |   return true; | 
 | } | 
 |  | 
 | REGISTER_CUDA_OPERATOR(Perplexity, PerplexityOp<float, CUDAContext>); | 
 | }  // namespace caffe2 |