| #include <cfloat> |
| |
| #include "caffe2/core/context_gpu.h" |
| #include "utility_ops.h" |
| |
namespace caffe2 {

// Element-wise maximum of two float arrays:
//   maxout[i] = max(X[i], Y[i])   for every i covered by the loop over N.
//
// X, Y and maxout must each reference at least N floats. The host code in
// MaxOp::Compute passes the output buffer as both X and maxout when folding in
// later inputs. That aliasing is fine here because every access touches only
// index i, but it means these pointers must NOT be qualified __restrict__.
//
// Iteration over i is provided by CUDA_1D_KERNEL_LOOP, which is defined
// outside this file.
__global__ void
ElwiseMaxKernel(const float* X, const float* Y, float* maxout, const int N) {
  CUDA_1D_KERNEL_LOOP(i, N) {
    maxout[i] = max(X[i], Y[i]);
  }
}

// Computes the element-wise maximum across all inputs into Output(0).
//
// The reduction is a left fold of pairwise maxes: the first launch combines
// Input(0) with Input(1), and every later launch combines the running result
// held in Output(0) with Input(i). All launches are issued on
// context_.cuda_stream(), so each one sees the result of the previous one.
//
// Notes:
//  - N is taken from Input(0); every other input and the output are assumed
//    to hold at least N floats (presumably validated by the caller -- TODO
//    confirm against utility_ops.h).
//  - With a single input no kernel is launched and this function does not
//    write Output(0) itself; the caller is presumably responsible for that
//    case -- TODO confirm.
//  - Always returns true; kernel launch errors are not checked here.
template <>
bool MaxOp<float, CUDAContext>::Compute() {
  float* output_data = Output(0)->mutable_data<float>();
  const int N = Input(0).size();
  const int num_inputs = InputSize();

  // Run pairwise-maxes
  for (int i = 1; i < num_inputs; ++i) {
    // The first pairwise max must read Input(0) directly. The loop starts at
    // 1, so the previous `i == 0` test never fired and the first launch read
    // Output(0), which is only valid if it was pre-seeded with Input(0).
    // Later iterations fold into the running result stored in Output(0).
    const float* running_max =
        (i == 1) ? Input(0).data<float>() : Output(0)->data<float>();

    ElwiseMaxKernel<<<
        CAFFE_GET_BLOCKS(N),
        CAFFE_CUDA_NUM_THREADS,
        0,
        context_.cuda_stream()>>>(
        running_max, Input(i).data<float>(), output_data, N);
  }

  return true;
}

// Register the float CUDA implementation under the operator name "Max".
REGISTER_CUDA_OPERATOR(Max, MaxOp<float, CUDAContext>);
} // namespace caffe2