blob: 71f3573ca92a5aed7653f156f02796ee0c0a2bdb [file] [log] [blame]
#include <cfloat>
#include "caffe2/core/context_gpu.h"
#include "utility_ops.h"
namespace caffe2 {
__global__ void
ElwiseMaxKernel(const float* X, const float* Y, float* maxout, const int N) {
CUDA_1D_KERNEL_LOOP(i, N) {
maxout[i] = max(X[i], Y[i]);
}
}
template <>
bool MaxOp<float, CUDAContext>::Compute() {
float* output_data = Output(0)->mutable_data<float>();
const int N = Input(0).size();
// Run pairwise-maxes
for (int i = 1; i < InputSize(); ++i) {
ElwiseMaxKernel<<<
CAFFE_GET_BLOCKS(N),
CAFFE_CUDA_NUM_THREADS,
0,
context_.cuda_stream()>>>(
(i == 0 ? Input(0).data<float>() : Output(0)->data<float>()),
Input(i).data<float>(),
output_data,
N);
}
return true;
}
REGISTER_CUDA_OPERATOR(Max, MaxOp<float, CUDAContext>);
}