caffe2/operators/filler_op.cu - platform/external/pytorch - Git at Google

 #include <cmath>
 #include "caffe2/core/context_gpu.h"
 #include "caffe2/operators/filler_op.h"
 #include "caffe2/operators/operator_fallback_gpu.h"

 namespace caffe2 {

 namespace {
 __global__ void FillRangeKernel(const int n, float* data) {
   CUDA_1D_KERNEL_LOOP(index, n) {
     data[index] = index;
   }
 }

 template <typename T>
 __global__ void FillDiagonalKernel(
     const int num_diagonal_elements,
     const int64_t step_size,
     const T value,
     T* data) {
   CUDA_1D_KERNEL_LOOP(index, num_diagonal_elements) {
     data[index * step_size] = value;
   }
 }
 }

 template <>
 bool RangeFillOp<float, CUDAContext>::Fill(Tensor* output) {
   int N = output->numel();
   FillRangeKernel<<<
       CAFFE_GET_BLOCKS(N),
       CAFFE_CUDA_NUM_THREADS,
       0,
       context_.cuda_stream()>>>(N, output->template mutable_data<float>());
   C10_CUDA_KERNEL_LAUNCH_CHECK();

   return true;
 }

 template <>
 template <typename T>
 bool DiagonalFillOp<CUDAContext>::FillWithType(Tensor* output) {
   VerifyOutputShape(output);
   auto* data = output->template mutable_data<T>();
   int size = output->numel();
   // first fill everything with 0
   math::Set<T, CUDAContext>(size, T(0), data, &context_);

   T value = OperatorBase::GetSingleArgument<T>("value", 0);
   int64_t step_size = GetStepSize(output);
   int num_diagonal_elements = ceil((float)size / step_size);

   FillDiagonalKernel<<<
       CAFFE_GET_BLOCKS(num_diagonal_elements),
       CAFFE_CUDA_NUM_THREADS,
       0,
       context_.cuda_stream()>>>(num_diagonal_elements, step_size, value, data);
   C10_CUDA_KERNEL_LAUNCH_CHECK();

   return true;
 }

 REGISTER_CUDA_OPERATOR(UniformFill, UniformFillOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(UniformIntFill, UniformFillOp<int, CUDAContext>);
 REGISTER_CUDA_OPERATOR(ConstantFill, ConstantFillOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(DiagonalFill, DiagonalFillOp<CUDAContext>);
 REGISTER_CUDA_OPERATOR(GaussianFill, GaussianFillOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(XavierFill, XavierFillOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(MSRAFill, MSRAFillOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(RangeFill, RangeFillOp<float, CUDAContext>);
 REGISTER_CUDA_OPERATOR(LengthsRangeFill, GPUFallbackOp);

 } // namespace caffe2
	#include <cmath>
	#include "caffe2/core/context_gpu.h"
	#include "caffe2/operators/filler_op.h"
	#include "caffe2/operators/operator_fallback_gpu.h"

	namespace caffe2 {

	namespace {
	__global__ void FillRangeKernel(const int n, float* data) {
	CUDA_1D_KERNEL_LOOP(index, n) {
	data[index] = index;
	}
	}

	template <typename T>
	__global__ void FillDiagonalKernel(
	const int num_diagonal_elements,
	const int64_t step_size,
	const T value,
	T* data) {
	CUDA_1D_KERNEL_LOOP(index, num_diagonal_elements) {
	data[index * step_size] = value;
	}
	}
	}

	template <>
	bool RangeFillOp<float, CUDAContext>::Fill(Tensor* output) {
	int N = output->numel();
	FillRangeKernel<<<
	CAFFE_GET_BLOCKS(N),
	CAFFE_CUDA_NUM_THREADS,
	0,
	context_.cuda_stream()>>>(N, output->template mutable_data<float>());
	C10_CUDA_KERNEL_LAUNCH_CHECK();

	return true;
	}

	template <>
	template <typename T>
	bool DiagonalFillOp<CUDAContext>::FillWithType(Tensor* output) {
	VerifyOutputShape(output);
	auto* data = output->template mutable_data<T>();
	int size = output->numel();
	// first fill everything with 0
	math::Set<T, CUDAContext>(size, T(0), data, &context_);

	T value = OperatorBase::GetSingleArgument<T>("value", 0);
	int64_t step_size = GetStepSize(output);
	int num_diagonal_elements = ceil((float)size / step_size);

	FillDiagonalKernel<<<
	CAFFE_GET_BLOCKS(num_diagonal_elements),
	CAFFE_CUDA_NUM_THREADS,
	0,
	context_.cuda_stream()>>>(num_diagonal_elements, step_size, value, data);
	C10_CUDA_KERNEL_LAUNCH_CHECK();

	return true;
	}

	REGISTER_CUDA_OPERATOR(UniformFill, UniformFillOp<float, CUDAContext>);
	REGISTER_CUDA_OPERATOR(UniformIntFill, UniformFillOp<int, CUDAContext>);
	REGISTER_CUDA_OPERATOR(ConstantFill, ConstantFillOp<CUDAContext>);
	REGISTER_CUDA_OPERATOR(DiagonalFill, DiagonalFillOp<CUDAContext>);
	REGISTER_CUDA_OPERATOR(GaussianFill, GaussianFillOp<float, CUDAContext>);
	REGISTER_CUDA_OPERATOR(XavierFill, XavierFillOp<float, CUDAContext>);
	REGISTER_CUDA_OPERATOR(MSRAFill, MSRAFillOp<float, CUDAContext>);
	REGISTER_CUDA_OPERATOR(RangeFill, RangeFillOp<float, CUDAContext>);
	REGISTER_CUDA_OPERATOR(LengthsRangeFill, GPUFallbackOp);

	} // namespace caffe2