|  | #include "caffe2/core/context_gpu.h" | 
|  | #include "caffe2/core/operator.h" | 
|  |  | 
|  | namespace caffe2 { | 
|  | namespace { | 
|  |  | 
|  | class GetGPUMemoryUsageOp final : public Operator<CUDAContext> { | 
|  | public: | 
|  | template<class... Args> explicit GetGPUMemoryUsageOp(Args&&... args) | 
|  | : Operator<CUDAContext>(std::forward<Args>(args)...) {} | 
|  | ~GetGPUMemoryUsageOp() override {} | 
|  |  | 
|  | bool RunOnDevice() override { | 
|  | TORCH_CHECK_EQ(InputSize(), 0); | 
|  | TORCH_CHECK_EQ(OutputSize(), 1); | 
|  | std::vector<long> total_by_gpu = CUDAContext::TotalMemoryByGpu(); | 
|  | std::vector<long> max_by_gpu = CUDAContext::MaxMemoryByGpu(); | 
|  | TORCH_CHECK_EQ(total_by_gpu.size(), max_by_gpu.size()); | 
|  |  | 
|  |  | 
|  | auto* stats = Output(0, {2, static_cast<int64_t>(total_by_gpu.size())}, at::dtype<long>()); | 
|  | context_.CopyFromCPU<long>( | 
|  | total_by_gpu.size(), | 
|  | total_by_gpu.data(), | 
|  | stats->template mutable_data<long>()); | 
|  | context_.CopyFromCPU<long>( | 
|  | max_by_gpu.size(), | 
|  | max_by_gpu.data(), | 
|  | stats->template mutable_data<long>() + total_by_gpu.size()); | 
|  | return true; | 
|  | } | 
|  | }; | 
|  |  | 
// Schema: zero inputs, one output of shape (2, num_gpus); see doc string
// below for the row layout. NOTE(review): the stats are only populated when
// the --caffe2_gpu_memory_tracking flag is enabled, per the doc string.
OPERATOR_SCHEMA(GetGPUMemoryUsage)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(Fetches GPU memory stats from CUDAContext. Result is stored
in output blob with shape (2, num_gpus). First row contains the total
current memory usage, and the second row the maximum usage during
this execution.

NOTE: --caffe2_gpu_memory_tracking flag must be enabled to use this op.
)DOC");

// Register the op for the CUDA backend only; it reads CUDAContext counters
// and writes its output into device memory.
REGISTER_CUDA_OPERATOR(GetGPUMemoryUsage, GetGPUMemoryUsageOp);
|  | } | 
|  |  | 
|  | } // namespace caffe2 |