#pragma once

#include <cstdint>

#include <cuda_runtime_api.h>
#include <cusparse.h>
#include <cublas_v2.h>

#ifdef CUDART_VERSION
#include <cusolverDn.h>
#endif

#include <ATen/core/ATenGeneral.h>
#include <ATen/Context.h>
#include <c10/cuda/CUDAStream.h>
#include <c10/cuda/CUDAFunctions.h>
#include <ATen/cuda/Exceptions.h>

namespace at {
namespace cuda {

/*
A common CUDA interface for ATen.

This interface is distinct from CUDAHooks, which defines an interface that links
to both CPU-only and CUDA builds. That interface is intended for runtime
dispatch and should be used from files that are included in both CPU-only and
CUDA builds.

CUDAContext, on the other hand, should be preferred by files only included in
CUDA builds. It is intended to expose CUDA functionality in a consistent
manner.

This means there is some overlap between CUDAContext and CUDAHooks, but the
choice of which to use is simple: use CUDAContext in files that are compiled
only in CUDA builds, and CUDAHooks otherwise.

Note that CUDAContext simply defines an interface with no associated class.
It is expected that the modules whose functions compose this interface will
manage their own state. There is only a single CUDA context/state.
*/
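
/*
For example (an illustrative sketch, not part of this interface): code that is
compiled in both CPU-only and CUDA builds should reach CUDA through the hooks,
e.g. via at::globalContext().hasCUDA(), whereas a CUDA-only translation unit
can call the functions declared below directly:

  // In a file compiled for both CPU-only and CUDA builds:
  if (at::globalContext().hasCUDA()) {
    // dispatch to a CUDA implementation
  }

  // In a CUDA-only file:
  const int warp = at::cuda::warp_size();
*/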

/**
 * DEPRECATED: use device_count() instead
 */
inline int64_t getNumGPUs() {
  return c10::cuda::device_count();
}

/**
 * CUDA is available if we compiled with CUDA and there is at least one device.
 * If we compiled with CUDA but there is a driver problem, etc., this function
 * reports that CUDA is not available (rather than raising an error).
 */
inline bool is_available() {
  return c10::cuda::device_count() > 0;
}
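
/*
For example (illustrative only), a caller that wants to degrade gracefully
rather than fail can guard its CUDA path on this check:

  if (at::cuda::is_available()) {
    // query devices, create streams, launch kernels, ...
  } else {
    // fall back to a CPU implementation
  }
*/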

// Returns the properties of the current CUDA device.
TORCH_CUDA_CPP_API cudaDeviceProp* getCurrentDeviceProperties();

// Returns the warp size of the current CUDA device.
TORCH_CUDA_CPP_API int warp_size();

// Returns the properties of the given CUDA device.
TORCH_CUDA_CPP_API cudaDeviceProp* getDeviceProperties(int64_t device);

// Returns true if `device` can directly access memory on `peer_device`
// (peer-to-peer access).
TORCH_CUDA_CPP_API bool canDeviceAccessPeer(
    int64_t device,
    int64_t peer_device);

// Returns the allocator ATen uses for CUDA device memory.
TORCH_CUDA_CPP_API Allocator* getCUDADeviceAllocator();
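
/*
Illustrative sketch (not part of this interface): using the device queries
above to size a kernel launch. `my_kernel` and `n` are hypothetical; the
stream helper comes from c10/cuda/CUDAStream.h, which is included above.

  cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
  // prop->maxThreadsPerBlock, prop->multiProcessorCount, ... are now usable.
  const int threads = at::cuda::warp_size() * 8;  // e.g. 256 when the warp size is 32
  const int blocks = (n + threads - 1) / threads;
  my_kernel<<<blocks, threads, 0, c10::cuda::getCurrentCUDAStream()>>>(...);
  AT_CUDA_CHECK(cudaGetLastError());
*/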

/* Handles */

// Returns a cuSPARSE handle for the current device, set to use the current
// CUDA stream.
TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle();

// Returns a cuBLAS handle for the current device, set to use the current
// CUDA stream.
TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle();

// Frees the cached cuBLAS workspaces.
TORCH_CUDA_CPP_API void clearCublasWorkspaces();

#ifdef CUDART_VERSION
TORCH_CUDA_CPP_API cusolverDnHandle_t getCurrentCUDASolverDnHandle();
#endif
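
/*
Illustrative sketch (not part of this interface): since the cuBLAS handle
returned above is tied to the current device and set to the current CUDA
stream, a typical call only needs to check the cuBLAS status.
TORCH_CUDABLAS_CHECK comes from ATen/cuda/Exceptions.h, included above;
`n`, `d_x`, and `d_y` are hypothetical device data owned by the caller.

  cublasHandle_t handle = at::cuda::getCurrentCUDABlasHandle();
  const float alpha = 1.0f;
  TORCH_CUDABLAS_CHECK(cublasSaxpy(handle, n, &alpha, d_x, 1, d_y, 1));
*/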

} // namespace cuda
} // namespace at