|  | #include <c10/cuda/CUDAFunctions.h> | 
|  | #include <c10/macros/Macros.h> | 
|  |  | 
|  | #include <limits> | 
|  |  | 
|  | namespace c10 { | 
|  | namespace cuda { | 
|  |  | 
|  | namespace { | 
|  | // returns -1 on failure | 
|  | int32_t driver_version() { | 
|  | int driver_version = -1; | 
|  | C10_CUDA_IGNORE_ERROR(cudaDriverGetVersion(&driver_version)); | 
|  | return driver_version; | 
|  | } | 
|  |  | 
|  | int device_count_impl(bool fail_if_no_driver) { | 
|  | int count; | 
|  | auto err = C10_CUDA_ERROR_HANDLED(cudaGetDeviceCount(&count)); | 
|  | if (err == cudaSuccess) { | 
|  | return count; | 
|  | } | 
|  | // Clear out the error state, so we don't spuriously trigger someone else. | 
|  | // (This shouldn't really matter, since we won't be running very much CUDA | 
|  | // code in this regime.) | 
|  | cudaError_t last_err C10_UNUSED = cudaGetLastError(); | 
|  | switch (err) { | 
|  | case cudaErrorNoDevice: | 
|  | // Zero devices is ok here | 
|  | count = 0; | 
|  | break; | 
|  | case cudaErrorInsufficientDriver: { | 
|  | auto version = driver_version(); | 
|  | if (version <= 0) { | 
|  | if (!fail_if_no_driver) { | 
|  | // No CUDA driver means no devices | 
|  | count = 0; | 
|  | break; | 
|  | } | 
|  | TORCH_CHECK( | 
|  | false, | 
|  | "Found no NVIDIA driver on your system. Please check that you " | 
|  | "have an NVIDIA GPU and installed a driver from " | 
|  | "http://www.nvidia.com/Download/index.aspx"); | 
|  | } else { | 
|  | TORCH_CHECK( | 
|  | false, | 
|  | "The NVIDIA driver on your system is too old (found version ", | 
|  | version, | 
|  | "). Please update your GPU driver by downloading and installing " | 
|  | "a new version from the URL: " | 
|  | "http://www.nvidia.com/Download/index.aspx Alternatively, go to: " | 
|  | "https://pytorch.org to install a PyTorch version that has been " | 
|  | "compiled with your version of the CUDA driver."); | 
|  | } | 
|  | } break; | 
|  | case cudaErrorInitializationError: | 
|  | TORCH_CHECK( | 
|  | false, | 
|  | "CUDA driver initialization failed, you might not " | 
|  | "have a CUDA gpu."); | 
|  | break; | 
|  | case cudaErrorUnknown: | 
|  | TORCH_CHECK( | 
|  | false, | 
|  | "CUDA unknown error - this may be due to an " | 
|  | "incorrectly set up environment, e.g. changing env " | 
|  | "variable CUDA_VISIBLE_DEVICES after program start. " | 
|  | "Setting the available devices to be zero."); | 
|  | break; | 
|  | #if C10_ASAN_ENABLED | 
|  | case cudaErrorMemoryAllocation: | 
|  | // In ASAN mode, we know that a cudaErrorMemoryAllocation error will | 
|  | // pop up if compiled with NVCC (clang-cuda is fine) | 
|  | TORCH_CHECK( | 
|  | false, | 
|  | "Got 'out of memory' error while trying to initialize CUDA. " | 
|  | "CUDA with nvcc does not work well with ASAN and it's probably " | 
|  | "the reason. We will simply shut down CUDA support. If you " | 
|  | "would like to use GPUs, turn off ASAN."); | 
|  | break; | 
|  | #endif // C10_ASAN_ENABLED | 
|  | default: | 
|  | TORCH_CHECK( | 
|  | false, | 
|  | "Unexpected error from cudaGetDeviceCount(). Did you run " | 
|  | "some cuda functions before calling NumCudaDevices() " | 
|  | "that might have already set an error? Error ", | 
|  | err, | 
|  | ": ", | 
|  | cudaGetErrorString(err)); | 
|  | } | 
|  | return count; | 
|  | } | 
|  | } // namespace | 
|  |  | 
|  | DeviceIndex device_count() noexcept { | 
|  | // initialize number of devices only once | 
|  | static int count = []() { | 
|  | try { | 
|  | auto result = device_count_impl(/*fail_if_no_driver=*/false); | 
|  | TORCH_INTERNAL_ASSERT( | 
|  | result <= std::numeric_limits<DeviceIndex>::max(), | 
|  | "Too many CUDA devices, DeviceIndex overflowed"); | 
|  | return result; | 
|  | } catch (const c10::Error& ex) { | 
|  | // We don't want to fail, but still log the warning | 
|  | // msg() returns the message without the stack trace | 
|  | TORCH_WARN("CUDA initialization: ", ex.msg()); | 
|  | return 0; | 
|  | } | 
|  | }(); | 
|  | return static_cast<DeviceIndex>(count); | 
|  | } | 
|  |  | 
|  | DeviceIndex device_count_ensure_non_zero() { | 
|  | // Call the implementation every time to throw the exception | 
|  | int count = device_count_impl(/*fail_if_no_driver=*/true); | 
|  | // Zero gpus doesn't produce a warning in `device_count` but we fail here | 
|  | TORCH_CHECK(count, "No CUDA GPUs are available"); | 
|  | return static_cast<DeviceIndex>(count); | 
|  | } | 
|  |  | 
|  | DeviceIndex current_device() { | 
|  | int cur_device; | 
|  | C10_CUDA_CHECK(cudaGetDevice(&cur_device)); | 
|  | return static_cast<DeviceIndex>(cur_device); | 
|  | } | 
|  |  | 
|  | void set_device(DeviceIndex device) { | 
|  | C10_CUDA_CHECK(cudaSetDevice(static_cast<int>(device))); | 
|  | } | 
|  |  | 
|  | void device_synchronize() { | 
|  | C10_CUDA_CHECK(cudaDeviceSynchronize()); | 
|  | } | 
|  |  | 
|  | // this function has to be called from callers performing cuda synchronizing | 
|  | // operations, to raise proper error or warning | 
|  | void warn_or_error_on_sync() { | 
|  | if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_ERROR) { | 
|  | TORCH_CHECK(false, "called a synchronizing CUDA operation"); | 
|  | } else if (warning_state().get_sync_debug_mode() == SyncDebugMode::L_WARN) { | 
|  | TORCH_WARN("called a synchronizing CUDA operation"); | 
|  | } | 
|  | } | 
|  |  | 
|  | } // namespace cuda | 
|  | } // namespace c10 |