[CUDA][cuSPARSE] Work around IMA in cuSPARSE ALG1 on SM 8.9 devices (#119610)

This issue, an illegal memory access (IMA) in cuSPARSE's CSR SpMM ALG1, originally surfaced on the PyTorch discussion forum: https://discuss.pytorch.org/t/issue-with-torch-sparse-mm-while-running-on-gpu/188669

It has been forwarded to the cuSPARSE team, but we have not yet received a commitment from them to fix it directly.

CC @ptrblck

Pull Request resolved: https://github.com/pytorch/pytorch/pull/119610
Approved by: https://github.com/jeffdaily, https://github.com/jcaip

commit: e386bfa68884d3399016d3f2aaf029acdd35f5ef [log] [tgz]
author: eqy <eddiey@nvidia.com> Thu Feb 15 18:28:45 2024 +0000
committer: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com> Thu Feb 15 18:28:45 2024 +0000
tree: 4bec78d818b25e2e1c4d90b3f413c28fbd7fd4ec
parent: 2429495820558fb953df68a284f95e81060d3c33 [diff]
diff --git a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp
index 22efdbd..0251857 100644
--- a/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp
+++ b/aten/src/ATen/native/sparse/cuda/SparseCUDABlas.cpp

@@ -154,6 +154,13 @@
 
 
   auto handle = at::cuda::getCurrentCUDASparseHandle();
+  cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties();
+  // ALG1 triggers an illegal memory access (IMA) on SM 8.9 devices as of CUDA 11.8+; use ALG2 there
+#if !defined(USE_ROCM)
+  auto default_alg = prop->major == 8 && prop->minor == 9 ? CUSPARSE_SPMM_CSR_ALG2 : CUSPARSE_SPMM_CSR_ALG1;
+#else
+  auto default_alg = CUSPARSE_SPMM_CSR_ALG1;
+#endif
 
   // cusparseSpMM_bufferSize returns the bufferSize that can be used by cusparseSpMM
   size_t bufferSize;
@@ -164,7 +171,7 @@
     beta,
     descC,
     cusparse_value_type,      /* data type in which the computation is executed */
-    CUSPARSE_SPMM_CSR_ALG1,   /* default computing algorithm for CSR sparse matrix format */
+    default_alg,              /* default computing algorithm for CSR sparse matrix format */
     &bufferSize               /* output */
   ));
 
@@ -178,7 +185,7 @@
     beta,
     descC,
     cusparse_value_type,      /* data type in which the computation is executed */
-    CUSPARSE_SPMM_CSR_ALG1,   /* default computing algorithm for CSR sparse matrix format */
+    default_alg,              /* default computing algorithm for CSR sparse matrix format */
     dataPtr.get()             /* external buffer */
   ));
commit	e386bfa68884d3399016d3f2aaf029acdd35f5ef	[log] [tgz]
author	eqy <eddiey@nvidia.com>	Thu Feb 15 18:28:45 2024 +0000
committer	PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>	Thu Feb 15 18:28:45 2024 +0000
tree	4bec78d818b25e2e1c4d90b3f413c28fbd7fd4ec
parent	2429495820558fb953df68a284f95e81060d3c33 [diff]