[ATen][CUDA][CUBLAS] cublasLtMatmul increase workspace_size (#120925)

According to the [cuBLAS API Reference](https://docs.nvidia.com/cuda/cublas/index.html#cublassetworkspace), the recommended workspace size is 32 MiB for Hopper and 4 MiB for all other architectures. This PR increases the default workspace size accordingly. I am not aware of a recommended workspace size for HIP, so I am leaving it unchanged there.
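
For context, a minimal sketch of how such a workspace limit is consumed on the cuBLASLt side: the cap is advertised to the algorithm heuristics via `CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES`, and the same buffer/size pair is handed to `cublasLtMatmul` itself. This is illustrative, not the ATen code path; the helper name is hypothetical, and descriptor setup and error checking are omitted.

```cpp
#include <cublasLt.h>
#include <cuda_runtime.h>
#include <cstdint>

// Hypothetical helper: run a matmul with an explicit workspace cap.
// The preference attribute bounds which algorithms the heuristic may
// return; the chosen algorithm then receives the workspace buffer.
void matmulWithWorkspace(cublasLtHandle_t handle,
                         cublasLtMatmulDesc_t op,
                         cublasLtMatrixLayout_t aDesc, const void* A,
                         cublasLtMatrixLayout_t bDesc, const void* B,
                         cublasLtMatrixLayout_t cDesc, void* C,
                         const float* alpha, const float* beta,
                         uint64_t workspaceBytes, cudaStream_t stream) {
  void* workspace = nullptr;
  cudaMalloc(&workspace, workspaceBytes);

  // Tell the heuristic how much scratch space the algorithm may use.
  cublasLtMatmulPreference_t pref;
  cublasLtMatmulPreferenceCreate(&pref);
  cublasLtMatmulPreferenceSetAttribute(
      pref, CUBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
      &workspaceBytes, sizeof(workspaceBytes));

  cublasLtMatmulHeuristicResult_t result;
  int returnedResults = 0;
  cublasLtMatmulAlgoGetHeuristic(handle, op, aDesc, bDesc, cDesc, cDesc,
                                 pref, /*requestedAlgoCount=*/1, &result,
                                 &returnedResults);
  if (returnedResults > 0) {
    // C doubles as the D (output) matrix here.
    cublasLtMatmul(handle, op, alpha, A, aDesc, B, bDesc, beta, C, cDesc,
                   C, cDesc, &result.algo, workspace, workspaceBytes, stream);
  }

  cublasLtMatmulPreferenceDestroy(pref);
  cudaFree(workspace);
}
```

Note the units: `CUBLASLT_WORKSPACE_SIZE` and the defaults in the diff below are in KiB (per the #73328 comment), whereas `cublasLtMatmul` takes a size in bytes.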

Pull Request resolved: https://github.com/pytorch/pytorch/pull/120925
Approved by: https://github.com/eqy, https://github.com/malfet
diff --git a/aten/src/ATen/cuda/CUDABlas.cpp b/aten/src/ATen/cuda/CUDABlas.cpp
index d534ec5..46d73bc 100644
--- a/aten/src/ATen/cuda/CUDABlas.cpp
+++ b/aten/src/ATen/cuda/CUDABlas.cpp
@@ -183,13 +183,22 @@
 
 static size_t _parseChosenWorkspaceSize() {
   const char * val = getenv("CUBLASLT_WORKSPACE_SIZE");
+  size_t workspace_size = 1024; /* default size in KiB according to #73328 */
 #ifdef USE_ROCM
   if (!val) {
     // accept either env var
     val = getenv("HIPBLASLT_WORKSPACE_SIZE");
   }
+#else
+  cudaDeviceProp* p = at::cuda::getDeviceProperties(c10::cuda::current_device());
+  // Keep workspace_size = 1024 (KiB) for small Ampere GPUs
+  // See https://github.com/pytorch/pytorch/pull/120925#issuecomment-1977556485
+  if (p->major == 8 && p->totalGlobalMem / 1073741824 >= 24) {
+    workspace_size = 4096;  // 4 MiB for Ampere GPUs with >= 24 GiB of memory
+  } else if (p->major >= 9) {
+    workspace_size = 32768;  // 32 MiB, recommended for Hopper
+  }
 #endif
-  size_t workspace_size = 1024; /* default size in KiB according to #73328 */
   if (val) {
     try {
       workspace_size = std::stoi(val);