[cuDNN][Quantization] Don't print when plan finalization fails in cuDNN quantization backend (#128177)

Similar in spirit to #125790; hopefully addresses the failures seen with the cuDNN 9.1 upgrade: https://github.com/pytorch/pytorch/pull/128166

CC @nWEIdia @atalman
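
For context, each of the catch blocks changed below sits inside a loop that walks a list of candidate engine configurations: the first config whose plan finalizes and runs is cached and used, and a finalization failure just means "try the next candidate", so printing on every failure (the old behavior) spams stdout even when a later candidate succeeds. A minimal sketch of that control flow, with placeholder types standing in for the actual cudnn_frontend API:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Placeholder for a candidate execution-plan config; not the real
// cudnn_frontend type, only a stand-in to show the control flow.
struct PlanCandidate {
  std::string name;
  bool finalizes;  // whether plan finalization would succeed
};

// Stand-in for building and running an execution plan; throws on
// failure, as cudnn_frontend plan finalization does for unsupported
// engine configs.
void build_and_run(const PlanCandidate& c) {
  if (!c.finalizes) {
    throw std::runtime_error("plan finalization failed for " + c.name);
  }
}

void run_first_viable(const std::vector<PlanCandidate>& candidates) {
  for (const auto& c : candidates) {
    try {
      build_and_run(c);
      std::cout << "ran with engine: " << c.name << "\n";
      return;  // success: cache the plan and return, as in the diff
    } catch (const std::exception&) {
      // Swallow the failure silently and move on to the next
      // candidate; this is what the empty catch blocks below do.
    }
  }
  // Only reached if no candidate works, mirroring the TORCH_CHECK(false, ...).
  throw std::runtime_error(
      "Unable to find an engine to execute this computation");
}
```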

Pull Request resolved: https://github.com/pytorch/pytorch/pull/128177
Approved by: https://github.com/nWEIdia, https://github.com/Skylion007
diff --git a/aten/src/ATen/native/quantized/cudnn/BinaryOps.cpp b/aten/src/ATen/native/quantized/cudnn/BinaryOps.cpp
index 07ccc19..9e9e675 100644
--- a/aten/src/ATen/native/quantized/cudnn/BinaryOps.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/BinaryOps.cpp
@@ -242,7 +242,7 @@
       run(plan_desc);
       execution_plan_cache[key] = plan_desc;
       return quantized_output.view(orig_sizes);
-    } catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
+    } catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
   }
 
   TORCH_CHECK(false, "Unable to find an engine to execute this computation in Quantized Add Cudnn");
diff --git a/aten/src/ATen/native/quantized/cudnn/Conv.cpp b/aten/src/ATen/native/quantized/cudnn/Conv.cpp
index 606d769..8823038 100644
--- a/aten/src/ATen/native/quantized/cudnn/Conv.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Conv.cpp
@@ -252,7 +252,7 @@
       run(plan);
       execution_plan_cache.emplace(key, plan);
       return;
-    } catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
+    } catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
   }
 
   TORCH_CHECK(false, "Unable to find an engine to execute this computation in Quantized Conv2D Cudnn");
diff --git a/aten/src/ATen/native/quantized/cudnn/Linear.cpp b/aten/src/ATen/native/quantized/cudnn/Linear.cpp
index d321959..54eb084 100644
--- a/aten/src/ATen/native/quantized/cudnn/Linear.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Linear.cpp
@@ -286,7 +286,7 @@
       run(plan);
       execution_plan_cache.emplace(key, plan);
       return;
-    } catch (cudnn_frontend::cudnnException &e) {std::cout << "cudnn error:" << e.what() << std::endl;} catch(c10::CuDNNError &e) { std::cout << "other error" << e.what() << std::endl;}
+    } catch (cudnn_frontend::cudnnException &e) {} catch(c10::CuDNNError &e) {}
   }
 
   TORCH_CHECK(false, "Unable to find an engine to execute this computation Quantized Linear Cudnn");
diff --git a/test/quantization/core/test_quantized_op.py b/test/quantization/core/test_quantized_op.py
index 5b86693..6671b66 100644
--- a/test/quantization/core/test_quantized_op.py
+++ b/test/quantization/core/test_quantized_op.py
@@ -4052,7 +4052,6 @@
            use_channelwise=st.sampled_from([False]))  # channelwise currently not supported for qlinear cudnn
     @skipIfNoFBGEMM
     @unittest.skipIf(not TEST_CUDNN, "cudnn is not enabled.")
-    @unittest.skipIf(TEST_CUDNN and torch.backends.cudnn.version() == 90100, "expected failure on cuDNN 9.1.0")
     @unittest.skipIf(not SM80OrLater, "requires sm80 or later.")
     @unittest.skipIf(TEST_ROCM, "not supported on rocm.")
     # TODO: check with yang regarding CUDNN flags