Fixed minor issues for bmm/mm decomposition (#109836)

Summary:
* Fixed minor issues for bmm/mm decomposition
* enabled addmm for inductor

Test Plan: ci

Reviewed By: mikekgfb

Differential Revision: D49522332

Pull Request resolved: https://github.com/pytorch/pytorch/pull/109836
Approved by: https://github.com/jansel, https://github.com/mikekgfb
diff --git a/torch/_inductor/decomposition.py b/torch/_inductor/decomposition.py
index 4d36e5d..73bbf75 100644
--- a/torch/_inductor/decomposition.py
+++ b/torch/_inductor/decomposition.py
@@ -179,8 +179,9 @@
 
 
 @register_decomposition([aten.bmm])
+@pw_cast_for_opmath
 def bmm(self, batch2):
-    if self.device == "cpu":
+    if self.device.type == "cpu":
         if self.size(1) == 1 and batch2.size(-1) == 1:
             return torch.sum(
                 self.squeeze(1) * batch2.squeeze(-1), dim=1, keepdim=True
@@ -201,15 +202,17 @@
 
 
 @register_decomposition([aten.mm])
+@pw_cast_for_opmath
 def mm(self, input2):
     # Our matrix vector multiplies only achieve peak bandwidth with coordinate descent tuning.
     # todo: Look into why and fix it (hopefully)
     if config.coordinate_descent_tuning:
         if self.shape[0] == 1 or input2.shape[1] == 1:
             return (self.unsqueeze(2) * input2.unsqueeze(0)).sum(dim=1)
-    if self.device == "cpu":
+    if self.device.type == "cpu":
         if (
             self.size(-1) == 1
+            and self.size(0) > 0
             and input2.size(0) == 1
             and (self.dtype == input2.dtype)
             and ((torch.numel(self) + torch.numel(input2)) <= 32)