nvfuser opinfo test fixes masked_var/std (#77273)

Enables guard mode in opinfo tests.
Fixes opinfo failures for
    test_nvfuser_correctness__masked_var_cuda_xxxx
    test_nvfuser_correctness__masked_std_cuda_xxxx

The root cause of the failure is that tracing changes stride properties, causing nvfuser to select the wrong kernel and generate incorrect results.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/77273
Approved by: https://github.com/davidberard98
diff --git a/test/test_jit_cuda_fuser.py b/test/test_jit_cuda_fuser.py
index d6aa87a..f754d10 100644
--- a/test/test_jit_cuda_fuser.py
+++ b/test/test_jit_cuda_fuser.py
@@ -165,14 +165,6 @@
         if TEST_BF16:
             self.support_tensor_dtypes.append(torch.bfloat16)
 
-        self.old_cpu_fuse = torch._C._jit_can_fuse_on_cpu()
-        self.old_gpu_fuse = torch._C._jit_can_fuse_on_gpu()
-        torch._C._jit_override_can_fuse_on_cpu(False)
-        torch._C._jit_override_can_fuse_on_gpu(False)
-        self.old_guard = torch._C._jit_set_nvfuser_guard_mode(False)
-        torch._C._debug_set_autodiff_subgraph_inlining(False)
-        self.old_value = torch._C._jit_set_autocast_mode(True)
-
         if(RUN_NVFUSER):
             self.cuda_fuser_options = CudaFuserTestOptions()
 
@@ -4694,6 +4686,8 @@
         super(TestCudaFuserOpInfoParent, self).setUp()
         if RUN_NVFUSER:
             self.cuda_fuser_options = CudaFuserTestOptions()
+            # enables guard mode since tracing could change graph to violate guard.
+            torch._C._jit_set_nvfuser_guard_mode(True)
         self.nvfuser_single_node_mode = torch._C._jit_set_nvfuser_single_node_mode(True)
 
     def tearDown(self):