Correct #39759 for HIP. (#39801)

Summary:
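The changes in PR https://github.com/pytorch/pytorch/issues/39759 broke caffe2 on HIP.
When hipify processes caffe2 sources it renames CUDA identifiers to HIP; it does not do so for torch sources.
Therefore, when caffe2 code calls into torch, it must keep using the CUDA-named functions (e.g. lazyInitCUDA).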

CC ezyang xw285cornell sunway513 houseroad dzhulgakov
Pull Request resolved: https://github.com/pytorch/pytorch/pull/39801

Differential Revision: D21982493

Pulled By: xw285cornell

fbshipit-source-id: 8e88e0fb80c71f0342e23ef0214a42d5542bdc70
diff --git a/caffe2/python/test/gpu_context_test.py b/caffe2/python/test/gpu_context_test.py
index e52f5a1..741f39d 100644
--- a/caffe2/python/test/gpu_context_test.py
+++ b/caffe2/python/test/gpu_context_test.py
@@ -14,7 +14,9 @@
                  "THC pool testing is obscure and doesn't work on HIP yet")
 class TestGPUInit(unittest.TestCase):
     def testTHCAllocator(self):
-        core.GlobalInit(['caffe2', '--caffe2_cuda_memory_pool=thc'])
+        cuda_or_hip = 'hip' if workspace.has_hip_support else 'cuda'
+        flag = '--caffe2_{}_memory_pool=thc'.format(cuda_or_hip)
+        core.GlobalInit(['caffe2', flag])
         # just run one operator
         # it's importantant to not call anything here from Torch API
         # even torch.cuda.memory_allocated would initialize CUDA context
@@ -24,3 +26,6 @@
         ))
         # make sure we actually used THC allocator
         self.assertGreater(torch.cuda.memory_allocated(), 0)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/torch/utils/hipify/cuda_to_hip_mappings.py b/torch/utils/hipify/cuda_to_hip_mappings.py
index 6d3f1be..e3d638f 100644
--- a/torch/utils/hipify/cuda_to_hip_mappings.py
+++ b/torch/utils/hipify/cuda_to_hip_mappings.py
@@ -8010,9 +8010,10 @@
         ("curandGenerateUniform", ("hiprandGenerateUniform", API_CAFFE2)),
         ("curand_generator", ("hiprand_generator", API_CAFFE2)),
         ("CaffeCudaGetDevice", ("CaffeHipGetDevice", API_CAFFE2)),
-        # do not rename CUDA_KERNEL_ASSERT,
+        # do not rename CUDA_KERNEL_ASSERT or lazyInitCUDA in caffe2 sources
         # the ordered dict guarantees this pattern will match first, before "CUDA"
         ("CUDA_KERNEL_ASSERT", ("CUDA_KERNEL_ASSERT", API_CAFFE2)),
+        ("lazyInitCUDA", ("lazyInitCUDA", API_CAFFE2)),
         ("CUDA", ("HIP", API_CAFFE2)),
         ("Cuda", ("Hip", API_CAFFE2)),
         ("cuda_", ("hip_", API_CAFFE2)),