Unify cuda and hip device types in Caffe2 python front end (#14221)
Summary:
The goal of this PR is to unify the CUDA and HIP device types in the Caffe2 Python front end.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/14221
Differential Revision: D13148564
Pulled By: bddppq
fbshipit-source-id: ef9bd2c7d238200165f217097ac5727e686d887b
diff --git a/caffe2/python/cnn.py b/caffe2/python/cnn.py
index f9ccf92..aead1d5 100644
--- a/caffe2/python/cnn.py
+++ b/caffe2/python/cnn.py
@@ -5,7 +5,7 @@
from __future__ import print_function
from __future__ import unicode_literals
-from caffe2.python import brew
+from caffe2.python import brew, workspace
from caffe2.python.model_helper import ModelHelper
from caffe2.proto import caffe2_pb2
import logging
@@ -235,6 +235,6 @@
@property
def GPU(self, gpu_id=0):
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = gpu_id
return device_option
diff --git a/caffe2/python/core.py b/caffe2/python/core.py
index 4f683da..6cab923 100644
--- a/caffe2/python/core.py
+++ b/caffe2/python/core.py
@@ -82,6 +82,10 @@
return C.op_registry_key(op_type, engine) in _REGISTERED_OPERATORS
+def IsGPUDeviceType(device_type):
+ return device_type in {caffe2_pb2.CUDA, caffe2_pb2.HIP}
+
+
def DeviceOption(
device_type,
device_id=0,
@@ -2110,7 +2114,7 @@
def RunAllOnGPU(self, gpu_id=0, use_cudnn=False):
"""A convenient function to run everything on the GPU."""
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = gpu_id
self._net.device_option.CopyFrom(device_option)
if use_cudnn:
@@ -2280,12 +2284,13 @@
def copy_func_between_devices(src, dst):
CPU = caffe2_pb2.CPU
- CUDA = caffe2_pb2.CUDA
+ is_src_gpu = IsGPUDeviceType(src.device_type)
+ is_dst_gpu = IsGPUDeviceType(dst.device_type)
if src.device_type == CPU and dst.device_type == CPU:
return None
- if src.device_type == CUDA and dst.device_type == CUDA:
+ if is_src_gpu and is_dst_gpu:
if src.device_id == dst.device_id:
return None
else:
@@ -2294,13 +2299,13 @@
return net.Copy(*args, **kw)
return fun
- if src.device_type == CUDA and dst.device_type == CPU:
+ if is_src_gpu and dst.device_type == CPU:
def fun(net, *args, **kw):
with DeviceScope(src):
return net.CopyGPUToCPU(*args, **kw)
return fun
- if src.device_type == CPU and dst.device_type == CUDA:
+ if src.device_type == CPU and is_dst_gpu:
def fun(net, *args, **kw):
with DeviceScope(dst):
return net.CopyCPUToGPU(*args, **kw)
@@ -2425,11 +2430,10 @@
def _gen_new_name(blob, device_option):
CPU = caffe2_pb2.CPU
- CUDA = caffe2_pb2.CUDA
if device_option.device_type == CPU:
suffix = '_cpu'
- elif device_option.device_type == CUDA:
- suffix = '_cuda_' + str(device_option.device_id)
+ elif IsGPUDeviceType(device_option.device_type):
+ suffix = '_gpu_' + str(device_option.device_id)
else:
raise RuntimeError(
"Unknown device type: {}".
diff --git a/caffe2/python/core_gradients_test.py b/caffe2/python/core_gradients_test.py
index bf25806..75c2689 100644
--- a/caffe2/python/core_gradients_test.py
+++ b/caffe2/python/core_gradients_test.py
@@ -9,9 +9,8 @@
import unittest
from caffe2.proto import caffe2_pb2
-from caffe2.python import core, test_util
+from caffe2.python import core, test_util, workspace
from caffe2.python.core import CreateOperator, GradientRegistry
-from caffe2.python import workspace
import numpy as np
@@ -94,7 +93,7 @@
@given(device_option=st.sampled_from([
None,
- core.DeviceOption(caffe2_pb2.CUDA, 1)]))
+ core.DeviceOption(workspace.GpuDeviceType, 1)]))
def testDirect(self, device_option):
operators = [
CreateOperator('Direct', 'in', 'hidden'),
@@ -279,7 +278,7 @@
@given(device_option=st.sampled_from([
None,
- core.DeviceOption(caffe2_pb2.CUDA, 1)]))
+ core.DeviceOption(workspace.GpuDeviceType, 1)]))
def testMultiUseInput(self, device_option):
"""Test gradient for the following case:
diff --git a/caffe2/python/core_test.py b/caffe2/python/core_test.py
index 6c23d88..641a109 100644
--- a/caffe2/python/core_test.py
+++ b/caffe2/python/core_test.py
@@ -82,17 +82,17 @@
self.assertFalse(op.HasField('device_option'))
# explicitly setting a device
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
op = core.CreateOperator("Relu", "x", "y", device_option=device_option)
self.assertTrue(op.HasField('device_option'))
- self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
with core.DeviceScope(device_option):
# from device scope
op = core.CreateOperator("Relu", "x", "y")
self.assertTrue(op.HasField('device_option'))
- self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
# from an overridden device option
override_device = caffe2_pb2.DeviceOption()
@@ -108,13 +108,13 @@
def testNameAndDeviceScopeTogether(self):
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
with core.DeviceScope(device_option):
with core.NameScope("foo"):
op = core.CreateOperator("Relu", "x", "y")
self.assertTrue(op.HasField('device_option'))
- self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
self.assertEqual(len(op.input), 1)
self.assertEqual(op.input[0], "foo/x")
@@ -254,7 +254,7 @@
class TestCreateOperator(test_util.TestCase):
def testCreate(self):
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
op = core.CreateOperator(
"Ludicrous", "x", "y", name="ludicrous",
@@ -270,7 +270,7 @@
self.assertEqual(len(op.control_input), 1)
self.assertEqual(op.control_input[0], "z")
self.assertTrue(op.HasField('device_option'))
- self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
self.assertTrue(len(op.arg), 3)
@@ -643,14 +643,15 @@
self.assertEqual(op.input[2], "fc_b")
-@unittest.skipIf(not workspace.has_gpu_support, 'No GPU support')
+@unittest.skipIf(not workspace.has_gpu_support
+ and not workspace.has_hip_support, 'No GPU support')
class TestInferDevice(test_util.TestCase):
def setUp(self):
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
- self.cuda_option = device_option
+ self.gpu_option = device_option
self.cpu_option = caffe2_pb2.DeviceOption()
def _test_op(
@@ -662,7 +663,7 @@
inputs=None,
outputs=None
):
- op_option = self.cuda_option if not op_option else op_option
+ op_option = self.gpu_option if not op_option else op_option
inputs = ["blob_1"] if not inputs else inputs
outputs = ["blob_2"] if not outputs else outputs
with core.DeviceScope(op_option):
@@ -690,9 +691,9 @@
def test_infer_device(self):
self._test_op(
"FC",
- self.cuda_option,
- self.cuda_option,
- op_option=self.cuda_option,
+ self.gpu_option,
+ self.gpu_option,
+ op_option=self.gpu_option,
inputs=["data", "fc_w", "fc_b"],
outputs=["fc_1"]
)
@@ -700,31 +701,31 @@
def test_infer_device_split_by_lengths(self):
self._test_op(
"SplitByLengths",
- [self.cuda_option, self.cpu_option],
- self.cuda_option,
- op_option=self.cuda_option,
+ [self.gpu_option, self.cpu_option],
+ self.gpu_option,
+ op_option=self.gpu_option,
inputs=["data", "fc_w"],
outputs=["fc_1"]
)
def test_infer_device_adam(self):
- in_options = [self.cuda_option] * 6
+ in_options = [self.gpu_option] * 6
in_options[5] = self.cpu_option
- out_options = [self.cuda_option] * 4
+ out_options = [self.gpu_option] * 4
self._test_op(
"Adam",
in_options,
out_options,
- op_option=self.cuda_option,
+ op_option=self.gpu_option,
inputs=["param", "moment_1", "moment_2", "grad", "lr", "iter"],
outputs=["output_param", "output_moment_1", "output_moment_2",
"output_grad"]
)
def test_infer_device_cross_device(self):
- self._test_op("CopyGPUToCPU", self.cuda_option, self.cpu_option)
- self._test_op("CopyCPUToGPU", self.cpu_option, self.cuda_option)
- self._test_op("CopyFromCPUInput", self.cpu_option, self.cuda_option)
+ self._test_op("CopyGPUToCPU", self.gpu_option, self.cpu_option)
+ self._test_op("CopyCPUToGPU", self.cpu_option, self.gpu_option)
+ self._test_op("CopyFromCPUInput", self.cpu_option, self.gpu_option)
self._test_op(
"CopyFromCPUInput",
self.cpu_option,
@@ -734,7 +735,7 @@
def test_device_inference_function(self):
# ConcatOp.
- op_option = self.cuda_option
+ op_option = self.gpu_option
with core.DeviceScope(op_option):
op = core.CreateOperator(
'Concat',
@@ -746,7 +747,7 @@
self.assertEqual(output_dev[1], self.cpu_option)
#SplitOp.
- op_option = self.cuda_option
+ op_option = self.gpu_option
with core.DeviceScope(op_option):
op = core.CreateOperator(
'Split',
@@ -761,7 +762,7 @@
net = core.Net("test")
init_net = core.Net("init")
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
bias = init_net.ConstantFill([], 'fc_b', shape=[10, ])
@@ -775,10 +776,10 @@
)
op = new_net._net.op[-1]
self.assertEqual(op.type, "FC")
- self.assertEqual(op.input[0], "data_cuda_1")
- self.assertEqual(op.input[1], "fc_w_cuda_1")
- self.assertEqual(op.input[2], "fc_b_cuda_1")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.input[0], "data_gpu_1")
+ self.assertEqual(op.input[1], "fc_w_gpu_1")
+ self.assertEqual(op.input[2], "fc_b_gpu_1")
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
self.assertEqual(new_net._net.op[-2].type, "CopyCPUToGPU")
self.assertEqual(new_net._net.op[0].type, "CopyCPUToGPU")
@@ -788,7 +789,7 @@
net = core.Net("test")
init_net = core.Net("init")
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
bias = init_net.ConstantFill([], 'fc_b', shape=[10, ])
@@ -804,34 +805,34 @@
)
op = nets[1]._net.op[0]
self.assertEqual(op.type, "CopyCPUToGPU")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
- self.assertEqual(op.output[0], "fc_w_cuda_1")
+ self.assertEqual(op.output[0], "fc_w_gpu_1")
op = nets[1]._net.op[1]
self.assertEqual(op.type, "CopyCPUToGPU")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
- self.assertEqual(op.output[0], "fc_b_cuda_1")
+ self.assertEqual(op.output[0], "fc_b_gpu_1")
op = nets[1]._net.op[2]
self.assertEqual(op.type, "FC")
self.assertEqual(op.input[0], "data")
- self.assertEqual(op.input[1], "fc_w_cuda_1")
- self.assertEqual(op.input[2], "fc_b_cuda_1")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.input[1], "fc_w_gpu_1")
+ self.assertEqual(op.input[2], "fc_b_gpu_1")
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
op = nets[1]._net.op[3]
self.assertEqual(op.type, "Add")
self.assertEqual(op.input[0], "fc1")
- self.assertEqual(op.input[1], "const_cuda_1")
+ self.assertEqual(op.input[1], "const_gpu_1")
# check that moved blob is in input to the new net
- for c in ["data", "fc_w", "fc_b", "const_cuda_1"]:
+ for c in ["data", "fc_w", "fc_b", "const_gpu_1"]:
self.assertTrue(c in nets[1]._net.external_input)
"""
For reference, net.Proto() should be like:
name: ""
op {
input: "fc_w"
- output: "fc_w_cuda_1"
+ output: "fc_w_gpu_1"
name: ""
type: "CopyCPUToGPU"
device_option {
@@ -841,7 +842,7 @@
}
op {
input: "fc_b"
- output: "fc_b_cuda_1"
+ output: "fc_b_gpu_1"
name: ""
type: "CopyCPUToGPU"
device_option {
@@ -851,8 +852,8 @@
}
op {
input: "data"
- input: "fc_w_cuda_1"
- input: "fc_b_cuda_1"
+ input: "fc_w_gpu_1"
+ input: "fc_b_gpu_1"
output: "fc1"
name: ""
type: "FC"
@@ -863,7 +864,7 @@
}
op {
input: "fc1"
- input: "const_cuda_1"
+ input: "const_gpu_1"
output: "fc1"
name: ""
type: "Add"
@@ -876,14 +877,14 @@
external_input: "fc_w"
external_input: "fc_b"
external_input: "const"
-external_input: "const_cuda_1"
+external_input: "const_gpu_1"
"""
def test_cross_nets_no_change(self):
net = core.Net("test")
init_net = core.Net("init")
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
with core.DeviceScope(device_option):
@@ -900,7 +901,7 @@
self.assertEqual(op.input[0], "data")
self.assertEqual(op.input[1], "fc_w")
self.assertEqual(op.input[2], "fc_b")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
"""
For reference, net.Proto() should be like:
@@ -925,7 +926,7 @@
def test_inject_copy_multi_use(self):
net = core.Net("test")
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
with core.DeviceScope(device_option):
@@ -944,12 +945,12 @@
new_net, _ = core.InjectCrossDeviceCopies(net)
op = new_net._net.op[0]
self.assertEqual(op.type, "CopyCPUToGPU")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
- self.assertEqual(op.output[0], "data_cuda_1")
+ self.assertEqual(op.output[0], "data_gpu_1")
op = new_net._net.op[1]
self.assertEqual(op.type, "Relu")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
self.assertEqual(op.output[0], "relu1")
op = new_net._net.op[2]
@@ -958,9 +959,9 @@
self.assertEqual(op.output[0], "relu2")
op = new_net._net.op[3]
self.assertEqual(op.type, "Relu")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
- self.assertEqual(op.input[0], "data_cuda_1")
+ self.assertEqual(op.input[0], "data_gpu_1")
self.assertEqual(op.output[0], "relu3")
op = new_net._net.op[4]
self.assertEqual(op.type, "Relu")
@@ -968,27 +969,27 @@
self.assertEqual(op.output[0], "relu4")
op = new_net._net.op[5]
self.assertEqual(op.type, "CopyCPUToGPU")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 0)
- self.assertEqual(op.output[0], "data_cuda_0")
+ self.assertEqual(op.output[0], "data_gpu_0")
op = new_net._net.op[6]
self.assertEqual(op.type, "Relu")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 0)
- self.assertEqual(op.input[0], "data_cuda_0")
+ self.assertEqual(op.input[0], "data_gpu_0")
self.assertEqual(op.output[0], "relu5")
op = new_net._net.op[7]
self.assertEqual(op.type, "Relu")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 1)
- self.assertEqual(op.input[0], "data_cuda_1")
+ self.assertEqual(op.input[0], "data_gpu_1")
self.assertEqual(op.output[0], "relu6")
"""
For reference, net.Proto() should be like:
name: ""
op {
input: "data"
- output: "data_cuda_1"
+ output: "data_gpu_1"
name: ""
type: "CopyCPUToGPU"
device_option {
@@ -997,7 +998,7 @@
}
}
op {
- input: "data_cuda_1"
+ input: "data_gpu_1"
output: "relu1"
name: ""
type: "Relu"
@@ -1013,7 +1014,7 @@
type: "Relu"
}
op {
- input: "data_cuda_1"
+ input: "data_gpu_1"
output: "relu3"
name: ""
type: "Relu"
@@ -1030,7 +1031,7 @@
}
op {
input: "data"
- output: "data_cuda_0"
+ output: "data_gpu_0"
name: ""
type: "CopyCPUToGPU"
device_option {
@@ -1039,7 +1040,7 @@
}
}
op {
- input: "data_cuda_0"
+ input: "data_gpu_0"
output: "relu5"
name: ""
type: "Relu"
@@ -1049,7 +1050,7 @@
}
}
op {
- input: "data_cuda_1"
+ input: "data_gpu_1"
output: "relu6"
name: ""
type: "Relu"
@@ -1073,7 +1074,7 @@
cpu_device.append(caffe2_pb2.DeviceOption())
cpu_device[i].node_name = 'node:' + str(i)
gpu_device.append(caffe2_pb2.DeviceOption())
- gpu_device[i].device_type = caffe2_pb2.CUDA
+ gpu_device[i].device_type = workspace.GpuDeviceType
gpu_device[i].device_id = 0
gpu_device[i].node_name = 'node:' + str(i)
send_node = 'node:0'
@@ -1113,12 +1114,12 @@
# Verify (init_net)
op = init_net._net.op[2]
self.assertEqual(op.type, "CopyGPUToCPU")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 0)
self.assertEqual(op.output[0], "fc_w_cpu")
op = init_net._net.op[3]
self.assertEqual(op.type, "CopyGPUToCPU")
- self.assertEqual(op.device_option.device_type, 1)
+ self.assertEqual(op.device_option.device_type, workspace.GpuDeviceType)
self.assertEqual(op.device_option.device_id, 0)
self.assertEqual(op.output[0], "fc_b_cpu")
op = init_net._net.op[4]
@@ -1141,7 +1142,7 @@
def test_blob_inplace(self):
net = core.Net("test")
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = 1
net.Adagrad(['param', 'moment', 'grad', 'lr'], ['param', 'moment'])
@@ -1151,9 +1152,9 @@
op = net._net.op[1]
self.assertEqual(op.type, 'CopyCPUToGPU')
self.assertEqual(op.input[0], 'param')
- self.assertEqual(op.output[0], 'param_cuda_1')
+ self.assertEqual(op.output[0], 'param_gpu_1')
op = net._net.op[2]
- self.assertEqual(op.input[0], 'param_cuda_1')
+ self.assertEqual(op.input[0], 'param_gpu_1')
net.Relu('nonsense_input', 'moment')
# should not raise inplace error
diff --git a/caffe2/python/data_parallel_model.py b/caffe2/python/data_parallel_model.py
index 1ea110e..7a76545 100644
--- a/caffe2/python/data_parallel_model.py
+++ b/caffe2/python/data_parallel_model.py
@@ -136,17 +136,17 @@
if devices is None:
if not cpu_device:
- devices = list(range(0, workspace.NumCudaDevices()))
+ devices = list(range(0, workspace.NumGpuDevices()))
else:
devices = list(range(0, cpu_count()))
if not cpu_device:
for gpu in devices:
- if gpu >= workspace.NumCudaDevices():
+ if gpu >= workspace.NumGpuDevices():
log.warning("** Only {} GPUs available, GPUs {} requested".format(
- workspace.NumCudaDevices(), devices))
+ workspace.NumGpuDevices(), devices))
break
- model_helper_obj._device_type = caffe2_pb2.CUDA
+ model_helper_obj._device_type = workspace.GpuDeviceType
model_helper_obj._device_prefix = "gpu"
model_helper_obj._shared_model = False
device_name = "GPU"
@@ -447,17 +447,17 @@
assert isinstance(model_helper_obj, model_helper.ModelHelper)
if devices is None:
- devices = list(range(0, workspace.NumCudaDevices()))
+ devices = list(range(0, workspace.NumGpuDevices()))
if master_device is None:
master_device = devices[0]
if not cpu_device:
for gpu in devices:
- if gpu >= workspace.NumCudaDevices():
+ if gpu >= workspace.NumGpuDevices():
log.warning("** Only {} GPUs available, GPUs {} requested".format(
- workspace.NumCudaDevices(), devices))
+ workspace.NumGpuDevices(), devices))
break
- model_helper_obj._device_type = caffe2_pb2.CUDA
+ model_helper_obj._device_type = workspace.GpuDeviceType
model_helper_obj._device_prefix = "gpu"
else:
model_helper_obj._device_type = caffe2_pb2.CPU
@@ -812,7 +812,7 @@
if device is None:
device = scope.CurrentDeviceScope()
- device_prefix = "gpu" if device.device_type == caffe2_pb2.CUDA else "cpu"
+ device_prefix = "gpu" if core.IsGPUDeviceType(device.device_type) else "cpu"
namescope = "{}_{}/".format(device_prefix, device.device_id)
for op in mnet.Proto().op:
@@ -971,7 +971,7 @@
if model._optimizer is not None:
if model._device_type == caffe2_pb2.CPU:
return [model._optimizer.get_cpu_blob_name('lr')]
- elif model._device_type == caffe2_pb2.CUDA:
+ elif core.IsGPUDeviceType(model._device_type):
return [model._optimizer.get_gpu_blob_name('lr', gpu, '')
for gpu in model._devices]
else:
@@ -1006,7 +1006,7 @@
for dev_idx in devices[1:]:
if _IsGPUBlob(model, param):
- device_opt = core.DeviceOption(caffe2_pb2.CUDA, dev_idx)
+ device_opt = core.DeviceOption(workspace.GpuDeviceType, dev_idx)
else:
device_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
with core.DeviceScope(device_opt):
@@ -1025,8 +1025,8 @@
)
return
- if model._device_type == caffe2_pb2.CUDA:
- p2p_access_pattern = workspace.GetCudaPeerAccessPattern()
+ if model._device_type == workspace.GpuDeviceType:
+ p2p_access_pattern = workspace.GetGpuPeerAccessPattern()
else:
p2p_access_pattern = None
@@ -1546,7 +1546,7 @@
op_gpu = op_dev.device_id
# This avoids failing on operators that are only for CPU
- if op_dev.device_type != caffe2_pb2.CUDA:
+ if not core.IsGPUDeviceType(op_dev.device_type):
continue
namescope = "{}_{}/".format(model._device_prefix, op_gpu)
@@ -1589,14 +1589,14 @@
def _IsGPUBlob(model, blob_name):
if blob_name in model._blob_to_device:
- return model._blob_to_device[blob_name].device_type == caffe2_pb2.CUDA
+ return core.IsGPUDeviceType(model._blob_to_device[blob_name].device_type)
else:
blob_name = "{}_{}/{}".format(
model._device_prefix, model._devices[0], blob_name
)
if blob_name not in model._blob_to_device:
- return model._device_type == caffe2_pb2.CUDA
- return model._blob_to_device[blob_name].device_type == caffe2_pb2.CUDA
+ return core.IsGPUDeviceType(model._device_type)
+ return core.IsGPUDeviceType(model._blob_to_device[blob_name].device_type)
def _GroupByDevice(model, devices, params, non_data_params):
diff --git a/caffe2/python/data_parallel_model_test.py b/caffe2/python/data_parallel_model_test.py
index 16d1d94..c28dc07 100644
--- a/caffe2/python/data_parallel_model_test.py
+++ b/caffe2/python/data_parallel_model_test.py
@@ -409,7 +409,7 @@
def test_device_scope_check(self):
with self.assertRaises(AssertionError):
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
data_parallel_model.Parallelize_GPU(None, None, None)
def test_net_transformer_function(self):
@@ -984,7 +984,7 @@
self.LR],
self.vecs)
else:
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
model.CopyGPUToCPU("gpu_0/gpuvecs", self.vecs)
np.random.seed(2603)
@@ -1006,12 +1006,12 @@
device_for_indices = core.DeviceOption(caffe2_pb2.CPU)
if not cpu_indices:
- device_for_indices = core.DeviceOption(caffe2_pb2.CUDA, g)
+ device_for_indices = core.DeviceOption(workspace.GpuDeviceType, g)
with core.DeviceScope(device_for_indices):
workspace.FeedBlob("gpu_{}/indices".format(g), indices)
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, g)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, g)):
workspace.FeedBlob("gpu_{}/label".format(g), labels)
if i == 0:
@@ -1027,7 +1027,7 @@
workspace.FeedBlob(
"gpu_{}/gpuvecs".format(g),
orig_vecs,
- device_option=core.DeviceOption(caffe2_pb2.CUDA, g),
+ device_option=core.DeviceOption(workspace.GpuDeviceType, g),
)
workspace.CreateNet(model.net)
@@ -1073,7 +1073,8 @@
self._test_equiv_sparse(False)
-@unittest.skipIf(workspace.NumCudaDevices() < 2, "Need at least 2 GPUs.")
+@unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
+@unittest.skipIf(workspace.NumGpuDevices() < 2, "Need at least 2 GPUs.")
class ParallelizeBMUFTest(TestCase):
def _run_model(self, gpu_devices):
@@ -1132,7 +1133,7 @@
cpu_device=st.booleans()
)
def test_parallelize_bmuf(self, cpu_device):
- assume(cpu_device or workspace.has_gpu_support)
+ assume(cpu_device or workspace.has_gpu_support or workspace.has_hip_support)
workspace.ResetWorkspace()
@@ -1146,7 +1147,7 @@
return None
if not cpu_device:
- device_type = caffe2_pb2.CUDA
+ device_type = workspace.GpuDeviceType
device_prefix = "gpu"
else:
device_type = caffe2_pb2.CPU
@@ -1220,7 +1221,7 @@
@unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
-@unittest.skipIf(workspace.NumCudaDevices() < 2, "Need at least 2 GPUs.")
+@unittest.skipIf(workspace.NumGpuDevices() < 2, "Need at least 2 GPUs.")
class SparseDataParallelModelTestWithSharedIndices(TestCase):
'''
@@ -1336,7 +1337,7 @@
)
# Update the vecs
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
for num, vec in enumerate(self.vecs[:-1]):
model.CopyGPUToCPU("gpu_0/gpuvec_{}".format(num), vec)
@@ -1354,7 +1355,7 @@
indices = full_indices[st:en].astype(np.int32)
labels = full_labels[st:en].astype(np.int32)
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, g)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, g)):
workspace.FeedBlob("gpu_{}/indices".format(g), indices)
workspace.FeedBlob("gpu_{}/label".format(g), labels)
@@ -1377,7 +1378,7 @@
"gpu_{}/gpuvec_{}".format(g, num),
orig_vec,
device_option=core.DeviceOption(
- caffe2_pb2.CUDA, g),
+ workspace.GpuDeviceType, g),
)
workspace.CreateNet(model.net)
@@ -1407,10 +1408,10 @@
self.run_model(V, [0, 1])
self.run_model(V, [0])
- if workspace.NumCudaDevices() >= 4:
+ if workspace.NumGpuDevices() >= 4:
self.run_model(V, list(range(4)))
- if workspace.NumCudaDevices() >= 8:
+ if workspace.NumGpuDevices() >= 8:
self.run_model(V, list(range(8)))
diff --git a/caffe2/python/examples/char_rnn.py b/caffe2/python/examples/char_rnn.py
index a74d489..fb2059f 100644
--- a/caffe2/python/examples/char_rnn.py
+++ b/caffe2/python/examples/char_rnn.py
@@ -264,7 +264,7 @@
args = parser.parse_args()
device = core.DeviceOption(
- caffe2_pb2.CUDA if args.gpu else caffe2_pb2.CPU, 0)
+ workspace.GpuDeviceType if args.gpu else caffe2_pb2.CPU, 0)
with core.DeviceScope(device):
model = CharRNN(args)
model.CreateModel()
diff --git a/caffe2/python/examples/resnet50_trainer.py b/caffe2/python/examples/resnet50_trainer.py
index 05b753b..307d7b2 100644
--- a/caffe2/python/examples/resnet50_trainer.py
+++ b/caffe2/python/examples/resnet50_trainer.py
@@ -67,7 +67,7 @@
reader, ["data", "label"],
batch_size=batch_size,
output_type=dtype,
- use_gpu_transform=True if model._device_type == 1 else False,
+ use_gpu_transform=True if core.IsGPUDeviceType(model._device_type) else False,
use_caffe_datum=True,
mean_per_channel=mean_per_channel,
std_per_channel=std_per_channel,
diff --git a/caffe2/python/gradient_check_test.py b/caffe2/python/gradient_check_test.py
index b67c173..7d88efc 100644
--- a/caffe2/python/gradient_check_test.py
+++ b/caffe2/python/gradient_check_test.py
@@ -23,9 +23,9 @@
import unittest
-if workspace.has_gpu_support and workspace.NumCudaDevices() > 0:
+if (workspace.has_gpu_support or workspace.has_hip_support) and workspace.NumGpuDevices() > 0:
gpu_device_option = caffe2_pb2.DeviceOption()
- gpu_device_option.device_type = caffe2_pb2.CUDA
+ gpu_device_option.device_type = workspace.GpuDeviceType
cpu_device_option = caffe2_pb2.DeviceOption()
gpu_device_checker = device_checker.DeviceChecker(
0.01, [gpu_device_option]
diff --git a/caffe2/python/hypothesis_test_util.py b/caffe2/python/hypothesis_test_util.py
index b6f3b49..1f6ae91 100644
--- a/caffe2/python/hypothesis_test_util.py
+++ b/caffe2/python/hypothesis_test_util.py
@@ -259,10 +259,9 @@
device_options = _device_options_no_hip + ([hip_do] if workspace.has_hip_support else [])
# Include device option for each GPU
-expanded_device_options = [cpu_do] + (
- [caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, device_id=i)
- for i in range(workspace.NumCudaDevices())]
- if workspace.has_gpu_support else [])
+expanded_device_options = [cpu_do] + [
+ caffe2_pb2.DeviceOption(device_type=workspace.GpuDeviceType, device_id=i)
+ for i in range(workspace.NumGpuDevices())]
def device_checker_device_options():
diff --git a/caffe2/python/lstm_benchmark.py b/caffe2/python/lstm_benchmark.py
index deefb12..a66967e 100644
--- a/caffe2/python/lstm_benchmark.py
+++ b/caffe2/python/lstm_benchmark.py
@@ -341,7 +341,7 @@
'--caffe2_gpu_memory_tracking=1'] + extra_args)
device = core.DeviceOption(
- caffe2_pb2.CUDA if args.gpu else caffe2_pb2.CPU, 4)
+ workspace.GpuDeviceType if args.gpu else caffe2_pb2.CPU, 4)
with core.DeviceScope(device):
Benchmark(args)
diff --git a/caffe2/python/memonger_test.py b/caffe2/python/memonger_test.py
index 6536280..c3e6168 100644
--- a/caffe2/python/memonger_test.py
+++ b/caffe2/python/memonger_test.py
@@ -223,13 +223,14 @@
np.testing.assert_almost_equal(loss, optimized_loss)
np.testing.assert_almost_equal(grad, optimized_grad)
- @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
+ @unittest.skipIf(not workspace.has_gpu_support
+ and not workspace.has_hip_support, "No gpu support.")
def test_memonger_mix_cpu_gpu(self):
'''
Check that memonger does not make blobs cross CPU/GPU boundary
'''
m = model_helper.ModelHelper()
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
fc1 = brew.fc(m, "data", "fc1", dim_in=2, dim_out=2)
fc2 = brew.fc(m, fc1, "fc2", dim_in=2, dim_out=2)
fc3 = brew.fc(m, fc2, "fc3", dim_in=2, dim_out=2)
@@ -259,7 +260,7 @@
# Create set of blobs on CPU side and GPU side and check they don't
# overlap
- device_blobs = {caffe2_pb2.CPU: set(), caffe2_pb2.CUDA: set()}
+ device_blobs = {caffe2_pb2.CPU: set(), workspace.GpuDeviceType: set()}
for op in optim_proto.op:
if op.type not in ['CopyCPUToGPU', "CopyGPUToCPU"]:
dev = op.device_option.device_type
@@ -267,7 +268,7 @@
device_blobs[dev].add(b)
device_crossers = device_blobs[caffe2_pb2.CPU].intersection(
- device_blobs[caffe2_pb2.CUDA]
+ device_blobs[workspace.GpuDeviceType]
)
self.assertEquals(device_crossers, set())
diff --git a/caffe2/python/model_device_test.py b/caffe2/python/model_device_test.py
index 31cba3f..86c46ea 100644
--- a/caffe2/python/model_device_test.py
+++ b/caffe2/python/model_device_test.py
@@ -124,7 +124,7 @@
cpu_device = caffe2_pb2.DeviceOption()
cpu_device.device_type = caffe2_pb2.CPU
gpu_device = caffe2_pb2.DeviceOption()
- gpu_device.device_type = caffe2_pb2.CUDA
+ gpu_device.device_type = workspace.GpuDeviceType
checker = device_checker.DeviceChecker(0.05, [cpu_device, gpu_device])
ret = checker.CheckNet(
@@ -136,7 +136,8 @@
)
self.assertEqual(ret, True)
- @unittest.skipIf(not workspace.has_gpu_support,
+ @unittest.skipIf(not workspace.has_gpu_support
+ and not workspace.has_hip_support,
"No GPU support. Skipping test.")
def testMiniAlexNetNCHW(self):
self._testMiniAlexNet("NCHW")
diff --git a/caffe2/python/models/seq2seq/train.py b/caffe2/python/models/seq2seq/train.py
index 1e5b286..df68e3e 100644
--- a/caffe2/python/models/seq2seq/train.py
+++ b/caffe2/python/models/seq2seq/train.py
@@ -537,7 +537,7 @@
if batch_obj_name in ['encoder_inputs', 'decoder_inputs']:
dev = core.DeviceOption(caffe2_pb2.CPU)
else:
- dev = core.DeviceOption(caffe2_pb2.CUDA, i)
+ dev = core.DeviceOption(workspace.GpuDeviceType, i)
workspace.FeedBlob(name, batch_obj_value, device_option=dev)
if forward_only:
diff --git a/caffe2/python/muji.py b/caffe2/python/muji.py
index 2f2b5ac..3464739 100644
--- a/caffe2/python/muji.py
+++ b/caffe2/python/muji.py
@@ -25,7 +25,7 @@
specified gpu id.
"""
device_option = caffe2_pb2.DeviceOption()
- device_option.device_type = caffe2_pb2.CUDA
+ device_option.device_type = workspace.GpuDeviceType
device_option.device_id = gpu_id
return device_option
@@ -39,7 +39,7 @@
def Allreduce(net, blobs, reduced_affix="_reduced", gpu_indices=None):
"""The general Allreduce interface that reroutes the function calls.
CPUs and AMD GPUs are not supported because
- GetCudaPeerAccessPattern is called to get gpu peer access pattern.
+ GetGpuPeerAccessPattern is called to get gpu peer access pattern.
"""
if gpu_indices is None:
gpu_indices = list(range(len(blobs)))
@@ -48,7 +48,7 @@
"gpu_indices length and blobs length mismatch: %d vs %d" %
(len(gpu_indices), len(blobs))
)
- pattern = workspace.GetCudaPeerAccessPattern()
+ pattern = workspace.GetGpuPeerAccessPattern()
if len(blobs) == 2 and pattern.shape[0] >= 2 and np.all(pattern[:2, :2]):
return Allreduce2(net, blobs, reduced_affix, gpu_indices)
elif len(blobs) == 4 and pattern.shape[0] >= 4 and np.all(pattern[:4, :4]):
diff --git a/caffe2/python/muji_test.py b/caffe2/python/muji_test.py
index cca0ca0..8adc2da 100644
--- a/caffe2/python/muji_test.py
+++ b/caffe2/python/muji_test.py
@@ -38,36 +38,36 @@
def testAllreduceFallback(self):
self.RunningAllreduceWithGPUs(
- list(range(workspace.NumCudaDevices())), muji.AllreduceFallback
+ list(range(workspace.NumGpuDevices())), muji.AllreduceFallback
)
def testAllreduceSingleGPU(self):
- for i in range(workspace.NumCudaDevices()):
+ for i in range(workspace.NumGpuDevices()):
self.RunningAllreduceWithGPUs([i], muji.Allreduce)
def testAllreduceWithTwoGPUs(self):
- pattern = workspace.GetCudaPeerAccessPattern()
+ pattern = workspace.GetGpuPeerAccessPattern()
if pattern.shape[0] >= 2 and np.all(pattern[:2, :2]):
self.RunningAllreduceWithGPUs([0, 1], muji.Allreduce2)
else:
print('Skipping allreduce with 2 gpus. Not peer access ready.')
def testAllreduceWithFourGPUs(self):
- pattern = workspace.GetCudaPeerAccessPattern()
+ pattern = workspace.GetGpuPeerAccessPattern()
if pattern.shape[0] >= 4 and np.all(pattern[:4, :4]):
self.RunningAllreduceWithGPUs([0, 1, 2, 3], muji.Allreduce4)
else:
print('Skipping allreduce with 4 gpus. Not peer access ready.')
def testAllreduceWithFourGPUsAndTwoGroups(self):
- pattern = workspace.GetCudaPeerAccessPattern()
+ pattern = workspace.GetGpuPeerAccessPattern()
if pattern.shape[0] >= 4 and np.all(pattern[:2, :2]) and np.all(pattern[2:4, 2:4]):
self.RunningAllreduceWithGPUs([0, 1, 2, 3], muji.Allreduce4Group2)
else:
print('Skipping allreduce with 4 gpus and 2 groups. Not peer access ready.')
def testAllreduceWithEightGPUs(self):
- pattern = workspace.GetCudaPeerAccessPattern()
+ pattern = workspace.GetGpuPeerAccessPattern()
if (
pattern.shape[0] >= 8 and np.all(pattern[:4, :4]) and
np.all(pattern[4:, 4:])
diff --git a/caffe2/python/operator_test/copy_ops_test.py b/caffe2/python/operator_test/copy_ops_test.py
index 05a018f..04e9358 100644
--- a/caffe2/python/operator_test/copy_ops_test.py
+++ b/caffe2/python/operator_test/copy_ops_test.py
@@ -40,21 +40,21 @@
def test_copy_gradient_cpu(self):
self.run_test_copy_gradient(core.DeviceOption(caffe2_pb2.CPU, 0))
- @unittest.skipIf(workspace.NumCudaDevices() < 1, "Need at least 1 GPU.")
+ @unittest.skipIf(workspace.NumGpuDevices() < 1, "Need at least 1 GPU.")
def test_copy_gradient_gpu(self):
- self.run_test_copy_gradient(core.DeviceOption(caffe2_pb2.CUDA, 0))
+ self.run_test_copy_gradient(core.DeviceOption(workspace.GpuDeviceType, 0))
- @unittest.skipIf(workspace.NumCudaDevices() < 2, "Need at least 2 GPU.")
+ @unittest.skipIf(workspace.NumGpuDevices() < 2, "Need at least 2 GPU.")
def test_copy_gradient_multiple_gpus(self):
model = model_helper.ModelHelper(name="copy_test")
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
x_cpu = model.net.AddExternalInputs("x_cpu")
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
x_gpu_1 = model.CopyCPUToGPU(x_cpu, "x_gpu_1")
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 1)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 1)):
x_gpu_2 = model.Copy(x_gpu_1, "x_gpu_2")
loss = model.AveragedLoss(x_gpu_2, "loss")
gradient_map = model.AddGradientOperators([loss])
@@ -80,20 +80,20 @@
self.assertEqual(
get_op_with_output(model, "x_gpu_2_grad").device_option,
- core.DeviceOption(caffe2_pb2.CUDA, 1),
+ core.DeviceOption(workspace.GpuDeviceType, 1),
)
self.assertEqual(
get_op_with_output(model, "x_cpu_grad").device_option,
- core.DeviceOption(caffe2_pb2.CUDA, 0),
+ core.DeviceOption(workspace.GpuDeviceType, 0),
)
- @unittest.skipIf(workspace.NumCudaDevices() < 1, "Need at least 1 GPU.")
+ @unittest.skipIf(workspace.NumGpuDevices() < 1, "Need at least 1 GPU.")
def test_cpu2gpu_gpu2cpu_sparse_gradients(self):
model = model_helper.ModelHelper(name="copy_test")
v = model.param_init_net.UniformFill([], ["v"], shape=[16, 4])
indices = model.param_init_net.UniformFill([], ["v"], shape=[16, 4])
cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
- gpu_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
+ gpu_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
with core.DeviceScope(gpu_opt):
vcpu = model.CopyGPUToCPU(v, "vcpu")
@@ -112,13 +112,13 @@
self.assertTrue("v" in gradient_map)
self.assertTrue(isinstance(gradient_map['v'], core.GradientSlice))
- @unittest.skipIf(workspace.NumCudaDevices() < 1, "Need at least 1 GPU.")
+ @unittest.skipIf(workspace.NumGpuDevices() < 1, "Need at least 1 GPU.")
def test_cpu2gpu_gpu2cpu_gradients(self):
model = model_helper.ModelHelper(name="copy_test")
batch = 32
cpu_opt = core.DeviceOption(caffe2_pb2.CPU, 0)
- gpu_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
+ gpu_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
with core.NameScope("cpu"):
with core.DeviceScope(cpu_opt):
diff --git a/caffe2/python/operator_test/load_save_test.py b/caffe2/python/operator_test/load_save_test.py
index 8e38170..1f3f930 100644
--- a/caffe2/python/operator_test/load_save_test.py
+++ b/caffe2/python/operator_test/load_save_test.py
@@ -15,8 +15,8 @@
from caffe2.python import core, test_util, workspace
if workspace.has_gpu_support:
- DEVICES = [caffe2_pb2.CPU, caffe2_pb2.CUDA]
- max_gpuid = workspace.NumCudaDevices() - 1
+ DEVICES = [caffe2_pb2.CPU, workspace.GpuDeviceType]
+ max_gpuid = workspace.NumGpuDevices() - 1
else:
DEVICES = [caffe2_pb2.CPU]
max_gpuid = 0
@@ -42,8 +42,8 @@
np.int16, np.int32, np.int64, np.uint8, np.uint16]
arrays = [np.random.permutation(6).reshape(2, 3).astype(T)
for T in dtypes]
- assume(src_device_type == caffe2_pb2.CUDA or src_gpu_id == 0)
- assume(dst_device_type == caffe2_pb2.CUDA or dst_gpu_id == 0)
+ assume(core.IsGPUDeviceType(src_device_type) or src_gpu_id == 0)
+ assume(core.IsGPUDeviceType(dst_device_type) or dst_gpu_id == 0)
src_device_option = core.DeviceOption(
src_device_type, src_gpu_id)
dst_device_option = core.DeviceOption(
@@ -90,7 +90,7 @@
self.assertTrue(proto.HasField('tensor'))
self.assertEqual(proto.tensor.device_detail.device_type,
device_type)
- if device_type == caffe2_pb2.CUDA:
+ if core.IsGPUDeviceType(device_type):
self.assertEqual(proto.tensor.device_detail.device_id,
gpu_id)
diff --git a/caffe2/python/operator_test/prepend_dim_test.py b/caffe2/python/operator_test/prepend_dim_test.py
index a5b7d01..6cf8e7a 100644
--- a/caffe2/python/operator_test/prepend_dim_test.py
+++ b/caffe2/python/operator_test/prepend_dim_test.py
@@ -38,8 +38,8 @@
def test_prepend_dim(self):
devices = [core.DeviceOption(caffe2_pb2.CPU, 0)]
- if workspace.NumCudaDevices() > 0:
- devices.append(core.DeviceOption(caffe2_pb2.CUDA, 0))
+ if workspace.NumGpuDevices() > 0:
+ devices.append(core.DeviceOption(workspace.GpuDeviceType, 0))
for device_opt in devices:
with core.DeviceScope(device_opt):
diff --git a/caffe2/python/operator_test/reshape_ops_test.py b/caffe2/python/operator_test/reshape_ops_test.py
index be006f4..98189b8 100644
--- a/caffe2/python/operator_test/reshape_ops_test.py
+++ b/caffe2/python/operator_test/reshape_ops_test.py
@@ -115,8 +115,8 @@
def _test_reshape(old_shape, new_shape, expected_shape=None, arg_shape=True,
in_place=False):
devices = [core.DeviceOption(caffe2_pb2.CPU, 0)]
- if workspace.NumCudaDevices() > 0:
- devices.append(core.DeviceOption(caffe2_pb2.CUDA, 0))
+ if workspace.NumGpuDevices() > 0:
+ devices.append(core.DeviceOption(workspace.GpuDeviceType, 0))
for device_opt in devices:
with core.DeviceScope(device_opt):
diff --git a/caffe2/python/operator_test/roi_align_rotated_op_test.py b/caffe2/python/operator_test/roi_align_rotated_op_test.py
index 9263791..0487d96 100644
--- a/caffe2/python/operator_test/roi_align_rotated_op_test.py
+++ b/caffe2/python/operator_test/roi_align_rotated_op_test.py
@@ -77,7 +77,7 @@
self.assertReferenceChecks(
device_option=gc, op=op, inputs=[X, R], reference=roialign_ref
)
- if gc.device_type == caffe2_pb2.CUDA:
+ if core.IsGPUDeviceType(gc.device_type):
self.assertGradientChecks(gc, op, [X, R], 0, [0])
@given(
@@ -202,7 +202,7 @@
self.assertReferenceChecks(
device_option=gc, op=op, inputs=[X, R], reference=roialign_ref
)
- if gc.device_type == caffe2_pb2.CUDA:
+ if core.IsGPUDeviceType(gc.device_type):
self.assertGradientChecks(gc, op, [X, R], 0, [0])
diff --git a/caffe2/python/optimizer.py b/caffe2/python/optimizer.py
index b84f852..0aa0201 100644
--- a/caffe2/python/optimizer.py
+++ b/caffe2/python/optimizer.py
@@ -81,7 +81,7 @@
if current_scope is None:
return self.get_cpu_blob_name(base_str)
- if current_scope.device_type == caffe2_pb2.CUDA:
+ if core.IsGPUDeviceType(current_scope.device_type):
return self.get_gpu_blob_name(
base_str, current_scope.device_id, current_scope.node_name
)
@@ -127,7 +127,7 @@
if self._local_lr_multiplier is not None:
current_scope = scope.CurrentDeviceScope()
if (current_scope is not None
- and current_scope.device_type == caffe2_pb2.CUDA
+ and core.IsGPUDeviceType(current_scope.device_type)
and not self._local_lr_multiplier_on_gpu):
local_lr_multiplier = net.CopyFromCPUInput(
self._local_lr_multiplier,
@@ -258,7 +258,7 @@
self._add_local_lr_multiplier(
lr_lars_multiplier,
is_gpu_blob=(current_scope is not None
- and current_scope.device_type == caffe2_pb2.CUDA),
+ and core.IsGPUDeviceType(current_scope.device_type)),
)
# We need negative sign for LR when used directly with WeightedSum
@@ -549,7 +549,7 @@
self._add_local_lr_multiplier(
lr_lars_multiplier,
is_gpu_blob=(current_scope is not None
- and current_scope.device_type == caffe2_pb2.CUDA),
+ and core.IsGPUDeviceType(current_scope.device_type)),
)
lr, _ = self.build_lr(
@@ -688,7 +688,7 @@
self._add_local_lr_multiplier(
lr_lars_multiplier,
is_gpu_blob=(current_scope is not None
- and current_scope.device_type == caffe2_pb2.CUDA),
+ and core.IsGPUDeviceType(current_scope.device_type)),
)
lr, _ = self.build_lr(
diff --git a/caffe2/python/optimizer_test.py b/caffe2/python/optimizer_test.py
index 0dc8e55..5ca6af4 100644
--- a/caffe2/python/optimizer_test.py
+++ b/caffe2/python/optimizer_test.py
@@ -454,11 +454,12 @@
)
@unittest.skip("Results might vary too much. Only for individual use.")
- @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
+ @unittest.skipIf(not workspace.has_gpu_support
+ and not workspace.has_hip_support, "No gpu support")
def test_caffe2_gpu_vs_numpy(self):
n_dim = 1000000
n_iter = 50
- gpu_device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
+ gpu_device_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
with core.DeviceScope(gpu_device_opt):
for zero_debias in [False, True]:
for grad_coef in [1.0, 0.1, 0.01]:
diff --git a/caffe2/python/optimizer_test_util.py b/caffe2/python/optimizer_test_util.py
index dbb0dbe..685782d 100644
--- a/caffe2/python/optimizer_test_util.py
+++ b/caffe2/python/optimizer_test_util.py
@@ -70,7 +70,7 @@
@unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
def testGPUDense(self, dtype=core.DataType.FLOAT):
- device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)
+ device_opt = core.DeviceOption(workspace.GpuDeviceType, 0)
with core.DeviceScope(device_opt):
model, _perfect_model, data, label = self._createDense(dtype)
if dtype == core.DataType.FLOAT16:
diff --git a/caffe2/python/parallelize_bmuf_distributed_test.py b/caffe2/python/parallelize_bmuf_distributed_test.py
index afce7c6..a930c15 100644
--- a/caffe2/python/parallelize_bmuf_distributed_test.py
+++ b/caffe2/python/parallelize_bmuf_distributed_test.py
@@ -25,10 +25,10 @@
dyndep.InitOpsLibrary("@/caffe2/caffe2/distributed:file_store_handler_ops")
if not cpu_device:
- if not workspace.has_gpu_support:
+ if not workspace.has_gpu_support and not workspace.has_hip_support:
log.info('No GPU support test is Ignored.')
return
- if workspace.NumCudaDevices() < 4:
+ if workspace.NumGpuDevices() < 4:
log.info('Not enough GPU support, test IGNORED')
return
@@ -37,7 +37,7 @@
name="test"
)
if not cpu_device:
- device_type = caffe2_pb2.CUDA
+ device_type = workspace.GpuDeviceType
device_prefix = "gpu"
else:
device_type = caffe2_pb2.CPU
diff --git a/caffe2/python/rnn/lstm_comparison.py b/caffe2/python/rnn/lstm_comparison.py
index 2d44e09..c3bf9b3 100644
--- a/caffe2/python/rnn/lstm_comparison.py
+++ b/caffe2/python/rnn/lstm_comparison.py
@@ -11,7 +11,7 @@
results = []
num_iters = 1000
args.gpu = True
- with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
+ with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
for batch_size in [64, 128, 256]:
for seq_length in [20, 100]:
for hidden_dim in [40, 100, 400, 800]:
diff --git a/caffe2/python/rnn_cell.py b/caffe2/python/rnn_cell.py
index 4010502..108b749 100644
--- a/caffe2/python/rnn_cell.py
+++ b/caffe2/python/rnn_cell.py
@@ -1314,7 +1314,7 @@
)
if (
scope.CurrentDeviceScope() is not None and
- scope.CurrentDeviceScope().device_type == caffe2_pb2.CUDA
+ core.IsGPUDeviceType(scope.CurrentDeviceScope().device_type)
):
encoder_length = model.net.CopyGPUToCPU(
encoder_length,
diff --git a/caffe2/python/scope_test.py b/caffe2/python/scope_test.py
index 11f7a2c..d51488d 100644
--- a/caffe2/python/scope_test.py
+++ b/caffe2/python/scope_test.py
@@ -3,7 +3,7 @@
from __future__ import print_function
from __future__ import unicode_literals
-from caffe2.python import scope, core
+from caffe2.python import scope, core, workspace
from caffe2.proto import caffe2_pb2
import unittest
@@ -18,7 +18,7 @@
testobj.assertEquals(scope.CurrentNameScope(), "")
testobj.assertEquals(scope.CurrentDeviceScope(), None)
namescope = "namescope_{}".format(idx)
- dsc = core.DeviceOption(caffe2_pb2.CUDA, idx)
+ dsc = core.DeviceOption(workspace.GpuDeviceType, idx)
with scope.DeviceScope(dsc):
with scope.NameScope(namescope):
testobj.assertEquals(scope.CurrentNameScope(), namescope + "/")
@@ -58,7 +58,7 @@
def testDevicescopeBasic(self):
self.assertEquals(scope.CurrentDeviceScope(), None)
- dsc = core.DeviceOption(caffe2_pb2.CUDA, 9)
+ dsc = core.DeviceOption(workspace.GpuDeviceType, 9)
with scope.DeviceScope(dsc):
self.assertEquals(scope.CurrentDeviceScope(), dsc)
@@ -67,7 +67,7 @@
def testEmptyDevicescopeBasic(self):
self.assertEquals(scope.CurrentDeviceScope(), None)
- dsc = core.DeviceOption(caffe2_pb2.CUDA, 9)
+ dsc = core.DeviceOption(workspace.GpuDeviceType, 9)
with scope.DeviceScope(dsc):
self.assertEquals(scope.CurrentDeviceScope(), dsc)
with scope.EmptyDeviceScope():
@@ -78,7 +78,7 @@
def testDevicescopeAssertion(self):
self.assertEquals(scope.CurrentDeviceScope(), None)
- dsc = core.DeviceOption(caffe2_pb2.CUDA, 9)
+ dsc = core.DeviceOption(workspace.GpuDeviceType, 9)
try:
with scope.DeviceScope(dsc):
diff --git a/caffe2/python/test/executor_test.py b/caffe2/python/test/executor_test.py
index bee45e1..d4ff0c3 100644
--- a/caffe2/python/test/executor_test.py
+++ b/caffe2/python/test/executor_test.py
@@ -47,14 +47,15 @@
)
-@unittest.skipIf(not workspace.has_gpu_support, "no gpu")
+@unittest.skipIf(not workspace.has_gpu_support
+ and not workspace.has_hip_support, "no gpu")
class ExecutorGPUResNetTest(ExecutorTestBase):
@given(executor=st.sampled_from(EXECUTORS),
num_workers=st.sampled_from([8]))
@executor_test_settings
def test_executor(self, executor, num_workers):
model = build_resnet50_dataparallel_model(
- num_gpus=workspace.NumCudaDevices(), batch_size=8, epoch_size=8)
+ num_gpus=workspace.NumGpuDevices(), batch_size=8, epoch_size=8)
model.Proto().num_workers = num_workers
def run_model():
diff --git a/caffe2/python/utils.py b/caffe2/python/utils.py
index f9e87c1..a4ec59e 100644
--- a/caffe2/python/utils.py
+++ b/caffe2/python/utils.py
@@ -237,7 +237,7 @@
def GetGPUMemoryUsageStats():
- """Get GPU memory usage stats from CUDAContext. This requires flag
+ """Get GPU memory usage stats from CUDAContext/HIPContext. This requires flag
--caffe2_gpu_memory_tracking to be enabled"""
from caffe2.python import workspace, core
workspace.RunOperatorOnce(
@@ -245,7 +245,7 @@
"GetGPUMemoryUsage",
[],
["____mem____"],
- device_option=core.DeviceOption(caffe2_pb2.CUDA, 0),
+ device_option=core.DeviceOption(workspace.GpuDeviceType, 0),
),
)
b = workspace.FetchBlob("____mem____")
diff --git a/caffe2/python/workspace.py b/caffe2/python/workspace.py
index 8551ac5..88aece2 100644
--- a/caffe2/python/workspace.py
+++ b/caffe2/python/workspace.py
@@ -44,11 +44,15 @@
has_gpu_support = C.has_gpu_support
has_hip_support = C.has_hip_support
if has_gpu_support:
+ GpuDeviceType = caffe2_pb2.CUDA
NumCudaDevices = C.num_cuda_devices
+ # NumGpuDevices duplicates NumCudaDevices. Remove NumCudaDevices
+ # once every caller in the code base has been migrated to it.
+ NumGpuDevices = C.num_cuda_devices
GetCUDAVersion = C.get_cuda_version
GetCuDNNVersion = C.get_cudnn_version
- def GetCudaPeerAccessPattern():
+ def GetGpuPeerAccessPattern():
return np.asarray(C.get_cuda_peer_access_pattern())
GetDeviceProperties = C.get_device_properties
@@ -56,8 +60,22 @@
NumCudaDevices = lambda: 0 # noqa
GetCUDAVersion = lambda: 0 # noqa
GetCuDNNVersion = lambda: 0 # noqa
- GetCudaPeerAccessPattern = lambda: np.array([]) # noqa
+
+if has_hip_support:
+ GpuDeviceType = caffe2_pb2.HIP
+ NumGpuDevices = C.num_hip_devices
+
+ def GetGpuPeerAccessPattern():
+ return np.asarray(C.get_hip_peer_access_pattern())
+ GetDeviceProperties = C.get_device_properties
+
+if not has_gpu_support and not has_hip_support:
+ # Default GpuDeviceType to CUDA, because some tests (e.g. the core and
+ # scope tests) use GpuDeviceType even when there is no GPU support.
+ GpuDeviceType = caffe2_pb2.CUDA
+ NumGpuDevices = lambda: 0 # noqa
GetDeviceProperties = lambda x: None # noqa
+ GetGpuPeerAccessPattern = lambda: np.array([]) # noqa
IsNUMAEnabled = C.is_numa_enabled
GetNumNUMANodes = C.get_num_numa_nodes
@@ -82,7 +100,6 @@
# rather than 24x7 service.
return port
-
def StartMint(root_folder=None, port=None):
"""Start a mint instance.
diff --git a/caffe2/python/workspace_test.py b/caffe2/python/workspace_test.py
index a248d62..93bcb11 100644
--- a/caffe2/python/workspace_test.py
+++ b/caffe2/python/workspace_test.py
@@ -320,7 +320,8 @@
self.assertTrue("test" in workspaces)
-@unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
+@unittest.skipIf(not workspace.has_gpu_support
+ and not workspace.has_hip_support, "No gpu support.")
class TestWorkspaceGPU(test_util.TestCase):
def setUp(self):
@@ -342,12 +343,12 @@
self.assertEqual(fetched_again.shape, (1, 2, 3, 4))
np.testing.assert_array_equal(fetched_again, 2.0)
- def testGetCudaPeerAccessPattern(self):
- pattern = workspace.GetCudaPeerAccessPattern()
+ def testGetGpuPeerAccessPattern(self):
+ pattern = workspace.GetGpuPeerAccessPattern()
self.assertEqual(type(pattern), np.ndarray)
self.assertEqual(pattern.ndim, 2)
self.assertEqual(pattern.shape[0], pattern.shape[1])
- self.assertEqual(pattern.shape[0], workspace.NumCudaDevices())
+ self.assertEqual(pattern.shape[0], workspace.NumGpuDevices())
@unittest.skipIf(not workspace.C.use_mkldnn, "No MKLDNN support.")