support appending nets and converting them for a device
Summary:
As per rushabhmshah99's request: he wants to append a pre-trained model (without training it) to a data-parallel model.
Added data_parallel_model.ConvertNetForDevice() to enable that. The unit test shows an example of how to use this with
AppendNet, and I also added a short docstring to the function.
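A minimal sketch of the intended usage (pretrained_model is a hypothetical name; the builder is whatever you pass as forward_pass_builder_fun to Parallelize_GPU / Parallelize_CPU):

    def forward_pass_builder_fun(model, loss_scale):
        # The builder is invoked inside each device's scope, so
        # ConvertNetForDevice picks up the current device and rewrites the
        # appended ops into the matching "gpu_X/" or "cpu_X/" namescope.
        model.net.AppendNet(
            data_parallel_model.ConvertNetForDevice(pretrained_model.net))
        model.param_init_net.AppendNet(
            data_parallel_model.ConvertNetForDevice(pretrained_model.param_init_net))
        ...
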
Differential Revision: D5503335
fbshipit-source-id: b2a5db5c1739dc97f46dd0d7606ed555d99255b8
diff --git a/caffe2/python/data_parallel_model.py b/caffe2/python/data_parallel_model.py
index c2cea66..709a9f3 100644
--- a/caffe2/python/data_parallel_model.py
+++ b/caffe2/python/data_parallel_model.py
@@ -563,6 +563,44 @@
     workspace.RunNetOnce(barrier_net)
+def ConvertNetForDevice(net, device=None):
+    '''
+    Converts all blobs in the net to have namescope gpu_X, and correct
+    device scope. You can use this to enable AppendNet with a
+    forward_pass_builder_fun:
+
+        def builder_fun(model):
+            ...
+            model.net.AppendNet(
+                data_parallel_model.ConvertNetForDevice(othermodel.net))
+            model.param_init_net.AppendNet(
+                data_parallel_model.ConvertNetForDevice(othermodel.param_init_net))
+    '''
+    mnet = copy.deepcopy(net)
+
+    if device is None:
+        device = scope.CurrentDeviceScope()
+
+    device_prefix = "gpu" if device.device_type == caffe2_pb2.CUDA else "cpu"
+
+    namescope = "{}_{}/".format(device_prefix, device.cuda_gpu_id)
+    for op in mnet.Proto().op:
+        if "RecurrentNetwork" in op.type:
+            raise NotImplementedError("RecurrentNetwork conversion not yet supported")
+        for i, inputb in enumerate(op.input):
+            op.input[i] = namescope + inputb
+        for i, outputb in enumerate(op.output):
+            op.output[i] = namescope + outputb
+        for i, blob in enumerate(op.control_input):
+            op.control_input[i] = namescope + blob
+        op.device_option.CopyFrom(device)
+    for i, einp in enumerate(mnet.Proto().external_input):
+        mnet.Proto().external_input[i] = namescope + einp
+    for i, eoutp in enumerate(mnet.Proto().external_output):
+        mnet.Proto().external_output[i] = namescope + eoutp
+    return mnet
+
+
 def _ForEachGPU(gpu_ids, f, scoped=False, *args, **kwargs):
     for gpu_id in gpu_ids:
         device_opt = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
diff --git a/caffe2/python/data_parallel_model_test.py b/caffe2/python/data_parallel_model_test.py
index 39b644a..f586930 100644
--- a/caffe2/python/data_parallel_model_test.py
+++ b/caffe2/python/data_parallel_model_test.py
@@ -9,7 +9,7 @@
 from multiprocessing import Process, Queue
 from caffe2.proto import caffe2_pb2
 from caffe2.python import core, cnn, data_parallel_model, dyndep, optimizer, \
-    rnn_cell, workspace
+    rnn_cell, workspace, model_helper, brew
 from caffe2.python.test_util import TestCase
 from future.utils import viewkeys
@@ -208,6 +208,59 @@
         self.assertFalse(core.BlobReference("cpu_1/data") in checkpoint_params)
         self.assertTrue(core.BlobReference("optimizer_iteration") in checkpoint_params)
+    def test_net_conversion_and_append_net(self):
+        other = model_helper.ModelHelper()
+        fc1 = brew.fc(other, "data", "other_fc1", dim_in=3*227*227, dim_out=10)
+        fc2 = brew.fc(other, fc1, "other_fc2", dim_in=10, dim_out=10)
+        brew.fc(other, fc2, "other_fc3", dim_in=10, dim_out=10)
+
+        def add_input_ops(model):
+            model.net.UniformFill([], ["data"], shape=[4, 227, 227, 3])
+            model.net.UniformFill([], ["label"], shape=[4])
+
+        def add_model_ops(model, loss_scale):
+            model.NHWC2NCHW("data", "data_nchw")
+            model.Conv("data_nchw", 'conv1', 3, 64,
+                       weight_init=("MSRAFill", {}), kernel=7,
+                       stride=2, pad=3, no_bias=0)
+            model.SpatialBN('conv1', 'conv1_spatbn_relu', 64, epsilon=1e-3)
+            model.Relu('conv1_spatbn_relu', 'conv1_spatbn_relu')
+            model.MaxPool('conv1_spatbn_relu', 'pool1', kernel=3, stride=2)
+            model.FC('pool1', 'fc', dim_in=(64 * 56 * 56), dim_out=10)
+
+            # Append the net and param_init_net of the other model
+            appendnet = data_parallel_model.ConvertNetForDevice(other.net)
+            model.net.AppendNet(appendnet)
+
+            model.param_init_net.AppendNet(
+                data_parallel_model.ConvertNetForDevice(other.param_init_net))
+
+            model.Sigmoid('fc', 'fc_sigm')
+            model.Softmax('fc_sigm', 'softmax')
+            loss = model.AveragedLoss('softmax', 'loss')
+            return [loss]
+
+        def add_optimizer(model):
+            optimizer.build_sgd(model, 0.1, policy="fixed", momentum=0.9)
+
+        model = cnn.CNNModelHelper(
+            order="NCHW",
+            name="test",
+        )
+        data_parallel_model.Parallelize_CPU(
+            model,
+            input_builder_fun=add_input_ops,
+            forward_pass_builder_fun=add_model_ops,
+            optimizer_builder_fun=add_optimizer,
+            devices=range(4)
+        )
+
+        # Just create and run net and confirm no exception is thrown
+        workspace.RunNetOnce(model.param_init_net)
+        workspace.CreateNet(model.net)
+        workspace.RunNet(model.net)
+
+
     def test_synchronization_barrier(self):
         def run(comm_rank, comm_size, tmpdir):