# Source blob: 57aeef2c4458535b77a033b1e1da5ce08ececd0f
# Copyright (c) 2016-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
import time
from caffe2.python import workspace, cnn, memonger, core
import caffe2.python.models.resnet as resnet
import hypothesis.strategies as st
from hypothesis import given, settings
import caffe2.python.hypothesis_test_util as hu
def has_blob(proto, needle):
    """Return True if any operator in ``proto`` reads or writes ``needle``.

    Args:
        proto: Net-definition-like object whose ``op`` sequence holds
            operators exposing ``input`` and ``output`` sequences of names.
        needle: Blob name to search for.

    Returns:
        bool: True when ``needle`` equals an input or output name of at
        least one operator; False otherwise (including when there are no
        operators).
    """
    # Inputs are scanned before outputs for each operator, in operator order.
    return any(
        name == needle
        for op in proto.op
        for names in (op.input, op.output)
        for name in names
    )
def count_blobs(proto):
    """Count the distinct blob names referenced by the operators of ``proto``.

    Args:
        proto: Net-definition-like object whose ``op`` sequence holds
            operators exposing ``input`` and ``output`` sequences of names.

    Returns:
        int: Number of unique names appearing as an input or an output of
        any operator; 0 when there are no operators.
    """
    blobs = set()
    for op in proto.op:
        # Update in place instead of rebuilding the whole set per operator.
        blobs.update(op.input, op.output)
    return len(blobs)
def count_shared_blobs(proto):
    """Count the distinct blob names in ``proto`` that contain ``"_shared"``.

    Args:
        proto: Net-definition-like object whose ``op`` sequence holds
            operators exposing ``input`` and ``output`` sequences of names.

    Returns:
        int: Number of unique input/output names, across all operators,
        that contain the substring ``"_shared"``.
    """
    blobs = set()
    for op in proto.op:
        # Update in place instead of rebuilding the whole set per operator.
        blobs.update(op.input, op.output)
    # Count lazily; no intermediate list is needed just to take its length.
    return sum(1 for name in blobs if "_shared" in name)
class ResnetMemongerTest(hu.HypothesisTestCase):
    """Checks that memonger-optimized ResNet-50 nets match the original nets."""

    def _create_inference_resnet50(self):
        """Build and return a model helper holding a ResNet-50 with is_test=True."""
        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
            cudnn_exhaustive_search=True,
        )
        with core.NameScope("gpu_0"):
            data = model.net.AddExternalInput("gpu_0/data")
            resnet.create_resnet50(
                model,
                data,
                num_input_channels=3,
                num_labels=1000,
                is_test=True
            )
        return model

    @given(with_shapes=st.booleans(), **hu.gcs_cpu_only)
    @settings(max_examples=2, timeout=120)
    def test_resnet_shared_grads(self, with_shapes, gc, dc):
        """Compare a training net against its share_grad_blobs-optimized form.

        The optimized proto must reference fewer distinct blobs, and running
        it must reproduce both the "gpu_0/last_out_L1000" blob and the
        gradient of "gpu_0/conv1_w" obtained from the unoptimized net.
        """
        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
            cudnn_exhaustive_search=True,
        )
        with core.NameScope("gpu_0"):
            data = model.net.AddExternalInput("gpu_0/data")
            label = model.net.AddExternalInput("gpu_0/label")
            (_softmax, loss) = resnet.create_resnet50(
                model,
                data,
                num_input_channels=3,
                num_labels=1000,
                label=label,
                is_test=False,
            )

        param_to_grad = model.AddGradientOperators([loss])

        # Only the inferred shapes are used below; the types are discarded.
        (shapes, _types) = workspace.InferShapesAndTypes(
            [model.param_init_net, model.net],
            {'gpu_0/data': [4, 3, 227, 227],
             'gpu_0/label': [4]},
        )

        count_before = count_blobs(model.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            model.net,
            ["gpu_0/loss"],
            set(model.param_to_grad.values()),
            "gpu_0/",
            share_activations=True,
            dont_share_blobs={str(param_to_grad["gpu_0/conv1_w"])},
            blob_shapes=shapes if with_shapes else None,
        )
        count_after = count_blobs(optim_proto)
        self.assertLess(count_after, count_before)

        # Run model and compare results. We check that the loss is the same
        # and also that the final gradient (conv1_w_grad) is the same.
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)
        label = (np.random.rand(4) * 1000).astype(np.int32)

        workspace.FeedBlob("gpu_0/data", data)
        workspace.FeedBlob("gpu_0/label", label)

        workspace.RunNetOnce(model.net)
        # NOTE(review): these executor settings are applied after model.net
        # has already been run, and model.net is not run again below --
        # confirm whether they were meant to precede the run.
        model.net.Proto().type = 'dag'
        model.net.Proto().num_workers = 4
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"])
        # Overwrite the stored gradient so the second fetch can only match
        # if the optimized net recomputes it.
        workspace.FeedBlob(param_to_grad["gpu_0/conv1_w"], np.array([0.0]))

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        optim_conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"])

        print("before: {} after: {}".format(count_before, count_after))

        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(conv1_w_grad, optim_conv1_w_grad)

    def test_resnet_forward_only(self):
        """Compare an inference net against its optimize_inference_for_dag form.

        The optimized proto must reference fewer distinct blobs, contain
        between 1 and 6 "_shared" blobs, and reproduce the
        "gpu_0/last_out_L1000" blob of the unoptimized net.
        """
        model = self._create_inference_resnet50()

        count_before = count_blobs(model.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            model.net, ["gpu_0/data"], "gpu_0/"
        )
        count_after = count_blobs(optim_proto)
        num_shared_blobs = count_shared_blobs(optim_proto)

        # Run model and compare results
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)

        workspace.FeedBlob("gpu_0/data", data)
        workspace.RunNetOnce(model.net)
        # NOTE(review): these executor settings are applied after model.net
        # has already been run, and model.net is not run again below --
        # confirm whether they were meant to precede the run.
        model.net.Proto().type = 'dag'
        model.net.Proto().num_workers = 4
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        self.assertLess(count_after, count_before)
        self.assertGreater(num_shared_blobs, 0)
        self.assertLess(num_shared_blobs, 7)
        np.testing.assert_almost_equal(loss1, optimized_loss1)

    def test_resnet_forward_only_fast_simplenet(self):
        """
        Test C++ memonger that is only for simple nets
        """
        model = self._create_inference_resnet50()

        count_before = count_blobs(model.net.Proto())
        t = time.time()
        optim_proto = memonger.optimize_inference_fast(
            model.net.Proto(),
            {"gpu_0/data", "gpu_0/last_out_L1000"}.union(
                model.net.Proto().external_input)
        )
        print("Optimization took {} secs".format(time.time() - t))
        count_after = count_blobs(optim_proto)
        num_shared_blobs = count_shared_blobs(optim_proto)

        self.assertLess(count_after, count_before)
        print(count_after, count_before, num_shared_blobs)
        self.assertGreater(num_shared_blobs, 0)
        self.assertLess(num_shared_blobs, 7)

        # Run model and compare results
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)

        workspace.FeedBlob("gpu_0/data", data)
        model.net.Proto().type = 'simple'

        workspace.RunNetOnce(model.net)
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
if __name__ == "__main__":
    import random
    import unittest

    # Seed Python's random module with a fixed value before running tests.
    random.seed(2603)

    # Command-line style flags handed to Caffe2's global initialization.
    caffe2_argv = [
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0',
        '--caffe2_gpu_memory_tracking=1',
    ]
    workspace.GlobalInit(caffe2_argv)
    unittest.main()