| # Copyright (c) 2016-present, Facebook, Inc. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| ############################################################################## |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| |
| import numpy as np |
| import time |
| |
| from caffe2.python import workspace, cnn, memonger, core |
| import caffe2.python.models.resnet as resnet |
| import hypothesis.strategies as st |
| from hypothesis import given, settings |
| import caffe2.python.hypothesis_test_util as hu |
| |
| |
def has_blob(proto, needle):
    """Return True if blob `needle` appears as an input or output of any op in `proto`."""
    return any(
        needle in op.input or needle in op.output
        for op in proto.op
    )
| |
| |
def count_blobs(proto):
    """Count the distinct blob names referenced (as input or output) by ops in `proto`."""
    seen = set()
    for op in proto.op:
        seen.update(op.input)
        seen.update(op.output)
    return len(seen)
| |
| |
def count_shared_blobs(proto):
    """Count distinct blobs in `proto` whose name contains "_shared".

    Memonger renames recycled blobs with a "_shared" marker, so this measures
    how many blobs the optimization pass actually pooled.
    """
    shared = {
        blob
        for op in proto.op
        for blob in list(op.input) + list(op.output)
        if "_shared" in blob
    }
    return len(shared)
| |
| |
class ResnetMemongerTest(hu.HypothesisTestCase):
    """End-to-end tests that memonger memory optimizations on ResNet-50
    reduce the blob count without changing numerical results."""

    @given(with_shapes=st.booleans(), **hu.gcs_cpu_only)
    @settings(max_examples=2, timeout=120)
    def test_resnet_shared_grads(self, with_shapes, gc, dc):
        """Check that share_grad_blobs shrinks the blob count of a ResNet-50
        training net and leaves the loss and conv1_w gradient unchanged.

        `with_shapes` toggles whether inferred blob shapes are passed to the
        optimizer (it should be correct either way).
        """
        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
            cudnn_exhaustive_search=True,
        )
        with core.NameScope("gpu_0"):
            data = model.net.AddExternalInput("gpu_0/data")
            label = model.net.AddExternalInput("gpu_0/label")
            (_softmax, loss) = resnet.create_resnet50(
                model,
                data,
                num_input_channels=3,
                num_labels=1000,
                label=label,
                is_test=False,
            )

        param_to_grad = model.AddGradientOperators([loss])

        # Infer shapes so the optimizer can (optionally) use blob sizes.
        (shapes, types) = workspace.InferShapesAndTypes(
            [model.param_init_net, model.net],
            {'gpu_0/data': [4, 3, 227, 227],
             'gpu_0/label': [4]},
        )

        count_before = count_blobs(model.net.Proto())
        # NOTE(review): uses model.param_to_grad here but the local
        # param_to_grad elsewhere — presumably the same mapping; verify.
        # conv1_w's gradient is excluded from sharing so it can be compared
        # against the unoptimized run below.
        optim_proto = memonger.share_grad_blobs(
            model.net,
            ["gpu_0/loss"],
            set(model.param_to_grad.values()),
            "gpu_0/",
            share_activations=True,
            dont_share_blobs=set([str(param_to_grad["gpu_0/conv1_w"])]),
            blob_shapes=shapes if with_shapes else None,
        )
        count_after = count_blobs(optim_proto)
        self.assertTrue(count_after < count_before)

        # Run model and compare results. We check that the loss is same
        # and also that the final gradient (conv1_w_grad is same)
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)
        label = (np.random.rand(4) * 1000).astype(np.int32)

        workspace.FeedBlob("gpu_0/data", data)
        workspace.FeedBlob("gpu_0/label", label)

        workspace.RunNetOnce(model.net)
        # NOTE(review): proto type/num_workers are set after RunNetOnce and
        # only on the original net, not optim_proto — looks like they have no
        # effect here; confirm intent.
        model.net.Proto().type = 'dag'
        model.net.Proto().num_workers = 4
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"])
        # Clobber the gradient so a matching value after the optimized run
        # proves it was actually recomputed, not left over.
        workspace.FeedBlob(param_to_grad["gpu_0/conv1_w"], np.array([0.0]))

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        optim_conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"])

        print("before: {} after: {}".format(count_before, count_after))

        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(conv1_w_grad, optim_conv1_w_grad)

    def test_resnet_forward_only(self):
        """Check optimize_inference_for_dag on an inference-only ResNet-50:
        fewer blobs, some shared blobs created, identical output."""
        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
            cudnn_exhaustive_search=True,
        )
        with core.NameScope("gpu_0"):
            data = model.net.AddExternalInput("gpu_0/data")
            resnet.create_resnet50(
                model,
                data,
                num_input_channels=3,
                num_labels=1000,
                is_test=True
            )

        count_before = count_blobs(model.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            model.net, ["gpu_0/data"], "gpu_0/"
        )
        count_after = count_blobs(optim_proto)
        num_shared_blobs = count_shared_blobs(optim_proto)

        # Run model and compare results
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)

        workspace.FeedBlob("gpu_0/data", data)
        workspace.RunNetOnce(model.net)
        # NOTE(review): set after RunNetOnce — presumably a no-op here; verify.
        model.net.Proto().type = 'dag'
        model.net.Proto().num_workers = 4
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        self.assertTrue(count_after < count_before)
        # A handful of shared blobs is expected; 0 means the pass did nothing,
        # many more suggests over-aggressive sharing.
        self.assertTrue(num_shared_blobs < 7 and num_shared_blobs > 0)
        np.testing.assert_almost_equal(loss1, optimized_loss1)

    def test_resnet_forward_only_fast_simplenet(self):
        '''
        Test C++ memonger that is only for simple nets
        '''
        model = cnn.CNNModelHelper(
            order="NCHW",
            name="test",
            cudnn_exhaustive_search=True,
        )
        with core.NameScope("gpu_0"):
            data = model.net.AddExternalInput("gpu_0/data")
            resnet.create_resnet50(
                model,
                data,
                num_input_channels=3,
                num_labels=1000,
                is_test=True
            )

        count_before = count_blobs(model.net.Proto())
        t = time.time()
        # Input and final output blobs must be excluded from recycling so
        # callers can still feed/fetch them by name.
        optim_proto = memonger.optimize_inference_fast(
            model.net.Proto(),
            set(["gpu_0/data", "gpu_0/last_out_L1000"]).union(
                set(model.net.Proto().external_input))
        )
        print("Optimization took {} secs".format(time.time() - t))
        count_after = count_blobs(optim_proto)
        num_shared_blobs = count_shared_blobs(optim_proto)

        self.assertTrue(count_after < count_before)
        print(count_after, count_before, num_shared_blobs)
        self.assertTrue(num_shared_blobs < 7 and num_shared_blobs > 0)

        # Run model and compare results
        workspace.RunNetOnce(model.param_init_net)
        data = np.random.rand(4, 3, 227, 227).astype(np.float32)

        workspace.FeedBlob("gpu_0/data", data)
        # The fast C++ memonger only supports 'simple' (sequential) nets.
        model.net.Proto().type = 'simple'

        workspace.RunNetOnce(model.net)
        loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
| |
| |
if __name__ == "__main__":
    import unittest
    import random

    # Fixed seed so any randomized test data is reproducible across runs.
    random.seed(2603)
    init_flags = [
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0',
        '--caffe2_gpu_memory_tracking=1',
    ]
    workspace.GlobalInit(init_flags)
    unittest.main()