| # Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Functional tests for fused conv2d bias and activation operation.""" |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import numpy as np |
| |
| from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import errors_impl |
| from tensorflow.python.framework import test_util |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import gen_array_ops |
| from tensorflow.python.ops import nn_ops |
| from tensorflow.python.ops import random_ops |
| from tensorflow.python.platform import test |
| from tensorflow.python.platform import tf_logging |
| |
| |
def GetShrunkInceptionShapes(shrink=10):
  """Iterator for smaller versions of convolution shapes in 2015 Inception.

  Relative to inception, each depth value is `depth // shrink`.

  Args:
    shrink: Factor to shrink each depth value by relative to Inception.

  Yields:
    Tuple (input_size, filter_size, out_size, stride, padding), the convolution
    parameters of Inception layers.
  """
  # The k-th entries of the five tables below together describe one Inception
  # convolution layer.
  input_sizes = [
      [4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 2048],
      [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048],
      [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760],
      [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 1248], [4, 17, 17, 128],
      [4, 17, 17, 1248], [4, 17, 17, 224], [4, 17, 17, 192], [4, 17, 17, 192],
      [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [4, 17, 17, 192],
      [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192],
      [4, 17, 17, 160], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128],
      [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128],
      [4, 17, 17, 768], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768],
      [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [4, 35, 35, 64],
      [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [4, 35, 35, 256],
      [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [4, 35, 35, 192],
      [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]
  ]
  filter_sizes = [
      [1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [1, 1, 2048, 192],
      [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448],
      [1, 1, 2048, 384], [1, 1, 1760, 384], [1, 1, 1760, 192],
      [1, 1, 1760, 448], [1, 1, 1760, 320], [3, 3, 192, 192], [3, 3, 192, 192],
      [1, 1, 1248, 192], [3, 3, 128, 320], [1, 1, 1248, 128], [1, 3, 224, 224],
      [3, 1, 192, 256], [1, 3, 192, 256], [1, 1, 1216, 192], [1, 1, 1216, 96],
      [3, 1, 224, 224], [3, 3, 192, 224], [1, 3, 192, 192], [1, 1, 1152, 192],
      [1, 1, 1152, 128], [3, 1, 192, 192], [3, 3, 160, 192], [1, 1, 1152, 160],
      [1, 1, 1024, 128], [1, 3, 128, 192], [1, 1, 1024, 160], [3, 1, 128, 192],
      [1, 1, 1024, 256], [3, 1, 128, 128], [1, 1, 768, 192], [1, 3, 128, 128],
      [3, 3, 128, 128], [1, 1, 768, 128], [1, 1, 768, 320], [3, 3, 96, 96],
      [3, 3, 288, 384], [3, 3, 64, 96], [1, 1, 288, 64], [1, 1, 256, 64],
      [5, 5, 48, 64], [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32],
      [1, 1, 192, 64], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, 64],
      [1, 1, 24, 64]
  ]
  out_sizes = [
      [4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192],
      [4, 8, 8, 384], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384],
      [4, 8, 8, 384], [4, 8, 8, 192], [4, 8, 8, 448], [4, 8, 8, 320],
      [4, 8, 8, 192], [4, 17, 17, 192], [4, 17, 17, 192], [4, 8, 8, 320],
      [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [4, 17, 17, 256],
      [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224],
      [4, 17, 17, 192], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192],
      [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 128], [4, 17, 17, 192],
      [4, 17, 17, 160], [4, 17, 17, 192], [4, 17, 17, 256], [4, 17, 17, 128],
      [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 128],
      [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96],
      [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48],
      [4, 35, 35, 96], [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48],
      [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]
  ]
  strides = [
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1,
      1, 1, 1, 1, 1, 1, 1
  ]
  # pylint: disable=invalid-name
  VALID = "VALID"
  SAME = "SAME"
  # pylint: enable=invalid-name
  paddings = [
      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME,
      SAME, SAME, SAME, SAME, VALID, VALID, VALID
  ]
  # Shrink the depth dimensions (in place) so the tests stay small and fast.
  for input_size in input_sizes:
    input_size[3] //= shrink
  for filter_size in filter_sizes:
    filter_size[2] //= shrink
    filter_size[3] //= shrink
  for out_size in out_sizes:
    out_size[3] //= shrink
  for layer_params in zip(input_sizes, filter_sizes, out_sizes, strides,
                          paddings):
    yield layer_params
| |
| |
def GetTestConfigs():
  """Get all the valid tests configs to run.

  Returns:
    all the valid test configs as tuples of data_format and use_gpu.
  """
  # Both tensor layouts are exercised, and both on GPU.
  return [(data_format, True) for data_format in ("NCHW", "NHWC")]
| |
| |
class FusedConv2DBiasActivationTest(test.TestCase):
  """Compares fused_conv2d_bias_activation against unfused reference ops."""

  def _DtypesToTest(self, use_gpu):
    """Returns the list of dtypes to exercise for the given placement."""
    return [dtypes.float32]

  def _FilterFormatsToTest(self, use_gpu):
    """Returns the list of filter layouts to exercise for the given placement."""
    return ["HWIO", "OIHW"]

  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias,
                            strides, padding, activation_mode, data_format,
                            filter_format, dtype):
    """Builds the fused op output and an unfused reference output.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      bias: 1-D bias tensor of length output_depth.
      strides: Stride: [col_stride, row_stride]
      padding: Padding type.
      activation_mode: Activation mode.
      data_format: Format of the data tensors.
      filter_format: Filter format to use for the fused convolution.
      dtype: Data type for inputs and outputs.
    Returns:
      Symbolic tensor value and reference value that can be used to
      execute the computation and verify the results.
    """
    input_size = np.prod(tensor_in_sizes)
    filter_size = np.prod(filter_in_sizes)
    bias_size = filter_in_sizes[-1]  # equals to output depth
    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    x1 = [f * 1.0 for f in range(1, input_size + 1)]
    x2 = [f * 1.0 for f in range(1, filter_size + 1)]
    # The caller picks biases that guarantee some negative values after
    # bias add so that we can test whether relu works correctly.
    x3 = bias
    with self.test_session(use_gpu=True):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
      fused_t2 = t2
      if filter_format == "OIHW":
        fused_t2 = HwioToOihw(t2)
      t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype)
      strides = [1] + strides + [1]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
      output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          t1,
          fused_t2,
          t3,
          strides=strides,
          padding=padding,
          data_format=data_format,
          filter_format=filter_format,
          activation_mode=activation_mode)
      # Reference path: unfused conv2d + bias_add + relu.
      ref_conv_output = nn_ops.conv2d(
          t1, t2, strides=strides, padding=padding, data_format=data_format)
      ref_bias_output = nn_ops.bias_add(
          ref_conv_output, t3, data_format=data_format)
      ref_output = nn_ops.relu(ref_bias_output)
      if data_format == "NCHW":
        output = test_util.NCHWToNHWC(output)
        ref_output = test_util.NCHWToNHWC(ref_output)

      return output, ref_output

  def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides,
                        padding):
    """Verifies that CPU and GPU produce the same values.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      conv_strides: [row_stride, col_stride] for the convolution;
      padding: Padding type.
    """
    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)
    x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32)

    def _SetupVal(data_format, use_gpu):
      """Builds the fused op graph for one (data_format, use_gpu) config."""
      with self.test_session(use_gpu=use_gpu):
        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
        t2 = constant_op.constant(x2, shape=filter_in_sizes)
        t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]])
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t1 = test_util.NHWCToNCHW(t1)
          strides = test_util.NHWCToNCHW(strides)
        output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
            t1,
            t2,
            t3,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation_mode="Relu")

        if data_format == "NCHW":
          output = test_util.NCHWToNHWC(output)
        return output

    tensors = []
    for (data_format, use_gpu) in GetTestConfigs():
      tensors.append(_SetupVal(data_format, use_gpu))
    with self.test_session() as sess:
      values = sess.run(tensors)
      # All configs must agree with the first one.
      for i in range(1, len(values)):
        self.assertAllClose(values[0], values[i], rtol=1e-5, atol=1e-5)

  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides,
                    padding):
    """Checks the fused op against the unfused reference for all configs.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      bias: 1-D bias values of length output_depth.
      strides: [row_stride, col_stride] for the convolution.
      padding: Padding type.
    """
    tensors = []
    ref_tensors = []
    for (data_format, use_gpu) in GetTestConfigs():
      for dtype in self._DtypesToTest(use_gpu):
        for filter_format in self._FilterFormatsToTest(use_gpu):
          result, expected = self._SetupValuesForDevice(
              tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu",
              data_format, filter_format, dtype)
          tensors.append(result)
          ref_tensors.append(expected)
    with self.test_session() as sess:
      values = sess.run(tensors)
      ref_values = sess.run(ref_tensors)
      for i in range(len(tensors)):
        conv = tensors[i]
        value = values[i]
        ref_value = ref_values[i]
        # Fix: tf_logging.info uses lazy %-formatting; the values must be
        # passed through a "%s" placeholder, not as extra print-style args.
        tf_logging.info("expected = %s", ref_value)
        tf_logging.info("actual = %s", value)
        tol = 1e-5
        if value.dtype == np.float16:
          tol = 1e-3
        self.assertAllClose(
            np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol)
        self.assertShapeEqual(value, conv)

  def testConv2D1x1Filter(self, gpu_only=True):
    """Tests a 1x1 filter with biases chosen to exercise relu clipping."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D1x1Filter test.")
      return
    # expected_output = [
    #    0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0,
    #    86.0, 43.0, 165.0, 131.0, 97.0
    # ]
    medians = [-45.0, -130.0, -215.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        bias=medians,
        strides=[1, 1],
        padding="VALID")

  def testConv2DEmpty(self, gpu_only=True):
    """Tests a zero-batch input, which must yield an empty output."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DEmpty test.")
      return
    # expected_output = []
    self._VerifyValues(
        tensor_in_sizes=[0, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        bias=[0.0, 0.0, 0.0],
        strides=[1, 1],
        padding="VALID")

  def testConv2D2x2Filter(self, gpu_only=True):
    """Tests a 2x2 filter with VALID padding."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2Filter test.")
      return
    # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        bias=[-2500.0, -2500.0, -2500.0],
        strides=[1, 1],
        padding="VALID")

  def testConv2D1x2Filter(self, gpu_only=True):
    """Tests a non-square 1x2 filter."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D1x2Filter test.")
      return
    # expected_output = [
    #    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0
    # ]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 2, 3, 3],
        bias=[-500.0, -500.0, -500.0],
        strides=[1, 1],
        padding="VALID")

  def testConv2D2x2FilterStride2(self, gpu_only=True):
    """Tests a strided 2x2 filter with VALID padding."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2FilterStride2 test.")
      return
    # expected_output = [0.0, 67.0, 163.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        bias=[-2300.0, -2300.0, -2300.0],
        strides=[2, 2],
        padding="VALID")

  def testConv2D2x2FilterStride2Same(self, gpu_only=True):
    """Tests a strided 2x2 filter with SAME padding."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.")
      return
    # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        bias=[-2300.0, -1000.0, -1000.0],
        strides=[2, 2],
        padding="SAME")

  def testConv2D2x2FilterStride1x2(self, gpu_only=True):
    """Tests asymmetric strides [1, 2]."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.")
      return
    # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 6, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-90.0],
        strides=[1, 2],
        padding="VALID")

  def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True):
    """Tests a kernel smaller than the stride, with VALID padding."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.")
      return
    # expected_output = [0, 0, 175, 205]
    self._VerifyValues(
        tensor_in_sizes=[1, 7, 7, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-100.0],
        strides=[3, 3],
        padding="VALID")

  def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True):
    """Tests kernels smaller than the stride, with SAME padding."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.")
      return
    # expected = [0, 0, 2, 4]
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 3, 1],
        filter_in_sizes=[1, 1, 1, 1],
        bias=[-5.0],
        strides=[2, 2],
        padding="SAME")

    # expected = [0, 0, 4, 6]
    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[1, 1, 1, 1],
        bias=[-5.0],
        strides=[2, 2],
        padding="SAME")

    # expected = [4, 0, 1, 0]
    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-40.0],
        strides=[3, 3],
        padding="SAME")

  def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True):
    """Tests a kernel whose spatial size equals the input size."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.")
      return
    # expected = [0, 5]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 2, 1],
        filter_in_sizes=[2, 2, 1, 2],
        bias=[-50.0, -55.0],
        strides=[1, 1],
        padding="VALID")

    # expected = [0, 2, 282, 322]
    self._VerifyValues(
        tensor_in_sizes=[1, 8, 8, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-200.0],
        strides=[4, 4],
        padding="SAME")

  def testShapeFunctionEdgeCases(self):
    """Tests the op's shape function on unknown and invalid shapes."""
    # All shapes unknown.
    c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        array_ops.placeholder(dtypes.float32),
        array_ops.placeholder(dtypes.float32),
        array_ops.placeholder(dtypes.float32),
        strides=[1, 1, 1, 1],
        padding="SAME",
        activation_mode="Relu")
    self.assertEqual([None, None, None, None], c1.get_shape().as_list())

    # Incorrect input shape.
    with self.assertRaises(ValueError):
      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME",
          activation_mode="Relu")

    # Incorrect filter shape.
    with self.assertRaises(ValueError):
      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME",
          activation_mode="Relu")

    # Depth mismatch.
    with self.assertRaises(ValueError):
      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
          array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME",
          activation_mode="Relu")

  def testOpEdgeCases(self, gpu_only=True):
    """Tests rejection of illegal strides, activations and filter sizes."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping OpEdgeCases tests.")
      return
    with self.test_session() as sess:
      # Illegal strides.
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   "Convolutional strides are not supported in "
                                   "the batch or depth dimensions."):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                strides=[2, 1, 1, 1],
                padding="SAME",
                activation_mode="Relu"))
      with self.assertRaisesRegexp(errors_impl.InvalidArgumentError,
                                   "Convolutional strides are not supported in "
                                   "the batch or depth dimensions."):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                strides=[1, 1, 1, 2],
                padding="SAME",
                activation_mode="Relu"))

      # Illegal activation mode.
      with self.assertRaisesRegexp(ValueError,
                                   "Op passed string 'Tanh' not in:"):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                strides=[1, 1, 1, 1],
                padding="SAME",
                activation_mode="Tanh"))

      # Filter larger than input.
      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
                array_ops.placeholder(dtypes.float32, shape=[20, 21, 3, 2]),
                array_ops.placeholder(dtypes.float32, shape=[2]),
                strides=[1, 1, 1, 1],
                padding="VALID",
                activation_mode="Relu"))
      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
                array_ops.placeholder(dtypes.float32, shape=[21, 20, 3, 2]),
                array_ops.placeholder(dtypes.float32, shape=[2]),
                strides=[1, 1, 1, 1],
                padding="VALID",
                activation_mode="Relu"))
| |
| |
def GetInceptionFwdTest(input_size, filter_size, stride, padding,
                        gpu_only=True):
  """Builds a forward-comparison test method for one Inception conv shape.

  Args:
    input_size: Input tensor shape [batch, rows, cols, depth].
    filter_size: Filter shape [kernel_rows, kernel_cols, in_depth, out_depth].
    stride: Convolution stride, applied to both spatial dimensions.
    padding: Padding type, "SAME" or "VALID".
    gpu_only: If True, the generated test is skipped when no GPU is available.

  Returns:
    A function suitable for attaching to FusedConv2DBiasActivationTest.
  """

  def Test(self):
    shape_info = (input_size, filter_size, stride, padding)
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping InceptionFwd %s", shape_info)
      return
    tf_logging.info("Testing InceptionFwd %s", shape_info)
    self._CompareFwdValues(input_size, filter_size, [stride, stride], padding)

  return Test
| |
| |
def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type):
  """Calculates the size of an output dimension of a strided convolution.

  Given the sizes of the corresponding dimension of the input and filter shapes,
  and the stride and padding_types, calculates the size of the output dimension.
  This function can be called separately for each input dimension.

  Args:
    input_dim: An `int` specifying the size of the input dimension.
    filter_dim: An `int` specifying the size of the filter dimension.
    stride: An `int` specifying the step size of the convolution along the
      input dimension.
    padding_type: either 'VALID' or 'SAME'.

  Returns:
    The size of the output dimension.
  """
  if padding_type == "VALID":
    # ceil((input_dim - filter_dim + 1) / stride), written with integer math.
    effective_span = input_dim - filter_dim + stride
  else:  # padding_type == 'SAME'
    # ceil(input_dim / stride), written with integer math.
    effective_span = input_dim + stride - 1
  return effective_span // stride
| |
| |
def NchwVectCToNchw(in_tensor):
  """Converts an NCHW_VECT_C tensor to NCHW by folding the vector dim into C."""
  # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W]
  permuted = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3])
  batch, c_div4, height, width, vect = [
      dim.value for dim in in_tensor.shape.dims
  ]
  return array_ops.reshape(permuted, [batch, c_div4 * vect, height, width])
| |
| |
def OihwVectIToHwio(in_tensor):
  """Converts an OIHW_VECT_I filter to HWIO by folding the vector dim into I."""
  # [O, I / 4, H, W, 4] => [H, W, I / 4, 4, O] == [H, W, I, O]
  permuted = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0])
  out_depth, i_div4, height, width, vect = [
      dim.value for dim in in_tensor.shape.dims
  ]
  return array_ops.reshape(permuted, [height, width, i_div4 * vect, out_depth])
| |
| |
def NchwToNchwVectC(in_tensor):
  """Converts an NCHW tensor to NCHW_VECT_C by splitting C into C/4 and 4."""
  batch, channels, height, width = in_tensor.shape.as_list()
  # The channel count must be divisible by the vector width of 4.
  assert channels % 4 == 0
  split = array_ops.reshape(in_tensor, [batch, channels // 4, 4, height, width])
  return array_ops.transpose(split, [0, 1, 3, 4, 2])
| |
| |
def HwioToOihw(in_tensor):
  """Converts an HWIO filter tensor to OIHW layout."""
  # HWIO axes (0=H, 1=W, 2=I, 3=O) are permuted to O, I, H, W.
  return array_ops.transpose(in_tensor, [3, 2, 0, 1])
| |
| |
def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                          padding, strides, side_input_scale,
                                          side_input, biases, apply_relu):
  """Simulates the int8 fused 2-D convolution op using separate float ops.

  The arguments and return values have the same format, meanings and
  restrictions as the actual op.
  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32` in NCHW layout.
    apply_relu: A boolean to specify whether to apply "Relu" activation function
      that clips outputs to the range [0, 127], or "None" activation that clips
      to the range [-128, 127].
  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """
  # Dequantize the int8 operands to float and rearrange them into the layouts
  # that the plain float conv2d expects.
  dequantized_input = NchwVectCToNchw(
      gen_array_ops.dequantize(conv_input, -128, 127))
  dequantized_kernel = OihwVectIToHwio(
      gen_array_ops.dequantize(kernel, -128, 127))
  conv_result = nn_ops.conv2d(
      dequantized_input,
      dequantized_kernel,
      strides=strides,
      padding=padding,
      data_format="NCHW") * conv_input_scale

  # Accumulate the scaled side input, then add the bias.
  dequantized_side = NchwVectCToNchw(
      gen_array_ops.dequantize(side_input, -128, 127))
  accumulated = conv_result + side_input_scale * dequantized_side
  biased = nn_ops.bias_add(accumulated, biases, data_format="NCHW")
  activated = nn_ops.relu(biased) if apply_relu else biased

  # Quantize back to qint8 in the vectorized layout.
  quantized, _, _ = gen_array_ops.quantize_v2(
      NchwToNchwVectC(activated), -128, 127, dtypes.qint8)
  return quantized
| |
| |
class FusedConvInt8Tests(test.TestCase):
  """Checks the int8 fused op against a float simulation of the same math."""

  # Each dict fully describes one convolution configuration; every entry is
  # run both with and without relu by testFusedConvInt8.
  _test_params = [
      {
          "batch_size": 1,
          "input_channels": 4,
          "output_channels": 4,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 6,
          "filter_width": 6,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 1,
          "input_channels": 4,
          "output_channels": 4,
          "input_height": 6,
          "input_width": 6,
          "filter_height": 6,
          "filter_width": 6,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "VALID"
      },
      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "VALID"
      },
      {
          "batch_size": 2,
          "input_channels": 16,
          "output_channels": 16,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 1,
          "horizontal_stride": 1,
          "conv_input_scale": 0.001,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 3,
          "input_channels": 8,
          "output_channels": 8,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 5,
          "filter_width": 5,
          "vertical_stride": 1,
          "horizontal_stride": 1,
          "conv_input_scale": 0.001,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 3,
          "input_channels": 8,
          "output_channels": 8,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 7,
          "filter_width": 1,
          "vertical_stride": 2,
          "horizontal_stride": 1,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 3,
          "input_channels": 8,
          "output_channels": 8,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 1,
          "filter_width": 7,
          "vertical_stride": 1,
          "horizontal_stride": 1,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
  ]

  def runTest(self, test_param, apply_relu):
    """Runs the int8 fused op for one parameter set and checks the result.

    Args:
      test_param: Dict describing the convolution (see `_test_params`).
      apply_relu: Whether to run with the "Relu" activation mode.
    """
    batch_size = test_param["batch_size"]
    input_channels = test_param["input_channels"]
    output_channels = test_param["output_channels"]
    input_height = test_param["input_height"]
    input_width = test_param["input_width"]
    filter_height = test_param["filter_height"]
    filter_width = test_param["filter_width"]
    vertical_stride = test_param["vertical_stride"]
    horizontal_stride = test_param["horizontal_stride"]
    conv_input_scale = test_param["conv_input_scale"]
    side_input_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding_type = test_param["padding_type"]

    # Random qint8 input in NCHW_VECT_C layout.
    conv_input, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [batch_size, input_channels // 4, input_height, input_width, 4],
            minval=-0.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    # Random qint8 kernel in OIHW_VECT_I layout.
    kernel, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [
                output_channels, input_channels // 4, filter_height,
                filter_width, 4
            ],
            minval=-1.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    output_height = CalculateConvolvedOutputDim(input_height, filter_height,
                                                vertical_stride, padding_type)
    output_width = CalculateConvolvedOutputDim(input_width, filter_width,
                                               horizontal_stride, padding_type)
    # Fix: tf_logging.info uses lazy %-formatting; the values must be passed
    # through "%s" placeholders, not as extra print-style args.
    tf_logging.info("output_height=%s, output_width=%s", output_height,
                    output_width)

    # Random qint8 side input with the output's shape, NCHW_VECT_C layout.
    side_input, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [batch_size, output_channels // 4, output_height, output_width, 4],
            minval=0.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    biases = random_ops.random_uniform(
        [output_channels],
        minval=-10 * bias_scale,
        maxval=20 * bias_scale,
        dtype=dtypes.float32)

    strides = [1, 1, vertical_stride, horizontal_stride]

    actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        conv_input,
        kernel,
        biases,
        strides=strides,
        padding=padding_type,
        conv_input_scale=conv_input_scale,
        side_input_scale=side_input_scale,
        side_input=side_input,
        activation_mode="Relu" if apply_relu else "None",
        data_format="NCHW_VECT_C",
        filter_format="OIHW_VECT_I")

    expected = SimulateFusedConv2dBiasActivationInt8(
        conv_input_scale, conv_input, kernel, padding_type, strides,
        side_input_scale, side_input, biases, apply_relu)

    with self.test_session(use_gpu=True) as sess:
      actual_y, expected_y = sess.run([actual, expected])
      tf_logging.info("actual_y = %s", actual_y)
      tf_logging.info("expected_y = %s", expected_y)
      self.assertTrue(np.array_equal(actual_y, expected_y))

  def testFusedConvInt8(self):
    """Runs every parameter set with and without relu, GPU permitting."""
    if not test.is_gpu_available(
        cuda_only=True, min_cuda_compute_capability=(6, 1)):
      tf_logging.info("int8 test skipped because not run with --config=cuda or "
                      "no GPUs with compute capability >= 6.1 are available.")
      return
    for apply_relu in [True, False]:
      for test_param in self._test_params:
        self.runTest(test_param, apply_relu)
| |
| |
if __name__ == "__main__":
  # Dynamically attach one forward-comparison test per shrunk Inception shape.
  # output_size_ is not needed by the forward test and is ignored.
  for index, (input_size_, filter_size_, output_size_, stride_,
              padding_) in enumerate(GetShrunkInceptionShapes()):
    setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index),
            GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_))

  # TODO(b/35359731)
  # Fwd, BckInput, and BackFilter to test that for certain input parameter
  # set, winograd nonfused algorithm will be excluded from conv autotune. If
  # in such case, winograd nonfused algorithm is added as one option of the
  # conv autotune, and cuDNN version is smaller than 7, the following tests
  # will fail.
  ishape = [1, 400, 400, 1]
  fshape = [1, 1, 1, 256]
  oshape = [1, 400, 400, 256]  # NOTE(review): unused; kept as documentation.
  setattr(FusedConv2DBiasActivationTest,
          "testInceptionFwd_No_Winograd_Nonfused",
          GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True))
  test.main()