# blob: 0185ef662c2ed05b1ceaf0e3e8071bad4c0d1a0a [file] [log] [blame]
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functional tests for fused conv2d bias and activation operation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors_impl
from tensorflow.python.framework import test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
def GetShrunkInceptionShapes(shrink=10):
  """Generates reduced-depth convolution configurations from 2015 Inception.

  Every depth (channel) entry is floor-divided by `shrink`; batch and spatial
  dimensions are left as they are.

  Args:
    shrink: Factor to shrink each depth value by relative to Inception.

  Yields:
    Tuple (input_size, filter_size, out_size, stride, padding), the convolution
    parameters of Inception layers.
  """
  same = "SAME"
  valid = "VALID"
  # One row per layer: (input shape, filter shape, output shape, stride,
  # padding). The shapes carry the full, un-shrunk Inception depths.
  layers = [
      ([4, 5, 5, 1248], [1, 1, 1248, 128], [4, 5, 5, 128], 1, same),
      ([4, 8, 8, 384], [1, 3, 384, 384], [4, 8, 8, 384], 1, same),
      ([4, 8, 8, 384], [3, 1, 384, 384], [4, 8, 8, 384], 1, same),
      ([4, 8, 8, 2048], [1, 1, 2048, 192], [4, 8, 8, 192], 1, same),
      ([4, 8, 8, 448], [3, 3, 448, 384], [4, 8, 8, 384], 1, same),
      ([4, 8, 8, 2048], [1, 1, 2048, 320], [4, 8, 8, 320], 1, same),
      ([4, 8, 8, 2048], [1, 1, 2048, 448], [4, 8, 8, 448], 1, same),
      ([4, 8, 8, 2048], [1, 1, 2048, 384], [4, 8, 8, 384], 1, same),
      ([4, 8, 8, 1760], [1, 1, 1760, 384], [4, 8, 8, 384], 1, same),
      ([4, 8, 8, 1760], [1, 1, 1760, 192], [4, 8, 8, 192], 1, same),
      ([4, 8, 8, 1760], [1, 1, 1760, 448], [4, 8, 8, 448], 1, same),
      ([4, 8, 8, 1760], [1, 1, 1760, 320], [4, 8, 8, 320], 1, same),
      ([4, 17, 17, 192], [3, 3, 192, 192], [4, 8, 8, 192], 2, valid),
      ([4, 17, 17, 192], [3, 3, 192, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1248], [1, 1, 1248, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 128], [3, 3, 128, 320], [4, 8, 8, 320], 2, valid),
      ([4, 17, 17, 1248], [1, 1, 1248, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 224], [1, 3, 224, 224], [4, 17, 17, 224], 1, same),
      ([4, 17, 17, 192], [3, 1, 192, 256], [4, 17, 17, 256], 1, same),
      ([4, 17, 17, 192], [1, 3, 192, 256], [4, 17, 17, 256], 1, same),
      ([4, 17, 17, 1216], [1, 1, 1216, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1216], [1, 1, 1216, 96], [4, 17, 17, 96], 1, same),
      ([4, 17, 17, 224], [3, 1, 224, 224], [4, 17, 17, 224], 1, same),
      ([4, 17, 17, 192], [3, 3, 192, 224], [4, 17, 17, 224], 1, same),
      ([4, 17, 17, 192], [1, 3, 192, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1152], [1, 1, 1152, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1152], [1, 1, 1152, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 192], [3, 1, 192, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 160], [3, 3, 160, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1152], [1, 1, 1152, 160], [4, 17, 17, 160], 1, same),
      ([4, 17, 17, 1024], [1, 1, 1024, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 128], [1, 3, 128, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1024], [1, 1, 1024, 160], [4, 17, 17, 160], 1, same),
      ([4, 17, 17, 128], [3, 1, 128, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 1024], [1, 1, 1024, 256], [4, 17, 17, 256], 1, same),
      ([4, 17, 17, 128], [3, 1, 128, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 768], [1, 1, 768, 192], [4, 17, 17, 192], 1, same),
      ([4, 17, 17, 128], [1, 3, 128, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 128], [3, 3, 128, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 768], [1, 1, 768, 128], [4, 17, 17, 128], 1, same),
      ([4, 17, 17, 768], [1, 1, 768, 320], [4, 17, 17, 320], 1, same),
      ([4, 35, 35, 96], [3, 3, 96, 96], [4, 17, 17, 96], 2, valid),
      ([4, 35, 35, 288], [3, 3, 288, 384], [4, 17, 17, 384], 2, valid),
      ([4, 35, 35, 64], [3, 3, 64, 96], [4, 35, 35, 96], 1, same),
      ([4, 35, 35, 288], [1, 1, 288, 64], [4, 35, 35, 64], 1, same),
      ([4, 35, 35, 256], [1, 1, 256, 64], [4, 35, 35, 64], 1, same),
      ([4, 35, 35, 48], [5, 5, 48, 64], [4, 35, 35, 64], 1, same),
      ([4, 35, 35, 256], [1, 1, 256, 48], [4, 35, 35, 48], 1, same),
      ([4, 35, 35, 96], [3, 3, 96, 96], [4, 35, 35, 96], 1, same),
      ([4, 35, 35, 192], [1, 1, 192, 32], [4, 35, 35, 32], 1, same),
      ([4, 35, 35, 192], [1, 1, 192, 64], [4, 35, 35, 64], 1, same),
      ([4, 35, 35, 192], [1, 1, 192, 48], [4, 35, 35, 48], 1, same),
      ([4, 73, 73, 64], [3, 3, 64, 192], [4, 71, 71, 192], 1, valid),
      ([4, 73, 73, 64], [1, 1, 64, 64], [4, 73, 73, 64], 1, valid),
      ([4, 147, 147, 24], [1, 1, 24, 64], [4, 147, 147, 64], 1, valid),
  ]
  for in_shape, filter_shape, out_shape, stride, padding in layers:
    # Shrink sizes to make the test faster: only depth entries are divided
    # (last input/output dim, last two filter dims).
    small_in = in_shape[:3] + [in_shape[3] // shrink]
    small_filter = filter_shape[:2] + [
        filter_shape[2] // shrink, filter_shape[3] // shrink
    ]
    small_out = out_shape[:3] + [out_shape[3] // shrink]
    yield small_in, small_filter, small_out, stride, padding
def GetTestConfigs():
  """Lists the (data_format, use_gpu) combinations the tests iterate over.

  Returns:
    A list of (data_format, use_gpu) tuples; both entries request the GPU.
  """
  return [(data_format, True) for data_format in ("NCHW", "NHWC")]
class FusedConv2DBiasActivationTest(test.TestCase):
  """Compares the fused conv2d/bias/activation op against unfused ops."""

  def _DtypesToTest(self, use_gpu):
    """Returns the dtypes to exercise; `use_gpu` is accepted but not used."""
    return [dtypes.float32]

  def _FilterFormatsToTest(self, use_gpu):
    """Returns the filter layouts to exercise; `use_gpu` is not used."""
    return ["HWIO", "OIHW"]

  def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias,
                            strides, padding, activation_mode, data_format,
                            filter_format, dtype):
    """Builds the fused op and an unfused reference for one configuration.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      bias: 1-D bias tensor of length output_depth.
      strides: Stride: [row_stride, col_stride]
      padding: Padding type.
      activation_mode: Activation mode passed to the fused op. The reference
        path always applies Relu, so only "Relu" yields a meaningful
        comparison.
      data_format: Format of the data tensors.
      filter_format: Filter format to use for the fused convolution.
      dtype: Data type for inputs and outputs.

    Returns:
      Symbolic tensor value and reference value that can be used to
      execute the computation and verify the results. Both are converted
      back to NHWC when `data_format` is "NCHW".
    """
    input_size = np.prod(tensor_in_sizes)
    filter_size = np.prod(filter_in_sizes)
    bias_size = filter_in_sizes[-1]  # equals to output depth
    # Initializes the input and filter tensors with arrays containing
    # incrementing numbers from 1.
    x1 = [f * 1.0 for f in range(1, input_size + 1)]
    x2 = [f * 1.0 for f in range(1, filter_size + 1)]
    # The caller picks `bias` so that there are always negative values after
    # the bias add, which lets the test check that relu works correctly.
    x3 = bias
    with self.test_session(use_gpu=True):
      t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype)
      t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype)
      # The reference conv always consumes the HWIO filter; only the fused op
      # receives the transposed OIHW variant.
      fused_t2 = t2
      if filter_format == "OIHW":
        fused_t2 = HwioToOihw(t2)
      t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype)
      strides = [1] + strides + [1]
      if data_format == "NCHW":
        t1 = test_util.NHWCToNCHW(t1)
        strides = test_util.NHWCToNCHW(strides)
      output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          t1,
          fused_t2,
          t3,
          strides=strides,
          padding=padding,
          data_format=data_format,
          filter_format=filter_format,
          activation_mode=activation_mode)
      ref_conv_output = nn_ops.conv2d(
          t1, t2, strides=strides, padding=padding, data_format=data_format)
      ref_bias_output = nn_ops.bias_add(
          ref_conv_output, t3, data_format=data_format)
      ref_output = nn_ops.relu(ref_bias_output)
      if data_format == "NCHW":
        output = test_util.NCHWToNHWC(output)
        ref_output = test_util.NCHWToNHWC(ref_output)
      return output, ref_output

  def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides,
                        padding):
    """Verifies that all test configs produce the same fused-op values.

    Random inputs are fed to the fused op once per entry of
    `GetTestConfigs()`, and every result is compared with the first one.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      conv_strides: [row_stride, col_stride] for the convolution;
      padding: Padding type.
    """
    x1 = np.random.rand(*tensor_in_sizes).astype(np.float32)
    x2 = np.random.rand(*filter_in_sizes).astype(np.float32)
    x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32)

    def _SetupVal(data_format, use_gpu):
      """Builds the fused op for one config; the result is always NHWC."""
      with self.test_session(use_gpu=use_gpu):
        t1 = constant_op.constant(x1, shape=tensor_in_sizes)
        t2 = constant_op.constant(x2, shape=filter_in_sizes)
        t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]])
        strides = [1] + conv_strides + [1]
        if data_format == "NCHW":
          t1 = test_util.NHWCToNCHW(t1)
          strides = test_util.NHWCToNCHW(strides)
        output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
            t1,
            t2,
            t3,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation_mode="Relu")
        if data_format == "NCHW":
          output = test_util.NCHWToNHWC(output)
        return output

    tensors = [
        _SetupVal(data_format, use_gpu)
        for (data_format, use_gpu) in GetTestConfigs()
    ]
    with self.test_session() as sess:
      values = sess.run(tensors)
      baseline = values[0]
      for other in values[1:]:
        self.assertAllClose(baseline, other, rtol=1e-5, atol=1e-5)

  def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides,
                    padding):
    """Checks the fused op against the unfused reference for every config.

    Iterates over all data formats, dtypes and filter formats, evaluates the
    fused and reference tensors, and asserts that they agree within a
    dtype-dependent tolerance and that the static shape matches the value.

    Args:
      tensor_in_sizes: Input tensor dimensions in
        [batch, input_rows, input_cols, input_depth].
      filter_in_sizes: Filter tensor dimensions in
        [kernel_rows, kernel_cols, input_depth, output_depth].
      bias: 1-D bias values of length output_depth.
      strides: [row_stride, col_stride] for the convolution.
      padding: Padding type.
    """
    tensors = []
    ref_tensors = []
    for (data_format, use_gpu) in GetTestConfigs():
      for dtype in self._DtypesToTest(use_gpu):
        for filter_format in self._FilterFormatsToTest(use_gpu):
          result, expected = self._SetupValuesForDevice(
              tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu",
              data_format, filter_format, dtype)
          tensors.append(result)
          ref_tensors.append(expected)
    with self.test_session() as sess:
      values = sess.run(tensors)
      ref_values = sess.run(ref_tensors)
      for conv, value, ref_value in zip(tensors, values, ref_values):
        # The logging API formats lazily with `msg % args`, so the message
        # needs a placeholder for each argument.
        tf_logging.info("expected = %s", ref_value)
        tf_logging.info("actual = %s", value)
        tol = 1e-5
        if value.dtype == np.float16:
          tol = 1e-3
        self.assertAllClose(
            np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol)
        self.assertShapeEqual(value, conv)

  def testConv2D1x1Filter(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D1x1Filter test.")
      return
    # expected_output = [
    #    0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0,
    #    86.0, 43.0, 165.0, 131.0, 97.0
    # ]
    medians = [-45.0, -130.0, -215.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        bias=medians,
        strides=[1, 1],
        padding="VALID")

  def testConv2DEmpty(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DEmpty test.")
      return
    # expected_output = []
    self._VerifyValues(
        tensor_in_sizes=[0, 2, 3, 3],
        filter_in_sizes=[1, 1, 3, 3],
        bias=[0.0, 0.0, 0.0],
        strides=[1, 1],
        padding="VALID")

  def testConv2D2x2Filter(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2Filter test.")
      return
    # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        bias=[-2500.0, -2500.0, -2500.0],
        strides=[1, 1],
        padding="VALID")

  def testConv2D1x2Filter(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D1x2Filter test.")
      return
    # expected_output = [
    #    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0
    # ]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[1, 2, 3, 3],
        bias=[-500.0, -500.0, -500.0],
        strides=[1, 1],
        padding="VALID")

  def testConv2D2x2FilterStride2(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2FilterStride2 test.")
      return
    # expected_output = [0.0, 67.0, 163.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        bias=[-2300.0, -2300.0, -2300.0],
        strides=[2, 2],
        padding="VALID")

  def testConv2D2x2FilterStride2Same(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.")
      return
    # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 3, 3],
        filter_in_sizes=[2, 2, 3, 3],
        bias=[-2300.0, -1000.0, -1000.0],
        strides=[2, 2],
        padding="SAME")

  def testConv2D2x2FilterStride1x2(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.")
      return
    # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0]
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 6, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-90.0],
        strides=[1, 2],
        padding="VALID")

  def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.")
      return
    # expected_output = [0, 0, 175, 205]
    self._VerifyValues(
        tensor_in_sizes=[1, 7, 7, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-100.0],
        strides=[3, 3],
        padding="VALID")

  def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.")
      return
    # expected = [0, 0, 2, 4]
    self._VerifyValues(
        tensor_in_sizes=[1, 3, 3, 1],
        filter_in_sizes=[1, 1, 1, 1],
        bias=[-5.0],
        strides=[2, 2],
        padding="SAME")
    # expected = [0, 0, 4, 6]
    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[1, 1, 1, 1],
        bias=[-5.0],
        strides=[2, 2],
        padding="SAME")
    # expected = [4, 0, 1, 0]
    self._VerifyValues(
        tensor_in_sizes=[1, 4, 4, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-40.0],
        strides=[3, 3],
        padding="SAME")

  def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True):
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.")
      return
    # expected = [0, 5]
    self._VerifyValues(
        tensor_in_sizes=[1, 2, 2, 1],
        filter_in_sizes=[2, 2, 1, 2],
        bias=[-50.0, -55.0],
        strides=[1, 1],
        padding="VALID")
    # expected = [0, 2, 282, 322]
    self._VerifyValues(
        tensor_in_sizes=[1, 8, 8, 1],
        filter_in_sizes=[2, 2, 1, 1],
        bias=[-200.0],
        strides=[4, 4],
        padding="SAME")

  def testShapeFunctionEdgeCases(self):
    """Exercises static shape inference with unknown and malformed shapes."""
    # All shapes unknown.
    c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        array_ops.placeholder(dtypes.float32),
        array_ops.placeholder(dtypes.float32),
        array_ops.placeholder(dtypes.float32),
        strides=[1, 1, 1, 1],
        padding="SAME",
        activation_mode="Relu")
    self.assertEqual([None, None, None, None], c1.get_shape().as_list())
    # Incorrect input shape.
    with self.assertRaises(ValueError):
      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME",
          activation_mode="Relu")
    # Incorrect filter shape.
    with self.assertRaises(ValueError):
      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          array_ops.placeholder(dtypes.float32),
          array_ops.placeholder(dtypes.float32, shape=[1, 3]),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME",
          activation_mode="Relu")
    # Depth mismatch.
    with self.assertRaises(ValueError):
      fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
          array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
          array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]),
          array_ops.placeholder(dtypes.float32),
          strides=[1, 1, 1, 1],
          padding="SAME",
          activation_mode="Relu")

  def testOpEdgeCases(self, gpu_only=True):
    """Checks the errors raised for illegal strides, activation and sizes."""
    if gpu_only and not test.is_gpu_available():
      tf_logging.info("Skipping OpEdgeCases tests.")
      return
    with self.test_session() as sess:
      # Illegal strides.
      with self.assertRaisesRegexp(
          errors_impl.InvalidArgumentError,
          "Convolutional strides are not supported in "
          "the batch or depth dimensions."):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                strides=[2, 1, 1, 1],
                padding="SAME",
                activation_mode="Relu"))
      with self.assertRaisesRegexp(
          errors_impl.InvalidArgumentError,
          "Convolutional strides are not supported in "
          "the batch or depth dimensions."):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                strides=[1, 1, 1, 2],
                padding="SAME",
                activation_mode="Relu"))
      # Illegal activation mode.
      with self.assertRaisesRegexp(ValueError,
                                   "Op passed string 'Tanh' not in:"):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                array_ops.placeholder(dtypes.float32),
                strides=[1, 1, 1, 1],
                padding="SAME",
                activation_mode="Tanh"))
      # Filter larger than input.
      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
                array_ops.placeholder(dtypes.float32, shape=[20, 21, 3, 2]),
                array_ops.placeholder(dtypes.float32, shape=[2]),
                strides=[1, 1, 1, 1],
                padding="VALID",
                activation_mode="Relu"))
      with self.assertRaisesRegexp(ValueError, "Negative dimension size"):
        sess.run(
            fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
                array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]),
                array_ops.placeholder(dtypes.float32, shape=[21, 20, 3, 2]),
                array_ops.placeholder(dtypes.float32, shape=[2]),
                strides=[1, 1, 1, 1],
                padding="VALID",
                activation_mode="Relu"))
def GetInceptionFwdTest(input_size, filter_size, stride, padding,
                        gpu_only=True):
  """Creates a test method that runs `_CompareFwdValues` for one layer shape.

  Args:
    input_size: Input tensor dimensions forwarded to `_CompareFwdValues`.
    filter_size: Filter tensor dimensions forwarded to `_CompareFwdValues`.
    stride: Single stride value, used for both rows and columns.
    padding: Padding type.
    gpu_only: If true, the generated test only logs and returns when no GPU
      is available.

  Returns:
    A function taking `self`, suitable for attaching to a test case class.
  """

  def Test(self):
    config = (input_size, filter_size, stride, padding)
    if not gpu_only or test.is_gpu_available():
      tf_logging.info("Testing InceptionFwd %s", config)
      self._CompareFwdValues(input_size, filter_size, [stride, stride],
                             padding)
    else:
      tf_logging.info("Skipping InceptionFwd %s", config)

  return Test
def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type):
  """Computes one output dimension of a strided convolution.

  Call it once per spatial dimension, with the matching input size, filter
  size and stride.

  Args:
    input_dim: An `int` specifying the size of the input dimension.
    filter_dim: An `int` specifying the size of the filter dimension.
    stride: An `int` specifying the step size of the convolution along the
      input dimension.
    padding_type: either 'VALID' or 'SAME'. Any value other than 'VALID' is
      handled as 'SAME'.

  Returns:
    The size of the output dimension.
  """
  if padding_type != "VALID":
    # 'SAME': ceil(input_dim / stride); the filter size plays no role.
    return (input_dim + stride - 1) // stride
  # 'VALID': number of filter positions that fit entirely inside the input.
  return (input_dim - filter_dim + stride) // stride
def NchwVectCToNchw(in_tensor):
  """Folds the trailing vector axis of a rank-5 tensor into its channel axis.

  Args:
    in_tensor: A tensor with five statically known dimensions, read here as
      [N, C / V, H, W, V].

  Returns:
    The tensor reshaped to [N, C, H, W].
  """
  # [N, C / V, H, W, V] => [N, C / V, V, H, W] == [N, C, H, W]
  transposed = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3])
  dims = in_tensor.shape.dims
  batch = dims[0].value
  channels = dims[1].value * dims[4].value
  height = dims[2].value
  width = dims[3].value
  return array_ops.reshape(transposed, [batch, channels, height, width])
def OihwVectIToHwio(in_tensor):
  """Converts a rank-5 vectorized OIHW filter into an HWIO filter.

  Args:
    in_tensor: A tensor with five statically known dimensions, read here as
      [O, I / V, H, W, V].

  Returns:
    The tensor reshaped to [H, W, I, O].
  """
  # [O, I / V, H, W, V] => [H, W, I / V, V, O] == [H, W, I, O]
  transposed = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0])
  dims = in_tensor.shape.dims
  out_depth = dims[0].value
  in_depth = dims[1].value * dims[4].value
  height = dims[2].value
  width = dims[3].value
  return array_ops.reshape(transposed, [height, width, in_depth, out_depth])
def NchwToNchwVectC(in_tensor):
  """Splits the channel axis of an NCHW tensor into groups of four.

  Args:
    in_tensor: A rank-4 tensor read as [N, C, H, W]; C must be a multiple
      of 4.

  Returns:
    The tensor rearranged to [N, C / 4, H, W, 4].
  """
  batch, channels, height, width = in_tensor.shape.as_list()
  assert channels % 4 == 0
  # [N, C, H, W] == [N, C / 4, 4, H, W] => [N, C / 4, H, W, 4]
  grouped = array_ops.reshape(in_tensor,
                              [batch, channels // 4, 4, height, width])
  return array_ops.transpose(grouped, [0, 1, 3, 4, 2])
def HwioToOihw(in_tensor):
  """Transposes a filter from [H, W, I, O] layout to [O, I, H, W] layout."""
  hwio_to_oihw = [3, 2, 0, 1]
  return array_ops.transpose(in_tensor, hwio_to_oihw)
def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel,
                                          padding, strides, side_input_scale,
                                          side_input, biases, apply_relu):
  """Reference implementation of the int8 fused op built from float ops.

  The arguments and return values have the same format, meanings and
  restrictions as the actual op. The quantized inputs are dequantized,
  convolved in NCHW, combined with the scaled side input and the bias,
  optionally passed through Relu, and quantized again.

  Args:
    conv_input_scale: A scalar 'float'.
    conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout.
    padding: A `string` from: `"SAME", "VALID"`.
    strides: A list of `ints`.
    side_input_scale: A scalar 'float'.
    side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout.
    biases: A `Tensor` of type `float32`, added along the channel axis of the
      NCHW result.
    apply_relu: A boolean to specify whether to apply "Relu" activation function
      that clips outputs to the range [0, 127], or "None" activation that clips
      to the range [-128, 127].

  Returns:
    A `Tensor` of type `qint8` in NCHW_VECT_C layout.
  """
  float_input = NchwVectCToNchw(
      gen_array_ops.dequantize(conv_input, -128, 127))
  float_kernel = OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127))
  scaled_conv = nn_ops.conv2d(
      float_input,
      float_kernel,
      strides=strides,
      padding=padding,
      data_format="NCHW") * conv_input_scale
  float_side_input = NchwVectCToNchw(
      gen_array_ops.dequantize(side_input, -128, 127))
  summed = scaled_conv + side_input_scale * float_side_input
  activations = nn_ops.bias_add(summed, biases, data_format="NCHW")
  if apply_relu:
    activations = nn_ops.relu(activations)
  # quantize_v2 returns three outputs; only the first (the quantized tensor)
  # is needed here.
  quantized, _, _ = gen_array_ops.quantize_v2(
      NchwToNchwVectC(activations), -128, 127, dtypes.qint8)
  return quantized
class FusedConvInt8Tests(test.TestCase):
  """Compares the int8 fused op with a float simulation built from TF ops."""

  # Each entry describes one convolution configuration consumed by `runTest`.
  _test_params = [
      {
          "batch_size": 1,
          "input_channels": 4,
          "output_channels": 4,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 6,
          "filter_width": 6,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 1,
          "input_channels": 4,
          "output_channels": 4,
          "input_height": 6,
          "input_width": 6,
          "filter_height": 6,
          "filter_width": 6,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "VALID"
      },
      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.0,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 2,
          "input_channels": 8,
          "output_channels": 16,
          "input_height": 8,
          "input_width": 8,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 2,
          "horizontal_stride": 2,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "VALID"
      },
      {
          "batch_size": 2,
          "input_channels": 16,
          "output_channels": 16,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 3,
          "filter_width": 3,
          "vertical_stride": 1,
          "horizontal_stride": 1,
          "conv_input_scale": 0.001,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 3,
          "input_channels": 8,
          "output_channels": 8,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 5,
          "filter_width": 5,
          "vertical_stride": 1,
          "horizontal_stride": 1,
          "conv_input_scale": 0.001,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 3,
          "input_channels": 8,
          "output_channels": 8,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 7,
          "filter_width": 1,
          "vertical_stride": 2,
          "horizontal_stride": 1,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
      {
          "batch_size": 3,
          "input_channels": 8,
          "output_channels": 8,
          "input_height": 9,
          "input_width": 9,
          "filter_height": 1,
          "filter_width": 7,
          "vertical_stride": 1,
          "horizontal_stride": 1,
          "conv_input_scale": 0.002,
          "side_input_scale": 0.5,
          "bias_scale": 1,
          "padding_type": "SAME"
      },
  ]

  def runTest(self, test_param, apply_relu):
    """Runs the fused int8 op and its simulation for one configuration.

    Args:
      test_param: A dict with the keys used in `_test_params`.
      apply_relu: If true the "Relu" activation mode is used, otherwise "None".
    """
    batch_size = test_param["batch_size"]
    input_channels = test_param["input_channels"]
    output_channels = test_param["output_channels"]
    input_height = test_param["input_height"]
    input_width = test_param["input_width"]
    filter_height = test_param["filter_height"]
    filter_width = test_param["filter_width"]
    vertical_stride = test_param["vertical_stride"]
    horizontal_stride = test_param["horizontal_stride"]
    conv_input_scale = test_param["conv_input_scale"]
    side_input_scale = test_param["side_input_scale"]
    bias_scale = test_param["bias_scale"]
    padding_type = test_param["padding_type"]

    # Random quantized input in [N, C / 4, H, W, 4] layout.
    conv_input, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [batch_size, input_channels // 4, input_height, input_width, 4],
            minval=-0.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    # Random quantized filter in [O, I / 4, H, W, 4] layout.
    kernel, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [
                output_channels, input_channels // 4, filter_height,
                filter_width, 4
            ],
            minval=-1.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    output_height = CalculateConvolvedOutputDim(input_height, filter_height,
                                                vertical_stride, padding_type)
    output_width = CalculateConvolvedOutputDim(input_width, filter_width,
                                               horizontal_stride, padding_type)
    # The logging API formats with `msg % args`, so each argument needs a
    # placeholder in the message.
    tf_logging.info("output_height=%s, output_width=%s", output_height,
                    output_width)

    # The side input has the same shape as the convolution output.
    side_input, _, _ = gen_array_ops.quantize_v2(
        random_ops.random_uniform(
            [batch_size, output_channels // 4, output_height, output_width, 4],
            minval=0.0,
            maxval=1.0,
            dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8)

    biases = random_ops.random_uniform(
        [output_channels],
        minval=-10 * bias_scale,
        maxval=20 * bias_scale,
        dtype=dtypes.float32)

    # Strides are given in NCHW order: no striding over batch or channels.
    strides = [1, 1, vertical_stride, horizontal_stride]

    actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation(
        conv_input,
        kernel,
        biases,
        strides=strides,
        padding=padding_type,
        conv_input_scale=conv_input_scale,
        side_input_scale=side_input_scale,
        side_input=side_input,
        activation_mode="Relu" if apply_relu else "None",
        data_format="NCHW_VECT_C",
        filter_format="OIHW_VECT_I")

    expected = SimulateFusedConv2dBiasActivationInt8(
        conv_input_scale, conv_input, kernel, padding_type, strides,
        side_input_scale, side_input, biases, apply_relu)

    with self.test_session(use_gpu=True) as sess:
      # Both tensors are fetched in a single run so they see the same random
      # inputs.
      actual_y, expected_y = sess.run([actual, expected])
      tf_logging.info("actual_y = %s", actual_y)
      tf_logging.info("expected_y = %s", expected_y)
      self.assertTrue(np.array_equal(actual_y, expected_y))

  def testFusedConvInt8(self):
    """Runs every configuration with and without Relu on a capable GPU."""
    if not test.is_gpu_available(
        cuda_only=True, min_cuda_compute_capability=(6, 1)):
      tf_logging.info("int8 test skipped because not run with --config=cuda or "
                      "no GPUs with compute capability >= 6.1 are available.")
      return
    for apply_relu in [True, False]:
      for test_param in self._test_params:
        self.runTest(test_param, apply_relu)
if __name__ == "__main__":
  # Attach one generated forward test per shrunk Inception layer shape; the
  # expected output size from the generator is not needed here.
  for index, shape_config in enumerate(GetShrunkInceptionShapes()):
    input_size_, filter_size_, _, stride_, padding_ = shape_config
    test_name = "testInceptionFwd_" + str(index)
    test_fn = GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_)
    setattr(FusedConv2DBiasActivationTest, test_name, test_fn)

  # TODO(b/35359731)
  # Fwd, BckInput, and BackFilter to test that for certain input parameter
  # set, winograd nonfused algorithm will be excluded from conv autotune. If
  # in such case, winograd nonfused algorithm is added as one option of the
  # conv autotune, and cuDNN version is smaller than 7, the following tests
  # will fail.
  ishape = [1, 400, 400, 1]
  fshape = [1, 1, 1, 256]
  oshape = [1, 400, 400, 256]
  no_winograd_test = GetInceptionFwdTest(
      ishape, fshape, 1, "SAME", gpu_only=True)
  setattr(FusedConv2DBiasActivationTest,
          "testInceptionFwd_No_Winograd_Nonfused", no_winograd_test)
  test.main()