| # Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Tests for Cudnn RNN models.""" |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import argparse |
| import collections |
| import functools |
| import itertools |
| import os |
| import sys |
| import unittest |
| |
| import numpy as np |
| |
| from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn |
| from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops |
| from tensorflow.contrib.rnn.python.ops import rnn as contrib_rnn_lib |
| from tensorflow.python.eager import backprop |
| from tensorflow.python.eager import context |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import ops |
| from tensorflow.python.framework import random_seed |
| from tensorflow.python.framework import test_util |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import control_flow_ops |
| from tensorflow.python.ops import gen_nn_ops |
| from tensorflow.python.ops import gradients_impl as gradients |
| from tensorflow.python.ops import init_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import random_ops |
| from tensorflow.python.ops import rnn as rnn_lib |
| from tensorflow.python.ops import rnn_cell_impl |
| from tensorflow.python.ops import state_ops |
| from tensorflow.python.ops import variable_scope as vs |
| from tensorflow.python.ops import variables |
| from tensorflow.python.ops.losses import losses |
| from tensorflow.python.platform import googletest |
| from tensorflow.python.platform import test |
| from tensorflow.python.platform import tf_logging as logging |
| from tensorflow.python.training import adagrad |
| from tensorflow.python.training import adam |
| from tensorflow.python.training import gradient_descent |
| from tensorflow.python.training import momentum |
| from tensorflow.python.training import rmsprop |
| from tensorflow.python.training import saver as saver_lib |
| from tensorflow.python.training.checkpointable import util as checkpointable_utils |
| |
| |
| CUDNN_LSTM = cudnn_rnn_ops.CUDNN_LSTM |
| CUDNN_GRU = cudnn_rnn_ops.CUDNN_GRU |
| CUDNN_RNN_RELU = cudnn_rnn_ops.CUDNN_RNN_RELU |
| CUDNN_RNN_TANH = cudnn_rnn_ops.CUDNN_RNN_TANH |
| CUDNN_RNN_UNIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION |
| CUDNN_RNN_BIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION |
| |
| CUDNN_LSTM_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_LSTM_PARAMS_PER_LAYER |
| CUDNN_GRU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_GRU_PARAMS_PER_LAYER |
| CUDNN_RNN_TANH_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_TANH_PARAMS_PER_LAYER |
| CUDNN_RNN_RELU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_RELU_PARAMS_PER_LAYER |
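# For reference: the *_PARAMS_PER_LAYER constants are the number of canonical
# weight matrices (with a matching number of bias vectors) per layer per
# direction: 8 for LSTM (4 gates x {input, recurrent}), 6 for GRU (3 gates x
# 2), and 2 for the plain tanh/relu RNNs.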
| |
| |
| class CudnnTestModel(object): |
| """Model with convenient APIs for easier building and running test graph. |
| |
| The graph built is used by all tests below to avoid repeatedly building |
| similar test graphs. |
| """ |
| |
| def __init__(self, |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=CUDNN_RNN_UNIDIRECTION, |
| dropout=0., |
| dtype=dtypes.float32, |
| training=False, |
| seed=None, |
| kernel_initializer=None, |
| bias_initializer=None): |
| if dtype not in (dtypes.float16, dtypes.float32, dtypes.float64): |
| raise ValueError("Invalid dtype: %s" % dtype) |
| self._dtype = dtype |
| |
| self._inputs = array_ops.placeholder( |
| dtype=dtype, shape=[None, None, input_size], name="inputs") |
| h = array_ops.placeholder( |
| dtype=dtype, shape=[None, None, num_units], name="h") |
| c = array_ops.placeholder( |
| dtype=dtype, shape=[None, None, num_units], name="c") |
| if rnn_mode == CUDNN_LSTM: |
| model_fn = cudnn_rnn.CudnnLSTM |
| self._initial_state = (h, c) |
| elif rnn_mode == CUDNN_GRU: |
| model_fn = cudnn_rnn.CudnnGRU |
| self._initial_state = (h,) |
| elif rnn_mode == CUDNN_RNN_TANH: |
| model_fn = cudnn_rnn.CudnnRNNTanh |
| self._initial_state = (h,) |
| elif rnn_mode == CUDNN_RNN_RELU: |
| model_fn = cudnn_rnn.CudnnRNNRelu |
| self._initial_state = (h,) |
| else: |
| raise ValueError("Invalid rnn_mode: %s" % rnn_mode) |
| self._rnn = model_fn( |
| num_layers, |
| num_units, |
| direction=direction, |
| dropout=dropout, |
| dtype=dtype, |
| seed=seed, |
| kernel_initializer=kernel_initializer, |
| bias_initializer=bias_initializer) |
| self._rnn.build([None, None, input_size]) |
| |
| self._outputs, self._output_state = self._rnn( |
| self._inputs, initial_state=self._initial_state, training=training) |
| |
| def _AddUp(self, outputs, output_state): |
| total = math_ops.reduce_sum(outputs) |
| for s in output_state: |
| total += math_ops.reduce_sum(s) |
| return total |
| |
| @property |
| def inputs(self): |
| return self._inputs |
| |
| @property |
| def initial_state(self): |
| return self._initial_state |
| |
| @property |
| def outputs(self): |
| return self._outputs |
| |
| @property |
| def output_state(self): |
| return self._output_state |
| |
| @property |
| def rnn(self): |
| return self._rnn |
| |
| @property |
| def total_sum(self): |
| return self._AddUp(self.outputs, self.output_state) |
| |
| def SynthesizeInput(self, seq_length, batch_size, seed=1234): |
| """Synthesizes input and initial state values for testing.""" |
| np.random.seed(seed) |
| num_layers = self._rnn.num_layers |
| dir_count = self._rnn.num_dirs |
| num_units = self._rnn.num_units |
| input_size = self._rnn.input_size |
| |
    np_dtype = self._dtype.as_numpy_dtype
| inputs = np.random.randn(seq_length, batch_size, |
| input_size).astype(np_dtype) |
| input_h = np.random.randn(num_layers * dir_count, batch_size, |
| num_units).astype(np_dtype) |
| if self._rnn.rnn_mode == CUDNN_LSTM: |
| input_c = np.random.randn(num_layers * dir_count, batch_size, |
| num_units).astype(np_dtype) |
| initial_state = (input_h, input_c) |
| else: |
| initial_state = (input_h,) |
| return inputs, initial_state |
| |
| def ZeroState(self, batch_size): |
| num_layers = self._rnn.num_layers |
| dir_count = self._rnn.num_dirs |
| num_units = self._rnn.num_units |
| |
    np_dtype = self._dtype.as_numpy_dtype
| input_h = np.zeros((num_layers * dir_count, batch_size, |
| num_units)).astype(np_dtype) |
| if self._rnn.rnn_mode == CUDNN_LSTM: |
| input_c = np.zeros((num_layers * dir_count, batch_size, |
| num_units)).astype(np_dtype) |
| initial_state = (input_h, input_c) |
| else: |
| initial_state = (input_h,) |
| return initial_state |
| |
| def FProp(self, inputs_t, initial_state_t, training): |
| """Builds additional subgraph with given inputs and state. |
| |
    Args:
      inputs_t: a tensor of inputs.
      initial_state_t: a tuple of initial-state tensors (h, plus c for LSTM).
      training: boolean, True if in training mode.
| Returns: |
| A tensor of the forward pass output of the model. |
| """ |
| outputs, output_state = self._rnn( |
| inputs_t, initial_state=initial_state_t, training=training) |
| return self._AddUp(outputs, output_state) |
| |
| def Feed(self, sess, inputs, initial_state=None, return_sum=True): |
| """Runs graph with given inputs and initial state.""" |
| batch_size = inputs.shape[1] |
| if initial_state is None: |
| initial_state = self.ZeroState(batch_size) |
| if return_sum: |
| return sess.run( |
| self.total_sum, |
| feed_dict={self.inputs: inputs, |
| self.initial_state: initial_state}) |
| else: |
| return sess.run( |
| [self.outputs, self.output_state], |
| feed_dict={self.inputs: inputs, |
| self.initial_state: initial_state}) |
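

# A minimal usage sketch of CudnnTestModel above. It is illustrative only: no
# test invokes it, the shapes are arbitrary assumptions, and actually running
# it needs a CUDA-enabled build.
def _ExampleFeedCudnnTestModel():
  # Imported locally to keep the sketch self-contained.
  from tensorflow.python.client import session as session_lib
  with ops.Graph().as_default() as g:
    model = CudnnTestModel(
        CUDNN_LSTM, num_layers=2, num_units=4, input_size=3)
    inputs, initial_state = model.SynthesizeInput(seq_length=5, batch_size=6)
    with session_lib.Session(graph=g) as sess:
      sess.run(variables.global_variables_initializer())
      # Feed() returns the scalar sum over outputs and output states by
      # default.
      return model.Feed(sess, inputs, initial_state)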
| |
| |
| def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None): |
| mode = rnn.rnn_mode |
| num_units = rnn.num_units |
| num_layers = rnn.num_layers |
| |
  # To reuse cuDNN-trained models, the canonical graph must use
  # cuDNN-compatible RNN cells.
| if mode == CUDNN_LSTM: |
| single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units) |
| elif mode == CUDNN_GRU: |
| single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units) |
| elif mode == CUDNN_RNN_TANH: |
| single_cell = (lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh)) |
| elif mode == CUDNN_RNN_RELU: |
| single_cell = ( |
| lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu)) |
| else: |
| raise ValueError("%s is not supported!" % mode) |
| |
| if not is_bidi: |
| cell = rnn_cell_impl.MultiRNNCell( |
| [single_cell() for _ in range(num_layers)]) |
| return rnn_lib.dynamic_rnn( |
| cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope) |
| else: |
| cells_fw = [single_cell() for _ in range(num_layers)] |
| cells_bw = [single_cell() for _ in range(num_layers)] |
| |
| (outputs, output_state_fw, |
| output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn( |
| cells_fw, |
| cells_bw, |
| inputs, |
| dtype=dtypes.float32, |
| time_major=True, |
| scope=scope) |
| return outputs, (output_state_fw, output_state_bw) |
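

# An illustrative sketch of pairing a cuDNN model with its canonical-cell
# counterpart via the helper above (not exercised by the tests; the
# placeholder shape [seq_length, batch_size, input_size] is an arbitrary
# assumption).
def _ExampleCanonicalCounterpart():
  model = CudnnTestModel(CUDNN_LSTM, num_layers=2, num_units=4, input_size=3)
  cell_inputs = array_ops.placeholder(dtypes.float32, shape=[8, 2, 3])
  return _CreateCudnnCompatibleCanonicalRNN(model.rnn, cell_inputs)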
| |
| |
| class CudnnRNNTestBasic(test_util.TensorFlowTestCase): |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testLayerBasic(self): |
| num_layers = 4 |
| num_units = 2 |
| batch_size = 8 |
| direction = CUDNN_RNN_UNIDIRECTION |
| dir_count = 1 |
| |
| with vs.variable_scope("main"): |
| kernel_initializer = init_ops.constant_initializer(0.) |
| bias_initializer = init_ops.constant_initializer(0.) |
| inputs = random_ops.random_uniform([ |
| num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32) |
| |
| lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, |
| direction=direction, |
| kernel_initializer=kernel_initializer, |
| bias_initializer=bias_initializer, |
| name="awesome_lstm") |
| |
| # Build the layer |
| outputs1, _ = lstm(inputs) |
| # Reuse the layer |
| outputs2, _ = lstm(inputs) |
| |
| total_sum1 = math_ops.reduce_sum(outputs1) |
| total_sum2 = math_ops.reduce_sum(outputs2) |
| |
| with vs.variable_scope("main", reuse=True): |
| lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, |
| direction=direction, |
| kernel_initializer=kernel_initializer, |
| bias_initializer=bias_initializer, |
| name="awesome_lstm") |
| |
| # Reuse the layer |
| outputs3, _ = lstm(inputs) |
| total_sum3 = math_ops.reduce_sum(outputs3) |
| |
| self.assertEqual(1, len(variables.trainable_variables())) |
| self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS))) |
| self.assertEqual("main/awesome_lstm/opaque_kernel", |
| variables.trainable_variables()[0].op.name) |
| |
| with self.test_session(use_gpu=True) as sess: |
| sess.run(variables.global_variables_initializer()) |
| (total_sum1_v, total_sum2_v, total_sum3_v) = sess.run( |
| [total_sum1, total_sum2, total_sum3]) |
| self.assertEqual(0, total_sum1_v) |
| self.assertEqual(0, total_sum2_v) |
| self.assertEqual(0, total_sum3_v) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testOptimizersSupport(self): |
| for opt in ("adagrad", "adam", "rmsprop", "momentum", "sgd"): |
| self._TestOptimizerSupportHelper(opt) |
| |
| def _GetOptimizer(self, opt): |
| if opt == "adagrad": |
| return adagrad.AdagradOptimizer(learning_rate=1e-2) |
| elif opt == "adam": |
| return adam.AdamOptimizer(learning_rate=1e-2) |
| elif opt == "rmsprop": |
| return rmsprop.RMSPropOptimizer(learning_rate=1e-2) |
| elif opt == "momentum": |
| return momentum.MomentumOptimizer(learning_rate=1e-2, momentum=0.9) |
| elif opt == "sgd": |
| return gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) |
| else: |
| raise ValueError("Unsupported optimizer: %s" % opt) |
| |
| def _TestOptimizerSupportHelper(self, opt): |
| num_layers = 4 |
| num_units = 2 |
| batch_size = 8 |
| direction = CUDNN_RNN_UNIDIRECTION |
| dir_count = 1 |
| |
| with ops.Graph().as_default() as g: |
| kernel_initializer = init_ops.constant_initializer(0.) |
| bias_initializer = init_ops.constant_initializer(0.) |
| inputs = random_ops.random_uniform([ |
| num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32) |
| |
| lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, |
| direction=direction, |
| kernel_initializer=kernel_initializer, |
| bias_initializer=bias_initializer, |
| name="awesome_lstm") |
| outputs, _ = lstm(inputs) |
| loss = math_ops.reduce_sum(outputs) |
| optimizer = self._GetOptimizer(opt) |
| train_op = optimizer.minimize(loss) |
| |
| with self.test_session(use_gpu=True, graph=g) as sess: |
| sess.run(variables.global_variables_initializer()) |
| sess.run(train_op) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSaveableGraphDeviceAssignment(self): |
| num_layers = 4 |
| num_units = 2 |
| batch_size = 8 |
| direction = CUDNN_RNN_UNIDIRECTION |
| dir_count = 1 |
| |
| def DeviceFn(op): |
| if op.type in ("Variable", "VariableV2"): |
| return "/cpu:0" |
| else: |
| return "/gpu:0" |
| |
| with ops.Graph().as_default() as g: |
| with ops.device(DeviceFn): |
| with vs.variable_scope("main"): |
| kernel_initializer = init_ops.constant_initializer(3.14) |
| bias_initializer = init_ops.constant_initializer(1.59) |
| inputs = random_ops.random_uniform( |
| [num_layers * dir_count, batch_size, num_units], |
| dtype=dtypes.float32) |
| |
| lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, |
| direction=direction, |
| kernel_initializer=kernel_initializer, |
| bias_initializer=bias_initializer, |
| name="awesome_lstm") |
| outputs = lstm(inputs) |
| |
| # saver is created in the scope of DeviceFn. |
| saver = saver_lib.Saver() |
| |
| with self.test_session(use_gpu=True, graph=g) as sess: |
| save_path = os.path.join(self.get_temp_dir(), |
| "test-saveable-device-assignment") |
| sess.run(variables.global_variables_initializer()) |
| |
| saver.save(sess, save_path) |
| saver.restore(sess, save_path) |
| sess.run(outputs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testDifferentShapesEager(self): |
| # Checks that kernel caching does not cause sharing of temporary storage |
| # across different input shapes when executing eagerly. |
| with context.eager_mode(): |
| with ops.device("gpu:0"): |
| first_output, _ = cudnn_rnn.CudnnGRU(1, 100)( |
| array_ops.zeros([28, 100, 28])) |
| second_output, _ = cudnn_rnn.CudnnGRU(1, 100)( |
| array_ops.zeros([28, 100, 100])) |
| self.assertAllEqual([28, 100, 100], first_output.shape) |
| self.assertAllEqual([28, 100, 100], second_output.shape) |
| |
| def _LossFunc(): |
| first_output, _ = cudnn_rnn.CudnnGRU(1, 100)( |
| array_ops.zeros([28, 100, 28])) |
| second_output, _ = cudnn_rnn.CudnnGRU(1, 100)( |
| array_ops.zeros([28, 100, 100])) |
| return (math_ops.reduce_sum(first_output) + |
| math_ops.reduce_sum(second_output)) |
| |
| backprop.implicit_grad(_LossFunc)() |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testDifferentShapesGraph(self): |
| # Tests that a single kernel instance presented with multiple input shapes |
| # does not crash with graph execution. |
| with ops.device("gpu:0"): |
| layer = cudnn_rnn.CudnnGRU(1, 100) |
| layer(array_ops.zeros([28, 100, 100])) |
| |
| def _Cond(index, accumulation): |
| del accumulation # unused |
| return math_ops.less(index, 4) |
| |
| def _Body(index, accumulation): |
| layer_input = accumulation[:, :, 10 * (1 + index % 2):] |
| output, _ = layer(layer_input) |
| return index + 1, accumulation + output |
| |
| original_input = array_ops.zeros([28, 100, 100]) |
| _, accumulation = control_flow_ops.while_loop(_Cond, _Body, |
| [0, original_input]) |
| grad, = gradients.gradients( |
| math_ops.reduce_sum(accumulation), (original_input,)) |
| init_op = variables.global_variables_initializer() |
| with self.test_session() as sess: |
| sess.run(init_op) |
| accumulation_eval, grad_eval = sess.run((accumulation, grad)) |
| self.assertAllEqual([28, 100, 100], accumulation_eval.shape) |
| self.assertAllEqual([28, 100, 100], grad_eval.shape) |
| |
| |
| # TODO(jamesqin): Transform to parameterized test after it is included in the |
| # TF open source codebase. |
| class CudnnRNNTestSaveRestore(test_util.TensorFlowTestCase): |
| |
| def _CompareWeights(self, lhs, rhs): |
| self.assertEqual(len(lhs), len(rhs)) |
| for lw, rw in zip(lhs, rhs): |
| self.assertAllEqual(lw, rw) |
| |
| def _CompareBiases(self, lhs, rhs, rnn_mode, num_layers, direction): |
| self.assertEqual(len(lhs), len(rhs)) |
| if rnn_mode == CUDNN_LSTM: |
| num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER |
| elif rnn_mode == CUDNN_GRU: |
| num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER |
| elif rnn_mode == CUDNN_RNN_TANH: |
| num_params_per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER |
| else: |
| num_params_per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER |
| num_dirs = 1 if direction == CUDNN_RNN_UNIDIRECTION else 2 |
| num_params_per_layer *= num_dirs |
| self.assertEqual(num_params_per_layer * num_layers, len(lhs)) |
| |
| for i in range(num_layers): |
| layer_lhs = lhs[i * num_params_per_layer: (i+1) * num_params_per_layer] |
| layer_rhs = rhs[i * num_params_per_layer: (i+1) * num_params_per_layer] |
| if direction == CUDNN_RNN_UNIDIRECTION: |
| self._CompareSingleLayerBiases(layer_lhs, layer_rhs) |
| else: |
| size = len(layer_lhs) |
| fw_lhs, bw_lhs = layer_lhs[:size//2], layer_lhs[size//2:] |
| fw_rhs, bw_rhs = layer_rhs[:size//2], layer_rhs[size//2:] |
| self._CompareSingleLayerBiases(fw_lhs, fw_rhs) |
| self._CompareSingleLayerBiases(bw_lhs, bw_rhs) |
| |
| def _CompareSingleLayerBiases(self, lhs, rhs): |
| self.assertEqual(len(lhs), len(rhs)) |
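    # cuDNN keeps two bias vectors per half (input-projection and recurrent);
    # only their sum enters the computation, so compare sums of corresponding
    # halves, which stay well-defined even if save/restore redistributes the
    # two components.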
| |
| lf_lhs, rt_lhs = lhs[:len(lhs)//2], lhs[len(lhs)//2:] |
| lf_rhs, rt_rhs = rhs[:len(rhs)//2], rhs[len(rhs)//2:] |
| self.assertEqual(len(lf_lhs), len(rt_lhs)) |
| self.assertEqual(len(lf_rhs), len(rt_rhs)) |
| |
| sum_lhs, sum_rhs = [], [] |
| for lf, rt in zip(lf_lhs, rt_lhs): |
| sum_lhs.append(lf + rt) |
| for lf, rt in zip(lf_rhs, rt_rhs): |
| sum_rhs.append(lf + rt) |
| self.assertEqual(len(sum_lhs), len(sum_rhs)) |
| for lf, rt in zip(sum_lhs, sum_rhs): |
| self.assertAllEqual(lf, rt) |
| |
| def _TestSaveRestoreVariable(self, rnn_mode, direction, dtype): |
| input_size = 3 |
| num_layers = 2 |
| num_units = 7 |
| with ops.Graph().as_default() as g: |
| random_seed.set_random_seed(1234) |
| model = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dtype=dtype) |
| rnn = model.rnn |
| save_path = os.path.join(self.get_temp_dir(), |
| "save-restore-variable-test") |
| saver = saver_lib.Saver() |
      # CudnnTestModel() creates a CudnnOpaqueParamsSaveable that lets the
      # saver save/restore cuDNN variables in canonical format.
      weights, biases = model.rnn.saveable._OpaqueParamsToCanonical()
      opaque_params = rnn.trainable_variables[0]
      reset_op = state_ops.assign(
          opaque_params,
          array_ops.zeros(array_ops.shape(opaque_params), dtype=dtype))
      # Pass the graph explicitly; otherwise an old session would be reused.
| with self.test_session(use_gpu=True, graph=g) as sess: |
| sess.run(variables.global_variables_initializer()) |
| val = saver.save(sess, save_path) |
| self.assertEqual(save_path, val) |
| weights_v, biases_v = sess.run([weights, biases]) |
| |
| # Reset opaque param |
| sess.run(reset_op) |
| saver.restore(sess, save_path) |
| weights_v_restored, biases_v_restored = sess.run([weights, biases]) |
| |
| self._CompareWeights(weights_v, weights_v_restored) |
| self._CompareBiases(biases_v, biases_v_restored, rnn_mode, num_layers, |
| direction) |
| |
| def _TestSaveRestoreTwoVariables(self, rnn_mode, direction, dtype): |
| input_size = 3 |
| num_layers = 2 |
| num_units = 7 |
| with ops.Graph().as_default() as g: |
| random_seed.set_random_seed(1234) |
| with vs.variable_scope("m1"): |
| model1 = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dtype=dtype) |
| with vs.variable_scope("m2"): |
| model2 = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dtype=dtype) |
| opaque_params = (model1.rnn.trainable_variables[0], |
| model2.rnn.trainable_variables[0]) |
| weights1, biases1 = model1.rnn.saveable._OpaqueParamsToCanonical() |
| weights2, biases2 = model2.rnn.saveable._OpaqueParamsToCanonical() |
| reset_params = [ |
| state_ops.assign(params, |
| array_ops.zeros_like(params, dtype=dtype)) |
| for params in opaque_params |
| ] |
| reset_op = control_flow_ops.group(*reset_params) |
| save_path = os.path.join(self.get_temp_dir(), |
| "save-restore-variable-test2") |
| saver = saver_lib.Saver() |
      # Pass the graph explicitly; otherwise an old session would be reused.
| with self.test_session(use_gpu=True, graph=g) as sess: |
| sess.run(variables.global_variables_initializer()) |
| val = saver.save(sess, save_path) |
| self.assertEqual(save_path, val) |
| |
| weights1_v, biases1_v = sess.run([weights1, biases1]) |
| weights2_v, biases2_v = sess.run([weights2, biases2]) |
| |
| sess.run(reset_op) |
| saver.restore(sess, save_path) |
| weights1_v_restored, biases1_v_restored = sess.run([weights1, biases1]) |
| weights2_v_restored, biases2_v_restored = sess.run([weights2, biases2]) |
| |
| self._CompareWeights(weights1_v, weights1_v_restored) |
| self._CompareWeights(weights2_v, weights2_v_restored) |
| self._CompareBiases(biases1_v, biases1_v_restored, rnn_mode, num_layers, |
| direction) |
| self._CompareBiases(biases2_v, biases2_v_restored, rnn_mode, num_layers, |
| direction) |
| |
| def _TestSaveRestoreOutput(self, rnn_mode, direction, dtype): |
| with ops.Graph().as_default() as g: |
| num_layers = 2 |
| num_units = 7 |
| input_size = 7 |
| seq_length = 8 |
| batch_size = 4 |
| model = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dtype=dtype, |
| training=False) |
| rnn = model.rnn |
| |
| save_path = os.path.join(self.get_temp_dir(), "save-restore-output-test") |
| saver = saver_lib.Saver() |
| |
| # Only one opaque var in a cudnn layer. |
| assert len(rnn.trainable_variables) == 1 |
| reset_params = state_ops.assign( |
| rnn.trainable_variables[0], |
| array_ops.zeros( |
| array_ops.shape(rnn.trainable_variables[0]), dtype=dtype)) |
| |
      # Pass the graph explicitly; otherwise an old session would be reused.
| with self.test_session(use_gpu=True, graph=g) as sess: |
| sess.run(variables.global_variables_initializer()) |
| inputs, initial_state = model.SynthesizeInput(seq_length, batch_size) |
| total_sum_v = model.Feed(sess, inputs, initial_state) |
| val = saver.save(sess, save_path) |
| self.assertEqual(save_path, val) |
| |
| sess.run(reset_params) |
| saver.restore(sess, save_path) |
| total_sum_v_restored = model.Feed(sess, inputs, initial_state) |
| self.assertAllClose(total_sum_v, total_sum_v_restored, atol=1e-5) |
| |
| def _TestSaveRestoreHelper(self, rnn_mode): |
| directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] |
| dtype_list = [dtypes.float16, dtypes.float32, dtypes.float64] |
| for direction, dtype in itertools.product(directions, dtype_list): |
| self._TestSaveRestoreVariable(rnn_mode, direction, dtype) |
| self._TestSaveRestoreTwoVariables(rnn_mode, direction, dtype) |
| self._TestSaveRestoreOutput(rnn_mode, direction, dtype) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSaveRestoreRepeatedlyCreateCustomSaveable(self): |
| input_size = 3 |
| num_layers = 2 |
| num_units = 7 |
| with ops.Graph().as_default(): |
| random_seed.set_random_seed(1234) |
| model = CudnnTestModel( |
| CUDNN_LSTM, |
| num_layers, |
| num_units, |
| input_size, |
| direction=CUDNN_RNN_UNIDIRECTION, |
| dtype=dtypes.float32) |
| with self.assertRaisesRegexp(RuntimeError, |
| "Cudnn saveable already created"): |
| model.rnn._create_saveable() |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSaveRestoreLSTM(self): |
| self._TestSaveRestoreHelper(CUDNN_LSTM) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSaveRestoreGRU(self): |
| self._TestSaveRestoreHelper(CUDNN_GRU) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSaveRestoreRNNTanh(self): |
| self._TestSaveRestoreHelper(CUDNN_RNN_TANH) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSaveRestoreRNNRelu(self): |
| self._TestSaveRestoreHelper(CUDNN_RNN_RELU) |
| |
| |
| class CudnnRNNTestSaveRestoreCheckpointable(test_util.TensorFlowTestCase): |
| |
| def _VerifyCheckpoint( |
| self, checkpoint_path, compatible_cell_fn, cudnn_cell_fn, |
| num_layers, input_size, expected_variable_values, num_applications=3): |
| checkpoint_directory = self.get_temp_dir() |
| checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") |
| with ops.device("gpu:0"): |
| cudnn_layer = cudnn_cell_fn() |
| cudnn_checkpoint = checkpointable_utils.Checkpoint(cell=cudnn_layer) |
| status = cudnn_checkpoint.restore(checkpoint_path) |
| inputs = 3. * array_ops.ones([num_applications, num_layers, input_size], |
| dtype=dtypes.float32) |
| cudnn_output, _ = cudnn_layer(inputs) |
| status.run_restore_ops() |
| second_save_path = cudnn_checkpoint.save(checkpoint_prefix) |
| restore_layer = compatible_cell_fn() |
| restore_layer_checkpoint = checkpointable_utils.Checkpoint( |
| cell=restore_layer) |
| status = restore_layer_checkpoint.restore(second_save_path) |
| current_state = restore_layer.zero_state(1, dtypes.float32) |
| for _ in range(num_applications): |
| restore_layer_output, current_state = restore_layer( |
| inputs=3. * array_ops.ones([1, input_size]), |
| state=current_state) |
| status.run_restore_ops() |
| self.assertTrue(restore_layer.variables) |
| for variable, expected_value in zip( |
| restore_layer.variables, expected_variable_values): |
| self.assertAllClose(expected_value, self.evaluate(variable)) |
| self.assertAllClose(self.evaluate(restore_layer_output), |
| self.evaluate(cudnn_output)[-1, -1:, ...]) |
| |
| def _CheckpointableSingleCellUnidirectionalTestTemplate( |
| self, single_cell_fn, cudnn_cell_fn): |
    # Single-layer cuDNN cells with object-based checkpointing should be
    # checkpoint compatible with either single CudnnCompatible cells or with
    # MultiRNNCells containing one cell.
| input_size = 3 |
| save_cell_layer = single_cell_fn() |
| save_cell_layer( |
| inputs=array_ops.ones([1, input_size]), |
| state=save_cell_layer.zero_state(1, dtypes.float32)) |
| self.assertTrue(save_cell_layer.variables) |
| expected_values = [] |
| np.random.seed(10) |
| for variable in save_cell_layer.variables: |
| value = np.random.normal(size=variable.shape) |
| expected_values.append(value) |
| self.evaluate(variable.assign(value)) |
| save_checkpoint = checkpointable_utils.Checkpoint(cell=save_cell_layer) |
| checkpoint_directory = self.get_temp_dir() |
| checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") |
| first_save_path = save_checkpoint.save(checkpoint_prefix) |
| self._VerifyCheckpoint( |
| checkpoint_path=first_save_path, |
| compatible_cell_fn= |
| lambda: rnn_cell_impl.MultiRNNCell([single_cell_fn()]), |
| cudnn_cell_fn=cudnn_cell_fn, |
| num_layers=1, |
| expected_variable_values=expected_values, |
| input_size=input_size) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| @test_util.run_in_graph_and_eager_modes |
| def testLSTMCheckpointableSingleLayer(self): |
| num_units = 2 |
| direction = CUDNN_RNN_UNIDIRECTION |
| self._CheckpointableSingleCellUnidirectionalTestTemplate( |
| single_cell_fn=functools.partial( |
| cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units), |
| cudnn_cell_fn=functools.partial( |
| cudnn_rnn.CudnnLSTM, num_layers=1, num_units=num_units, |
| direction=direction, name="awesome_lstm")) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| @test_util.run_in_graph_and_eager_modes |
| def testGRUCheckpointableSingleLayer(self): |
| num_units = 2 |
| direction = CUDNN_RNN_UNIDIRECTION |
| with self.assertRaises(NotImplementedError): |
| # TODO(allenl): Implement object-based saving for GRUs and other cells. |
| self._CheckpointableSingleCellUnidirectionalTestTemplate( |
| single_cell_fn=functools.partial( |
| cudnn_rnn_ops.CudnnCompatibleGRUCell, num_units=num_units), |
| cudnn_cell_fn=functools.partial( |
| cudnn_rnn.CudnnGRU, num_layers=1, num_units=num_units, |
| direction=direction, name="awesome_gru")) |
| |
| def _CheckpointableMultiLayerTestTemplate( |
| self, single_cell_fn, cudnn_cell_fn, num_layers): |
| |
| def _MultiCellFn(): |
| return rnn_cell_impl.MultiRNNCell( |
| [single_cell_fn() for _ in range(num_layers)]) |
| input_size = 3 |
| save_graph = ops.Graph() |
| with save_graph.as_default(), self.session(graph=save_graph): |
| save_layer = _MultiCellFn() |
| save_layer(inputs=array_ops.ones([1, input_size]), |
| state=save_layer.zero_state(1, dtypes.float32)) |
| self.assertTrue(save_layer.variables) |
| expected_values = [] |
| np.random.seed(10) |
| for variable in save_layer.variables: |
| value = np.random.normal(size=variable.shape) |
| expected_values.append(value) |
| self.evaluate(variable.assign(value)) |
| save_checkpoint = checkpointable_utils.Checkpoint(cell=save_layer) |
| checkpoint_directory = self.get_temp_dir() |
| checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") |
| first_save_path = save_checkpoint.save(checkpoint_prefix) |
| self._VerifyCheckpoint( |
| checkpoint_path=first_save_path, |
| compatible_cell_fn=_MultiCellFn, cudnn_cell_fn=cudnn_cell_fn, |
| num_layers=num_layers, |
| expected_variable_values=expected_values, |
| input_size=input_size) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| @test_util.run_in_graph_and_eager_modes |
  def testCudnnCompatibleLSTMCheckpointableMultiLayer(self):
| num_units = 2 |
| num_layers = 3 |
| direction = CUDNN_RNN_UNIDIRECTION |
| self._CheckpointableMultiLayerTestTemplate( |
| single_cell_fn=functools.partial( |
| cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units), |
| cudnn_cell_fn=functools.partial( |
| cudnn_rnn.CudnnLSTM, num_layers=num_layers, num_units=num_units, |
| direction=direction, name="awesome_lstm"), |
| num_layers=num_layers) |
| |
| |
| # TODO(jamesqin): Transform to parameterized test after it is included in the |
| # TF open source codebase. |
| class CudnnRNNTestCompatibleRNNCells(test_util.TensorFlowTestCase): |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testCudnnCompatibleLSTM(self): |
| self._TestCudnnCompatibleRnnCellsHelper(CUDNN_LSTM) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testCudnnCompatibleGRU(self): |
| self._TestCudnnCompatibleRnnCellsHelper(CUDNN_GRU) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testCudnnCompatibleRNNTanh(self): |
| self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_TANH) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testCudnnCompatibleRNNRelu(self): |
| self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_RELU) |
| |
| def _TestCudnnCompatibleRnnCellsHelper(self, rnn_mode): |
| configs = [ |
| { |
| "num_layers": 1, |
| "seq_length": 3, |
| "num_units": 4, |
| "input_size": 5, |
| "batch_size": 6, |
| }, |
| { |
| "num_layers": 2, |
| "seq_length": 8, |
| "num_units": 4, |
| "input_size": 8, |
| "batch_size": 16, |
| }, |
| { |
| "num_layers": 2, |
| "seq_length": 3, |
| "num_units": 4, |
| "input_size": 5, |
| "batch_size": 6, |
| }, |
| { |
| "num_layers": 1, |
| "seq_length": 2, |
| "num_units": 2, |
| "input_size": 4, |
| "batch_size": 1, |
| }, |
| ] |
| directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] |
    for cfg, direction in itertools.product(configs, directions):
| self._TestCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"], |
| cfg["num_units"], cfg["input_size"], |
| cfg["batch_size"], rnn_mode, direction) |
| |
| def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, |
| input_size, batch_size, rnn_mode, direction): |
| dtype = dtypes.float32 |
| # Train graph |
| with ops.Graph().as_default() as g: |
| model = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dtype=dtype, |
| training=True) |
| target_output = array_ops.placeholder(dtype=dtype) |
| loss_op = losses.log_loss( |
| labels=target_output, predictions=model.total_sum) |
| optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) |
| train_op = optimizer.minimize(loss_op) |
| |
| saver = saver_lib.Saver() |
| |
| # Train Cudnn model |
| seed = 0 |
| with self.test_session(use_gpu=True, graph=g) as sess: |
| sess.run(variables.global_variables_initializer()) |
| # Train 128 steps |
| num_steps = 128 |
| for _ in range(num_steps): |
| inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed) |
| targets = np.random.rand() |
| sess.run( |
| train_op, |
| feed_dict={ |
| model.inputs: inputs, |
| model.initial_state: model.ZeroState(batch_size), |
| target_output: targets |
| }) |
| seed += 1 |
| |
| save_path = os.path.join(self.get_temp_dir(), |
| ("cudnn-rnn-%s-test" % rnn_mode)) |
| save_v = saver.save(sess, save_path) |
| self.assertEqual(save_path, save_v) |
| |
| # Cudnn inference graph |
| with ops.Graph().as_default() as g: |
| model = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dtype=dtype, |
| training=False) |
| rnn = model.rnn |
| saver = saver_lib.Saver() |
| |
| inference_input = np.random.rand(seq_length, batch_size, |
| input_size).astype(np.float32) |
| with self.test_session(use_gpu=True, graph=g) as sess: |
| sess.run(variables.global_variables_initializer()) |
| saver.restore(sess, save_path) |
| |
| # Cudnn inference |
| cudnn_outputs_v, cudnn_output_states_v = model.Feed( |
| sess, inference_input, return_sum=False) |
| |
| # Canonical RNN inference graph |
| with ops.Graph().as_default() as g: |
| cell_inputs = array_ops.placeholder( |
| dtype, shape=[seq_length, batch_size, input_size]) |
| if direction == CUDNN_RNN_UNIDIRECTION: |
        # outputs is one tensor; states has num_layers entries, each an
        # LSTMStateTuple (h, c) for LSTM or a single tensor otherwise.
| (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs) |
| if rnn_mode == CUDNN_LSTM: |
| output_h = array_ops.stack([s.h for s in states]) |
| output_c = array_ops.stack([s.c for s in states]) |
| else: |
          output_state = array_ops.stack(states)
| else: |
| # outputs is one tensor. |
| # states is a tuple of 2 tuples: |
| # each sub tuple is num_layer tuples, each with 2 tensors. |
| (outputs, states) = _CreateCudnnCompatibleCanonicalRNN( |
| rnn, cell_inputs, is_bidi=True) |
| output_state_fw, output_state_bw = states |
| if rnn_mode == CUDNN_LSTM: |
| output_h, output_c = [], [] |
| for s_fw, s_bw in zip(output_state_fw, output_state_bw): |
| output_h.append(array_ops.stack([s_fw.h, s_bw.h])) |
| output_c.append(array_ops.stack([s_fw.c, s_bw.c])) |
| output_h = array_ops.concat(output_h, axis=0) |
| output_c = array_ops.concat(output_c, axis=0) |
| else: |
| output_state = [] |
| for s_fw, s_bw in zip(output_state_fw, output_state_bw): |
| output_state.append(array_ops.stack([s_fw, s_bw])) |
| output_state = array_ops.concat(output_state, axis=0) |
| saver = saver_lib.Saver() |
| |
| with self.test_session(use_gpu=True, graph=g) as sess: |
| saver.restore(sess, save_path) |
| |
        # Canonical-cell inference
| if rnn_mode == CUDNN_LSTM: |
| outputs_v, output_h_v, output_c_v = sess.run( |
| [outputs, output_h, output_c], |
| feed_dict={cell_inputs: inference_input}) |
| self.assertAllClose(cudnn_outputs_v, outputs_v) |
| cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v |
| self.assertAllClose(cudnn_output_h_v, output_h_v) |
| self.assertAllClose(cudnn_output_c_v, output_c_v) |
| else: |
| outputs_v, output_state_v = sess.run( |
| [outputs, output_state], |
| feed_dict={cell_inputs: inference_input}) |
| self.assertAllClose(cudnn_outputs_v, outputs_v, atol=2e-5, rtol=2e-5) |
| (cudnn_output_h_v,) = cudnn_output_states_v |
| self.assertAllClose(cudnn_output_h_v, output_state_v, atol=2e-5, |
| rtol=2e-5) |
| |
| |
| class CudnnRNNTestParamsSize(test_util.TensorFlowTestCase): |
| |
| def _TestOpaqueParamsSize(self, rnn_mode, num_layers, num_units, input_size, |
| dtype, direction): |
| logging.info("Testing one lstm param size with config: %s", locals()) |
| model = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| dtype=dtype, |
| direction=direction) |
| rnn = model.rnn |
| |
| # Min param size estimate = sum(weights.size) + sum(biases.size) |
    min_params_size = (
        np.sum([np.prod(shape) for shape in rnn.canonical_weight_shapes]) +
        np.sum([shape[0] for shape in rnn.canonical_bias_shapes]))
| |
| opaque_params = rnn.trainable_variables[0] |
| with self.test_session(use_gpu=True, graph=ops.get_default_graph()): |
| variables.global_variables_initializer().run() |
| opaque_params_size_v = opaque_params.eval().size |
| self.assertLessEqual(min_params_size, opaque_params_size_v) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testOpaqueParamsSize(self): |
| test_configs = [ |
| [4, 200, 200], |
| [4, 200, 300], |
| [4, 200, 100], |
| [1, 100, 200], |
| [2, 200, 100], |
| [3, 200, 400], |
| ] |
| directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] |
| dtype_list = [dtypes.float16, dtypes.float32, dtypes.float64] |
| rnns = [CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH] |
| for (rnn, config, dtype, direction) in itertools.product( |
| rnns, test_configs, dtype_list, directions): |
| num_layers, num_units, input_size = config |
| with ops.Graph().as_default(): |
| self._TestOpaqueParamsSize(rnn, num_layers, num_units, input_size, |
| dtype, direction) |
| |
| |
| class CudnnRNNTestTraining(test_util.TensorFlowTestCase): |
| |
| def setUp(self): |
| super(CudnnRNNTestTraining, self).setUp() |
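    # Snapshot the env vars that the training tests mutate so tearDown can
    # restore them afterwards.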
| self._reset_rnd_gen_state = os.environ.get("TF_CUDNN_RESET_RND_GEN_STATE", |
| str(False)) |
| self._rnn_use_v2 = os.environ.get("TF_CUDNN_RNN_USE_V2", "0") |
| |
| def tearDown(self): |
| super(CudnnRNNTestTraining, self).tearDown() |
| os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = self._reset_rnd_gen_state |
| os.environ["TF_CUDNN_RNN_USE_V2"] = self._rnn_use_v2 |
| |
| def _ComputeNumericGrad(self, sess, y, x, delta=1e-4, step=1): |
| """Compute the numeric gradient of y wrt to x. |
| |
| Args: |
| sess: The TF session constructed with a graph containing x and y. |
| y: A scalar TF Tensor in the graph constructed in sess. |
| x: A TF Tensor in the graph constructed in sess. |
| delta: Gradient checker's small perturbation of x[i]. |
| step: Only compute numerical gradients for a subset of x values. |
| I.e. dy/dx[i] is computed if i % step == 0. |
    Returns:
      An array of the same shape and dtype as x. If x[i] is not chosen
      to compute the numerical gradient dy/dx[i], the corresponding
      value is set to 0.
| """ |
| |
| x_data = sess.run(x) |
| x_size = x_data.size |
| x_shape = x_data.shape |
| |
| numeric_grad = np.zeros(x_size, dtype=x_data.dtype) |
| |
| for i in range(0, x_size, step): |
| x_pos = x_data.copy() |
| if x_size == 1: |
| x_pos += delta |
| else: |
| x_pos.flat[i] += delta |
      y_pos_feed_dict = {x.name: x_pos}
| y_pos = sess.run(y, feed_dict=y_pos_feed_dict) |
| |
| x_neg = x_data.copy() |
| if x_size == 1: |
| x_neg -= delta |
| else: |
| x_neg.flat[i] -= delta |
      y_neg_feed_dict = {x.name: x_neg}
| y_neg = sess.run(y, feed_dict=y_neg_feed_dict) |
| numeric_grad[i] = (y_pos - y_neg) / (2 * delta) |
| return numeric_grad.reshape(x_shape) |
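
  # A toy, self-contained illustration of the central-difference scheme used
  # above (illustrative only; no test calls it): for f(x) = x**2 at x = 3,
  # (f(3 + d) - f(3 - d)) / (2 * d) recovers f'(3) = 6.
  def _NumericGradToyExample(self, delta=1e-4):
    f = lambda x: x * x
    approx = (f(3. + delta) - f(3. - delta)) / (2 * delta)
    # Central differences are exact for quadratics up to float rounding.
    assert abs(approx - 6.) < 1e-9
    return approx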
| |
| def _GetShape(self, sess, inputs): |
| if not isinstance(inputs, collections.Iterable): |
| return sess.run(array_ops.shape(inputs)) |
| else: |
| return sess.run([array_ops.shape(x) for x in inputs]) |
| |
| def _GradientCheckFp16(self, sess, y, xs, num_samples, |
| tolerance=1e-6, delta=1e-4): |
| """Gradient check for Fp16. |
| |
    Fp16 numerical gradients end up being zeros, so use a different way to
    check gradients:

    Given a multivariate function:
      y = f(x1, x2, ..., xn)
      delta_y = f(x1 + delta_x1, x2 + delta_x2, ..., xn + delta_xn) -
                f(x1, x2, ..., xn)
              = f'(x1) * delta_x1 + f'(x2) * delta_x2 + ... + f'(xn) * delta_xn
    where:
      delta_xi is a very small perturbation of xi.
      f'(xi) is the gradient of y w.r.t. xi.
| |
    The gradient check verifies that the expected delta_y calculated by the
    above equation is close to the actual delta_y.

    Args:
| sess: tf.Session object. |
| y: output tensor. |
| xs: a tensor or a list of input tensors. |
| num_samples: number of test samples to run. |
| tolerance: error tolerance. |
      delta: the order of magnitude of the input perturbation applied to
        calculate the output change w.r.t. the inputs.
| """ |
| sym_grads = self._ComputeSymGrads(sess, y, xs) |
| xs_shapes = self._GetShape(sess, xs) |
| |
| x_vals = [sess.run(x) for x in xs] |
| for _ in range(num_samples): |
| delta_xs = [delta * np.random.rand(*shape.tolist()) |
| for shape in xs_shapes] |
| |
| feed_dict = {} |
| for x, x_val, delta_x in zip(xs, x_vals, delta_xs): |
| feed_dict[x] = x_val + delta_x |
| actual_delta_y = (float(sess.run(y, feed_dict=feed_dict)) - |
| float(sess.run(y))) |
| |
| expected_delta_y = 0. |
| for sym_grad, delta_x in zip(sym_grads, delta_xs): |
| expected_delta_y += np.dot( |
| sym_grad.astype(np.float32).flatten(), |
| delta_x.astype(np.float32).flatten()) |
| self.assertAllClose(expected_delta_y, actual_delta_y, |
| atol=tolerance, rtol=tolerance) |
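
  # A toy illustration of the first-order check above (illustrative only; no
  # test calls it): for y = x1 * x2 the gradients are (x2, x1), so delta_y
  # should be close to x2 * delta_x1 + x1 * delta_x2.
  def _Fp16CheckToyExample(self):
    x1, x2 = 2., 5.
    d1, d2 = 1e-3, 2e-3
    actual_delta_y = (x1 + d1) * (x2 + d2) - x1 * x2
    expected_delta_y = x2 * d1 + x1 * d2  # f'(x1)*d1 + f'(x2)*d2
    assert abs(actual_delta_y - expected_delta_y) < 1e-5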
| |
| def _GradientCheck(self, sess, y, xs, tolerance=1e-6, delta=1e-4): |
| sym_grads = self._ComputeSymGrads(sess, y, xs) |
| |
| num_grads = [self._ComputeNumericGrad(sess, y, x, delta) for x in xs] |
| self.assertEqual(len(sym_grads), len(num_grads)) |
| for sym, num in zip(sym_grads, num_grads): |
| self.assertFalse(np.any(np.isnan(sym))) |
| self.assertFalse(np.any(np.isnan(num))) |
| self.assertAllClose(sym, num, atol=tolerance, rtol=tolerance) |
| |
| def _ComputeSymGrads(self, sess, y, xs): |
| sym_grads_t = gradients.gradients(y, xs) |
| return sess.run(sym_grads_t) |
| |
| def _TestOneSimpleTraining(self, rnn_mode, num_layers, num_units, input_size, |
| batch_size, seq_length, dir_count, dropout, dtype, |
| use_v2, delta, tolerance): |
    # Gradient checking runs two forward passes with almost the same input;
    # the dropout patterns across the two runs must therefore be the same.
| logging.info("Training test with config: %s", locals()) |
| os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = str(True) |
| |
| np.random.seed(1234) |
| random_seed.set_random_seed(5678) |
| has_input_c = (rnn_mode == CUDNN_LSTM) |
| direction = (CUDNN_RNN_UNIDIRECTION |
| if dir_count == 1 else CUDNN_RNN_BIDIRECTION) |
| if use_v2: |
| os.environ["TF_CUDNN_RNN_USE_V2"] = "1" |
| else: |
| os.environ["TF_CUDNN_RNN_USE_V2"] = "0" |
| model = CudnnTestModel( |
| rnn_mode, |
| num_layers, |
| num_units, |
| input_size, |
| direction=direction, |
| dropout=dropout, |
| dtype=dtype, |
| training=True, |
| bias_initializer=init_ops.random_normal_initializer( |
| mean=1., dtype=dtype)) |
| rnn = model.rnn |
| params = rnn.trainable_variables[0] |
| |
| inputs = variables.Variable( |
| random_ops.random_uniform( |
| [seq_length, batch_size, input_size], dtype=dtype), |
| dtype=dtype) |
| input_h = variables.Variable( |
| random_ops.random_uniform( |
| [num_layers * dir_count, batch_size, num_units], dtype=dtype), |
| dtype=dtype) |
| if has_input_c: |
| input_c = variables.Variable( |
| random_ops.random_uniform( |
| [num_layers * dir_count, batch_size, num_units], dtype=dtype), |
| dtype=dtype) |
| initial_state = (input_h, input_c) |
| else: |
| initial_state = (input_h,) |
| total_sum = model.FProp(inputs, initial_state, training=True) |
| |
| with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: |
| sess.run(variables.global_variables_initializer()) |
| all_inputs = [inputs, params] |
| for s in initial_state: |
| all_inputs.append(s) |
| if dtype == dtypes.float16: |
| self._GradientCheckFp16( |
| sess, total_sum, all_inputs, |
| num_samples=FLAGS.grad_check_num_samples, |
| tolerance=tolerance, delta=delta) |
| else: |
| for _ in range(FLAGS.grad_check_num_samples): |
| # Each time choose a different set of inputs. |
| sess.run(variables.global_variables_initializer()) |
| self._GradientCheck( |
| sess, total_sum, all_inputs, |
| tolerance=tolerance, delta=delta) |
| |
| def _TestSimpleTrainingHelper(self, rnn_mode, test_configs): |
| dropouts = [0, 0.5, 1.] |
    v2_options = [False, True]
| for config, dropout, use_v2 in itertools.product(test_configs, dropouts, |
| v2_options): |
| dtype = config.get("dtype", dtypes.float32) |
| delta = config.get("delta", 1e-4) |
| tolerance = config.get("tolerance", 1e-6) |
| dir_count = config.get("dir_count", 1) |
| shape = config["shape"] |
| with ops.Graph().as_default(): |
| self._TestOneSimpleTraining( |
| rnn_mode, shape["num_layers"], shape["num_units"], |
| shape["input_size"], shape["batch_size"], shape["seq_length"], |
| dir_count, dropout, dtype, use_v2, delta, tolerance) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingLSTMFp64(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float64, |
| "tolerance": 5e-6, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingLSTMFp32(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float32, |
| "delta": 1e-4, |
| "tolerance": 9e-2, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingLSTMFp16(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float16, |
| "delta": 1e-3, |
| "tolerance": 9e-2, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| { |
| "dtype": dtypes.float16, |
| "delta": 1e-2, |
| "tolerance": 9e-2, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 6, |
| "input_size": 8, |
| "batch_size": 6, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingGRUFp64(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float64, |
| "tolerance": 5e-6, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| } |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingGRUFp32(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float32, |
| "delta": 1e-3, |
| "tolerance": 4e-3, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingGRUFp16(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float16, |
| "delta": 2e-3, |
| "tolerance": 6e-2, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingRNNTanhFp64(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float64, |
| "tolerance": 5e-6, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingRNNTanhFp32(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float32, |
| "delta": 1e-3, |
| "tolerance": 5e-3, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingRNNTanhFp16(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float16, |
| "delta": 1e-3, |
| "tolerance": 5e-2, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingRNNReluFp64(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float64, |
| "tolerance": 5e-6, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingRNNReluFp32(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float32, |
| "delta": 1e-4, |
| "tolerance": 3e-1, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) |
| |
| @unittest.skipUnless(test.is_built_with_cuda(), |
| "Test only applicable when running on GPUs") |
| def testSimpleTrainingRNNReluFp16(self): |
| test_configs = [ |
| { |
| "dtype": dtypes.float16, |
| "delta": 1e-3, |
| "tolerance": 7e-2, |
| "shape": { |
| "num_layers": 2, |
| "num_units": 3, |
| "input_size": 4, |
| "batch_size": 3, |
| "seq_length": 4, |
| }, |
| }, |
| ] |
| self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) |
| |
| |
| if __name__ == "__main__": |
| argv0 = sys.argv[0] |
| parser = argparse.ArgumentParser() |
| parser.add_argument( |
| "--grad_check_num_samples", |
| type=int, |
| default=5, |
| help="Number of samples to run for gradient check.") |
| FLAGS, unparsed = parser.parse_known_args() |
| sys.argv = [argv0] + unparsed |
| googletest.main() |