| # Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import functools |
| |
| from absl.testing import parameterized |
| import numpy as np |
| |
| from tensorflow.python import pywrap_tfe |
| from tensorflow.python.eager import backprop |
| from tensorflow.python.eager import context |
| from tensorflow.python.eager import def_function |
| from tensorflow.python.eager import function |
| from tensorflow.python.eager import tape as tape_lib |
| from tensorflow.python.eager import test |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import errors_impl |
| from tensorflow.python.framework import ops |
| from tensorflow.python.framework import sparse_tensor |
| from tensorflow.python.framework import tensor_shape |
| from tensorflow.python.framework import tensor_util |
| from tensorflow.python.framework import test_util |
| from tensorflow.python.framework.memory_checker import MemoryChecker |
| from tensorflow.python.layers.pooling import max_pooling3d |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import control_flow_ops |
| from tensorflow.python.ops import custom_gradient |
| from tensorflow.python.ops import embedding_ops |
| from tensorflow.python.ops import functional_ops |
| from tensorflow.python.ops import gradients |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import nn |
| from tensorflow.python.ops import nn_grad # pylint: disable=unused-import |
| from tensorflow.python.ops import nn_ops |
| from tensorflow.python.ops import random_ops |
| from tensorflow.python.ops import resource_variable_ops |
| from tensorflow.python.ops import sparse_ops |
| from tensorflow.python.ops import variables |
| from tensorflow.python.training import training |
| |
| |
| class BackpropTest(test.TestCase, parameterized.TestCase): |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testAggregateGradients(self): |
| |
| def fn(x): |
| ind1 = constant_op.constant(np.array([0, 1])) |
| ind2 = constant_op.constant(np.array([2, 3])) |
| ind3 = constant_op.constant(np.array([1, 3])) |
| g1 = embedding_ops.embedding_lookup(x, ind1) |
| g2 = embedding_ops.embedding_lookup(x, ind2) |
| g3 = embedding_ops.embedding_lookup(x, ind3) |
| return g1 * g2 * g3 |
| |
| var_np = np.random.rand(4, 2).astype(np.float32) |
| var = constant_op.constant(var_np) |
| grad = backprop.gradients_function(fn, [0])(var)[0] |
| grad = self.evaluate(ops.convert_to_tensor(grad)) |
| |
| if not context.executing_eagerly(): |
| tf_var = array_ops.constant(var_np, dtypes.float32) |
| tf_ind1 = array_ops.constant([0, 1]) |
| tf_ind2 = array_ops.constant([2, 3]) |
| tf_ind3 = array_ops.constant([1, 3]) |
| tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1) |
| tf_g2 = embedding_ops.embedding_lookup(tf_var, tf_ind2) |
| tf_g3 = embedding_ops.embedding_lookup(tf_var, tf_ind3) |
| tf_y = tf_g1 * tf_g2 * tf_g3 |
| tf_grad = gradients.gradients(tf_y, [tf_var])[0] |
| |
| tf_dense_grad = math_ops.unsorted_segment_sum(tf_grad.values, |
| tf_grad.indices, |
| tf_grad.dense_shape[0]) |
| |
| self.assertAllClose(grad, self.evaluate(tf_dense_grad)) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testAggregateGradientsWithTensor(self): |
| |
| def fn(x): |
| ind1 = constant_op.constant(np.array([0, 1])) |
| # A mixture of IndexedSlices and dense tensor to aggregate. |
| g1 = embedding_ops.embedding_lookup(x, ind1) |
| g2 = math_ops.reduce_sum(x * constant_op.constant(2.0)) |
| return g1 * g2 |
| |
| var_np = np.random.rand(4, 2).astype(np.float32) |
| var = constant_op.constant(var_np) |
| grad = backprop.gradients_function(fn, [0])(var)[0] |
| grad = self.evaluate(ops.convert_to_tensor(grad)) |
| |
| if not context.executing_eagerly(): |
| tf_var = array_ops.constant(var_np, dtypes.float32) |
| tf_ind1 = array_ops.constant([0, 1]) |
| tf_g1 = embedding_ops.embedding_lookup(tf_var, tf_ind1) |
| tf_g2 = math_ops.reduce_sum(tf_var * 2.0, axis=(0, 1)) |
| tf_y = tf_g1 * tf_g2 |
| tf_grad = gradients.gradients(tf_y, [tf_var])[0] |
| |
| self.assertAllClose(grad, tf_grad) |
| |
| def testImplicitGradWithResourceVariable(self): |
| x = resource_variable_ops.ResourceVariable( |
| initial_value=constant_op.constant(1.0), name='x') |
| |
| def fn(): |
| b = constant_op.constant(2.0) |
| c = math_ops.add(x.value(), b) |
| return math_ops.add(c, constant_op.constant(3.0)) |
| |
| grads_and_vars = backprop.implicit_grad(fn)() |
| self.assertAllEqual(grads_and_vars[0][0], 1.0) |
| self.assertAllEqual(id(grads_and_vars[0][1]), id(x)) |
| |
| @parameterized.named_parameters([('Function', def_function.function), |
| ('NoFunction', lambda f: f)]) |
| def testNoOpBehaviorConsistent(self, decorator): |
| |
| @decorator |
| def f(x): |
| # Test all different types of no-ops |
| x1 = array_ops.identity(x) |
| x2 = math_ops.add_v2(x, 0) |
| x3 = math_ops.subtract(x, 0) |
| x4 = math_ops.multiply(x, 1) |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| t.watch(x1) |
| t.watch(x2) |
| t.watch(x3) |
| t.watch(x4) |
| y1 = x * 2. |
| y2 = x1 * 3. |
| y3 = x2 * 3. |
| y4 = x3 * 3. |
| y5 = x4 * 3. |
| loss = y1 + y2 + y3 + y4 + y5 |
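| # The no-op outputs x1..x4 are computed before the tape starts recording, |
| # so each watched tensor contributes independently: d(loss)/dx = 2. via y1 |
| # and 3. for each copy. |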
| return t.gradient(loss, [x, x1, x2, x3, x4]) |
| |
| self.assertAllClose([2., 3., 3., 3., 3.], f(constant_op.constant(10.))) |
| |
| def testGradientInsideLoop(self): |
| with ops.Graph().as_default(): |
| v = resource_variable_ops.ResourceVariable(1.0) |
| |
| def body(_): |
| _ = v + 1.0 # This reads the variable inside the loop context |
| with backprop.GradientTape() as t: |
| result = v * 2 |
| self.assertIsNotNone(t.gradient(result, v)) |
| return 1.0 |
| |
| control_flow_ops.while_loop(lambda i: False, body, [1.0]) |
| |
| def testWhereGradient(self): |
| # Note: where is special because only some of its arguments are of |
| # differentiable dtypes. |
| |
| def f(x): |
| return array_ops.where(x < 10, x, x * x) |
| |
| g = backprop.gradients_function(f) |
| |
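| # d/dx where(x < 10, x, x*x) is 1 for x < 10 and 2*x otherwise. |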
| self.assertAllEqual(g(5.)[0], 1.0) |
| self.assertAllEqual(g(50.)[0], 100.0) |
| |
| def testTwoTargets(self): |
| with backprop.GradientTape() as t: |
| x = constant_op.constant(3.0) |
| y = constant_op.constant(2.0) |
| t.watch([x, y]) |
| xx = 2 * x |
| yy = 3 * y |
| dx, dy = t.gradient([xx, yy], [x, y]) |
| self.assertAllEqual(dx, 2.0) |
| self.assertAllEqual(dy, 3.0) |
| |
| def testCustomGradientEmptyError(self): |
| |
| @custom_gradient.custom_gradient |
| def identity(x): |
| |
| def grad(_): |
| return [] # This return value is wrong! |
| |
| return x, grad |
| |
| x = variables.Variable(1.0) |
| with backprop.GradientTape() as t: |
| y = identity(x) |
| with self.assertRaises(ValueError): |
| t.gradient(y, [x]) |
| |
| def testOutputGradUsedInComputation(self): |
| with backprop.GradientTape() as t: |
| x = constant_op.constant(3.0) |
| y = constant_op.constant(2.0) |
| t.watch([x, y]) |
| loss = x * y |
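| # With output_gradients=[1.0, 2.0], dx = 1.0 * d(loss)/dx + 2.0 * d(x)/dx |
| # = 1.0 * y + 2.0 = 4.0. |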
| dx, = t.gradient([loss, x], [x], output_gradients=[1.0, 2.0]) |
| self.assertAllEqual(dx, 4.0) |
| |
| def testDy(self): |
| |
| def f(x): |
| return x |
| |
| grad_fn = backprop.gradients_function(f) |
| self.assertAllEqual(2., grad_fn(1., dy=2.)[0]) |
| |
| def testGradientInteger(self): |
| |
| def f(x): |
| return x + x |
| |
| int_tensor = constant_op.constant(1) |
| self.assertEqual(backprop.gradients_function(f)(int_tensor)[0], None) |
| |
| def testErrors(self): |
| |
| @custom_gradient.custom_gradient |
| def f(x): |
| |
| def grad(_): |
| raise RuntimeError('x') |
| |
| return x, grad |
| |
| # TODO(apassos) raise the right error here |
| with self.assertRaises(RuntimeError): |
| backprop.gradients_function(f)(constant_op.constant(1.0)) |
| |
| def testGradientsFunctionInCustomGradient(self): |
| |
| @custom_gradient.custom_gradient |
| def f(x): |
| (y,) = backprop.gradients_function(lambda x: x * x)(x) |
| |
| def grad(dy): |
| return [2 * dy] |
| |
| return y, grad |
| |
| self.assertAllEqual(f(1.0), 2.0) |
| |
| def testImplicitGradOverEmbeddingLookup(self): |
| batch_size = 8 |
| embedding_size = 512 |
| vocab_size = 1000 |
| lrn_rate = 0.1 |
| random_init = random_ops.random_uniform([vocab_size, embedding_size]) |
| |
| x = array_ops.ones((batch_size), dtypes.int64) |
| embedding = resource_variable_ops.ResourceVariable( |
| initial_value=random_init, dtype=dtypes.float32, name='embedding') |
| |
| def f(): |
| embedded_x = embedding_ops.embedding_lookup(embedding, x) |
| return constant_op.constant(1.0, dtypes.float32) - embedded_x |
| |
| grad = backprop.implicit_grad(f)()[0][0] |
| opt = training.GradientDescentOptimizer(lrn_rate) |
| |
| with ops.Graph().as_default(), self.cached_session(): |
| tf_x = array_ops.ones((batch_size), dtypes.int64) |
| # TODO(ashankar,apassos): Change to ResourceVariable. |
| tf_embedding = variables.Variable( |
| random_init.numpy(), name='tf_embedding') |
| tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x) |
| tf_y = 1.0 - tf_embedded_x |
| tf_grad = gradients.gradients(tf_y, [tf_embedding])[0] |
| tf_opt = training.GradientDescentOptimizer(0.1) |
| tf_embedding.initializer.run() |
| |
| self.assertAllClose(tf_grad.indices, grad.indices) |
| self.assertAllClose(tf_grad.values, grad.values) |
| |
| tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run() |
| expected = self.evaluate(tf_embedding) |
| opt.apply_gradients([(grad, embedding)]) |
| self.assertAllClose(expected, embedding.read_value()) |
| |
| def testImplicitGradOrdering(self): |
| v0 = resource_variable_ops.ResourceVariable(1.0) |
| v1 = resource_variable_ops.ResourceVariable(2.0) |
| |
| def f(): |
| x = v1 * v1 |
| y = v0 * v0 |
| return x + y |
| |
| grads = backprop.implicit_grad(f)() |
| ordered_variables = [x[1] for x in grads] |
| self.assertIs(ordered_variables[0], v0) |
| self.assertIs(ordered_variables[1], v1) |
| |
| def testTapeNoOpGradient(self): |
| x = constant_op.constant(3.0) |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| y = x |
| self.assertEqual(t.gradient(y, x).numpy(), 1.0) |
| |
| def testTapeIdentityGradientIsIdentity(self): |
| x = constant_op.constant(3.0) |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| y = array_ops.identity(x) |
| self.assertEqual(t.gradient(y, x).numpy(), 1.0) |
| |
| def testFunctionIndexedSlicesGradient(self): |
| |
| @def_function.function |
| def f(x): |
| return x + 1 |
| |
| with backprop.GradientTape() as t: |
| x = constant_op.constant([1.0]) |
| t.watch(x) |
| y = f(x) |
| y = array_ops.gather(y, [0]) |
| self.assertAllEqual(t.gradient(y, x), [1.0]) |
| |
| def testTapeGradientMultiTargetOneIsSource(self): |
| x = constant_op.constant(2.0) |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| y = x * x |
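| # d/dx (x + x*x) = 1 + 2*x = 5 at x = 2. |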
| self.assertEqual(t.gradient([x, y], x).numpy(), 5.0) |
| |
| def testTapeNoOpGradientWithMultiTargetAllSource(self): |
| x = constant_op.constant(3.0) |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| y = x |
| self.assertEqual(t.gradient([y, y], x).numpy(), 2.0) |
| |
| def testTapeNoOpGradientWithMultiTargetMultiSource(self): |
| x = constant_op.constant(3.0) |
| y = constant_op.constant(5.0) |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| t.watch(y) |
| z = y * y |
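| # d/dx (x + y + z) = 1 and d/dy (x + y + z) = 1 + 2*y = 11 at y = 5. |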
| self.assertAllEqual(t.gradient([x, y, z], [x, y]), [1.0, 11.0]) |
| |
| def testTapeGradientStringTarget(self): |
| s = constant_op.constant('unknown', dtype=dtypes.string) |
| x = constant_op.constant(3.0) |
| |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| t.watch(s) |
| grads = t.gradient(s, x) |
| self.assertEqual(grads, None) |
| |
| def testTapeNoOpGradientStringSourceAndTarget(self): |
| s = constant_op.constant('unknown', dtype=dtypes.string) |
| |
| with backprop.GradientTape() as t: |
| t.watch(s) |
| grads = t.gradient(s, s) |
| self.assertEqual(grads, None) |
| |
| def testTapeNoOpGradientWithMultiTargetMultiSourceIncludeString(self): |
| x = constant_op.constant(3.0) |
| y = constant_op.constant(5.0) |
| s = constant_op.constant('unknown', dtype=dtypes.string) |
| |
| with backprop.GradientTape() as t: |
| t.watch(x) |
| t.watch(y) |
| t.watch(s) |
| z = y * y |
| grads = t.gradient([x, y, z, s], [x, y, s]) |
| self.assertAllEqual(grads[:2], [1.0, 11.0]) |
| self.assertEqual(grads[2], None) |
| |
| def testTapeNoOpOnVariableIsIdentity(self): |
| v0 = resource_variable_ops.ResourceVariable(1.0) |
| with backprop.GradientTape() as t: |
| y = v0.read_value() |
| self.assertEqual(t.gradient(y, v0).numpy(), 1.0) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.assert_no_garbage_created |
| def testTapeNoOpGradient2By2(self): |
| a_2_by_2 = constant_op.constant(2.0, shape=[2, 2]) |
| with backprop.GradientTape(persistent=True) as tape: |
| tape.watch(a_2_by_2) |
| dy_dy = tape.gradient(a_2_by_2, [a_2_by_2])[0] |
| self.assertAllEqual(dy_dy.numpy(), |
| constant_op.constant(1.0, shape=[2, 2]).numpy()) |
| |
| @test_util.assert_no_new_pyobjects_executing_eagerly |
| def testTapeNoOpGradientMultiTarget2By2(self): |
| a_2_by_2 = constant_op.constant(2.0, shape=[2, 2]) |
| with backprop.GradientTape(persistent=True) as tape: |
| tape.watch(a_2_by_2) |
| dy_dy = tape.gradient([a_2_by_2, a_2_by_2], [a_2_by_2])[0] |
| self.assertAllEqual(dy_dy.numpy(), |
| constant_op.constant(2.0, shape=[2, 2]).numpy()) |
| |
| def testTapeStopRecording(self): |
| with backprop.GradientTape() as t: |
| x = resource_variable_ops.ResourceVariable(1.0) |
| with t.stop_recording(): |
| y = x * x |
| self.assertEqual(t.gradient(y, x), None) |
| |
| def testTapeStopStartRecording(self): |
| with backprop.GradientTape(persistent=True) as t: |
| x = resource_variable_ops.ResourceVariable(1.0) |
| x2 = x * 2 # This should be differentiated through. |
| with t.stop_recording(): |
| y = x2 * x2 |
| z = x2 * x2 |
| self.assertEqual(t.gradient(y, x2), None) |
| |
| # If the x*2 was not differentiated through, this would be 2.0, not 4.0 |
| self.assertEqual(t.gradient(z, x2).numpy(), 4.0) |
| |
| def testTapeReset(self): |
| with backprop.GradientTape() as t: |
| v = resource_variable_ops.ResourceVariable(1.0) |
| loss = v * v |
| t.reset() |
| loss += v * v |
| self.assertAllEqual(t.gradient(loss, v), 2.0) |
| |
| def testPythonMax(self): |
| x = [ |
| resource_variable_ops.ResourceVariable(2.), |
| resource_variable_ops.ResourceVariable(3.), |
| resource_variable_ops.ResourceVariable(5.) |
| ] |
| with backprop.GradientTape() as t: |
| f = max(x) |
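| # max(x) returns the variable holding 5., so only that variable is |
| # connected to f and receives a gradient. |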
| grad = t.gradient(f, x) |
| self.assertAllEqual(self.evaluate(f), 5.) |
| self.assertAllEqual(self.evaluate(grad), [None, None, 1.0]) |
| |
| def testAutomaticWatchedVariables(self): |
| with backprop.GradientTape() as t: |
| self.assertEqual(0, len(t.watched_variables())) |
| v = resource_variable_ops.ResourceVariable(1.0) |
| loss = v * v |
| self.assertAllEqual([v], t.watched_variables()) |
| |
| t.reset() |
| self.assertEqual(0, len(t.watched_variables())) |
| loss += v * v |
| self.assertAllEqual([v], t.watched_variables()) |
| |
| def testExplicitWatchedVariables(self): |
| with backprop.GradientTape() as t: |
| self.assertEqual(0, len(t.watched_variables())) |
| v = resource_variable_ops.ResourceVariable(1.0) |
| t.watch(v) |
| self.assertAllEqual([v], t.watched_variables()) |
| |
| t.reset() |
| self.assertEqual(0, len(t.watched_variables())) |
| t.watch(v) |
| self.assertAllEqual([v], t.watched_variables()) |
| |
| @test_util.assert_no_new_tensors |
| def testGradientNone(self): |
| |
| def loss(x, l): |
| return math_ops.reduce_mean( |
| nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l), |
| constant_op.constant([0])) |
| |
| logits = constant_op.constant([[0.0, 0.0]]) |
| labels = constant_op.constant([[1.0, 0.0]]) |
| # softmax_cross_entropy_with_logits returns two outputs and in this case the |
| # gradient wrt the second is None. |
| g, = backprop.gradients_function(loss, [0])(logits, labels) |
| self.assertAllEqual(g.numpy(), [[-0.5, 0.5]]) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testGradientWithinTapeBlock(self): |
| v1 = resource_variable_ops.ResourceVariable(1.) |
| self.evaluate(v1.initializer) |
| with backprop.GradientTape() as t: |
| loss = 2 * v1 |
| grad = t.gradient(loss, v1) |
| self.assertAllEqual(self.evaluate(grad), 2.0) |
| |
| with backprop.GradientTape(persistent=True) as t: |
| loss = 2 * v1 |
| grad = t.gradient(loss, v1) |
| self.assertAllEqual(self.evaluate(grad), 2.0) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testNestedSelfContexts(self): |
| v1 = resource_variable_ops.ResourceVariable(1.) |
| self.evaluate(v1.initializer) |
| with backprop.GradientTape() as t: |
| with self.assertRaises(ValueError): |
| with t: |
| pass |
| |
| @test_util.assert_no_new_tensors |
| def testSecondGrad(self): |
| |
| def first(x): |
| l = constant_op.constant([[0.0]]) |
| x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x) |
| x = math_ops.reduce_sum(x, constant_op.constant([0])) |
| return x |
| |
| def second(x): |
| grad = backprop.gradients_function(first, [0])(x)[0] |
| return math_ops.reduce_sum(grad, constant_op.constant([0])) |
| |
| f = constant_op.constant([[0.1]]) |
| grad = backprop.gradients_function(second, [0])(f)[0] |
| self.assertAllEqual([[0.0]], grad) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testWatchingIsTapeLocal(self): |
| x1 = resource_variable_ops.ResourceVariable(2.0, trainable=False) |
| x2 = resource_variable_ops.ResourceVariable(2.0, trainable=False) |
| |
| with backprop.GradientTape() as tape1: |
| with backprop.GradientTape() as tape2: |
| tape1.watch(x1) |
| tape2.watch([x1, x2]) |
| y = x1**3 |
| z = x2**2 |
| dy, dz = tape2.gradient([y, z], [x1, x2]) |
| d2y, d2z = tape1.gradient([dy, dz], [x1, x2]) |
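| # tape1 watches only x1, so d2y = d/dx1 (3 * x1**2) = 6 * x1 = 12., while |
| # d2z is None because tape1 never watched x2. |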
| |
| self.evaluate([x1.initializer, x2.initializer]) |
| self.assertEqual(self.evaluate(d2y), 12.0) |
| self.assertIsNone(d2z) |
| |
| @test_util.assert_no_new_tensors |
| def testMakeVJP(self): |
| |
| def f(x): |
| return x * x |
| |
| wrapped_fn = backprop.make_vjp(f, persistent=False) |
| result, vjp = wrapped_fn(constant_op.constant(3.0)) |
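| # f(3) = 9 and the VJP with seed 2.0 is 2.0 * df/dx = 2.0 * 2 * 3 = 12. |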
| self.assertAllEqual(result, 9.0) |
| self.assertAllEqual(vjp(2.0)[0], 12.0) |
| |
| def testPersistentMakeVJP(self): |
| |
| def f(x): |
| return x * x |
| |
| wrapped_fn = backprop.make_vjp(f, persistent=True) |
| _, vjp = wrapped_fn(constant_op.constant(3.0)) |
| vjp_result1 = vjp(2.0)[0] |
| vjp_result2 = vjp(2.0)[0] |
| self.assertAllEqual(vjp_result1, 12.0) |
| self.assertAllEqual(vjp_result2, 12.0) |
| |
| @test_util.assert_no_new_tensors |
| def testGradGrad(self): |
| |
| def sq(x): |
| return x * x |
| |
| def grad(x): |
| value = backprop.gradients_function(sq, [0])(x)[0] |
| return value |
| |
| gradgrad = backprop.gradients_function(grad, [0]) |
| |
| self.assertAllEqual(gradgrad(constant_op.constant(3.0))[0], 2.0) |
| |
| @test_util.assert_no_new_tensors |
| def testGradGradExp(self): |
| |
| def grad(x): |
| value = backprop.gradients_function(math_ops.exp, [0])(x)[0] |
| return value |
| |
| gradgrad = backprop.gradients_function(grad, [0]) |
| |
| self.assertAllEqual(gradgrad(constant_op.constant(0.0))[0], 1.0) |
| |
| @test_util.assert_no_new_tensors |
| def testStopGradient(self): |
| grad = backprop.gradients_function( |
| lambda x: array_ops.stop_gradient(math_ops.argmax(x))) |
| self.assertAllEqual(grad([0.0])[0], None) |
| |
| @test_util.assert_no_new_tensors |
| def testArgmax(self): |
| |
| def argmax(x): |
| i = math_ops.argmax(x) |
| return array_ops.stop_gradient(i) |
| |
| grad = backprop.gradients_function(argmax) |
| self.assertAllEqual(grad([0.0])[0], None) |
| |
| @test_util.run_gpu_only |
| @test_util.assert_no_new_tensors |
| def testGPU(self): |
| |
| def fn(x): |
| with context.device('/gpu:0'): |
| b = constant_op.constant(2.0) |
| c = math_ops.add(x.gpu(), b) |
| # TODO(apassos): remove cpu below by making TensorVSpace aware |
| # of devices. |
| return math_ops.add(c, constant_op.constant(3.0)).cpu() |
| |
| grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0] |
| self.assertAllEqual(grad, 1.0) |
| |
| @test_util.run_gpu_only |
| @test_util.assert_no_new_tensors |
| def testGPUImplicitGrad(self): |
| with context.device('gpu:0'): |
| v = resource_variable_ops.ResourceVariable( |
| constant_op.constant(1.0), name='v') |
| |
| def f(): |
| with context.device('gpu:0'): |
| return v.read_value() |
| |
| self.assertEqual(backprop.implicit_grad(f)()[0][0].cpu().numpy(), 1.0) |
| |
| @test_util.assert_no_new_tensors |
| def testCPU(self): |
| |
| def fn(x): |
| b = constant_op.constant(2.0) |
| c = math_ops.add(x, b) |
| return math_ops.add(c, constant_op.constant(3.0)) |
| |
| grad = backprop.gradients_function(fn, [0])(constant_op.constant(1.0))[0] |
| self.assertAllEqual(grad, 1.0) |
| |
| @test_util.run_gpu_only |
| @test_util.assert_no_new_tensors |
| def testTensorCopyGPU2CPU2GPU(self): |
| |
| def f(a, b): |
| return a.cpu() + b.cpu() |
| |
| with context.device('/gpu:0'): |
| a = constant_op.constant(1.0) |
| b = constant_op.constant(2.0) |
| |
| grad = backprop.gradients_function(f, [0])(a, b)[0] |
| self.assertAllEqual(grad, 1.0) |
| |
| @test_util.assert_no_new_tensors |
| def testEmptyParams(self): |
| |
| def fn(a, b): |
| return a * b |
| |
| x = constant_op.constant(1.0) |
| y = constant_op.constant(2.0) |
| dx, dy = backprop.gradients_function(fn)(x, y) |
| self.assertAllEqual(dx, y.numpy()) |
| self.assertAllEqual(dy, x.numpy()) |
| |
| @test_util.assert_no_new_tensors |
| def testUnconnectedNone(self): |
| v = resource_variable_ops.ResourceVariable(1.0, name='testUnconnectedNone') |
| |
| def f(): |
| v.read_value() |
| return constant_op.constant(1.0) |
| |
| self.assertEqual(backprop.implicit_grad(f)()[0][0], None) |
| |
| @test_util.assert_no_new_tensors |
| def testGradientTapeReEnterContext(self): |
| g = backprop.GradientTape() |
| with g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = 2 * x |
| with g: |
| z = 2 * y |
| grad = g.gradient(target=z, sources=[x]) |
| self.assertEqual(self.evaluate(grad), [4.0]) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testGradientTapeRepeatedSource(self): |
| with backprop.GradientTape(persistent=False) as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = 2 * x |
| grad = g.gradient(target=y, sources=[x, x]) |
| self.assertEqual(self.evaluate(grad), [2.0, 2.0]) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testPersistentGradientTapeRepeatedSource(self): |
| with backprop.GradientTape(persistent=True) as g: |
| x = constant_op.constant(3.0) |
| y = constant_op.constant(5.0) |
| g.watch(x) |
| g.watch(y) |
| z = x * x + x * y |
| grad = g.gradient(target=z, sources=[x, x]) |
| self.assertEqual(self.evaluate(grad), [11.0, 11.0]) |
| grad = g.gradient(target=z, sources=[y, x]) |
| self.assertEqual(self.evaluate(grad), [3.0, 11.0]) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testGradientTapeStructure(self): |
| with backprop.GradientTape(persistent=True) as g: |
| # Using different constant values because constant tensors are |
| # cached, leading to a different gradient than what one might expect. |
| x1 = constant_op.constant(3.0) |
| x2 = constant_op.constant(3.1) |
| x3 = constant_op.constant(3.2) |
| g.watch(x1) |
| g.watch(x2) |
| g.watch(x3) |
| y = x1 + 2 * x2 + 3 * x3 |
| self.assertEqual(self.evaluate(g.gradient(y, x1)), [1.0]) |
| self.assertEqual(self.evaluate(g.gradient(y, (x1,))), (1.0,)) |
| self.assertEqual(self.evaluate(g.gradient(y, (x1, x2))), (1.0, 2.0)) |
| self.assertEqual( |
| self.evaluate(g.gradient(y, [(x1, x2), (x2, x3)])), [(1.0, 2.0), |
| (2.0, 3.0)]) |
| self.assertEqual( |
| self.evaluate(g.gradient(y, (x1, x2, [x1, x3]))), |
| (1.0, 2.0, [1.0, 3.0])) |
| self.assertEqual( |
| self.evaluate(g.gradient(y, [x1, { |
| 'x2': x2, |
| 'x3': x3 |
| }])), [1.0, { |
| 'x2': 2.0, |
| 'x3': 3.0 |
| }]) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testGradientTape(self): |
| with backprop.GradientTape() as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = x * x |
| with backprop.GradientTape() as gg: |
| gg.watch(y) |
| z = 2 * y |
| inner_grad = gg.gradient(z, [y])[0] |
| self.assertEqual(self.evaluate(inner_grad), 2.0) |
| y += inner_grad |
| grad = g.gradient(y, [x])[0] |
| self.assertEqual(self.evaluate(grad), 6.0) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testGradientTapeCalledOnConstantTarget(self): |
| with backprop.GradientTape() as g: |
| x = variables.Variable([3.0]) |
| y = variables.Variable([2.0]) |
| grad = g.gradient(x, y) |
| self.assertAllEqual(grad, None) |
| |
| @test_util.run_in_graph_and_eager_modes |
| @test_util.run_v1_only('b/120545219') |
| def testGradientTapeWithCond(self): |
| x = constant_op.constant(3.0) |
| |
| def true_fn(): |
| return x |
| |
| def false_fn(): |
| return x * x |
| |
| with backprop.GradientTape() as g: |
| g.watch(x) |
| y = control_flow_ops.cond(x < x, true_fn, false_fn) |
| |
| if not context.executing_eagerly(): |
| with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'): |
| dy = g.gradient(y, [x])[0] |
| else: |
| dy = g.gradient(y, [x])[0] |
| self.assertEqual(self.evaluate(dy), 6.0) |
| |
| @test_util.run_in_graph_and_eager_modes |
| @test_util.run_v1_only('b/120545219') |
| def testGradientTapeWithWhileLoop(self): |
| i = constant_op.constant(1) |
| x = constant_op.constant(2.) |
| |
| def cond(i, _): |
| return i < 3 |
| |
| def body(i, x): |
| return i + 1, x * 2 |
| |
| with backprop.GradientTape() as g: |
| g.watch([x]) |
| _, y = control_flow_ops.while_loop(cond, body, [i, x]) |
| |
| if not context.executing_eagerly(): |
| with self.assertRaisesRegex(NotImplementedError, 'tf.gradients'): |
| dy = g.gradient(y, [x])[0] |
| else: |
| dy = g.gradient(y, [x])[0] |
| self.assertEqual(self.evaluate(dy), 4.0) |
| |
| @test_util.assert_no_new_tensors |
| def testGradientTapeGradientCalledMultipleTimes(self): |
| with backprop.GradientTape() as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = x * x |
| z = y * y |
| g.gradient(z, [x]) |
| with self.assertRaisesRegex( |
| RuntimeError, 'A non-persistent GradientTape can only'): |
| g.gradient(y, [x]) |
| |
| @test_util.assert_no_new_tensors |
| def testGradientTapeJacobianCalledMultipleTimes(self): |
| with backprop.GradientTape() as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = x * x |
| z = y * y |
| g.jacobian(z, [x]) |
| with self.assertRaisesRegex( |
| RuntimeError, 'A non-persistent GradientTape can only'): |
| g.jacobian(y, [x]) |
| |
| @test_util.assert_no_new_tensors |
| def testGradientTapeBatchJacobianCalledMultipleTimes(self): |
| with backprop.GradientTape() as g: |
| x = constant_op.constant([[3.0]]) |
| g.watch(x) |
| y = x * x |
| z = y * y |
| g.batch_jacobian(z, x) |
| with self.assertRaisesRegex( |
| RuntimeError, 'A non-persistent GradientTape can only'): |
| g.batch_jacobian(y, [x]) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| @test_util.run_v1_only('b/120545219') |
| def testPersistentTape(self): |
| with backprop.GradientTape(persistent=True) as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = x * x |
| z = y * y |
| dz_dx = g.gradient(z, [x])[0] |
| self.assertEqual(self.evaluate(dz_dx), 4 * 3 * 3 * 3) |
| dy_dx = g.gradient(y, [x])[0] |
| self.assertEqual(self.evaluate(dy_dx), 2 * 3) |
| del g |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testHigherOrderGradient(self): |
| with backprop.GradientTape(persistent=True) as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = x**3 # y := x^3 |
| dy_dx = g.gradient(y, x) # dy/dx := 3x^2 |
| d2y_dx2 = g.gradient(dy_dx, x) # d2y/dx2 := 6x |
| d3y_dx3 = g.gradient(d2y_dx2, x) # d3y/dx3 := 6 |
| x = 3 |
| self.assertEqual(self.evaluate(y), x**3) |
| self.assertEqual(self.evaluate(dy_dx), 3 * x**2) |
| self.assertEqual(self.evaluate(d2y_dx2), 6 * x) |
| self.assertEqual(self.evaluate(d3y_dx3), 6) |
| del g |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testPersistentNestedTape(self): |
| with backprop.GradientTape(persistent=True) as g: |
| x = constant_op.constant(3.0) |
| g.watch(x) |
| y = x * x |
| with backprop.GradientTape(persistent=True) as gg: |
| gg.watch(y) |
| z = 2 * y |
| for _ in range(2): |
| inner_grad = gg.gradient(z, [y])[0] |
| self.assertEqual(self.evaluate(inner_grad), 2.0) |
| y += inner_grad |
| del gg |
| grad = g.gradient(y, [x])[0] |
| self.assertEqual(self.evaluate(grad), 6.0) |
| grad = g.gradient(z, [x])[0] |
| self.assertEqual(self.evaluate(grad), 12.0) |
| del g |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testGradientTapeVariable(self): |
| v = resource_variable_ops.ResourceVariable(1.0, name='v') |
| self.evaluate(v.initializer) |
| with backprop.GradientTape() as g: |
| y = v * v |
| grad = g.gradient(y, [v])[0] |
| self.assertAllEqual(self.evaluate(grad), 2.0) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testNestedGradients(self): |
| x = constant_op.constant(3.0) |
| with backprop.GradientTape() as g: |
| g.watch(x) |
| y = x * x |
| z = y * y |
| dz_dx, dz_dy = g.gradient(z, [x, y]) |
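| # z = y * y = x**4, so dz/dx = 4 * x**3 = 108 and dz/dy = 2 * y = 18. |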
| self.assertEqual(self.evaluate(dz_dx), 108.0) |
| self.assertEqual(self.evaluate(dz_dy), 18.0) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testUnconnectedGradientsDefault(self): |
| x = constant_op.constant(1.0) |
| y = constant_op.constant(3.0) |
| with backprop.GradientTape() as g: |
| g.watch([x, y]) |
| z = y * 2 |
| dz_dx = g.gradient(z, x) |
| self.assertEqual(dz_dx, None) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testUnconnectedGradientsZeros(self): |
| x = constant_op.constant(1.0, shape=[2, 2]) |
| y = constant_op.constant(3.0) |
| with backprop.GradientTape() as g: |
| g.watch([x, y]) |
| z = y * 2 |
| dz_dx = g.gradient(z, x, unconnected_gradients='zero') |
| self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx)) |
| |
| @test_util.assert_no_new_tensors |
| @test_util.run_in_graph_and_eager_modes |
| def testUnconnectedGradientsVariablesZeros(self): |
| x = resource_variable_ops.ResourceVariable( |
| constant_op.constant(1., shape=[2, 2])) |
| self.evaluate(x.initializer) |
| y = resource_variable_ops.ResourceVariable(constant_op.constant(3.)) |
| self.evaluate(y.initializer) |
| with backprop.GradientTape() as g: |
| g.watch([x, y]) |
| z = y * 2 |
| dz_dx = g.gradient(z, x, unconnected_gradients='zero') |
| self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(dz_dx)) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testUnknownUnconnectedGradientsValueGiven(self): |
| x = constant_op.constant(1.0) |
| y = constant_op.constant(1.0) |
| with backprop.GradientTape() as g: |
| g.watch([x, y]) |
| z = y * 2 |
| with self.assertRaisesRegex( |
| ValueError, "Unknown value for unconnected_gradients: 'nonsense'"): |
| g.gradient(z, x, unconnected_gradients='nonsense') |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testUnconnectedGradientsNestedDefunZeros(self): |
| |
| @function.defun |
| def f(x): |
| return x * x |
| |
| @function.defun |
| def h(y): |
| z = f(y) |
| return array_ops.stop_gradient(z) |
| |
| x = constant_op.constant(1.0) |
| with backprop.GradientTape() as g: |
| g.watch(x) |
| k = x + 2. |
| y = h(k) |
| |
| dy_dx = g.gradient(y, x, unconnected_gradients='zero') |
| self.assertEqual(0.0, self.evaluate(dy_dx)) |
| |
| def testInvalidRecordOperationMessage(self): |
| y = constant_op.constant(2.) |
| x = constant_op.constant(1.) |
| with backprop.GradientTape() as g: |
| g.watch(x) |
| tape_lib.record_operation('InvalidBackprop', [y], [x], lambda dy: []) |
| with self.assertRaisesRegex(errors_impl.InternalError, |
| 'InvalidBackprop.*too few gradients'): |
| g.gradient(y, x) |
| |
| @test_util.assert_no_new_tensors |
| def testEmptyParamsForValueAndGradFunction(self): |
| |
| def fn(a, b): |
| return a * b |
| |
| val_and_grads_fn = backprop.val_and_grad_function(fn) |
| |
| x = 2.0 |
| y = 3.0 |
| val, (dx, dy) = val_and_grads_fn(x, y) |
| self.assertAllClose(val, x * y) |
| self.assertAllEqual(dx, y) |
| self.assertAllEqual(dy, x) |
| |
| @test_util.assert_no_new_tensors |
| def testNonEmptyParamsForValueAndGradFunction(self): |
| |
| def fn(a, b): |
| return a * b |
| |
| val_and_grad_fn = backprop.val_and_grad_function(fn, params=[1]) |
| |
| x = 2.0 |
| y = 3.0 |
| val, grads = val_and_grad_fn(x, y) |
| self.assertAllClose(val, x * y) |
| self.assertEqual(1, len(grads)) |
| self.assertAllEqual(grads[0], x) |
| |
| @test_util.run_gpu_only |
| @test_util.assert_no_new_tensors |
| def testTensorCopyCPU2GPU2CPU(self): |
| # forward: a (cpu->gpu) -> add (gpu) -> c (gpu->cpu) -> add (cpu) -> e (cpu) |
| # back: e (cpu) -> add (cpu) -> c (cpu->gpu) -> add (gpu) -> grad (gpu->cpu) |
| def f(a, b): |
| with context.device('/gpu:0'): |
| c = math_ops.add(a.gpu(0), b.gpu(0)) |
| return math_ops.add(c.cpu(), constant_op.constant(3.0)) |
| |
| with context.device('/cpu:0'): |
| a = constant_op.constant(1.0) |
| b = constant_op.constant(2.0) |
| |
| grad = backprop.gradients_function(f, [0])(a, b)[0] |
| self.assertAllEqual(grad, 1.0) |
| |
| def testGetAttrType(self): |
| typ = backprop.op_attr_type('Add', 'T') |
| self.assertEqual(typ, int(pywrap_tfe.TF_ATTR_TYPE)) |
| |
| def testGetAttrList(self): |
| typ = backprop.op_attr_type('MaxPool', 'ksize') |
| self.assertEqual(typ, [int(pywrap_tfe.TF_ATTR_INT)]) |
| |
| def testMakeAttrType(self): |
| self.assertEqual(dtypes.float32, |
| backprop.make_attr(int(pywrap_tfe.TF_ATTR_TYPE), 1)) |
| |
| def testMakeAttrTypeList(self): |
| self.assertEqual([dtypes.float32], |
| backprop.make_attr([int(pywrap_tfe.TF_ATTR_TYPE)], [1])) |
| |
| def testMulType(self): |
| |
| def mul(x): |
| return math_ops._mul_dispatch(x, x) # pylint: disable=protected-access |
| |
| self.assertAllEqual(backprop.gradients_function(mul)(3.0)[0].numpy(), 6.0) |
| |
| def testMakeAttrShape(self): |
| for s in ([], None, [1, 2, 3], [None, None], [1, None, 3]): |
| expected = tensor_shape.TensorShape(s).as_proto() |
| actual = backprop.make_attr(int(pywrap_tfe.TF_ATTR_SHAPE), s) |
| self.assertEqual( |
| expected, |
| actual, |
| msg=('For shape %r, expected %r != %r actual' % |
| (s, expected, actual))) |
| |
| def testMakeAttrShapeList(self): |
| shape_list = [[], None, [1, 2, 3], [None, None], [1, None, 3]] |
| self.assertEqual( |
| [tensor_shape.TensorShape(s).as_proto() for s in shape_list], |
| backprop.make_attr([int(pywrap_tfe.TF_ATTR_SHAPE)], shape_list)) |
| |
| def testArgsGradientFunction(self): |
| |
| def f(*args): |
| return args[0] * args[0] |
| |
| grad = backprop.gradients_function(f) |
| self.assertAllEqual(grad(1.0)[0], 2.0) |
| |
| def testPartial(self): |
| |
| def f(x, y): |
| return x * y |
| |
| part = functools.partial(f, constant_op.constant(2.0)) |
| self.assertAllEqual( |
| backprop.gradients_function(part)(constant_op.constant(1.0))[0], 2.0) |
| |
| def testReturnSameThing(self): |
| |
| def f(x): |
| return x, 2 * x |
| |
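| # The gradients of both outputs are summed: d/dx (x) + d/dx (2 * x) = 3. |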
| self.assertAllEqual(backprop.gradients_function(f)(1.0)[0], 3.0) |
| |
| @test_util.assert_no_new_tensors |
| def testExceptionSafety(self): |
| |
| def f(unused_x): |
| raise ValueError() |
| |
| try: |
| backprop.gradients_function(f)(1.0) |
| except ValueError: |
| pass |
| |
| def real_f(x): |
| return x * x |
| |
| self.assertAllEqual(backprop.gradients_function(real_f)(1.0)[0], 2.0) |
| |
| @test_util.assert_no_new_tensors |
| def testMultiValueConvertToTensor(self): |
| x = resource_variable_ops.ResourceVariable( |
| initial_value=array_ops.constant([1.0]), name='x') |
| |
| def fn(): |
| a = math_ops.add(x.value(), 1.0) |
| # Make sure convert_to_tensor works correctly with list of TensorNodes. |
| b = array_ops.stack([a, a], axis=0) |
| return math_ops.reduce_mean(b) |
| |
| grad = backprop.implicit_grad(fn)()[0][0] |
| self.assertAllEqual([1.0], grad) |
| |
| def testOutput(self): |
| |
| def multiout(x): |
| return x + 2, x * x |
| |
| x = constant_op.constant([0.0, 1.0, 2.0]) |
| |
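| # gradients_function sums the gradients of both outputs: 1 + 2*x. |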
| grad = backprop.gradients_function(multiout)(x)[0] |
| self.assertAllEqual([1.0, 3.0, 5.0], grad) |
| |
| def testMultiValuePreservesIfNotDiffedAgainst(self): |
| |
| def tfe_conv2d(timage, tkernel, conv2dstrides): |
| return nn_ops.conv2d(timage, tkernel, conv2dstrides, 'SAME') |
| |
| i = constant_op.constant([[[[1.0]]]]) |
| k = constant_op.constant([[[[2.0]]]]) |
| s = [1, 1, 1, 1] |
| |
| grad = backprop.gradients_function(tfe_conv2d, params=(0,))(i, k, s)[0] |
| self.assertAllEqual([[[[2.0]]]], grad) |
| |
| def testSameObjectForMultipleArguments(self): |
| |
| def f(x, y): |
| return math_ops.multiply(x, y) |
| |
| g = backprop.gradients_function(f) |
| |
| def np_g(x, y): |
| dx, dy = g(x, y) |
| return [dx.numpy(), dy.numpy()] |
| |
| x = constant_op.constant(1.) |
| self.assertAllEqual([1., 1.], np_g(x, x)) |
| x = 1. |
| self.assertAllEqual([1., 1.], np_g(x, x)) |
| x = constant_op.constant([[1.]]) |
| self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x)) |
| x = [[1.]] |
| self.assertAllEqual([[[1.]], [[1.]]], np_g(x, x)) |
| |
| v = resource_variable_ops.ResourceVariable( |
| initial_value=1., name='testSameObjectForMultipleArguments.Variable') |
| self.assertAllEqual([1., 1.], np_g(v, v)) |
| |
| @test_util.assert_no_new_tensors |
| def testImplicitGradientsCustomGradientAndCachedVariableValue(self): |
| |
| @custom_gradient.custom_gradient |
| def my_square(x): |
| result = math_ops.square(x) |
| |
| def grad(dr): |
| return 2 * dr * x + 1 |
| |
| return result, grad |
| |
| x = resource_variable_ops.ResourceVariable( |
| initial_value=3., name='X.' + self.id()) |
| |
| def f(): |
| return my_square(x) |
| |
| g = backprop.implicit_grad(f) |
| |
| grads_and_vars = g() |
| self.assertEqual(1, len(grads_and_vars)) |
| grad, var = grads_and_vars[0] |
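| # The custom gradient returns 2 * dr * x + 1 = 2 * 1 * 3 + 1 = 7. |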
| self.assertAllEqual(7, grad) |
| self.assertAllEqual(x, var) |
| |
| def testJacobianCustomGradient(self): |
| |
| class MyCallable(object): |
| |
| def __init__(self): |
| self.a = variables.Variable(1.) |
| self.b = variables.Variable(2.) |
| self.c = variables.Variable(3.) |
| |
| def __call__(self, x): |
| return self.a * x * x + self.b * x + self.c |
| |
| @def_function.function |
| def call(c, x): |
| |
| @custom_gradient.custom_gradient |
| def _call(): |
| y = c(x) |
| |
| def grad(dy, variables=None): # pylint: disable=redefined-outer-name |
| with backprop.GradientTape(persistent=True) as g: |
| g.watch(variables) |
| y = c(x) |
| grad_vars = [ |
| 2 * math_ops.reduce_sum(dy * g.jacobian(y, v)) for v in variables |
| ] |
| del g |
| return (), grad_vars |
| |
| return y, grad |
| |
| return _call() |
| |
| c = MyCallable() |
| x = constant_op.constant([1., 2., 3.]) |
| with backprop.GradientTape(persistent=True) as g: |
| g.watch([c.a, c.b, c.c]) |
| y = call(c, x) |
| self.assertAllEqual(g.gradient(y, x), None) |
| |
| @test_util.assert_no_new_tensors |
| def testCustomGradient(self): |
| |
| @custom_gradient.custom_gradient |
| def my_mul(x, y): |
| result = x * y |
| |
| def grad(dr): |
| return [dr * y, dr * x] |
| |
| return result, grad |
| |
| lr = 0.25 |
| x = resource_variable_ops.ResourceVariable(2., name='x') |
| |
| def loss(x): |
| return my_mul(2., x.read_value()) |
| |
| loss_grads_fn = backprop.implicit_val_and_grad(loss) |
| |
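| # loss = 2 * x, so the gradient wrt x is 2 and each step subtracts |
| # lr * 2 = 0.5 from x; the recorded losses fall from 4. to 0. in steps of 1. |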
| losses = [] |
| for _ in range(5): |
| loss, grads_and_vars = loss_grads_fn(x) |
| losses.append(loss.numpy()) |
| for (grad, var) in grads_and_vars: |
| var.assign_sub(lr * grad) |
| self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.]) |
| |
| @test_util.assert_no_new_tensors |
| def testCustomGradientIdentity(self): |
| |
| @custom_gradient.custom_gradient |
| def my_identity(x): |
| |
| def grad(dresult): |
| return [2 * dresult] |
| |
| return x, grad |
| |
| self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0) |
| |
| def testDifferentiatingFunctionThatReturnsNone(self): |
| |
| def fn(x, y): |
| result = x * y # pylint: disable=unused-variable |
| |
| x = constant_op.constant(1) |
| y = constant_op.constant(2) |
| |
| loss_grads_fn = backprop.implicit_val_and_grad(fn) |
| with self.assertRaisesRegex( |
| ValueError, 'Cannot differentiate a function that returns None; ' |
| 'did you forget to return a value from fn?'): |
| loss_grads_fn(x, y) |
| |
| val_and_grads_fn = backprop.val_and_grad_function(fn) |
| with self.assertRaisesRegex( |
| ValueError, 'Cannot differentiate a function that returns None; ' |
| 'did you forget to return a value from fn?'): |
| val_and_grads_fn(x, y) |
| |
| def testZerosCacheDoesntLeakAcrossGraphs(self): |
| with ops.Graph().as_default(): |
| |
| def get_grad(): |
| with ops.Graph().as_default(), self.cached_session(): |
| t = constant_op.constant(1, dtype=dtypes.float32, shape=(10, 4)) |
| x = constant_op.constant(2, dtype=dtypes.float32, shape=(10, 4)) |
| with backprop.GradientTape() as tape: |
| tape.watch(x) |
| x1, _ = array_ops.split(x, num_or_size_splits=2, axis=1) |
| y1 = x1**2 |
| y = array_ops.concat([y1, t], axis=1) |
| return self.evaluate(tape.gradient(y, x)) |
| |
| grad1 = get_grad() |
| grad2 = get_grad() |
| |
| self.assertAllEqual(grad1, grad2) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testSelectivelyWatchVariables(self): |
| x1 = resource_variable_ops.ResourceVariable(1.0) |
| x2 = resource_variable_ops.ResourceVariable(1.0) |
| with backprop.GradientTape(watch_accessed_variables=False) as tape: |
| tape.watch(x2) |
| y = x1**2 |
| z = x2**3 |
| self.assertTupleEqual(tape.watched_variables(), (x2,)) |
| dy, dz = tape.gradient([y, z], [x1, x2]) |
| self.evaluate([x1.initializer, x2.initializer]) |
| self.assertIsNone(dy) |
| self.assertEqual(self.evaluate(dz), 3.0) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testDifferentiatingScalarCache(self): |
| # In the following test, if x2 = x1 (i.e. the objects are the exact same), |
| # then y is essentially 2*x1, and dy/dx1 = 2. |
| # When we had a pure scalar cache in eager, this would be the case. This |
| # test prevents us from going back to that case. |
| with backprop.GradientTape(persistent=False) as g: |
| x1 = constant_op.constant(3.0) |
| x2 = constant_op.constant(3.0) |
| g.watch(x1) |
| g.watch(x2) |
| y = x1 + x2 |
| grad = g.gradient(target=y, sources=[x1]) |
| self.assertEqual(self.evaluate(grad), [1.0]) |
| |
| def testVariablesAndConstantsProduceTheSameGradients(self): |
| |
| # In the following test, differentiating [y, z] against [a, b] gives: |
| # (dy/da + dz/da, dy/db + dz/db). |
| # If a and b are the same constant, dz/da will not be 0 (which it should |
| # be). |
| # This is solved by using variables, since calling read_value on a variable |
| # produces a new tensor and a corresponding TensorHandle rather than reusing |
| # the same tensor (which would happen if we were using a cache and reusing |
| # EagerTensor objects). |
| def get_grads(a, b): |
| with backprop.GradientTape() as tape: |
| tape.watch([a, b]) |
| y = a**3 |
| z = b**2 |
| return tape.gradient([y, z], [a, b]) |
| |
| gradients_constants = get_grads( |
| constant_op.constant(2.0), constant_op.constant(2.0)) |
| gradients_variables = get_grads( |
| resource_variable_ops.ResourceVariable(2.0), |
| resource_variable_ops.ResourceVariable(2.0)) |
| self.assertAllEqual(gradients_constants, gradients_variables) |
| |
| def testUnknownShapes(self): |
| with ops.Graph().as_default(): |
| with backprop.GradientTape() as tape: |
| a = array_ops.placeholder(dtype=dtypes.float32, shape=None) |
| tape.watch(a) |
| b = a**3 |
| |
| db_da = tape.gradient(b, a) |
| |
| with self.cached_session() as sess: |
| self.assertEqual((8.0, 12.0), sess.run((b, db_da), feed_dict={a: 2.0})) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testCustomGradientInEagerAndGraph(self): |
| |
| @custom_gradient.custom_gradient |
| def f(x): |
| y = x * x |
| |
| def grad(dy): |
| return [4 * dy] |
| |
| return y, grad |
| |
| with backprop.GradientTape() as t: |
| c = constant_op.constant(1.0) |
| t.watch(c) |
| g = f(c) |
| self.assertAllEqual(self.evaluate(t.gradient(g, c)), 4.0) |
| |
| def testOverrideSecondOrderWithCustomGradient(self): |
| |
| @custom_gradient.custom_gradient |
| def f(x): |
| |
| def first_order_grad(dz): |
| |
| @custom_gradient.custom_gradient |
| def first_order_custom(unused_x): |
| |
| def h(ddz): |
| return -2.1 * ddz |
| |
| return -1.1, h |
| |
| return dz * first_order_custom(x) |
| |
| return x + 10., first_order_grad |
| |
| c = constant_op.constant(1.) |
| with backprop.GradientTape() as outer: |
| outer.watch(c) |
| with backprop.GradientTape() as inner: |
| inner.watch(c) |
| d = f(c)**4. |
| dd = inner.gradient(d, c) |
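| # The custom gradient overrides f'(c) with -1.1 (and f''(c) with -2.1), so |
| # dd/dc = 4 * f(c)**3 * -1.1. |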
| self.assertAllClose(4. * f(c)**3. * -1.1, dd) |
| self.assertAllClose(3. * 4. * f(c)**2. * -1.1 * -1.1 + 4. * f(c)**3. * -2.1, |
| outer.gradient(dd, c)) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testCustomGradientForwardprop(self): |
| |
| @custom_gradient.custom_gradient |
| def f(x): |
| z = 2. * tensor_util.constant_value(x) |
| |
| def g(dz): |
| |
| @custom_gradient.custom_gradient |
| def first_order(unused_x, unused_dz): |
| |
| def second_order_and_transpose(unused_ddz): |
| return 2.2, 3.1 |
| |
| return 2.1, second_order_and_transpose |
| |
| return first_order(x, dz) |
| |
| return z, g |
| |
| with backprop.GradientTape(persistent=True) as t: |
| with backprop.GradientTape() as tt: |
| c = constant_op.constant(1.) |
| t.watch(c) |
| tt.watch(c) |
| output_grad = array_ops.ones([]) |
| t.watch(output_grad) |
| output = f(c) |
| self.assertAllClose(2., output) |
| gc = tt.gradient(output, c, output_gradients=output_grad) |
| self.assertAllClose(2.1, gc) |
| ggc = t.gradient(gc, c) |
| self.assertAllClose(2.2, ggc) |
| # Note that when executed eagerly this kind of transpose is not efficient. But |
| # from a tf.function we could prune out the first-order gradient |
| # computation. |
| transpose = t.gradient(gc, output_grad) |
| self.assertAllClose(3.1, transpose) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testMaxPooling3DGradient(self): |
| |
| def forward(a): |
| r = max_pooling3d(a, pool_size=pool_size, strides=strides, padding='SAME') |
| return r |
| |
| input_sizes = [1, 3, 2, 4, 1] |
| pool_size = (2, 2, 1) |
| strides = (1, 1, 1) |
| |
| total_size = np.prod(input_sizes) |
| x = np.arange(1, total_size + 1, dtype=np.float32) |
| aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32) |
| da = backprop.gradients_function(forward)(aa) |
| |
| if not context.executing_eagerly(): |
| tf_aa = constant_op.constant(x, shape=input_sizes, dtype=dtypes.float32) |
| tf_max = max_pooling3d( |
| tf_aa, pool_size=pool_size, strides=strides, padding='SAME') |
| tf_da = gradients.gradients(tf_max, [tf_aa]) |
| self.assertAllEqual(da[0], tf_da[0]) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def testWatchBadThing(self): |
| g = backprop.GradientTape() |
| with self.assertRaisesRegex(ValueError, 'ndarray'): |
| g.watch(np.array(1.)) |
| |
| def testWatchComposite(self): |
| """Test that tape.watch expands composites and watches component Tensors.""" |
| with backprop.GradientTape() as t: |
| values = constant_op.constant([1.0, 2.0], dtypes.float32) |
| s = sparse_tensor.SparseTensor( |
| indices=[[0, 0], [1, 2]], values=values, dense_shape=[3, 4]) |
| t.watch(s) |
| z = sparse_ops.sparse_reduce_sum_v2(s) |
| result = t.gradient(z, values) |
| self.assertAllEqual(result, [1.0, 1.0]) |
| |
| def testWatchedVariablesAfterNonPersistentGradientCall(self): |
| with backprop.GradientTape(persistent=False) as tape: |
| x = resource_variable_ops.ResourceVariable(1.0) |
| tape.watch(x) |
| tape.gradient(x, x) |
| self.assertEqual((x,), tape.watched_variables()) |
| |
| def testWatchedVariablesOnlyHasVariablesFromLastTape(self): |
| with backprop.GradientTape(persistent=False) as tape: |
| x = resource_variable_ops.ResourceVariable(1.0) |
| tape.watch(x) |
| with backprop.GradientTape(persistent=False) as tape: |
| z = resource_variable_ops.ResourceVariable(2.0) |
| tape.watch(z) |
| tape.gradient(z, z) |
| self.assertEqual((z,), tape.watched_variables()) |
| |
| def testWatchedVariablesRespectReset(self): |
| with backprop.GradientTape(persistent=False) as tape: |
| x = resource_variable_ops.ResourceVariable(1.0) |
| tape.watch(x) |
| self.assertEqual((x,), tape.watched_variables()) |
| tape.reset() |
| z = resource_variable_ops.ResourceVariable(2.0) |
| tape.watch(z) |
| self.assertEqual((z,), tape.watched_variables()) |
| tape.gradient(z, z) |
| self.assertEqual((z,), tape.watched_variables()) |
| |
| def testNameScope(self): |
| |
| def fn(x): |
| with ops.name_scope('my_scope'): |
| a = math_ops.cos(x) |
| b = math_ops.cos(x) |
| return math_ops.add(a, b) |
| |
| @function.defun |
| def grad_fn(x): |
| return backprop.gradients_function(fn)(x) |
| |
| grad_ops = grad_fn.get_concrete_function( |
| constant_op.constant(1.0)).graph.get_operations() |
| num_sin_ops_found = 0 |
| for op in grad_ops: |
| if op.type == 'Sin': |
| num_sin_ops_found += 1 |
| self.assertIn('gradient_tape/my_scope/', op.name) |
| self.assertEqual(num_sin_ops_found, 2) |
| |
| @test_util.assert_no_new_pyobjects_executing_eagerly |
| def testRecomputeGradWithNestedFunctionAndWhileLoop(self): |
| |
| @custom_gradient.recompute_grad |
| @def_function.function |
| def outer(x): |
| |
| @def_function.function |
| def middle(y): |
| |
| @def_function.function |
| def inner(z): |
| return z + 1 |
| |
| i = constant_op.constant(0.0) |
| c = lambda y, i: i < 10. |
| b = lambda y, i: (inner(y), i + 1.0) |
| y, i = control_flow_ops.while_loop(c, b, [y, i]) |
| |
| return y |
| |
| return middle(x) |
| |
| with MemoryChecker() as memory_checker: |
| for _ in range(5): |
| x = variables.Variable(1.0, name='x') |
| with backprop.GradientTape(): |
| y = outer(x) |
| self.assertAllEqual(y, 11.0) |
| |
| memory_checker.report() |
| memory_checker.assert_no_leak_if_all_possibly_except_one() |
| |
| |
| class JacobianTest(test.TestCase): |
| |
| def _jacobian(self, experimental_use_pfor): |
| persistent = context.executing_eagerly() and not experimental_use_pfor |
| with backprop.GradientTape(persistent=persistent) as g: |
| x = constant_op.constant([1., 2.]) |
| y = constant_op.constant([3., 4.]) |
| g.watch(x) |
| g.watch(y) |
| z = x * x * y |
| jacobian = g.jacobian( |
| z, [x, y], experimental_use_pfor=experimental_use_pfor) |
| answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)] |
| return jacobian, answer |
| |
| @test_util.run_v1_only('b/120545219') |
| def testPfor(self): |
| jacobian, answer = self._jacobian(experimental_use_pfor=True) |
| for j, a in zip(jacobian, answer): |
| self.assertAllEqual(a, j) |
| |
| @test_util.run_v1_only('b/120545219') |
| def testWhileLoop(self): |
| jacobian, answer = self._jacobian(experimental_use_pfor=False) |
| for j, a in zip(jacobian, answer): |
| self.assertAllEqual(a, j) |
| |
| @test_util.run_v1_only('b/120545219') |
| def testPforDefun(self): |
| |
| @function.defun |
| def _f(): |
| return self._jacobian(experimental_use_pfor=True) |
| |
| jacobian, answer = _f() |
| for j, a in zip(jacobian, answer): |
| self.assertAllEqual(a, j) |
| |
| @test_util.run_v1_only('b/120545219') |
| def testWhileLoopDefun(self): |
| |
| @function.defun |
| def _f(): |
| return self._jacobian(experimental_use_pfor=False) |
| |
| jacobian, answer = _f() |
| for j, a in zip(jacobian, answer): |
| self.assertAllEqual(a, j) |
| |
| @test_util.run_v1_only('b/120545219') |
| def testPersistentTape(self): |
| if not context.executing_eagerly(): |
| return |
| with backprop.GradientTape() as g: |
| x = constant_op.constant([1.0, 2.0]) |
| g.watch(x) |
| y = x * x |
| with self.assertRaisesRegex(RuntimeError, 'persistent'): |
| g.jacobian(y, x, experimental_use_pfor=False) |
| |
| @test_util.run_v1_only('b/120545219') |
| def test_parallel_iterations(self): |
| with backprop.GradientTape(persistent=True) as g: |
| x = constant_op.constant([[1., 2], [3, 4]]) |
| g.watch(x) |
| y = math_ops.matmul(x, x) |
| self.assertAllClose( |
| g.jacobian(y, x, parallel_iterations=2), |
| g.jacobian(y, x, parallel_iterations=3)) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def test_nested_jacobian(self): |
| if context.executing_eagerly(): |
| # TODO(agarwal): b/128842926 |
| self.skipTest('Conversion of function calls not implemented yet.') |
| x = array_ops.ones((10, 2)) |
| with backprop.GradientTape(persistent=False) as g: |
| g.watch(x) |
| with backprop.GradientTape(persistent=False) as gg: |
| gg.watch(x) |
| y = math_ops.reduce_sum(math_ops.square(x)) |
| dy_x = gg.jacobian(y, x) |
| dy_xx = g.batch_jacobian(dy_x, x) |
| dy_xx_answer = [[[2., 0], [0, 2.]]] * 10 |
| self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx)) |
| |
| @test_util.run_in_graph_and_eager_modes |
| def test_indexed_slices(self): |
| with backprop.GradientTape(persistent=True) as g: |
| inp = random_ops.random_uniform([3, 2]) |
| g.watch(inp) |
| output = nn.embedding_lookup(inp, [0, 2]) |
| self.assertAllClose( |
| g.jacobian(output, inp, experimental_use_pfor=True), |
| g.jacobian(output, inp, experimental_use_pfor=False)) |
| |
| def test_foldl_partial_function(self): |
| x = array_ops.zeros([3]) |
| with backprop.GradientTape(persistent=True) as tape: |
| tape.watch(x) |
| result = def_function.function( |
| functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))( |
| x) |
| self.assertAllClose([1., 1., 1.], |
| tape.jacobian(result, x, experimental_use_pfor=True)) |
| self.assertAllClose([1., 1., 1.], |
| tape.jacobian(result, x, experimental_use_pfor=False)) |
| |
| # Non-persistent tapes take a different function gradient path, but also |
| # work with pfor=True. |
| x = array_ops.zeros([3]) |
| with backprop.GradientTape() as tape: |
| tape.watch(x) |
| result = def_function.function( |
| functools.partial(functional_ops.foldl_v2, lambda a, b: a + b))( |
| x) |
| self.assertAllClose([1., 1., 1.], |
| tape.jacobian(result, x, experimental_use_pfor=True)) |
| |
| def test_foldl_pure_function(self): |
| |
| @def_function.function |
| def compute_jacobian(use_pfor): |
| x = array_ops.zeros([3]) |
| with backprop.GradientTape(persistent=True) as tape: |
| tape.watch(x) |
| result = functools.partial(functional_ops.foldl_v2, lambda a, b: a + b)( |
| x) |
| return tape.jacobian(result, x, experimental_use_pfor=use_pfor) |
| |
| self.assertAllClose(compute_jacobian(use_pfor=True), |
| compute_jacobian(use_pfor=False)) |
| |
| |
| @test_util.run_all_in_graph_and_eager_modes |
| class BatchJacobianTest(test.TestCase, parameterized.TestCase): |
| |
| def _batch_jacobian(self, experimental_use_pfor): |
| persistent = context.executing_eagerly() and not experimental_use_pfor |
| with backprop.GradientTape(persistent=persistent) as g: |
| x = constant_op.constant([[1., 2.], [3., 4.]]) |
| y = constant_op.constant([[3., 4.], [5., 6.]]) |
| g.watch(x) |
| z = x * x * y |
| batch_jacobian = g.batch_jacobian( |
| z, x, experimental_use_pfor=experimental_use_pfor) |
| answer = array_ops.stack( |
| [array_ops.diag(2 * x[0] * y[0]), |
| array_ops.diag(2 * x[1] * y[1])]) |
| return batch_jacobian, answer |
| |
| def testPfor(self): |
| batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=True) |
| self.assertAllEqual(answer, batch_jacobian) |
| |
| def testWhileLoop(self): |
| batch_jacobian, answer = self._batch_jacobian(experimental_use_pfor=False) |
| self.assertAllEqual(answer, batch_jacobian) |
| |
| def testPforDefun(self): |
| |
| @function.defun |
| def _f(): |
| return self._batch_jacobian(experimental_use_pfor=True) |
| |
| batch_jacobian, answer = _f() |
| self.assertAllEqual(answer, batch_jacobian) |
| |
| def testWhileLoopDefun(self): |
| |
| @function.defun |
| def _f(): |
| return self._batch_jacobian(experimental_use_pfor=False) |
| |
| batch_jacobian, answer = _f() |
| self.assertAllEqual(answer, batch_jacobian) |
| |
| def testPersistentTape(self): |
| if not context.executing_eagerly(): |
| return |
| with backprop.GradientTape() as g: |
| x = constant_op.constant([[1.0, 2.0]]) |
| g.watch(x) |
| y = x * x |
| with self.assertRaisesRegex(RuntimeError, 'persistent'): |
| g.batch_jacobian(y, x, experimental_use_pfor=False) |
| |
| def testBadShape(self): |
| x = random_ops.random_uniform([2, 3]) |
| with backprop.GradientTape() as g: |
| y = array_ops.concat([x, x], axis=0) |
| with self.assertRaisesRegex(ValueError, 'Need first dimension'): |
| g.batch_jacobian(y, x) |
| |
| def testBadInputRank(self): |
| x = random_ops.random_uniform([2]) |
| with backprop.GradientTape() as g: |
| y = random_ops.random_uniform([2, 2]) |
| with self.assertRaisesRegex(ValueError, 'must have rank at least 2'): |
| g.batch_jacobian(y, x) |
| |
| def testBadOutputRank(self): |
| x = random_ops.random_uniform([2, 2]) |
| with backprop.GradientTape() as g: |
| y = random_ops.random_uniform([2]) |
| with self.assertRaisesRegex(ValueError, 'must have rank at least 2'): |
| g.batch_jacobian(y, x) |
| |
| def test_parallel_iterations(self): |
| with backprop.GradientTape(persistent=True) as g: |
| x = constant_op.constant([[1., 2], [3, 4]]) |
| g.watch(x) |
| w = constant_op.constant([[1., 2, 3, 4], [5, 6, 7, 8]]) |
| y = math_ops.matmul(x, w) |
| self.assertAllClose( |
| g.batch_jacobian(y, x, parallel_iterations=2), |
| g.batch_jacobian(y, x, parallel_iterations=3)) |
| |
| @parameterized.parameters((True, True), (True, False), (False, True), |
| (False, False)) |
| def test_degenerate_shape(self, use_function, use_pfor): |
| |
| def f(x): |
| with backprop.GradientTape(persistent=True) as tape: |
| tape.watch(x) |
| y = x**2 |
| return tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor) |
| |
| if use_function: |
| f = def_function.function(f) |
| self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0])))) |
| |
| |
| class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase): |
| |
| def _assert_indexed_slices_equal(self, left, right): |
| self.assertAllEqual( |
| self.evaluate(ops.convert_to_tensor(left)), |
| self.evaluate(ops.convert_to_tensor(right))) |
| |
| def testNoGradients(self): |
| self.assertIsNone(backprop.aggregate_indexed_slices_gradients([])) |
| |
| def testOneGradient(self): |
| t = math_ops._as_indexed_slices( |
| constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) |
| result = backprop.aggregate_indexed_slices_gradients([t]) |
| self._assert_indexed_slices_equal(t, result) |
| |
| def testMultipleGradients(self): |
| t0 = math_ops._as_indexed_slices( |
| constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) |
| t1 = math_ops._as_indexed_slices( |
| constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) |
| total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) |
| result = backprop.aggregate_indexed_slices_gradients([t0, t1]) |
| self._assert_indexed_slices_equal(total, result) |
| |
| def testMultipleGradientsWithNones(self): |
| t0 = math_ops._as_indexed_slices( |
| constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) |
| t1 = math_ops._as_indexed_slices( |
| constant_op.constant([[0., 0.], [5, 6], [7., 8.]])) |
| t3 = None |
| total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) |
| result = backprop.aggregate_indexed_slices_gradients([t0, t1, t3]) |
| self._assert_indexed_slices_equal(total, result) |
| |
| def testMixedTensorAndIndexedSlices(self): |
| t0 = math_ops._as_indexed_slices( |
| constant_op.constant([[1., 2.], [0, 0], [3., 4.]])) |
| t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]]) |
| total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]]) |
| result = backprop.aggregate_indexed_slices_gradients([t0, t1]) |
| self._assert_indexed_slices_equal(total, result) |
| |
| |
| if __name__ == '__main__': |
| test.main() |