blob: 69eb32c391f34a4d107fe6984e411fb6f6a974fb [file] [log] [blame]
import sys

try:
    from collections.abc import Iterable
except ImportError:  # Python 2 has no collections.abc
    from collections import Iterable

import torch
from torch.autograd import Variable
def iter_variables(x):
    """Yield ``(grad_data, data)`` pairs for Variables that require grad.

    ``x`` may be a single Variable or an arbitrarily nested iterable of them.
    A Variable that requires grad but has no gradient yet contributes
    ``(None, None)``.  Variables that do not require grad, and objects that
    are neither Variables nor iterables, contribute nothing.
    """
    if isinstance(x, Variable):
        if not x.requires_grad:
            return
        grad = x.grad
        if grad is None:
            yield (None, None)
        else:
            yield (grad.data, x.data)
    elif isinstance(x, Iterable):
        # Recurse into containers, flattening the nested results.
        for item in x:
            for pair in iter_variables(item):
                yield pair
def zero_gradients(x):
    """Detach and zero, in place, the gradient of every Variable in ``x``.

    ``x`` may be a single Variable or a nested iterable of them.  Variables
    whose ``grad`` is ``None`` are left untouched; anything that is neither a
    Variable nor an iterable is ignored.
    """
    if isinstance(x, Variable):
        grad = x.grad
        if grad is None:
            return
        grad.detach_()
        grad.data.zero_()
        return
    if isinstance(x, Iterable):
        for item in x:
            zero_gradients(item)
def make_jacobian(input, num_out):
    """Allocate zero-filled Jacobian storage mirroring the structure of ``input``.

    Args:
        input: a tensor, a Variable, or a (nested) iterable of them.
        num_out: number of output elements (columns of each Jacobian).

    Returns:
        * ``None`` for a Variable that does not require grad, for an object
          that is neither tensor, Variable nor iterable, and for an iterable
          in which no element produced a Jacobian.
        * a ``torch.zeros(input.nelement(), num_out)`` tensor for a tensor or
          Variable.
        * for an iterable, a container of the same type holding the non-None
          Jacobians of its elements.
    """
    is_variable = isinstance(input, Variable)
    if is_variable and not input.requires_grad:
        return None
    if is_variable or torch.is_tensor(input):
        return torch.zeros(input.nelement(), num_out)
    if not isinstance(input, Iterable):
        return None

    per_element = [make_jacobian(elem, num_out) for elem in input]
    kept = [jac for jac in per_element if jac is not None]
    if not kept:
        return None
    # Rebuild with the caller's container type (e.g. tuple stays tuple).
    return type(input)(kept)
def iter_tensors(x, only_requiring_grad=False):
    """Yield the tensors contained in ``x``, flattening nested iterables.

    Args:
        x: a tensor, a Variable, or a (nested) iterable of them.
        only_requiring_grad: when true, a Variable is skipped unless it
            requires grad.  Objects recognised by ``torch.is_tensor`` are
            always yielded as-is.

    Yields:
        ``x`` itself when it is a tensor, ``x.data`` when it is a Variable,
        and recursively the tensors of each element when it is an iterable.
    """
    if torch.is_tensor(x):
        yield x
        return
    if isinstance(x, Variable):
        if not only_requiring_grad or x.requires_grad:
            yield x.data
        return
    if isinstance(x, Iterable):
        for item in x:
            for tensor in iter_tensors(item, only_requiring_grad):
                yield tensor
def contiguous(input):
    """Return ``input`` with every tensor/Variable made contiguous.

    Tensors and Variables are replaced by the result of their
    ``.contiguous()`` method.  Iterables are rebuilt, using their own type,
    from the recursively processed elements.  Any other object is returned
    unchanged.
    """
    if torch.is_tensor(input) or isinstance(input, Variable):
        return input.contiguous()
    if isinstance(input, Iterable):
        container_type = type(input)
        return container_type(contiguous(elem) for elem in input)
    return input
def get_numerical_jacobian(fn, input, target, eps=1e-3):
    """Estimate the Jacobian of ``fn`` with central finite differences.

    Every element of every tensor yielded by ``iter_tensors(target, True)``
    is perturbed in place by ``-eps`` and then ``+eps``; ``fn(input)`` is
    evaluated for both and ``(f(x + eps) - f(x - eps)) / (2 * eps)`` is
    written into row ``i`` of the matching Jacobian tensor.  The element is
    restored to its original value afterwards.

    Args:
        fn: callable invoked as ``fn(input)``; its result must support
            ``.numel()`` and be copyable into a ``DoubleTensor``.
        input: argument handed to ``fn`` on every evaluation.
        target: tensor(s)/Variable(s) whose elements are perturbed.
        eps: perturbation size.

    Returns:
        The structure produced by ``make_jacobian(target, output_size)``,
        filled with the numerical derivatives.
    """
    # .view(-1) below requires contiguous tensors.
    input = contiguous(input)
    target = contiguous(target)
    # NOTE(review): the perturbation only reaches ``fn`` if the tensors in
    # ``target`` share storage with those in ``input`` after the calls above
    # -- presumably true for already-contiguous inputs; confirm.
    num_outputs = fn(input).numel()
    jacobian = make_jacobian(target, num_outputs)

    # Flat lists are easier to walk.  The entries of ``jacobian_tensors``
    # are the very tensors stored in ``jacobian``, so writing to them fills
    # the returned structure.
    perturbed_tensors = list(iter_tensors(target, True))
    jacobian_tensors = list(iter_tensors(jacobian))

    out_minus = torch.DoubleTensor(num_outputs)
    out_plus = torch.DoubleTensor(num_outputs)

    # TODO: compare structure
    for x_tensor, jac_tensor in zip(perturbed_tensors, jacobian_tensors):
        flat = x_tensor.view(-1)
        for idx in range(flat.nelement()):
            saved = flat[idx]
            flat[idx] = saved - eps
            out_minus.copy_(fn(input), broadcast=False)
            flat[idx] = saved + eps
            out_plus.copy_(fn(input), broadcast=False)
            flat[idx] = saved

            # Central difference, computed in place in ``out_plus``.
            out_plus.add_(-1, out_minus).div_(2 * eps)
            jac_tensor[idx] = out_plus

    return jacobian
def get_analytical_jacobian(input, output):
    """Build the Jacobian of ``output`` w.r.t. ``input`` using backward passes.

    For each element of ``output`` a one-hot ``grad_output`` is
    backpropagated twice: once into ``jacobian`` and once into
    ``jacobian_reentrant``.  The resulting input gradients form one column
    of each Jacobian.

    Args:
        input: Variable(s) the output was computed from.
        output: Variable to differentiate.

    Returns:
        A tuple ``(jacobian, reentrant, correct_grad_sizes)`` where
        ``jacobian`` comes from ``make_jacobian(input, output.numel())``,
        ``reentrant`` is False if the two backward runs produced different
        Jacobians, and ``correct_grad_sizes`` is False if any gradient's
        size differed from the size of its Variable's data.
    """
    input = contiguous(input)
    num_outputs = output.numel()
    jacobian = make_jacobian(input, num_outputs)
    jacobian_reentrant = make_jacobian(input, num_outputs)
    grad_output = output.data.clone().zero_()
    flat_grad_output = grad_output.view(-1)
    reentrant = True
    correct_grad_sizes = True

    for col in range(flat_grad_output.numel()):
        # One-hot grad_output selecting output element ``col``.
        flat_grad_output.zero_()
        flat_grad_output[col] = 1
        for current_jacobian in (jacobian, jacobian_reentrant):
            zero_gradients(input)
            output.backward(grad_output, create_graph=True)
            for jac, (grad, value) in zip(current_jacobian, iter_variables(input)):
                if grad is None:
                    jac[:, col].zero_()
                    continue
                if grad.size() != value.size():
                    correct_grad_sizes = False
                if grad.is_sparse:
                    jac[:, col] = grad.to_dense()
                else:
                    jac[:, col] = grad

    # Both runs must agree exactly for backward to count as reentrant.
    for first_run, second_run in zip(jacobian, jacobian_reentrant):
        if (first_run - second_run).abs().max() != 0:
            reentrant = False

    return jacobian, reentrant, correct_grad_sizes
def _as_tuple(x):
if isinstance(x, tuple):
return x
elif isinstance(x, list):
return tuple(x)
else:
return x,
def _differentiable_outputs(x):
    """Return, as a tuple, the outputs in ``x`` that take part in autograd.

    ``x`` is first normalised with ``_as_tuple``; an output is kept when it
    requires grad or has a ``grad_fn``.
    """
    kept = []
    for out in _as_tuple(x):
        if out.requires_grad or out.grad_fn is not None:
            kept.append(out)
    return tuple(kept)
def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True):
    """Check gradients computed via small finite differences
    against analytical gradients

    The check between numerical and analytical has the same behaviour as
    numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
    meaning it checks that
        absolute(a - n) <= (atol + rtol * absolute(n))
    is true for all elements of analytical jacobian a and numerical jacobian n.

    In addition it verifies that backward is reentrant, that every gradient
    has the size of its input, and that a zero ``grad_output`` yields zero
    input gradients.

    Args:
        func: Python function that takes Variable inputs and returns
            a tuple of Variables
        inputs: tuple of Variables
        eps: perturbation for finite differences
        atol: absolute tolerance
        rtol: relative tolerance
        raise_exception: bool indicating whether to raise an exception if
            gradcheck fails. The exception gives more information about the
            exact nature of the failure. This is helpful when debugging gradchecks.

    Returns:
        True if all differences satisfy allclose condition, False if a check
        fails and ``raise_exception`` is false.

    Raises:
        RuntimeError: if a check fails and ``raise_exception`` is true, or
            (regardless of ``raise_exception``) if ``inputs`` contains no
            Variables while some output requires grad.
    """
    output = _differentiable_outputs(func(*inputs))

    def fail_test(msg):
        if raise_exception:
            raise RuntimeError(msg)
        return False

    for i, o in enumerate(output):
        if not o.requires_grad:
            continue

        def fn(input):
            # ``i`` indexes the *differentiable* outputs, so the same filter
            # must be applied here; indexing the raw outputs would pick the
            # wrong one whenever a non-differentiable output comes first.
            return _differentiable_outputs(func(*input))[i].data

        analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(_as_tuple(inputs), o)
        numerical = get_numerical_jacobian(fn, inputs, inputs, eps)

        for j, (a, n) in enumerate(zip(analytical, numerical)):
            if not ((a - n).abs() <= (atol + rtol * n.abs())).all():
                # Report both indices and only the offending Jacobian pair.
                return fail_test(
                    'Jacobian mismatch for output %d with respect to input %d,\n'
                    'numerical:%s\nanalytical:%s\n' % (i, j, n, a))

        if not reentrant:
            return fail_test('not reentrant')

        if not correct_grad_sizes:
            return fail_test('not correct_grad_sizes')

    # check if the backward multiplies by grad_output
    zero_gradients(inputs)
    output = _differentiable_outputs(func(*inputs))
    if any(o.requires_grad for o in output):
        zero_grads = [o.data.new(o.size()).zero_() for o in output]
        torch.autograd.backward(output, zero_grads, create_graph=True)
        var_inputs = [inp for inp in inputs if isinstance(inp, Variable)]
        if not var_inputs:
            raise RuntimeError("no Variables found in input")
        for inp in var_inputs:
            if inp.grad is None:
                continue
            # With an all-zero grad_output every input gradient must be zero.
            if not inp.grad.data.eq(0).all():
                return fail_test('backward not multiplied by grad_output')

    return True
def gradgradcheck(func, inputs, grad_outputs=None, eps=1e-6, atol=1e-5, rtol=1e-3):
    """Check gradients of gradients computed via small finite differences
    against analytical gradients

    This function checks that backpropagating through the gradients computed
    to the given grad_outputs are correct.

    The check between numerical and analytical has the same behaviour as
    numpy.allclose https://docs.scipy.org/doc/numpy/reference/generated/numpy.allclose.html
    meaning it checks that
        absolute(a - n) <= (atol + rtol * absolute(n))
    is true for all elements of analytical gradient a and numerical gradient n.

    Args:
        func (function): Python function that takes Variable inputs and returns
            a tuple of Variables
        inputs (tuple of Variable): inputs to the function
        grad_outputs (tuple of Variable, optional): The gradients with respect to
            the function's outputs. A list or a single Variable is also
            accepted. When omitted, random gradients are generated, one per
            differentiable output of ``func``.
        eps (float, optional): perturbation for finite differences
        atol (float, optional): absolute tolerance
        rtol (float, optional): relative tolerance

    Returns:
        True if all differences satisfy allclose condition. Raises an exception
        otherwise.
    """
    if grad_outputs is None:
        # If grad_outputs is not specified, create random variables of the same
        # shape, type, and device as the outputs
        def randn_like(x):
            return Variable(x.data.new(x.size()).normal_(), requires_grad=True)
        # Only differentiable outputs are passed to autograd.grad in
        # new_func, so generate exactly one gradient for each of those.
        outputs = _differentiable_outputs(func(*inputs))
        grad_outputs = tuple(randn_like(x) for x in outputs)
    else:
        grad_outputs = _as_tuple(grad_outputs)

    num_grad_outputs = len(grad_outputs)

    def new_func(*input_args):
        # The trailing ``num_grad_outputs`` arguments are the grad_outputs
        # appended below.  An explicit end index is used because
        # ``[:-0]`` would drop every argument.
        input_args = input_args[:len(input_args) - num_grad_outputs]
        outputs = _differentiable_outputs(func(*input_args))
        input_args = tuple(x for x in input_args if isinstance(x, Variable) and x.requires_grad)
        grad_inputs = torch.autograd.grad(outputs, input_args, grad_outputs)
        return grad_inputs

    # Both operands must be tuples; tuple + list raises TypeError.
    return gradcheck(new_func, tuple(inputs) + grad_outputs, eps, atol, rtol)