caffe2/python/gradient_checker.py - platform/external/pytorch - Git at Google

 import numpy as np
 from caffe2.python import core, workspace
 from caffe2.proto import caffe2_pb2


 class GradientChecker:
     """A gradient checker in Python.

     This is not the most efficient way to check gradients, as the Python
     interface will involve a lot of copy back and forth operations. Use at your
     own risk.
     """

     def __init__(
         self,
         stepsize,
         threshold,
         device_option=caffe2_pb2.DeviceOption(),
         workspace_name="gradient_check"
     ):
         self._stepsize = stepsize
         self._threshold = threshold
         self._device_option = device_option
         self._workspace_name = workspace_name

     def GetLossAndGrad(
         self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
     ):
         # First, feed in the current input. Note that we are not changing
         # anything else, so we don't need to feed in others.
         workspace.FeedBlob(input_name, x, self._device_option)
         # Run.
         workspace.RunOperatorOnce(op)
         loss = 0.
         # Get Loss and feed in the gradients, run gradient ops.
         for idx in outputs_with_grads:
             name = op.output[idx]
             arr = workspace.FetchBlob(name)
             loss += (arr**2).sum()
             workspace.FeedBlob(name + '_grad', arr, self._device_option)
         loss /= 2.
         # Run gradient ops
         workspace.RunOperatorsOnce(grad_ops)
         # Get gradients
         if isinstance(grad_name, core.GradientSlice):
             workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
             workspace.FeedBlob('one', np.ones(1, dtype=np.float32))
             sparse_to_dense_op = core.CreateOperator(
                 'ScatterWeightedSum',
                 ['zeros', 'one', grad_name.indices, grad_name.values, 'one'],
                 'zeros')
             workspace.RunOperatorOnce(sparse_to_dense_op)
             grad = workspace.FetchBlob('zeros')
         else:
             grad = workspace.FetchBlob(grad_name)
         return loss, grad

     def CheckSimple(
         self,
         op,
         inputs,
         input_to_check,
         outputs_with_grads,
         grad_ops=None,
         input_device_options=None
     ):
         """Checks the operator in a very simple fashion by stacking a sum of
         squares on the top.

         Inputs:
           op: the operator to be checked.
           inputs: the input data in numpy arrays.
           input_to_check: an index specifying which input blob we should
               check.
           outputs_with_grads: indices specifying which output blobs will we
               need to check gradients with. For these outputs, we will collect a
               squared sum and also feed in their gradients.
           grad_operator: the gradient operator. If not given, we will get the
               gradient operator from the gradient registry.
           input_device_options: an optional mapping from input names to
               DeviceOptions (to override the default DeviceOption)
         Outputs:
           boolean: True if it passes, False if it does not pass.
         """
         if input_device_options is None:
             input_device_options = {}
         # Entering the checker workspace
         old_ws_name = workspace.CurrentWorkspace()
         if self._workspace_name != old_ws_name:
             workspace.SwitchWorkspace(self._workspace_name, True)

         op.device_option.CopyFrom(self._device_option)
         if grad_ops is None:
             # TODO(jiayq): use the gradient registration instead of the old
             # hack.
             grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
                 op, [s + '_grad' for s in op.output])

         dims_to_check = inputs[input_to_check].size
         # First, feed in the input.
         for i, arr in enumerate(inputs):
             workspace.FeedBlob(
                 op.input[i], arr,
                 input_device_options.get(
                     op.input[i], self._device_option))

         # Get the loss and gradient for the original.
         input_name = op.input[input_to_check]
         grad_name = g_input[input_to_check]
         loss, grad = self.GetLossAndGrad(
             op, grad_ops, inputs[input_to_check], input_name, grad_name,
             outputs_with_grads
         )
         grad_estimate = np.zeros_like(inputs[input_to_check])
         if grad_estimate.shape != grad.shape:
             raise Exception(
                 "Mismatched gradient shapes: estimated ({}), grad ({})".format(
                     grad_estimate.shape, grad.shape))

         for current_dim in range(dims_to_check):
             # Positive gradient
             inputs[input_to_check].flat[current_dim] += self._stepsize
             pos_loss, _ = self.GetLossAndGrad(
                 op, grad_ops, inputs[input_to_check], input_name,
                 grad_name, outputs_with_grads
             )
             # Negative gradient
             inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
             neg_loss, _ = self.GetLossAndGrad(
                 op, grad_ops, inputs[input_to_check], input_name,
                 grad_name, outputs_with_grads
             )
             # Recover the value
             inputs[input_to_check].flat[current_dim] += self._stepsize
             grad_estimate.flat[current_dim] = (
                 pos_loss - neg_loss) / self._stepsize / 2
         # Now, check correctness
         fail_mat = ~np.isclose(
             grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
         if np.any(fail_mat):
             idx = np.flatnonzero(fail_mat)
             print('Failed. [idx, grad, grad_estimate] are:')
             print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
             ret = False
         else:
             ret = True
         # After finishing, cleaning up things.
         if self._workspace_name != old_ws_name:
             # We reset the workspace to make sure everything intermediate is
             # cleaned up. Note that there is no need to delete a workspace -
             # when empty it takes a very limited amount of memory.
             workspace.ResetWorkspace()
             workspace.SwitchWorkspace(old_ws_name)
         return ret, grad, grad_estimate
	import numpy as np
	from caffe2.python import core, workspace
	from caffe2.proto import caffe2_pb2


	class GradientChecker:
	"""A gradient checker in Python.

	This is not the most efficient way to check gradients, as the Python
	interface will involve a lot of copy back and forth operations. Use at your
	own risk.
	"""

	def __init__(
	self,
	stepsize,
	threshold,
	device_option=caffe2_pb2.DeviceOption(),
	workspace_name="gradient_check"
	):
	self._stepsize = stepsize
	self._threshold = threshold
	self._device_option = device_option
	self._workspace_name = workspace_name

	def GetLossAndGrad(
	self, op, grad_ops, x, input_name, grad_name, outputs_with_grads
	):
	# First, feed in the current input. Note that we are not changing
	# anything else, so we don't need to feed in others.
	workspace.FeedBlob(input_name, x, self._device_option)
	# Run.
	workspace.RunOperatorOnce(op)
	loss = 0.
	# Get Loss and feed in the gradients, run gradient ops.
	for idx in outputs_with_grads:
	name = op.output[idx]
	arr = workspace.FetchBlob(name)
	loss += (arr**2).sum()
	workspace.FeedBlob(name + '_grad', arr, self._device_option)
	loss /= 2.
	# Run gradient ops
	workspace.RunOperatorsOnce(grad_ops)
	# Get gradients
	if isinstance(grad_name, core.GradientSlice):
	workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
	workspace.FeedBlob('one', np.ones(1, dtype=np.float32))
	sparse_to_dense_op = core.CreateOperator(
	'ScatterWeightedSum',
	['zeros', 'one', grad_name.indices, grad_name.values, 'one'],
	'zeros')
	workspace.RunOperatorOnce(sparse_to_dense_op)
	grad = workspace.FetchBlob('zeros')
	else:
	grad = workspace.FetchBlob(grad_name)
	return loss, grad

	def CheckSimple(
	self,
	op,
	inputs,
	input_to_check,
	outputs_with_grads,
	grad_ops=None,
	input_device_options=None
	):
	"""Checks the operator in a very simple fashion by stacking a sum of
	squares on the top.

	Inputs:
	op: the operator to be checked.
	inputs: the input data in numpy arrays.
	input_to_check: an index specifying which input blob we should
	check.
	outputs_with_grads: indices specifying which output blobs will we
	need to check gradients with. For these outputs, we will collect a
	squared sum and also feed in their gradients.
	grad_operator: the gradient operator. If not given, we will get the
	gradient operator from the gradient registry.
	input_device_options: an optional mapping from input names to
	DeviceOptions (to override the default DeviceOption)
	Outputs:
	boolean: True if it passes, False if it does not pass.
	"""
	if input_device_options is None:
	input_device_options = {}
	# Entering the checker workspace
	old_ws_name = workspace.CurrentWorkspace()
	if self._workspace_name != old_ws_name:
	workspace.SwitchWorkspace(self._workspace_name, True)

	op.device_option.CopyFrom(self._device_option)
	if grad_ops is None:
	# TODO(jiayq): use the gradient registration instead of the old
	# hack.
	grad_ops, g_input = core.GradientRegistry.GetGradientForOp(
	op, [s + '_grad' for s in op.output])

	dims_to_check = inputs[input_to_check].size
	# First, feed in the input.
	for i, arr in enumerate(inputs):
	workspace.FeedBlob(
	op.input[i], arr,
	input_device_options.get(
	op.input[i], self._device_option))

	# Get the loss and gradient for the original.
	input_name = op.input[input_to_check]
	grad_name = g_input[input_to_check]
	loss, grad = self.GetLossAndGrad(
	op, grad_ops, inputs[input_to_check], input_name, grad_name,
	outputs_with_grads
	)
	grad_estimate = np.zeros_like(inputs[input_to_check])
	if grad_estimate.shape != grad.shape:
	raise Exception(
	"Mismatched gradient shapes: estimated ({}), grad ({})".format(
	grad_estimate.shape, grad.shape))

	for current_dim in range(dims_to_check):
	# Positive gradient
	inputs[input_to_check].flat[current_dim] += self._stepsize
	pos_loss, _ = self.GetLossAndGrad(
	op, grad_ops, inputs[input_to_check], input_name,
	grad_name, outputs_with_grads
	)
	# Negative gradient
	inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
	neg_loss, _ = self.GetLossAndGrad(
	op, grad_ops, inputs[input_to_check], input_name,
	grad_name, outputs_with_grads
	)
	# Recover the value
	inputs[input_to_check].flat[current_dim] += self._stepsize
	grad_estimate.flat[current_dim] = (
	pos_loss - neg_loss) / self._stepsize / 2
	# Now, check correctness
	fail_mat = ~np.isclose(
	grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
	if np.any(fail_mat):
	idx = np.flatnonzero(fail_mat)
	print('Failed. [idx, grad, grad_estimate] are:')
	print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
	ret = False
	else:
	ret = True
	# After finishing, cleaning up things.
	if self._workspace_name != old_ws_name:
	# We reset the workspace to make sure everything intermediate is
	# cleaned up. Note that there is no need to delete a workspace -
	# when empty it takes a very limited amount of memory.
	workspace.ResetWorkspace()
	workspace.SwitchWorkspace(old_ws_name)
	return ret, grad, grad_estimate