pycaffe2/gradient_checker.py - platform/external/pytorch - Git at Google

 import numpy as np
 from pycaffe2 import core, workspace
 from caffe2.proto import caffe2_pb2

 class GradientChecker:
   """A gradient checker in Python.

   This is not the most efficient way to check gradients, as the Python interface
   will involve a lot of copy back and forth operations. Use at your own risk.
   """
   def __init__(self, stepsize, threshold,
                device_option=caffe2_pb2.DeviceOption(),
                workspace_name="gradient_check"):
     self._stepsize = stepsize
     self._threshold = threshold
     self._device_option = device_option
     self._workspace_name = workspace_name

   def GetLossAndGrad(self, op, grad_ops, x, input_name, outputs_with_grads):
     # First, feed in the current input. Note that we are not changing anything
     # else, so we don't need to feed in others.
     workspace.FeedBlob(input_name, x, self._device_option)
     # Run.
     workspace.RunOperatorOnce(op)
     loss = 0.
     # Get Loss and feed in the gradients, run gradient ops.
     for idx in outputs_with_grads:
       name = op.output[idx]
       arr = workspace.FetchBlob(name)
       loss += (arr ** 2).sum()
       workspace.FeedBlob(core.GetGradientName(name), arr, self._device_option)
     loss /= 2.
     # Run gradient ops
     workspace.RunOperatorsOnce(grad_ops)
     # Get gradients
     grad = workspace.FetchBlob(core.GetGradientName(input_name))
     return loss, grad


   def CheckSimple(self, op, inputs, input_to_check,
                   outputs_with_grads, grad_ops=None):
     """Checks the operator in a very simple fashion by stacking a sum of squares
     on the top.

     Inputs:
       op: the operator to be checked.
       inputs: the input data in numpy arrays.
       input_to_check: an index specifying which input blob we should
           check.
       outputs_with_grads: indices specifying which output blobs will we
           need to check gradients with. For these outputs, we will collect a
           squared sum and also feed in their gradients.
       grad_operator: the gradient operator. If not given, we will get the
           gradient operator from the gradient registry.
     Outputs:
       boolean: True if it passes, False if it does not pass.
     """
     # Entering the checker workspace
     old_ws_name = workspace.CurrentWorkspace()
     if self._workspace_name != old_ws_name:
       workspace.SwitchWorkspace(self._workspace_name, True)

     op.device_option.CopyFrom(self._device_option)
     if grad_ops is None:
       grad_ops = core.GradientRegistry.GetGradient(op)

     dims_to_check = inputs[input_to_check].size
     # First, feed in the input.
     for i, arr in enumerate(inputs):
       workspace.FeedBlob(op.input[i], arr, self._device_option)

     # Get the loss and gradient for the original.
     input_name = op.input[input_to_check]
     loss, grad = self.GetLossAndGrad(op, grad_ops, inputs[input_to_check],
                                      input_name, outputs_with_grads)
     grad_estimate = np.zeros_like(inputs[input_to_check])
     for current_dim in range(dims_to_check):
       # Positive gradient
       inputs[input_to_check].flat[current_dim] += self._stepsize
       pos_loss, _ = self.GetLossAndGrad(op, grad_ops, inputs[input_to_check],
                                      input_name, outputs_with_grads)
       # Negative gradient
       inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
       neg_loss, _ = self.GetLossAndGrad(op, grad_ops, inputs[input_to_check],
                                      input_name, outputs_with_grads)
       # Recover the value
       inputs[input_to_check].flat[current_dim] += self._stepsize
       grad_estimate.flat[current_dim] = (pos_loss - neg_loss) / self._stepsize / 2
     # Now, check correctness
     scale = np.maximum(np.maximum(np.abs(grad), np.abs(grad_estimate)), 1)
     fail_mat = (np.abs(grad - grad_estimate) > scale * self._threshold)
     if np.any(fail_mat):
       idx = np.flatnonzero(fail_mat)
       #print 'Failed. [idx, grad, grad_estimate] are:'
       #print np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T
       ret = False
     else:
       ret = True
     # After finishing, cleaning up things.
     if self._workspace_name != old_ws_name:
       # We reset the workspace to make sure everything intermediate is cleaned
       # up. Note that there is no need to delete a workspace - when empty it
       # takes a very limited amount of memory.
       workspace.ResetWorkspace()
       workspace.SwitchWorkspace(old_ws_name)
     return ret, grad, grad_estimate
	import numpy as np
	from pycaffe2 import core, workspace
	from caffe2.proto import caffe2_pb2

	class GradientChecker:
	"""A gradient checker in Python.

	This is not the most efficient way to check gradients, as the Python interface
	will involve a lot of copy back and forth operations. Use at your own risk.
	"""
	def __init__(self, stepsize, threshold,
	device_option=caffe2_pb2.DeviceOption(),
	workspace_name="gradient_check"):
	self._stepsize = stepsize
	self._threshold = threshold
	self._device_option = device_option
	self._workspace_name = workspace_name

	def GetLossAndGrad(self, op, grad_ops, x, input_name, outputs_with_grads):
	# First, feed in the current input. Note that we are not changing anything
	# else, so we don't need to feed in others.
	workspace.FeedBlob(input_name, x, self._device_option)
	# Run.
	workspace.RunOperatorOnce(op)
	loss = 0.
	# Get Loss and feed in the gradients, run gradient ops.
	for idx in outputs_with_grads:
	name = op.output[idx]
	arr = workspace.FetchBlob(name)
	loss += (arr ** 2).sum()
	workspace.FeedBlob(core.GetGradientName(name), arr, self._device_option)
	loss /= 2.
	# Run gradient ops
	workspace.RunOperatorsOnce(grad_ops)
	# Get gradients
	grad = workspace.FetchBlob(core.GetGradientName(input_name))
	return loss, grad


	def CheckSimple(self, op, inputs, input_to_check,
	outputs_with_grads, grad_ops=None):
	"""Checks the operator in a very simple fashion by stacking a sum of squares
	on the top.

	Inputs:
	op: the operator to be checked.
	inputs: the input data in numpy arrays.
	input_to_check: an index specifying which input blob we should
	check.
	outputs_with_grads: indices specifying which output blobs will we
	need to check gradients with. For these outputs, we will collect a
	squared sum and also feed in their gradients.
	grad_operator: the gradient operator. If not given, we will get the
	gradient operator from the gradient registry.
	Outputs:
	boolean: True if it passes, False if it does not pass.
	"""
	# Entering the checker workspace
	old_ws_name = workspace.CurrentWorkspace()
	if self._workspace_name != old_ws_name:
	workspace.SwitchWorkspace(self._workspace_name, True)

	op.device_option.CopyFrom(self._device_option)
	if grad_ops is None:
	grad_ops = core.GradientRegistry.GetGradient(op)

	dims_to_check = inputs[input_to_check].size
	# First, feed in the input.
	for i, arr in enumerate(inputs):
	workspace.FeedBlob(op.input[i], arr, self._device_option)

	# Get the loss and gradient for the original.
	input_name = op.input[input_to_check]
	loss, grad = self.GetLossAndGrad(op, grad_ops, inputs[input_to_check],
	input_name, outputs_with_grads)
	grad_estimate = np.zeros_like(inputs[input_to_check])
	for current_dim in range(dims_to_check):
	# Positive gradient
	inputs[input_to_check].flat[current_dim] += self._stepsize
	pos_loss, _ = self.GetLossAndGrad(op, grad_ops, inputs[input_to_check],
	input_name, outputs_with_grads)
	# Negative gradient
	inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
	neg_loss, _ = self.GetLossAndGrad(op, grad_ops, inputs[input_to_check],
	input_name, outputs_with_grads)
	# Recover the value
	inputs[input_to_check].flat[current_dim] += self._stepsize
	grad_estimate.flat[current_dim] = (pos_loss - neg_loss) / self._stepsize / 2
	# Now, check correctness
	scale = np.maximum(np.maximum(np.abs(grad), np.abs(grad_estimate)), 1)
	fail_mat = (np.abs(grad - grad_estimate) > scale * self._threshold)
	if np.any(fail_mat):
	idx = np.flatnonzero(fail_mat)
	#print 'Failed. [idx, grad, grad_estimate] are:'
	#print np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T
	ret = False
	else:
	ret = True
	# After finishing, cleaning up things.
	if self._workspace_name != old_ws_name:
	# We reset the workspace to make sure everything intermediate is cleaned
	# up. Note that there is no need to delete a workspace - when empty it
	# takes a very limited amount of memory.
	workspace.ResetWorkspace()
	workspace.SwitchWorkspace(old_ws_name)
	return ret, grad, grad_estimate