torch/nn/utils/clip_grad.py - platform/external/pytorch - Git at Google

 import warnings
 import torch
 from torch._six import inf
 from typing import Union, Iterable

 # Type alias for arguments that accept either a single tensor or an iterable of tensors.
 _tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]]


 def clip_grad_norm_(parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.0,
                     error_if_nonfinite: bool = False) -> torch.Tensor:
     r"""Clips gradient norm of an iterable of parameters.

     The norm is computed over all gradients together, as if they were
     concatenated into a single vector. Gradients are modified in-place.
     Parameters whose ``.grad`` is ``None`` are ignored.

     Args:
         parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
             single Tensor that will have gradients normalized
         max_norm (float or int): max norm of the gradients
         norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
             infinity norm.
         error_if_nonfinite (bool): if True, raise an error instead of
             clipping when the total norm is ``nan``, ``inf`` or ``-inf``.
             Default: False

     Returns:
         Total norm of the parameters (viewed as a single vector). A zero
         tensor is returned when no parameter has a gradient.

     Raises:
         RuntimeError: if ``error_if_nonfinite`` is True and the total norm
             is not finite.
     """
     if isinstance(parameters, torch.Tensor):
         parameters = [parameters]
     # Materialise the iterable once (it may be a generator) and drop
     # parameters that have no gradient to clip.
     parameters = [p for p in parameters if p.grad is not None]
     max_norm = float(max_norm)
     norm_type = float(norm_type)
     if not parameters:
         return torch.tensor(0.)
     # Per-parameter norms are gathered on the device of the first gradient so
     # they can be stacked even when gradients live on different devices.
     device = parameters[0].grad.device
     if norm_type == inf:
         per_param_max = [p.grad.detach().abs().max().to(device) for p in parameters]
         # Reduce with torch.max rather than the builtin max(): the builtin
         # compares with ``>``, which is False for NaN, so a NaN gradient could
         # be kept or dropped depending on parameter order.
         total_norm = torch.max(torch.stack(per_param_max))
     else:
         per_param_norm = [torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]
         total_norm = torch.norm(torch.stack(per_param_norm), norm_type)
     if error_if_nonfinite and not torch.isfinite(total_norm):
         raise RuntimeError(
             "The total norm of order {} for gradients from `parameters` is "
             "non-finite, so it cannot be clipped. To disable this error, "
             "set `error_if_nonfinite=False`".format(norm_type))
     # The small epsilon avoids a division by zero when every gradient is zero.
     clip_coef = max_norm / (total_norm + 1e-6)
     # Gradients are only ever scaled down; a NaN coefficient compares False
     # here and therefore leaves the gradients untouched.
     if clip_coef < 1:
         for p in parameters:
             p.grad.detach().mul_(clip_coef.to(p.grad.device))
     return total_norm


 def clip_grad_norm(parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.) -> torch.Tensor:
     r"""Deprecated spelling of :func:`torch.nn.utils.clip_grad_norm_`.

     Emits a warning and then forwards all arguments unchanged.

     .. warning::
         This method is now deprecated in favor of
         :func:`torch.nn.utils.clip_grad_norm_`.
     """
     deprecation_notice = (
         "torch.nn.utils.clip_grad_norm is now deprecated in favor "
         "of torch.nn.utils.clip_grad_norm_."
     )
     # stacklevel=2 attributes the warning to the caller of this wrapper.
     warnings.warn(deprecation_notice, stacklevel=2)
     total_norm = clip_grad_norm_(parameters, max_norm, norm_type)
     return total_norm


 def clip_grad_value_(parameters: _tensor_or_tensors, clip_value: float) -> None:
     r"""Clamps every gradient element into a symmetric range, in-place.

     Parameters whose ``.grad`` is ``None`` are skipped.

     Args:
         parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
             single Tensor whose gradients will be clipped
         clip_value (float or int): maximum allowed value of the gradients.
             The gradients are clipped in the range
             :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
     """
     # A lone tensor is treated as a one-element collection.
     candidates = [parameters] if isinstance(parameters, torch.Tensor) else parameters
     bound = float(clip_value)
     for param in candidates:
         if param.grad is None:
             continue
         param.grad.data.clamp_(min=-bound, max=bound)
	import warnings
	import torch
	from torch._six import inf
	from typing import Union, Iterable

	# Type alias for arguments that accept either a single tensor or an iterable of tensors.
	_tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]]


	def clip_grad_norm_(parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.0,
	                    error_if_nonfinite: bool = False) -> torch.Tensor:
	    r"""Clips gradient norm of an iterable of parameters.

	    The norm is computed over all gradients together, as if they were
	    concatenated into a single vector. Gradients are modified in-place.
	    Parameters whose ``.grad`` is ``None`` are ignored.

	    Args:
	        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
	            single Tensor that will have gradients normalized
	        max_norm (float or int): max norm of the gradients
	        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
	            infinity norm.
	        error_if_nonfinite (bool): if True, raise an error instead of
	            clipping when the total norm is ``nan``, ``inf`` or ``-inf``.
	            Default: False

	    Returns:
	        Total norm of the parameters (viewed as a single vector). A zero
	        tensor is returned when no parameter has a gradient.

	    Raises:
	        RuntimeError: if ``error_if_nonfinite`` is True and the total norm
	            is not finite.
	    """
	    if isinstance(parameters, torch.Tensor):
	        parameters = [parameters]
	    # Materialise the iterable once (it may be a generator) and drop
	    # parameters that have no gradient to clip.
	    parameters = [p for p in parameters if p.grad is not None]
	    max_norm = float(max_norm)
	    norm_type = float(norm_type)
	    if not parameters:
	        return torch.tensor(0.)
	    # Per-parameter norms are gathered on the device of the first gradient so
	    # they can be stacked even when gradients live on different devices.
	    device = parameters[0].grad.device
	    if norm_type == inf:
	        per_param_max = [p.grad.detach().abs().max().to(device) for p in parameters]
	        # Reduce with torch.max rather than the builtin max(): the builtin
	        # compares with ``>``, which is False for NaN, so a NaN gradient could
	        # be kept or dropped depending on parameter order.
	        total_norm = torch.max(torch.stack(per_param_max))
	    else:
	        per_param_norm = [torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]
	        total_norm = torch.norm(torch.stack(per_param_norm), norm_type)
	    if error_if_nonfinite and not torch.isfinite(total_norm):
	        raise RuntimeError(
	            "The total norm of order {} for gradients from `parameters` is "
	            "non-finite, so it cannot be clipped. To disable this error, "
	            "set `error_if_nonfinite=False`".format(norm_type))
	    # The small epsilon avoids a division by zero when every gradient is zero.
	    clip_coef = max_norm / (total_norm + 1e-6)
	    # Gradients are only ever scaled down; a NaN coefficient compares False
	    # here and therefore leaves the gradients untouched.
	    if clip_coef < 1:
	        for p in parameters:
	            p.grad.detach().mul_(clip_coef.to(p.grad.device))
	    return total_norm


	def clip_grad_norm(parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.) -> torch.Tensor:
	    r"""Clips gradient norm of an iterable of parameters.

	    Emits a warning and then forwards all arguments unchanged to
	    :func:`torch.nn.utils.clip_grad_norm_`.

	    .. warning::
	        This method is now deprecated in favor of
	        :func:`torch.nn.utils.clip_grad_norm_`.
	    """
	    # stacklevel=2 attributes the warning to the caller of this wrapper.
	    warnings.warn("torch.nn.utils.clip_grad_norm is now deprecated in favor "
	                  "of torch.nn.utils.clip_grad_norm_.", stacklevel=2)
	    return clip_grad_norm_(parameters, max_norm, norm_type)


	def clip_grad_value_(parameters: _tensor_or_tensors, clip_value: float) -> None:
	    r"""Clips gradient of an iterable of parameters at specified value.

	    Gradients are modified in-place. Parameters whose ``.grad`` is ``None``
	    are skipped.

	    Args:
	        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
	            single Tensor whose gradients will be clipped
	        clip_value (float or int): maximum allowed value of the gradients.
	            The gradients are clipped in the range
	            :math:`\left[\text{-clip\_value}, \text{clip\_value}\right]`
	    """
	    if isinstance(parameters, torch.Tensor):
	        parameters = [parameters]
	    clip_value = float(clip_value)
	    for p in filter(lambda p: p.grad is not None, parameters):
	        p.grad.data.clamp_(min=-clip_value, max=clip_value)