torch/nn/modules/normalization.py - platform/external/pytorch - Git at Google

 import torch
 from torch.nn.parameter import Parameter
 from .module import Module
 from .batchnorm import _BatchNorm
 from .. import functional as F


 class LocalResponseNorm(Module):
     r"""Local response normalization across the channel dimension.

     The input is treated as a stack of planes whose channels live in the
     second dimension; every element is rescaled using the squared values of
     the neighbouring channels:

     .. math::
         b_{c} = a_{c}\left(k + \frac{\alpha}{n}
         \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

     Args:
         size: amount of neighbouring channels used for normalization
         alpha: multiplicative factor. Default: 0.0001
         beta: exponent. Default: 0.75
         k: additive factor. Default: 1

     Shape:
         - Input: :math:`(N, C, ...)`
         - Output: :math:`(N, C, ...)` (same shape as input)

     Examples:
         >>> lrn = nn.LocalResponseNorm(2)
         >>> signal_2d = autograd.Variable(torch.randn(32, 5, 24, 24))
         >>> signal_4d = autograd.Variable(torch.randn(16, 5, 7, 7, 7, 7))
         >>> output_2d = lrn(signal_2d)
         >>> output_4d = lrn(signal_4d)
     """

     def __init__(self, size, alpha=1e-4, beta=0.75, k=1):
         super(LocalResponseNorm, self).__init__()
         # Hyper-parameters are stored verbatim and handed to the functional
         # implementation on every forward call.
         self.k = k
         self.beta = beta
         self.alpha = alpha
         self.size = size

     def forward(self, input):
         """Return ``F.local_response_norm`` applied to ``input``."""
         normalized = F.local_response_norm(
             input, self.size, self.alpha, self.beta, self.k)
         return normalized

     def __repr__(self):
         """Render as ``ClassName(size, alpha=..., beta=..., k=...)``."""
         template = '{}({}, alpha={}, beta={}, k={})'
         return template.format(self.__class__.__name__, self.size,
                                self.alpha, self.beta, self.k)


 class CrossMapLRN2d(Module):
     """Module wrapper around the backend ``CrossMapLRN2d`` operation.

     NOTE(review): judging by the name and the parameter set this is a
     cross-channel local response normalization for 2d feature maps; the
     actual computation lives in ``self._backend`` -- confirm there.

     Args:
         size: forwarded to the backend op as its first argument
         alpha: forwarded to the backend op. Default: 0.0001
         beta: forwarded to the backend op. Default: 0.75
         k: forwarded to the backend op. Default: 1
     """

     def __init__(self, size, alpha=1e-4, beta=0.75, k=1):
         super(CrossMapLRN2d, self).__init__()
         # Kept as plain attributes; a backend function object is built from
         # them on every forward call.
         self.k = k
         self.beta = beta
         self.alpha = alpha
         self.size = size

     def forward(self, input):
         """Build the backend op from the stored settings and apply it."""
         lrn_op = self._backend.CrossMapLRN2d(
             self.size, self.alpha, self.beta, self.k)
         return lrn_op(input)

     def __repr__(self):
         """Render as ``ClassName(size, alpha=..., beta=..., k=...)``."""
         template = '{}({}, alpha={}, beta={}, k={})'
         return template.format(self.__class__.__name__, self.size,
                                self.alpha, self.beta, self.k)


 class LayerNorm(Module):
     r"""Applies Layer Normalization over a mini-batch of inputs as described in
     the paper `Layer Normalization`_ .

     .. math::
         y = \frac{x - mean[x]}{ \sqrt{Var[x]} + \epsilon} * gamma + beta

     The mean and standard-deviation are calculated separately over the last
     certain number dimensions with shape specified by :attr:`normalized_shape`.
     Gamma and beta are learnable parameters of :attr:`normalized_shape` if
     :attr:`elementwise_affine` is ``True``.

     .. note::
         Unlike Batch Normalization and Instance Normalization, which applies
         scalar scale and bias for each entire channel/plane with the
         :attr:`affine` option, Layer Normalization applies per-element scale and
         bias with :attr:`elementwise_affine`.

     By default, this layer uses statistics computed from input data in both
     training and evaluation modes.

     If :attr:`track_running_stats` is set to ``True``, during training this
     layer keeps running estimates of its computed mean and variance, which are
     then used for normalization during evaluation. The running estimates are
     kept with a default :attr:`momentum` of 0.1.

     .. note::
         This :attr:`momentum` argument is different from one used in optimizer
         classes and the conventional notion of momentum. Mathematically, the
         update rule for running statistics here is
         :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
         where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
         new observed value.

     Args:
         normalized_shape (int or list or torch.Size): input shape from an expected input of size
             `[* x normalized_shape[0] x normalized_shape[1] x ... x normalized_shape[-1]]`.
             A single integer is treated as a one-element shape.
         eps: a value added to the denominator for numerical stability. Default: 1e-5
         momentum: the value used for the running_mean and running_var computation. Default: 0.1
         elementwise_affine: a boolean value that when set to ``True``, this module
             has learnable per-element affine parameters. Default: ``True``
         track_running_stats: a boolean value that when set to ``True``, this
             module tracks the running mean and variance, and when set to ``False``,
             this module does not track such statistics and always uses batch
             statistics in both training and eval modes. Default: ``False``

     Shape:
         - Input: :math:`(N, *)`
         - Output: :math:`(N, *)` (same shape as input)

     Examples:
         >>> input = autograd.Variable(torch.randn(20, 5, 10, 10))
         >>> # With Learnable Parameters
         >>> m = nn.LayerNorm(input.size()[1:])
         >>> # Without Learnable Parameters
         >>> m = nn.LayerNorm(input.size()[1:], elementwise_affine=False)
         >>> # Normalize over the last dimension only (of size 10)
         >>> m = nn.LayerNorm(10)
         >>> output = m(input)

     .. _`Layer Normalization`: https://arxiv.org/abs/1607.06450
     """
     def __init__(self, normalized_shape, eps=1e-5, momentum=0.1,
                  elementwise_affine=True, track_running_stats=False):
         super(LayerNorm, self).__init__()
         if isinstance(normalized_shape, int):
             # A bare integer cannot be fed to torch.Size or unpacked with
             # ``*``; promote it to a one-element shape.
             normalized_shape = (normalized_shape,)
         self.normalized_shape = torch.Size(normalized_shape)
         self.eps = eps
         self.momentum = momentum
         self.elementwise_affine = elementwise_affine
         self.track_running_stats = track_running_stats
         if self.elementwise_affine:
             # Size the parameters from the stored torch.Size so they always
             # agree with ``self.normalized_shape``.
             self.weight = Parameter(torch.Tensor(*self.normalized_shape))
             self.bias = Parameter(torch.Tensor(*self.normalized_shape))
         else:
             # Registering None keeps the attribute names defined.
             self.register_parameter('weight', None)
             self.register_parameter('bias', None)
         if self.track_running_stats:
             self.register_buffer('running_mean', torch.zeros(1))
             self.register_buffer('running_var', torch.ones(1))
         else:
             self.register_parameter('running_mean', None)
             self.register_parameter('running_var', None)
         self.reset_parameters()

     def reset_parameters(self):
         """Reset running statistics and affine parameters to initial values.

         Running mean/var (when tracked) become 0 and 1; the weight (when
         present) is filled by ``uniform_()`` and the bias is zeroed.
         """
         if self.track_running_stats:
             self.running_mean.zero_()
             self.running_var.fill_(1)
         if self.elementwise_affine:
             self.weight.data.uniform_()
             self.bias.data.zero_()

     def forward(self, input):
         """Delegate to ``F.layer_norm`` with this module's state.

         The flag passed after ``bias`` is true while training or whenever
         running statistics are not tracked.
         """
         return F.layer_norm(
             input, self.normalized_shape, self.running_mean, self.running_var,
             self.weight, self.bias, self.training or not self.track_running_stats,
             self.momentum, self.eps)

     def __repr__(self):
         """Render the constructor-style summary of this module's settings."""
         return ('{name}({normalized_shape}, eps={eps}, momentum={momentum},'
                 ' elementwise_affine={elementwise_affine},'
                 ' track_running_stats={track_running_stats})'
                 .format(name=self.__class__.__name__, **self.__dict__))


 # TODO: ContrastiveNorm2d
 # TODO: DivisiveNorm2d
 # TODO: SubtractiveNorm2d
	import torch
	from torch.nn.parameter import Parameter
	from .module import Module
	from .batchnorm import _BatchNorm
	from .. import functional as F


	class LocalResponseNorm(Module):
	    r"""Applies local response normalization over an input signal composed
	    of several input planes, where channels occupy the second dimension.
	    Applies normalization across channels.

	    .. math::
	        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
	        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

	    Args:
	        size: amount of neighbouring channels used for normalization
	        alpha: multiplicative factor. Default: 0.0001
	        beta: exponent. Default: 0.75
	        k: additive factor. Default: 1

	    Shape:
	        - Input: :math:`(N, C, ...)`
	        - Output: :math:`(N, C, ...)` (same shape as input)

	    Examples:
	        >>> lrn = nn.LocalResponseNorm(2)
	        >>> signal_2d = autograd.Variable(torch.randn(32, 5, 24, 24))
	        >>> signal_4d = autograd.Variable(torch.randn(16, 5, 7, 7, 7, 7))
	        >>> output_2d = lrn(signal_2d)
	        >>> output_4d = lrn(signal_4d)
	    """

	    def __init__(self, size, alpha=1e-4, beta=0.75, k=1):
	        super(LocalResponseNorm, self).__init__()
	        # Stored as-is; read back by forward() and __repr__().
	        self.size = size
	        self.alpha = alpha
	        self.beta = beta
	        self.k = k

	    def forward(self, input):
	        """Apply ``F.local_response_norm`` with the stored settings."""
	        return F.local_response_norm(input, self.size, self.alpha, self.beta,
	                                     self.k)

	    def __repr__(self):
	        """Render as ``ClassName(size, alpha=..., beta=..., k=...)``."""
	        return self.__class__.__name__ + '(' \
	            + str(self.size) \
	            + ', alpha=' + str(self.alpha) \
	            + ', beta=' + str(self.beta) \
	            + ', k=' + str(self.k) + ')'


	class CrossMapLRN2d(Module):
	    """Module wrapper that applies the backend ``CrossMapLRN2d`` operation.

	    NOTE(review): presumably local response normalization across feature
	    maps of 2d inputs; the computation itself is defined by
	    ``self._backend`` -- verify there.

	    Args:
	        size: first argument handed to the backend op
	        alpha: handed to the backend op. Default: 0.0001
	        beta: handed to the backend op. Default: 0.75
	        k: handed to the backend op. Default: 1
	    """

	    def __init__(self, size, alpha=1e-4, beta=0.75, k=1):
	        super(CrossMapLRN2d, self).__init__()
	        self.size = size
	        self.alpha = alpha
	        self.beta = beta
	        self.k = k

	    def forward(self, input):
	        """Instantiate the backend op from the stored settings and call it."""
	        return self._backend.CrossMapLRN2d(self.size, self.alpha, self.beta,
	                                           self.k)(input)

	    def __repr__(self):
	        """Render as ``ClassName(size, alpha=..., beta=..., k=...)``."""
	        return self.__class__.__name__ + '(' \
	            + str(self.size) \
	            + ', alpha=' + str(self.alpha) \
	            + ', beta=' + str(self.beta) \
	            + ', k=' + str(self.k) + ')'


	class LayerNorm(Module):
	    r"""Applies Layer Normalization over a mini-batch of inputs as described in
	    the paper `Layer Normalization`_ .

	    .. math::
	        y = \frac{x - mean[x]}{ \sqrt{Var[x]} + \epsilon} * gamma + beta

	    The mean and standard-deviation are calculated separately over the last
	    certain number dimensions with shape specified by :attr:`normalized_shape`.
	    Gamma and beta are learnable parameters of :attr:`normalized_shape` if
	    :attr:`elementwise_affine` is ``True``.

	    .. note::
	        Unlike Batch Normalization and Instance Normalization, which applies
	        scalar scale and bias for each entire channel/plane with the
	        :attr:`affine` option, Layer Normalization applies per-element scale and
	        bias with :attr:`elementwise_affine`.

	    By default, this layer uses statistics computed from input data in both
	    training and evaluation modes.

	    If :attr:`track_running_stats` is set to ``True``, during training this
	    layer keeps running estimates of its computed mean and variance, which are
	    then used for normalization during evaluation. The running estimates are
	    kept with a default :attr:`momentum` of 0.1.

	    .. note::
	        This :attr:`momentum` argument is different from one used in optimizer
	        classes and the conventional notion of momentum. Mathematically, the
	        update rule for running statistics here is
	        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
	        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
	        new observed value.

	    Args:
	        normalized_shape (int or list or torch.Size): input shape from an expected input of size
	            `[* x normalized_shape[0] x normalized_shape[1] x ... x normalized_shape[-1]]`.
	            A single integer is treated as a one-element shape.
	        eps: a value added to the denominator for numerical stability. Default: 1e-5
	        momentum: the value used for the running_mean and running_var computation. Default: 0.1
	        elementwise_affine: a boolean value that when set to ``True``, this module
	            has learnable per-element affine parameters. Default: ``True``
	        track_running_stats: a boolean value that when set to ``True``, this
	            module tracks the running mean and variance, and when set to ``False``,
	            this module does not track such statistics and always uses batch
	            statistics in both training and eval modes. Default: ``False``

	    Shape:
	        - Input: :math:`(N, *)`
	        - Output: :math:`(N, *)` (same shape as input)

	    Examples:
	        >>> input = autograd.Variable(torch.randn(20, 5, 10, 10))
	        >>> # With Learnable Parameters
	        >>> m = nn.LayerNorm(input.size()[1:])
	        >>> # Without Learnable Parameters
	        >>> m = nn.LayerNorm(input.size()[1:], elementwise_affine=False)
	        >>> output = m(input)

	    .. _`Layer Normalization`: https://arxiv.org/abs/1607.06450
	    """
	    def __init__(self, normalized_shape, eps=1e-5, momentum=0.1,
	                 elementwise_affine=True, track_running_stats=False):
	        super(LayerNorm, self).__init__()
	        if isinstance(normalized_shape, int):
	            # torch.Size and ``*`` unpacking both need an iterable, so wrap
	            # a lone integer into a one-element shape.
	            normalized_shape = (normalized_shape,)
	        self.normalized_shape = torch.Size(normalized_shape)
	        self.eps = eps
	        self.momentum = momentum
	        self.elementwise_affine = elementwise_affine
	        self.track_running_stats = track_running_stats
	        if self.elementwise_affine:
	            # Allocate from the stored torch.Size so parameter shapes always
	            # match ``self.normalized_shape``.
	            self.weight = Parameter(torch.Tensor(*self.normalized_shape))
	            self.bias = Parameter(torch.Tensor(*self.normalized_shape))
	        else:
	            # Registering None keeps the attribute names defined.
	            self.register_parameter('weight', None)
	            self.register_parameter('bias', None)
	        if self.track_running_stats:
	            self.register_buffer('running_mean', torch.zeros(1))
	            self.register_buffer('running_var', torch.ones(1))
	        else:
	            self.register_parameter('running_mean', None)
	            self.register_parameter('running_var', None)
	        self.reset_parameters()

	    def reset_parameters(self):
	        """Re-initialize state: running mean 0 / var 1 when tracked, and
	        ``uniform_()`` weight / zero bias when the affine terms exist."""
	        if self.track_running_stats:
	            self.running_mean.zero_()
	            self.running_var.fill_(1)
	        if self.elementwise_affine:
	            self.weight.data.uniform_()
	            self.bias.data.zero_()

	    def forward(self, input):
	        """Call ``F.layer_norm`` with this module's buffers and parameters.

	        The boolean argument is true during training or whenever running
	        statistics are not tracked.
	        """
	        return F.layer_norm(
	            input, self.normalized_shape, self.running_mean, self.running_var,
	            self.weight, self.bias, self.training or not self.track_running_stats,
	            self.momentum, self.eps)

	    def __repr__(self):
	        """Render the constructor-style summary of this module's settings."""
	        return ('{name}({normalized_shape}, eps={eps}, momentum={momentum},'
	                ' elementwise_affine={elementwise_affine},'
	                ' track_running_stats={track_running_stats})'
	                .format(name=self.__class__.__name__, **self.__dict__))


	# TODO: ContrastiveNorm2d
	# TODO: DivisiveNorm2d
	# TODO: SubtractiveNorm2d