tensorflow/contrib/distributions/python/ops/vector_student_t.py - platform/external/tensorflow - Git at Google

 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Vector Student's t distribution classes."""

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 from tensorflow.contrib.distributions.python.ops import bijectors
 from tensorflow.contrib.distributions.python.ops import distribution_util
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.distributions import student_t
 from tensorflow.python.ops.distributions import transformed_distribution
 from tensorflow.python.util import deprecation


 class _VectorStudentT(transformed_distribution.TransformedDistribution):
   """A vector version of Student's t-distribution on `R^k`.

   #### Mathematical details

   The probability density function (pdf) is,

   ```none
   pdf(x; df, mu, Sigma) = (1 + ||y||**2 / df)**(-0.5 (df + 1)) / Z
   where,
   y = inv(Sigma) (x - mu)
   Z = abs(det(Sigma)) ( sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1)) )**k
   ```

   where:
   * `loc = mu`; a vector in `R^k`,
   * `scale = Sigma`; a lower-triangular matrix in `R^{k x k}`,
   * `Z` denotes the normalization constant, and,
   * `Gamma` is the [gamma function](
     https://en.wikipedia.org/wiki/Gamma_function), and,
   * `||y||**2` denotes the [squared Euclidean norm](
   https://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm) of `y`.

   The VectorStudentT distribution is a member of the [location-scale family](
   https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
   constructed as,

   ```none
   X ~ StudentT(df, loc=0, scale=1)
   Y = loc + scale * X
   ```

   Notice that the `scale` matrix has semantics closer to std. deviation than
   covariance (but it is not std. deviation).

   This distribution is an Affine transformation of iid
   [Student's t-distributions](
   https://en.wikipedia.org/wiki/Student%27s_t-distribution)
   and should not be confused with the [Multivariate Student's t-distribution](
   https://en.wikipedia.org/wiki/Multivariate_t-distribution). The
   traditional Multivariate Student's t-distribution is a type of
   [elliptical distribution](
   https://en.wikipedia.org/wiki/Elliptical_distribution); it has PDF:

   ```none
   pdf(x; df, mu, Sigma) = (1 + ||y||**2 / df)**(-0.5 (df + k)) / Z
   where,
   y = inv(Sigma) (x - mu)
   Z = abs(det(Sigma)) sqrt(df pi)**k Gamma(0.5 df) / Gamma(0.5 (df + k))
   ```

   Notice that the Multivariate Student's t-distribution uses `k` where the
   Vector Student's t-distribution has a `1`. Conversely the Vector version has a
   broader application of the power-`k` in the normalization constant.

   #### Examples

   A single instance of a "Vector Student's t-distribution" is defined by a mean
   vector of length `k` and a scale matrix of shape `k x k`.

   Extra leading dimensions, if provided, allow for batches.

   ```python
   import tensorflow_probability as tfp
   tfd = tfp.distributions

   # Initialize a single 3-variate vector Student's t-distribution.
   mu = [1., 2, 3]
   chol = [[1., 0, 0.],
           [1, 3, 0],
           [1, 2, 3]]
   vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)

   # Evaluate this on an observation in R^3, returning a scalar.
   vt.prob([-1., 0, 1])

   # Initialize a batch of two 3-variate vector Student's t-distributions.
   # `df` is a required argument; a scalar `df` is shared by the whole batch.
   mu = [[1., 2, 3],
         [11, 22, 33]]
   chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
   vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)

   # Evaluate this on two observations, each in R^3, returning a length two
   # tensor.
   x = [[-1, 0, 1],
        [-11, 0, 11]]
   vt.prob(x)
   ```

   For more examples of how to construct the `scale` matrix, see the
   `tf.contrib.distributions.bijectors.Affine` docstring.

   """

   @deprecation.deprecated(
       "2018-10-01",
       "The TensorFlow Distributions library has moved to "
       "TensorFlow Probability "
       "(https://github.com/tensorflow/probability). You "
       "should update all references to use `tfp.distributions` "
       "instead of `tf.contrib.distributions`.",
       warn_once=True)
   def __init__(self,
                df,
                loc=None,
                scale_identity_multiplier=None,
                scale_diag=None,
                scale_tril=None,
                scale_perturb_factor=None,
                scale_perturb_diag=None,
                validate_args=False,
                allow_nan_stats=True,
                name="VectorStudentT"):
     """Instantiates the vector Student's t-distributions on `R^k`.

     The `batch_shape` is the broadcast between `df.batch_shape` and
     `Affine.batch_shape` where `Affine` is constructed from `loc` and
     `scale_*` arguments.

     The `event_shape` is the event shape of `Affine.event_shape`.

     Args:
       df: Floating-point `Tensor`. The degrees of freedom of the
         distribution(s). `df` must contain only positive values. Must be
         scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the
         same `batch_shape` implied by `loc`, `scale_*`.
       loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
         applied.
       scale_identity_multiplier: floating point rank 0 `Tensor` representing a
         scaling done to the identity matrix. When `scale_identity_multiplier =
         scale_diag = scale_tril = None` then `scale += IdentityMatrix`.
         Otherwise no scaled-identity-matrix is added to `scale`.
       scale_diag: Floating-point `Tensor` representing the diagonal matrix.
         `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
         diagonal matrix. When `None` no diagonal term is added to `scale`.
       scale_tril: Floating-point `Tensor` representing the lower triangular
         matrix. `scale_tril` has shape [N1, N2, ..., k, k], which represents a
         k x k lower triangular matrix. When `None` no `scale_tril` term is
         added to `scale`. The upper triangular elements above the diagonal are
         ignored.
       scale_perturb_factor: Floating-point `Tensor` representing factor matrix
         with last two dimensions of shape `(k, r)`. When `None`, no rank-r
         update is added to `scale`.
       scale_perturb_diag: Floating-point `Tensor` representing the diagonal
         matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
         represents an r x r Diagonal matrix. When `None` low rank updates will
         take the form `scale_perturb_factor * scale_perturb_factor.T`.
       validate_args: Python `bool`, default `False`. When `True` distribution
         parameters are checked for validity despite possibly degrading runtime
         performance. When `False` invalid inputs may silently render incorrect
         outputs. The flag is applied to the `Affine` bijector, to the base
         `StudentT` distribution and to the transformed distribution.
       allow_nan_stats: Python `bool`, default `True`. When `True`,
         statistics (e.g., mean, mode, variance) use the value "`NaN`" to
         indicate the result is undefined. When `False`, an exception is raised
         if one or more of the statistic's batch members are undefined.
       name: Python `str` name prefixed to Ops created by this class.
     """
     # Snapshot the constructor arguments before any other local is bound.
     parameters = dict(locals())
     graph_parents = [df, loc, scale_identity_multiplier, scale_diag,
                      scale_tril, scale_perturb_factor, scale_perturb_diag]
     with ops.name_scope(name) as name:
       with ops.name_scope("init", values=graph_parents):
         # The shape of the _VectorStudentT distribution is governed by the
         # relationship between df.batch_shape and affine.batch_shape. In
         # pseudocode the basic procedure is:
         #   if df.batch_shape is scalar:
         #     if affine.batch_shape is not scalar:
         #       # broadcast distribution.sample so
         #       # it has affine.batch_shape.
         #     self.batch_shape = affine.batch_shape
         #   else:
         #     if affine.batch_shape is scalar:
         #       # let affine broadcasting do its thing.
         #     self.batch_shape = df.batch_shape
         # All of the above magic is actually handled by TransformedDistribution.
         # Here we really only need to collect the affine.batch_shape and decide
         # what we're going to pass in to TransformedDistribution's
         # (override) batch_shape arg.
         affine = bijectors.Affine(
             shift=loc,
             scale_identity_multiplier=scale_identity_multiplier,
             scale_diag=scale_diag,
             scale_tril=scale_tril,
             scale_perturb_factor=scale_perturb_factor,
             scale_perturb_diag=scale_perturb_diag,
             validate_args=validate_args)
         # `allow_nan_stats` is not passed to `TransformedDistribution.__init__`
         # below, so it has to be set on the base distribution or the caller's
         # choice would be silently dropped. `validate_args` is forwarded as
         # well so that `df` is checked when validation is requested.
         distribution = student_t.StudentT(
             df=df,
             loc=array_ops.zeros([], dtype=affine.dtype),
             scale=array_ops.ones([], dtype=affine.dtype),
             validate_args=validate_args,
             allow_nan_stats=allow_nan_stats)
         batch_shape, override_event_shape = (
             distribution_util.shapes_from_loc_and_scale(
                 affine.shift, affine.scale))
         # Only override the batch shape when the base distribution is a scalar
         # batch; otherwise pass an empty shape vector.
         override_batch_shape = distribution_util.pick_vector(
             distribution.is_scalar_batch(),
             batch_shape,
             constant_op.constant([], dtype=dtypes.int32))
         super(_VectorStudentT, self).__init__(
             distribution=distribution,
             bijector=affine,
             batch_shape=override_batch_shape,
             event_shape=override_event_shape,
             validate_args=validate_args,
             name=name)
         # Report the arguments this constructor received rather than whatever
         # the parent constructor recorded.
         self._parameters = parameters

   @property
   def df(self):
     """Degrees of freedom in these Student's t distribution(s)."""
     return self.distribution.df

   @property
   def loc(self):
     """Locations of these Student's t distribution(s)."""
     return self.bijector.shift

   @property
   def scale(self):
     """The `scale` of the underlying `Affine` bijector (not a covariance)."""
     return self.bijector.scale
	# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================
	"""Vector Student's t distribution classes."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	from tensorflow.contrib.distributions.python.ops import bijectors
	from tensorflow.contrib.distributions.python.ops import distribution_util
	from tensorflow.python.framework import constant_op
	from tensorflow.python.framework import dtypes
	from tensorflow.python.framework import ops
	from tensorflow.python.ops import array_ops
	from tensorflow.python.ops.distributions import student_t
	from tensorflow.python.ops.distributions import transformed_distribution
	from tensorflow.python.util import deprecation


	class _VectorStudentT(transformed_distribution.TransformedDistribution):
	  """A vector version of Student's t-distribution on `R^k`.

	  #### Mathematical details

	  The probability density function (pdf) is,

	  ```none
	  pdf(x; df, mu, Sigma) = (1 + ||y||**2 / df)**(-0.5 (df + 1)) / Z
	  where,
	  y = inv(Sigma) (x - mu)
	  Z = abs(det(Sigma)) ( sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1)) )**k
	  ```

	  where:
	  * `loc = mu`; a vector in `R^k`,
	  * `scale = Sigma`; a lower-triangular matrix in `R^{k x k}`,
	  * `Z` denotes the normalization constant, and,
	  * `Gamma` is the [gamma function](
	    https://en.wikipedia.org/wiki/Gamma_function), and,
	  * `||y||**2` denotes the [squared Euclidean norm](
	  https://en.wikipedia.org/wiki/Norm_(mathematics)#Euclidean_norm) of `y`.

	  The VectorStudentT distribution is a member of the [location-scale family](
	  https://en.wikipedia.org/wiki/Location-scale_family), i.e., it can be
	  constructed as,

	  ```none
	  X ~ StudentT(df, loc=0, scale=1)
	  Y = loc + scale * X
	  ```

	  Notice that the `scale` matrix has semantics closer to std. deviation than
	  covariance (but it is not std. deviation).

	  This distribution is an Affine transformation of iid
	  [Student's t-distributions](
	  https://en.wikipedia.org/wiki/Student%27s_t-distribution)
	  and should not be confused with the [Multivariate Student's t-distribution](
	  https://en.wikipedia.org/wiki/Multivariate_t-distribution). The
	  traditional Multivariate Student's t-distribution is a type of
	  [elliptical distribution](
	  https://en.wikipedia.org/wiki/Elliptical_distribution); it has PDF:

	  ```none
	  pdf(x; df, mu, Sigma) = (1 + ||y||**2 / df)**(-0.5 (df + k)) / Z
	  where,
	  y = inv(Sigma) (x - mu)
	  Z = abs(det(Sigma)) sqrt(df pi)**k Gamma(0.5 df) / Gamma(0.5 (df + k))
	  ```

	  Notice that the Multivariate Student's t-distribution uses `k` where the
	  Vector Student's t-distribution has a `1`. Conversely the Vector version has a
	  broader application of the power-`k` in the normalization constant.

	  #### Examples

	  A single instance of a "Vector Student's t-distribution" is defined by a mean
	  vector of length `k` and a scale matrix of shape `k x k`.

	  Extra leading dimensions, if provided, allow for batches.

	  ```python
	  import tensorflow_probability as tfp
	  tfd = tfp.distributions

	  # Initialize a single 3-variate vector Student's t-distribution.
	  mu = [1., 2, 3]
	  chol = [[1., 0, 0.],
	          [1, 3, 0],
	          [1, 2, 3]]
	  vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)

	  # Evaluate this on an observation in R^3, returning a scalar.
	  vt.prob([-1., 0, 1])

	  # Initialize a batch of two 3-variate vector Student's t-distributions.
	  # `df` is a required argument; a scalar `df` is shared by the whole batch.
	  mu = [[1., 2, 3],
	        [11, 22, 33]]
	  chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
	  vt = tfd.VectorStudentT(df=2, loc=mu, scale_tril=chol)

	  # Evaluate this on two observations, each in R^3, returning a length two
	  # tensor.
	  x = [[-1, 0, 1],
	       [-11, 0, 11]]
	  vt.prob(x)
	  ```

	  For more examples of how to construct the `scale` matrix, see the
	  `tf.contrib.distributions.bijectors.Affine` docstring.

	  """

	  @deprecation.deprecated(
	      "2018-10-01",
	      "The TensorFlow Distributions library has moved to "
	      "TensorFlow Probability "
	      "(https://github.com/tensorflow/probability). You "
	      "should update all references to use `tfp.distributions` "
	      "instead of `tf.contrib.distributions`.",
	      warn_once=True)
	  def __init__(self,
	               df,
	               loc=None,
	               scale_identity_multiplier=None,
	               scale_diag=None,
	               scale_tril=None,
	               scale_perturb_factor=None,
	               scale_perturb_diag=None,
	               validate_args=False,
	               allow_nan_stats=True,
	               name="VectorStudentT"):
	    """Instantiates the vector Student's t-distributions on `R^k`.

	    The `batch_shape` is the broadcast between `df.batch_shape` and
	    `Affine.batch_shape` where `Affine` is constructed from `loc` and
	    `scale_*` arguments.

	    The `event_shape` is the event shape of `Affine.event_shape`.

	    Args:
	      df: Floating-point `Tensor`. The degrees of freedom of the
	        distribution(s). `df` must contain only positive values. Must be
	        scalar if `loc`, `scale_*` imply non-scalar batch_shape or must have the
	        same `batch_shape` implied by `loc`, `scale_*`.
	      loc: Floating-point `Tensor`. If this is set to `None`, no `loc` is
	        applied.
	      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
	        scaling done to the identity matrix. When `scale_identity_multiplier =
	        scale_diag = scale_tril = None` then `scale += IdentityMatrix`.
	        Otherwise no scaled-identity-matrix is added to `scale`.
	      scale_diag: Floating-point `Tensor` representing the diagonal matrix.
	        `scale_diag` has shape [N1, N2, ..., k], which represents a k x k
	        diagonal matrix. When `None` no diagonal term is added to `scale`.
	      scale_tril: Floating-point `Tensor` representing the lower triangular
	        matrix. `scale_tril` has shape [N1, N2, ..., k, k], which represents a
	        k x k lower triangular matrix. When `None` no `scale_tril` term is
	        added to `scale`. The upper triangular elements above the diagonal are
	        ignored.
	      scale_perturb_factor: Floating-point `Tensor` representing factor matrix
	        with last two dimensions of shape `(k, r)`. When `None`, no rank-r
	        update is added to `scale`.
	      scale_perturb_diag: Floating-point `Tensor` representing the diagonal
	        matrix. `scale_perturb_diag` has shape [N1, N2, ..., r], which
	        represents an r x r Diagonal matrix. When `None` low rank updates will
	        take the form `scale_perturb_factor * scale_perturb_factor.T`.
	      validate_args: Python `bool`, default `False`. When `True` distribution
	        parameters are checked for validity despite possibly degrading runtime
	        performance. When `False` invalid inputs may silently render incorrect
	        outputs. The flag is applied to the `Affine` bijector, to the base
	        `StudentT` distribution and to the transformed distribution.
	      allow_nan_stats: Python `bool`, default `True`. When `True`,
	        statistics (e.g., mean, mode, variance) use the value "`NaN`" to
	        indicate the result is undefined. When `False`, an exception is raised
	        if one or more of the statistic's batch members are undefined.
	      name: Python `str` name prefixed to Ops created by this class.
	    """
	    # Snapshot the constructor arguments before any other local is bound.
	    parameters = dict(locals())
	    graph_parents = [df, loc, scale_identity_multiplier, scale_diag,
	                     scale_tril, scale_perturb_factor, scale_perturb_diag]
	    with ops.name_scope(name) as name:
	      with ops.name_scope("init", values=graph_parents):
	        # The shape of the _VectorStudentT distribution is governed by the
	        # relationship between df.batch_shape and affine.batch_shape. In
	        # pseudocode the basic procedure is:
	        #   if df.batch_shape is scalar:
	        #     if affine.batch_shape is not scalar:
	        #       # broadcast distribution.sample so
	        #       # it has affine.batch_shape.
	        #     self.batch_shape = affine.batch_shape
	        #   else:
	        #     if affine.batch_shape is scalar:
	        #       # let affine broadcasting do its thing.
	        #     self.batch_shape = df.batch_shape
	        # All of the above magic is actually handled by TransformedDistribution.
	        # Here we really only need to collect the affine.batch_shape and decide
	        # what we're going to pass in to TransformedDistribution's
	        # (override) batch_shape arg.
	        affine = bijectors.Affine(
	            shift=loc,
	            scale_identity_multiplier=scale_identity_multiplier,
	            scale_diag=scale_diag,
	            scale_tril=scale_tril,
	            scale_perturb_factor=scale_perturb_factor,
	            scale_perturb_diag=scale_perturb_diag,
	            validate_args=validate_args)
	        # `allow_nan_stats` is not passed to `TransformedDistribution.__init__`
	        # below, so it has to be set on the base distribution or the caller's
	        # choice would be silently dropped. `validate_args` is forwarded as
	        # well so that `df` is checked when validation is requested.
	        distribution = student_t.StudentT(
	            df=df,
	            loc=array_ops.zeros([], dtype=affine.dtype),
	            scale=array_ops.ones([], dtype=affine.dtype),
	            validate_args=validate_args,
	            allow_nan_stats=allow_nan_stats)
	        batch_shape, override_event_shape = (
	            distribution_util.shapes_from_loc_and_scale(
	                affine.shift, affine.scale))
	        # Only override the batch shape when the base distribution is a scalar
	        # batch; otherwise pass an empty shape vector.
	        override_batch_shape = distribution_util.pick_vector(
	            distribution.is_scalar_batch(),
	            batch_shape,
	            constant_op.constant([], dtype=dtypes.int32))
	        super(_VectorStudentT, self).__init__(
	            distribution=distribution,
	            bijector=affine,
	            batch_shape=override_batch_shape,
	            event_shape=override_event_shape,
	            validate_args=validate_args,
	            name=name)
	        # Report the arguments this constructor received rather than whatever
	        # the parent constructor recorded.
	        self._parameters = parameters

	  @property
	  def df(self):
	    """Degrees of freedom in these Student's t distribution(s)."""
	    return self.distribution.df

	  @property
	  def loc(self):
	    """Locations of these Student's t distribution(s)."""
	    return self.bijector.shift

	  @property
	  def scale(self):
	    """The `scale` of the underlying `Affine` bijector (not a covariance)."""
	    return self.bijector.scale