| # Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Built-in loss functions. |
| """ |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import abc |
| |
| import six |
| |
| from tensorflow.python.distribute import distribution_strategy_context |
| from tensorflow.python.framework import ops |
| from tensorflow.python.framework import smart_cond |
| from tensorflow.python.framework import tensor_util |
| from tensorflow.python.keras import backend as K |
| from tensorflow.python.keras.utils import losses_utils |
| from tensorflow.python.keras.utils import tf_utils |
| from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object |
| from tensorflow.python.keras.utils.generic_utils import serialize_keras_object |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import nn |
| from tensorflow.python.ops.losses import losses_impl |
| from tensorflow.python.ops.losses import util as tf_losses_util |
| from tensorflow.python.util.tf_export import keras_export |
| from tensorflow.tools.docs import doc_controls |
| |
| |
| @keras_export('keras.losses.Loss') |
| class Loss(object): |
| """Loss base class. |
| |
| To be implemented by subclasses: |
| * `call()`: Contains the logic for loss calculation using `y_true`, `y_pred`. |
| |
| Example subclass implementation: |
| ```python |
| class MeanSquaredError(Loss): |
| def call(self, y_true, y_pred): |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| return K.mean(math_ops.square(y_pred - y_true), axis=-1) |
| ``` |
| |
| When used with `tf.distribute.Strategy`, outside of built-in training loops |
| such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction |
| types, and reduce losses explicitly in your training loop. Using 'AUTO' or |
| 'SUM_OVER_BATCH_SIZE' will raise an error. |
| |
| Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) for more |
| details on this. |
| |
| You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like: |
| ```python |
| with strategy.scope(): |
| loss_obj = tf.keras.losses.CategoricalCrossentropy( |
| reduction=tf.keras.losses.Reduction.NONE) |
| .... |
| loss = (tf.reduce_sum(loss_obj(labels, predictions)) * |
| (1. / global_batch_size)) |
| ``` |
| """ |
| |
| def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None): |
| """Initializes `Loss` class. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. |
| """ |
| losses_utils.ReductionV2.validate(reduction) |
| self.reduction = reduction |
| self.name = name |
| # SUM_OVER_BATCH is only allowed in losses managed by `fit` or |
| # CannedEstimators. |
| self._allow_sum_over_batch_size = False |
| self._set_name_scope() |
| |
| def _set_name_scope(self): |
| """Creates a valid `name_scope` name.""" |
| if self.name is None: |
| self._name_scope = self.__class__.__name__ |
| elif self.name == '<lambda>': |
| self._name_scope = 'lambda' |
| else: |
| # E.g. '_my_loss' => 'my_loss' |
| self._name_scope = self.name.strip('_') |
| |
| def __call__(self, y_true, y_pred, sample_weight=None): |
| """Invokes the `Loss` instance. |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except |
| sparse loss functions such as sparse categorical crossentropy where |
| shape = `[batch_size, d0, .. dN-1]` |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` |
| sample_weight: Optional `sample_weight` acts as a |
| coefficient for the loss. If a scalar is provided, then the loss is |
| simply scaled by the given value. If `sample_weight` is a tensor of size |
| `[batch_size]`, then the total loss for each sample of the batch is |
| rescaled by the corresponding element in the `sample_weight` vector. If |
| the shape of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be |
| broadcasted to this shape), then each loss element of `y_pred` is scaled |
        by the corresponding value of `sample_weight`. (Note on `dN-1`: all loss
        functions reduce by 1 dimension, usually axis=-1.)
| |
| Returns: |
| Weighted loss float `Tensor`. If `reduction` is `NONE`, this has |
| shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note `dN-1` |
| because all loss functions reduce by 1 dimension, usually axis=-1.) |
| |
| Raises: |
| ValueError: If the shape of `sample_weight` is invalid. |
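
    Example of `sample_weight` rescaling (an illustrative sketch reusing the
    values from the `MeanSquaredError` class docstring):

    ```python
    mse = tf.keras.losses.MeanSquaredError()
    # A `[batch_size]` weight vector rescales each per-sample loss before the
    # final reduction: (0.7 * 0.5 + 0.3 * 0.5) / 2 = 0.25.
    mse([[0., 1.], [0., 0.]], [[1., 1.], [1., 0.]],
        sample_weight=[0.7, 0.3]).numpy()  # 0.25
    ```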
| """ |
    # `self._name_scope` has already been sanitized (e.g. '<lambda>' becomes
    # 'lambda') in `_set_name_scope`, so it is a valid scope name here.
| graph_ctx = tf_utils.graph_context_for_symbolic_tensors( |
| y_true, y_pred, sample_weight) |
| with K.name_scope(self._name_scope), graph_ctx: |
| losses = self.call(y_true, y_pred) |
| return losses_utils.compute_weighted_loss( |
| losses, sample_weight, reduction=self._get_reduction()) |
| |
| @classmethod |
| def from_config(cls, config): |
| """Instantiates a `Loss` from its config (output of `get_config()`). |
| |
| Args: |
| config: Output of `get_config()`. |
| |
| Returns: |
| A `Loss` instance. |
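
    Example (a small illustrative sketch using a built-in subclass):

    ```python
    mse = tf.keras.losses.MeanSquaredError(name='mse')
    config = mse.get_config()
    restored = tf.keras.losses.MeanSquaredError.from_config(config)
    ```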
| """ |
| return cls(**config) |
| |
| def get_config(self): |
| """Returns the config dictionary for a `Loss` instance.""" |
| return {'reduction': self.reduction, 'name': self.name} |
| |
| @abc.abstractmethod |
| @doc_controls.for_subclass_implementers |
| def call(self, y_true, y_pred): |
| """Invokes the `Loss` instance. |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except |
| sparse loss functions such as sparse categorical crossentropy where |
| shape = `[batch_size, d0, .. dN-1]` |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]` |
| |
| Returns: |
| Loss values with the shape `[batch_size, d0, .. dN-1]`. |
| """ |
    raise NotImplementedError('Must be implemented in subclasses.')
| |
| def _get_reduction(self): |
| """Handles `AUTO` reduction cases and returns the reduction value.""" |
| if (not self._allow_sum_over_batch_size and |
| distribution_strategy_context.has_strategy() and |
| (self.reduction == losses_utils.ReductionV2.AUTO or |
| self.reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE)): |
| raise ValueError( |
| 'Please use `tf.keras.losses.Reduction.SUM` or ' |
| '`tf.keras.losses.Reduction.NONE` for loss reduction when losses are ' |
| 'used with `tf.distribute.Strategy` outside of the built-in training ' |
| 'loops. You can implement ' |
| '`tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch ' |
| 'size like:\n```\nwith strategy.scope():\n' |
| ' loss_obj = tf.keras.losses.CategoricalCrossentropy(' |
| 'reduction=tf.keras.losses.Reduction.NONE)\n....\n' |
| ' loss = tf.reduce_sum(loss_obj(labels, predictions)) * ' |
| '(1. / global_batch_size)\n```\nPlease see ' |
| 'https://www.tensorflow.org/tutorials/distribute/custom_training' |
| ' for more details.') |
| |
| if self.reduction == losses_utils.ReductionV2.AUTO: |
| return losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE |
| return self.reduction |
| |
| |
| class LossFunctionWrapper(Loss): |
| """Wraps a loss function in the `Loss` class.""" |
| |
| def __init__(self, |
| fn, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name=None, |
| **kwargs): |
| """Initializes `LossFunctionWrapper` class. |
| |
| Args: |
| fn: The loss function to wrap, with signature `fn(y_true, y_pred, |
| **kwargs)`. |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: (Optional) name for the loss. |
| **kwargs: The keyword arguments that are passed on to `fn`. |
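
    Example (an illustrative sketch; `scaled_mae` is a hypothetical function,
    not part of this module):

    ```python
    def scaled_mae(y_true, y_pred, scale=1.0):
      y_pred = ops.convert_to_tensor_v2(y_pred)
      y_true = math_ops.cast(y_true, y_pred.dtype)
      return scale * K.mean(math_ops.abs(y_pred - y_true), axis=-1)

    loss_obj = LossFunctionWrapper(scaled_mae, name='scaled_mae', scale=0.5)
    ```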
| """ |
| super(LossFunctionWrapper, self).__init__(reduction=reduction, name=name) |
| self.fn = fn |
| self._fn_kwargs = kwargs |
| |
| def call(self, y_true, y_pred): |
| """Invokes the `LossFunctionWrapper` instance. |
| |
| Args: |
| y_true: Ground truth values. |
| y_pred: The predicted values. |
| |
| Returns: |
| Loss values per sample. |
| """ |
| if tensor_util.is_tensor(y_pred) and tensor_util.is_tensor(y_true): |
| y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions( |
| y_pred, y_true) |
| return self.fn(y_true, y_pred, **self._fn_kwargs) |
| |
| def get_config(self): |
| config = {} |
| for k, v in six.iteritems(self._fn_kwargs): |
| config[k] = K.eval(v) if tf_utils.is_tensor_or_variable(v) else v |
| base_config = super(LossFunctionWrapper, self).get_config() |
| return dict(list(base_config.items()) + list(config.items())) |
| |
| |
| @keras_export('keras.losses.MeanSquaredError') |
| class MeanSquaredError(LossFunctionWrapper): |
| """Computes the mean of squares of errors between labels and predictions. |
| |
| `loss = square(y_true - y_pred)` |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[1., 1.], [1., 0.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> mse = tf.keras.losses.MeanSquaredError() |
| >>> mse(y_true, y_pred).numpy() |
| 0.5 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() |
| 0.25 |
| |
| >>> # Using 'sum' reduction type. |
| >>> mse = tf.keras.losses.MeanSquaredError( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> mse(y_true, y_pred).numpy() |
| 1.0 |
| |
| >>> # Using 'none' reduction type. |
| >>> mse = tf.keras.losses.MeanSquaredError( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> mse(y_true, y_pred).numpy() |
| array([0.5, 0.5], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.MeanSquaredError()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='mean_squared_error'): |
| """Initializes `MeanSquaredError` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'mean_squared_error'. |
| """ |
| super(MeanSquaredError, self).__init__( |
| mean_squared_error, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.MeanAbsoluteError') |
| class MeanAbsoluteError(LossFunctionWrapper): |
| """Computes the mean of absolute difference between labels and predictions. |
| |
| `loss = abs(y_true - y_pred)` |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[1., 1.], [1., 0.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> mae = tf.keras.losses.MeanAbsoluteError() |
| >>> mae(y_true, y_pred).numpy() |
| 0.5 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() |
| 0.25 |
| |
| >>> # Using 'sum' reduction type. |
| >>> mae = tf.keras.losses.MeanAbsoluteError( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> mae(y_true, y_pred).numpy() |
| 1.0 |
| |
| >>> # Using 'none' reduction type. |
| >>> mae = tf.keras.losses.MeanAbsoluteError( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> mae(y_true, y_pred).numpy() |
| array([0.5, 0.5], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.MeanAbsoluteError()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='mean_absolute_error'): |
| """Initializes `MeanAbsoluteError` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'mean_absolute_error'. |
| """ |
| super(MeanAbsoluteError, self).__init__( |
| mean_absolute_error, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.MeanAbsolutePercentageError') |
| class MeanAbsolutePercentageError(LossFunctionWrapper): |
| """Computes the mean absolute percentage error between `y_true` and `y_pred`. |
| |
| `loss = 100 * abs(y_true - y_pred) / y_true` |
| |
| Usage: |
| |
| >>> y_true = [[2., 1.], [2., 3.]] |
| >>> y_pred = [[1., 1.], [1., 0.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> mape = tf.keras.losses.MeanAbsolutePercentageError() |
| >>> mape(y_true, y_pred).numpy() |
| 50. |
| |
| >>> # Calling with 'sample_weight'. |
| >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() |
| 20. |
| |
| >>> # Using 'sum' reduction type. |
| >>> mape = tf.keras.losses.MeanAbsolutePercentageError( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> mape(y_true, y_pred).numpy() |
| 100. |
| |
| >>> # Using 'none' reduction type. |
| >>> mape = tf.keras.losses.MeanAbsolutePercentageError( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> mape(y_true, y_pred).numpy() |
| array([25., 75.], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.MeanAbsolutePercentageError()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='mean_absolute_percentage_error'): |
| """Initializes `MeanAbsolutePercentageError` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to |
| 'mean_absolute_percentage_error'. |
| """ |
| super(MeanAbsolutePercentageError, self).__init__( |
| mean_absolute_percentage_error, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.MeanSquaredLogarithmicError') |
| class MeanSquaredLogarithmicError(LossFunctionWrapper): |
| """Computes the mean squared logarithmic error between `y_true` and `y_pred`. |
| |
| `loss = square(log(y_true + 1.) - log(y_pred + 1.))` |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[1., 1.], [1., 0.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> msle = tf.keras.losses.MeanSquaredLogarithmicError() |
| >>> msle(y_true, y_pred).numpy() |
| 0.240 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy() |
| 0.120 |
| |
| >>> # Using 'sum' reduction type. |
| >>> msle = tf.keras.losses.MeanSquaredLogarithmicError( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> msle(y_true, y_pred).numpy() |
| 0.480 |
| |
| >>> # Using 'none' reduction type. |
| >>> msle = tf.keras.losses.MeanSquaredLogarithmicError( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> msle(y_true, y_pred).numpy() |
| array([0.240, 0.240], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.MeanSquaredLogarithmicError()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='mean_squared_logarithmic_error'): |
| """Initializes `MeanSquaredLogarithmicError` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to |
| 'mean_squared_logarithmic_error'. |
| """ |
| super(MeanSquaredLogarithmicError, self).__init__( |
| mean_squared_logarithmic_error, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.BinaryCrossentropy') |
| class BinaryCrossentropy(LossFunctionWrapper): |
| """Computes the cross-entropy loss between true labels and predicted labels. |
| |
| Use this cross-entropy loss when there are only two label classes (assumed to |
| be 0 and 1). For each example, there should be a single floating-point value |
| per prediction. |
| |
  In the snippet below, each of the four predictions is a single
  floating-point value, and both `y_pred` and `y_true` have the shape
  `[batch_size, 2]`.
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> bce = tf.keras.losses.BinaryCrossentropy() |
| >>> bce(y_true, y_pred).numpy() |
| 0.815 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> bce(y_true, y_pred, sample_weight=[1, 0]).numpy() |
| 0.458 |
| |
| >>> # Using 'sum' reduction type. |
| >>> bce = tf.keras.losses.BinaryCrossentropy( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> bce(y_true, y_pred).numpy() |
| 1.630 |
| |
| >>> # Using 'none' reduction type. |
| >>> bce = tf.keras.losses.BinaryCrossentropy( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> bce(y_true, y_pred).numpy() |
| array([0.916 , 0.714], dtype=float32) |
| |
| Usage with the `tf.keras` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.BinaryCrossentropy()) |
| ``` |
| """ |
| |
| def __init__(self, |
| from_logits=False, |
| label_smoothing=0, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='binary_crossentropy'): |
| """Initializes `BinaryCrossentropy` instance. |
| |
| Args: |
| from_logits: Whether to interpret `y_pred` as a tensor of |
| [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we |
| assume that `y_pred` contains probabilities (i.e., values in [0, 1]). |
        **Note - Using from_logits=True may be more numerically stable.**
| label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When > 0, |
| we compute the loss between the predicted labels and a smoothed version |
| of the true labels, where the smoothing squeezes the labels towards 0.5. |
| Larger values of `label_smoothing` correspond to heavier smoothing. |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: (Optional) Name for the op. Defaults to 'binary_crossentropy'. |
| """ |
| super(BinaryCrossentropy, self).__init__( |
| binary_crossentropy, |
| name=name, |
| reduction=reduction, |
| from_logits=from_logits, |
| label_smoothing=label_smoothing) |
| self.from_logits = from_logits |
| |
| |
| @keras_export('keras.losses.CategoricalCrossentropy') |
| class CategoricalCrossentropy(LossFunctionWrapper): |
| """Computes the crossentropy loss between the labels and predictions. |
| |
| Use this crossentropy loss function when there are two or more label classes. |
| We expect labels to be provided in a `one_hot` representation. If you want to |
| provide labels as integers, please use `SparseCategoricalCrossentropy` loss. |
| There should be `# classes` floating point values per feature. |
| |
  In the snippet below, there are `# classes` floating point values per
  example. The shape of both `y_pred` and `y_true` is
  `[batch_size, num_classes]`.
| |
| Usage: |
| |
| >>> y_true = [[0, 1, 0], [0, 0, 1]] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> cce = tf.keras.losses.CategoricalCrossentropy() |
| >>> cce(y_true, y_pred).numpy() |
| 1.177 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() |
| 0.814 |
| |
| >>> # Using 'sum' reduction type. |
| >>> cce = tf.keras.losses.CategoricalCrossentropy( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> cce(y_true, y_pred).numpy() |
| 2.354 |
| |
| >>> # Using 'none' reduction type. |
| >>> cce = tf.keras.losses.CategoricalCrossentropy( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> cce(y_true, y_pred).numpy() |
| array([0.0513, 2.303], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.CategoricalCrossentropy()) |
| ``` |
| """ |
| |
| def __init__(self, |
| from_logits=False, |
| label_smoothing=0, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='categorical_crossentropy'): |
| """Initializes `CategoricalCrossentropy` instance. |
| |
| Args: |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
| **Note - Using from_logits=True is more numerically stable.** |
      label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
        meaning the confidence on label values is relaxed. For example,
        `label_smoothing=0.2` means that we will use a value of `0.1` for label
        `0` and `0.9` for label `1`.
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'categorical_crossentropy'. |
| """ |
| super(CategoricalCrossentropy, self).__init__( |
| categorical_crossentropy, |
| name=name, |
| reduction=reduction, |
| from_logits=from_logits, |
| label_smoothing=label_smoothing) |
| |
| |
| @keras_export('keras.losses.SparseCategoricalCrossentropy') |
| class SparseCategoricalCrossentropy(LossFunctionWrapper): |
| """Computes the crossentropy loss between the labels and predictions. |
| |
| Use this crossentropy loss function when there are two or more label classes. |
| We expect labels to be provided as integers. If you want to provide labels |
| using `one-hot` representation, please use `CategoricalCrossentropy` loss. |
| There should be `# classes` floating point values per feature for `y_pred` |
| and a single floating point value per feature for `y_true`. |
| |
| In the snippet below, there is a single floating point value per example for |
  `y_true` and `# classes` floating point values per example for `y_pred`.
| The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is |
| `[batch_size, num_classes]`. |
| |
| Usage: |
| |
| >>> y_true = [1, 2] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> scce = tf.keras.losses.SparseCategoricalCrossentropy() |
| >>> scce(y_true, y_pred).numpy() |
| 1.177 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy() |
| 0.814 |
| |
| >>> # Using 'sum' reduction type. |
| >>> scce = tf.keras.losses.SparseCategoricalCrossentropy( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> scce(y_true, y_pred).numpy() |
| 2.354 |
| |
| >>> # Using 'none' reduction type. |
| >>> scce = tf.keras.losses.SparseCategoricalCrossentropy( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> scce(y_true, y_pred).numpy() |
| array([0.0513, 2.303], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.SparseCategoricalCrossentropy()) |
| ``` |
| """ |
| |
| def __init__(self, |
| from_logits=False, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='sparse_categorical_crossentropy'): |
| """Initializes `SparseCategoricalCrossentropy` instance. |
| |
| Args: |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By |
| default, we assume that `y_pred` encodes a probability distribution. |
        **Note - Using from_logits=True may be more numerically stable.**
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to |
| 'sparse_categorical_crossentropy'. |
| """ |
| super(SparseCategoricalCrossentropy, self).__init__( |
| sparse_categorical_crossentropy, |
| name=name, |
| reduction=reduction, |
| from_logits=from_logits) |
| |
| |
| @keras_export('keras.losses.Hinge') |
| class Hinge(LossFunctionWrapper): |
| """Computes the hinge loss between `y_true` and `y_pred`. |
| |
| `loss = maximum(1 - y_true * y_pred, 0)` |
| |
| `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are |
| provided we will convert them to -1 or 1. |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> h = tf.keras.losses.Hinge() |
| >>> h(y_true, y_pred).numpy() |
| 1.3 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() |
| 0.55 |
| |
| >>> # Using 'sum' reduction type. |
| >>> h = tf.keras.losses.Hinge( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> h(y_true, y_pred).numpy() |
| 2.6 |
| |
| >>> # Using 'none' reduction type. |
| >>> h = tf.keras.losses.Hinge( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> h(y_true, y_pred).numpy() |
| array([1.1, 1.5], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.Hinge()) |
| ``` |
| """ |
| |
| def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='hinge'): |
| """Initializes `Hinge` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'hinge'. |
| """ |
| super(Hinge, self).__init__(hinge, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.SquaredHinge') |
| class SquaredHinge(LossFunctionWrapper): |
| """Computes the squared hinge loss between `y_true` and `y_pred`. |
| |
| `loss = square(maximum(1 - y_true * y_pred, 0))` |
| |
| `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are |
| provided we will convert them to -1 or 1. |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> h = tf.keras.losses.SquaredHinge() |
| >>> h(y_true, y_pred).numpy() |
| 1.86 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() |
| 0.73 |
| |
| >>> # Using 'sum' reduction type. |
| >>> h = tf.keras.losses.SquaredHinge( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> h(y_true, y_pred).numpy() |
| 3.72 |
| |
| >>> # Using 'none' reduction type. |
| >>> h = tf.keras.losses.SquaredHinge( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> h(y_true, y_pred).numpy() |
| array([1.46, 2.26], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.SquaredHinge()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='squared_hinge'): |
| """Initializes `SquaredHinge` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'squared_hinge'. |
| """ |
| super(SquaredHinge, self).__init__( |
| squared_hinge, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.CategoricalHinge') |
| class CategoricalHinge(LossFunctionWrapper): |
| """Computes the categorical hinge loss between `y_true` and `y_pred`. |
| |
| `loss = maximum(neg - pos + 1, 0)` |
  where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)`
| |
| Usage: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> h = tf.keras.losses.CategoricalHinge() |
| >>> h(y_true, y_pred).numpy() |
| 1.4 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() |
| 0.6 |
| |
| >>> # Using 'sum' reduction type. |
| >>> h = tf.keras.losses.CategoricalHinge( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> h(y_true, y_pred).numpy() |
| 2.8 |
| |
| >>> # Using 'none' reduction type. |
| >>> h = tf.keras.losses.CategoricalHinge( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> h(y_true, y_pred).numpy() |
| array([1.2, 1.6], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.CategoricalHinge()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='categorical_hinge'): |
| """Initializes `CategoricalHinge` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'categorical_hinge'. |
| """ |
| super(CategoricalHinge, self).__init__( |
| categorical_hinge, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.Poisson') |
| class Poisson(LossFunctionWrapper): |
| """Computes the Poisson loss between `y_true` and `y_pred`. |
| |
| `loss = y_pred - y_true * log(y_pred)` |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[1., 1.], [0., 0.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> p = tf.keras.losses.Poisson() |
| >>> p(y_true, y_pred).numpy() |
| 0.5 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() |
| 0.4 |
| |
| >>> # Using 'sum' reduction type. |
| >>> p = tf.keras.losses.Poisson( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> p(y_true, y_pred).numpy() |
| 0.999 |
| |
| >>> # Using 'none' reduction type. |
| >>> p = tf.keras.losses.Poisson( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> p(y_true, y_pred).numpy() |
| array([0.999, 0.], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.Poisson()) |
| ``` |
| """ |
| |
| def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='poisson'): |
| """Initializes `Poisson` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'poisson'. |
| """ |
| super(Poisson, self).__init__(poisson, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.LogCosh') |
| class LogCosh(LossFunctionWrapper): |
| """Computes the logarithm of the hyperbolic cosine of the prediction error. |
| |
| `logcosh = log((exp(x) + exp(-x))/2)`, |
| where x is the error `y_pred - y_true`. |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [0., 0.]] |
| >>> y_pred = [[1., 1.], [0., 0.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> l = tf.keras.losses.LogCosh() |
| >>> l(y_true, y_pred).numpy() |
| 0.108 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() |
| 0.087 |
| |
| >>> # Using 'sum' reduction type. |
| >>> l = tf.keras.losses.LogCosh( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> l(y_true, y_pred).numpy() |
| 0.217 |
| |
| >>> # Using 'none' reduction type. |
| >>> l = tf.keras.losses.LogCosh( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> l(y_true, y_pred).numpy() |
| array([0.217, 0.], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.LogCosh()) |
| ``` |
| """ |
| |
| def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name='logcosh'): |
| """Initializes `LogCosh` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'logcosh'. |
| """ |
| super(LogCosh, self).__init__(logcosh, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.KLDivergence') |
| class KLDivergence(LossFunctionWrapper): |
| """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`. |
| |
| `loss = y_true * log(y_true / y_pred)` |
| |
| See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence |
| |
| Usage: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> kl = tf.keras.losses.KLDivergence() |
| >>> kl(y_true, y_pred).numpy() |
| 0.458 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() |
| 0.366 |
| |
| >>> # Using 'sum' reduction type. |
| >>> kl = tf.keras.losses.KLDivergence( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> kl(y_true, y_pred).numpy() |
| 0.916 |
| |
| >>> # Using 'none' reduction type. |
| >>> kl = tf.keras.losses.KLDivergence( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> kl(y_true, y_pred).numpy() |
| array([0.916, -3.08e-06], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.KLDivergence()) |
| ``` |
| """ |
| |
| def __init__(self, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='kullback_leibler_divergence'): |
| """Initializes `KLDivergence` instance. |
| |
| Args: |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'kullback_leibler_divergence'. |
| """ |
| super(KLDivergence, self).__init__( |
| kullback_leibler_divergence, name=name, reduction=reduction) |
| |
| |
| @keras_export('keras.losses.Huber') |
| class Huber(LossFunctionWrapper): |
| """Computes the Huber loss between `y_true` and `y_pred`. |
| |
| For each value x in `error = y_true - y_pred`: |
| |
| ``` |
| loss = 0.5 * x^2 if |x| <= d |
| loss = 0.5 * d^2 + d * (|x| - d) if |x| > d |
| ``` |
| where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss |
| |
| Usage: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> h = tf.keras.losses.Huber() |
| >>> h(y_true, y_pred).numpy() |
| 0.155 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy() |
| 0.09 |
| |
| >>> # Using 'sum' reduction type. |
| >>> h = tf.keras.losses.Huber( |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> h(y_true, y_pred).numpy() |
| 0.31 |
| |
| >>> # Using 'none' reduction type. |
| >>> h = tf.keras.losses.Huber( |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> h(y_true, y_pred).numpy() |
| array([0.18, 0.13], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.Huber()) |
| ``` |
| """ |
| |
| def __init__(self, |
| delta=1.0, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='huber_loss'): |
| """Initializes `Huber` instance. |
| |
| Args: |
| delta: A float, the point where the Huber loss function changes from a |
| quadratic to linear. |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to |
| loss. Default value is `AUTO`. `AUTO` indicates that the reduction |
| option will be determined by the usage context. For almost all cases |
| this defaults to `SUM_OVER_BATCH_SIZE`. When used with |
| `tf.distribute.Strategy`, outside of built-in training loops such as |
| `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` |
| will raise an error. Please see this custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) |
| for more details. |
| name: Optional name for the op. Defaults to 'huber_loss'. |
| """ |
| super(Huber, self).__init__( |
| huber_loss, name=name, reduction=reduction, delta=delta) |
| |
| |
| @keras_export('keras.metrics.mean_squared_error', |
| 'keras.metrics.mse', |
| 'keras.metrics.MSE', |
| 'keras.losses.mean_squared_error', |
| 'keras.losses.mse', |
| 'keras.losses.MSE') |
| def mean_squared_error(y_true, y_pred): |
| """Computes the mean squared error between labels and predictions. |
| |
| After computing the squared distance between the inputs, the mean value over |
| the last dimension is returned. |
| |
| `loss = mean(square(y_true - y_pred), axis=-1)` |
| |
| Usage: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> assert np.array_equal( |
| ... loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1)) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean squared error values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| return K.mean(math_ops.squared_difference(y_pred, y_true), axis=-1) |
| |
| |
| @keras_export('keras.metrics.mean_absolute_error', |
| 'keras.metrics.mae', |
| 'keras.metrics.MAE', |
| 'keras.losses.mean_absolute_error', |
| 'keras.losses.mae', |
| 'keras.losses.MAE') |
| def mean_absolute_error(y_true, y_pred): |
| """Computes the mean absolute error between labels and predictions. |
| |
| `loss = mean(abs(y_true - y_pred), axis=-1)` |
| |
| Usage: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> assert np.array_equal( |
| ... loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1)) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| return K.mean(math_ops.abs(y_pred - y_true), axis=-1) |
| |
| |
| @keras_export('keras.metrics.mean_absolute_percentage_error', |
| 'keras.metrics.mape', |
| 'keras.metrics.MAPE', |
| 'keras.losses.mean_absolute_percentage_error', |
| 'keras.losses.mape', |
| 'keras.losses.MAPE') |
| def mean_absolute_percentage_error(y_true, y_pred): |
| """Computes the mean absolute percentage error between `y_true` and `y_pred`. |
| |
| `loss = 100 * mean(abs(y_true - y_pred) / y_true, axis=-1)` |
| |
| Usage: |
| |
| >>> y_true = np.random.random(size=(2, 3)) |
| >>> y_true = np.maximum(y_true, 1e-7) # Prevent division by zero |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> assert np.array_equal( |
| ... loss.numpy(), |
| ... 100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1)) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean absolute percentage error values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| diff = math_ops.abs( |
| (y_true - y_pred) / K.maximum(math_ops.abs(y_true), K.epsilon())) |
| return 100. * K.mean(diff, axis=-1) |
| |
| |
| @keras_export('keras.metrics.mean_squared_logarithmic_error', |
| 'keras.metrics.msle', |
| 'keras.metrics.MSLE', |
| 'keras.losses.mean_squared_logarithmic_error', |
| 'keras.losses.msle', |
| 'keras.losses.MSLE') |
| def mean_squared_logarithmic_error(y_true, y_pred): |
| """Computes the mean squared logarithmic error between `y_true` and `y_pred`. |
| |
| `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)` |
| |
| Usage: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> y_true = np.maximum(y_true, 1e-7) |
| >>> y_pred = np.maximum(y_pred, 1e-7) |
| >>> assert np.array_equal( |
| ... loss.numpy(), |
| ... np.mean( |
| ... np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1)) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| first_log = math_ops.log(K.maximum(y_pred, K.epsilon()) + 1.) |
| second_log = math_ops.log(K.maximum(y_true, K.epsilon()) + 1.) |
| return K.mean(math_ops.squared_difference(first_log, second_log), axis=-1) |
| |
| |
| def _maybe_convert_labels(y_true): |
| """Converts binary labels into -1/1.""" |
| are_zeros = math_ops.equal(y_true, 0) |
| are_ones = math_ops.equal(y_true, 1) |
| is_binary = math_ops.reduce_all(math_ops.logical_or(are_zeros, are_ones)) |
| |
| def _convert_binary_labels(): |
| # Convert the binary labels to -1 or 1. |
| return 2. * y_true - 1. |
| |
| updated_y_true = smart_cond.smart_cond(is_binary, |
| _convert_binary_labels, lambda: y_true) |
| return updated_y_true |
| |
| |
| @keras_export('keras.metrics.squared_hinge', 'keras.losses.squared_hinge') |
| def squared_hinge(y_true, y_pred): |
| """Computes the squared hinge loss between `y_true` and `y_pred`. |
| |
| `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)` |
| |
| Usage: |
| |
| >>> y_true = np.random.choice([-1, 1], size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> assert np.array_equal( |
| ... loss.numpy(), |
| ... np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1)) |
| |
| Args: |
| y_true: The ground truth values. `y_true` values are expected to be -1 or 1. |
| If binary (0 or 1) labels are provided we will convert them to -1 or 1. |
| shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| y_true = _maybe_convert_labels(y_true) |
| return K.mean( |
| math_ops.square(math_ops.maximum(1. - y_true * y_pred, 0.)), axis=-1) |
| |
| |
| @keras_export('keras.metrics.hinge', 'keras.losses.hinge') |
| def hinge(y_true, y_pred): |
| """Computes the hinge loss between `y_true` and `y_pred`. |
| |
| `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)` |
| |
| Usage: |
| |
| >>> y_true = np.random.choice([-1, 1], size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.hinge(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> assert np.array_equal( |
| ... loss.numpy(), |
| ... np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1)) |
| |
| Args: |
| y_true: The ground truth values. `y_true` values are expected to be -1 or 1. |
| If binary (0 or 1) labels are provided they will be converted to -1 or 1. |
| shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Hinge loss values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| y_true = _maybe_convert_labels(y_true) |
| return K.mean(math_ops.maximum(1. - y_true * y_pred, 0.), axis=-1) |
| |
| |
| @keras_export('keras.losses.categorical_hinge') |
| def categorical_hinge(y_true, y_pred): |
| """Computes the categorical hinge loss between `y_true` and `y_pred`. |
| |
| `loss = maximum(neg - pos + 1, 0)` |
  where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)`
| |
| Usage: |
| |
| >>> y_true = np.random.randint(0, 3, size=(2,)) |
| >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> pos = np.sum(y_true * y_pred, axis=-1) |
| >>> neg = np.amax((1. - y_true) * y_pred, axis=-1) |
| >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.)) |
| |
| Args: |
    y_true: The ground truth values. `y_true` values are expected to be 0 or 1
      (i.e. a one-hot-encoded tensor of class labels).
| y_pred: The predicted values. |
| |
| Returns: |
| Categorical hinge loss values. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| pos = math_ops.reduce_sum(y_true * y_pred, axis=-1) |
| neg = math_ops.reduce_max((1. - y_true) * y_pred, axis=-1) |
| return math_ops.maximum(0., neg - pos + 1.) |
| |
| |
| def huber_loss(y_true, y_pred, delta=1.0): |
| """Computes Huber loss value. |
| |
| For each value x in `error = y_true - y_pred`: |
| |
| ``` |
| loss = 0.5 * x^2 if |x| <= d |
| loss = 0.5 * d^2 + d * (|x| - d) if |x| > d |
| ``` |
| where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss |
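
  Usage (a minimal illustrative sketch):

  >>> y_true = np.random.random(size=(2, 3))
  >>> y_pred = np.random.random(size=(2, 3))
  >>> loss = huber_loss(y_true, y_pred, delta=1.0)
  >>> assert loss.shape == (2,)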
| |
| Args: |
| y_true: tensor of true targets. |
| y_pred: tensor of predicted targets. |
| delta: A float, the point where the Huber loss function changes from a |
| quadratic to linear. |
| |
| Returns: |
| Tensor with one scalar loss entry per sample. |
| """ |
| y_pred = math_ops.cast(y_pred, dtype=K.floatx()) |
| y_true = math_ops.cast(y_true, dtype=K.floatx()) |
| error = math_ops.subtract(y_pred, y_true) |
| abs_error = math_ops.abs(error) |
| quadratic = math_ops.minimum(abs_error, delta) |
| linear = math_ops.subtract(abs_error, quadratic) |
| return K.mean( |
| math_ops.add( |
| math_ops.multiply( |
| ops.convert_to_tensor_v2(0.5, dtype=quadratic.dtype), |
| math_ops.multiply(quadratic, quadratic)), |
| math_ops.multiply(delta, linear)), |
| axis=-1) |
| |
| |
| @keras_export('keras.losses.logcosh') |
| def logcosh(y_true, y_pred): |
| """Logarithm of the hyperbolic cosine of the prediction error. |
| |
| `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and |
| to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly |
| like the mean squared error, but will not be so strongly affected by the |
| occasional wildly incorrect prediction. |
| |
| Usage: |
| |
| >>> y_true = np.random.random(size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.logcosh(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> x = y_pred - y_true |
| >>> assert np.allclose( |
| ... loss.numpy(), |
| ... np.mean(x + np.log(np.exp(-2. * x) + 1.) - math_ops.log(2.), axis=-1), |
| ... atol=1e-5) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Logcosh error values. shape = `[batch_size, d0, .. dN-1]`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| |
| def _logcosh(x): |
| return x + nn.softplus(-2. * x) - math_ops.cast(math_ops.log(2.), x.dtype) |
| |
| return K.mean(_logcosh(y_pred - y_true), axis=-1) |
| |
| |
| @keras_export('keras.metrics.categorical_crossentropy', |
| 'keras.losses.categorical_crossentropy') |
| def categorical_crossentropy(y_true, |
| y_pred, |
| from_logits=False, |
| label_smoothing=0): |
| """Computes the categorical crossentropy loss. |
| |
| Usage: |
| |
| >>> y_true = [[0, 1, 0], [0, 0, 1]] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss.numpy() |
| array([0.0513, 2.303], dtype=float32) |
| |
| Args: |
| y_true: Tensor of one-hot true targets. |
| y_pred: Tensor of predicted targets. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By default, |
| we assume that `y_pred` encodes a probability distribution. |
| label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. |
| |
| Returns: |
| Categorical crossentropy loss value. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| label_smoothing = ops.convert_to_tensor_v2(label_smoothing, dtype=K.floatx()) |
| |
| def _smooth_labels(): |
| num_classes = math_ops.cast(array_ops.shape(y_true)[1], y_pred.dtype) |
| return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes) |
| |
| y_true = smart_cond.smart_cond(label_smoothing, |
| _smooth_labels, lambda: y_true) |
| return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits) |
| |
| |
| @keras_export('keras.metrics.sparse_categorical_crossentropy', |
| 'keras.losses.sparse_categorical_crossentropy') |
| def sparse_categorical_crossentropy(y_true, y_pred, from_logits=False, axis=-1): |
| """Computes the sparse categorical crossentropy loss. |
| |
| Usage: |
| |
| >>> y_true = [1, 2] |
| >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] |
| >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss.numpy() |
| array([0.0513, 2.303], dtype=float32) |
| |
| Args: |
| y_true: Ground truth values. |
| y_pred: The predicted values. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By default, |
| we assume that `y_pred` encodes a probability distribution. |
| axis: (Optional) Defaults to -1. The dimension along which the entropy is |
| computed. |
| |
| Returns: |
| Sparse categorical crossentropy loss value. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| return K.sparse_categorical_crossentropy( |
| y_true, y_pred, from_logits=from_logits, axis=axis) |
| |
| |
| @keras_export('keras.metrics.binary_crossentropy', |
| 'keras.losses.binary_crossentropy') |
| def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0): |
| """Computes the binary crossentropy loss. |
| |
| Usage: |
| |
| >>> y_true = [[0, 1], [0, 0]] |
| >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] |
| >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> loss.numpy() |
| array([0.916 , 0.714], dtype=float32) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| from_logits: Whether `y_pred` is expected to be a logits tensor. By default, |
| we assume that `y_pred` encodes a probability distribution. |
    label_smoothing: Float in [0, 1]. If > `0`, smooth the labels by squeezing
      them towards 0.5 (see the example below).
| |
| Returns: |
| Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. |
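
  As a rough sketch of what `label_smoothing` does (the exact computation is
  performed by `_smooth_labels` below), hard 0/1 labels are squeezed towards
  0.5:

  ```python
  import numpy as np

  label_smoothing = 0.1
  y_true = np.array([[0., 1.]])
  smoothed = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
  # smoothed is [[0.05, 0.95]]: each label moves half of `label_smoothing`
  # towards 0.5.
  ```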
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
| label_smoothing = ops.convert_to_tensor_v2(label_smoothing, dtype=K.floatx()) |
| |
| def _smooth_labels(): |
| return y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing |
| |
| y_true = smart_cond.smart_cond(label_smoothing, |
| _smooth_labels, lambda: y_true) |
| return K.mean( |
| K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1) |
| |
| |
| @keras_export('keras.metrics.kullback_leibler_divergence', |
| 'keras.metrics.kld', |
| 'keras.metrics.KLD', |
| 'keras.losses.kullback_leibler_divergence', |
| 'keras.losses.kld', |
| 'keras.losses.KLD') |
| def kullback_leibler_divergence(y_true, y_pred): |
| """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`. |
| |
| `loss = y_true * log(y_true / y_pred)` |
| |
| See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence |
| |
| Usage: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1) |
| >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1) |
  >>> assert np.allclose(
  ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1),
  ...     atol=1e-5)
| |
| Args: |
| y_true: Tensor of true targets. |
| y_pred: Tensor of predicted targets. |
| |
| Returns: |
| A `Tensor` with loss. |
| |
| Raises: |
| TypeError: If `y_true` cannot be cast to the `y_pred.dtype`. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
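  # Clip both distributions to [epsilon, 1] so that the log and the division
  # below stay finite (entries that are exactly 0 would otherwise produce
  # NaN or inf).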
| y_true = K.clip(y_true, K.epsilon(), 1) |
| y_pred = K.clip(y_pred, K.epsilon(), 1) |
| return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1) |
| |
| |
| @keras_export('keras.metrics.poisson', 'keras.losses.poisson') |
| def poisson(y_true, y_pred): |
| """Computes the Poisson loss between y_true and y_pred. |
| |
| The Poisson loss is the mean of the elements of the `Tensor` |
| `y_pred - y_true * log(y_pred)`. |
| |
| Usage: |
| |
| >>> y_true = np.random.randint(0, 2, size=(2, 3)) |
| >>> y_pred = np.random.random(size=(2, 3)) |
| >>> loss = tf.keras.losses.poisson(y_true, y_pred) |
| >>> assert loss.shape == (2,) |
| >>> y_pred = y_pred + 1e-7 |
| >>> assert np.allclose( |
| ... loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1), |
| ... atol=1e-5) |
| |
| Args: |
| y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. |
| y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. |
| |
| Returns: |
| Poisson loss value. shape = `[batch_size, d0, .. dN-1]`. |
| |
| Raises: |
| InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes. |
| """ |
| y_pred = ops.convert_to_tensor_v2(y_pred) |
| y_true = math_ops.cast(y_true, y_pred.dtype) |
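  # `K.epsilon()` keeps the log finite when a predicted rate is exactly 0.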
| return K.mean(y_pred - y_true * math_ops.log(y_pred + K.epsilon()), axis=-1) |
| |
| |
| @keras_export( |
| 'keras.losses.cosine_similarity', |
| v1=[ |
| 'keras.metrics.cosine_proximity', |
| 'keras.metrics.cosine', |
| 'keras.losses.cosine_proximity', |
| 'keras.losses.cosine', |
| 'keras.losses.cosine_similarity', |
| ]) |
| def cosine_similarity(y_true, y_pred, axis=-1): |
| """Computes the cosine similarity between labels and predictions. |
| |
  Note that it is a number between -1 and 1: 0 indicates orthogonality, and
  values closer to -1 indicate greater similarity. This makes it usable as a
  loss function in a setting where you try to maximize the proximity between
  predictions and targets. If either `y_true` or `y_pred` is a zero vector,
  the cosine similarity will be 0 regardless of the proximity between
  predictions and targets.
| |
| `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [1., 1.]] |
  >>> y_pred = [[1., 0.], [1., 1.]]
| >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1) |
  >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
  >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
| >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] |
| >>> # loss = -sum(l2_norm(y_true) . l2_norm(y_pred), axis=1) |
| >>> # = -[0. + 0., 0.5 + 0.5] |
| >>> loss.numpy() |
| array([-0., -0.999], dtype=float32) |
| |
| Args: |
| y_true: Tensor of true targets. |
| y_pred: Tensor of predicted targets. |
| axis: Axis along which to determine similarity. |
| |
| Returns: |
| Cosine similarity tensor. |
| """ |
| y_true = nn.l2_normalize(y_true, axis=axis) |
| y_pred = nn.l2_normalize(y_pred, axis=axis) |
| return -math_ops.reduce_sum(y_true * y_pred, axis=axis) |
| |
| |
| @keras_export('keras.losses.CosineSimilarity') |
| class CosineSimilarity(LossFunctionWrapper): |
| """Computes the cosine similarity between labels and predictions. |
| |
  Note that it is a number between -1 and 1, where 0 indicates orthogonality
  and values closer to -1 indicate greater similarity. This makes it usable as
  a loss function in a setting where you try to maximize the proximity between
  predictions and targets. If either `y_true` or `y_pred` is a zero vector,
  the cosine similarity will be 0 regardless of the proximity between
  predictions and targets.
| |
| `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))` |
| |
| Usage: |
| |
| >>> y_true = [[0., 1.], [1., 1.]] |
| >>> y_pred = [[1., 0.], [1., 1.]] |
| >>> # Using 'auto'/'sum_over_batch_size' reduction type. |
| >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1) |
  >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
  >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
| >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]] |
  >>> # loss = -mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
| >>> # = -((0. + 0.) + (0.5 + 0.5)) / 2 |
| >>> cosine_loss(y_true, y_pred).numpy() |
| -0.5 |
| |
| >>> # Calling with 'sample_weight'. |
| >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy() |
| -0.0999 |
| |
| >>> # Using 'sum' reduction type. |
| >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, |
| ... reduction=tf.keras.losses.Reduction.SUM) |
| >>> cosine_loss(y_true, y_pred).numpy() |
| -0.999 |
| |
| >>> # Using 'none' reduction type. |
| >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, |
| ... reduction=tf.keras.losses.Reduction.NONE) |
| >>> cosine_loss(y_true, y_pred).numpy() |
| array([-0., -0.999], dtype=float32) |
| |
| Usage with the `compile` API: |
| |
| ```python |
| model = tf.keras.Model(inputs, outputs) |
| model.compile('sgd', loss=tf.keras.losses.CosineSimilarity(axis=1)) |
| ``` |
| |
| Args: |
| axis: (Optional) Defaults to -1. The dimension along which the cosine |
| similarity is computed. |
| reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss. |
| Default value is `AUTO`. `AUTO` indicates that the reduction option will |
| be determined by the usage context. For almost all cases this defaults to |
| `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`, outside of |
| built-in training loops such as `tf.keras` `compile` and `fit`, using |
| `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please see this |
| custom training [tutorial] |
| (https://www.tensorflow.org/tutorials/distribute/custom_training) for more |
| details. |
| name: Optional name for the op. |
| """ |
| |
| def __init__(self, |
| axis=-1, |
| reduction=losses_utils.ReductionV2.AUTO, |
| name='cosine_similarity'): |
| super(CosineSimilarity, self).__init__( |
| cosine_similarity, reduction=reduction, name=name, axis=axis) |
| |
| |
| # Aliases. |
| |
| bce = BCE = binary_crossentropy |
| mse = MSE = mean_squared_error |
| mae = MAE = mean_absolute_error |
| mape = MAPE = mean_absolute_percentage_error |
| msle = MSLE = mean_squared_logarithmic_error |
| kld = KLD = kullback_leibler_divergence |
| |
| |
def is_categorical_crossentropy(loss):
  """Returns whether `loss` is a categorical crossentropy loss in any form.

  Accepts a `CategoricalCrossentropy` instance, a `LossFunctionWrapper` that
  wraps `categorical_crossentropy`, the bare function, or its string name.
  """
| result = ((isinstance(loss, CategoricalCrossentropy) or |
| (isinstance(loss, LossFunctionWrapper) and |
| loss.fn == categorical_crossentropy) or |
| (hasattr(loss, '__name__') and |
| loss.__name__ == 'categorical_crossentropy') or |
| (loss == 'categorical_crossentropy'))) |
| return result |
| |
| |
| @keras_export('keras.losses.serialize') |
| def serialize(loss): |
| """Serializes loss function or `Loss` instance. |
| |
| Arguments: |
| loss: A Keras `Loss` instance or a loss function. |
| |
| Returns: |
| Loss configuration dictionary. |
| """ |
| return serialize_keras_object(loss) |
| |
| |
| @keras_export('keras.losses.deserialize') |
| def deserialize(name, custom_objects=None): |
| """Deserializes a serialized loss class/function instance. |
| |
| Arguments: |
| name: Loss configuration. |
| custom_objects: Optional dictionary mapping names (strings) to custom |
| objects (classes and functions) to be considered during deserialization. |
| |
| Returns: |
| A Keras `Loss` instance or a loss function. |
| """ |
| return deserialize_keras_object( |
| name, |
| module_objects=globals(), |
| custom_objects=custom_objects, |
| printable_module_name='loss function') |
| |
| |
| @keras_export('keras.losses.get') |
| def get(identifier): |
| """Retrieves a Keras loss as a `function`/`Loss` class instance. |
| |
| The `identifier` may be the string name of a loss function or `Loss` class. |
| |
| >>> loss = tf.keras.losses.get("categorical_crossentropy") |
| >>> type(loss) |
| <class 'function'> |
| >>> loss = tf.keras.losses.get("CategoricalCrossentropy") |
| >>> type(loss) |
| <class '...tensorflow.python.keras.losses.CategoricalCrossentropy'> |
| |
  You can also specify the `config` of the loss to this function by passing a
  dict containing `class_name` and `config` as an identifier. Also note that
  the `class_name` must map to a `Loss` class.
| |
| >>> identifier = {"class_name": "CategoricalCrossentropy", |
| ... "config": {"from_logits": True}} |
| >>> loss = tf.keras.losses.get(identifier) |
| >>> type(loss) |
| <class '...tensorflow.python.keras.losses.CategoricalCrossentropy'> |
| |
| Arguments: |
    identifier: A loss identifier. One of `None`, a string name of a loss
      function/class, a loss configuration dictionary, a loss function, or a
      loss class instance.
| |
| Returns: |
    A Keras loss as a `function`/`Loss` class instance.
| |
| Raises: |
| ValueError: If `identifier` cannot be interpreted. |
| """ |
| if identifier is None: |
| return None |
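  # Strings and config dicts go through the Keras deserialization machinery;
  # callables (plain functions or `Loss` instances) are returned unchanged.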
| if isinstance(identifier, six.string_types): |
| identifier = str(identifier) |
| return deserialize(identifier) |
| if isinstance(identifier, dict): |
| return deserialize(identifier) |
| elif callable(identifier): |
| return identifier |
| else: |
    raise ValueError(
        'Could not interpret loss function identifier: {}'.format(identifier))
| |
| |
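# Label dtypes expected by losses that take integer class indices rather than
# one-hot / probability targets.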
| LABEL_DTYPES_FOR_LOSSES = { |
| losses_impl.sparse_softmax_cross_entropy: 'int32', |
| sparse_categorical_crossentropy: 'int32' |
| } |