tensorflow/contrib/metrics/python/ops/metric_ops.py - platform/external/tensorflow - Git at Google

 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 """Contains metric-computing operations on streamed tensors.

 Module documentation, including "@@" callouts, should be put in
 third_party/tensorflow/contrib/metrics/__init__.py
 """

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import collections as collections_lib

 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import confusion_matrix
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics
 from tensorflow.python.ops import metrics_impl
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import weights_broadcast_ops
 from tensorflow.python.ops.distributions.normal import Normal
 from tensorflow.python.util.deprecation import deprecated

 # Epsilon constant used to represent extremely small quantity.
 _EPSILON = 1e-7


 @deprecated(None, 'Please switch to tf.metrics.true_positives. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_true_positives(predictions,
                              labels,
                              weights=None,
                              metrics_collections=None,
                              updates_collections=None,
                              name=None):
   """Sum the weights of true_positives.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
       be cast to `bool`.
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     value_tensor: A `Tensor` representing the current value of the metric.
     update_op: An operation that accumulates the error from a batch of data.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.true_positives(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.true_negatives. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_true_negatives(predictions,
                              labels,
                              weights=None,
                              metrics_collections=None,
                              updates_collections=None,
                              name=None):
   """Sum the weights of true_negatives.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
       be cast to `bool`.
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     value_tensor: A `Tensor` representing the current value of the metric.
     update_op: An operation that accumulates the error from a batch of data.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.true_negatives(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.false_positives. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_false_positives(predictions,
                               labels,
                               weights=None,
                               metrics_collections=None,
                               updates_collections=None,
                               name=None):
   """Sum the weights of false positives.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
       be cast to `bool`.
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     value_tensor: A `Tensor` representing the current value of the metric.
     update_op: An operation that accumulates the error from a batch of data.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.false_positives(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.false_negatives. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_false_negatives(predictions,
                               labels,
                               weights=None,
                               metrics_collections=None,
                               updates_collections=None,
                               name=None):
   """Computes the total number of false negatives.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
       be cast to `bool`.
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     value_tensor: A `Tensor` representing the current value of the metric.
     update_op: An operation that accumulates the error from a batch of data.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match `values`,
       or if either `metrics_collections` or `updates_collections` are not a list
       or tuple.
   """
   return metrics.false_negatives(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.mean')
 def streaming_mean(values,
                    weights=None,
                    metrics_collections=None,
                    updates_collections=None,
                    name=None):
   """Computes the (weighted) mean of the given values.

   The `streaming_mean` function creates two local variables, `total` and `count`
   that are used to compute the average of `values`. This average is ultimately
   returned as `mean` which is an idempotent operation that simply divides
   `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the `mean`.
   `update_op` increments `total` with the reduced sum of the product of `values`
   and `weights`, and it increments `count` with the reduced sum of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     values: A `Tensor` of arbitrary dimensions.
     weights: `Tensor` whose rank is either 0, or the same rank as `values`, and
       must be broadcastable to `values` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `values` dimension).
     metrics_collections: An optional list of collections that `mean` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     mean: A `Tensor` representing the current mean, the value of `total` divided
       by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `mean`.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match `values`,
       or if either `metrics_collections` or `updates_collections` are not a list
       or tuple.
   """
   return metrics.mean(
       values=values,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.mean_tensor')
 def streaming_mean_tensor(values,
                           weights=None,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
   """Computes the element-wise (weighted) mean of the given tensors.

   In contrast to the `streaming_mean` function which returns a scalar with the
   mean,  this function returns an average tensor with the same shape as the
   input tensors.

   The `streaming_mean_tensor` function creates two local variables,
   `total_tensor` and `count_tensor` that are used to compute the average of
   `values`. This average is ultimately returned as `mean` which is an idempotent
   operation that simply divides `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the `mean`.
   `update_op` increments `total` with the reduced sum of the product of `values`
   and `weights`, and it increments `count` with the reduced sum of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     values: A `Tensor` of arbitrary dimensions.
     weights: `Tensor` whose rank is either 0, or the same rank as `values`, and
       must be broadcastable to `values` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `values` dimension).
     metrics_collections: An optional list of collections that `mean` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     mean: A float `Tensor` representing the current mean, the value of `total`
       divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `mean`.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match `values`,
       or if either `metrics_collections` or `updates_collections` are not a list
       or tuple.
   """
   return metrics.mean_tensor(
       values=values,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.accuracy. Note that the order '
             'of the labels and predictions arguments has been switched.')
 def streaming_accuracy(predictions,
                        labels,
                        weights=None,
                        metrics_collections=None,
                        updates_collections=None,
                        name=None):
   """Calculates how often `predictions` matches `labels`.

   The `streaming_accuracy` function creates two local variables, `total` and
   `count` that are used to compute the frequency with which `predictions`
   matches `labels`. This frequency is ultimately returned as `accuracy`: an
   idempotent operation that simply divides `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the `accuracy`.
   Internally, an `is_correct` operation computes a `Tensor` with elements 1.0
   where the corresponding elements of `predictions` and `labels` match and 0.0
   otherwise. Then `update_op` increments `total` with the reduced sum of the
   product of `weights` and `is_correct`, and it increments `count` with the
   reduced sum of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of any shape.
     labels: The ground truth values, a `Tensor` whose shape matches
       `predictions`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that `accuracy` should
       be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     accuracy: A `Tensor` representing the accuracy, the value of `total` divided
       by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `accuracy`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.accuracy(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.precision. Note that the order '
             'of the labels and predictions arguments has been switched.')
 def streaming_precision(predictions,
                         labels,
                         weights=None,
                         metrics_collections=None,
                         updates_collections=None,
                         name=None):
   """Computes the precision of the predictions with respect to the labels.

   The `streaming_precision` function creates two local variables,
   `true_positives` and `false_positives`, that are used to compute the
   precision. This value is ultimately returned as `precision`, an idempotent
   operation that simply divides `true_positives` by the sum of `true_positives`
   and `false_positives`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `precision`. `update_op` weights each prediction by the corresponding value in
   `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
     labels: The ground truth values, a `bool` `Tensor` whose dimensions must
       match `predictions`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that `precision` should
       be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     precision: Scalar float `Tensor` with the value of `true_positives`
       divided by the sum of `true_positives` and `false_positives`.
     update_op: `Operation` that increments `true_positives` and
       `false_positives` variables appropriately and whose value matches
       `precision`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.precision(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None, 'Please switch to tf.metrics.recall. Note that the order '
             'of the labels and predictions arguments has been switched.')
 def streaming_recall(predictions,
                      labels,
                      weights=None,
                      metrics_collections=None,
                      updates_collections=None,
                      name=None):
   """Computes the recall of the predictions with respect to the labels.

   The `streaming_recall` function creates two local variables, `true_positives`
   and `false_negatives`, that are used to compute the recall. This value is
   ultimately returned as `recall`, an idempotent operation that simply divides
   `true_positives` by the sum of `true_positives`  and `false_negatives`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` that updates these variables and returns the `recall`. `update_op`
   weights each prediction by the corresponding value in `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `bool` `Tensor` of arbitrary shape.
     labels: The ground truth values, a `bool` `Tensor` whose dimensions must
       match `predictions`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that `recall` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     recall: Scalar float `Tensor` with the value of `true_positives` divided
       by the sum of `true_positives` and `false_negatives`.
     update_op: `Operation` that increments `true_positives` and
       `false_negatives` variables appropriately and whose value matches
       `recall`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.recall(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_false_positive_rate(predictions,
                                   labels,
                                   weights=None,
                                   metrics_collections=None,
                                   updates_collections=None,
                                   name=None):
   """Computes the false positive rate of predictions with respect to labels.

   The `false_positive_rate` function creates two local variables,
   `false_positives` and `true_negatives`, that are used to compute the
   false positive rate. This value is ultimately returned as
   `false_positive_rate`, an idempotent operation that simply divides
   `false_positives` by the sum of `false_positives` and `true_negatives`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `false_positive_rate`. `update_op` weights each prediction by the
   corresponding value in `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
       be cast to `bool`.
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `false_positive_rate` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     false_positive_rate: Scalar float `Tensor` with the value of
       `false_positives` divided by the sum of `false_positives` and
       `true_negatives`.
     update_op: `Operation` that increments `false_positives` and
       `true_negatives` variables appropriately and whose value matches
       `false_positive_rate`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(name, 'false_positive_rate',
                                      (predictions, labels, weights)):
     predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
         predictions=math_ops.cast(predictions, dtype=dtypes.bool),
         labels=math_ops.cast(labels, dtype=dtypes.bool),
         weights=weights)

     false_p, false_positives_update_op = metrics.false_positives(
         labels=labels,
         predictions=predictions,
         weights=weights,
         metrics_collections=None,
         updates_collections=None,
         name=None)
     true_n, true_negatives_update_op = metrics.true_negatives(
         labels=labels,
         predictions=predictions,
         weights=weights,
         metrics_collections=None,
         updates_collections=None,
         name=None)

     def compute_fpr(fp, tn, name):
       return array_ops.where(
           math_ops.greater(fp + tn, 0), math_ops.div(fp, fp + tn), 0, name)

     fpr = compute_fpr(false_p, true_n, 'value')
     update_op = compute_fpr(false_positives_update_op, true_negatives_update_op,
                             'update_op')

     if metrics_collections:
       ops.add_to_collections(metrics_collections, fpr)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return fpr, update_op


 def streaming_false_negative_rate(predictions,
                                   labels,
                                   weights=None,
                                   metrics_collections=None,
                                   updates_collections=None,
                                   name=None):
   """Computes the false negative rate of predictions with respect to labels.

   The `false_negative_rate` function creates two local variables,
   `false_negatives` and `true_positives`, that are used to compute the
   false positive rate. This value is ultimately returned as
   `false_negative_rate`, an idempotent operation that simply divides
   `false_negatives` by the sum of `false_negatives` and `true_positives`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `false_negative_rate`. `update_op` weights each prediction by the
   corresponding value in `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: The predicted values, a `Tensor` of arbitrary dimensions. Will
       be cast to `bool`.
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `false_negative_rate` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     false_negative_rate: Scalar float `Tensor` with the value of
       `false_negatives` divided by the sum of `false_negatives` and
       `true_positives`.
     update_op: `Operation` that increments `false_negatives` and
       `true_positives` variables appropriately and whose value matches
       `false_negative_rate`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(name, 'false_negative_rate',
                                      (predictions, labels, weights)):
     predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
         predictions=math_ops.cast(predictions, dtype=dtypes.bool),
         labels=math_ops.cast(labels, dtype=dtypes.bool),
         weights=weights)

     false_n, false_negatives_update_op = metrics.false_negatives(
         labels,
         predictions,
         weights,
         metrics_collections=None,
         updates_collections=None,
         name=None)
     true_p, true_positives_update_op = metrics.true_positives(
         labels,
         predictions,
         weights,
         metrics_collections=None,
         updates_collections=None,
         name=None)

     def compute_fnr(fn, tp, name):
       return array_ops.where(
           math_ops.greater(fn + tp, 0), math_ops.div(fn, fn + tp), 0, name)

     fnr = compute_fnr(false_n, true_p, 'value')
     update_op = compute_fnr(false_negatives_update_op, true_positives_update_op,
                             'update_op')

     if metrics_collections:
       ops.add_to_collections(metrics_collections, fnr)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return fnr, update_op


 def _streaming_confusion_matrix_at_thresholds(predictions,
                                               labels,
                                               thresholds,
                                               weights=None,
                                               includes=None):
   """Computes true_positives, false_negatives, true_negatives, false_positives.

   This function creates up to four local variables, `true_positives`,
   `true_negatives`, `false_positives` and `false_negatives`.
   `true_positive[i]` is defined as the total weight of values in `predictions`
   above `thresholds[i]` whose corresponding entry in `labels` is `True`.
   `false_negatives[i]` is defined as the total weight of values in `predictions`
   at most `thresholds[i]` whose corresponding entry in `labels` is `True`.
   `true_negatives[i]` is defined as the total weight of values in `predictions`
   at most `thresholds[i]` whose corresponding entry in `labels` is `False`.
   `false_positives[i]` is defined as the total weight of values in `predictions`
   above `thresholds[i]` whose corresponding entry in `labels` is `False`.

   For estimation of these metrics over a stream of data, for each metric the
   function respectively creates an `update_op` operation that updates the
   variable and returns its value.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `Tensor` whose shape matches `predictions`. `labels` will be cast
       to `bool`.
     thresholds: A python list or tuple of float thresholds in `[0, 1]`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     includes: Tuple of keys to return, from 'tp', 'fn', 'tn', fp'. If `None`,
       default to all four.

   Returns:
     values: Dict of variables of shape `[len(thresholds)]`. Keys are from
         `includes`.
     update_ops: Dict of operations that increments the `values`. Keys are from
         `includes`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       `includes` contains invalid keys.
   """
   all_includes = ('tp', 'fn', 'tn', 'fp')
   if includes is None:
     includes = all_includes
   else:
     for include in includes:
       if include not in all_includes:
         raise ValueError('Invalid key: %s.' % include)

   predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
       predictions, labels, weights)
   predictions.get_shape().assert_is_compatible_with(labels.get_shape())

   num_thresholds = len(thresholds)

   # Reshape predictions and labels.
   predictions_2d = array_ops.reshape(predictions, [-1, 1])
   labels_2d = array_ops.reshape(
       math_ops.cast(labels, dtype=dtypes.bool), [1, -1])

   # Use static shape if known.
   num_predictions = predictions_2d.get_shape().as_list()[0]

   # Otherwise use dynamic shape.
   if num_predictions is None:
     num_predictions = array_ops.shape(predictions_2d)[0]
   thresh_tiled = array_ops.tile(
       array_ops.expand_dims(array_ops.constant(thresholds), [1]),
       array_ops.stack([1, num_predictions]))

   # Tile the predictions after thresholding them across different thresholds.
   pred_is_pos = math_ops.greater(
       array_ops.tile(array_ops.transpose(predictions_2d), [num_thresholds, 1]),
       thresh_tiled)
   if ('fn' in includes) or ('tn' in includes):
     pred_is_neg = math_ops.logical_not(pred_is_pos)

   # Tile labels by number of thresholds
   label_is_pos = array_ops.tile(labels_2d, [num_thresholds, 1])
   if ('fp' in includes) or ('tn' in includes):
     label_is_neg = math_ops.logical_not(label_is_pos)

   if weights is not None:
     broadcast_weights = weights_broadcast_ops.broadcast_weights(
         math_ops.cast(weights, dtypes.float32), predictions)
     weights_tiled = array_ops.tile(
         array_ops.reshape(broadcast_weights, [1, -1]), [num_thresholds, 1])
     thresh_tiled.get_shape().assert_is_compatible_with(
         weights_tiled.get_shape())
   else:
     weights_tiled = None

   values = {}
   update_ops = {}

   if 'tp' in includes:
     true_positives = metrics_impl.metric_variable([num_thresholds],
                                                   dtypes.float32,
                                                   name='true_positives')
     is_true_positive = math_ops.cast(
         math_ops.logical_and(label_is_pos, pred_is_pos), dtypes.float32)
     if weights_tiled is not None:
       is_true_positive *= weights_tiled
     update_ops['tp'] = state_ops.assign_add(
         true_positives, math_ops.reduce_sum(is_true_positive, 1))
     values['tp'] = true_positives

   if 'fn' in includes:
     false_negatives = metrics_impl.metric_variable([num_thresholds],
                                                    dtypes.float32,
                                                    name='false_negatives')
     is_false_negative = math_ops.cast(
         math_ops.logical_and(label_is_pos, pred_is_neg), dtypes.float32)
     if weights_tiled is not None:
       is_false_negative *= weights_tiled
     update_ops['fn'] = state_ops.assign_add(
         false_negatives, math_ops.reduce_sum(is_false_negative, 1))
     values['fn'] = false_negatives

   if 'tn' in includes:
     true_negatives = metrics_impl.metric_variable([num_thresholds],
                                                   dtypes.float32,
                                                   name='true_negatives')
     is_true_negative = math_ops.cast(
         math_ops.logical_and(label_is_neg, pred_is_neg), dtypes.float32)
     if weights_tiled is not None:
       is_true_negative *= weights_tiled
     update_ops['tn'] = state_ops.assign_add(
         true_negatives, math_ops.reduce_sum(is_true_negative, 1))
     values['tn'] = true_negatives

   if 'fp' in includes:
     false_positives = metrics_impl.metric_variable([num_thresholds],
                                                    dtypes.float32,
                                                    name='false_positives')
     is_false_positive = math_ops.cast(
         math_ops.logical_and(label_is_neg, pred_is_pos), dtypes.float32)
     if weights_tiled is not None:
       is_false_positive *= weights_tiled
     update_ops['fp'] = state_ops.assign_add(
         false_positives, math_ops.reduce_sum(is_false_positive, 1))
     values['fp'] = false_positives

   return values, update_ops


 def streaming_true_positives_at_thresholds(predictions,
                                            labels,
                                            thresholds,
                                            weights=None):
   values, update_ops = _streaming_confusion_matrix_at_thresholds(
       predictions, labels, thresholds, weights=weights, includes=('tp',))
   return values['tp'], update_ops['tp']


 def streaming_false_negatives_at_thresholds(predictions,
                                             labels,
                                             thresholds,
                                             weights=None):
   values, update_ops = _streaming_confusion_matrix_at_thresholds(
       predictions, labels, thresholds, weights=weights, includes=('fn',))
   return values['fn'], update_ops['fn']


 def streaming_false_positives_at_thresholds(predictions,
                                             labels,
                                             thresholds,
                                             weights=None):
   values, update_ops = _streaming_confusion_matrix_at_thresholds(
       predictions, labels, thresholds, weights=weights, includes=('fp',))
   return values['fp'], update_ops['fp']


 def streaming_true_negatives_at_thresholds(predictions,
                                            labels,
                                            thresholds,
                                            weights=None):
   values, update_ops = _streaming_confusion_matrix_at_thresholds(
       predictions, labels, thresholds, weights=weights, includes=('tn',))
   return values['tn'], update_ops['tn']


 def streaming_curve_points(labels=None,
                            predictions=None,
                            weights=None,
                            num_thresholds=200,
                            metrics_collections=None,
                            updates_collections=None,
                            curve='ROC',
                            name=None):
   """Computes curve (ROC or PR) values for a prespecified number of points.

   The `streaming_curve_points` function creates four local variables,
   `true_positives`, `true_negatives`, `false_positives` and `false_negatives`
   that are used to compute the curve values. To discretize the curve, a linearly
   spaced set of thresholds is used to compute pairs of recall and precision
   values.

   For best results, `predictions` should be distributed approximately uniformly
   in the range [0, 1] and not peaked around 0 or 1.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     labels: A `Tensor` whose shape matches `predictions`. Will be cast to
       `bool`.
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     num_thresholds: The number of thresholds to use when discretizing the roc
       curve.
     metrics_collections: An optional list of collections that `auc` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     curve: Specifies the name of the curve to be computed, 'ROC' [default] or
       'PR' for the Precision-Recall-curve.
     name: An optional variable_scope name.

   Returns:
     points: A `Tensor` with shape [num_thresholds, 2] that contains points of
       the curve.
     update_op: An operation that increments the `true_positives`,
       `true_negatives`, `false_positives` and `false_negatives` variables.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.

   TODO(chizeng): Consider rewriting this method to make use of logic within the
   precision_recall_at_equal_thresholds method (to improve run time).
   """
   with variable_scope.variable_scope(name, 'curve_points',
                                      (labels, predictions, weights)):
     if curve != 'ROC' and curve != 'PR':
       raise ValueError('curve must be either ROC or PR, %s unknown' % (curve))
     kepsilon = _EPSILON  # to account for floating point imprecisions
     thresholds = [
         (i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)
     ]
     thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon]

     values, update_ops = _streaming_confusion_matrix_at_thresholds(
         labels=labels,
         predictions=predictions,
         thresholds=thresholds,
         weights=weights)

     # Add epsilons to avoid dividing by 0.
     epsilon = 1.0e-6

     def compute_points(tp, fn, tn, fp):
       """Computes the roc-auc or pr-auc based on confusion counts."""
       rec = math_ops.div(tp + epsilon, tp + fn + epsilon)
       if curve == 'ROC':
         fp_rate = math_ops.div(fp, fp + tn + epsilon)
         return fp_rate, rec
       else:  # curve == 'PR'.
         prec = math_ops.div(tp + epsilon, tp + fp + epsilon)
         return rec, prec

     xs, ys = compute_points(values['tp'], values['fn'], values['tn'],
                             values['fp'])
     points = array_ops.stack([xs, ys], axis=1)
     update_op = control_flow_ops.group(*update_ops.values())

     if metrics_collections:
       ops.add_to_collections(metrics_collections, points)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return points, update_op


 @deprecated(None, 'Please switch to tf.metrics.auc. Note that the order of '
             'the labels and predictions arguments has been switched.')
 def streaming_auc(predictions,
                   labels,
                   weights=None,
                   num_thresholds=200,
                   metrics_collections=None,
                   updates_collections=None,
                   curve='ROC',
                   name=None):
   """Computes the approximate AUC via a Riemann sum.

   The `streaming_auc` function creates four local variables, `true_positives`,
   `true_negatives`, `false_positives` and `false_negatives` that are used to
   compute the AUC. To discretize the AUC curve, a linearly spaced set of
   thresholds is used to compute pairs of recall and precision values. The area
   under the ROC-curve is therefore computed using the height of the recall
   values by the false positive rate, while the area under the PR-curve is the
   computed using the height of the precision values by the recall.

   This value is ultimately returned as `auc`, an idempotent operation that
   computes the area under a discretized curve of precision versus recall values
   (computed using the aforementioned variables). The `num_thresholds` variable
   controls the degree of discretization with larger numbers of thresholds more
   closely approximating the true AUC. The quality of the approximation may vary
   dramatically depending on `num_thresholds`.

   For best results, `predictions` should be distributed approximately uniformly
   in the range [0, 1] and not peaked around 0 or 1. The quality of the AUC
   approximation may be poor if this is not the case.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the `auc`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     num_thresholds: The number of thresholds to use when discretizing the roc
       curve.
     metrics_collections: An optional list of collections that `auc` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     curve: Specifies the name of the curve to be computed, 'ROC' [default] or
       'PR' for the Precision-Recall-curve.
     name: An optional variable_scope name.

   Returns:
     auc: A scalar `Tensor` representing the current area-under-curve.
     update_op: An operation that increments the `true_positives`,
       `true_negatives`, `false_positives` and `false_negatives` variables
       appropriately and whose value matches `auc`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.auc(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       num_thresholds=num_thresholds,
       curve=curve,
       updates_collections=updates_collections,
       name=name)


 def _compute_dynamic_auc(labels, predictions, curve='ROC', weights=None):
   """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds.

   Computes the area under the ROC or PR curve using each prediction as a
   threshold. This could be slow for large batches, but has the advantage of not
   having its results degrade depending on the distribution of predictions.

   Args:
     labels: A `Tensor` of ground truth labels with the same shape as
       `predictions` with values of 0 or 1 and type `int64`.
     predictions: A 1-D `Tensor` of predictions whose values are `float64`.
     curve: The name of the curve to be computed, 'ROC' for the Receiving
       Operating Characteristic or 'PR' for the Precision-Recall curve.
     weights: A 1-D `Tensor` of weights whose values are `float64`.

   Returns:
     A scalar `Tensor` containing the area-under-curve value for the input.
   """
   # Compute the total weight and the total positive weight.
   size = array_ops.size(predictions)
   if weights is None:
     weights = array_ops.ones_like(labels, dtype=dtypes.float64)
   labels, predictions, weights = metrics_impl._remove_squeezable_dimensions(
       labels, predictions, weights)
   total_weight = math_ops.reduce_sum(weights)
   total_positive = math_ops.reduce_sum(
       array_ops.where(
           math_ops.greater(labels, 0), weights,
           array_ops.zeros_like(labels, dtype=dtypes.float64)))

   def continue_computing_dynamic_auc():
     """Continues dynamic auc computation, entered if labels are not all equal.

     Returns:
       A scalar `Tensor` containing the area-under-curve value.
     """
     # Sort the predictions descending, keeping the same order for the
     # corresponding labels and weights.
     ordered_predictions, indices = nn.top_k(predictions, k=size)
     ordered_labels = array_ops.gather(labels, indices)
     ordered_weights = array_ops.gather(weights, indices)

     # Get the counts of the unique ordered predictions.
     _, _, counts = array_ops.unique_with_counts(ordered_predictions)

     # Compute the indices of the split points between different predictions.
     splits = math_ops.cast(
         array_ops.pad(math_ops.cumsum(counts), paddings=[[1, 0]]), dtypes.int32)

     # Count the positives to the left of the split indices.
     true_positives = array_ops.gather(
         array_ops.pad(
             math_ops.cumsum(
                 array_ops.where(
                     math_ops.greater(ordered_labels, 0), ordered_weights,
                     array_ops.zeros_like(ordered_labels,
                                          dtype=dtypes.float64))),
             paddings=[[1, 0]]), splits)
     if curve == 'ROC':
       # Compute the weight of the negatives to the left of every split point and
       # the total weight of the negatives number of negatives for computing the
       # FPR.
       false_positives = array_ops.gather(
           array_ops.pad(
               math_ops.cumsum(
                   array_ops.where(
                       math_ops.less(ordered_labels, 1), ordered_weights,
                       array_ops.zeros_like(
                           ordered_labels, dtype=dtypes.float64))),
               paddings=[[1, 0]]), splits)
       total_negative = total_weight - total_positive
       x_axis_values = math_ops.truediv(false_positives, total_negative)
       y_axis_values = math_ops.truediv(true_positives, total_positive)
     elif curve == 'PR':
       x_axis_values = math_ops.truediv(true_positives, total_positive)
       # For conformance, set precision to 1 when the number of positive
       # classifications is 0.
       positives = array_ops.gather(
           array_ops.pad(math_ops.cumsum(ordered_weights), paddings=[[1, 0]]),
           splits)
       y_axis_values = array_ops.where(
           math_ops.greater(splits, 0),
           math_ops.truediv(true_positives, positives),
           array_ops.ones_like(true_positives, dtype=dtypes.float64))

     # Calculate trapezoid areas.
     heights = math_ops.add(y_axis_values[1:], y_axis_values[:-1]) / 2.0
     widths = math_ops.abs(
         math_ops.subtract(x_axis_values[1:], x_axis_values[:-1]))
     return math_ops.reduce_sum(math_ops.multiply(heights, widths))

   # If all the labels are the same, AUC isn't well-defined (but raising an
   # exception seems excessive) so we return 0, otherwise we finish computing.
   return control_flow_ops.cond(
       math_ops.logical_or(
           math_ops.equal(total_positive, 0),
           math_ops.equal(total_positive, total_weight)),
       true_fn=lambda: array_ops.constant(0, dtypes.float64),
       false_fn=continue_computing_dynamic_auc)


 def streaming_dynamic_auc(labels,
                           predictions,
                           curve='ROC',
                           metrics_collections=(),
                           updates_collections=(),
                           name=None,
                           weights=None):
   """Computes the apporixmate AUC by a Riemann sum with data-derived thresholds.

   USAGE NOTE: this approach requires storing all of the predictions and labels
   for a single evaluation in memory, so it may not be usable when the evaluation
   batch size and/or the number of evaluation steps is very large.

   Computes the area under the ROC or PR curve using each prediction as a
   threshold. This has the advantage of being resilient to the distribution of
   predictions by aggregating across batches, accumulating labels and predictions
   and performing the final calculation using all of the concatenated values.

   Args:
     labels:  A `Tensor` of ground truth labels with the same shape as
       `predictions` and with values of 0 or 1 whose values are castable to
       `int64`.
     predictions: A `Tensor` of predictions whose values are castable to
       `float64`. Will be flattened into a 1-D `Tensor`.
     curve: The name of the curve for which to compute AUC, 'ROC' for the
       Receiving Operating Characteristic or 'PR' for the Precision-Recall curve.
     metrics_collections: An optional iterable of collections that `auc` should
       be added to.
     updates_collections: An optional iterable of collections that `update_op`
       should be added to.
     name: An optional name for the variable_scope that contains the metric
       variables.
     weights: A 'Tensor' of non-negative weights whose values are castable to
       `float64`. Will be flattened into a 1-D `Tensor`.

   Returns:
     auc: A scalar `Tensor` containing the current area-under-curve value.
     update_op: An operation that concatenates the input labels and predictions
       to the accumulated values.

   Raises:
     ValueError: If `labels` and `predictions` have mismatched shapes or if
       `curve` isn't a recognized curve type.
   """

   if curve not in ['PR', 'ROC']:
     raise ValueError('curve must be either ROC or PR, %s unknown' % curve)

   with variable_scope.variable_scope(name, default_name='dynamic_auc'):
     labels.get_shape().assert_is_compatible_with(predictions.get_shape())
     predictions = array_ops.reshape(
         math_ops.cast(predictions, dtypes.float64), [-1])
     labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1])
     with ops.control_dependencies([
         check_ops.assert_greater_equal(
             labels,
             array_ops.zeros_like(labels, dtypes.int64),
             message='labels must be 0 or 1, at least one is <0'),
         check_ops.assert_less_equal(
             labels,
             array_ops.ones_like(labels, dtypes.int64),
             message='labels must be 0 or 1, at least one is >1'),
     ]):
       preds_accum, update_preds = streaming_concat(
           predictions, name='concat_preds')
       labels_accum, update_labels = streaming_concat(
           labels, name='concat_labels')
       if weights is not None:
         weights = array_ops.reshape(
             math_ops.cast(weights, dtypes.float64), [-1])
         weights_accum, update_weights = streaming_concat(
             weights, name='concat_weights')
         update_op = control_flow_ops.group(update_labels, update_preds,
                                            update_weights)
       else:
         weights_accum = None
         update_op = control_flow_ops.group(update_labels, update_preds)
       auc = _compute_dynamic_auc(
           labels_accum, preds_accum, curve=curve, weights=weights_accum)
       if updates_collections:
         ops.add_to_collections(updates_collections, update_op)
       if metrics_collections:
         ops.add_to_collections(metrics_collections, auc)
       return auc, update_op


 def _compute_placement_auc(labels, predictions, weights, alpha,
                            logit_transformation, is_valid):
   """Computes the AUC and asymptotic normally distributed confidence interval.

   The calculations are achieved using the fact that AUC = P(Y_1>Y_0) and the
   concept of placement values for each labeled group, as presented by Delong and
   Delong (1988). The actual algorithm used is a more computationally efficient
   approach presented by Sun and Xu (2014). This could be slow for large batches,
   but has the advantage of not having its results degrade depending on the
   distribution of predictions.

   Args:
     labels: A `Tensor` of ground truth labels with the same shape as
       `predictions` with values of 0 or 1 and type `int64`.
     predictions: A 1-D `Tensor` of predictions whose values are `float64`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`.
     alpha: Confidence interval level desired.
     logit_transformation: A boolean value indicating whether the estimate should
       be logit transformed prior to calculating the confidence interval. Doing
       so enforces the restriction that the AUC should never be outside the
       interval [0,1].
     is_valid: A bool tensor describing whether the input is valid.

   Returns:
     A 1-D `Tensor` containing the area-under-curve, lower, and upper confidence
     interval values.
   """
   # Disable the invalid-name checker so that we can capitalize the name.
   # pylint: disable=invalid-name
   AucData = collections_lib.namedtuple('AucData', ['auc', 'lower', 'upper'])
   # pylint: enable=invalid-name

   # If all the labels are the same or if number of observations are too few,
   # AUC isn't well-defined
   size = array_ops.size(predictions, out_type=dtypes.int32)

   # Count the total number of positive and negative labels in the input.
   total_0 = math_ops.reduce_sum(
       math_ops.cast(1 - labels, weights.dtype) * weights)
   total_1 = math_ops.reduce_sum(math_ops.cast(labels, weights.dtype) * weights)

   # Sort the predictions ascending, as well as
   # (i) the corresponding labels and
   # (ii) the corresponding weights.
   ordered_predictions, indices = nn.top_k(predictions, k=size, sorted=True)
   ordered_predictions = array_ops.reverse(
       ordered_predictions, axis=array_ops.zeros(1, dtypes.int32))
   indices = array_ops.reverse(indices, axis=array_ops.zeros(1, dtypes.int32))
   ordered_labels = array_ops.gather(labels, indices)
   ordered_weights = array_ops.gather(weights, indices)

   # We now compute values required for computing placement values.

   # We generate a list of indices (segmented_indices) of increasing order. An
   # index is assigned for each unique prediction float value. Prediction
   # values that are the same share the same index.
   _, segmented_indices = array_ops.unique(ordered_predictions)

   # We create 2 tensors of weights. weights_for_true is non-zero for true
   # labels. weights_for_false is non-zero for false labels.
   float_labels_for_true = math_ops.cast(ordered_labels, dtypes.float32)
   float_labels_for_false = 1.0 - float_labels_for_true
   weights_for_true = ordered_weights * float_labels_for_true
   weights_for_false = ordered_weights * float_labels_for_false

   # For each set of weights with the same segmented indices, we add up the
   # weight values. Note that for each label, we deliberately rely on weights
   # for the opposite label.
   weight_totals_for_true = math_ops.segment_sum(weights_for_false,
                                                 segmented_indices)
   weight_totals_for_false = math_ops.segment_sum(weights_for_true,
                                                  segmented_indices)

   # These cumulative sums of weights importantly exclude the current weight
   # sums.
   cum_weight_totals_for_true = math_ops.cumsum(
       weight_totals_for_true, exclusive=True)
   cum_weight_totals_for_false = math_ops.cumsum(
       weight_totals_for_false, exclusive=True)

   # Compute placement values using the formula. Values with the same segmented
   # indices and labels share the same placement values.
   placements_for_true = (
       (cum_weight_totals_for_true + weight_totals_for_true / 2.0) /
       (math_ops.reduce_sum(weight_totals_for_true) + _EPSILON))
   placements_for_false = (
       (cum_weight_totals_for_false + weight_totals_for_false / 2.0) /
       (math_ops.reduce_sum(weight_totals_for_false) + _EPSILON))

   # We expand the tensors of placement values (for each label) so that their
   # shapes match that of predictions.
   placements_for_true = array_ops.gather(placements_for_true, segmented_indices)
   placements_for_false = array_ops.gather(placements_for_false,
                                           segmented_indices)

   # Select placement values based on the label for each index.
   placement_values = (
       placements_for_true * float_labels_for_true +
       placements_for_false * float_labels_for_false)

   # Split placement values by labeled groups.
   placement_values_0 = placement_values * math_ops.cast(1 - ordered_labels,
                                                         weights.dtype)
   weights_0 = ordered_weights * math_ops.cast(1 - ordered_labels, weights.dtype)
   placement_values_1 = placement_values * math_ops.cast(ordered_labels,
                                                         weights.dtype)
   weights_1 = ordered_weights * math_ops.cast(ordered_labels, weights.dtype)

   # Calculate AUC using placement values
   auc_0 = (
       math_ops.reduce_sum(weights_0 * (1. - placement_values_0)) /
       (total_0 + _EPSILON))
   auc_1 = (
       math_ops.reduce_sum(weights_1 * (placement_values_1)) /
       (total_1 + _EPSILON))
   auc = array_ops.where(math_ops.less(total_0, total_1), auc_1, auc_0)

   # Calculate variance and standard error using the placement values.
   var_0 = (
       math_ops.reduce_sum(
           weights_0 * math_ops.square(1. - placement_values_0 - auc_0)) /
       (total_0 - 1. + _EPSILON))
   var_1 = (
       math_ops.reduce_sum(
           weights_1 * math_ops.squared_difference(placement_values_1, auc_1)) /
       (total_1 - 1. + _EPSILON))
   auc_std_err = math_ops.sqrt((var_0 / (total_0 + _EPSILON)) +
                               (var_1 / (total_1 + _EPSILON)))

   # Calculate asymptotic normal confidence intervals
   std_norm_dist = Normal(loc=0., scale=1.)
   z_value = std_norm_dist.quantile((1.0 - alpha) / 2.0)
   if logit_transformation:
     estimate = math_ops.log(auc / (1. - auc + _EPSILON))
     std_err = auc_std_err / (auc * (1. - auc + _EPSILON))
     transformed_auc_lower = estimate + (z_value * std_err)
     transformed_auc_upper = estimate - (z_value * std_err)

     def inverse_logit_transformation(x):
       exp_negative = math_ops.exp(math_ops.negative(x))
       return 1. / (1. + exp_negative + _EPSILON)

     auc_lower = inverse_logit_transformation(transformed_auc_lower)
     auc_upper = inverse_logit_transformation(transformed_auc_upper)
   else:
     estimate = auc
     std_err = auc_std_err
     auc_lower = estimate + (z_value * std_err)
     auc_upper = estimate - (z_value * std_err)

   ## If estimate is 1 or 0, no variance is present so CI = 1
   ## n.b. This can be misleading, since number obs can just be too low.
   lower = array_ops.where(
       math_ops.logical_or(
           math_ops.equal(auc, array_ops.ones_like(auc)),
           math_ops.equal(auc, array_ops.zeros_like(auc))), auc, auc_lower)
   upper = array_ops.where(
       math_ops.logical_or(
           math_ops.equal(auc, array_ops.ones_like(auc)),
           math_ops.equal(auc, array_ops.zeros_like(auc))), auc, auc_upper)

   # If all the labels are the same, AUC isn't well-defined (but raising an
   # exception seems excessive) so we return 0, otherwise we finish computing.
   trivial_value = array_ops.constant(0.0)

   return AucData(*control_flow_ops.cond(
       is_valid, lambda: [auc, lower, upper], lambda: [trivial_value] * 3))


 def auc_with_confidence_intervals(labels,
                                   predictions,
                                   weights=None,
                                   alpha=0.95,
                                   logit_transformation=True,
                                   metrics_collections=(),
                                   updates_collections=(),
                                   name=None):
   """Computes the AUC and asymptotic normally distributed confidence interval.

   USAGE NOTE: this approach requires storing all of the predictions and labels
   for a single evaluation in memory, so it may not be usable when the evaluation
   batch size and/or the number of evaluation steps is very large.

   Computes the area under the ROC curve and its confidence interval using
   placement values. This has the advantage of being resilient to the
   distribution of predictions by aggregating across batches, accumulating labels
   and predictions and performing the final calculation using all of the
   concatenated values.

   Args:
     labels: A `Tensor` of ground truth labels with the same shape as `labels`
       and with values of 0 or 1 whose values are castable to `int64`.
     predictions: A `Tensor` of predictions whose values are castable to
       `float64`. Will be flattened into a 1-D `Tensor`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`.
     alpha: Confidence interval level desired.
     logit_transformation: A boolean value indicating whether the estimate should
       be logit transformed prior to calculating the confidence interval. Doing
       so enforces the restriction that the AUC should never be outside the
       interval [0,1].
     metrics_collections: An optional iterable of collections that `auc` should
       be added to.
     updates_collections: An optional iterable of collections that `update_op`
       should be added to.
     name: An optional name for the variable_scope that contains the metric
       variables.

   Returns:
     auc: A 1-D `Tensor` containing the current area-under-curve, lower, and
       upper confidence interval values.
     update_op: An operation that concatenates the input labels and predictions
       to the accumulated values.

   Raises:
     ValueError: If `labels`, `predictions`, and `weights` have mismatched shapes
     or if `alpha` isn't in the range (0,1).
   """
   if not (alpha > 0 and alpha < 1):
     raise ValueError('alpha must be between 0 and 1; currently %.02f' % alpha)

   if weights is None:
     weights = array_ops.ones_like(predictions)

   with variable_scope.variable_scope(
       name,
       default_name='auc_with_confidence_intervals',
       values=[labels, predictions, weights]):

     predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
         predictions=predictions,
         labels=labels,
         weights=weights)

     total_weight = math_ops.reduce_sum(weights)

     weights = array_ops.reshape(weights, [-1])
     predictions = array_ops.reshape(
         math_ops.cast(predictions, dtypes.float64), [-1])
     labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1])

     with ops.control_dependencies([
         check_ops.assert_greater_equal(
             labels,
             array_ops.zeros_like(labels, dtypes.int64),
             message='labels must be 0 or 1, at least one is <0'),
         check_ops.assert_less_equal(
             labels,
             array_ops.ones_like(labels, dtypes.int64),
             message='labels must be 0 or 1, at least one is >1'),
     ]):
       preds_accum, update_preds = streaming_concat(
           predictions, name='concat_preds')
       labels_accum, update_labels = streaming_concat(
           labels, name='concat_labels')
       weights_accum, update_weights = streaming_concat(
           weights, name='concat_weights')
       update_op_for_valid_case = control_flow_ops.group(update_labels,
                                                         update_preds,
                                                         update_weights)

       # Only perform updates if this case is valid.
       all_labels_positive_or_0 = math_ops.logical_and(
           math_ops.equal(math_ops.reduce_min(labels), 0),
           math_ops.equal(math_ops.reduce_max(labels), 1))
       sums_of_weights_at_least_1 = math_ops.greater_equal(total_weight, 1.0)
       is_valid = math_ops.logical_and(all_labels_positive_or_0,
                                       sums_of_weights_at_least_1)

       update_op = control_flow_ops.cond(
           sums_of_weights_at_least_1, lambda: update_op_for_valid_case,
           control_flow_ops.no_op)

       auc = _compute_placement_auc(
           labels_accum,
           preds_accum,
           weights_accum,
           alpha=alpha,
           logit_transformation=logit_transformation,
           is_valid=is_valid)

       if updates_collections:
         ops.add_to_collections(updates_collections, update_op)
       if metrics_collections:
         ops.add_to_collections(metrics_collections, auc)
       return auc, update_op


 def precision_recall_at_equal_thresholds(labels,
                                          predictions,
                                          weights=None,
                                          num_thresholds=None,
                                          use_locking=None,
                                          name=None):
   """A helper method for creating metrics related to precision-recall curves.

   These values are true positives, false negatives, true negatives, false
   positives, precision, and recall. This function returns a data structure that
   contains ops within it.

   Unlike _streaming_confusion_matrix_at_thresholds (which exhibits O(T * N)
   space and run time), this op exhibits O(T + N) space and run time, where T is
   the number of thresholds and N is the size of the predictions tensor. Hence,
   it may be advantageous to use this function when `predictions` is big.

   For instance, prefer this method for per-pixel classification tasks, for which
   the predictions tensor may be very large.

   Each number in `predictions`, a float in `[0, 1]`, is compared with its
   corresponding label in `labels`, and counts as a single tp/fp/tn/fn value at
   each threshold. This is then multiplied with `weights` which can be used to
   reweight certain values, or more commonly used for masking values.

   Args:
     labels: A bool `Tensor` whose shape matches `predictions`.
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     weights: Optional; If provided, a `Tensor` that has the same dtype as, and
       broadcastable to, `predictions`. This tensor is multiplied by counts.
     num_thresholds: Optional; Number of thresholds, evenly distributed in `[0,
       1]`. Should be `>= 2`. Defaults to 201. Note that the number of bins is 1
       less than `num_thresholds`. Using an even `num_thresholds` value instead
       of an odd one may yield unfriendly edges for bins.
     use_locking: Optional; If True, the op will be protected by a lock.
       Otherwise, the behavior is undefined, but may exhibit less contention.
       Defaults to True.
     name: Optional; variable_scope name. If not provided, the string
       'precision_recall_at_equal_threshold' is used.

   Returns:
     result: A named tuple (See PrecisionRecallData within the implementation of
       this function) with properties that are variables of shape
       `[num_thresholds]`. The names of the properties are tp, fp, tn, fn,
       precision, recall, thresholds. Types are same as that of predictions.
     update_op: An op that accumulates values.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       `includes` contains invalid keys.
   """
   # Disable the invalid-name checker so that we can capitalize the name.
   # pylint: disable=invalid-name
   PrecisionRecallData = collections_lib.namedtuple(
       'PrecisionRecallData',
       ['tp', 'fp', 'tn', 'fn', 'precision', 'recall', 'thresholds'])
   # pylint: enable=invalid-name

   if num_thresholds is None:
     num_thresholds = 201

   if weights is None:
     weights = 1.0

   if use_locking is None:
     use_locking = True

   check_ops.assert_type(labels, dtypes.bool)

   with variable_scope.variable_scope(name,
                                      'precision_recall_at_equal_thresholds',
                                      (labels, predictions, weights)):
     # Make sure that predictions are within [0.0, 1.0].
     with ops.control_dependencies([
         check_ops.assert_greater_equal(
             predictions,
             math_ops.cast(0.0, dtype=predictions.dtype),
             message='predictions must be in [0, 1]'),
         check_ops.assert_less_equal(
             predictions,
             math_ops.cast(1.0, dtype=predictions.dtype),
             message='predictions must be in [0, 1]')
     ]):
       predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
           predictions=predictions,
           labels=labels,
           weights=weights)

     predictions.get_shape().assert_is_compatible_with(labels.get_shape())

     # It's important we aggregate using float64 since we're accumulating a lot
     # of 1.0's for the true/false labels, and accumulating to float32 will
     # be quite inaccurate even with just a modest amount of values (~20M).
     # We use float64 instead of integer primarily since GPU scatter kernel
     # only support floats.
     agg_dtype = dtypes.float64

     f_labels = math_ops.cast(labels, agg_dtype)
     weights = math_ops.cast(weights, agg_dtype)
     true_labels = f_labels * weights
     false_labels = (1.0 - f_labels) * weights

     # Flatten predictions and labels.
     predictions = array_ops.reshape(predictions, [-1])
     true_labels = array_ops.reshape(true_labels, [-1])
     false_labels = array_ops.reshape(false_labels, [-1])

     # To compute TP/FP/TN/FN, we are measuring a binary classifier
     #   C(t) = (predictions >= t)
     # at each threshold 't'. So we have
     #   TP(t) = sum( C(t) * true_labels )
     #   FP(t) = sum( C(t) * false_labels )
     #
     # But, computing C(t) requires computation for each t. To make it fast,
     # observe that C(t) is a cumulative integral, and so if we have
     #   thresholds = [t_0, ..., t_{n-1}];  t_0 < ... < t_{n-1}
     # where n = num_thresholds, and if we can compute the bucket function
     #   B(i) = Sum( (predictions == t), t_i <= t < t{i+1} )
     # then we get
     #   C(t_i) = sum( B(j), j >= i )
     # which is the reversed cumulative sum in tf.cumsum().
     #
     # We can compute B(i) efficiently by taking advantage of the fact that
     # our thresholds are evenly distributed, in that
     #   width = 1.0 / (num_thresholds - 1)
     #   thresholds = [0.0, 1*width, 2*width, 3*width, ..., 1.0]
     # Given a prediction value p, we can map it to its bucket by
     #   bucket_index(p) = floor( p * (num_thresholds - 1) )
     # so we can use tf.compat.v1.scatter_add() to update the buckets in one pass.
     #
     # This implementation exhibits a run time and space complexity of O(T + N),
     # where T is the number of thresholds and N is the size of predictions.
     # Metrics that rely on _streaming_confusion_matrix_at_thresholds instead
     # exhibit a complexity of O(T * N).

     # Compute the bucket indices for each prediction value.
     bucket_indices = math_ops.cast(
         math_ops.floor(predictions * (num_thresholds - 1)), dtypes.int32)

     with ops.name_scope('variables'):
       tp_buckets_v = metrics_impl.metric_variable([num_thresholds],
                                                   agg_dtype,
                                                   name='tp_buckets')
       fp_buckets_v = metrics_impl.metric_variable([num_thresholds],
                                                   agg_dtype,
                                                   name='fp_buckets')

     with ops.name_scope('update_op'):
       update_tp = state_ops.scatter_add(
           tp_buckets_v, bucket_indices, true_labels, use_locking=use_locking)
       update_fp = state_ops.scatter_add(
           fp_buckets_v, bucket_indices, false_labels, use_locking=use_locking)

     # Set up the cumulative sums to compute the actual metrics.
     tp = math_ops.cumsum(tp_buckets_v, reverse=True, name='tp')
     fp = math_ops.cumsum(fp_buckets_v, reverse=True, name='fp')
     # fn = sum(true_labels) - tp
     #    = sum(tp_buckets) - tp
     #    = tp[0] - tp
     # Similarly,
     # tn = fp[0] - fp
     tn = fp[0] - fp
     fn = tp[0] - tp

     # We use a minimum to prevent division by 0.
     epsilon = ops.convert_to_tensor(1e-7, dtype=agg_dtype)
     precision = tp / math_ops.maximum(epsilon, tp + fp)
     recall = tp / math_ops.maximum(epsilon, tp + fn)

     # Convert all tensors back to predictions' dtype (as per function contract).
     out_dtype = predictions.dtype
     _convert = lambda tensor: math_ops.cast(tensor, out_dtype)
     result = PrecisionRecallData(
         tp=_convert(tp),
         fp=_convert(fp),
         tn=_convert(tn),
         fn=_convert(fn),
         precision=_convert(precision),
         recall=_convert(recall),
         thresholds=_convert(math_ops.lin_space(0.0, 1.0, num_thresholds)))
     update_op = control_flow_ops.group(update_tp, update_fp)
     return result, update_op


 def streaming_specificity_at_sensitivity(predictions,
                                          labels,
                                          sensitivity,
                                          weights=None,
                                          num_thresholds=200,
                                          metrics_collections=None,
                                          updates_collections=None,
                                          name=None):
   """Computes the specificity at a given sensitivity.

   The `streaming_specificity_at_sensitivity` function creates four local
   variables, `true_positives`, `true_negatives`, `false_positives` and
   `false_negatives` that are used to compute the specificity at the given
   sensitivity value. The threshold for the given sensitivity value is computed
   and used to evaluate the corresponding specificity.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `specificity`. `update_op` increments the `true_positives`, `true_negatives`,
   `false_positives` and `false_negatives` counts with the weight of each case
   found in the `predictions` and `labels`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   For additional information about specificity and sensitivity, see the
   following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     sensitivity: A scalar value in range `[0, 1]`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     num_thresholds: The number of thresholds to use for matching the given
       sensitivity.
     metrics_collections: An optional list of collections that `specificity`
       should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     specificity: A scalar `Tensor` representing the specificity at the given
       `specificity` value.
     update_op: An operation that increments the `true_positives`,
       `true_negatives`, `false_positives` and `false_negatives` variables
       appropriately and whose value matches `specificity`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       `sensitivity` is not between 0 and 1, or if either `metrics_collections`
       or `updates_collections` are not a list or tuple.
   """
   return metrics.specificity_at_sensitivity(
       sensitivity=sensitivity,
       num_thresholds=num_thresholds,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_sensitivity_at_specificity(predictions,
                                          labels,
                                          specificity,
                                          weights=None,
                                          num_thresholds=200,
                                          metrics_collections=None,
                                          updates_collections=None,
                                          name=None):
   """Computes the sensitivity at a given specificity.

   The `streaming_sensitivity_at_specificity` function creates four local
   variables, `true_positives`, `true_negatives`, `false_positives` and
   `false_negatives` that are used to compute the sensitivity at the given
   specificity value. The threshold for the given specificity value is computed
   and used to evaluate the corresponding sensitivity.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `sensitivity`. `update_op` increments the `true_positives`, `true_negatives`,
   `false_positives` and `false_negatives` counts with the weight of each case
   found in the `predictions` and `labels`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   For additional information about specificity and sensitivity, see the
   following: https://en.wikipedia.org/wiki/Sensitivity_and_specificity

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     specificity: A scalar value in range `[0, 1]`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     num_thresholds: The number of thresholds to use for matching the given
       specificity.
     metrics_collections: An optional list of collections that `sensitivity`
       should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     sensitivity: A scalar `Tensor` representing the sensitivity at the given
       `specificity` value.
     update_op: An operation that increments the `true_positives`,
       `true_negatives`, `false_positives` and `false_negatives` variables
       appropriately and whose value matches `sensitivity`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       `specificity` is not between 0 and 1, or if either `metrics_collections`
       or `updates_collections` are not a list or tuple.
   """
   return metrics.sensitivity_at_specificity(
       specificity=specificity,
       num_thresholds=num_thresholds,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None,
             'Please switch to tf.metrics.precision_at_thresholds. Note that '
             'the order of the labels and predictions arguments are switched.')
 def streaming_precision_at_thresholds(predictions,
                                       labels,
                                       thresholds,
                                       weights=None,
                                       metrics_collections=None,
                                       updates_collections=None,
                                       name=None):
   """Computes precision values for different `thresholds` on `predictions`.

   The `streaming_precision_at_thresholds` function creates four local variables,
   `true_positives`, `true_negatives`, `false_positives` and `false_negatives`
   for various values of thresholds. `precision[i]` is defined as the total
   weight of values in `predictions` above `thresholds[i]` whose corresponding
   entry in `labels` is `True`, divided by the total weight of values in
   `predictions` above `thresholds[i]` (`true_positives[i] / (true_positives[i] +
   false_positives[i])`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `precision`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     thresholds: A python list or tuple of float thresholds in `[0, 1]`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that `precision` should
       be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     precision: A float `Tensor` of shape `[len(thresholds)]`.
     update_op: An operation that increments the `true_positives`,
       `true_negatives`, `false_positives` and `false_negatives` variables that
       are used in the computation of `precision`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.precision_at_thresholds(
       thresholds=thresholds,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None,
             'Please switch to tf.metrics.recall_at_thresholds. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_recall_at_thresholds(predictions,
                                    labels,
                                    thresholds,
                                    weights=None,
                                    metrics_collections=None,
                                    updates_collections=None,
                                    name=None):
   """Computes various recall values for different `thresholds` on `predictions`.

   The `streaming_recall_at_thresholds` function creates four local variables,
   `true_positives`, `true_negatives`, `false_positives` and `false_negatives`
   for various values of thresholds. `recall[i]` is defined as the total weight
   of values in `predictions` above `thresholds[i]` whose corresponding entry in
   `labels` is `True`, divided by the total weight of `True` values in `labels`
   (`true_positives[i] / (true_positives[i] + false_negatives[i])`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the `recall`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     thresholds: A python list or tuple of float thresholds in `[0, 1]`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that `recall` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     recall: A float `Tensor` of shape `[len(thresholds)]`.
     update_op: An operation that increments the `true_positives`,
       `true_negatives`, `false_positives` and `false_negatives` variables that
       are used in the computation of `recall`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.recall_at_thresholds(
       thresholds=thresholds,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_false_positive_rate_at_thresholds(predictions,
                                                 labels,
                                                 thresholds,
                                                 weights=None,
                                                 metrics_collections=None,
                                                 updates_collections=None,
                                                 name=None):
   """Computes various fpr values for different `thresholds` on `predictions`.

   The `streaming_false_positive_rate_at_thresholds` function creates two
   local variables, `false_positives`, `true_negatives`, for various values of
   thresholds. `false_positive_rate[i]` is defined as the total weight
   of values in `predictions` above `thresholds[i]` whose corresponding entry in
   `labels` is `False`, divided by the total weight of `False` values in `labels`
   (`false_positives[i] / (false_positives[i] + true_negatives[i])`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `false_positive_rate`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     thresholds: A python list or tuple of float thresholds in `[0, 1]`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `false_positive_rate` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     false_positive_rate: A float `Tensor` of shape `[len(thresholds)]`.
     update_op: An operation that increments the `false_positives` and
       `true_negatives` variables that are used in the computation of
       `false_positive_rate`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(name, 'false_positive_rate_at_thresholds',
                                      (predictions, labels, weights)):
     values, update_ops = _streaming_confusion_matrix_at_thresholds(
         predictions, labels, thresholds, weights, includes=('fp', 'tn'))

     # Avoid division by zero.
     epsilon = _EPSILON

     def compute_fpr(fp, tn, name):
       return math_ops.div(fp, epsilon + fp + tn, name='fpr_' + name)

     fpr = compute_fpr(values['fp'], values['tn'], 'value')
     update_op = compute_fpr(update_ops['fp'], update_ops['tn'], 'update_op')

     if metrics_collections:
       ops.add_to_collections(metrics_collections, fpr)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return fpr, update_op


 def streaming_false_negative_rate_at_thresholds(predictions,
                                                 labels,
                                                 thresholds,
                                                 weights=None,
                                                 metrics_collections=None,
                                                 updates_collections=None,
                                                 name=None):
   """Computes various fnr values for different `thresholds` on `predictions`.

   The `streaming_false_negative_rate_at_thresholds` function creates two
   local variables, `false_negatives`, `true_positives`, for various values of
   thresholds. `false_negative_rate[i]` is defined as the total weight
   of values in `predictions` above `thresholds[i]` whose corresponding entry in
   `labels` is `False`, divided by the total weight of `True` values in `labels`
   (`false_negatives[i] / (false_negatives[i] + true_positives[i])`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `false_positive_rate`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     labels: A `bool` `Tensor` whose shape matches `predictions`.
     thresholds: A python list or tuple of float thresholds in `[0, 1]`.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `false_negative_rate` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     false_negative_rate: A float `Tensor` of shape `[len(thresholds)]`.
     update_op: An operation that increments the `false_negatives` and
       `true_positives` variables that are used in the computation of
       `false_negative_rate`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   with variable_scope.variable_scope(name, 'false_negative_rate_at_thresholds',
                                      (predictions, labels, weights)):
     values, update_ops = _streaming_confusion_matrix_at_thresholds(
         predictions, labels, thresholds, weights, includes=('fn', 'tp'))

     # Avoid division by zero.
     epsilon = _EPSILON

     def compute_fnr(fn, tp, name):
       return math_ops.div(fn, epsilon + fn + tp, name='fnr_' + name)

     fnr = compute_fnr(values['fn'], values['tp'], 'value')
     update_op = compute_fnr(update_ops['fn'], update_ops['tp'], 'update_op')

     if metrics_collections:
       ops.add_to_collections(metrics_collections, fnr)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return fnr, update_op


 def _at_k_name(name, k=None, class_id=None):
   if k is not None:
     name = '%s_at_%d' % (name, k)
   else:
     name = '%s_at_k' % (name)
   if class_id is not None:
     name = '%s_class%d' % (name, class_id)
   return name


 @deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, '
             'and reshape labels from [batch_size] to [batch_size, 1].')
 def streaming_recall_at_k(predictions,
                           labels,
                           k,
                           weights=None,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
   """Computes the recall@k of the predictions with respect to dense labels.

   The `streaming_recall_at_k` function creates two local variables, `total` and
   `count`, that are used to compute the recall@k frequency. This frequency is
   ultimately returned as `recall_at_<k>`: an idempotent operation that simply
   divides `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `recall_at_<k>`. Internally, an `in_top_k` operation computes a `Tensor` with
   shape [batch_size] whose elements indicate whether or not the corresponding
   label is in the top `k` `predictions`. Then `update_op` increments `total`
   with the reduced sum of `weights` where `in_top_k` is `True`, and it
   increments `count` with the reduced sum of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A float `Tensor` of dimension [batch_size, num_classes].
     labels: A `Tensor` of dimension [batch_size] whose type is in `int32`,
       `int64`.
     k: The number of top elements to look at for computing recall.
     weights: `Tensor` whose rank is either 0, or the same rank as `labels`, and
       must be broadcastable to `labels` (i.e., all dimensions must be either
       `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that `recall_at_k`
       should be added to.
     updates_collections: An optional list of collections `update_op` should be
       added to.
     name: An optional variable_scope name.

   Returns:
     recall_at_k: A `Tensor` representing the recall@k, the fraction of labels
       which fall into the top `k` predictions.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `recall_at_k`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   in_top_k = math_ops.cast(nn.in_top_k(predictions, labels, k), dtypes.float32)
   return streaming_mean(in_top_k, weights, metrics_collections,
                         updates_collections, name or _at_k_name('recall', k))


 # TODO(ptucker): Validate range of values in labels?
 def streaming_sparse_recall_at_k(predictions,
                                  labels,
                                  k,
                                  class_id=None,
                                  weights=None,
                                  metrics_collections=None,
                                  updates_collections=None,
                                  name=None):
   """Computes recall@k of the predictions with respect to sparse labels.

   If `class_id` is not specified, we'll calculate recall as the ratio of true
       positives (i.e., correct predictions, items in the top `k` highest
       `predictions` that are found in the corresponding row in `labels`) to
       actual positives (the full `labels` row).
   If `class_id` is specified, we calculate recall by considering only the rows
       in the batch for which `class_id` is in `labels`, and computing the
       fraction of them for which `class_id` is in the corresponding row in
       `labels`.

   `streaming_sparse_recall_at_k` creates two local variables,
   `true_positive_at_<k>` and `false_negative_at_<k>`, that are used to compute
   the recall_at_k frequency. This frequency is ultimately returned as
   `recall_at_<k>`: an idempotent operation that simply divides
   `true_positive_at_<k>` by total (`true_positive_at_<k>` +
   `false_negative_at_<k>`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `recall_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
   indicating the top `k` `predictions`. Set operations applied to `top_k` and
   `labels` calculate the true positives and false negatives weighted by
   `weights`. Then `update_op` increments `true_positive_at_<k>` and
   `false_negative_at_<k>` using these values.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where N >=
       1. Commonly, N=1 and predictions has shape [batch size, num_classes]. The
       final dimension contains the logit values for each class. [D1, ... DN]
       must match `labels`.
     labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN,
       num_labels], where N >= 1 and num_labels is the number of target classes
       for the associated prediction. Commonly, N=1 and `labels` has shape
       [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values
       should be in range [0, num_classes), where num_classes is the last
       dimension of `predictions`. Values outside this range always count towards
       `false_negative_at_<k>`.
     k: Integer, k for @k metric.
     class_id: Integer class ID for which we want binary metrics. This should be
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If class_id is outside this range, the method returns NAN.
     weights: `Tensor` whose rank is either 0, or n-1, where n is the rank of
       `labels`. If the latter, it must be broadcastable to `labels` (i.e., all
       dimensions must be either `1`, or the same as the corresponding `labels`
       dimension).
     metrics_collections: An optional list of collections that values should be
       added to.
     updates_collections: An optional list of collections that updates should be
       added to.
     name: Name of new update operation, and namespace for other dependent ops.

   Returns:
     recall: Scalar `float64` `Tensor` with the value of `true_positives` divided
       by the sum of `true_positives` and `false_negatives`.
     update_op: `Operation` that increments `true_positives` and
       `false_negatives` variables appropriately, and whose value matches
       `recall`.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match
     `predictions`, or if either `metrics_collections` or `updates_collections`
     are not a list or tuple.
   """
   return metrics.recall_at_k(
       k=k,
       class_id=class_id,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 # TODO(ptucker): Validate range of values in labels?
 def streaming_sparse_precision_at_k(predictions,
                                     labels,
                                     k,
                                     class_id=None,
                                     weights=None,
                                     metrics_collections=None,
                                     updates_collections=None,
                                     name=None):
   """Computes precision@k of the predictions with respect to sparse labels.

   If `class_id` is not specified, we calculate precision as the ratio of true
       positives (i.e., correct predictions, items in the top `k` highest
       `predictions` that are found in the corresponding row in `labels`) to
       positives (all top `k` `predictions`).
   If `class_id` is specified, we calculate precision by considering only the
       rows in the batch for which `class_id` is in the top `k` highest
       `predictions`, and computing the fraction of them for which `class_id` is
       in the corresponding row in `labels`.

   We expect precision to decrease as `k` increases.

   `streaming_sparse_precision_at_k` creates two local variables,
   `true_positive_at_<k>` and `false_positive_at_<k>`, that are used to compute
   the precision@k frequency. This frequency is ultimately returned as
   `precision_at_<k>`: an idempotent operation that simply divides
   `true_positive_at_<k>` by total (`true_positive_at_<k>` +
   `false_positive_at_<k>`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
   indicating the top `k` `predictions`. Set operations applied to `top_k` and
   `labels` calculate the true positives and false positives weighted by
   `weights`. Then `update_op` increments `true_positive_at_<k>` and
   `false_positive_at_<k>` using these values.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where N >=
       1. Commonly, N=1 and predictions has shape [batch size, num_classes]. The
       final dimension contains the logit values for each class. [D1, ... DN]
       must match `labels`.
     labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN,
       num_labels], where N >= 1 and num_labels is the number of target classes
       for the associated prediction. Commonly, N=1 and `labels` has shape
       [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values
       should be in range [0, num_classes), where num_classes is the last
       dimension of `predictions`. Values outside this range are ignored.
     k: Integer, k for @k metric.
     class_id: Integer class ID for which we want binary metrics. This should be
       in range [0, num_classes], where num_classes is the last dimension of
       `predictions`. If `class_id` is outside this range, the method returns
       NAN.
     weights: `Tensor` whose rank is either 0, or n-1, where n is the rank of
       `labels`. If the latter, it must be broadcastable to `labels` (i.e., all
       dimensions must be either `1`, or the same as the corresponding `labels`
       dimension).
     metrics_collections: An optional list of collections that values should be
       added to.
     updates_collections: An optional list of collections that updates should be
       added to.
     name: Name of new update operation, and namespace for other dependent ops.

   Returns:
     precision: Scalar `float64` `Tensor` with the value of `true_positives`
       divided by the sum of `true_positives` and `false_positives`.
     update_op: `Operation` that increments `true_positives` and
       `false_positives` variables appropriately, and whose value matches
       `precision`.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match
       `predictions`, or if either `metrics_collections` or `updates_collections`
       are not a list or tuple.
   """
   return metrics.precision_at_k(
       k=k,
       class_id=class_id,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 # TODO(ptucker): Validate range of values in labels?
 def streaming_sparse_precision_at_top_k(top_k_predictions,
                                         labels,
                                         class_id=None,
                                         weights=None,
                                         metrics_collections=None,
                                         updates_collections=None,
                                         name=None):
   """Computes precision@k of top-k predictions with respect to sparse labels.

   If `class_id` is not specified, we calculate precision as the ratio of
       true positives (i.e., correct predictions, items in `top_k_predictions`
       that are found in the corresponding row in `labels`) to positives (all
       `top_k_predictions`).
   If `class_id` is specified, we calculate precision by considering only the
       rows in the batch for which `class_id` is in the top `k` highest
       `predictions`, and computing the fraction of them for which `class_id` is
       in the corresponding row in `labels`.

   We expect precision to decrease as `k` increases.

   `streaming_sparse_precision_at_top_k` creates two local variables,
   `true_positive_at_k` and `false_positive_at_k`, that are used to compute
   the precision@k frequency. This frequency is ultimately returned as
   `precision_at_k`: an idempotent operation that simply divides
   `true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `precision_at_k`. Internally, set operations applied to `top_k_predictions`
   and `labels` calculate the true positives and false positives weighted by
   `weights`. Then `update_op` increments `true_positive_at_k` and
   `false_positive_at_k` using these values.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where N >= 1.
       Commonly, N=1 and top_k_predictions has shape [batch size, k]. The final
       dimension contains the indices of top-k labels. [D1, ... DN] must match
       `labels`.
     labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN,
       num_labels], where N >= 1 and num_labels is the number of target classes
       for the associated prediction. Commonly, N=1 and `labels` has shape
       [batch_size, num_labels]. [D1, ... DN] must match `top_k_predictions`.
       Values should be in range [0, num_classes), where num_classes is the last
       dimension of `predictions`. Values outside this range are ignored.
     class_id: Integer class ID for which we want binary metrics. This should be
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If `class_id` is outside this range, the method returns
       NAN.
     weights: `Tensor` whose rank is either 0, or n-1, where n is the rank of
       `labels`. If the latter, it must be broadcastable to `labels` (i.e., all
       dimensions must be either `1`, or the same as the corresponding `labels`
       dimension).
     metrics_collections: An optional list of collections that values should be
       added to.
     updates_collections: An optional list of collections that updates should be
       added to.
     name: Name of new update operation, and namespace for other dependent ops.

   Returns:
     precision: Scalar `float64` `Tensor` with the value of `true_positives`
       divided by the sum of `true_positives` and `false_positives`.
     update_op: `Operation` that increments `true_positives` and
       `false_positives` variables appropriately, and whose value matches
       `precision`.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match
       `predictions`, or if either `metrics_collections` or `updates_collections`
       are not a list or tuple.
     ValueError: If `top_k_predictions` has rank < 2.
   """
   default_name = _at_k_name('precision', class_id=class_id)
   with ops.name_scope(name, default_name,
                       (top_k_predictions, labels, weights)) as name_scope:
     return metrics_impl.precision_at_top_k(
         labels=labels,
         predictions_idx=top_k_predictions,
         class_id=class_id,
         weights=weights,
         metrics_collections=metrics_collections,
         updates_collections=updates_collections,
         name=name_scope)


 def sparse_recall_at_top_k(labels,
                            top_k_predictions,
                            class_id=None,
                            weights=None,
                            metrics_collections=None,
                            updates_collections=None,
                            name=None):
   """Computes recall@k of top-k predictions with respect to sparse labels.

   If `class_id` is specified, we calculate recall by considering only the
       entries in the batch for which `class_id` is in the label, and computing
       the fraction of them for which `class_id` is in the top-k `predictions`.
   If `class_id` is not specified, we'll calculate recall as how often on
       average a class among the labels of a batch entry is in the top-k
       `predictions`.

   `sparse_recall_at_top_k` creates two local variables, `true_positive_at_<k>`
   and `false_negative_at_<k>`, that are used to compute the recall_at_k
   frequency. This frequency is ultimately returned as `recall_at_<k>`: an
   idempotent operation that simply divides `true_positive_at_<k>` by total
   (`true_positive_at_<k>` + `false_negative_at_<k>`).

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `recall_at_<k>`. Set operations applied to `top_k` and `labels` calculate the
   true positives and false negatives weighted by `weights`. Then `update_op`
   increments `true_positive_at_<k>` and `false_negative_at_<k>` using these
   values.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN,
       num_labels], where N >= 1 and num_labels is the number of target classes
       for the associated prediction. Commonly, N=1 and `labels` has shape
       [batch_size, num_labels]. [D1, ... DN] must match `top_k_predictions`.
       Values should be in range [0, num_classes), where num_classes is the last
       dimension of `predictions`. Values outside this range always count towards
       `false_negative_at_<k>`.
     top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where N >= 1.
       Commonly, N=1 and top_k_predictions has shape [batch size, k]. The final
       dimension contains the indices of top-k labels. [D1, ... DN] must match
       `labels`.
     class_id: Integer class ID for which we want binary metrics. This should be
       in range [0, num_classes), where num_classes is the last dimension of
       `predictions`. If class_id is outside this range, the method returns NAN.
     weights: `Tensor` whose rank is either 0, or n-1, where n is the rank of
       `labels`. If the latter, it must be broadcastable to `labels` (i.e., all
       dimensions must be either `1`, or the same as the corresponding `labels`
       dimension).
     metrics_collections: An optional list of collections that values should be
       added to.
     updates_collections: An optional list of collections that updates should be
       added to.
     name: Name of new update operation, and namespace for other dependent ops.

   Returns:
     recall: Scalar `float64` `Tensor` with the value of `true_positives` divided
       by the sum of `true_positives` and `false_negatives`.
     update_op: `Operation` that increments `true_positives` and
       `false_negatives` variables appropriately, and whose value matches
       `recall`.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match
     `predictions`, or if either `metrics_collections` or `updates_collections`
     are not a list or tuple.
   """
   default_name = _at_k_name('recall', class_id=class_id)
   with ops.name_scope(name, default_name,
                       (top_k_predictions, labels, weights)) as name_scope:
     return metrics_impl.recall_at_top_k(
         labels=labels,
         predictions_idx=top_k_predictions,
         class_id=class_id,
         weights=weights,
         metrics_collections=metrics_collections,
         updates_collections=updates_collections,
         name=name_scope)


 def _compute_recall_at_precision(tp, fp, fn, precision, name,
                                  strict_mode=False):
   """Helper function to compute recall at a given `precision`.

   Args:
     tp: The number of true positives.
     fp: The number of false positives.
     fn: The number of false negatives.
     precision: The precision for which the recall will be calculated.
     name: An optional variable_scope name.
     strict_mode: If true and there exists a threshold where the precision is no
       smaller than the target precision, return the corresponding recall at the
       threshold. Otherwise, return 0. If false, find the threshold where the
       precision is closest to the target precision and return the recall at the
       threshold.

   Returns:
     The recall at a given `precision`.
   """
   precisions = math_ops.div(tp, tp + fp + _EPSILON)
   if not strict_mode:
     tf_index = math_ops.argmin(
         math_ops.abs(precisions - precision), 0, output_type=dtypes.int32)
     # Now, we have the implicit threshold, so compute the recall:
     return math_ops.div(tp[tf_index], tp[tf_index] + fn[tf_index] + _EPSILON,
                         name)
   else:
     # We aim to find the threshold where the precision is minimum but no smaller
     # than the target precision.
     # The rationale:
     # 1. Compute the difference between precisions (by different thresholds) and
     #   the target precision.
     # 2. Take the reciprocal of the values by the above step. The intention is
     #   to make the positive values rank before negative values and also the
     #   smaller positives rank before larger positives.
     tf_index = math_ops.argmax(
         math_ops.div(1.0, precisions - precision + _EPSILON),
         0,
         output_type=dtypes.int32)

     def _return_good_recall():
       return math_ops.div(tp[tf_index], tp[tf_index] + fn[tf_index] + _EPSILON,
                           name)

     return control_flow_ops.cond(precisions[tf_index] >= precision,
                                  _return_good_recall, lambda: .0)


 def recall_at_precision(labels,
                         predictions,
                         precision,
                         weights=None,
                         num_thresholds=200,
                         metrics_collections=None,
                         updates_collections=None,
                         name=None,
                         strict_mode=False):
   """Computes `recall` at `precision`.

   The `recall_at_precision` function creates four local variables,
   `tp` (true positives), `fp` (false positives) and `fn` (false negatives)
   that are used to compute the `recall` at the given `precision` value. The
   threshold for the given `precision` value is computed and used to evaluate the
   corresponding `recall`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `recall`. `update_op` increments the `tp`, `fp` and `fn` counts with the
   weight of each case found in the `predictions` and `labels`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     precision: A scalar value in range `[0, 1]`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     num_thresholds: The number of thresholds to use for matching the given
       `precision`.
     metrics_collections: An optional list of collections that `recall` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.
     strict_mode: If true and there exists a threshold where the precision is
       above the target precision, return the corresponding recall at the
       threshold. Otherwise, return 0. If false, find the threshold where the
       precision is closest to the target precision and return the recall at the
       threshold.

   Returns:
     recall: A scalar `Tensor` representing the recall at the given
       `precision` value.
     update_op: An operation that increments the `tp`, `fp` and `fn`
       variables appropriately and whose value matches `recall`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       `precision` is not between 0 and 1, or if either `metrics_collections`
       or `updates_collections` are not a list or tuple.

   """
   if not 0 <= precision <= 1:
     raise ValueError('`precision` must be in the range [0, 1].')

   with variable_scope.variable_scope(name, 'recall_at_precision',
                                      (predictions, labels, weights)):
     thresholds = [
         i * 1.0 / (num_thresholds - 1) for i in range(1, num_thresholds - 1)
     ]
     thresholds = [0.0 - _EPSILON] + thresholds + [1.0 + _EPSILON]

     values, update_ops = _streaming_confusion_matrix_at_thresholds(
         predictions, labels, thresholds, weights)

     recall = _compute_recall_at_precision(values['tp'], values['fp'],
                                           values['fn'], precision, 'value',
                                           strict_mode)
     update_op = _compute_recall_at_precision(update_ops['tp'], update_ops['fp'],
                                              update_ops['fn'], precision,
                                              'update_op', strict_mode)

     if metrics_collections:
       ops.add_to_collections(metrics_collections, recall)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return recall, update_op


 def precision_at_recall(labels,
                         predictions,
                         target_recall,
                         weights=None,
                         num_thresholds=200,
                         metrics_collections=None,
                         updates_collections=None,
                         name=None):
   """Computes the precision at a given recall.

   This function creates variables to track the true positives, false positives,
   true negatives, and false negatives at a set of thresholds. Among those
   thresholds where recall is at least `target_recall`, precision is computed
   at the threshold where recall is closest to `target_recall`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   precision at `target_recall`. `update_op` increments the counts of true
   positives, false positives, true negatives, and false negatives with the
   weight of each case found in the `predictions` and `labels`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   For additional information about precision and recall, see
   http://en.wikipedia.org/wiki/Precision_and_recall

   Args:
     labels: The ground truth values, a `Tensor` whose dimensions must match
       `predictions`. Will be cast to `bool`.
     predictions: A floating point `Tensor` of arbitrary shape and whose values
       are in the range `[0, 1]`.
     target_recall: A scalar value in range `[0, 1]`.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     num_thresholds: The number of thresholds to use for matching the given
       recall.
     metrics_collections: An optional list of collections to which `precision`
       should be added.
     updates_collections: An optional list of collections to which `update_op`
       should be added.
     name: An optional variable_scope name.

   Returns:
     precision: A scalar `Tensor` representing the precision at the given
       `target_recall` value.
     update_op: An operation that increments the variables for tracking the
       true positives, false positives, true negatives, and false negatives and
       whose value matches `precision`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       `target_recall` is not between 0 and 1, or if either `metrics_collections`
       or `updates_collections` are not a list or tuple.
     RuntimeError: If eager execution is enabled.
   """
   if context.executing_eagerly():
     raise RuntimeError('tf.metrics.precision_at_recall is not '
                        'supported when eager execution is enabled.')

   if target_recall < 0 or target_recall > 1:
     raise ValueError('`target_recall` must be in the range [0, 1].')

   with variable_scope.variable_scope(name, 'precision_at_recall',
                                      (predictions, labels, weights)):
     kepsilon = 1e-7  # Used to avoid division by zero.
     thresholds = [
         (i + 1) * 1.0 / (num_thresholds - 1) for i in range(num_thresholds - 2)
     ]
     thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon]

     values, update_ops = _streaming_confusion_matrix_at_thresholds(
         predictions, labels, thresholds, weights)

     def compute_precision_at_recall(tp, fp, fn, name):
       """Computes the precision at a given recall.

       Args:
         tp: True positives.
         fp: False positives.
         fn: False negatives.
         name: A name for the operation.

       Returns:
         The precision at the desired recall.
       """
       recalls = math_ops.div(tp, tp + fn + kepsilon)

       # Because recall is monotone decreasing as a function of the threshold,
       # the smallest recall exceeding target_recall occurs at the largest
       # threshold where recall >= target_recall.
       admissible_recalls = math_ops.cast(
           math_ops.greater_equal(recalls, target_recall), dtypes.int64)
       tf_index = math_ops.reduce_sum(admissible_recalls) - 1

       # Now we have the threshold at which to compute precision:
       return math_ops.div(tp[tf_index] + kepsilon,
                           tp[tf_index] + fp[tf_index] + kepsilon, name)

     precision_value = compute_precision_at_recall(values['tp'], values['fp'],
                                                   values['fn'], 'value')
     update_op = compute_precision_at_recall(update_ops['tp'], update_ops['fp'],
                                             update_ops['fn'], 'update_op')

     if metrics_collections:
       ops.add_to_collections(metrics_collections, precision_value)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return precision_value, update_op


 def streaming_sparse_average_precision_at_k(predictions,
                                             labels,
                                             k,
                                             weights=None,
                                             metrics_collections=None,
                                             updates_collections=None,
                                             name=None):
   """Computes average precision@k of predictions with respect to sparse labels.

   See `sparse_average_precision_at_k` for details on formula. `weights` are
   applied to the result of `sparse_average_precision_at_k`

   `streaming_sparse_average_precision_at_k` creates two local variables,
   `average_precision_at_<k>/total` and `average_precision_at_<k>/max`, that
   are used to compute the frequency. This frequency is ultimately returned as
   `average_precision_at_<k>`: an idempotent operation that simply divides
   `average_precision_at_<k>/total` by `average_precision_at_<k>/max`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `precision_at_<k>`. Internally, a `top_k` operation computes a `Tensor`
   indicating the top `k` `predictions`. Set operations applied to `top_k` and
   `labels` calculate the true positives and false positives weighted by
   `weights`. Then `update_op` increments `true_positive_at_<k>` and
   `false_positive_at_<k>` using these values.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where N >=
       1. Commonly, N=1 and `predictions` has shape [batch size, num_classes].
       The final dimension contains the logit values for each class. [D1, ... DN]
       must match `labels`.
     labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN,
       num_labels], where N >= 1 and num_labels is the number of target classes
       for the associated prediction. Commonly, N=1 and `labels` has shape
       [batch_size, num_labels]. [D1, ... DN] must match `predictions_`. Values
       should be in range [0, num_classes), where num_classes is the last
       dimension of `predictions`. Values outside this range are ignored.
     k: Integer, k for @k metric. This will calculate an average precision for
       range `[1,k]`, as documented above.
     weights: `Tensor` whose rank is either 0, or n-1, where n is the rank of
       `labels`. If the latter, it must be broadcastable to `labels` (i.e., all
       dimensions must be either `1`, or the same as the corresponding `labels`
       dimension).
     metrics_collections: An optional list of collections that values should be
       added to.
     updates_collections: An optional list of collections that updates should be
       added to.
     name: Name of new update operation, and namespace for other dependent ops.

   Returns:
     mean_average_precision: Scalar `float64` `Tensor` with the mean average
       precision values.
     update: `Operation` that increments variables appropriately, and whose
       value matches `metric`.
   """
   return metrics.average_precision_at_k(
       k=k,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_sparse_average_precision_at_top_k(top_k_predictions,
                                                 labels,
                                                 weights=None,
                                                 metrics_collections=None,
                                                 updates_collections=None,
                                                 name=None):
   """Computes average precision@k of predictions with respect to sparse labels.

   `streaming_sparse_average_precision_at_top_k` creates two local variables,
   `average_precision_at_<k>/total` and `average_precision_at_<k>/max`, that
   are used to compute the frequency. This frequency is ultimately returned as
   `average_precision_at_<k>`: an idempotent operation that simply divides
   `average_precision_at_<k>/total` by `average_precision_at_<k>/max`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `precision_at_<k>`. Set operations applied to `top_k` and `labels` calculate
   the true positives and false positives weighted by `weights`. Then `update_op`
   increments `true_positive_at_<k>` and `false_positive_at_<k>` using these
   values.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where N >= 1.
       Commonly, N=1 and `predictions_idx` has shape [batch size, k]. The final
       dimension must be set and contains the top `k` predicted class indices.
       [D1, ... DN] must match `labels`. Values should be in range [0,
       num_classes).
     labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN,
       num_labels] or [D1, ... DN], where the latter implies num_labels=1. N >= 1
       and num_labels is the number of target classes for the associated
       prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels].
       [D1, ... DN] must match `top_k_predictions`. Values should be in range [0,
       num_classes).
     weights: `Tensor` whose rank is either 0, or n-1, where n is the rank of
       `labels`. If the latter, it must be broadcastable to `labels` (i.e., all
       dimensions must be either `1`, or the same as the corresponding `labels`
       dimension).
     metrics_collections: An optional list of collections that values should be
       added to.
     updates_collections: An optional list of collections that updates should be
       added to.
     name: Name of new update operation, and namespace for other dependent ops.

   Returns:
     mean_average_precision: Scalar `float64` `Tensor` with the mean average
       precision values.
     update: `Operation` that increments variables appropriately, and whose
       value matches `metric`.

   Raises:
     ValueError: if the last dimension of top_k_predictions is not set.
   """
   return metrics_impl._streaming_sparse_average_precision_at_top_k(  # pylint: disable=protected-access
       predictions_idx=top_k_predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None,
             'Please switch to tf.metrics.mean_absolute_error. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_mean_absolute_error(predictions,
                                   labels,
                                   weights=None,
                                   metrics_collections=None,
                                   updates_collections=None,
                                   name=None):
   """Computes the mean absolute error between the labels and predictions.

   The `streaming_mean_absolute_error` function creates two local variables,
   `total` and `count` that are used to compute the mean absolute error. This
   average is weighted by `weights`, and it is ultimately returned as
   `mean_absolute_error`: an idempotent operation that simply divides `total` by
   `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `mean_absolute_error`. Internally, an `absolute_errors` operation computes the
   absolute value of the differences between `predictions` and `labels`. Then
   `update_op` increments `total` with the reduced sum of the product of
   `weights` and `absolute_errors`, and it increments `count` with the reduced
   sum of `weights`

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A `Tensor` of arbitrary shape.
     labels: A `Tensor` of the same shape as `predictions`.
     weights: Optional `Tensor` indicating the frequency with which an example is
       sampled. Rank must be 0, or the same rank as `labels`, and must be
       broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
       same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `mean_absolute_error` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     mean_absolute_error: A `Tensor` representing the current mean, the value of
       `total` divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `mean_absolute_error`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.mean_absolute_error(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_mean_relative_error(predictions,
                                   labels,
                                   normalizer,
                                   weights=None,
                                   metrics_collections=None,
                                   updates_collections=None,
                                   name=None):
   """Computes the mean relative error by normalizing with the given values.

   The `streaming_mean_relative_error` function creates two local variables,
   `total` and `count` that are used to compute the mean relative absolute error.
   This average is weighted by `weights`, and it is ultimately returned as
   `mean_relative_error`: an idempotent operation that simply divides `total` by
   `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `mean_reative_error`. Internally, a `relative_errors` operation divides the
   absolute value of the differences between `predictions` and `labels` by the
   `normalizer`. Then `update_op` increments `total` with the reduced sum of the
   product of `weights` and `relative_errors`, and it increments `count` with the
   reduced sum of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A `Tensor` of arbitrary shape.
     labels: A `Tensor` of the same shape as `predictions`.
     normalizer: A `Tensor` of the same shape as `predictions`.
     weights: Optional `Tensor` indicating the frequency with which an example is
       sampled. Rank must be 0, or the same rank as `labels`, and must be
       broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
       same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `mean_relative_error` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     mean_relative_error: A `Tensor` representing the current mean, the value of
       `total` divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `mean_relative_error`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.mean_relative_error(
       normalizer=normalizer,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(None,
             'Please switch to tf.metrics.mean_squared_error. Note that the '
             'order of the labels and predictions arguments has been switched.')
 def streaming_mean_squared_error(predictions,
                                  labels,
                                  weights=None,
                                  metrics_collections=None,
                                  updates_collections=None,
                                  name=None):
   """Computes the mean squared error between the labels and predictions.

   The `streaming_mean_squared_error` function creates two local variables,
   `total` and `count` that are used to compute the mean squared error.
   This average is weighted by `weights`, and it is ultimately returned as
   `mean_squared_error`: an idempotent operation that simply divides `total` by
   `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `mean_squared_error`. Internally, a `squared_error` operation computes the
   element-wise square of the difference between `predictions` and `labels`. Then
   `update_op` increments `total` with the reduced sum of the product of
   `weights` and `squared_error`, and it increments `count` with the reduced sum
   of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A `Tensor` of arbitrary shape.
     labels: A `Tensor` of the same shape as `predictions`.
     weights: Optional `Tensor` indicating the frequency with which an example is
       sampled. Rank must be 0, or the same rank as `labels`, and must be
       broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
       same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `mean_squared_error` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     mean_squared_error: A `Tensor` representing the current mean, the value of
       `total` divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `mean_squared_error`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.mean_squared_error(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 @deprecated(
     None, 'Please switch to tf.metrics.root_mean_squared_error. Note that the '
     'order of the labels and predictions arguments has been switched.')
 def streaming_root_mean_squared_error(predictions,
                                       labels,
                                       weights=None,
                                       metrics_collections=None,
                                       updates_collections=None,
                                       name=None):
   """Computes the root mean squared error between the labels and predictions.

   The `streaming_root_mean_squared_error` function creates two local variables,
   `total` and `count` that are used to compute the root mean squared error.
   This average is weighted by `weights`, and it is ultimately returned as
   `root_mean_squared_error`: an idempotent operation that takes the square root
   of the division of `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `root_mean_squared_error`. Internally, a `squared_error` operation computes
   the element-wise square of the difference between `predictions` and `labels`.
   Then `update_op` increments `total` with the reduced sum of the product of
   `weights` and `squared_error`, and it increments `count` with the reduced sum
   of `weights`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A `Tensor` of arbitrary shape.
     labels: A `Tensor` of the same shape as `predictions`.
     weights: Optional `Tensor` indicating the frequency with which an example is
       sampled. Rank must be 0, or the same rank as `labels`, and must be
       broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
       same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that
       `root_mean_squared_error` should be added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     root_mean_squared_error: A `Tensor` representing the current mean, the value
       of `total` divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately and whose value matches `root_mean_squared_error`.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.root_mean_squared_error(
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_covariance(predictions,
                          labels,
                          weights=None,
                          metrics_collections=None,
                          updates_collections=None,
                          name=None):
   """Computes the unbiased sample covariance between `predictions` and `labels`.

   The `streaming_covariance` function creates four local variables,
   `comoment`, `mean_prediction`, `mean_label`, and `count`, which are used to
   compute the sample covariance between predictions and labels across multiple
   batches of data. The covariance is ultimately returned as an idempotent
   operation that simply divides `comoment` by `count` - 1. We use `count` - 1
   in order to get an unbiased estimate.

   The algorithm used for this online computation is described in
   https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
   Specifically, the formula used to combine two sample comoments is
   `C_AB = C_A + C_B + (E[x_A] - E[x_B]) * (E[y_A] - E[y_B]) * n_A * n_B / n_AB`
   The comoment for a single batch of data is simply
   `sum((x - E[x]) * (y - E[y]))`, optionally weighted.

   If `weights` is not None, then it is used to compute weighted comoments,
   means, and count. NOTE: these weights are treated as "frequency weights", as
   opposed to "reliability weights". See discussion of the difference on
   https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

   To facilitate the computation of covariance across multiple batches of data,
   the function creates an `update_op` operation, which updates underlying
   variables and returns the updated covariance.

   Args:
     predictions: A `Tensor` of arbitrary size.
     labels: A `Tensor` of the same size as `predictions`.
     weights: Optional `Tensor` indicating the frequency with which an example is
       sampled. Rank must be 0, or the same rank as `labels`, and must be
       broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
       same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     covariance: A `Tensor` representing the current unbiased sample covariance,
       `comoment` / (`count` - 1).
     update_op: An operation that updates the local variables appropriately.

   Raises:
     ValueError: If labels and predictions are of different sizes or if either
       `metrics_collections` or `updates_collections` are not a list or tuple.
   """
   with variable_scope.variable_scope(name, 'covariance',
                                      (predictions, labels, weights)):
     predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
         predictions, labels, weights)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     count_ = metrics_impl.metric_variable([], dtypes.float32, name='count')
     mean_prediction = metrics_impl.metric_variable([],
                                                    dtypes.float32,
                                                    name='mean_prediction')
     mean_label = metrics_impl.metric_variable([],
                                               dtypes.float32,
                                               name='mean_label')
     comoment = metrics_impl.metric_variable(  # C_A in update equation
         [], dtypes.float32, name='comoment')

     if weights is None:
       batch_count = math_ops.cast(array_ops.size(labels),
                                   dtypes.float32)  # n_B in eqn
       weighted_predictions = predictions
       weighted_labels = labels
     else:
       weights = weights_broadcast_ops.broadcast_weights(weights, labels)
       batch_count = math_ops.reduce_sum(weights)  # n_B in eqn
       weighted_predictions = math_ops.multiply(predictions, weights)
       weighted_labels = math_ops.multiply(labels, weights)

     update_count = state_ops.assign_add(count_, batch_count)  # n_AB in eqn
     prev_count = update_count - batch_count  # n_A in update equation

     # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount)
     # batch_mean_prediction is E[x_B] in the update equation
     batch_mean_prediction = math_ops.div_no_nan(
         math_ops.reduce_sum(weighted_predictions), batch_count)
     delta_mean_prediction = math_ops.div_no_nan(
         (batch_mean_prediction - mean_prediction) * batch_count, update_count)
     update_mean_prediction = state_ops.assign_add(mean_prediction,
                                                   delta_mean_prediction)
     # prev_mean_prediction is E[x_A] in the update equation
     prev_mean_prediction = update_mean_prediction - delta_mean_prediction

     # batch_mean_label is E[y_B] in the update equation
     batch_mean_label = math_ops.div_no_nan(
         math_ops.reduce_sum(weighted_labels), batch_count)
     delta_mean_label = math_ops.div_no_nan(
         (batch_mean_label - mean_label) * batch_count, update_count)
     update_mean_label = state_ops.assign_add(mean_label, delta_mean_label)
     # prev_mean_label is E[y_A] in the update equation
     prev_mean_label = update_mean_label - delta_mean_label

     unweighted_batch_coresiduals = ((predictions - batch_mean_prediction) *
                                     (labels - batch_mean_label))
     # batch_comoment is C_B in the update equation
     if weights is None:
       batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals)
     else:
       batch_comoment = math_ops.reduce_sum(unweighted_batch_coresiduals *
                                            weights)

     # View delta_comoment as = C_AB - C_A in the update equation above.
     # Since C_A is stored in a var, by how much do we need to increment that var
     # to make the var = C_AB?
     delta_comoment = (
         batch_comoment + (prev_mean_prediction - batch_mean_prediction) *
         (prev_mean_label - batch_mean_label) *
         (prev_count * batch_count / update_count))
     update_comoment = state_ops.assign_add(comoment, delta_comoment)

     covariance = array_ops.where(
         math_ops.less_equal(count_, 1.),
         float('nan'),
         math_ops.truediv(comoment, count_ - 1),
         name='covariance')
     with ops.control_dependencies([update_comoment]):
       update_op = array_ops.where(
           math_ops.less_equal(count_, 1.),
           float('nan'),
           math_ops.truediv(comoment, count_ - 1),
           name='update_op')

   if metrics_collections:
     ops.add_to_collections(metrics_collections, covariance)

   if updates_collections:
     ops.add_to_collections(updates_collections, update_op)

   return covariance, update_op


 def streaming_pearson_correlation(predictions,
                                   labels,
                                   weights=None,
                                   metrics_collections=None,
                                   updates_collections=None,
                                   name=None):
   """Computes Pearson correlation coefficient between `predictions`, `labels`.

   The `streaming_pearson_correlation` function delegates to
   `streaming_covariance` the tracking of three [co]variances:

   - `streaming_covariance(predictions, labels)`, i.e. covariance
   - `streaming_covariance(predictions, predictions)`, i.e. variance
   - `streaming_covariance(labels, labels)`, i.e. variance

   The product-moment correlation ultimately returned is an idempotent operation
   `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`. To
   facilitate correlation computation across multiple batches, the function
   groups the `update_op`s of the underlying streaming_covariance and returns an
   `update_op`.

   If `weights` is not None, then it is used to compute a weighted correlation.
   NOTE: these weights are treated as "frequency weights", as opposed to
   "reliability weights". See discussion of the difference on
   https://wikipedia.org/wiki/Weighted_arithmetic_mean#Weighted_sample_variance

   Args:
     predictions: A `Tensor` of arbitrary size.
     labels: A `Tensor` of the same size as predictions.
     weights: Optional `Tensor` indicating the frequency with which an example is
       sampled. Rank must be 0, or the same rank as `labels`, and must be
       broadcastable to `labels` (i.e., all dimensions must be either `1`, or the
       same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     pearson_r: A `Tensor` representing the current Pearson product-moment
       correlation coefficient, the value of
       `cov(predictions, labels) / sqrt(var(predictions) * var(labels))`.
     update_op: An operation that updates the underlying variables appropriately.

   Raises:
     ValueError: If `labels` and `predictions` are of different sizes, or if
       `weights` is the wrong size, or if either `metrics_collections` or
       `updates_collections` are not a `list` or `tuple`.
   """
   with variable_scope.variable_scope(name, 'pearson_r',
                                      (predictions, labels, weights)):
     predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
         predictions, labels, weights)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     # Broadcast weights here to avoid duplicate broadcasting in each call to
     # `streaming_covariance`.
     if weights is not None:
       weights = weights_broadcast_ops.broadcast_weights(weights, labels)
     cov, update_cov = streaming_covariance(
         predictions, labels, weights=weights, name='covariance')
     var_predictions, update_var_predictions = streaming_covariance(
         predictions, predictions, weights=weights, name='variance_predictions')
     var_labels, update_var_labels = streaming_covariance(
         labels, labels, weights=weights, name='variance_labels')

     pearson_r = math_ops.truediv(
         cov,
         math_ops.multiply(
             math_ops.sqrt(var_predictions), math_ops.sqrt(var_labels)),
         name='pearson_r')
     update_op = math_ops.truediv(
         update_cov,
         math_ops.multiply(
             math_ops.sqrt(update_var_predictions),
             math_ops.sqrt(update_var_labels)),
         name='update_op')

   if metrics_collections:
     ops.add_to_collections(metrics_collections, pearson_r)

   if updates_collections:
     ops.add_to_collections(updates_collections, update_op)

   return pearson_r, update_op


 # TODO(nsilberman): add a 'normalized' flag so that the user can request
 # normalization if the inputs are not normalized.
 def streaming_mean_cosine_distance(predictions,
                                    labels,
                                    dim,
                                    weights=None,
                                    metrics_collections=None,
                                    updates_collections=None,
                                    name=None):
   """Computes the cosine distance between the labels and predictions.

   The `streaming_mean_cosine_distance` function creates two local variables,
   `total` and `count` that are used to compute the average cosine distance
   between `predictions` and `labels`. This average is weighted by `weights`,
   and it is ultimately returned as `mean_distance`, which is an idempotent
   operation that simply divides `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `mean_distance`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A `Tensor` of the same shape as `labels`.
     labels: A `Tensor` of arbitrary shape.
     dim: The dimension along which the cosine distance is computed.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`,
       and whose dimension `dim` is 1.
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     mean_distance: A `Tensor` representing the current mean, the value of
       `total` divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
       predictions, labels, weights)
   predictions.get_shape().assert_is_compatible_with(labels.get_shape())
   radial_diffs = math_ops.multiply(predictions, labels)
   radial_diffs = math_ops.reduce_sum(
       radial_diffs, axis=[
           dim,
       ], keepdims=True)
   mean_distance, update_op = streaming_mean(radial_diffs, weights, None, None,
                                             name or 'mean_cosine_distance')
   mean_distance = math_ops.subtract(1.0, mean_distance)
   update_op = math_ops.subtract(1.0, update_op)

   if metrics_collections:
     ops.add_to_collections(metrics_collections, mean_distance)

   if updates_collections:
     ops.add_to_collections(updates_collections, update_op)

   return mean_distance, update_op


 def streaming_percentage_less(values,
                               threshold,
                               weights=None,
                               metrics_collections=None,
                               updates_collections=None,
                               name=None):
   """Computes the percentage of values less than the given threshold.

   The `streaming_percentage_less` function creates two local variables,
   `total` and `count` that are used to compute the percentage of `values` that
   fall below `threshold`. This rate is weighted by `weights`, and it is
   ultimately returned as `percentage` which is an idempotent operation that
   simply divides `total` by `count`.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `percentage`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     values: A numeric `Tensor` of arbitrary size.
     threshold: A scalar threshold.
     weights: An optional `Tensor` whose shape is broadcastable to `values`.
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     percentage: A `Tensor` representing the current mean, the value of `total`
       divided by `count`.
     update_op: An operation that increments the `total` and `count` variables
       appropriately.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match `values`,
       or if either `metrics_collections` or `updates_collections` are not a list
       or tuple.
   """
   return metrics.percentage_below(
       values=values,
       threshold=threshold,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def streaming_mean_iou(predictions,
                        labels,
                        num_classes,
                        weights=None,
                        metrics_collections=None,
                        updates_collections=None,
                        name=None):
   """Calculate per-step mean Intersection-Over-Union (mIOU).

   Mean Intersection-Over-Union is a common evaluation metric for
   semantic image segmentation, which first computes the IOU for each
   semantic class and then computes the average over classes.
   IOU is defined as follows:
     IOU = true_positive / (true_positive + false_positive + false_negative).
   The predictions are accumulated in a confusion matrix, weighted by `weights`,
   and mIOU is then calculated from it.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the `mean_iou`.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     predictions: A `Tensor` of prediction results for semantic labels, whose
       shape is [batch size] and type `int32` or `int64`. The tensor will be
       flattened, if its rank > 1.
     labels: A `Tensor` of ground truth labels with shape [batch size] and of
       type `int32` or `int64`. The tensor will be flattened, if its rank > 1.
     num_classes: The possible number of labels the prediction task can have.
       This value must be provided, since a confusion matrix of dimension =
       [num_classes, num_classes] will be allocated.
     weights: An optional `Tensor` whose shape is broadcastable to `predictions`.
     metrics_collections: An optional list of collections that `mean_iou` should
       be added to.
     updates_collections: An optional list of collections `update_op` should be
       added to.
     name: An optional variable_scope name.

   Returns:
     mean_iou: A `Tensor` representing the mean intersection-over-union.
     update_op: An operation that increments the confusion matrix.

   Raises:
     ValueError: If `predictions` and `labels` have mismatched shapes, or if
       `weights` is not `None` and its shape doesn't match `predictions`, or if
       either `metrics_collections` or `updates_collections` are not a list or
       tuple.
   """
   return metrics.mean_iou(
       num_classes=num_classes,
       predictions=predictions,
       labels=labels,
       weights=weights,
       metrics_collections=metrics_collections,
       updates_collections=updates_collections,
       name=name)


 def _next_array_size(required_size, growth_factor=1.5):
   """Calculate the next size for reallocating a dynamic array.

   Args:
     required_size: number or tf.Tensor specifying required array capacity.
     growth_factor: optional number or tf.Tensor specifying the growth factor
       between subsequent allocations.

   Returns:
     tf.Tensor with dtype=int32 giving the next array size.
   """
   exponent = math_ops.ceil(
       math_ops.log(math_ops.cast(required_size, dtypes.float32)) /
       math_ops.log(math_ops.cast(growth_factor, dtypes.float32)))
   return math_ops.cast(math_ops.ceil(growth_factor**exponent), dtypes.int32)


 def streaming_concat(values,
                      axis=0,
                      max_size=None,
                      metrics_collections=None,
                      updates_collections=None,
                      name=None):
   """Concatenate values along an axis across batches.

   The function `streaming_concat` creates two local variables, `array` and
   `size`, that are used to store concatenated values. Internally, `array` is
   used as storage for a dynamic array (if `maxsize` is `None`), which ensures
   that updates can be run in amortized constant time.

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that appends the values of a tensor and returns the
   length of the concatenated axis.

   This op allows for evaluating metrics that cannot be updated incrementally
   using the same framework as other streaming metrics.

   Args:
     values: `Tensor` to concatenate. Rank and the shape along all axes other
       than the axis to concatenate along must be statically known.
     axis: optional integer axis to concatenate along.
     max_size: optional integer maximum size of `value` along the given axis.
       Once the maximum size is reached, further updates are no-ops. By default,
       there is no maximum size: the array is resized as necessary.
     metrics_collections: An optional list of collections that `value` should be
       added to.
     updates_collections: An optional list of collections `update_op` should be
       added to.
     name: An optional variable_scope name.

   Returns:
     value: A `Tensor` representing the concatenated values.
     update_op: An operation that concatenates the next values.

   Raises:
     ValueError: if `values` does not have a statically known rank, `axis` is
       not in the valid range or the size of `values` is not statically known
       along any axis other than `axis`.
   """
   with variable_scope.variable_scope(name, 'streaming_concat', (values,)):
     # pylint: disable=invalid-slice-index
     values_shape = values.get_shape()
     if values_shape.dims is None:
       raise ValueError('`values` must have known statically known rank')

     ndim = len(values_shape)
     if axis < 0:
       axis += ndim
     if not 0 <= axis < ndim:
       raise ValueError('axis = %r not in [0, %r)' % (axis, ndim))

     fixed_shape = [dim.value for n, dim in enumerate(values_shape) if n != axis]
     if any(value is None for value in fixed_shape):
       raise ValueError('all dimensions of `values` other than the dimension to '
                        'concatenate along must have statically known size')

     # We move `axis` to the front of the internal array so assign ops can be
     # applied to contiguous slices
     init_size = 0 if max_size is None else max_size
     init_shape = [init_size] + fixed_shape
     array = metrics_impl.metric_variable(
         init_shape, values.dtype, validate_shape=False, name='array')
     size = metrics_impl.metric_variable([], dtypes.int32, name='size')

     perm = [0 if n == axis else n + 1 if n < axis else n for n in range(ndim)]
     valid_array = array[:size]
     valid_array.set_shape([None] + fixed_shape)
     value = array_ops.transpose(valid_array, perm, name='concat')

     values_size = array_ops.shape(values)[axis]
     if max_size is None:
       batch_size = values_size
     else:
       batch_size = math_ops.minimum(values_size, max_size - size)

     perm = [axis] + [n for n in range(ndim) if n != axis]
     batch_values = array_ops.transpose(values, perm)[:batch_size]

     def reallocate():
       next_size = _next_array_size(new_size)
       next_shape = array_ops.stack([next_size] + fixed_shape)
       new_value = array_ops.zeros(next_shape, dtype=values.dtype)
       old_value = array.value()
       assign_op = state_ops.assign(array, new_value, validate_shape=False)
       with ops.control_dependencies([assign_op]):
         copy_op = array[:size].assign(old_value[:size])
       # return value needs to be the same dtype as no_op() for cond
       with ops.control_dependencies([copy_op]):
         return control_flow_ops.no_op()

     new_size = size + batch_size
     array_size = array_ops.shape_internal(array, optimize=False)[0]
     maybe_reallocate_op = control_flow_ops.cond(new_size > array_size,
                                                 reallocate,
                                                 control_flow_ops.no_op)
     with ops.control_dependencies([maybe_reallocate_op]):
       append_values_op = array[size:new_size].assign(batch_values)
     with ops.control_dependencies([append_values_op]):
       update_op = size.assign(new_size)

     if metrics_collections:
       ops.add_to_collections(metrics_collections, value)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return value, update_op
     # pylint: enable=invalid-slice-index


 def aggregate_metrics(*value_update_tuples):
   """Aggregates the metric value tensors and update ops into two lists.

   Args:
     *value_update_tuples: a variable number of tuples, each of which contain the
       pair of (value_tensor, update_op) from a streaming metric.

   Returns:
     A list of value `Tensor` objects and a list of update ops.

   Raises:
     ValueError: if `value_update_tuples` is empty.
   """
   if not value_update_tuples:
     raise ValueError('Expected at least one value_tensor/update_op pair')
   value_ops, update_ops = zip(*value_update_tuples)
   return list(value_ops), list(update_ops)


 def aggregate_metric_map(names_to_tuples):
   """Aggregates the metric names to tuple dictionary.

   This function is useful for pairing metric names with their associated value
   and update ops when the list of metrics is long. For example:

   ```python
     metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({
         'Mean Absolute Error': new_slim.metrics.streaming_mean_absolute_error(
             predictions, labels, weights),
         'Mean Relative Error': new_slim.metrics.streaming_mean_relative_error(
             predictions, labels, labels, weights),
         'RMSE Linear': new_slim.metrics.streaming_root_mean_squared_error(
             predictions, labels, weights),
         'RMSE Log': new_slim.metrics.streaming_root_mean_squared_error(
             predictions, labels, weights),
     })
   ```

   Args:
     names_to_tuples: a map of metric names to tuples, each of which contain the
       pair of (value_tensor, update_op) from a streaming metric.

   Returns:
     A dictionary from metric names to value ops and a dictionary from metric
     names to update ops.
   """
   metric_names = names_to_tuples.keys()
   value_ops, update_ops = zip(*names_to_tuples.values())
   return dict(zip(metric_names, value_ops)), dict(zip(metric_names, update_ops))


 def count(values,
           weights=None,
           metrics_collections=None,
           updates_collections=None,
           name=None):
   """Computes the number of examples, or sum of `weights`.

   This metric keeps track of the denominator in `tf.compat.v1.metrics.mean`.
   When evaluating some metric (e.g. mean) on one or more subsets of the data,
   this auxiliary metric is useful for keeping track of how many examples there
   are in each subset.

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   Args:
     values: A `Tensor` of arbitrary dimensions. Only it's shape is used.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `labels` dimension).
     metrics_collections: An optional list of collections that the metric value
       variable should be added to.
     updates_collections: An optional list of collections that the metric update
       ops should be added to.
     name: An optional variable_scope name.

   Returns:
     count: A `Tensor` representing the current value of the metric.
     update_op: An operation that accumulates the metric from a batch of data.

   Raises:
     ValueError: If `weights` is not `None` and its shape doesn't match `values`,
       or if either `metrics_collections` or `updates_collections` are not a list
       or tuple.
     RuntimeError: If eager execution is enabled.
   """
   if context.executing_eagerly():
     raise RuntimeError('tf.contrib.metrics.count is not supported when eager '
                        'execution is enabled.')

   with variable_scope.variable_scope(name, 'count', (values, weights)):

     count_ = metrics_impl.metric_variable([], dtypes.float32, name='count')

     if weights is None:
       num_values = math_ops.cast(array_ops.size(values), dtypes.float32)
     else:
       values = math_ops.cast(values, dtypes.float32)
       values, _, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
           predictions=values,
           labels=None,
           weights=weights)
       weights = weights_broadcast_ops.broadcast_weights(
           math_ops.cast(weights, dtypes.float32), values)
       num_values = math_ops.reduce_sum(weights)

     with ops.control_dependencies([values]):
       update_count_op = state_ops.assign_add(count_, num_values)

     count_ = metrics_impl._aggregate_variable(count_, metrics_collections)  # pylint: disable=protected-access

     if updates_collections:
       ops.add_to_collections(updates_collections, update_count_op)

     return count_, update_count_op


 def cohen_kappa(labels,
                 predictions_idx,
                 num_classes,
                 weights=None,
                 metrics_collections=None,
                 updates_collections=None,
                 name=None):
   """Calculates Cohen's kappa.

   [Cohen's kappa](https://en.wikipedia.org/wiki/Cohen's_kappa) is a statistic
   that measures inter-annotator agreement.

   The `cohen_kappa` function calculates the confusion matrix, and creates three
   local variables to compute the Cohen's kappa: `po`, `pe_row`, and `pe_col`,
   which refer to the diagonal part, rows and columns totals of the confusion
   matrix, respectively. This value is ultimately returned as `kappa`, an
   idempotent operation that is calculated by

       pe = (pe_row * pe_col) / N
       k = (sum(po) - sum(pe)) / (N - sum(pe))

   For estimation of the metric over a stream of data, the function creates an
   `update_op` operation that updates these variables and returns the
   `kappa`. `update_op` weights each prediction by the corresponding value in
   `weights`.

   Class labels are expected to start at 0. E.g., if `num_classes`
   was three, then the possible labels would be [0, 1, 2].

   If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.

   NOTE: Equivalent to `sklearn.metrics.cohen_kappa_score`, but the method
   doesn't support weighted matrix yet.

   Args:
     labels: 1-D `Tensor` of real labels for the classification task. Must be
       one of the following types: int16, int32, int64.
     predictions_idx: 1-D `Tensor` of predicted class indices for a given
       classification. Must have the same type as `labels`.
     num_classes: The possible number of labels.
     weights: Optional `Tensor` whose shape matches `predictions`.
     metrics_collections: An optional list of collections that `kappa` should be
       added to.
     updates_collections: An optional list of collections that `update_op` should
       be added to.
     name: An optional variable_scope name.

   Returns:
     kappa: Scalar float `Tensor` representing the current Cohen's kappa.
     update_op: `Operation` that increments `po`, `pe_row` and `pe_col`
       variables appropriately and whose value matches `kappa`.

   Raises:
     ValueError: If `num_classes` is less than 2, or `predictions` and `labels`
       have mismatched shapes, or if `weights` is not `None` and its shape
       doesn't match `predictions`, or if either `metrics_collections` or
       `updates_collections` are not a list or tuple.
     RuntimeError: If eager execution is enabled.
   """
   if context.executing_eagerly():
     raise RuntimeError('tf.contrib.metrics.cohen_kappa is not supported '
                        'when eager execution is enabled.')
   if num_classes < 2:
     raise ValueError('`num_classes` must be >= 2.'
                      'Found: {}'.format(num_classes))
   with variable_scope.variable_scope(name, 'cohen_kappa',
                                      (labels, predictions_idx, weights)):
     # Convert 2-dim (num, 1) to 1-dim (num,)
     labels.get_shape().with_rank_at_most(2)
     if labels.get_shape().ndims == 2:
       labels = array_ops.squeeze(labels, axis=[-1])
     predictions_idx, labels, weights = (
         metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
             predictions=predictions_idx,
             labels=labels,
             weights=weights))
     predictions_idx.get_shape().assert_is_compatible_with(labels.get_shape())

     stat_dtype = (
         dtypes.int64
         if weights is None or weights.dtype.is_integer else dtypes.float32)
     po = metrics_impl.metric_variable((num_classes,), stat_dtype, name='po')
     pe_row = metrics_impl.metric_variable((num_classes,),
                                           stat_dtype,
                                           name='pe_row')
     pe_col = metrics_impl.metric_variable((num_classes,),
                                           stat_dtype,
                                           name='pe_col')

     # Table of the counts of agreement:
     counts_in_table = confusion_matrix.confusion_matrix(
         labels,
         predictions_idx,
         num_classes=num_classes,
         weights=weights,
         dtype=stat_dtype,
         name='counts_in_table')

     po_t = array_ops.diag_part(counts_in_table)
     pe_row_t = math_ops.reduce_sum(counts_in_table, axis=0)
     pe_col_t = math_ops.reduce_sum(counts_in_table, axis=1)
     update_po = state_ops.assign_add(po, po_t)
     update_pe_row = state_ops.assign_add(pe_row, pe_row_t)
     update_pe_col = state_ops.assign_add(pe_col, pe_col_t)

     def _calculate_k(po, pe_row, pe_col, name):
       po_sum = math_ops.reduce_sum(po)
       total = math_ops.reduce_sum(pe_row)
       pe_sum = math_ops.reduce_sum(
           math_ops.div_no_nan(
               math_ops.cast(pe_row * pe_col, dtypes.float64),
               math_ops.cast(total, dtypes.float64)))
       po_sum, pe_sum, total = (math_ops.cast(po_sum, dtypes.float64),
                                math_ops.cast(pe_sum, dtypes.float64),
                                math_ops.cast(total, dtypes.float64))
       # kappa = (po - pe) / (N - pe)
       k = metrics_impl._safe_scalar_div(  # pylint: disable=protected-access
           po_sum - pe_sum,
           total - pe_sum,
           name=name)
       return k

     kappa = _calculate_k(po, pe_row, pe_col, name='value')
     update_op = _calculate_k(
         update_po, update_pe_row, update_pe_col, name='update_op')

     if metrics_collections:
       ops.add_to_collections(metrics_collections, kappa)

     if updates_collections:
       ops.add_to_collections(updates_collections, update_op)

     return kappa, update_op


 __all__ = [
     'auc_with_confidence_intervals',
     'aggregate_metric_map',
     'aggregate_metrics',
     'cohen_kappa',
     'count',
     'precision_recall_at_equal_thresholds',
     'recall_at_precision',
     'sparse_recall_at_top_k',
     'streaming_accuracy',
     'streaming_auc',
     'streaming_curve_points',
     'streaming_dynamic_auc',
     'streaming_false_negative_rate',
     'streaming_false_negative_rate_at_thresholds',
     'streaming_false_negatives',
     'streaming_false_negatives_at_thresholds',
     'streaming_false_positive_rate',
     'streaming_false_positive_rate_at_thresholds',
     'streaming_false_positives',
     'streaming_false_positives_at_thresholds',
     'streaming_mean',
     'streaming_mean_absolute_error',
     'streaming_mean_cosine_distance',
     'streaming_mean_iou',
     'streaming_mean_relative_error',
     'streaming_mean_squared_error',
     'streaming_mean_tensor',
     'streaming_percentage_less',
     'streaming_precision',
     'streaming_precision_at_thresholds',
     'streaming_recall',
     'streaming_recall_at_k',
     'streaming_recall_at_thresholds',
     'streaming_root_mean_squared_error',
     'streaming_sensitivity_at_specificity',
     'streaming_sparse_average_precision_at_k',
     'streaming_sparse_average_precision_at_top_k',
     'streaming_sparse_precision_at_k',
     'streaming_sparse_precision_at_top_k',
     'streaming_sparse_recall_at_k',
     'streaming_specificity_at_sensitivity',
     'streaming_true_negatives',
     'streaming_true_negatives_at_thresholds',
     'streaming_true_positives',
     'streaming_true_positives_at_thresholds',
 ]