| # Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Model evaluation tools for TF-GAN. |
| |
| These methods come from https://arxiv.org/abs/1606.03498, |
| https://arxiv.org/abs/1706.08500, and https://arxiv.org/abs/1801.01401. |
| |
| NOTE: This implementation uses the same weights as in |
| https://github.com/openai/improved-gan/blob/master/inception_score/model.py, |
| but is more numerically stable and is an unbiased estimator of the true |
| Inception score even when splitting the inputs into batches. |
| """ |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import functools |
| import os |
| import sys |
| import tarfile |
| |
| from six.moves import urllib |
| |
| from tensorflow.contrib.layers.python.layers import layers |
| from tensorflow.core.framework import graph_pb2 |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import importer |
| from tensorflow.python.framework import ops |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import control_flow_ops |
| from tensorflow.python.ops import image_ops |
| from tensorflow.python.ops import linalg_ops |
| from tensorflow.python.ops import map_fn |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import nn_impl |
| from tensorflow.python.ops import nn_ops |
| from tensorflow.python.platform import gfile |
| from tensorflow.python.platform import resource_loader |
| |
# Names exported by `from <this module> import *`; this list declares the
# module's public API.
__all__ = [
    'get_graph_def_from_disk',
    'get_graph_def_from_resource',
    'get_graph_def_from_url_tarball',
    'preprocess_image',
    'run_image_classifier',
    'run_inception',
    'inception_score',
    'classifier_score',
    'classifier_score_from_logits',
    'frechet_inception_distance',
    'frechet_classifier_distance',
    'frechet_classifier_distance_from_activations',
    'mean_only_frechet_classifier_distance_from_activations',
    'diagonal_only_frechet_classifier_distance_from_activations',
    'kernel_inception_distance',
    'kernel_inception_distance_and_std',
    'kernel_classifier_distance',
    'kernel_classifier_distance_and_std',
    'kernel_classifier_distance_from_activations',
    'kernel_classifier_distance_and_std_from_activations',
    'INCEPTION_DEFAULT_IMAGE_SIZE',
]
| |
# Tarball that `_default_graph_def_fn` downloads to obtain the default graph.
INCEPTION_URL = 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz'
# Name of the frozen GraphDef file read from inside that tarball.
INCEPTION_FROZEN_GRAPH = 'inceptionv1_for_inception_score.pb'
# Default `input_tensor` name used by `run_inception`.
INCEPTION_INPUT = 'Mul:0'
# Default `output_tensor` name used by `run_inception` and `inception_score`.
INCEPTION_OUTPUT = 'logits:0'
# Output tensor name used by the Frechet and kernel Inception distances.
INCEPTION_FINAL_POOL = 'pool_3:0'
# Default height and width for `preprocess_image` and `run_inception`.
INCEPTION_DEFAULT_IMAGE_SIZE = 299
| |
| |
def _validate_images(images, image_size):
  """Converts `images` to a Tensor and checks its static shape.

  Args:
    images: Value accepted by `ops.convert_to_tensor`.
    image_size: Size that both dimension 1 and dimension 2 must be compatible
      with.

  Returns:
    `images` converted to a Tensor.
  """
  image_tensor = ops.convert_to_tensor(images)
  static_shape = image_tensor.shape
  # Both checks are delegated to the static-shape helpers: the shape has to be
  # rank 4 and compatible with [?, image_size, image_size, ?].
  static_shape.with_rank(4)
  static_shape.assert_is_compatible_with(
      [None, image_size, image_size, None])
  return image_tensor
| |
| |
def _symmetric_matrix_square_root(mat, eps=1e-10):
  """Computes the matrix square root of a symmetric matrix.

  This is not an elementwise square root: the result M' satisfies
  M' * M' = mat. The method is **only** valid for symmetric matrices.

  Args:
    mat: Matrix to take the square root of.
    eps: Small threshold. Singular values below `eps` are not square rooted,
      to guard against numerical instability.

  Returns:
    Matrix square root of `mat`.
  """
  # TensorFlow returns the factors in the order (s, u, v), unlike numpy.
  singular_values, left, right = linalg_ops.svd(mat)
  # sqrt is unstable around 0, so values below `eps` are passed through
  # unchanged instead of being square rooted.
  below_eps = math_ops.less(singular_values, eps)
  root_values = array_ops.where(
      below_eps, singular_values, math_ops.sqrt(singular_values))
  # TensorFlow's `v` is V in A = U S V^T (numpy returns V^T), hence the
  # explicit transpose in the final product.
  scaled_left = math_ops.matmul(left, array_ops.diag(root_values))
  return math_ops.matmul(scaled_left, right, transpose_b=True)
| |
| |
def preprocess_image(images,
                     height=INCEPTION_DEFAULT_IMAGE_SIZE,
                     width=INCEPTION_DEFAULT_IMAGE_SIZE,
                     scope=None):
  """Resizes and rescales image(s) for evaluation.

  This reproduces the preprocessing portion of the graph from
  http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz.

  Input values are expected in [0, 255]. They are bilinearly resized to
  `[height, width]` and mapped to roughly [-1, 1] via `(x - 128) / 128`.

  Args:
    images: 3-D or 4-D Tensor of images. Values are in [0, 255].
    height: Integer. Height of resized output image.
    width: Integer. Width of resized output image.
    scope: Optional scope for name_scope.

  Returns:
    3-D or 4-D float Tensor of prepared image(s), with the same rank as the
    input. Values are in [-1, 1].
  """
  # A single 3-D image is temporarily given a batch dimension because the
  # resize below is applied to a batch.
  single_image = images.shape.ndims == 3
  with ops.name_scope(scope, 'preprocess', [images, height, width]):
    batch = images
    if not batch.dtype.is_floating:
      batch = math_ops.cast(batch, dtypes.float32)
    if single_image:
      batch = array_ops.expand_dims(batch, axis=0)
    rescaled = (image_ops.resize_bilinear(batch, [height, width]) -
                128.0) / 128.0
    if single_image:
      rescaled = array_ops.squeeze(rescaled, axis=0)
  return rescaled
| |
| |
def _kl_divergence(p, p_logits, q):
  """Computes the Kullback-Leibler divergence between p and q.

  This function uses p's logits in some places to improve numerical stability.

  Specifically:

  KL(p || q) = sum[ p * log(p / q) ]
             = sum[ p * ( log(p) - log(q) ) ]
             = sum[ p * ( log_softmax(p_logits) - log(q) ) ]

  Args:
    p: A 2-D floating-point Tensor p_ij, where `i` corresponds to the minibatch
      example and `j` corresponds to the probability of being in class `j`.
    p_logits: A 2-D floating-point Tensor corresponding to logits for `p`.
    q: A 1-D floating-point Tensor, where q_j corresponds to the probability
      of class `j`.

  Returns:
    KL divergence between two distributions. Output dimension is 1D, one entry
    per distribution in `p`.

  Raises:
    ValueError: If any of the inputs aren't floating-point.
    ValueError: If p or p_logits aren't 2D.
    ValueError: If q isn't 1D.
  """
  for tensor in [p, p_logits, q]:
    if not tensor.dtype.is_floating:
      # Interpolate the tensor name into the message; passing it as a second
      # exception argument would leave the '%s' placeholder unformatted.
      raise ValueError('Input %s must be floating type.' % tensor.name)
  p.shape.assert_has_rank(2)
  p_logits.shape.assert_has_rank(2)
  q.shape.assert_has_rank(1)
  return math_ops.reduce_sum(
      p * (nn_ops.log_softmax(p_logits) - math_ops.log(q)), axis=1)
| |
| |
def get_graph_def_from_disk(filename):
  """Reads the file at `filename` and parses it as a GraphDef proto."""
  with gfile.GFile(filename, 'rb') as graph_file:
    serialized_graph = graph_file.read()
  return graph_pb2.GraphDef.FromString(serialized_graph)
| |
| |
def get_graph_def_from_resource(filename):
  """Loads a resource (e.g. from within a .par file) as a GraphDef proto."""
  serialized_graph = resource_loader.load_resource(filename)
  return graph_pb2.GraphDef.FromString(serialized_graph)
| |
| |
def get_graph_def_from_url_tarball(url, filename, tar_filename=None):
  """Get a GraphDef proto from a tarball on the web.

  The tarball is downloaded only when `tar_filename` is not given or does not
  exist yet; otherwise the existing local file is reused.

  Args:
    url: Web address of tarball
    filename: Filename of graph definition within tarball
    tar_filename: Local filename for the downloaded tarball. If it already
      exists the download is skipped (None = always download).

  Returns:
    A GraphDef loaded from a file in the downloaded tarball.
  """
  if not (tar_filename and os.path.exists(tar_filename)):

    def _progress(count, block_size, total_size):
      """Writes a single-line download progress indicator to stdout."""
      downloaded = count * block_size
      # The reported total size is not positive when the server gives no usable
      # length (e.g. -1 for unknown, 0 for an empty body). A percentage cannot
      # be computed then, so report the downloaded byte count instead of
      # dividing by zero or printing a negative percentage.
      if total_size > 0:
        status = '%.1f%%' % (float(downloaded) / float(total_size) * 100.0)
      else:
        status = '%d bytes' % downloaded
      sys.stdout.write('\r>> Downloading %s %s' % (url, status))
      sys.stdout.flush()

    tar_filename, _ = urllib.request.urlretrieve(url, tar_filename, _progress)
  with tarfile.open(tar_filename, 'r:gz') as tar:
    proto_str = tar.extractfile(filename).read()
  return graph_pb2.GraphDef.FromString(proto_str)
| |
| |
def _default_graph_def_fn():
  """Returns the GraphDef stored in the tarball at `INCEPTION_URL`.

  The tarball is kept under the URL's basename, relative to the current
  working directory, and is reused when that file already exists.
  """
  local_tarball = os.path.basename(INCEPTION_URL)
  return get_graph_def_from_url_tarball(
      INCEPTION_URL, INCEPTION_FROZEN_GRAPH, local_tarball)
| |
| |
def run_inception(images,
                  graph_def=None,
                  default_graph_def_fn=_default_graph_def_fn,
                  image_size=INCEPTION_DEFAULT_IMAGE_SIZE,
                  input_tensor=INCEPTION_INPUT,
                  output_tensor=INCEPTION_OUTPUT):
  """Run images through a pretrained Inception classifier.

  Args:
    images: Input tensors. Must be [batch, height, width, channels], with
      height and width equal to `image_size`. Values must be in [-1, 1], which
      can be achieved using `preprocess_image`.
    graph_def: A GraphDef proto of a pretrained Inception graph. If `None`,
      call `default_graph_def_fn` to get GraphDef.
    default_graph_def_fn: A function that returns a GraphDef. Used if
      `graph_def` is `None`. By default, returns the pretrained Inception graph
      downloaded from `INCEPTION_URL`.
    image_size: Required image width and height. See unit tests for the default
      values.
    input_tensor: Name of input Tensor.
    output_tensor: Name or list of output Tensors. This function will compute
      activations at the specified layer. Examples include `INCEPTION_OUTPUT`
      and `INCEPTION_FINAL_POOL`, which would result in this function computing
      the final logits or the penultimate pooling layer.

  Returns:
    Tensor or Tensors corresponding to computed `output_tensor`. Outputs whose
    static rank is not 2 are flattened to 2-D.

  Raises:
    ValueError: If images are not the correct size.
    ValueError: If neither `graph_def` nor `default_graph_def_fn` are provided.
  """
  images = _validate_images(images, image_size)

  if graph_def is None:
    if default_graph_def_fn is None:
      raise ValueError('If `graph_def` is `None`, must provide '
                       '`default_graph_def_fn`.')
    graph_def = default_graph_def_fn()

  activations = run_image_classifier(images, graph_def, input_tensor,
                                     output_tensor)

  def _as_2d(activation):
    """Flattens `activation` unless its static rank is already 2."""
    # Use the static rank: `array_ops.rank` returns a Tensor, and comparing a
    # Tensor with a Python int does not test the rank at graph-build time.
    if activation.shape.ndims != 2:
      return layers.flatten(activation)
    return activation

  if isinstance(activations, list):
    return [_as_2d(activation) for activation in activations]
  return _as_2d(activations)
| |
| |
def run_image_classifier(tensor,
                         graph_def,
                         input_tensor,
                         output_tensor,
                         scope='RunClassifier'):
  """Imports a frozen graph and feeds `tensor` through it.

  Args:
    tensor: An Input tensor.
    graph_def: A GraphDef proto.
    input_tensor: Name of input tensor in graph def.
    output_tensor: A tensor name or list of tensor names in graph def.
    scope: Name scope for classifier.

  Returns:
    Classifier output if `output_tensor` is a string, or a list of outputs if
    `output_tensor` is a list.

  Raises:
    ValueError: If `input_tensor` or `output_tensor` aren't in the graph_def.
  """
  # A single name is wrapped in a list for the import call and unwrapped
  # again afterwards, so callers get back the same "shape" they passed in.
  single_output = isinstance(output_tensor, str)
  output_names = [output_tensor] if single_output else output_tensor
  outputs = importer.import_graph_def(
      graph_def, {input_tensor: tensor}, output_names, name=scope)
  return outputs[0] if single_output else outputs
| |
| |
def classifier_score(images, classifier_fn, num_batches=1):
  """Classifier score for evaluating a conditional generative model.

  This generalizes the Inception Score to an arbitrary classifier.

  The technique is described in detail in https://arxiv.org/abs/1606.03498. In
  summary, this function calculates

  exp( E[ KL(p(y|x) || p(y)) ] )

  which captures how different the network's classification prediction is from
  the prior distribution over classes.

  NOTE: This function consumes images, computes their logits, and then
  computes the classifier score. To precompute many logits for large batches,
  use classifier_score_from_logits(), which this method also uses.

  Args:
    images: Images to calculate the classifier score for.
    classifier_fn: A function that takes images and produces logits based on a
      classifier.
    num_batches: Number of batches to split `images` in to in order to
      efficiently run them through the classifier network.

  Returns:
    The classifier score. A floating-point scalar of the same type as the output
    of `classifier_fn`.
  """
  # Split into `num_batches` pieces and stack them so that `map_fn` can run
  # the classifier on one batch at a time.
  image_batches = array_ops.stack(
      array_ops.split(images, num_or_size_splits=num_batches))

  # Run the classifier over the batches with the memory-efficient `map_fn`.
  batched_logits = map_fn.map_fn(
      fn=classifier_fn,
      elems=image_batches,
      parallel_iterations=1,
      back_prop=False,
      swap_memory=True,
      name='RunClassifier')

  # Undo the batching: concatenate the per-batch results along axis 0.
  all_logits = array_ops.concat(array_ops.unstack(batched_logits), 0)
  return classifier_score_from_logits(all_logits)
| |
| |
def classifier_score_from_logits(logits):
  """Classifier score for evaluating a generative model from logits.

  Computes the classifier score for a set of precomputed logits. This can be
  used independently of classifier_score(), for example when evaluating with
  large batches where all logits are computed up front.

  The technique is described in detail in https://arxiv.org/abs/1606.03498. In
  summary, this function calculates:

  exp( E[ KL(p(y|x) || p(y)) ] )

  which captures how different the network's classification prediction is from
  the prior distribution over classes.

  Args:
    logits: Precomputed 2D tensor of logits that will be used to
      compute the classifier score.

  Returns:
    The classifier score. A floating-point scalar of the same dtype as
    `logits`.
  """
  logits.shape.assert_has_rank(2)

  # All math is done in float64 for precision; the result is cast back to the
  # input dtype at the end.
  original_dtype = logits.dtype
  needs_cast = original_dtype != dtypes.float64
  if needs_cast:
    logits = math_ops.cast(logits, dtypes.float64)

  class_probs = nn_ops.softmax(logits)
  # Marginal class distribution p(y): mean of p(y|x) over the examples.
  marginal_probs = math_ops.reduce_mean(class_probs, axis=0)
  per_example_kl = _kl_divergence(class_probs, logits, marginal_probs)
  per_example_kl.shape.assert_has_rank(1)
  score = math_ops.exp(math_ops.reduce_mean(per_example_kl))

  if needs_cast:
    score = math_ops.cast(score, original_dtype)
  return score
| |
| |
# `classifier_score` with the classifier fixed to `run_inception` evaluated at
# the `INCEPTION_OUTPUT` tensor.
inception_score = functools.partial(
    classifier_score,
    classifier_fn=functools.partial(
        run_inception, output_tensor=INCEPTION_OUTPUT))
| |
| |
def trace_sqrt_product(sigma, sigma_v):
  """Find the trace of the positive sqrt of product of covariance matrices.

  '_symmetric_matrix_square_root' only works for symmetric matrices, so it
  cannot be applied directly to sigma * sigma_v: both factors are symmetric,
  but their product is not necessarily.

  Let sigma = A A so A = sqrt(sigma), and sigma_v = B B.
  We want to find trace(sqrt(sigma sigma_v)) = trace(sqrt(A A B B))
  Note the following properties:
  (i) forall M1, M2: eigenvalues(M1 M2) = eigenvalues(M2 M1)
     => eigenvalues(A A B B) = eigenvalues (A B B A)
  (ii) if M1 = sqrt(M2), then eigenvalues(M1) = sqrt(eigenvalues(M2))
     => eigenvalues(sqrt(sigma sigma_v)) = sqrt(eigenvalues(A B B A))
  (iii) forall M: trace(M) = sum(eigenvalues(M))
     => trace(sqrt(sigma sigma_v)) = sum(eigenvalues(sqrt(sigma sigma_v)))
                                   = sum(sqrt(eigenvalues(A B B A)))
                                   = sum(eigenvalues(sqrt(A B B A)))
                                   = trace(sqrt(A B B A))
                                   = trace(sqrt(A sigma_v A))
  A = sqrt(sigma). Both sigma and A sigma_v A are symmetric, so the
  _symmetric_matrix_square_root function **can** be used to find the roots of
  these matrices.

  Args:
    sigma: a square, symmetric, real, positive semi-definite covariance matrix
    sigma_v: same as sigma

  Returns:
    The trace of the positive square root of sigma*sigma_v
  """
  # "A" in the derivation above.
  root_sigma = _symmetric_matrix_square_root(sigma)

  # The symmetric matrix A sigma_v A; its square root is taken below.
  sigma_v_a = math_ops.matmul(sigma_v, root_sigma)
  a_sigma_v_a = math_ops.matmul(root_sigma, sigma_v_a)

  return math_ops.trace(_symmetric_matrix_square_root(a_sigma_v_a))
| |
| |
def frechet_classifier_distance(real_images,
                                generated_images,
                                classifier_fn,
                                num_batches=1):
  """Classifier distance for evaluating a generative model.

  This generalizes the Frechet Inception distance to an arbitrary classifier.

  The technique is described in detail in https://arxiv.org/abs/1706.08500.
  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images.

  Note that when computed using sample means and sample covariance matrices,
  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
  even if the two distributions are the same, for a small sample size, the
  expected Frechet distance is large). It is important to use the same
  sample size to compute Frechet classifier distance when comparing two
  generative models.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier distance. To precompute many activations for real and
  generated images for large batches, use
  frechet_classifier_distance_from_activations(), which this method also uses.

  Args:
    real_images: Real images to use to compute Frechet Inception distance.
    generated_images: Generated images to use to compute Frechet Inception
      distance.
    classifier_fn: A function that takes images and produces activations
      based on a classifier.
    num_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.

  Returns:
    The Frechet Inception distance. A floating-point scalar of the same type
    as the output of `classifier_fn`.
  """
  real_batches = array_ops.split(real_images, num_or_size_splits=num_batches)
  generated_batches = array_ops.split(
      generated_images, num_or_size_splits=num_batches)

  stacked_real = array_ops.stack(real_batches)
  stacked_generated = array_ops.stack(generated_batches)

  def _classify_batches(batches):
    """Runs `classifier_fn` batch by batch with the memory-efficient map_fn."""
    return map_fn.map_fn(
        fn=classifier_fn,
        elems=batches,
        parallel_iterations=1,
        back_prop=False,
        swap_memory=True,
        name='RunClassifier')

  def _merge_batches(batched_activations):
    """Concatenates the per-batch activations back along axis 0."""
    return array_ops.concat(array_ops.unstack(batched_activations), 0)

  batched_real = _classify_batches(stacked_real)
  batched_generated = _classify_batches(stacked_generated)

  return frechet_classifier_distance_from_activations(
      _merge_batches(batched_real), _merge_batches(batched_generated))
| |
| |
def mean_only_frechet_classifier_distance_from_activations(
    real_activations, generated_activations):
  """Classifier distance for evaluating a generative model from activations.

  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

  |m - m_w|^2

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images.

  Note that when computed using sample means and sample covariance matrices,
  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
  even if the two distributions are the same, for a small sample size, the
  expected Frechet distance is large). It is important to use the same
  sample size to compute frechet classifier distance when comparing two
  generative models.

  In this variant, only the difference between the means of the fitted
  Gaussians is computed. The computation leads to O(n) vs. O(n^2) memory usage,
  yet still retains much of the same information as FID.

  Args:
    real_activations: 2D array of activations of real images of size
      [num_images, num_dims] to use to compute Frechet Inception distance.
    generated_activations: 2D array of activations of generated images of size
      [num_images, num_dims] to use to compute Frechet Inception distance.

  Returns:
    The mean-only Frechet Inception distance. A floating-point scalar of the
    same type as the output of the activations.
  """
  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)

  # Compute in float64 and cast the result back to the dtype of
  # `real_activations` at the end.
  result_dtype = real_activations.dtype
  needs_cast = result_dtype != dtypes.float64
  if needs_cast:
    real_activations = math_ops.cast(real_activations, dtypes.float64)
    generated_activations = math_ops.cast(generated_activations, dtypes.float64)

  # Per-dimension means over the examples.
  real_mean = math_ops.reduce_mean(real_activations, 0)
  generated_mean = math_ops.reduce_mean(generated_activations, 0)

  # Squared distance between the means.
  squared_mean_distance = math_ops.reduce_sum(
      math_ops.squared_difference(real_mean, generated_mean))

  if needs_cast:
    return math_ops.cast(squared_mean_distance, result_dtype)
  return squared_mean_distance
| |
| |
def diagonal_only_frechet_classifier_distance_from_activations(
    real_activations, generated_activations):
  """Classifier distance for evaluating a generative model.

  This generalizes the Frechet Inception distance to an arbitrary classifier.

  The technique is described in detail in https://arxiv.org/abs/1706.08500.
  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

  |m - m_w|^2 + (sigma + sigma_w - 2(sigma x sigma_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images. In this variant, only diagonal covariance matrices are used.
  As a result, the expensive matrix square root is replaced by an elementwise
  computation with O(n) vs O(n^2) space complexity.

  Note that when computed using sample means and sample covariance matrices,
  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
  even if the two distributions are the same, for a small sample size, the
  expected Frechet distance is large). It is important to use the same
  sample size to compute frechet classifier distance when comparing two
  generative models.

  Args:
    real_activations: Real images to use to compute Frechet Inception distance.
    generated_activations: Generated images to use to compute Frechet Inception
      distance.

  Returns:
    The diagonal-only Frechet Inception distance. A floating-point scalar of
    the same type as the output of the activations.

  Raises:
    ValueError: If the shape of the variance and mean vectors are not equal.
  """
  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)

  # Compute in float64 and cast the result back to the dtype of
  # `real_activations` at the end.
  result_dtype = real_activations.dtype
  needs_cast = result_dtype != dtypes.float64
  if needs_cast:
    real_activations = math_ops.cast(real_activations, dtypes.float64)
    generated_activations = math_ops.cast(generated_activations, dtypes.float64)

  # Per-dimension mean and variance over the examples.
  real_mean, real_var = nn_impl.moments(real_activations, axes=[0])
  generated_mean, generated_var = nn_impl.moments(
      generated_activations, axes=[0])

  variance_shape = real_var.get_shape()
  mean_shape = real_mean.get_shape()
  if variance_shape != mean_shape:
    raise ValueError('shape: {} must match expected shape: {}'.format(
        variance_shape, mean_shape))

  # Covariance component. With diagonal covariances the matrix square root
  # reduces to an elementwise square root of the variance products.
  variance_sum = real_var + generated_var
  root_of_product = math_ops.sqrt(
      math_ops.multiply(real_var, generated_var))
  trace_term = math_ops.reduce_sum(variance_sum - 2.0 * root_of_product)

  # Squared distance between the means.
  mean_term = math_ops.reduce_sum(
      math_ops.squared_difference(real_mean, generated_mean))

  distance = trace_term + mean_term
  if needs_cast:
    distance = math_ops.cast(distance, result_dtype)
  return distance
| |
| |
def frechet_classifier_distance_from_activations(real_activations,
                                                 generated_activations):
  """Classifier distance for evaluating a generative model.

  Computes the Frechet classifier distance from activations of real images and
  generated images. This can be used independently of
  frechet_classifier_distance(), for example when evaluating with large batches
  where all activations are computed up front.

  The technique is described in detail in https://arxiv.org/abs/1706.08500.
  Given two Gaussian distributions with means m and m_w and covariance matrices
  C and C_w, this function calculates

  |m - m_w|^2 + Tr(C + C_w - 2(C * C_w)^(1/2))

  which captures how different the distributions of real images and generated
  images (or more accurately, their visual features) are. Note that unlike the
  Inception score, this is a true distance and utilizes information about real
  world images.

  Note that when computed using sample means and sample covariance matrices,
  Frechet distance is biased. It is more biased for small sample sizes. (e.g.
  even if the two distributions are the same, for a small sample size, the
  expected Frechet distance is large). It is important to use the same
  sample size to compute frechet classifier distance when comparing two
  generative models.

  Args:
    real_activations: 2D Tensor containing activations of real data. Shape is
      [batch_size, activation_size].
    generated_activations: 2D Tensor containing activations of generated data.
      Shape is [batch_size, activation_size].

  Returns:
    The Frechet Inception distance. A floating-point scalar of the same type
    as the output of the activations.
  """
  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)

  # Compute in float64 and cast the result back to the dtype of
  # `real_activations` at the end.
  result_dtype = real_activations.dtype
  needs_cast = result_dtype != dtypes.float64
  if needs_cast:
    real_activations = math_ops.cast(real_activations, dtypes.float64)
    generated_activations = math_ops.cast(generated_activations, dtypes.float64)

  # Sample means and (dynamic) sample counts of both activation sets.
  real_mean = math_ops.reduce_mean(real_activations, 0)
  generated_mean = math_ops.reduce_mean(generated_activations, 0)
  real_count = math_ops.cast(
      array_ops.shape(real_activations)[0], dtypes.float64)
  generated_count = math_ops.cast(
      array_ops.shape(generated_activations)[0], dtypes.float64)

  def _sample_covariance(centered, num_examples):
    """Returns (X - mu)^T (X - mu) / (n - 1) for row-wise centered examples."""
    return math_ops.matmul(
        centered, centered, transpose_a=True) / (num_examples - 1)

  real_cov = _sample_covariance(real_activations - real_mean, real_count)
  generated_cov = _sample_covariance(
      generated_activations - generated_mean, generated_count)

  # The Tr(sqrt(sigma sigma_w)) component of FID.
  trace_of_root = trace_sqrt_product(real_cov, generated_cov)

  # Covariance component; uses trace(A + B) = trace(A) + trace(B).
  trace_term = math_ops.trace(real_cov + generated_cov) - 2.0 * trace_of_root

  # Squared distance between the means.
  mean_term = math_ops.reduce_sum(
      math_ops.squared_difference(real_mean, generated_mean))

  distance = trace_term + mean_term
  if needs_cast:
    distance = math_ops.cast(distance, result_dtype)
  return distance
| |
# `frechet_classifier_distance` with the classifier fixed to `run_inception`
# evaluated at the `INCEPTION_FINAL_POOL` tensor.
frechet_inception_distance = functools.partial(
    frechet_classifier_distance,
    classifier_fn=functools.partial(
        run_inception, output_tensor=INCEPTION_FINAL_POOL))
| |
| |
def kernel_classifier_distance(real_images,
                               generated_images,
                               classifier_fn,
                               num_classifier_batches=1,
                               max_block_size=1024,
                               dtype=None):
  """Kernel "classifier" distance for evaluating a generative model.

  This generalizes the Kernel Inception distance to an arbitrary embedding.

  The technique is described in detail in https://arxiv.org/abs/1801.01401.
  Given two distributions P and Q of activations, this function calculates

      E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
        - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  where k is the polynomial kernel

      k(x, y) = ( x^T y / dimension + 1 )^3.

  This captures how different the distributions of real and generated images'
  visual features are. Like the Frechet distance (and unlike the Inception
  score), this is a true distance and incorporates information about the
  target images. Unlike the Frechet score, this function computes an
  *unbiased* and asymptotically normal estimator, which makes comparing
  estimates across models much more intuitive.

  The estimator used takes time quadratic in max_block_size. Larger values of
  max_block_size will decrease the variance of the estimator but increase the
  computational cost. This differs slightly from the estimator used by the
  original paper; it is the block estimator of https://arxiv.org/abs/1307.1954.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  NOTE: This function consumes images, computes their activations, and then
  computes the classifier distance. To precompute many activations for real and
  generated images for large batches, or to compute multiple scores based on
  the same images, use kernel_classifier_distance_from_activations().

  Args:
    real_images: Real images to use to compute Kernel Inception distance.
    generated_images: Generated images to use to compute Kernel Inception
      distance.
    classifier_fn: A function that takes images and produces activations based
      on a classifier.
    num_classifier_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.
    max_block_size: integer, default 1024. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate.
    dtype: if not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
    as the output of the activations.
  """
  # Delegate to the variant that also returns the standard error, and keep
  # only its first element (the distance estimate).
  distance_and_std = kernel_classifier_distance_and_std(
      real_images,
      generated_images,
      classifier_fn,
      num_classifier_batches=num_classifier_batches,
      max_block_size=max_block_size,
      dtype=dtype)
  return distance_and_std[0]
| |
| |
# `kernel_classifier_distance` with `classifier_fn` pre-bound to
# `run_inception` reading the `INCEPTION_FINAL_POOL` output tensor. Callers
# still supply the images and may override any remaining keyword argument.
kernel_inception_distance = functools.partial(
    kernel_classifier_distance,
    classifier_fn=functools.partial(
        run_inception,
        output_tensor=INCEPTION_FINAL_POOL,
    ),
)
| |
| |
def kernel_classifier_distance_and_std(real_images,
                                       generated_images,
                                       classifier_fn,
                                       num_classifier_batches=1,
                                       max_block_size=1024,
                                       dtype=None):
  """Kernel "classifier" distance and its standard error, computed from images.

  Generalizes the Kernel Inception distance to an arbitrary embedding given by
  `classifier_fn`, and additionally reports an estimate of the standard error
  of the distance estimator.

  The technique is described in https://arxiv.org/abs/1801.01401. For two
  distributions P and Q of activations, the quantity being estimated is

    E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
      - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  with the polynomial kernel

    k(x, y) = ( x^T y / dimension + 1 )^3.

  The result measures how far apart the visual-feature distributions of real
  and generated images are. Like the Frechet distance (and unlike the
  Inception score), it is a true distance that takes the target images into
  account. Unlike the Frechet score, the estimator is *unbiased* and
  asymptotically normal, so estimates are easier to compare across models.

  Cost grows quadratically with max_block_size. A larger max_block_size lowers
  the variance of the estimate at a higher computational cost. The estimator
  is the block estimator of https://arxiv.org/abs/1307.1954, which differs
  slightly from the one in the original paper.

  NOTE: the blocking code assumes that the real and generated activations are
  both in random order. If either is sorted in a meaningful order, the
  estimator will behave poorly.

  NOTE: This function takes images, runs them through `classifier_fn`, and
  then computes the distance. To precompute activations for large batches, or
  to compute several scores from the same images, call
  kernel_classifier_distance_and_std_from_activations() directly; this
  function delegates to it.

  Args:
    real_images: Real images to use to compute Kernel Inception distance.
    generated_images: Generated images to use to compute Kernel Inception
      distance.
    classifier_fn: A function that takes images and produces activations based
      on a classifier.
    num_classifier_batches: Number of batches to split images in to in order to
      efficiently run them through the classifier network.
    max_block_size: integer, default 1024. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: if not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
      as the output of the activations.
    An estimate of the standard error of the distance estimator (a scalar of
      the same type).
  """

  def _batched_activations(images):
    """Runs `classifier_fn` over `images`, one classifier batch at a time."""
    # Split into `num_classifier_batches` pieces and stack them so that
    # `map_fn` can iterate over the batches.
    batches = array_ops.stack(
        array_ops.split(images, num_or_size_splits=num_classifier_batches))
    # The memory-efficient `map_fn` evaluates the classifier batch by batch.
    per_batch = map_fn.map_fn(
        fn=classifier_fn,
        elems=batches,
        parallel_iterations=1,
        back_prop=False,
        swap_memory=True,
        name='RunClassifier')
    # Undo the batching: join the per-batch outputs along axis 0.
    return array_ops.concat(array_ops.unstack(per_batch), 0)

  real_activations = _batched_activations(real_images)
  generated_activations = _batched_activations(generated_images)

  return kernel_classifier_distance_and_std_from_activations(
      real_activations,
      generated_activations,
      max_block_size=max_block_size,
      dtype=dtype)
| |
| |
# `kernel_classifier_distance_and_std` with `classifier_fn` pre-bound to
# `run_inception` reading the `INCEPTION_FINAL_POOL` output tensor. Callers
# still supply the images and may override any remaining keyword argument.
kernel_inception_distance_and_std = functools.partial(
    kernel_classifier_distance_and_std,
    classifier_fn=functools.partial(
        run_inception,
        output_tensor=INCEPTION_FINAL_POOL,
    ),
)
| |
| |
def kernel_classifier_distance_from_activations(real_activations,
                                                generated_activations,
                                                max_block_size=1024,
                                                dtype=None):
  """Kernel "classifier" distance computed from precomputed activations.

  This method computes the kernel classifier distance from activations of
  real images and generated images. It can be used independently of
  kernel_classifier_distance(), for example when evaluating with large batches
  where all activations are precomputed before the distance is calculated, or
  when several metrics are computed from the same images.

  The technique is described in https://arxiv.org/abs/1801.01401. For two
  distributions P and Q of activations, the quantity being estimated is

    E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
      - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  with the polynomial kernel

    k(x, y) = ( x^T y / dimension + 1 )^3.

  The result measures how far apart the visual-feature distributions of real
  and generated images are. Like the Frechet distance (and unlike the
  Inception score), it is a true distance that takes the target images into
  account. Unlike the Frechet score, the estimator is *unbiased* and
  asymptotically normal, so estimates are easier to compare across models.

  Cost grows quadratically with max_block_size. A larger max_block_size lowers
  the variance of the estimate at a higher computational cost. The estimator
  is the block estimator of https://arxiv.org/abs/1307.1954, which differs
  slightly from the one in the original paper.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  Args:
    real_activations: 2D Tensor containing activations of real data. Shape is
      [batch_size, activation_size].
    generated_activations: 2D Tensor containing activations of generated data.
      Shape is [batch_size, activation_size].
    max_block_size: integer, default 1024. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate.
    dtype: If not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
    as the output of the activations.
  """
  # The shared implementation returns (distance, standard error); only the
  # distance is exposed here.
  distance, _ = kernel_classifier_distance_and_std_from_activations(
      real_activations,
      generated_activations,
      max_block_size=max_block_size,
      dtype=dtype)
  return distance
| |
| |
def kernel_classifier_distance_and_std_from_activations(real_activations,
                                                        generated_activations,
                                                        max_block_size=1024,
                                                        dtype=None):
  """Kernel "classifier" distance and standard error from activations.

  This method computes the kernel classifier distance from activations of
  real images and generated images, together with a rough estimate of the
  standard error of the estimator. It can be used independently of
  kernel_classifier_distance(), for example when evaluating with large batches
  where all activations are precomputed before the distance is calculated, or
  when several metrics are computed from the same images.

  The technique is described in https://arxiv.org/abs/1801.01401. For two
  distributions P and Q of activations, the quantity being estimated is

    E_{X, X' ~ P}[k(X, X')] + E_{Y, Y' ~ Q}[k(Y, Y')]
      - 2 E_{X ~ P, Y ~ Q}[k(X, Y)]

  with the polynomial kernel

    k(x, y) = ( x^T y / dimension + 1 )^3.

  The result measures how far apart the visual-feature distributions of real
  and generated images are. Like the Frechet distance (and unlike the
  Inception score), it is a true distance that takes the target images into
  account. Unlike the Frechet score, the estimator is *unbiased* and
  asymptotically normal, so estimates are easier to compare across models.

  Cost grows quadratically with max_block_size. A larger max_block_size lowers
  the variance of the estimate at a higher computational cost. The estimator
  is the block estimator of https://arxiv.org/abs/1307.1954, which differs
  slightly from the one in the original paper. The standard error estimate is
  more reliable when there are more blocks, i.e. when max_block_size is
  smaller; with a single block it is NaN.

  NOTE: the blocking code assumes that real_activations and
  generated_activations are both in random order. If either is sorted in a
  meaningful order, the estimator will behave poorly.

  NOTE: each block's estimate divides by m * (m - 1) and n * (n - 1), where m
  and n are the numbers of real and generated samples in that block, so a
  block holding fewer than two samples from either set does not yield a finite
  value.

  Args:
    real_activations: 2D Tensor containing activations of real data. Shape is
      [batch_size, activation_size].
    generated_activations: 2D Tensor containing activations of generated data.
      Shape is [batch_size, activation_size].
    max_block_size: integer, default 1024. The distance estimator splits samples
      into blocks for computational efficiency. Larger values are more
      computationally expensive but decrease the variance of the distance
      estimate. Having a smaller block size also gives a better estimate of the
      standard error.
    dtype: If not None, coerce activations to this dtype before computations.

  Returns:
    The Kernel Inception Distance. A floating-point scalar of the same type
      as the output of the activations.
    An estimate of the standard error of the distance estimator (a scalar of
      the same type).
  """
  # Static shape checks: both inputs are rank 2 and share the feature size.
  real_activations.shape.assert_has_rank(2)
  generated_activations.shape.assert_has_rank(2)
  real_activations.shape[1].assert_is_compatible_with(
      generated_activations.shape[1])

  if dtype is not None:
    real_activations = math_ops.cast(real_activations, dtype)
    generated_activations = math_ops.cast(generated_activations, dtype)
  else:
    # Without an explicit dtype, both inputs must already agree.
    dtype = real_activations.dtype
    assert generated_activations.dtype == dtype

  # Choose the number of blocks so that no block of the larger sample set
  # exceeds max_block_size, then split both sets into that many blocks of
  # approximately equal size.
  num_real = array_ops.shape(real_activations)[0]
  num_generated = array_ops.shape(generated_activations)[0]

  largest_count = math_ops.maximum(num_real, num_generated)
  num_blocks = math_ops.cast(
      math_ops.ceil(largest_count / max_block_size), dtypes.int32)

  base_real = num_real // num_blocks
  base_generated = num_generated // num_blocks

  # Number of blocks that must take one extra sample to absorb the remainder.
  extra_real = num_real - base_real * num_blocks
  extra_generated = num_generated - base_generated * num_blocks

  real_block_sizes = array_ops.concat([
      array_ops.fill([num_blocks - extra_real], base_real),
      array_ops.fill([extra_real], base_real + 1),
  ], 0)
  generated_block_sizes = array_ops.concat([
      array_ops.fill([num_blocks - extra_generated], base_generated),
      array_ops.fill([extra_generated], base_generated + 1),
  ], 0)

  # Block i covers rows [offsets[i], offsets[i + 1]).
  leading_zero = array_ops.zeros([1], dtype=dtypes.int32)
  real_offsets = array_ops.concat(
      [leading_zero, math_ops.cumsum(real_block_sizes)], 0)
  generated_offsets = array_ops.concat(
      [leading_zero, math_ops.cumsum(generated_block_sizes)], 0)

  feature_dim = math_ops.cast(real_activations.shape[1], dtype)

  def cubic_kernel(lhs, rhs):
    """Gram matrix of k(x, y) = (x^T y / dimension + 1)^3 for rows of inputs."""
    return (math_ops.matmul(lhs, rhs, transpose_b=True) / feature_dim + 1)**3

  def off_diagonal_mean(gram, count):
    """Mean of the off-diagonal entries of a count x count Gram matrix."""
    off_diagonal_sum = math_ops.reduce_sum(gram) - math_ops.trace(gram)
    return off_diagonal_sum / (count * (count - 1))

  def compute_kid_block(i):
    """Computes the ith block of the KID estimate."""
    real_start = real_offsets[i]
    real_stop = real_offsets[i + 1]
    real_block = real_activations[real_start:real_stop]
    real_count = math_ops.cast(real_stop - real_start, dtype)

    generated_start = generated_offsets[i]
    generated_stop = generated_offsets[i + 1]
    generated_block = generated_activations[generated_start:generated_stop]
    generated_count = math_ops.cast(generated_stop - generated_start, dtype)

    gram_real = cubic_kernel(real_block, real_block)
    gram_cross = cubic_kernel(real_block, generated_block)
    gram_generated = cubic_kernel(generated_block, generated_block)

    # The within-set terms exclude the diagonal (each sample paired with
    # itself); the cross term averages over every real/generated pair.
    cross_term = -2 * math_ops.reduce_mean(gram_cross)
    real_term = off_diagonal_mean(gram_real, real_count)
    generated_term = off_diagonal_mean(gram_generated, generated_count)
    return cross_term + real_term + generated_term

  block_estimates = map_fn.map_fn(
      compute_kid_block,
      math_ops.range(num_blocks),
      dtype=dtype,
      back_prop=False)

  mean_estimate = math_ops.reduce_mean(block_estimates)

  # The variance across blocks uses the Bessel correction (divide by
  # num_blocks - 1), which nn_impl.moments does not apply. It is undefined
  # for a single block, in which case NaN is reported.
  num_blocks_float = math_ops.cast(num_blocks, dtype)

  def undefined_variance():
    return array_ops.constant(float('nan'), dtype=dtype)

  def bessel_corrected_variance():
    squared_deviations = math_ops.square(block_estimates - mean_estimate)
    return math_ops.reduce_sum(squared_deviations) / (num_blocks_float - 1)

  variance = control_flow_ops.cond(
      math_ops.less_equal(num_blocks, 1),
      undefined_variance,
      bessel_corrected_variance)

  # Standard error of the mean over blocks.
  return mean_estimate, math_ops.sqrt(variance / num_blocks_float)