| # Copyright 2015 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| """Implementation of image ops.""" |
| |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import functools |
| import numpy as np |
| |
| from tensorflow.python.compat import compat |
| from tensorflow.python.eager import def_function |
| from tensorflow.python.framework import constant_op |
| from tensorflow.python.framework import dtypes |
| from tensorflow.python.framework import ops |
| from tensorflow.python.framework import random_seed |
| from tensorflow.python.framework import tensor_shape |
| from tensorflow.python.framework import tensor_util |
| from tensorflow.python.ops import array_ops |
| from tensorflow.python.ops import check_ops |
| from tensorflow.python.ops import control_flow_ops |
| from tensorflow.python.ops import gen_image_ops |
| from tensorflow.python.ops import math_ops |
| from tensorflow.python.ops import nn |
| from tensorflow.python.ops import nn_ops |
| from tensorflow.python.ops import random_ops |
| from tensorflow.python.ops import sort_ops |
| from tensorflow.python.ops import stateless_random_ops |
| from tensorflow.python.ops import string_ops |
| from tensorflow.python.ops import variables |
| from tensorflow.python.util import deprecation |
| from tensorflow.python.util import dispatch |
| from tensorflow.python.util.tf_export import tf_export |
| |
# Declare the op types below as not differentiable. The TODO notes are kept
# directly above the op type they refer to.
for _op_type_without_gradient in (
    'RandomCrop',
    # TODO(b/31222613): This op may be differentiable, and there may be
    # latent bugs here.
    'HSVToRGB',
    'DrawBoundingBoxes',
    'SampleDistortedBoundingBox',
    'SampleDistortedBoundingBoxV2',
    # TODO(bsteiner): Implement the gradient function for extract_glimpse
    # TODO(b/31222613): This op may be differentiable, and there may be
    # latent bugs here.
    'ExtractGlimpse',
    'NonMaxSuppression',
    'NonMaxSuppressionV2',
    'NonMaxSuppressionWithOverlaps',
    'GenerateBoundingBoxProposals',
):
  ops.NotDifferentiable(_op_type_without_gradient)
# Do not leave the loop variable behind in the module namespace.
del _op_type_without_gradient
| |
| |
| # pylint: disable=invalid-name |
def _assert(cond, ex_type, msg):
  """Checks `cond`, accepting both tensors and plain Python values.

  A non-tensor `cond` is checked immediately: a falsy value raises
  `ex_type(msg)` and a truthy value produces an empty list. A tensor `cond` is
  not evaluated here; instead a TensorFlow assert op on it is created and
  returned inside a single-element list.

  Args:
    cond: Something that evaluates to a boolean value. May be a tensor.
    ex_type: The exception class raised when a non-tensor `cond` is falsy.
    msg: The error message.

  Returns:
    A list containing at most one assert op.

  Raises:
    ex_type: If `cond` is not a tensor and evaluates to false.
  """
  if not _is_tensor(cond):
    if cond:
      return []
    raise ex_type(msg)
  return [control_flow_ops.Assert(cond, [msg])]
| |
| |
def _is_tensor(x):
  """Tells whether `x` is a `tf.Tensor` or a `tf.Variable`.

  Args:
    x: A python object to check.

  Returns:
    `True` if `x` is an instance of `ops.Tensor` or `variables.Variable`,
    otherwise `False`.
  """
  tensor_like_types = (ops.Tensor, variables.Variable)
  return isinstance(x, tensor_like_types)
| |
| |
def _ImageDimensions(image, rank):
  """Returns the per-axis sizes of an image tensor.

  Args:
    image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`.
    rank: The expected rank of the image.

  Returns:
    A list with one entry per dimension of `image`. Entries whose size is
    statically known are python integers; the remaining entries are integer
    scalar tensors taken from the dynamic shape.
  """
  static_shape = image.get_shape()
  if static_shape.is_fully_defined():
    return static_shape.as_list()

  # Validate the rank first, then fill every statically unknown entry with the
  # matching element of the runtime shape.
  known_dims = image.get_shape().with_rank(rank).as_list()
  runtime_dims = array_ops.unstack(array_ops.shape(image), rank)
  dims = []
  for known, runtime in zip(known_dims, runtime_dims):
    dims.append(runtime if known is None else known)
  return dims
| |
| |
def _Check3DImage(image, require_static=True):
  """Validates that `image` is a well-formed 3-D image.

  Args:
    image: 3-D Tensor of shape [height, width, channels]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if `image.shape` is not a 3-vector, if `require_static` is set
      and the shape is not fully defined, or if any statically known dimension
      is zero.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing an assert op is returned.
  """
  try:
    shape = image.get_shape().with_rank(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be three-dimensional." %
                     image.shape)

  fully_defined = shape.is_fully_defined()
  if require_static and not fully_defined:
    raise ValueError("'image' (shape %s) must be fully defined." % shape)

  # Statically known zero-sized dimensions are rejected right away.
  for dim in shape:
    if dim == 0:
      raise ValueError("all dims of 'image.shape' must be > 0: %s" % shape)

  if fully_defined:
    return []

  # Some dimension is only known at run time, so defer the positivity check.
  return [
      check_ops.assert_positive(
          array_ops.shape(image),
          ["all dims of 'image.shape' must be > 0."])
  ]
| |
| |
def _Assert3DImage(image):
  """Returns `image` guarded by the 3-D image shape checks.

  The checks run statically when the shape is statically known. Otherwise the
  returned tensor gets a control dependency on an assert op that checks the
  dynamic shape.

  Args:
    image: 3-D Tensor of shape [height, width, channels]

  Raises:
    ValueError: if `image.shape` is not a 3-vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  shape_checks = _Check3DImage(image, require_static=False)
  return control_flow_ops.with_dependencies(shape_checks, image)
| |
| |
def _AssertAtLeast3DImage(image):
  """Returns `image` guarded by the "at least 3-D image" shape checks.

  The checks run statically when the shape is statically known. Otherwise the
  returned tensor gets a control dependency on assert ops that check the
  dynamic shape.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]

  Raises:
    ValueError: if image.shape is not a [>= 3] vector.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  shape_checks = _CheckAtLeast3DImage(image, require_static=False)
  return control_flow_ops.with_dependencies(shape_checks, image)
| |
| |
def _CheckAtLeast3DImage(image, require_static=True):
  """Validates that `image` has at least three, non-empty, inner dimensions.

  Args:
    image: >= 3-D Tensor of size [*, height, width, depth]
    require_static: If `True`, requires that all dimensions of `image` are known
      and non-zero.

  Raises:
    ValueError: if image.shape is not a [>= 3] vector, if `require_static` is
      set and the shape is not fully defined, or if one of the three innermost
      dimensions is statically known to be zero.

  Returns:
    An empty list, if the three innermost dimensions of `image` are fully
    defined. Otherwise, a list containing assert ops is returned.
  """
  try:
    static_shape = image.get_shape()
    # A shape of unknown rank is treated as exactly 3-D for the static checks.
    if static_shape.ndims is None:
      image_shape = static_shape.with_rank(3)
    else:
      image_shape = static_shape.with_rank_at_least(3)
  except ValueError:
    raise ValueError("'image' (shape %s) must be at least three-dimensional." %
                     image.shape)

  inner_dims = image_shape[-3:]
  if require_static and not image_shape.is_fully_defined():
    raise ValueError("'image' must be fully defined.")
  if any(dim == 0 for dim in inner_dims):
    raise ValueError("inner 3 dims of 'image.shape' must be > 0: %s" %
                     image_shape)

  if inner_dims.is_fully_defined():
    return []

  # Part of [height, width, depth] is unknown: check size and rank at run time.
  return [
      check_ops.assert_positive(
          array_ops.shape(image)[-3:],
          ["inner 3 dims of 'image.shape' must be > 0."]),
      check_ops.assert_greater_equal(
          array_ops.rank(image),
          3,
          message="'image' must be at least three-dimensional.")
  ]
| |
| |
def _AssertGrayscaleImage(image):
  """Returns `image` guarded by the grayscale image shape checks.

  The checks run statically when the shape is statically known. Otherwise the
  returned tensor gets a control dependency on assert ops that check the
  dynamic shape.

  Args:
    image: >= 2-D Tensor of size [*, 1]

  Raises:
    ValueError: if image.shape is not a [>= 2] vector or if
              last dimension is not size 1.

  Returns:
    If the shape of `image` could be verified statically, `image` is
    returned unchanged, otherwise there will be a control dependency
    added that asserts the correct dynamic shape.
  """
  shape_checks = _CheckGrayscaleImage(image, require_static=False)
  return control_flow_ops.with_dependencies(shape_checks, image)
| |
| |
def _CheckGrayscaleImage(image, require_static=True):
  """Validates that `image` is a properly shaped grayscale image.

  Args:
    image: >= 2-D Tensor of size [*, 1]
    require_static: Boolean, whether static shape is required.

  Raises:
    ValueError: if image.shape is not a [>= 2] vector, if `require_static` is
      set and the shape is not fully defined, or if the shape is fully defined
      and the last dimension is not size 1.

  Returns:
    An empty list, if `image` has fully defined dimensions. Otherwise, a list
    containing assert ops is returned.
  """
  try:
    static_shape = image.get_shape()
    # A shape of unknown rank is treated as exactly 2-D for the static checks.
    if static_shape.ndims is None:
      image_shape = static_shape.with_rank(2)
    else:
      image_shape = static_shape.with_rank_at_least(2)
  except ValueError:
    raise ValueError('A grayscale image (shape %s) must be at least '
                     'two-dimensional.' % image.shape)

  fully_defined = image_shape.is_fully_defined()
  if require_static and not fully_defined:
    raise ValueError('\'image\' must be fully defined.')

  if fully_defined:
    if image_shape[-1] != 1:
      raise ValueError('Last dimension of a grayscale image should be size 1.')
    return []

  # The shape is only partially known, so defer the checks to run time. The
  # rank bound is 2 so that it agrees with the static check above and with the
  # assert message; a bound of 3 would reject valid `[N, 1]` inputs at run time.
  return [
      check_ops.assert_equal(
          array_ops.shape(image)[-1],
          1,
          message='Last dimension of a grayscale image should be size 1.'),
      check_ops.assert_greater_equal(
          array_ops.rank(image),
          2,
          message='A grayscale image must be at least two-dimensional.')
  ]
| |
| |
def fix_image_flip_shape(image, result):
  """Copies the static shape of `image` onto `result`.

  When nothing at all is known about the shape of `image`, `result` is marked
  as 3-dimensional with unknown sizes instead.

  Args:
    image: The original image tensor.
    result: The flipped or transformed image tensor; its static shape is
      updated in place.

  Returns:
    `result`, whose shape is at least (None, None, None).
  """
  original_shape = image.get_shape()
  if original_shape == tensor_shape.unknown_shape():
    target_shape = [None, None, None]
  else:
    target_shape = original_shape
  result.set_shape(target_shape)
  return result
| |
| |
@tf_export('image.random_flip_up_down')
@dispatch.add_dispatch_support
def random_flip_up_down(image, seed=None):
  """Randomly flips an image vertically (upside down).

  With a 1 in 2 chance, the output is `image` flipped along the `height`
  dimension; otherwise the image is returned as-is. For a batch of images,
  the decision is made independently for every image in the batch.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ... [
  ...     [[[1], [2]], [[3], [4]]],
  ...     [[[5], [6]], [[7], [8]]]
  ... ])
  >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
  [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  uniform_sampler = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(
      image,
      flip_index=0,
      random_func=uniform_sampler,
      scope_name='random_flip_up_down')
| |
| |
@tf_export('image.random_flip_left_right')
@dispatch.add_dispatch_support
def random_flip_left_right(image, seed=None):
  """Randomly flips an image horizontally (left to right).

  With a 1 in 2 chance, the output is `image` flipped along the `width`
  dimension; otherwise the image is returned as-is. For a batch of images,
  the decision is made independently for every image in the batch.

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Randomly flip multiple images.

  >>> images = np.array(
  ... [
  ...     [[[1], [2]], [[3], [4]]],
  ...     [[[5], [6]], [[7], [8]]]
  ... ])
  >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
  [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  uniform_sampler = functools.partial(random_ops.random_uniform, seed=seed)
  return _random_flip(
      image,
      flip_index=1,
      random_func=uniform_sampler,
      scope_name='random_flip_left_right')
| |
| |
@tf_export('image.stateless_random_flip_left_right', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_left_right(image, seed):
  """Randomly flips an image horizontally (left to right) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_left_right(image, seed).numpy().tolist()
  [[[2], [1]], [[4], [3]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  # Bind the seed so the shared flip helper can call the sampler uniformly.
  stateless_sampler = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image,
      flip_index=1,
      random_func=stateless_sampler,
      scope_name='stateless_random_flip_left_right')
| |
| |
@tf_export('image.stateless_random_flip_up_down', v1=[])
@dispatch.add_dispatch_support
def stateless_random_flip_up_down(image, seed):
  """Randomly flips an image vertically (upside down) deterministically.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2]], [[3], [4]]])
  >>> seed = (2, 3)
  >>> tf.image.stateless_random_flip_up_down(image, seed).numpy().tolist()
  [[[3], [4]], [[1], [2]]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    A tensor of the same type and shape as `image`.
  """
  # Bind the seed so the shared flip helper can call the sampler uniformly.
  stateless_sampler = functools.partial(
      stateless_random_ops.stateless_random_uniform, seed=seed)
  return _random_flip(
      image,
      flip_index=0,
      random_func=stateless_sampler,
      scope_name='stateless_random_flip_up_down')
| |
| |
def _random_flip(image, flip_index, random_func, scope_name):
  """Flips an image along axis `flip_index` with a 50% chance.

  A single (3-D) image is flipped as a whole when a uniform sample from
  [0, 1) falls below 0.5. For a (4-D) batch one uniform sample is drawn per
  image and rounded to a 0/1 mask that selects, per image, between the
  original and the flipped version.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: Dimension along which to flip the image.
      Vertical is 0, Horizontal is 1.
    random_func: partial function for calling either stateful or stateless
      random ops with `seed` parameter specified.
    scope_name: Name of the scope in which the ops are added.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(None, scope_name, [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    static_shape = image.get_shape()

    def _flip_single_image():
      coin = random_func(shape=[], minval=0, maxval=1.0)
      should_flip = math_ops.less(coin, .5)
      maybe_flipped = control_flow_ops.cond(
          should_flip,
          lambda: array_ops.reverse(image, [flip_index]),
          lambda: image,
          name=scope)
      return fix_image_flip_shape(image, maybe_flipped)

    def _flip_image_batch():
      num_images = array_ops.shape(image)[0]
      coins = random_func(shape=[num_images], minval=0, maxval=1.0)
      # Rounding turns each sample into 0. or 1.; the reshape lets the mask
      # broadcast over height, width and channels.
      flip_mask = math_ops.round(
          array_ops.reshape(coins, [num_images, 1, 1, 1]))
      flip_mask = math_ops.cast(flip_mask, image.dtype)
      # The batch dimension comes first, so the image axes are shifted by one.
      mirrored = array_ops.reverse(image, [flip_index + 1])
      return flip_mask * mirrored + (1 - flip_mask) * image

    num_dims = static_shape.ndims
    if num_dims is None:
      # Rank is only known at run time: choose the implementation dynamically.
      rank = array_ops.rank(image)
      return control_flow_ops.cond(
          math_ops.equal(rank, 3), _flip_single_image, _flip_image_batch)
    if num_dims == 3:
      return _flip_single_image()
    if num_dims == 4:
      return _flip_image_batch()
    raise ValueError(
        '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
        static_shape)
| |
| |
@tf_export('image.flip_left_right')
@dispatch.add_dispatch_support
def flip_left_right(image):
  """Flips an image horizontally (left to right).

  The output holds the contents of `image` reversed along the width dimension.

  See also `tf.reverse`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_left_right(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 4.,  5.,  6.],
          [ 1.,  2.,  3.]],
         [[10., 11., 12.],
          [ 7.,  8.,  9.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A tensor of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  # Index 1 selects the width axis of a single image.
  return _flip(image, flip_index=1, scope_name='flip_left_right')
| |
| |
@tf_export('image.flip_up_down')
@dispatch.add_dispatch_support
def flip_up_down(image):
  """Flips an image vertically (upside down).

  The output holds the contents of `image` reversed along the height dimension.

  See also `reverse()`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.flip_up_down(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 7.,  8.,  9.],
          [10., 11., 12.]],
         [[ 1.,  2.,  3.],
          [ 4.,  5.,  6.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  # Index 0 selects the height axis of a single image.
  return _flip(image, flip_index=0, scope_name='flip_up_down')
| |
| |
def _flip(image, flip_index, scope_name):
  """Flip an image either horizontally or vertically.

  Outputs the contents of `image` flipped along the dimension `flip_index`.

  See also `reverse()`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    flip_index: 0 For vertical, 1 for horizontal.
    scope_name: string, scope name.

  Returns:
    A `Tensor` of the same type and shape as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(None, scope_name, [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def f_rank3():
      return fix_image_flip_shape(image, array_ops.reverse(image, [flip_index]))

    def f_rank4():
      # The batch dimension comes first, so the image axes are shifted by one.
      return array_ops.reverse(image, [flip_index + 1])

    if shape.ndims is None:
      # Rank is only known at run time: choose the implementation dynamically.
      rank = array_ops.rank(image)
      return control_flow_ops.cond(math_ops.equal(rank, 3), f_rank3, f_rank4)
    elif shape.ndims == 3:
      return f_rank3()
    elif shape.ndims == 4:
      return f_rank4()
    else:
      # Same wording as the sibling ops; the missing space after the shape
      # placeholder has been restored.
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)
| |
| |
@tf_export('image.rot90')
@dispatch.add_dispatch_support
def rot90(image, k=1, name=None):
  """Rotates image(s) counter-clockwise by 90 degrees, `k` times.

  For example:

  >>> a=tf.constant([[[1],[2]],
  ...                [[3],[4]]])
  >>> # rotating `a` counter clockwise by 90 degrees
  >>> a_rot=tf.image.rot90(a)
  >>> print(a_rot[...,0].numpy())
  [[2 4]
   [1 3]]
  >>> # rotating `a` counter clockwise by 270 degrees
  >>> a_rot=tf.image.rot90(a, k=3)
  >>> print(a_rot[...,0].numpy())
  [[3 1]
   [4 2]]

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
      It is reduced modulo 4 before use.
    name: A name for this operation (optional).

  Returns:
    A rotated tensor of the same type as `image`.

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(name, 'rot90', [image, k]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    k = ops.convert_to_tensor(k, dtype=dtypes.int32, name='k')
    k.get_shape().assert_has_rank(0)
    k = math_ops.mod(k, 4)

    shape = image.get_shape()
    num_dims = shape.ndims
    if num_dims == 3:
      return _rot90_3D(image, k, scope)
    if num_dims == 4:
      return _rot90_4D(image, k, scope)
    if num_dims is not None:
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)

    # Rank is only known at run time: choose the implementation dynamically.
    rank = array_ops.rank(image)
    return control_flow_ops.cond(
        math_ops.equal(rank, 3),
        lambda: _rot90_3D(image, k, scope),
        lambda: _rot90_4D(image, k, scope))
| |
| |
def _rot90_3D(image, k, name_scope):
  """Rotates a single image counter-clockwise by 90 degrees `k` times.

  Args:
    image: 3-D Tensor of shape `[height, width, channels]`.
    k: A scalar integer. The number of times the image is rotated by 90 degrees.
      Values other than 1, 2 and 3 leave the image unrotated.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 3-D tensor of the same type as `image`. Its static height and width are
    left unknown; the channel dimension is copied from `image`.
  """

  def _quarter_turn():
    return array_ops.transpose(array_ops.reverse_v2(image, [1]), [1, 0, 2])

  def _half_turn():
    return array_ops.reverse_v2(image, [0, 1])

  def _three_quarter_turn():
    return array_ops.reverse_v2(array_ops.transpose(image, [1, 0, 2]), [1])

  rotations = ((1, _quarter_turn), (2, _half_turn), (3, _three_quarter_turn))
  pred_fn_pairs = [
      (math_ops.equal(k, turns), rotate_fn) for turns, rotate_fn in rotations
  ]

  rotated = control_flow_ops.case(
      pred_fn_pairs, default=lambda: image, exclusive=True, name=name_scope)
  # Height and width may have been swapped, so only the channels are pinned.
  rotated.set_shape([None, None, image.get_shape()[2]])
  return rotated
| |
| |
def _rot90_4D(images, k, name_scope):
  """Rotates a batch of images counter-clockwise by 90 degrees `k` times.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]`.
    k: A scalar integer. The number of times the images are rotated by 90
      degrees. Values other than 1, 2 and 3 leave the images unrotated.
    name_scope: A valid TensorFlow name scope.

  Returns:
    A 4-D `Tensor` of the same type as `images`. Its static height and width
    are left unknown; batch and channel dimensions are kept from the result.
  """

  def _quarter_turn():
    return array_ops.transpose(array_ops.reverse_v2(images, [2]), [0, 2, 1, 3])

  def _half_turn():
    return array_ops.reverse_v2(images, [1, 2])

  def _three_quarter_turn():
    return array_ops.reverse_v2(array_ops.transpose(images, [0, 2, 1, 3]), [2])

  rotations = ((1, _quarter_turn), (2, _half_turn), (3, _three_quarter_turn))
  pred_fn_pairs = [
      (math_ops.equal(k, turns), rotate_fn) for turns, rotate_fn in rotations
  ]

  rotated = control_flow_ops.case(
      pred_fn_pairs, default=lambda: images, exclusive=True, name=name_scope)
  # Height and width may have been swapped, so only batch and channels are
  # pinned.
  rotated_shape = rotated.get_shape()
  rotated.set_shape([rotated_shape[0], None, None, rotated_shape[3]])
  return rotated
| |
| |
@tf_export('image.transpose', v1=['image.transpose', 'image.transpose_image'])
@dispatch.add_dispatch_support
def transpose(image, name=None):
  """Transposes image(s) by swapping the height and width dimension.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.transpose(x)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.,  2.,  3.],
          [ 7.,  8.,  9.]],
         [[ 4.,  5.,  6.],
          [10., 11., 12.]]], dtype=float32)>

  >>> image = [[[1, 2], [3, 4]],
  ...          [[5, 6], [7, 8]],
  ...          [[9, 10], [11, 12]]]
  >>> image = tf.constant(image)
  >>> tf.image.transpose(image)
  <tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
  array([[[ 1,  2],
          [ 5,  6],
          [ 9, 10]],
         [[ 3,  4],
          [ 7,  8],
          [11, 12]]], dtype=int32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    name: A name for this operation (optional).

  Returns:
    If `image` was 4-D, a 4-D Tensor of shape
    `[batch, width, height, channels]`
    If `image` was 3-D, a 3-D Tensor of shape
    `[width, height, channels]`

  Raises:
    ValueError: if the shape of `image` not supported.
  """
  with ops.name_scope(name, 'transpose', [image]):
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)
    shape = image.get_shape()

    def _swap_single_image():
      return array_ops.transpose(image, [1, 0, 2], name=name)

    def _swap_image_batch():
      return array_ops.transpose(image, [0, 2, 1, 3], name=name)

    num_dims = shape.ndims
    if num_dims is None:
      # Rank is only known at run time: choose the permutation dynamically.
      rank = array_ops.rank(image)
      return control_flow_ops.cond(
          math_ops.equal(rank, 3), _swap_single_image, _swap_image_batch)
    if num_dims == 3:
      return _swap_single_image()
    if num_dims == 4:
      return _swap_image_batch()
    raise ValueError(
        '\'image\' (shape %s) must have either 3 or 4 dimensions.' % shape)
| |
| |
@tf_export('image.central_crop')
@dispatch.add_dispatch_support
def central_crop(image, central_fraction):
  """Crop the central region of the image(s).

  Remove the outer parts of an image but retain the central region of the image
  along each dimension. If we specify central_fraction = 0.5, this function
  returns the region marked with "X" in the below diagram.

       --------
      |        |
      |  XXXX  |
      |  XXXX  |
      |        |   where "X" is the central 50% of the image.
       --------

  This function works on either a single image (`image` is a 3-D Tensor), or a
  batch of images (`image` is a 4-D Tensor).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0],
  ...       [7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]],
  ...      [[13.0, 14.0, 15.0],
  ...       [16.0, 17.0, 18.0],
  ...       [19.0, 20.0, 21.0],
  ...       [22.0, 23.0, 24.0]],
  ...      [[25.0, 26.0, 27.0],
  ...       [28.0, 29.0, 30.0],
  ...       [31.0, 32.0, 33.0],
  ...       [34.0, 35.0, 36.0]],
  ...      [[37.0, 38.0, 39.0],
  ...       [40.0, 41.0, 42.0],
  ...       [43.0, 44.0, 45.0],
  ...       [46.0, 47.0, 48.0]]]
  >>> tf.image.central_crop(x, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[16., 17., 18.],
          [19., 20., 21.]],
         [[28., 29., 30.],
          [31., 32., 33.]]], dtype=float32)>

  Args:
    image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D
      Tensor of shape [batch_size, height, width, depth].
    central_fraction: float (0, 1], fraction of size to crop

  Raises:
    ValueError: if `central_fraction` is not within (0, 1], or if `image` does
      not have a statically known rank of 3 or 4.

  Returns:
    3-D / 4-D float Tensor, as per the input.
  """
  with ops.name_scope(None, 'central_crop', [image]):
    image = ops.convert_to_tensor(image, name='image')
    if central_fraction <= 0.0 or central_fraction > 1.0:
      raise ValueError('central_fraction must be within (0, 1]')
    if central_fraction == 1.0:
      # Nothing to remove; the input is returned without further validation.
      return image

    # Keep the returned tensor: when part of the shape is only known at run
    # time it carries the control dependencies on the shape assert ops, which
    # would otherwise never be attached to the crop.
    image = _AssertAtLeast3DImage(image)
    rank = image.get_shape().ndims
    if rank != 3 and rank != 4:
      raise ValueError('`image` should either be a Tensor with rank = 3 or '
                       'rank = 4. Had rank = {}.'.format(rank))

    # Helper method to return the `idx`-th dimension of `tensor`, along with
    # a boolean signifying if the dimension is dynamic.
    def _get_dim(tensor, idx):
      static_dim = tensor.get_shape().dims[idx].value
      if static_dim is not None:
        return static_dim, False
      return array_ops.shape(tensor)[idx], True

    # Helper method to compute the offset of the crop along one dimension:
    # half of the part that is cut away, truncated to an integer. Stays a
    # python int for static dimensions and becomes an int32 tensor otherwise.
    def _crop_start(dim, is_dynamic):
      if is_dynamic:
        dim_f = math_ops.cast(dim, dtypes.float64)
        return math_ops.cast((dim_f - dim_f * central_fraction) / 2,
                             dtypes.int32)
      dim_f = float(dim)
      return int((dim_f - dim_f * central_fraction) / 2)

    # Get the height, width, depth (and batch size, if the image is a 4-D
    # tensor).
    if rank == 3:
      img_h, dynamic_h = _get_dim(image, 0)
      img_w, dynamic_w = _get_dim(image, 1)
      img_d = image.get_shape()[2]
    else:
      img_bs = image.get_shape()[0]
      img_h, dynamic_h = _get_dim(image, 1)
      img_w, dynamic_w = _get_dim(image, 2)
      img_d = image.get_shape()[3]

    # Compute the bounding boxes for the crop. The type and value of the
    # bounding boxes depend on the `image` tensor's rank and whether / not the
    # dimensions are statically defined.
    bbox_h_start = _crop_start(img_h, dynamic_h)
    bbox_w_start = _crop_start(img_w, dynamic_w)

    # The same margin is removed on both sides of each dimension.
    bbox_h_size = img_h - bbox_h_start * 2
    bbox_w_size = img_w - bbox_w_start * 2

    # A size of -1 keeps the full extent of that dimension.
    if rank == 3:
      bbox_begin = array_ops.stack([bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([bbox_h_size, bbox_w_size, -1])
    else:
      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
      bbox_size = array_ops.stack([-1, bbox_h_size, bbox_w_size, -1])

    image = array_ops.slice(image, bbox_begin, bbox_size)

    # Restore as much static shape information as is available.
    static_h = None if dynamic_h else bbox_h_size
    static_w = None if dynamic_w else bbox_w_size
    if rank == 3:
      image.set_shape([static_h, static_w, img_d])
    else:
      image.set_shape([img_bs, static_h, static_w, img_d])
    return image
| |
| |
@tf_export('image.pad_to_bounding_box')
@dispatch.add_dispatch_support
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width):
  """Pad `image` with zeros to the specified `height` and `width`.

  Adds `offset_height` rows of zeros on top, `offset_width` columns of
  zeros on the left, and then pads the image on the bottom and right
  with zeros until it has dimensions `target_height`, `target_width`.

  This op does nothing if `offset_*` is zero and the image already has size
  `target_height` by `target_width`.

  Usage Example:

  >>> x = [[[1., 2., 3.],
  ...       [4., 5., 6.]],
  ...       [[7., 8., 9.],
  ...       [10., 11., 12.]]]
  >>> padded_image = tf.image.pad_to_bounding_box(x, 1, 1, 4, 4)
  >>> padded_image
  <tf.Tensor: shape=(4, 4, 3), dtype=float32, numpy=
  array([[[ 0.,  0.,  0.],
  [ 0.,  0.,  0.],
  [ 0.,  0.,  0.],
  [ 0.,  0.,  0.]],
  [[ 0.,  0.,  0.],
  [ 1.,  2.,  3.],
  [ 4.,  5.,  6.],
  [ 0.,  0.,  0.]],
  [[ 0.,  0.,  0.],
  [ 7.,  8.,  9.],
  [10., 11., 12.],
  [ 0.,  0.,  0.]],
  [[ 0.,  0.,  0.],
  [ 0.,  0.,  0.],
  [ 0.,  0.,  0.],
  [ 0.,  0.,  0.]]], dtype=float32)>

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative.
  """
  with ops.name_scope(None, 'pad_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    input_shape = image.get_shape()
    input_rank = input_shape.ndims
    if input_rank not in (None, 3, 4):
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          input_shape)

    # Everything below operates on a 4-D tensor. A 3-D (or unknown-rank) input
    # gets a leading batch dimension that is squeezed away before returning.
    is_batch = input_rank == 4
    if not is_batch:
      image = array_ops.expand_dims(image, 0)
      if input_rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)
    batch, height, width, depth = _ImageDimensions(image, rank=4)

    # Amount of zero padding still needed on the right and at the bottom once
    # the left/top offsets and the image itself are accounted for.
    pad_right = target_width - offset_width - width

    pad_bottom = target_height - offset_height - height

    checks += _assert(offset_height >= 0, ValueError,
                      'offset_height must be >= 0')
    checks += _assert(offset_width >= 0, ValueError,
                      'offset_width must be >= 0')
    checks += _assert(pad_right >= 0, ValueError,
                      'width must be <= target - offset')
    checks += _assert(pad_bottom >= 0, ValueError,
                      'height must be <= target - offset')
    image = control_flow_ops.with_dependencies(checks, image)

    # One (before, after) pair per dimension; the batch and depth dimensions
    # are never padded.
    flat_paddings = array_ops.stack([
        0, 0, offset_height, pad_bottom, offset_width, pad_right, 0, 0
    ])
    paddings = array_ops.reshape(flat_paddings, [4, 2])
    padded = array_ops.pad(image, paddings)

    # Record every output dimension that is known statically; dimensions that
    # are tensors stay unknown.
    static_dims = []
    for dim in (batch, target_height, target_width, depth):
      static_dims.append(None if _is_tensor(dim) else dim)
    padded.set_shape(static_dims)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded
| |
| |
@tf_export('image.crop_to_bounding_box')
@dispatch.add_dispatch_support
def crop_to_bounding_box(image, offset_height, offset_width, target_height,
                         target_width):
  """Crops an image to a specified bounding box.

  This op cuts a rectangular part out of `image`. The top-left corner of the
  returned image is at `offset_height, offset_width` in `image`, and its
  lower-right corner is at
  `offset_height + target_height, offset_width + target_width`.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    offset_height: Vertical coordinate of the top-left corner of the result in
      the input.
    offset_width: Horizontal coordinate of the top-left corner of the result in
      the input.
    target_height: Height of the result.
    target_width: Width of the result.

  Returns:
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, target_height, target_width, channels]`
    If `image` was 3-D, a 3-D float Tensor of shape
    `[target_height, target_width, channels]`

  Raises:
    ValueError: If the shape of `image` is incompatible with the `offset_*` or
      `target_*` arguments, or either `offset_height` or `offset_width` is
      negative, or either `target_height` or `target_width` is not positive.
  """
  with ops.name_scope(None, 'crop_to_bounding_box', [image]):
    image = ops.convert_to_tensor(image, name='image')

    input_shape = image.get_shape()
    input_rank = input_shape.ndims
    if input_rank not in (None, 3, 4):
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          input_shape)

    # Everything below operates on a 4-D tensor. A 3-D (or unknown-rank) input
    # gets a leading batch dimension that is squeezed away before returning.
    is_batch = input_rank == 4
    if not is_batch:
      image = array_ops.expand_dims(image, 0)
      if input_rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)

    batch, height, width, depth = _ImageDimensions(image, rank=4)

    checks += _assert(offset_width >= 0, ValueError,
                      'offset_width must be >= 0.')
    checks += _assert(offset_height >= 0, ValueError,
                      'offset_height must be >= 0.')
    checks += _assert(target_width > 0, ValueError,
                      'target_width must be > 0.')
    checks += _assert(target_height > 0, ValueError,
                      'target_height must be > 0.')
    # The requested window has to lie entirely inside the image.
    checks += _assert(width >= (target_width + offset_width), ValueError,
                      'width must be >= target + offset.')
    checks += _assert(height >= (target_height + offset_height), ValueError,
                      'height must be >= target + offset.')
    image = control_flow_ops.with_dependencies(checks, image)

    # A size of -1 keeps the full extent of the batch and depth dimensions.
    crop_begin = array_ops.stack([0, offset_height, offset_width, 0])
    crop_size = array_ops.stack([-1, target_height, target_width, -1])
    cropped = array_ops.slice(image, crop_begin, crop_size)

    # Record every output dimension that is known statically; dimensions that
    # are tensors stay unknown.
    static_dims = []
    for dim in (batch, target_height, target_width, depth):
      static_dims.append(None if _is_tensor(dim) else dim)
    cropped.set_shape(static_dims)

    if not is_batch:
      cropped = array_ops.squeeze(cropped, axis=[0])

    return cropped
| |
| |
@tf_export(
    'image.resize_with_crop_or_pad',
    v1=['image.resize_with_crop_or_pad', 'image.resize_image_with_crop_or_pad'])
@dispatch.add_dispatch_support
def resize_image_with_crop_or_pad(image, target_height, target_width):
  """Crops and/or pads an image to a target width and height.

  Resizes an image to a target width and height by either centrally
  cropping the image or padding it evenly with zeros.

  If `width` or `height` is greater than the specified `target_width` or
  `target_height` respectively, this op centrally crops along that dimension.
  If `width` or `height` is smaller than the specified `target_width` or
  `target_height` respectively, this op centrally pads with 0 along that
  dimension.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Cropped and/or padded image.
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  with ops.name_scope(None, 'resize_image_with_crop_or_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    input_shape = image.get_shape()
    input_rank = input_shape.ndims
    if input_rank not in (None, 3, 4):
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          input_shape)

    # Everything below operates on a 4-D tensor. A 3-D (or unknown-rank) input
    # gets a leading batch dimension that is squeezed away before returning.
    is_batch = input_rank == 4
    if not is_batch:
      image = array_ops.expand_dims(image, 0)
      if input_rank is None:
        image.set_shape([None] * 4)

    size_checks = _CheckAtLeast3DImage(image, require_static=False)
    size_checks += _assert(target_width > 0, ValueError,
                           'target_width must be > 0.')
    size_checks += _assert(target_height > 0, ValueError,
                           'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(size_checks, image)
    # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
    # Make sure our checks come first, so that error messages are clearer.
    if _is_tensor(target_height):
      target_height = control_flow_ops.with_dependencies(
          size_checks, target_height)
    if _is_tensor(target_width):
      target_width = control_flow_ops.with_dependencies(size_checks,
                                                        target_width)

    # The helpers below use the TensorFlow op as soon as either operand is a
    # tensor and fall back to plain Python otherwise, so fully static sizes
    # stay static.
    def _any_tensor(x, y):
      return _is_tensor(x) or _is_tensor(y)

    def _max(x, y):
      return math_ops.maximum(x, y) if _any_tensor(x, y) else max(x, y)

    def _min(x, y):
      return math_ops.minimum(x, y) if _any_tensor(x, y) else min(x, y)

    def _equal(x, y):
      return math_ops.equal(x, y) if _any_tensor(x, y) else x == y

    _, height, width, _ = _ImageDimensions(image, rank=4)

    # A negative difference means the image is too large along that dimension
    # and is cropped; a positive difference means it is padded.
    width_diff = target_width - width
    crop_left = _max((-width_diff) // 2, 0)
    pad_left = _max(width_diff // 2, 0)

    height_diff = target_height - height
    crop_top = _max((-height_diff) // 2, 0)
    pad_top = _max(height_diff // 2, 0)

    # Maybe crop if needed.
    crop_height = _min(target_height, height)
    crop_width = _min(target_width, width)
    cropped = crop_to_bounding_box(image, crop_top, crop_left, crop_height,
                                   crop_width)

    # Maybe pad if needed.
    resized = pad_to_bounding_box(cropped, pad_top, pad_left, target_height,
                                  target_width)

    # In theory all the checks below are redundant.
    if resized.get_shape().ndims is None:
      raise ValueError('resized contains no shape.')

    _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4)

    final_checks = []
    final_checks += _assert(
        _equal(resized_height, target_height), ValueError,
        'resized height is not correct.')
    final_checks += _assert(
        _equal(resized_width, target_width), ValueError,
        'resized width is not correct.')

    resized = control_flow_ops.with_dependencies(final_checks, resized)

    if not is_batch:
      resized = array_ops.squeeze(resized, axis=[0])

    return resized
| |
| |
@tf_export(v1=['image.ResizeMethod'])
class ResizeMethodV1(object):
  """See `v1.image.resize` for details."""
  # Integer codes identifying the resize methods of the v1 API.
  BILINEAR, NEAREST_NEIGHBOR, BICUBIC, AREA = 0, 1, 2, 3
| |
| |
@tf_export('image.ResizeMethod', v1=[])
class ResizeMethod(object):
  """See `tf.image.resize` for details."""
  # String identifiers for the resize methods of the v2 API.
  BILINEAR, NEAREST_NEIGHBOR = 'bilinear', 'nearest'
  BICUBIC, AREA = 'bicubic', 'area'
  # These four are always resized through the scale-and-translate kernel.
  LANCZOS3, LANCZOS5 = 'lanczos3', 'lanczos5'
  GAUSSIAN, MITCHELLCUBIC = 'gaussian', 'mitchellcubic'
| |
| |
def _resize_images_common(images, resizer_fn, size, preserve_aspect_ratio, name,
                          skip_resize_if_same):
  """Core functionality for v1 and v2 resize functions.

  Args:
    images: Value convertible to a tensor whose static rank is 3 or 4.
    resizer_fn: Callable invoked as `resizer_fn(images, size)` with the 4-D
      images tensor and the int32 `size` tensor; returns the resized images.
    size: Value convertible to an int32 tensor compatible with shape `[2]`,
      holding `new_height, new_width`.
    preserve_aspect_ratio: If true, `size` is replaced by the image height and
      width scaled by the smaller of the two per-dimension scale factors.
    name: Optional name for the enclosing name scope.
    skip_resize_if_same: If true and the static height and width already equal
      the statically known requested size, `resizer_fn` is not called.

  Returns:
    The resized images, with the same rank as the `images` argument.

  Raises:
    ValueError: If `images` has unknown rank or a rank other than 3 or 4, or
      if `size` cannot be converted to an int32 tensor or does not have two
      elements.
  """
  with ops.name_scope(name, 'resize', [images, size]):
    images = ops.convert_to_tensor(images, name='images')
    input_rank = images.get_shape().ndims
    if input_rank is None:
      raise ValueError('\'images\' contains no shape.')
    # TODO(shlens): Migrate this functionality to the underlying Op's.
    if input_rank not in (3, 4):
      raise ValueError('\'images\' must have either 3 or 4 dimensions.')
    is_batch = input_rank == 4
    if not is_batch:
      # Temporarily add a batch dimension; it is removed before returning.
      images = array_ops.expand_dims(images, 0)

    _, height, width, _ = images.get_shape().as_list()

    try:
      size = ops.convert_to_tensor(size, dtypes.int32, name='size')
    except (TypeError, ValueError):
      raise ValueError('\'size\' must be a 1-D int32 Tensor')
    if not size.get_shape().is_compatible_with([2]):
      raise ValueError('\'size\' must be a 1-D Tensor of 2 elements: '
                       'new_height, new_width')

    if preserve_aspect_ratio:
      # Get the current shapes of the image, even if dynamic.
      _, current_height, current_width, _ = _ImageDimensions(images, rank=4)

      # Per-dimension scale factors; the smaller one is used for both
      # dimensions so the result fits inside the requested size.
      requested_height = math_ops.cast(size[0], dtypes.float32)
      height_scale = (
          requested_height / math_ops.cast(current_height, dtypes.float32))
      requested_width = math_ops.cast(size[1], dtypes.float32)
      width_scale = (
          requested_width / math_ops.cast(current_width, dtypes.float32))
      scale = math_ops.minimum(height_scale, width_scale)

      scaled_height = math_ops.cast(
          math_ops.round(scale *
                         math_ops.cast(current_height, dtypes.float32)),
          dtypes.int32)
      scaled_width = math_ops.cast(
          math_ops.round(scale *
                         math_ops.cast(current_width, dtypes.float32)),
          dtypes.int32)

      # NOTE: Reset the size and other constants used later.
      size = ops.convert_to_tensor([scaled_height, scaled_width],
                                   dtypes.int32,
                                   name='size')

    # Static values of the requested size; None where not known statically.
    size_as_shape = tensor_util.constant_value_as_shape(size)
    new_height_const = tensor_shape.dimension_at_index(size_as_shape, 0).value
    new_width_const = tensor_shape.dimension_at_index(size_as_shape, 1).value

    # If we can determine that the height and width will be unmodified by this
    # transformation, we avoid performing the resize.
    all_static = None not in (new_width_const, width, new_height_const, height)
    already_sized = (
        skip_resize_if_same and all_static and width == new_width_const and
        height == new_height_const)

    if not already_sized:
      images = resizer_fn(images, size)

      # NOTE(mrry): The shape functions for the resize ops cannot unpack
      # the packed values in `new_size`, so set the shape here.
      images.set_shape([None, new_height_const, new_width_const, None])

    if not is_batch:
      images = array_ops.squeeze(images, axis=[0])
    return images
| |
| |
@tf_export(v1=['image.resize_images', 'image.resize'])
@dispatch.add_dispatch_support
def resize_images(images,
                  size,
                  method=ResizeMethodV1.BILINEAR,
                  align_corners=False,
                  preserve_aspect_ratio=False,
                  name=None):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`.  To avoid distortions see
  `tf.image.resize_with_pad` or `tf.image.resize_with_crop_or_pad`.

  The `method` can be one of:

  *   <b>`tf.image.ResizeMethod.BILINEAR`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation)
  *   <b>`tf.image.ResizeMethod.NEAREST_NEIGHBOR`</b>: [
    Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
  *   <b>`tf.image.ResizeMethod.BICUBIC`</b>: [Bicubic interpolation.](
    https://en.wikipedia.org/wiki/Bicubic_interpolation)
  *   <b>`tf.image.ResizeMethod.AREA`</b>: Area interpolation.

  The return value has the same type as `images` if `method` is
  `tf.image.ResizeMethod.NEAREST_NEIGHBOR`. It will also have the same type
  as `images` if the size of `images` can be statically determined to be the
  same as `size`, because `images` is returned in this case. Otherwise, the
  return value has type `float32`.

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
      size for the images.
    method: ResizeMethod.  Defaults to `tf.image.ResizeMethod.BILINEAR`.
    align_corners: bool.  If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
      then `images` will be resized to a size that fits in `size` while
      preserving the aspect ratio of the original image. Scales up the image if
      `size` is bigger than the current size of the `image`. Defaults to False.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if `size` has invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def resize_fn(images_t, new_size):
    """Legacy resize core function, passed to _resize_images_common."""
    # Both the integer v1 codes and the string v2 identifiers are accepted.
    if method in (ResizeMethodV1.BILINEAR, ResizeMethod.BILINEAR):
      kernel = gen_image_ops.resize_bilinear
    elif method in (ResizeMethodV1.NEAREST_NEIGHBOR,
                    ResizeMethod.NEAREST_NEIGHBOR):
      kernel = gen_image_ops.resize_nearest_neighbor
    elif method in (ResizeMethodV1.BICUBIC, ResizeMethod.BICUBIC):
      kernel = gen_image_ops.resize_bicubic
    elif method in (ResizeMethodV1.AREA, ResizeMethod.AREA):
      kernel = gen_image_ops.resize_area
    else:
      raise ValueError('Resize method is not implemented: {}'.format(method))
    return kernel(images_t, new_size, align_corners=align_corners)

  return _resize_images_common(
      images,
      resize_fn,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=True)
| |
| |
@tf_export('image.resize', v1=[])
@dispatch.add_dispatch_support
def resize_images_v2(images,
                     size,
                     method=ResizeMethod.BILINEAR,
                     preserve_aspect_ratio=False,
                     antialias=False,
                     name=None):
  """Resize `images` to `size` using the specified `method`.

  Resized images will be distorted if their original aspect ratio is not
  the same as `size`.  To avoid distortions see
  `tf.image.resize_with_pad`.

  >>> image = tf.constant([
  ...  [1,0,0,0,0],
  ...  [0,1,0,0,0],
  ...  [0,0,1,0,0],
  ...  [0,0,0,1,0],
  ...  [0,0,0,0,1],
  ... ])
  >>> # Add "batch" and "channels" dimensions
  >>> image = image[tf.newaxis, ..., tf.newaxis]
  >>> image.shape.as_list()  # [batch, height, width, channels]
  [1, 5, 5, 1]
  >>> tf.image.resize(image, [3,5])[0,...,0].numpy()
  array([[0.6666667, 0.3333333, 0.       , 0.       , 0.       ],
         [0.       , 0.       , 1.       , 0.       , 0.       ],
         [0.       , 0.       , 0.       , 0.3333335, 0.6666665]],
        dtype=float32)

  It works equally well with a single image instead of a batch of images:

  >>> tf.image.resize(image[0], [3,5]).shape.as_list()
  [3, 5, 1]

  When `antialias` is true, the sampling filter will anti-alias the input image
  as well as interpolate.  When downsampling an image with [anti-aliasing](
  https://en.wikipedia.org/wiki/Spatial_anti-aliasing) the sampling filter
  kernel is scaled in order to properly anti-alias the input image signal.
  `antialias` has no effect when upsampling an image:

  >>> a = tf.image.resize(image, [5,10])
  >>> b = tf.image.resize(image, [5,10], antialias=True)
  >>> tf.reduce_max(abs(a - b)).numpy()
  0.0

  The `method` argument expects an item from the `image.ResizeMethod` enum, or
  the string equivalent. The options are:

  *   <b>`bilinear`</b>: [Bilinear interpolation.](
    https://en.wikipedia.org/wiki/Bilinear_interpolation) If `antialias` is
    true, becomes a hat/tent filter function with radius 1 when downsampling.
  *   <b>`lanczos3`</b>:  [Lanczos kernel](
    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 3.
    High-quality practical filter but may have some ringing, especially on
    synthetic images.
  *   <b>`lanczos5`</b>: [Lanczos kernel](
    https://en.wikipedia.org/wiki/Lanczos_resampling) with radius 5.
    Very-high-quality filter but may have stronger ringing.
  *   <b>`bicubic`</b>: [Cubic interpolant](
    https://en.wikipedia.org/wiki/Bicubic_interpolation) of Keys. Equivalent to
    Catmull-Rom kernel. Reasonably good quality and faster than Lanczos3Kernel,
    particularly when upsampling.
  *   <b>`gaussian`</b>: [Gaussian kernel](
    https://en.wikipedia.org/wiki/Gaussian_filter) with radius 3,
    sigma = 1.5 / 3.0.
  *   <b>`nearest`</b>: [Nearest neighbor interpolation.](
    https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation)
    `antialias` has no effect when used with nearest neighbor interpolation.
  *   <b>`area`</b>: Anti-aliased resampling with area interpolation.
    `antialias` has no effect when used with area interpolation; it
    always anti-aliases.
  *   <b>`mitchellcubic`</b>: Mitchell-Netravali Cubic non-interpolating filter.
    For synthetic images (especially those lacking proper prefiltering), less
    ringing than Keys cubic kernel but less sharp.

  Note: Near image edges the filtering kernel may be partially outside the
  image boundaries. For these pixels, only input pixels inside the image will be
  included in the filter sum, and the output value will be appropriately
  normalized.

  The return value has type `float32`, unless the `method` is
  `ResizeMethod.NEAREST_NEIGHBOR`, then the return dtype is the dtype
  of `images`:

  >>> nn = tf.image.resize(image, [5,7], method='nearest')
  >>> nn[0,...,0].numpy()
  array([[1, 0, 0, 0, 0, 0, 0],
         [0, 1, 1, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0],
         [0, 0, 0, 0, 1, 1, 0],
         [0, 0, 0, 0, 0, 0, 1]], dtype=int32)

  With `preserve_aspect_ratio=True`, the aspect ratio is preserved, so `size`
  is the maximum for each dimension:

  >>> max_10_20 = tf.image.resize(image, [10,20], preserve_aspect_ratio=True)
  >>> max_10_20.shape.as_list()
  [1, 10, 10, 1]

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    size: A 1-D int32 Tensor of 2 elements: `new_height, new_width`.  The new
      size for the images.
    method: An `image.ResizeMethod`, or string equivalent.  Defaults to
      `bilinear`.
    preserve_aspect_ratio: Whether to preserve the aspect ratio. If this is set,
      then `images` will be resized to a size that fits in `size` while
      preserving the aspect ratio of the original image. Scales up the image if
      `size` is bigger than the current size of the `image`. Defaults to False.
    antialias: Whether to use an anti-aliasing filter when downsampling an
      image.
    name: A name for this operation (optional).

  Raises:
    ValueError: if the shape of `images` is incompatible with the
      shape arguments to this function
    ValueError: if `size` has an invalid shape or type.
    ValueError: if an unsupported resize method is specified.

  Returns:
    If `images` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `images` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """

  def resize_fn(images_t, new_size):
    """Resize core function, passed to _resize_images_common."""
    # Methods that are only available through the scale-and-translate kernel.
    scale_and_translate_only = (
        ResizeMethod.LANCZOS3, ResizeMethod.LANCZOS5, ResizeMethod.GAUSSIAN,
        ResizeMethod.MITCHELLCUBIC)

    def resize_with_scale_and_translate(kernel_type):
      # Per-dimension scale is the requested size over the current height and
      # width; no translation is applied.
      target_hw = math_ops.cast(new_size, dtype=dtypes.float32)
      current_hw = math_ops.cast(
          array_ops.shape(images_t)[1:3], dtype=dtypes.float32)
      scale = target_hw / current_hw
      translation = array_ops.zeros([2])
      return gen_image_ops.scale_and_translate(
          images_t,
          new_size,
          scale,
          translation,
          kernel_type=kernel_type,
          antialias=antialias)

    if method == ResizeMethod.BILINEAR:
      if antialias:
        return resize_with_scale_and_translate('triangle')
      return gen_image_ops.resize_bilinear(
          images_t, new_size, half_pixel_centers=True)

    if method == ResizeMethod.NEAREST_NEIGHBOR:
      return gen_image_ops.resize_nearest_neighbor(
          images_t, new_size, half_pixel_centers=True)

    if method == ResizeMethod.BICUBIC:
      if antialias:
        return resize_with_scale_and_translate('keyscubic')
      return gen_image_ops.resize_bicubic(
          images_t, new_size, half_pixel_centers=True)

    if method == ResizeMethod.AREA:
      return gen_image_ops.resize_area(images_t, new_size)

    if method in scale_and_translate_only:
      return resize_with_scale_and_translate(method)

    raise ValueError('Resize method is not implemented: {}'.format(method))

  return _resize_images_common(
      images,
      resize_fn,
      size,
      preserve_aspect_ratio=preserve_aspect_ratio,
      name=name,
      skip_resize_if_same=False)
| |
| |
def _resize_image_with_pad_common(image, target_height, target_width,
                                  resize_fn):
  """Core functionality for v1 and v2 resize_image_with_pad functions.

  Args:
    image: Value convertible to a tensor of rank 3 or 4 (or of unknown rank).
    target_height: Height of the padded result; must be > 0.
    target_width: Width of the padded result; must be > 0.
    resize_fn: Callable invoked as `resize_fn(image, [new_height, new_width])`
      on the 4-D image; returns the resized image.

  Returns:
    The image resized by a single ratio for both dimensions and then zero
    padded to `target_height` by `target_width`, with the rank of the input.

  Raises:
    ValueError: If `image` has a known rank other than 3 or 4, or a target
      dimension is not positive.
  """
  with ops.name_scope(None, 'resize_image_with_pad', [image]):
    image = ops.convert_to_tensor(image, name='image')
    input_shape = image.get_shape()
    input_rank = input_shape.ndims
    if input_rank not in (None, 3, 4):
      raise ValueError(
          '\'image\' (shape %s) must have either 3 or 4 dimensions.' %
          input_shape)

    # Everything below operates on a 4-D tensor. A 3-D (or unknown-rank) input
    # gets a leading batch dimension that is squeezed away before returning.
    is_batch = input_rank == 4
    if not is_batch:
      image = array_ops.expand_dims(image, 0)
      if input_rank is None:
        image.set_shape([None] * 4)

    checks = _CheckAtLeast3DImage(image, require_static=False)
    checks += _assert(target_width > 0, ValueError,
                      'target_width must be > 0.')
    checks += _assert(target_height > 0, ValueError,
                      'target_height must be > 0.')

    image = control_flow_ops.with_dependencies(checks, image)

    def _max(x, y):
      # Use the TensorFlow op as soon as either operand is a tensor.
      if _is_tensor(x) or _is_tensor(y):
        return math_ops.maximum(x, y)
      return max(x, y)

    _, height, width, _ = _ImageDimensions(image, rank=4)

    # convert values to float, to ease divisions
    height_f = math_ops.cast(height, dtype=dtypes.float32)
    width_f = math_ops.cast(width, dtype=dtypes.float32)
    target_height_f = math_ops.cast(target_height, dtype=dtypes.float32)
    target_width_f = math_ops.cast(target_width, dtype=dtypes.float32)

    # Find the ratio by which the image must be adjusted
    # to fit within the target
    ratio = _max(width_f / target_width_f, height_f / target_height_f)
    fitted_height_f = height_f / ratio
    fitted_width_f = width_f / ratio
    fitted_height = math_ops.cast(
        math_ops.floor(fitted_height_f), dtype=dtypes.int32)
    fitted_width = math_ops.cast(
        math_ops.floor(fitted_width_f), dtype=dtypes.int32)

    # Half of the leftover space goes before the image (rounded down and
    # clamped at zero); `pad_to_bounding_box` fills whatever remains after it.
    half_gap_height = (target_height_f - fitted_height_f) / 2
    half_gap_width = (target_width_f - fitted_width_f) / 2
    top_pad_f = math_ops.floor(half_gap_height)
    left_pad_f = math_ops.floor(half_gap_width)
    top_pad = _max(0, math_ops.cast(top_pad_f, dtype=dtypes.int32))
    left_pad = _max(0, math_ops.cast(left_pad_f, dtype=dtypes.int32))

    # Resize first, then pad to meet requested dimensions
    resized = resize_fn(image, [fitted_height, fitted_width])

    padded = pad_to_bounding_box(resized, top_pad, left_pad, target_height,
                                 target_width)

    if padded.get_shape().ndims is None:
      raise ValueError('padded contains no shape.')

    # The return value is unused; kept so the call's own checks still run.
    _ImageDimensions(padded, rank=4)

    if not is_batch:
      padded = array_ops.squeeze(padded, axis=[0])

    return padded
| |
| |
@tf_export(v1=['image.resize_image_with_pad'])
@dispatch.add_dispatch_support
def resize_image_with_pad_v1(image,
                             target_height,
                             target_width,
                             method=ResizeMethodV1.BILINEAR,
                             align_corners=False):
  """Resizes and pads an image to a target width and height.

  Resizes an image to a target width and height by keeping
  the aspect ratio the same without distortion. If the target
  dimensions don't match the image dimensions, the image
  is resized and then padded with zeroes to match requested
  dimensions.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.
    method: Method to use for resizing image. See `resize_images()`
    align_corners: bool.  If True, the centers of the 4 corner pixels of the
      input and output tensors are aligned, preserving the values at the corner
      pixels. Defaults to `False`.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Resized and padded image.
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  # The shared implementation only needs a two-argument resizer, so bind the
  # v1-specific options here.
  return _resize_image_with_pad_common(
      image, target_height, target_width,
      lambda im, new_size: resize_images(  # pylint: disable=g-long-lambda
          im, new_size, method, align_corners=align_corners))
| |
| |
@tf_export('image.resize_with_pad', v1=[])
@dispatch.add_dispatch_support
def resize_image_with_pad_v2(image,
                             target_height,
                             target_width,
                             method=ResizeMethod.BILINEAR,
                             antialias=False):
  """Resizes and pads an image to a target width and height.

  Resizes an image to a target width and height by keeping
  the aspect ratio the same without distortion. If the target
  dimensions don't match the image dimensions, the image
  is resized and then padded with zeroes to match requested
  dimensions.

  Args:
    image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    target_height: Target height.
    target_width: Target width.
    method: Method to use for resizing image. See `image.resize()`
    antialias: Whether to use anti-aliasing when resizing. See 'image.resize()'.

  Raises:
    ValueError: if `target_height` or `target_width` are zero or negative.

  Returns:
    Resized and padded image.
    If `image` was 4-D, a 4-D float Tensor of shape
    `[batch, new_height, new_width, channels]`.
    If `image` was 3-D, a 3-D float Tensor of shape
    `[new_height, new_width, channels]`.
  """
  # The shared implementation only needs a two-argument resizer, so bind the
  # v2-specific options here.
  return _resize_image_with_pad_common(
      image, target_height, target_width,
      lambda im, new_size: resize_images_v2(  # pylint: disable=g-long-lambda
          im, new_size, method, antialias=antialias))
| |
| |
@tf_export('image.per_image_standardization')
@dispatch.add_dispatch_support
def per_image_standardization(image):
  """Linearly scales each image in `image` to have mean 0 and variance 1.

  For each 3-D image `x` in `image`, computes `(x - mean) / adjusted_stddev`,
  where

  - `mean` is the average of all values in `x`
  - `adjusted_stddev = max(stddev, 1.0/sqrt(N))` is capped away from 0 to
    protect against division by 0 when handling uniform images
    - `N` is the number of elements in `x`
    - `stddev` is the standard deviation of all values in `x`

  Args:
    image: An n-D Tensor with at least 3 dimensions, the last 3 of which are the
      dimensions of each image.

  Returns:
    A `Tensor` with the same shape and dtype as `image`.

  Raises:
    ValueError: if the shape of 'image' is incompatible with this function.
  """
  with ops.name_scope(None, 'per_image_standardization', [image]) as scope:
    image = ops.convert_to_tensor(image, name='image')
    image = _AssertAtLeast3DImage(image)

    # Remember the original dtype so we can convert back if needed.
    orig_dtype = image.dtype
    if orig_dtype not in (dtypes.float16, dtypes.float32):
      image = convert_image_dtype(image, dtypes.float32)

    # Statistics are taken over the last three dimensions, i.e. per image.
    per_image_axes = [-1, -2, -3]
    num_pixels = math_ops.reduce_prod(array_ops.shape(image)[-3:])
    image_mean = math_ops.reduce_mean(
        image, axis=per_image_axes, keepdims=True)

    # Apply a minimum normalization that protects us against uniform images.
    stddev = math_ops.reduce_std(image, axis=per_image_axes, keepdims=True)
    min_stddev = math_ops.rsqrt(math_ops.cast(num_pixels, image.dtype))
    adjusted_stddev = math_ops.maximum(stddev, min_stddev)

    centered = image - image_mean
    # Naming the op after the scope makes the division carry the scope's name.
    standardized = math_ops.divide(centered, adjusted_stddev, name=scope)
    # NOTE(review): for dtypes other than float16/float32 the standardized
    # values are converted back to the original dtype with saturation;
    # presumably lossy for integer inputs -- confirm this is intended.
    return convert_image_dtype(standardized, orig_dtype, saturate=True)
| |
| |
@tf_export('image.random_brightness')
@dispatch.add_dispatch_support
def random_brightness(image, max_delta, seed=None):
  """Adjust the brightness of images by a random factor.

  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
  interval `[-max_delta, max_delta)`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_brightness(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  Args:
    image: An image or images to adjust.
    max_delta: float, must be non-negative.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    The brightness-adjusted image(s).

  Raises:
    ValueError: if `max_delta` is negative.
  """
  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  # A single scalar offset is drawn and applied to the whole input.
  return adjust_brightness(
      image,
      random_ops.random_uniform(
          shape=[], minval=-max_delta, maxval=max_delta, seed=seed))
| |
| |
@tf_export('image.stateless_random_brightness', v1=[])
@dispatch.add_dispatch_support
def stateless_random_brightness(image, max_delta, seed):
  """Adjust the brightness of images by a random factor deterministically.

  Equivalent to `adjust_brightness()` using a `delta` randomly picked in the
  interval `[-max_delta, max_delta)`.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_brightness(x, 0.2, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.1376241,  2.1376243,  3.1376243],
          [ 4.1376243,  5.1376243,  6.1376243]],
         [[ 7.1376243,  8.137624 ,  9.137624 ],
          [10.137624 , 11.137624 , 12.137624 ]]], dtype=float32)>

  Args:
    image: An image or images to adjust.
    max_delta: float, must be non-negative.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    The brightness-adjusted image(s).

  Raises:
    ValueError: if `max_delta` is negative.
  """
  if max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  # A single scalar offset is drawn and applied to the whole input.
  brightness_offset = stateless_random_ops.stateless_random_uniform(
      shape=[],
      minval=-max_delta,
      maxval=max_delta,
      seed=seed)
  return adjust_brightness(image, brightness_offset)
| |
| |
@tf_export('image.random_contrast')
@dispatch.add_dispatch_support
def random_contrast(image, lower, upper, seed=None):
  """Adjust the contrast of an image or images by a random factor.

  Equivalent to `adjust_contrast()` but uses a `contrast_factor` randomly
  picked in the interval `[lower, upper)`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_contrast(x, 0.2, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  Args:
    image: An image tensor with 3 or more dimensions.
    lower: float. Lower bound for the random contrast factor.
    upper: float. Upper bound for the random contrast factor.
    seed: A Python integer. Used to create a random seed. See
      `tf.compat.v1.set_random_seed` for behavior.

  Returns:
    The contrast-adjusted image(s).

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  # The ordering check comes first, so it decides the message when both
  # conditions are violated.
  if upper <= lower:
    raise ValueError('upper must be > lower.')
  elif lower < 0:
    raise ValueError('lower must be non-negative.')

  return adjust_contrast(
      image,
      random_ops.random_uniform(
          shape=[], minval=lower, maxval=upper, seed=seed))
| |
| |
@tf_export('image.stateless_random_contrast', v1=[])
@dispatch.add_dispatch_support
def stateless_random_contrast(image, lower, upper, seed):
  """Adjust the contrast of images by a random factor deterministically.

  The contrast factor is drawn between `lower` and `upper` and passed to
  `adjust_contrast()`.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_contrast(x, 0.2, 0.5, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[3.4605184, 4.4605184, 5.4605184],
          [4.820173 , 5.820173 , 6.820173 ]],
         [[6.179827 , 7.179827 , 8.179828 ],
          [7.5394816, 8.539482 , 9.539482 ]]], dtype=float32)>

  Args:
    image: An image tensor with 3 or more dimensions.
    lower: float. Lower bound for the random contrast factor.
    upper: float. Upper bound for the random contrast factor.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    The contrast-adjusted image(s).

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  # The ordering check comes first, so it decides the message when both
  # conditions are violated.
  if upper <= lower:
    raise ValueError('upper must be > lower.')
  elif lower < 0:
    raise ValueError('lower must be non-negative.')

  factor = stateless_random_ops.stateless_random_uniform(
      shape=[],
      minval=lower,
      maxval=upper,
      seed=seed)
  return adjust_contrast(image, factor)
| |
| |
@tf_export('image.adjust_brightness')
@dispatch.add_dispatch_support
def adjust_brightness(image, delta):
  """Adjust the brightness of RGB or Grayscale images.

  This is a convenience method that converts RGB images to float
  representation, adjusts their brightness, and then converts them back to the
  original data type. If several adjustments are chained, it is advisable to
  minimize the number of redundant conversions.

  The value `delta` is added to all components of the tensor `image`. `image` is
  converted to `float` and scaled appropriately if it is in fixed-point
  representation, and `delta` is converted to the same data type. For regular
  images, `delta` should be in the range `(-1,1)`, as it is added to the image
  in floating point representation, where pixel values are in the `[0,1)` range.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_brightness(x, delta=0.1)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.1,  2.1,  3.1],
          [ 4.1,  5.1,  6.1]],
         [[ 7.1,  8.1,  9.1],
          [10.1, 11.1, 12.1]]], dtype=float32)>

  Args:
    image: RGB image or images to adjust.
    delta: A scalar. Amount to add to the pixel values.

  Returns:
    A brightness-adjusted tensor of the same shape and type as `image`.
  """
  with ops.name_scope(None, 'adjust_brightness', [image, delta]) as name:
    image = ops.convert_to_tensor(image, name='image')
    input_dtype = image.dtype

    # float16/float32 inputs are adjusted as-is; every other dtype is moved to
    # float32 first and restored to `input_dtype` at the end.
    if input_dtype not in (dtypes.float16, dtypes.float32):
      image = convert_image_dtype(image, dtypes.float32)

    offset = math_ops.cast(delta, image.dtype)
    brightened = math_ops.add(image, offset, name=name)
    return convert_image_dtype(brightened, input_dtype, saturate=True)
| |
| |
@tf_export('image.adjust_contrast')
@dispatch.add_dispatch_support
def adjust_contrast(images, contrast_factor):
  """Adjust contrast of RGB or grayscale images.

  This is a convenience method that converts RGB images to float
  representation, adjusts their contrast, and then converts them back to the
  original data type. If several adjustments are chained, it is advisable to
  minimize the number of redundant conversions.

  `images` is a tensor of at least 3 dimensions. The last 3 dimensions are
  interpreted as `[height, width, channels]`. The other dimensions only
  represent a collection of images, such as `[batch, height, width, channels].`

  Contrast is adjusted independently for each channel of each image.

  For each channel, this Op computes the mean of the image pixels in the
  channel and then adjusts each component `x` of each pixel to
  `(x - mean) * contrast_factor + mean`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_contrast(x, 2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[-3.5, -2.5, -1.5],
          [ 2.5,  3.5,  4.5]],
         [[ 8.5,  9.5, 10.5],
          [14.5, 15.5, 16.5]]], dtype=float32)>

  Args:
    images: Images to adjust. At least 3-D.
    contrast_factor: A float multiplier for adjusting contrast.

  Returns:
    The contrast-adjusted image or images.
  """
  with ops.name_scope(None, 'adjust_contrast',
                      [images, contrast_factor]) as name:
    images = ops.convert_to_tensor(images, name='images')
    input_dtype = images.dtype

    # float16/float32 inputs are adjusted as-is; every other dtype is moved to
    # float32 first and restored to `input_dtype` at the end.
    if input_dtype not in (dtypes.float16, dtypes.float32):
      images = convert_image_dtype(images, dtypes.float32)

    contrasted = gen_image_ops.adjust_contrastv2(
        images, contrast_factor=contrast_factor, name=name)
    return convert_image_dtype(contrasted, input_dtype, saturate=True)
| |
| |
@tf_export('image.adjust_gamma')
@dispatch.add_dispatch_support
def adjust_gamma(image, gamma=1, gain=1):
  """Performs gamma correction on the input image.

  Also known as Power Law Transform. This function first converts the input
  images to float representation, then transforms them pixelwise according to
  the equation `Out = gain * In**gamma`, and then converts them back to the
  original data type.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_gamma(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[1.       , 1.1486983, 1.2457309],
          [1.319508 , 1.3797297, 1.4309691]],
         [[1.4757731, 1.5157166, 1.5518456],
          [1.5848932, 1.6153942, 1.6437519]]], dtype=float32)>

  Args:
    image : RGB image or images to adjust.
    gamma : A scalar or tensor. Non-negative real number.
    gain  : A scalar or tensor. The constant multiplier.

  Returns:
    A Tensor. A Gamma-adjusted tensor of the same shape and type as `image`.

  Raises:
    ValueError: If gamma is negative.

  Notes:
    For gamma greater than 1, the histogram will shift towards left and
    the output image will be darker than the input image.
    For gamma less than 1, the histogram will shift towards right and
    the output image will be brighter than the input image.

  References:
    [Wikipedia](http://en.wikipedia.org/wiki/Gamma_correction)
  """

  with ops.name_scope(None, 'adjust_gamma', [image, gamma, gain]):
    image = ops.convert_to_tensor(image, name='image')
    input_dtype = image.dtype

    # float16/float32 inputs are transformed as-is; every other dtype is moved
    # to float32 first and restored to `input_dtype` at the end.
    if input_dtype not in (dtypes.float16, dtypes.float32):
      image = convert_image_dtype(image, dtypes.float32)

    # `_assert` may hand back nothing to attach (a falsy value); only when it
    # returns something is `gamma` made to depend on it.
    gamma_checks = _assert(gamma >= 0, ValueError,
                           'Gamma should be a non-negative real number.')
    if gamma_checks:
      gamma = control_flow_ops.with_dependencies(gamma_checks, gamma)

    # Power law transform: Out = gain * In**gamma.
    corrected = gain * image**gamma
    return convert_image_dtype(corrected, input_dtype, saturate=True)
| |
| |
@tf_export('image.convert_image_dtype')
@dispatch.add_dispatch_support
def convert_image_dtype(image, dtype, saturate=False, name=None):
  """Convert `image` to `dtype`, scaling its values if needed.

  The operation supports data types (for `image` and `dtype`) of
  `uint8`, `uint16`, `uint32`, `uint64`, `int8`, `int16`, `int32`, `int64`,
  `float16`, `float32`, `float64`, `bfloat16`.

  Images that are represented using floating point values are expected to have
  values in the range [0,1). Image data stored in integer data types are
  expected to have values in the range `[0,MAX]`, where `MAX` is the largest
  positive representable number for the data type.

  This op converts between data types, scaling the values appropriately before
  casting.

  Usage Example:

  >>> x = [[[1, 2, 3], [4, 5, 6]],
  ...      [[7, 8, 9], [10, 11, 12]]]
  >>> x_int8 = tf.convert_to_tensor(x, dtype=tf.int8)
  >>> tf.image.convert_image_dtype(x_int8, dtype=tf.float16, saturate=False)
  <tf.Tensor: shape=(2, 2, 3), dtype=float16, numpy=
  array([[[0.00787, 0.01575, 0.02362],
          [0.0315 , 0.03937, 0.04724]],
         [[0.0551 , 0.063  , 0.07086],
          [0.07874, 0.0866 , 0.0945 ]]], dtype=float16)>

  Converting integer types to floating point types returns normalized floating
  point values in the range [0, 1); the values are normalized by the `MAX` value
  of the input dtype. Consider the following two examples:

  >>> a = [[[1], [2]], [[3], [4]]]
  >>> a_int8 = tf.convert_to_tensor(a, dtype=tf.int8)
  >>> tf.image.convert_image_dtype(a_int8, dtype=tf.float32)
  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
  array([[[0.00787402],
          [0.01574803]],
         [[0.02362205],
          [0.03149606]]], dtype=float32)>

  >>> a_int32 = tf.convert_to_tensor(a, dtype=tf.int32)
  >>> tf.image.convert_image_dtype(a_int32, dtype=tf.float32)
  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
  array([[[4.6566129e-10],
          [9.3132257e-10]],
         [[1.3969839e-09],
          [1.8626451e-09]]], dtype=float32)>

  Despite having identical values of `a` and output dtype of `float32`, the
  outputs differ due to the different input dtypes (`int8` vs. `int32`). This
  is, again, because the values are normalized by the `MAX` value of the input
  dtype.

  Note that converting floating point values to integer type may lose precision.
  In the example below, an image tensor `b` of dtype `float32` is converted to
  `int8` and back to `float32`. The final output, however, is different from
  the original input `b` due to precision loss.

  >>> b = [[[0.12], [0.34]], [[0.56], [0.78]]]
  >>> b_float32 = tf.convert_to_tensor(b, dtype=tf.float32)
  >>> b_int8 = tf.image.convert_image_dtype(b_float32, dtype=tf.int8)
  >>> tf.image.convert_image_dtype(b_int8, dtype=tf.float32)
  <tf.Tensor: shape=(2, 2, 1), dtype=float32, numpy=
  array([[[0.11811024],
          [0.33858266]],
         [[0.5590551 ],
          [0.77952754]]], dtype=float32)>

  Scaling up from an integer type (input dtype) to another integer type (output
  dtype) will not map input dtype's `MAX` to output dtype's `MAX` but converting
  back and forth should result in no change. For example, as shown below, the
  `MAX` value of int8 (=127) is not mapped to the `MAX` value of int16 (=32,767)
  but, when scaled back, we get the same, original values of `c`.

  >>> c = [[[1], [2]], [[127], [127]]]
  >>> c_int8 = tf.convert_to_tensor(c, dtype=tf.int8)
  >>> c_int16 = tf.image.convert_image_dtype(c_int8, dtype=tf.int16)
  >>> print(c_int16)
  tf.Tensor(
  [[[  256]
    [  512]]
   [[32512]
    [32512]]], shape=(2, 2, 1), dtype=int16)
  >>> c_int8_back = tf.image.convert_image_dtype(c_int16, dtype=tf.int8)
  >>> print(c_int8_back)
  tf.Tensor(
  [[[  1]
    [  2]]
   [[127]
    [127]]], shape=(2, 2, 1), dtype=int8)

  Scaling down from an integer type to another integer type can be a lossy
  conversion. Notice in the example below that converting `int16` to `uint8` and
  back to `int16` has lost precision.

  >>> d = [[[1000], [2000]], [[3000], [4000]]]
  >>> d_int16 = tf.convert_to_tensor(d, dtype=tf.int16)
  >>> d_uint8 = tf.image.convert_image_dtype(d_int16, dtype=tf.uint8)
  >>> d_int16_back = tf.image.convert_image_dtype(d_uint8, dtype=tf.int16)
  >>> print(d_int16_back)
  tf.Tensor(
  [[[ 896]
    [1920]]
   [[2944]
    [3968]]], shape=(2, 2, 1), dtype=int16)

  Note that converting from floating point inputs to integer types may lead to
  over/underflow problems. Set saturate to `True` to avoid such problem in
  problematic conversions. If enabled, saturation will clip the output into the
  allowed range before performing a potentially dangerous cast (and only before
  performing such a cast, i.e., when casting from a floating point to an integer
  type, and when casting from a signed to an unsigned type; `saturate` has no
  effect on casts between floats, or on casts that increase the type's range).

  Args:
    image: An image.
    dtype: A `DType` to convert `image` to.
    saturate: If `True`, clip the input before casting (if necessary).
    name: A name for this operation (optional).

  Returns:
    `image`, converted to `dtype`.

  Raises:
    AttributeError: Raises an attribute error when dtype is neither
    float nor integer
  """
  image = ops.convert_to_tensor(image, name='image')
  dtype = dtypes.as_dtype(dtype)
  if not (dtype.is_floating or dtype.is_integer):
    raise AttributeError('dtype must be either floating point or integer')
  if dtype == image.dtype:
    # Nothing to scale or cast.
    return array_ops.identity(image, name=name)

  with ops.name_scope(name, 'convert_image', [image]) as name:

    def _cast_to_target(value, op_name=None):
      # `saturate` is only inspected on the paths that actually need a cast to
      # the target dtype, matching where clipping can matter.
      if saturate:
        return math_ops.saturate_cast(value, dtype, name=op_name)
      return math_ops.cast(value, dtype, name=op_name)

    source_dtype = image.dtype

    # Both integer: use integer multiplication in the larger range.
    if source_dtype.is_integer and dtype.is_integer:
      source_max = source_dtype.max
      target_max = dtype.max
      if source_max > target_max:
        # Scaling down: scale first, then cast. The scaling factor will cause
        # in.max to be mapped to above out.max but below out.max+1, so that
        # the output is safely in the supported range.
        divisor = (source_max + 1) // (target_max + 1)
        shrunk = math_ops.floordiv(image, divisor)
        return _cast_to_target(shrunk, op_name=name)

      # Scaling up: cast first, then scale. The scale will not map in.max to
      # out.max, but converting back and forth should result in no change.
      widened = _cast_to_target(image)
      multiplier = (target_max + 1) // (source_max + 1)
      return math_ops.multiply(widened, multiplier, name=name)

    # Both float: just cast, no possible overflows in the allowed ranges.
    # Note: float overflows are ignored. If your image dynamic range exceeds
    # float range, you're on your own.
    if source_dtype.is_floating and dtype.is_floating:
      return math_ops.cast(image, dtype, name=name)

    # Integer to float: first cast, then scale. No saturation possible.
    if source_dtype.is_integer:
      as_float = math_ops.cast(image, dtype)
      return math_ops.multiply(as_float, 1. / source_dtype.max, name=name)

    # Float to integer: first scale, then cast. The extra 0.5 avoids rounding
    # problems in the cast.
    stretched = math_ops.multiply(image, dtype.max + 0.5)
    return _cast_to_target(stretched, op_name=name)
| |
| |
@tf_export('image.rgb_to_grayscale')
@dispatch.add_dispatch_support
def rgb_to_grayscale(images, name=None):
  """Converts one or more images from RGB to Grayscale.

  Outputs a tensor of the same `DType` and rank as `images`. The size of the
  last dimension of the output is 1, containing the Grayscale value of the
  pixels.

  >>> original = tf.constant([[[1.0, 2.0, 3.0]]])
  >>> converted = tf.image.rgb_to_grayscale(original)
  >>> print(converted.numpy())
  [[[1.81...]]]

  Args:
    images: The RGB tensor to convert. The last dimension must have size 3 and
      should contain RGB values.
    name: A name for the operation (optional).

  Returns:
    The converted grayscale image(s).
  """
  with ops.name_scope(name, 'rgb_to_grayscale', [images]) as name:
    images = ops.convert_to_tensor(images, name='images')
    # The weighted sum is computed in float32 and converted back to the input
    # dtype afterwards.
    input_dtype = images.dtype

    # Reference for converting between RGB and grayscale.
    # https://en.wikipedia.org/wiki/Luma_%28video%29
    luma_weights = [0.2989, 0.5870, 0.1140]

    as_float = convert_image_dtype(images, dtypes.float32)
    # Contract the channel axis against the weights, then put back a size-1
    # channel axis so the rank matches the input.
    luma = math_ops.tensordot(as_float, luma_weights, [-1, -1])
    luma = array_ops.expand_dims(luma, -1)
    return convert_image_dtype(luma, input_dtype, name=name)
| |
| |
@tf_export('image.grayscale_to_rgb')
@dispatch.add_dispatch_support
def grayscale_to_rgb(images, name=None):
  """Converts one or more images from Grayscale to RGB.

  Outputs a tensor of the same `DType` and rank as `images`. The size of the
  last dimension of the output is 3, containing the RGB value of the pixels.
  The input images' last dimension must be size 1.

  >>> original = tf.constant([[[1.0], [2.0], [3.0]]])
  >>> converted = tf.image.grayscale_to_rgb(original)
  >>> print(converted.numpy())
  [[[1. 1. 1.]
    [2. 2. 2.]
    [3. 3. 3.]]]

  Args:
    images: The Grayscale tensor to convert. The last dimension must be size 1.
    name: A name for the operation (optional).

  Returns:
    The converted RGB image(s).
  """
  with ops.name_scope(name, 'grayscale_to_rgb', [images]) as name:
    images = _AssertGrayscaleImage(images)
    images = ops.convert_to_tensor(images, name='images')

    # Build the `tile` multiples: 1 for every leading axis and 3 for the last
    # (channel) axis, i.e. [1, ..., 1, 3].
    leading_rank = array_ops.expand_dims(array_ops.rank(images) - 1, 0)
    leading_ones = array_ops.ones(leading_rank, dtype=dtypes.int32)
    channel_repeat = array_ops.expand_dims(3, 0)
    tile_multiples = array_ops.concat([leading_ones, channel_repeat], 0)

    tiled = array_ops.tile(images, tile_multiples, name=name)
    # Record the static shape: the input shape with the last dimension set
    # to 3.
    tiled.set_shape(images.get_shape()[:-1].concatenate([3]))
    return tiled
| |
| |
| # pylint: disable=invalid-name |
@tf_export('image.random_hue')
@dispatch.add_dispatch_support
def random_hue(image, max_delta, seed=None):
  """Adjust the hue of RGB images by a random factor.

  Equivalent to `adjust_hue()` but uses a `delta` randomly
  picked in the interval `[-max_delta, max_delta)`.

  `max_delta` must be in the interval `[0, 0.5]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_hue(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    max_delta: float. The maximum value for the random delta.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `max_delta` is invalid.
  """
  if max_delta > 0.5:
    raise ValueError('max_delta must be <= 0.5.')
  elif max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  # A single scalar hue offset is drawn and applied to the whole input.
  return adjust_hue(
      image,
      random_ops.random_uniform(
          shape=[], minval=-max_delta, maxval=max_delta, seed=seed))
| |
| |
@tf_export('image.stateless_random_hue', v1=[])
@dispatch.add_dispatch_support
def stateless_random_hue(image, max_delta, seed):
  """Adjust the hue of RGB images by a random factor deterministically.

  Equivalent to `adjust_hue()` but uses a `delta` randomly picked in the
  interval `[-max_delta, max_delta)`.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  `max_delta` must be in the interval `[0, 0.5]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_hue(x, 0.2, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.6514902,  1.       ,  3.       ],
          [ 4.65149  ,  4.       ,  6.       ]],
         [[ 7.65149  ,  7.       ,  9.       ],
          [10.65149  , 10.       , 12.       ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    max_delta: float. The maximum value for the random delta.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `max_delta` is invalid.
  """
  if max_delta > 0.5:
    raise ValueError('max_delta must be <= 0.5.')
  elif max_delta < 0:
    raise ValueError('max_delta must be non-negative.')

  # A single scalar hue offset is drawn and applied to the whole input.
  hue_offset = stateless_random_ops.stateless_random_uniform(
      shape=[],
      minval=-max_delta,
      maxval=max_delta,
      seed=seed)
  return adjust_hue(image, hue_offset)
| |
| |
@tf_export('image.adjust_hue')
@dispatch.add_dispatch_support
def adjust_hue(image, delta, name=None):
  """Adjust hue of RGB images.

  This is a convenience method that converts an RGB image to float
  representation, converts it to HSV, adds an offset to the
  hue channel, converts back to RGB and then back to the original
  data type. If several adjustments are chained it is advisable to minimize
  the number of redundant conversions.

  `image` is an RGB image. The image hue is adjusted by converting the
  image(s) to HSV and rotating the hue channel (H) by
  `delta`. The image is then converted back to RGB.

  `delta` must be in the interval `[-1, 1]`.

  Usage Examples:

  With a floating point image:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_hue(x, 0.2)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 2.3999996,  1.       ,  3.       ],
          [ 5.3999996,  4.       ,  6.       ]],
         [[ 8.4      ,  7.       ,  9.       ],
          [11.4      , 10.       , 12.       ]]], dtype=float32)>

  With an integer image:

  >>> image = [[[1, 2, 3], [4, 5, 6]],
  ...          [[7, 8, 9], [10, 11, 12]],
  ...          [[13, 14, 15], [16, 17, 18]]]
  >>> image = tf.constant(image)
  >>> tf.image.adjust_hue(image, 0.2)
  <tf.Tensor: shape=(3, 2, 3), dtype=int32, numpy=
  array([[[ 2,  1,  3],
          [ 5,  4,  6]],
         [[ 8,  7,  9],
          [11, 10, 12]],
         [[14, 13, 15],
          [17, 16, 18]]], dtype=int32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    delta: float. How much to add to the hue channel.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.
  """
  with ops.name_scope(name, 'adjust_hue', [image]) as name:
    image = ops.convert_to_tensor(image, name='image')
    input_dtype = image.dtype

    # float16/float32 inputs are adjusted as-is; every other dtype is moved to
    # float32 first and restored to `input_dtype` at the end.
    if input_dtype not in (dtypes.float16, dtypes.float32):
      image = convert_image_dtype(image, dtypes.float32)

    hue_shifted = gen_image_ops.adjust_hue(image, delta)
    return convert_image_dtype(hue_shifted, input_dtype)
| |
| |
| # pylint: disable=invalid-name |
@tf_export('image.random_jpeg_quality')
@dispatch.add_dispatch_support
def random_jpeg_quality(image, min_jpeg_quality, max_jpeg_quality, seed=None):
  """Randomly changes jpeg encoding quality for inducing jpeg noise.

  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
  `max_jpeg_quality`.
  `max_jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_jpeg_quality(x, 75, 95)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=...>

  Args:
    image: 3D image. Size of the last dimension must be 1 or 3.
    min_jpeg_quality: Minimum jpeg encoding quality to use.
    max_jpeg_quality: Maximum jpeg encoding quality to use.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
  """
  # Both bounds have to lie in [0, 100] ...
  quality_bounds = (min_jpeg_quality, max_jpeg_quality)
  if (any(bound < 0 for bound in quality_bounds) or
      any(bound > 100 for bound in quality_bounds)):
    raise ValueError('jpeg encoding range must be between 0 and 100.')

  # ... and the range between them must be non-empty.
  if min_jpeg_quality >= max_jpeg_quality:
    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')

  # Draw a single integer quality level for the whole image.
  sampled_quality = random_ops.random_uniform(
      shape=[],
      minval=min_jpeg_quality,
      maxval=max_jpeg_quality,
      seed=seed,
      dtype=dtypes.int32)
  return adjust_jpeg_quality(image, sampled_quality)
| |
| |
@tf_export('image.stateless_random_jpeg_quality', v1=[])
@dispatch.add_dispatch_support
def stateless_random_jpeg_quality(image,
                                  min_jpeg_quality,
                                  max_jpeg_quality,
                                  seed):
  """Deterministically randomize jpeg encoding quality for inducing jpeg noise.

  Guarantees the same results given the same `seed` independent of how many
  times the function is called, and independent of global seed settings (e.g.
  `tf.random.set_seed`).

  `min_jpeg_quality` must be in the interval `[0, 100]` and less than
  `max_jpeg_quality`.
  `max_jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1, 2, 3],
  ...       [4, 5, 6]],
  ...      [[7, 8, 9],
  ...       [10, 11, 12]]]
  >>> x_uint8 = tf.cast(x, tf.uint8)
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_jpeg_quality(x_uint8, 75, 95, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=uint8, numpy=
  array([[[ 0,  4,  5],
          [ 1,  5,  6]],
         [[ 5,  9, 10],
          [ 5,  9, 10]]], dtype=uint8)>

  Args:
    image: 3D image. Size of the last dimension must be 1 or 3.
    min_jpeg_quality: Minimum jpeg encoding quality to use.
    max_jpeg_quality: Maximum jpeg encoding quality to use.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `min_jpeg_quality` or `max_jpeg_quality` is invalid.
  """
  # Both bounds have to lie in [0, 100] ...
  quality_bounds = (min_jpeg_quality, max_jpeg_quality)
  if (any(bound < 0 for bound in quality_bounds) or
      any(bound > 100 for bound in quality_bounds)):
    raise ValueError('jpeg encoding range must be between 0 and 100.')

  # ... and the range between them must be non-empty.
  if min_jpeg_quality >= max_jpeg_quality:
    raise ValueError('`min_jpeg_quality` must be less than `max_jpeg_quality`.')

  # Draw a single integer quality level for the whole image.
  sampled_quality = stateless_random_ops.stateless_random_uniform(
      shape=[],
      minval=min_jpeg_quality,
      maxval=max_jpeg_quality,
      seed=seed,
      dtype=dtypes.int32)
  return adjust_jpeg_quality(image, sampled_quality)
| |
| |
@tf_export('image.adjust_jpeg_quality')
@dispatch.add_dispatch_support
def adjust_jpeg_quality(image, jpeg_quality, name=None):
  """Adjust jpeg encoding quality of an image.

  Convenience wrapper that converts the image to a `uint8` representation,
  jpeg-encodes it at `jpeg_quality`, decodes the encoded bytes again, and
  finally converts the result back to the dtype of the input.

  `jpeg_quality` must be in the interval `[0, 100]`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_jpeg_quality(x, 75)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[1., 1., 1.],
          [1., 1., 1.]],
         [[1., 1., 1.],
          [1., 1., 1.]]], dtype=float32)>

  Args:
    image: 3D image. The size of the last dimension must be None, 1 or 3.
    jpeg_quality: Python int or Tensor of type int32. jpeg encoding quality.
    name: A name for this operation (optional).

  Returns:
    Adjusted image, same shape and DType as `image`.

  Raises:
    InvalidArgumentError: quality must be in [0,100]
    InvalidArgumentError: image must have 1 or 3 channels
  """
  with ops.name_scope(name, 'adjust_jpeg_quality', [image]):
    source = ops.convert_to_tensor(image, name='image')
    # The statically known size of the last dimension is handed to the decoder
    # so the round-tripped image keeps the same channel count.
    channels = source.shape.as_list()[-1]
    # Saved so the decoded image can be converted back at the end.
    orig_dtype = source.dtype
    uint8_image = convert_image_dtype(source, dtypes.uint8, saturate=True)

    # A plain Python value is wrapped as an int32 tensor; tensors pass through.
    if not _is_tensor(jpeg_quality):
      jpeg_quality = ops.convert_to_tensor(jpeg_quality, dtype=dtypes.int32)

    encoded = gen_image_ops.encode_jpeg_variable_quality(
        uint8_image, jpeg_quality)
    decoded = gen_image_ops.decode_jpeg(encoded, channels=channels)
    return convert_image_dtype(decoded, orig_dtype, saturate=True)
| |
| |
@tf_export('image.random_saturation')
@dispatch.add_dispatch_support
def random_saturation(image, lower, upper, seed=None):
  """Adjust the saturation of RGB images by a random factor.

  Behaves like `adjust_saturation()`, except that the `saturation_factor` is
  drawn at random from the interval `[lower, upper)`.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.random_saturation(x, 5, 10)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 0. ,  1.5,  3. ],
          [ 0. ,  3. ,  6. ]],
         [[ 0. ,  4.5,  9. ],
          [ 0. ,  6. , 12. ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    lower: float.  Lower bound for the random saturation factor.
    upper: float.  Upper bound for the random saturation factor.
    seed: An operation-specific seed. It will be used in conjunction with the
      graph-level seed to determine the real seeds that will be used in this
      operation. Please see the documentation of set_random_seed for its
      interaction with the graph-level random seed.

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  # The range is validated before the sign of `lower`, so an inverted range
  # is the error reported when both problems are present.
  if upper <= lower:
    raise ValueError('upper must be > lower.')
  if lower < 0:
    raise ValueError('lower must be non-negative.')

  # Sample a scalar factor and delegate to the deterministic adjustment.
  return adjust_saturation(
      image,
      random_ops.random_uniform(
          shape=[], minval=lower, maxval=upper, seed=seed))
| |
| |
@tf_export('image.stateless_random_saturation', v1=[])
@dispatch.add_dispatch_support
def stateless_random_saturation(image, lower, upper, seed=None):
  """Adjust the saturation of RGB images by a random factor deterministically.

  Behaves like `adjust_saturation()`, except that the `saturation_factor` is
  drawn at random from the interval `[lower, upper)`.

  Given the same `seed`, the result is the same regardless of how many times
  the function is called and regardless of global seed settings (e.g.
  `tf.random.set_seed`).

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...      [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> seed = (1, 2)
  >>> tf.image.stateless_random_saturation(x, 0.5, 1.0, seed)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 1.1559395,  2.0779698,  3.       ],
          [ 4.1559396,  5.07797  ,  6.       ]],
         [[ 7.1559396,  8.07797  ,  9.       ],
          [10.155939 , 11.07797  , 12.       ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    lower: float.  Lower bound for the random saturation factor.
    upper: float.  Upper bound for the random saturation factor.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    ValueError: if `upper <= lower` or if `lower < 0`.
  """
  # The range is validated before the sign of `lower`, so an inverted range
  # is the error reported when both problems are present.
  if upper <= lower:
    raise ValueError('upper must be > lower.')
  if lower < 0:
    raise ValueError('lower must be non-negative.')

  # `seed` is forwarded untouched to the stateless sampler.
  factor = stateless_random_ops.stateless_random_uniform(
      shape=[], seed=seed, minval=lower, maxval=upper)
  return adjust_saturation(image, factor)
| |
| |
@tf_export('image.adjust_saturation')
@dispatch.add_dispatch_support
def adjust_saturation(image, saturation_factor, name=None):
  """Adjust saturation of RGB images.

  This is a convenience method that converts RGB images to float
  representation (unless they already are `float16` or `float32`), applies
  the saturation adjustment, and converts the result back to the original
  data type. If several adjustments are chained it is advisable to minimize
  the number of redundant conversions.

  `image` is an RGB image or images. The image saturation is adjusted by
  converting the images to HSV and multiplying the saturation (S) channel by
  `saturation_factor` and clipping. The images are then converted back to RGB.

  Usage Example:

  >>> x = [[[1.0, 2.0, 3.0],
  ...       [4.0, 5.0, 6.0]],
  ...     [[7.0, 8.0, 9.0],
  ...       [10.0, 11.0, 12.0]]]
  >>> tf.image.adjust_saturation(x, 0.5)
  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
  array([[[ 2. ,  2.5,  3. ],
          [ 5. ,  5.5,  6. ]],
         [[ 8. ,  8.5,  9. ],
          [11. , 11.5, 12. ]]], dtype=float32)>

  Args:
    image: RGB image or images. The size of the last dimension must be 3.
    saturation_factor: float. Factor to multiply the saturation by.
    name: A name for this operation (optional).

  Returns:
    Adjusted image(s), same shape and DType as `image`.

  Raises:
    InvalidArgumentError: input must have 3 channels
  """
  with ops.name_scope(name, 'adjust_saturation', [image]):
    image = ops.convert_to_tensor(image, name='image')
    # Saved so the adjusted image can be converted back at the end.
    orig_dtype = image.dtype
    # float16/float32 inputs go to the kernel as-is; everything else is first
    # converted to float32.
    already_float = orig_dtype in (dtypes.float16, dtypes.float32)
    flt_image = (
        image if already_float else convert_image_dtype(image, dtypes.float32))

    adjusted = gen_image_ops.adjust_saturation(flt_image, saturation_factor)
    return convert_image_dtype(adjusted, orig_dtype)
| |
| |
@tf_export('io.is_jpeg', 'image.is_jpeg', v1=['io.is_jpeg', 'image.is_jpeg'])
def is_jpeg(contents, name=None):
  r"""Convenience function to check if the 'contents' encodes a JPEG image.

  Only the first three bytes of `contents` are inspected, so arbitrary data
  that happens to start with the same bytes is also reported as JPEG.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
     A scalar boolean tensor indicating if 'contents' may be a JPEG image.
     is_jpeg is susceptible to false positives.
  """
  with ops.name_scope(name, 'is_jpeg'):
    # Plain JPEG files begin with \xff\xd8\xff\xe0 and JPEG files carrying
    # EXIF data begin with \xff\xd8\xff\xe1; matching on the shared three-byte
    # prefix \xff\xd8\xff accepts both.
    leading_bytes = string_ops.substr(contents, 0, 3)
    return math_ops.equal(leading_bytes, b'\xff\xd8\xff', name=name)
| |
| |
def _is_png(contents, name=None):
  r"""Convenience function to check if the 'contents' encodes a PNG image.

  Only the first three bytes of `contents` are compared against `\211PN`, so
  arbitrary data starting with those bytes is also reported as PNG.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    name: A name for the operation (optional)

  Returns:
     A scalar boolean tensor indicating if 'contents' may be a PNG image.
     is_png is susceptible to false positives.
  """
  with ops.name_scope(name, 'is_png'):
    leading_bytes = string_ops.substr(contents, 0, 3)
    return math_ops.equal(leading_bytes, b'\211PN', name=name)
| |
| |
# The generated image codec ops below have no Python wrapper in this part of
# the file. Each one is wrapped with `dispatch.add_dispatch_support` and then
# registered through `tf_export` under both an `io.*` and an `image.*` name
# (the same pair of names is used for the `v1` endpoint list).

# JPEG decode fused with a crop.
tf_export(
    'io.decode_and_crop_jpeg',
    'image.decode_and_crop_jpeg',
    v1=['io.decode_and_crop_jpeg', 'image.decode_and_crop_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_and_crop_jpeg))

# Per-format decoders.
tf_export(
    'io.decode_bmp',
    'image.decode_bmp',
    v1=['io.decode_bmp', 'image.decode_bmp'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_bmp))
tf_export(
    'io.decode_gif',
    'image.decode_gif',
    v1=['io.decode_gif', 'image.decode_gif'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_gif))
tf_export(
    'io.decode_jpeg',
    'image.decode_jpeg',
    v1=['io.decode_jpeg', 'image.decode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_jpeg))
tf_export(
    'io.decode_png',
    'image.decode_png',
    v1=['io.decode_png', 'image.decode_png'])(
        dispatch.add_dispatch_support(gen_image_ops.decode_png))

# JPEG encoder and JPEG shape extraction.
tf_export(
    'io.encode_jpeg',
    'image.encode_jpeg',
    v1=['io.encode_jpeg', 'image.encode_jpeg'])(
        dispatch.add_dispatch_support(gen_image_ops.encode_jpeg))
tf_export(
    'io.extract_jpeg_shape',
    'image.extract_jpeg_shape',
    v1=['io.extract_jpeg_shape', 'image.extract_jpeg_shape'])(
        dispatch.add_dispatch_support(gen_image_ops.extract_jpeg_shape))
| |
| |
@tf_export('io.encode_png', 'image.encode_png')
@dispatch.add_dispatch_support
def encode_png(image, compression=-1, name=None):
  r"""PNG-encode an image.

  `image` is a 3-D uint8 or uint16 Tensor of shape `[height, width, channels]`
  where `channels` is:

  *   1: for grayscale.
  *   2: for grayscale + alpha.
  *   3: for RGB.
  *   4: for RGBA.

  The ZLIB compression level, `compression`, can be -1 for the PNG-encoder
  default or a value from 0 to 9.  9 is the highest compression level,
  generating the smallest output, but is slower.

  The input is passed through `ops.convert_to_tensor` before it is handed to
  the generated encode op.

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `uint16`.
      3-D with shape `[height, width, channels]`.
    compression: An optional `int`. Defaults to `-1`. Compression level.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`.
  """
  image_tensor = ops.convert_to_tensor(image)
  return gen_image_ops.encode_png(
      image_tensor, compression=compression, name=name)
| |
| |
@tf_export(
    'io.decode_image',
    'image.decode_image',
    v1=['io.decode_image', 'image.decode_image'])
@dispatch.add_dispatch_support
def decode_image(contents,
                 channels=None,
                 dtype=dtypes.uint8,
                 name=None,
                 expand_animations=True):
  """Function for `decode_bmp`, `decode_gif`, `decode_jpeg`, and `decode_png`.

  Detects whether an image is a BMP, GIF, JPEG, or PNG, and performs the
  appropriate operation to convert the input bytes `string` into a `Tensor`
  of type `dtype`.

  Note: `decode_gif` returns a 4-D array `[num_frames, height, width, 3]`, as
  opposed to `decode_bmp`, `decode_jpeg` and `decode_png`, which return 3-D
  arrays `[height, width, num_channels]`. Make sure to take this into account
  when constructing your graph if you are intermixing GIF files with BMP, JPEG,
  and/or PNG files. Alternately, set the `expand_animations` argument of this
  function to `False`, in which case the op will return 3-dimensional tensors
  and will truncate animated GIF files to the first frame.

  NOTE: If the first frame of an animated GIF does not occupy the entire
  canvas (maximum frame width x maximum frame height), then it fills the
  unoccupied areas (in the first frame) with zeros (black). For frames after the
  first frame that does not occupy the entire canvas, it uses the previous
  frame to fill the unoccupied areas.

  Args:
    contents: 0-D `string`. The encoded image bytes.
    channels: An optional `int`. Defaults to `None`, which is treated as `0`
      by the decoders. Number of color channels for the decoded image.
    dtype: The desired DType of the returned `Tensor`.
    name: A name for the operation (optional)
    expand_animations: Controls the shape of the returned op's output. If
      `True`, the returned op will produce a 3-D tensor for PNG, JPEG, and BMP
      files; and a 4-D tensor for all GIFs, whether animated or not. If,
      `False`, the returned op will produce a 3-D tensor for all file types and
      will truncate animated GIFs to the first frame.

  Returns:
    `Tensor` with type `dtype` and a 3- or 4-dimensional shape, depending on
    the file type and the value of the `expand_animations` parameter.

  Raises:
    ValueError: On incorrect number of channels. The Python-side check for
      `channels` in `(None, 0, 1, 3, 4)` only runs on the legacy path taken
      when the forward-compatibility window has not been reached.
  """
  with ops.name_scope(name, 'decode_image'):
    if compat.forward_compatible(2020, 8, 14):
      # Fused path: a single op handles format detection and decoding.
      if channels is None:
        channels = 0
      if dtype in [dtypes.float32, dtypes.uint8, dtypes.uint16]:
        # These dtypes are requested from the op directly.
        return gen_image_ops.decode_image(
            contents=contents,
            channels=channels,
            expand_animations=expand_animations,
            dtype=dtype)
      # Any other dtype: decode as uint16, then convert to the requested one.
      decoded_uint16 = gen_image_ops.decode_image(
          contents=contents,
          channels=channels,
          expand_animations=expand_animations,
          dtype=dtypes.uint16)
      return convert_image_dtype(decoded_uint16, dtype)

    # Legacy path: sniff the leading bytes and dispatch with nested conds in
    # the order JPEG -> PNG -> GIF -> BMP.
    if channels not in (None, 0, 1, 3, 4):
      raise ValueError('channels must be in (None, 0, 1, 3, 4)')
    header = string_ops.substr(contents, 0, 3)

    def _bmp():
      """Decodes a BMP image."""
      # BMP is the last format tried, so a failed 'BM' check means the bytes
      # matched none of the supported formats.
      bmp_magic = string_ops.substr(contents, 0, 2)
      is_bmp = math_ops.equal(bmp_magic, 'BM', name='is_bmp')
      assert_decode = control_flow_ops.Assert(
          is_bmp, ['Unable to decode bytes as JPEG, PNG, GIF, or BMP'])
      bmp_channels = 0 if channels is None else channels
      # Single-channel output is rejected for BMP.
      channels_ok = math_ops.not_equal(bmp_channels, 1, name='check_channels')
      assert_channels = control_flow_ops.Assert(
          channels_ok,
          ['Channels must be in (None, 0, 3, 4) when decoding BMP images'])
      with ops.control_dependencies([assert_decode, assert_channels]):
        decoded = gen_image_ops.decode_bmp(contents, channels=bmp_channels)
        return convert_image_dtype(decoded, dtype)

    def _gif():
      """Decodes a GIF image."""
      # `channels` was already validated to be in (None, 0, 1, 3, 4); GIF
      # additionally rejects 1 and 4.
      gif_channels = 0 if channels is None else channels
      channels_ok = math_ops.logical_and(
          math_ops.not_equal(gif_channels, 1, name='check_gif_channels'),
          math_ops.not_equal(gif_channels, 4, name='check_gif_channels'))
      assert_channels = control_flow_ops.Assert(
          channels_ok,
          ['Channels must be in (None, 0, 3) when decoding GIF images'])
      with ops.control_dependencies([assert_channels]):
        frames = convert_image_dtype(gen_image_ops.decode_gif(contents), dtype)
        if not expand_animations:
          # Animated GIFs are still decoded in full; only the first frame is
          # kept when expand_animations is False.
          frames = array_ops.gather(frames, 0)
        return frames

    def check_gif():
      """Decodes as GIF when the header is 'GIF', otherwise tries BMP."""
      is_gif = math_ops.equal(header, b'\x47\x49\x46', name='is_gif')
      return control_flow_ops.cond(is_gif, _gif, _bmp, name='cond_gif')

    def _png():
      """Decodes a PNG image."""
      # The PNG op is asked for uint8 only when uint8 was requested; every
      # other target dtype is decoded as uint16 and converted afterwards.
      png_dtype = dtypes.uint8 if dtype == dtypes.uint8 else dtypes.uint16
      decoded = gen_image_ops.decode_png(contents, channels, dtype=png_dtype)
      return convert_image_dtype(decoded, dtype)

    def check_png():
      """Checks if an image is PNG."""
      return control_flow_ops.cond(
          _is_png(contents), _png, check_gif, name='cond_png')

    def _jpeg():
      """Decodes a jpeg image."""
      jpeg_channels = 0 if channels is None else channels
      # Four-channel output is rejected for JPEG.
      channels_ok = math_ops.not_equal(
          jpeg_channels, 4, name='check_jpeg_channels')
      assert_channels = control_flow_ops.Assert(
          channels_ok,
          ['Channels must be in (None, 0, 1, 3) when decoding JPEG images'])
      with ops.control_dependencies([assert_channels]):
        decoded = gen_image_ops.decode_jpeg(contents, channels)
        return convert_image_dtype(decoded, dtype)

    # `is_jpeg` matches both plain JPEG (\xff\xd8\xff\xe0) and JPEG with EXIF
    # data (\xff\xd8\xff\xe1).
    return control_flow_ops.cond(
        is_jpeg(contents), _jpeg, check_png, name='cond_jpeg')
| |
| |
@tf_export('image.total_variation')
@dispatch.add_dispatch_support
def total_variation(images, name=None):
  """Calculate and return the total variation for one or more images.

  The total variation is the sum of the absolute differences for neighboring
  pixel-values in the input images. This measures how much noise is in the
  images.

  This can be used as a loss-function during optimization so as to suppress
  noise in images. If you have a batch of images, then you should calculate
  the scalar loss-value as the sum:
  `loss = tf.reduce_sum(tf.image.total_variation(images))`

  This implements the anisotropic 2-D version of the formula described here:

  https://en.wikipedia.org/wiki/Total_variation_denoising

  Args:
    images: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
      of shape `[height, width, channels]`.
    name: A name for the operation (optional).

  Raises:
    ValueError: if images.shape is not a 3-D or 4-D vector.

  Returns:
    The total variation of `images`.

    If `images` was 4-D, return a 1-D float Tensor of shape `[batch]` with the
    total variation for each image in the batch.
    If `images` was 3-D, return a scalar float with the total variation for
    that image.
  """
  with ops.name_scope(name, 'total_variation'):
    rank = images.get_shape().ndims

    # Only single images (rank 3) and batches (rank 4) are supported.
    if rank not in (3, 4):
      raise ValueError('\'images\' must be either 3 or 4-dimensional.')

    # Neighbor differences are formed by subtracting the image from a copy of
    # itself shifted by one pixel along the height, then along the width.
    if rank == 3:
      # Single image of shape [height, width, channels]: reduce over every
      # axis (axis=None) to get one scalar.
      height_diff = images[1:, :, :] - images[:-1, :, :]
      width_diff = images[:, 1:, :] - images[:, :-1, :]
      reduce_axes = None
    else:
      # Batch of shape [batch, height, width, channels]: reduce over the last
      # three axes so one value per image remains.
      height_diff = images[:, 1:, :, :] - images[:, :-1, :, :]
      width_diff = images[:, :, 1:, :] - images[:, :, :-1, :]
      reduce_axes = [1, 2, 3]

    # Total variation = sum of absolute differences in both directions.
    height_variation = math_ops.reduce_sum(
        math_ops.abs(height_diff), axis=reduce_axes)
    width_variation = math_ops.reduce_sum(
        math_ops.abs(width_diff), axis=reduce_axes)
    return height_variation + width_variation
| |
| |
@tf_export('image.sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def sample_distorted_bounding_box_v2(image_size,
                                     bounding_boxes,
                                     seed=0,
                                     min_object_covered=0.1,
                                     aspect_ratio_range=None,
                                     area_range=None,
                                     max_attempts=None,
                                     use_image_if_no_bounding_boxes=None,
                                     name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth labels
  in image recognition or object localization tasks. A common technique for
  training such a system is to randomly distort an image while preserving
  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
  localization of an object, i.e. bounding box, given an `image_size`,
  `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and the height of the underlying image.

  For example,

  ```python
      # Generate a single distorted bounding box.
      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bounding_boxes,
          min_object_covered=0.1)

      # Draw the bounding box in an image summary.
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox_for_draw)
      tf.compat.v1.summary.image('images_with_box', image_with_box)

      # Employ the bounding box to distort the image.
      distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
  false and no bounding boxes are supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
      describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `0`. If `seed` is set to non-zero, the
      random number generator is seeded by the given `seed`. Otherwise, it is
      seeded by a random seed.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be non-negative.
      In the case of 0, the cropped area does not need to overlap any of the
      bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio = width /
      height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[offset_height, offset_width, 0]`. Provide as input to
      `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[target_height, target_width, -1]`. Provide as input to
      `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
    the distorted bounding box.
    Provide as input to `tf.image.draw_bounding_boxes`.
  """
  # A truthy seed is expanded into the (seed, seed2) pair via
  # `random_seed.get_seed`; a falsy one (e.g. 0 or None) sends (0, 0) to the
  # op.
  if seed:
    seed1, seed2 = random_seed.get_seed(seed)
  else:
    seed1, seed2 = 0, 0

  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size=image_size,
        bounding_boxes=bounding_boxes,
        min_object_covered=min_object_covered,
        seed=seed1,
        seed2=seed2,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)
| |
| |
@tf_export('image.stateless_sample_distorted_bounding_box', v1=[])
@dispatch.add_dispatch_support
def stateless_sample_distorted_bounding_box(image_size,
                                            bounding_boxes,
                                            seed,
                                            min_object_covered=0.1,
                                            aspect_ratio_range=None,
                                            area_range=None,
                                            max_attempts=None,
                                            use_image_if_no_bounding_boxes=None,
                                            name=None):
  """Generate a randomly distorted bounding box for an image deterministically.

  Bounding box annotations are often supplied in addition to ground-truth labels
  in image recognition or object localization tasks. A common technique for
  training such a system is to randomly distort an image while preserving
  its content, i.e. *data augmentation*. This Op, given the same `seed`,
  deterministically outputs a randomly distorted localization of an object, i.e.
  bounding box, given an `image_size`, `bounding_boxes` and a series of
  constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and the height of the underlying image.

  The output of this Op is guaranteed to be the same given the same `seed` and
  is independent of how many times the function is called, and independent of
  global seed settings (e.g. `tf.random.set_seed`).

  Example usage:

  >>> image = np.array([[[1], [2], [3]], [[4], [5], [6]], [[7], [8], [9]]])
  >>> bbox = tf.constant(
  ...   [0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
  >>> seed = (1, 2)
  >>> # Generate a single distorted bounding box.
  >>> bbox_begin, bbox_size, bbox_draw = (
  ...   tf.image.stateless_sample_distorted_bounding_box(
  ...     tf.shape(image), bounding_boxes=bbox, seed=seed))
  >>> # Employ the bounding box to distort the image.
  >>> tf.slice(image, bbox_begin, bbox_size)
  <tf.Tensor: shape=(2, 2, 1), dtype=int64, numpy=
  array([[[1],
          [2]],
         [[4],
          [5]]])>
  >>> # Draw the bounding box in an image summary.
  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
  >>> tf.image.draw_bounding_boxes(
  ...   tf.expand_dims(tf.cast(image, tf.float32),0), bbox_draw, colors)
  <tf.Tensor: shape=(1, 3, 3, 1), dtype=float32, numpy=
  array([[[[1.],
           [1.],
           [3.]],
          [[1.],
           [1.],
           [6.]],
          [[7.],
           [8.],
           [9.]]]], dtype=float32)>

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = true` will assume there is a single implicit
  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
  false and no bounding boxes are supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
      describing the N bounding boxes associated with the image.
    seed: A shape [2] Tensor, the seed to the random number generator. Must have
      dtype `int32` or `int64`. (When using XLA, only `int32` is allowed.)
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be non-negative.
      In the case of 0, the cropped area does not need to overlap any of the
      bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect `ratio = width /
      height` within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[offset_height, offset_width, 0]`. Provide as input to
      `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
    `[target_height, target_width, -1]`. Provide as input to
      `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
    the distorted bounding box.
    Provide as input to `tf.image.draw_bounding_boxes`.
  """
  with ops.name_scope(name, 'stateless_sample_distorted_bounding_box'):
    # Every argument, including the caller-supplied `seed`, is forwarded
    # unchanged to the generated op.
    sampled = gen_image_ops.stateless_sample_distorted_bounding_box(
        image_size=image_size,
        bounding_boxes=bounding_boxes,
        min_object_covered=min_object_covered,
        seed=seed,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)
    return sampled
| |
| |
@tf_export(v1=['image.sample_distorted_bounding_box'])
@dispatch.add_dispatch_support
@deprecation.deprecated(
    date=None,
    instructions='`seed2` arg is deprecated. '
    'Use sample_distorted_bounding_box_v2 instead.')
def sample_distorted_bounding_box(image_size,
                                  bounding_boxes,
                                  seed=None,
                                  seed2=None,
                                  min_object_covered=0.1,
                                  aspect_ratio_range=None,
                                  area_range=None,
                                  max_attempts=None,
                                  use_image_if_no_bounding_boxes=None,
                                  name=None):
  """Generate a single randomly distorted bounding box for an image.

  Bounding box annotations are often supplied in addition to ground-truth labels
  in image recognition or object localization tasks. A common technique for
  training such a system is to randomly distort an image while preserving
  its content, i.e. *data augmentation*. This Op outputs a randomly distorted
  localization of an object, i.e. bounding box, given an `image_size`,
  `bounding_boxes` and a series of constraints.

  The output of this Op is a single bounding box that may be used to crop the
  original image. The output is returned as 3 tensors: `begin`, `size` and
  `bboxes`. The first 2 tensors can be fed directly into `tf.slice` to crop the
  image. The latter may be supplied to `tf.image.draw_bounding_boxes` to
  visualize what the bounding box looks like.

  Bounding boxes are supplied and returned as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and height of the underlying image.

  For example,

  ```python
      # Generate a single distorted bounding box.
      begin, size, bbox_for_draw = tf.image.sample_distorted_bounding_box(
          tf.shape(image),
          bounding_boxes=bounding_boxes,
          min_object_covered=0.1)

      # Draw the bounding box in an image summary.
      image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                    bbox_for_draw)
      tf.compat.v1.summary.image('images_with_box', image_with_box)

      # Employ the bounding box to distort the image.
      distorted_image = tf.slice(image, begin, size)
  ```

  Note that if no bounding box information is available, setting
  `use_image_if_no_bounding_boxes = True` will assume there is a single implicit
  bounding box covering the whole image. If `use_image_if_no_bounding_boxes` is
  false and no bounding boxes are supplied, an error is raised.

  Args:
    image_size: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`. 1-D, containing `[height, width, channels]`.
    bounding_boxes: A `Tensor` of type `float32`. 3-D with shape `[batch, N, 4]`
      describing the N bounding boxes associated with the image.
    seed: An optional `int`. Defaults to `0`. If either `seed` or `seed2` are
      set to non-zero, the random number generator is seeded by the given
      `seed`.  Otherwise, it is seeded by a random seed.
    seed2: An optional `int`. Defaults to `0`. A second seed to avoid seed
      collision.
    min_object_covered: A Tensor of type `float32`. Defaults to `0.1`. The
      cropped area of the image must contain at least this fraction of any
      bounding box supplied. The value of this parameter should be non-negative.
      In the case of 0, the cropped area does not need to overlap any of the
      bounding boxes supplied.
    aspect_ratio_range: An optional list of `floats`. Defaults to `[0.75,
      1.33]`. The cropped area of the image must have an aspect ratio = width /
      height within this range.
    area_range: An optional list of `floats`. Defaults to `[0.05, 1]`. The
      cropped area of the image must contain a fraction of the supplied image
      within this range.
    max_attempts: An optional `int`. Defaults to `100`. Number of attempts at
      generating a cropped region of the image of the specified constraints.
      After `max_attempts` failures, return the entire image.
    use_image_if_no_bounding_boxes: An optional `bool`. Defaults to `False`.
      Controls behavior if no bounding boxes supplied. If true, assume an
      implicit bounding box covering the whole input. If false, raise an error.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (begin, size, bboxes).

    begin: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[offset_height, offset_width, 0]`. Provide as input to
      `tf.slice`.
    size: A `Tensor`. Has the same type as `image_size`. 1-D, containing
      `[target_height, target_width, -1]`. Provide as input to
      `tf.slice`.
    bboxes: A `Tensor` of type `float32`. 3-D with shape `[1, 1, 4]` containing
      the distorted bounding box.
      Provide as input to `tf.image.draw_bounding_boxes`.
  """
  with ops.name_scope(name, 'sample_distorted_bounding_box'):
    # All arguments are forwarded unchanged to the generated V2 kernel wrapper.
    return gen_image_ops.sample_distorted_bounding_box_v2(
        image_size,
        bounding_boxes,
        seed=seed,
        seed2=seed2,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=max_attempts,
        use_image_if_no_bounding_boxes=use_image_if_no_bounding_boxes,
        name=name)
| |
| |
@tf_export('image.non_max_suppression')
@dispatch.add_dispatch_support
def non_max_suppression(boxes,
                        scores,
                        max_output_size,
                        iou_threshold=0.5,
                        score_threshold=float('-inf'),
                        name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Boxes whose intersection-over-union (IOU) overlap with an already selected
  box is too high are pruned. Each box is given as `[y1, x1, y2, x2]`, where
  `(y1, x1)` and `(y2, x2)` are the coordinates of any diagonal pair of box
  corners; coordinates may be normalized (i.e., lying in the interval `[0, 1]`)
  or absolute. The algorithm is agnostic to where the origin of the coordinate
  system lies, and is invariant to orthogonal transformations and translations
  of the coordinate system, so translating or reflecting the coordinate system
  results in the same boxes being selected.

  The result is a set of integers indexing into the input collection of
  bounding boxes. The coordinates of the selected boxes can be recovered with
  `tf.gather`. For example:
    ```python
    selected_indices = tf.image.non_max_suppression(
        boxes, scores, max_output_size, iou_threshold)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression'):
    # The V3 kernel takes both thresholds as tensors.
    iou_threshold_t = ops.convert_to_tensor(
        iou_threshold, name='iou_threshold')
    score_threshold_t = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    selected_indices = gen_image_ops.non_max_suppression_v3(
        boxes, scores, max_output_size, iou_threshold_t, score_threshold_t)
    return selected_indices
| |
| |
@tf_export('image.non_max_suppression_with_scores')
@dispatch.add_dispatch_support
def non_max_suppression_with_scores(boxes,
                                    scores,
                                    max_output_size,
                                    iou_threshold=0.5,
                                    score_threshold=float('-inf'),
                                    soft_nms_sigma=0.0,
                                    name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes.  Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
  diagonal pair of box corners and the coordinates can be provided as normalized
  (i.e., lying in the interval `[0, 1]`) or absolute.  Note that this algorithm
  is agnostic to where the origin is in the coordinate system.  Note that this
  algorithm is invariant to orthogonal transformations and translations
  of the coordinate system; thus translating or reflecting the coordinate
  system results in the same boxes being selected by the algorithm.
  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes.  The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation.  For example:
    ```python
    selected_indices, selected_scores = (
        tf.image.non_max_suppression_with_scores(
            boxes, scores, max_output_size, iou_threshold=1.0,
            score_threshold=0.1, soft_nms_sigma=0.5))
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  This function generalizes the `tf.image.non_max_suppression` op by also
  supporting a Soft-NMS (with Gaussian weighting) mode (c.f.
  Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
  of other overlapping boxes instead of directly causing them to be pruned.
  Consequently, in contrast to `tf.image.non_max_suppression`,
  `tf.image.non_max_suppression_with_scores` returns the new scores of each
  input box in the second output, `selected_scores`.

  To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
  larger than 0.  When `soft_nms_sigma` equals 0, the behavior of
  `tf.image.non_max_suppression_with_scores` is identical to that of
  `tf.image.non_max_suppression` (except for the extra output) both in function
  and in running time.

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    soft_nms_sigma: A scalar float representing the Soft NMS sigma parameter
      (see Bodla et al, https://arxiv.org/abs/1704.04503).  When
      `soft_nms_sigma=0.0` (which is default), we fall back to standard (hard)
      NMS.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
    selected_scores: A 1-D float tensor of shape `[M]` representing the
      corresponding scores for each selected box, where `M <= max_output_size`.
      Scores only differ from corresponding input scores when using Soft NMS
      (i.e. when `soft_nms_sigma>0`)
  """
  with ops.name_scope(name, 'non_max_suppression_with_scores'):
    iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold')
    score_threshold = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    soft_nms_sigma = ops.convert_to_tensor(
        soft_nms_sigma, name='soft_nms_sigma')
    # The V5 op yields three outputs; only the first two are returned here.
    (selected_indices, selected_scores,
     _) = gen_image_ops.non_max_suppression_v5(
         boxes,
         scores,
         max_output_size,
         iou_threshold,
         score_threshold,
         soft_nms_sigma,
         pad_to_max_output_size=False)
    return selected_indices, selected_scores
| |
| |
@tf_export('image.non_max_suppression_overlaps')
@dispatch.add_dispatch_support
def non_max_suppression_with_overlaps(overlaps,
                                      scores,
                                      max_output_size,
                                      overlap_threshold=0.5,
                                      score_threshold=float('-inf'),
                                      name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Prunes away boxes that have high overlap with previously selected boxes.
  The n-by-n overlap values are supplied as a square matrix.
  The output of this operation is a set of integers indexing into the input
  collection of bounding boxes representing the selected boxes.  The bounding
  box coordinates corresponding to the selected indices can then be obtained
  using the `tf.gather` operation.  For example:
    ```python
    selected_indices = tf.image.non_max_suppression_overlaps(
        overlaps, scores, max_output_size, overlap_threshold)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    overlaps: A 2-D float `Tensor` of shape `[num_boxes, num_boxes]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    overlap_threshold: A float representing the threshold for deciding whether
      boxes overlap too much with respect to the provided overlap values.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the overlaps tensor, where `M <= max_output_size`.
  """
  with ops.name_scope(name, 'non_max_suppression_overlaps'):
    overlap_threshold = ops.convert_to_tensor(
        overlap_threshold, name='overlap_threshold')
    # `score_threshold` is handed to the generated op wrapper as given.
    return gen_image_ops.non_max_suppression_with_overlaps(
        overlaps, scores, max_output_size, overlap_threshold, score_threshold)
| |
| |
| _rgb_to_yiq_kernel = [[0.299, 0.59590059, 0.2115], |
| [0.587, -0.27455667, -0.52273617], |
| [0.114, -0.32134392, 0.31119955]] |
| |
| |
@tf_export('image.rgb_to_yiq')
@dispatch.add_dispatch_support
def rgb_to_yiq(images):
  """Converts one or more images from RGB to YIQ.

  Produces a tensor shaped like `images` that holds the YIQ value of every
  pixel. The result is only well defined when the values in `images` lie in
  [0,1].

  Usage Example:

  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
  >>> tf.image.rgb_to_yiq(x)
  <tf.Tensor: shape=(1, 1, 3), dtype=float32,
  numpy=array([[[ 1.815     , -0.91724455,  0.09962624]]], dtype=float32)>

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  conversion_matrix = ops.convert_to_tensor(
      _rgb_to_yiq_kernel, dtype=images.dtype, name='kernel')
  # Contract the trailing (channel) axis of `images` with the matrix rows.
  channel_axis = images.get_shape().ndims - 1
  return math_ops.tensordot(
      images, conversion_matrix, axes=[[channel_axis], [0]])
| |
| |
| _yiq_to_rgb_kernel = [[1, 1, 1], [0.95598634, -0.27201283, -1.10674021], |
| [0.6208248, -0.64720424, 1.70423049]] |
| |
| |
@tf_export('image.yiq_to_rgb')
@dispatch.add_dispatch_support
def yiq_to_rgb(images):
  """Converts one or more images from YIQ to RGB.

  Produces a tensor shaped like `images` that holds the RGB value of every
  pixel. The result is only well defined when the Y values in `images` lie in
  [0,1], the I values lie in [-0.5957,0.5957] and the Q values lie in
  [-0.5226,0.5226].

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  conversion_matrix = ops.convert_to_tensor(
      _yiq_to_rgb_kernel, dtype=images.dtype, name='kernel')
  # Contract the trailing (channel) axis of `images` with the matrix rows.
  channel_axis = images.get_shape().ndims - 1
  return math_ops.tensordot(
      images, conversion_matrix, axes=[[channel_axis], [0]])
| |
| |
| _rgb_to_yuv_kernel = [[0.299, -0.14714119, 0.61497538], |
| [0.587, -0.28886916, -0.51496512], |
| [0.114, 0.43601035, -0.10001026]] |
| |
| |
@tf_export('image.rgb_to_yuv')
@dispatch.add_dispatch_support
def rgb_to_yuv(images):
  """Converts one or more images from RGB to YUV.

  Produces a tensor shaped like `images` that holds the YUV value of every
  pixel. The result is only well defined when the values in `images` lie in
  [0, 1]. Images are commonly represented either with pixel values in
  [0, 255] or with float pixel values in [0, 1]; callers must convert their
  input to the float [0, 1] range before calling this function.

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  conversion_matrix = ops.convert_to_tensor(
      _rgb_to_yuv_kernel, dtype=images.dtype, name='kernel')
  # Contract the trailing (channel) axis of `images` with the matrix rows.
  channel_axis = images.get_shape().ndims - 1
  return math_ops.tensordot(
      images, conversion_matrix, axes=[[channel_axis], [0]])
| |
| |
| _yuv_to_rgb_kernel = [[1, 1, 1], [0, -0.394642334, 2.03206185], |
| [1.13988303, -0.58062185, 0]] |
| |
| |
@tf_export('image.yuv_to_rgb')
@dispatch.add_dispatch_support
def yuv_to_rgb(images):
  """Converts one or more images from YUV to RGB.

  Produces a tensor shaped like `images` that holds the RGB value of every
  pixel. The result is only well defined when the Y values in `images` lie in
  [0,1] and the U and V values lie in [-0.5,0.5].

  Consequently, YUV images whose pixel values fall outside these ranges must
  be rescaled first. The example below preprocesses each channel of a batch of
  images before passing it to `yuv_to_rgb`.

  ```python
  yuv_images = tf.random.uniform(shape=[100, 64, 64, 3], maxval=255)
  last_dimension_axis = len(yuv_images.shape) - 1
  yuv_tensor_images = tf.truediv(
      tf.subtract(
          yuv_images,
          tf.reduce_min(yuv_images)
      ),
      tf.subtract(
          tf.reduce_max(yuv_images),
          tf.reduce_min(yuv_images)
       )
  )
  y, u, v = tf.split(yuv_tensor_images, 3, axis=last_dimension_axis)
  target_uv_min, target_uv_max = -0.5, 0.5
  u = u * (target_uv_max - target_uv_min) + target_uv_min
  v = v * (target_uv_max - target_uv_min) + target_uv_min
  preprocessed_yuv_images = tf.concat([y, u, v], axis=last_dimension_axis)
  rgb_tensor_images = tf.image.yuv_to_rgb(preprocessed_yuv_images)
  ```

  Args:
    images: 2-D or higher rank. Image data to convert. Last dimension must be
      size 3.

  Returns:
    images: tensor with the same shape as `images`.
  """
  images = ops.convert_to_tensor(images, name='images')
  conversion_matrix = ops.convert_to_tensor(
      _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel')
  # Contract the trailing (channel) axis of `images` with the matrix rows.
  channel_axis = images.get_shape().ndims - 1
  return math_ops.tensordot(
      images, conversion_matrix, axes=[[channel_axis], [0]])
| |
| |
def _verify_compatible_image_shapes(img1, img2):
  """Checks if two image tensors are compatible for applying SSIM or PSNR.

  Verifies that both image sets have rank at least 3 and that their last three
  dimensions agree. Whatever can be decided from static shapes is checked
  immediately; the same conditions are also expressed as runtime assertions on
  the dynamic shapes.

  Args:
    img1: Tensor containing the first image batch.
    img2: Tensor containing the second image batch.

  Returns:
    A tuple containing: the first tensor shape, the second tensor shape, and a
    list of control_flow_ops.Assert() ops implementing the checks.

  Raises:
    ValueError: When static shape check fails.
  """
  static_shape1 = img1.get_shape().with_rank_at_least(3)
  static_shape2 = img2.get_shape().with_rank_at_least(3)
  static_shape1[-3:].assert_is_compatible_with(static_shape2[-3:])

  ranks_known = (
      static_shape1.ndims is not None and static_shape2.ndims is not None)
  if ranks_known:
    # Walk the leading (batch) dimensions from the innermost one outwards; a
    # pair is acceptable when either side is 1 or the two are compatible.
    batch_dims1 = reversed(static_shape1.dims[:-3])
    batch_dims2 = reversed(static_shape2.dims[:-3])
    for dim1, dim2 in zip(batch_dims1, batch_dims2):
      if dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2):
        continue
      raise ValueError('Two images are not compatible: %s and %s' %
                       (static_shape1, static_shape2))

  # From here on work with the dynamic shape tensors.
  shape1, shape2 = array_ops.shape_n([img1, img2])

  # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable.
  checks = [
      # The first image must have at least three dimensions.
      control_flow_ops.Assert(
          math_ops.greater_equal(array_ops.size(shape1), 3), [shape1, shape2],
          summarize=10),
      # The trailing three dimensions must match exactly.
      control_flow_ops.Assert(
          math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])),
          [shape1, shape2],
          summarize=10),
  ]
  return shape1, shape2, checks
| |
| |
@tf_export('image.psnr')
@dispatch.add_dispatch_support
def psnr(a, b, max_val, name=None):
  """Returns the Peak Signal-to-Noise Ratio between a and b.

  This is intended to be used on signals (or images). Produces a PSNR value for
  each image in batch.

  The last three dimensions of input are expected to be [height, width, depth].

  Example:

  ```python
      # Read images from file.
      im1 = tf.io.decode_png(tf.io.read_file('path/to/im1.png'))
      im2 = tf.io.decode_png(tf.io.read_file('path/to/im2.png'))
      # Compute PSNR over tf.uint8 Tensors.
      psnr1 = tf.image.psnr(im1, im2, max_val=255)

      # Compute PSNR over tf.float32 Tensors.
      im1 = tf.image.convert_image_dtype(im1, tf.float32)
      im2 = tf.image.convert_image_dtype(im2, tf.float32)
      psnr2 = tf.image.psnr(im1, im2, max_val=1.0)
      # psnr1 and psnr2 both have type tf.float32 and are almost equal.
  ```

  Args:
    a: First set of images. Anything convertible to a `Tensor`.
    b: Second set of images. Anything convertible to a `Tensor`.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    name: Namespace to embed the computation in.

  Returns:
    The PSNR between a and b, one value per image. The returned tensor has type
    `tf.float32`; the last three dimensions of the inputs are reduced away, so
    for inputs of shape `[batch_size, height, width, depth]` the result has
    shape `[batch_size]`.
  """
  with ops.name_scope(name, 'PSNR', [a, b]):
    # Convert up front so that `a.dtype` below is valid for non-Tensor inputs
    # (e.g. nested Python lists), matching what `ssim` does.
    a = ops.convert_to_tensor(a, name='a')
    b = ops.convert_to_tensor(b, name='b')
    # Need to convert the images to float32.  Scale max_val accordingly so that
    # PSNR is computed correctly.
    max_val = math_ops.cast(max_val, a.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    a = convert_image_dtype(a, dtypes.float32)
    b = convert_image_dtype(b, dtypes.float32)
    # Mean squared error over height, width and depth of every image.
    mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1])
    # PSNR = 20 * log10(max_val) - 10 * log10(mse), written with natural logs.
    psnr_val = math_ops.subtract(
        20 * math_ops.log(max_val) / math_ops.log(10.0),
        np.float32(10 / np.log(10)) * math_ops.log(mse),
        name='psnr')

    _, _, checks = _verify_compatible_image_shapes(a, b)
    # Tie the runtime shape assertions to the returned value.
    with ops.control_dependencies(checks):
      return array_ops.identity(psnr_val)
| |
| |
def _ssim_helper(x, y, reducer, max_val, compensation=1.0, k1=0.01, k2=0.03):
  r"""Helper function for computing SSIM.

  SSIM estimates covariances with weighted sums.  With the default parameters
  the covariance estimate is biased:
  if `reducer` is a weighted sum, the mean estimators are
    \mu_x = \sum_i w_i x_i,
    \mu_y = \sum_i w_i y_i,
  where w_i's are the weighted-sum weights, and the covariance estimator is
    cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  under the assumption \sum_i w_i = 1. This estimator is biased, since
    E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y).
  To obtain the SSIM measure with unbiased covariance estimators, pass
  (1 - \sum_i w_i ^ 2) as the `compensation` argument.

  Args:
    x: First set of images.
    y: Second set of images.
    reducer: Function that computes 'local' averages from the set of images. For
      non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), and
      for convolutional version, this is usually tf.nn.avg_pool2d or
      tf.nn.conv2d with weighted-sum kernel.
    max_val: The dynamic range (i.e., the difference between the maximum
      possible allowed value and the minimum allowed value).
    compensation: Compensation factor. See above.
    k1: Default value 0.01
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A pair containing the luminance measure, and the contrast-structure measure.
  """
  # Stabilizing constants derived from the dynamic range.
  c1 = (k1 * max_val)**2
  c2 = (k2 * max_val)**2

  # Luminance term:
  #   (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1).
  mu_x = reducer(x)
  mu_y = reducer(y)
  two_mu_xy = mu_x * mu_y * 2.0
  mu_sq_sum = math_ops.square(mu_x) + math_ops.square(mu_y)
  luminance = (two_mu_xy + c1) / (mu_sq_sum + c1)

  # Contrast-structure term:
  #   (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2).
  # Because `reducer` is a weighted sum whose weights add up to 1,
  #   cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y)
  #            = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j),
  # so the covariances follow from the reduced products minus the products of
  # the means computed above.
  two_mean_xy = reducer(x * y) * 2.0
  mean_sq_sum = reducer(math_ops.square(x) + math_ops.square(y))
  c2 *= compensation
  cs = (two_mean_xy - two_mu_xy + c2) / (mean_sq_sum - mu_sq_sum + c2)

  # The SSIM score is the product of these two measures.
  return luminance, cs
| |
| |
def _fspecial_gauss(size, sigma):
  """Function to mimic the 'fspecial' gaussian MATLAB function.

  Args:
    size: Side length of the square kernel; converted to an int32 tensor.
    sigma: Width of the Gaussian; its dtype determines the kernel dtype.

  Returns:
    A tensor reshaped to `[size, size, 1, 1]` holding the kernel weights, which
    are normalized by a softmax over all entries.
  """
  size = ops.convert_to_tensor(size, dtypes.int32)
  sigma = ops.convert_to_tensor(sigma)
  float_dtype = sigma.dtype

  # Coordinates centred on the middle of the kernel.
  offsets = math_ops.cast(math_ops.range(size), float_dtype)
  offsets = offsets - math_ops.cast(size - 1, float_dtype) / 2.0

  # 1-D exponent -x^2 / (2 * sigma^2).
  exponent_1d = math_ops.square(offsets) * (-0.5 / math_ops.square(sigma))

  # Outer sum of the 1-D exponents gives the 2-D exponent grid.
  exponent_2d = (
      array_ops.reshape(exponent_1d, shape=[1, -1]) +
      array_ops.reshape(exponent_1d, shape=[-1, 1]))
  # Flatten to a single row so tf.nn.softmax() normalizes over every entry.
  flat_exponent = array_ops.reshape(exponent_2d, shape=[1, -1])
  weights = nn_ops.softmax(flat_exponent)
  return array_ops.reshape(weights, shape=[size, size, 1, 1])
| |
| |
def _ssim_per_channel(img1,
                      img2,
                      max_val=1.0,
                      filter_size=11,
                      filter_sigma=1.5,
                      k1=0.01,
                      k2=0.03):
  """Computes SSIM index between img1 and img2 per color channel.

  This function matches the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Details:
    - 11x11 Gaussian filter of width 1.5 is used.
    - k1 = 0.01, k2 = 0.03 as in the original paper.

  Args:
    img1: First image batch.
    img2: Second image batch.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A pair of tensors containing the channel-wise SSIM and contrast-structure
    values. The shape is [..., channels].
  """
  filter_size = constant_op.constant(filter_size, dtype=dtypes.int32)
  filter_sigma = constant_op.constant(filter_sigma, dtype=img1.dtype)

  shape1, shape2 = array_ops.shape_n([img1, img2])
  # Height and width of both inputs must be at least the filter size.
  size_checks = [
      control_flow_ops.Assert(
          math_ops.reduce_all(
              math_ops.greater_equal(dynamic_shape[-3:-1], filter_size)),
          [dynamic_shape, filter_size],
          summarize=8) for dynamic_shape in (shape1, shape2)
  ]

  # Enforce the check to run before computation.
  with ops.control_dependencies(size_checks):
    img1 = array_ops.identity(img1)

  # TODO(sjhwang): Try to cache kernels and compensation factor.
  gaussian = _fspecial_gauss(filter_size, filter_sigma)
  # Repeat the single-channel kernel once per image channel.
  kernel = array_ops.tile(gaussian, multiples=[1, 1, shape1[-1], 1])

  # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`,
  # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead.
  compensation = 1.0

  # TODO(sjhwang): Try FFT.
  # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying
  #   1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
  def reducer(images):
    """Applies the Gaussian kernel per channel, keeping leading dimensions."""
    full_shape = array_ops.shape(images)
    # Collapse all leading dimensions into one batch axis for the convolution.
    flattened = array_ops.reshape(
        images, shape=array_ops.concat([[-1], full_shape[-3:]], 0))
    filtered = nn.depthwise_conv2d(
        flattened, kernel, strides=[1, 1, 1, 1], padding='VALID')
    # Restore the leading dimensions in front of the filtered image dims.
    restored_shape = array_ops.concat(
        [full_shape[:-3], array_ops.shape(filtered)[1:]], 0)
    return array_ops.reshape(filtered, restored_shape)

  luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation, k1,
                               k2)

  # Average over the second and the third from the last: height, width.
  spatial_axes = constant_op.constant([-3, -2], dtype=dtypes.int32)
  ssim_val = math_ops.reduce_mean(luminance * cs, spatial_axes)
  cs = math_ops.reduce_mean(cs, spatial_axes)
  return ssim_val, cs
| |
| |
@tf_export('image.ssim')
@dispatch.add_dispatch_support
def ssim(img1,
         img2,
         max_val,
         filter_size=11,
         filter_sigma=1.5,
         k1=0.01,
         k2=0.03):
  """Computes SSIM index between img1 and img2.

  This function is based on the standard SSIM implementation from:
  Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image
  quality assessment: from error visibility to structural similarity. IEEE
  transactions on image processing.

  Note: The true SSIM is only defined on grayscale.  This function does not
  perform any colorspace transform.  (If the input is already YUV, then it will
  compute YUV SSIM average.)

  Details:
    - 11x11 Gaussian filter of width 1.5 is used.
    - k1 = 0.01, k2 = 0.03 as in the original paper.

  The image sizes must be at least 11x11 because of the filter size.

  Example:

  ```python
      # Read images from file.
      im1 = tf.decode_png('path/to/im1.png')
      im2 = tf.decode_png('path/to/im2.png')
      # Compute SSIM over tf.uint8 Tensors.
      ssim1 = tf.image.ssim(im1, im2, max_val=255, filter_size=11,
                            filter_sigma=1.5, k1=0.01, k2=0.03)

      # Compute SSIM over tf.float32 Tensors.
      im1 = tf.image.convert_image_dtype(im1, tf.float32)
      im2 = tf.image.convert_image_dtype(im2, tf.float32)
      ssim2 = tf.image.ssim(im1, im2, max_val=1.0, filter_size=11,
                            filter_sigma=1.5, k1=0.01, k2=0.03)
      # ssim1 and ssim2 both have type tf.float32 and are almost equal.
  ```

  Args:
    img1: First image batch.
    img2: Second image batch.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A tensor containing an SSIM value for each image in batch.  Returned SSIM
    values are in range (-1, 1], when pixel values are non-negative. Returns
    a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]).
  """
  with ops.name_scope(None, 'SSIM', [img1, img2]):
    # Accept anything convertible to a tensor.
    img1 = ops.convert_to_tensor(img1, name='img1')
    img2 = ops.convert_to_tensor(img2, name='img2')
    # Make the shape assertions a prerequisite of everything computed below.
    shape_checks = _verify_compatible_image_shapes(img1, img2)[2]
    with ops.control_dependencies(shape_checks):
      img1 = array_ops.identity(img1)

    # The images are converted to float32; `max_val` goes through the same
    # cast-and-convert path so that SSIM is computed correctly.
    max_val_f32 = convert_image_dtype(
        math_ops.cast(max_val, img1.dtype), dtypes.float32)
    img1_f32 = convert_image_dtype(img1, dtypes.float32)
    img2_f32 = convert_image_dtype(img2, dtypes.float32)
    per_channel_ssim, _ = _ssim_per_channel(img1_f32, img2_f32, max_val_f32,
                                            filter_size, filter_sigma, k1, k2)
    # Average the per-channel scores over the color channels.
    return math_ops.reduce_mean(per_channel_ssim, [-1])
| |
| |
# Default per-scale weights for MS-SSIM, obtained by Wang et al.
_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)


@tf_export('image.ssim_multiscale')
@dispatch.add_dispatch_support
def ssim_multiscale(img1,
                    img2,
                    max_val,
                    power_factors=_MSSSIM_WEIGHTS,
                    filter_size=11,
                    filter_sigma=1.5,
                    k1=0.01,
                    k2=0.03):
  """Computes the MS-SSIM between img1 and img2.

  This function assumes that `img1` and `img2` are image batches, i.e. the last
  three dimensions are [height, width, channels].

  Note: The true SSIM is only defined on grayscale.  This function does not
  perform any colorspace transform.  (If the input is already YUV, then it will
  compute YUV SSIM average.)

  Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
  structural similarity for image quality assessment." Signals, Systems and
  Computers, 2004.

  Args:
    img1: First image batch.
    img2: Second image batch. Must have the same rank as img1.
    max_val: The dynamic range of the images (i.e., the difference between the
      maximum and the minimum allowed values).
    power_factors: Iterable of weights for each of the scales. The number of
      scales used is the number of weights and must be at least one. Index 0
      is the unscaled resolution's weight and each increasing scale
      corresponds to the image being downsampled by 2.  Defaults to (0.0448,
      0.2856, 0.3001, 0.2363, 0.1333), which are the values obtained in the
      original paper.
    filter_size: Default value 11 (size of gaussian filter).
    filter_sigma: Default value 1.5 (width of gaussian filter).
    k1: Default value 0.01
    k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so
      it would be better if we took the values in the range of 0 < K2 < 0.4).

  Returns:
    A tensor containing an MS-SSIM value for each image in batch.  The values
    are in range [0, 1].  Returns a tensor with shape:
    broadcast(img1.shape[:-3], img2.shape[:-3]).

  Raises:
    ValueError: If `power_factors` contains no weights.
  """
  # The weights are consumed twice (to count the scales and as the exponents
  # below), so a one-shot iterable such as a generator is materialized first.
  # Objects that already define `__len__` are passed through untouched.
  if not hasattr(power_factors, '__len__'):
    power_factors = tuple(power_factors)
  num_scales = len(power_factors)
  if num_scales < 1:
    raise ValueError('power_factors must contain at least one weight.')

  with ops.name_scope(None, 'MS-SSIM', [img1, img2]):
    # Convert to tensor if needed.
    img1 = ops.convert_to_tensor(img1, name='img1')
    img2 = ops.convert_to_tensor(img2, name='img2')
    # Shape checking.
    shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2)
    with ops.control_dependencies(checks):
      img1 = array_ops.identity(img1)

    # Need to convert the images to float32.  Scale max_val accordingly so that
    # SSIM is computed correctly.
    max_val = math_ops.cast(max_val, img1.dtype)
    max_val = convert_image_dtype(max_val, dtypes.float32)
    img1 = convert_image_dtype(img1, dtypes.float32)
    img2 = convert_image_dtype(img2, dtypes.float32)

    imgs = [img1, img2]
    shapes = [shape1, shape2]

    # img1 and img2 are assumed to be a (multi-dimensional) batch of
    # 3-dimensional images (height, width, channels). `heads` contain the batch
    # dimensions, and `tails` contain the image dimensions.
    heads = [s[:-3] for s in shapes]
    tails = [s[-3:] for s in shapes]

    # Pooling window/stride over [batch, height, width, channels]: halve the
    # two spatial dimensions only.
    divisor = [1, 2, 2, 1]
    divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32)

    def do_pad(images, remainder):
      # `remainder` has one entry per image dimension (height, width,
      # channels). Prepending a zero row and a zero column turns it into a
      # paddings matrix that pads only after the existing values.
      padding = array_ops.expand_dims(remainder, -1)
      padding = array_ops.pad(padding, [[1, 0], [1, 0]])
      return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images]

    mcs = []
    for k in range(num_scales):
      with ops.name_scope(None, 'Scale%d' % k, imgs):
        if k > 0:
          # Avg pool takes rank 4 tensors. Flatten leading dimensions.
          flat_imgs = [
              array_ops.reshape(x, array_ops.concat([[-1], t], 0))
              for x, t in zip(imgs, tails)
          ]

          # Pad odd spatial dimensions so the 2x2 pooling covers every pixel.
          remainder = tails[0] % divisor_tensor
          need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0))
          # pylint: disable=cell-var-from-loop
          padded = control_flow_ops.cond(need_padding,
                                         lambda: do_pad(flat_imgs, remainder),
                                         lambda: flat_imgs)
          # pylint: enable=cell-var-from-loop

          downscaled = [
              nn_ops.avg_pool(
                  x, ksize=divisor, strides=divisor, padding='VALID')
              for x in padded
          ]
          # Restore the original batch dimensions on the downscaled images.
          tails = [x[1:] for x in array_ops.shape_n(downscaled)]
          imgs = [
              array_ops.reshape(x, array_ops.concat([h, t], 0))
              for x, h, t in zip(downscaled, heads, tails)
          ]

        # Overwrite previous ssim value since we only need the last one.
        ssim_per_channel, cs = _ssim_per_channel(
            *imgs,
            max_val=max_val,
            filter_size=filter_size,
            filter_sigma=filter_sigma,
            k1=k1,
            k2=k2)
        mcs.append(nn_ops.relu(cs))

    # Remove the cs score for the last scale. In the MS-SSIM calculation,
    # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p).
    mcs.pop()
    mcs_and_ssim = array_ops.stack(
        mcs + [nn_ops.relu(ssim_per_channel)], axis=-1)
    # Take weighted geometric mean across the scale axis.
    ms_ssim = math_ops.reduce_prod(
        math_ops.pow(mcs_and_ssim, power_factors), [-1])

    return math_ops.reduce_mean(ms_ssim, [-1])  # Avg over color channels.
| |
| |
@tf_export('image.image_gradients')
@dispatch.add_dispatch_support
def image_gradients(image):
  """Returns image gradients (dy, dx) for each color channel.

  Both output tensors have the same shape as the input: [batch_size, h, w,
  d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in
  location (x, y). That means that dy will always have zeros in the last row,
  and dx will always have zeros in the last column.

  Usage Example:
    ```python
    BATCH_SIZE = 1
    IMAGE_HEIGHT = 5
    IMAGE_WIDTH = 5
    CHANNELS = 1
    image = tf.reshape(tf.range(IMAGE_HEIGHT * IMAGE_WIDTH * CHANNELS,
      delta=1, dtype=tf.float32),
      shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
    dy, dx = tf.image.image_gradients(image)
    print(image[0, :,:,0])
    tf.Tensor(
      [[ 0.  1.  2.  3.  4.]
      [ 5.  6.  7.  8.  9.]
      [10. 11. 12. 13. 14.]
      [15. 16. 17. 18. 19.]
      [20. 21. 22. 23. 24.]], shape=(5, 5), dtype=float32)
    print(dy[0, :,:,0])
    tf.Tensor(
      [[5. 5. 5. 5. 5.]
      [5. 5. 5. 5. 5.]
      [5. 5. 5. 5. 5.]
      [5. 5. 5. 5. 5.]
      [0. 0. 0. 0. 0.]], shape=(5, 5), dtype=float32)
    print(dx[0, :,:,0])
    tf.Tensor(
      [[1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]
      [1. 1. 1. 1. 0.]], shape=(5, 5), dtype=float32)
    ```

  Args:
    image: Tensor (or value convertible to a tensor) with shape [batch_size,
      h, w, d].

  Returns:
    Pair of tensors (dy, dx) holding the vertical and horizontal image
    gradients (1-step finite difference).

  Raises:
    ValueError: If the static rank of `image` is not 4.
  """
  # Accept array-likes as well as tensors; a tensor passes through unchanged.
  image = ops.convert_to_tensor(image, name='image')
  if image.get_shape().ndims != 4:
    raise ValueError('image_gradients expects a 4D tensor '
                     '[batch_size, h, w, d], not {}.'.format(image.get_shape()))
  image_shape = array_ops.shape(image)
  batch_size, height, width, depth = array_ops.unstack(image_shape)
  # Forward differences along the height (dy) and width (dx) axes. Each result
  # is one element shorter than the input along the differenced axis.
  dy = image[:, 1:, :, :] - image[:, :-1, :, :]
  dx = image[:, :, 1:, :] - image[:, :, :-1, :]

  # Return tensors with same size as original image by concatenating
  # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y).
  shape = array_ops.stack([batch_size, 1, width, depth])
  dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1)
  dy = array_ops.reshape(dy, image_shape)

  shape = array_ops.stack([batch_size, height, 1, depth])
  dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2)
  dx = array_ops.reshape(dx, image_shape)

  return dy, dx
| |
| |
@tf_export('image.sobel_edges')
@dispatch.add_dispatch_support
def sobel_edges(image):
  """Computes Sobel edge maps for every channel of an image batch.

  Args:
    image: Image tensor with shape [batch_size, h, w, d] and type float32 or
      float64.  The image(s) must be 2x2 or larger.

  Returns:
    Tensor holding edge maps for each channel. Returns a tensor with shape
    [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]],
    [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter.
  """
  static_shape = image.get_shape()
  dynamic_shape = array_ops.shape(image)

  # The two 3x3 Sobel stencils, vertical response first.
  vertical = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
  horizontal = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
  stencils = (vertical, horizontal)
  num_kernels = len(stencils)

  # Arrange the stencils as a [3, 3, 1, num_kernels] filter, then repeat it
  # along the input-channel axis so each channel is filtered independently.
  filter_np = np.stack([np.asarray(s) for s in stencils], axis=-1)
  filter_np = filter_np[:, :, np.newaxis, :]
  sobel_filter = constant_op.constant(filter_np, dtype=image.dtype)
  sobel_filter = array_ops.tile(
      sobel_filter, [1, 1, dynamic_shape[-1], 1], name='sobel_filters')

  # Reflect-pad one pixel on each spatial border so that the 'VALID'
  # convolution below returns an h x w result.
  reflected = array_ops.pad(
      image, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT')

  # Depth-wise convolution gives [batch_size, h, w, d * num_kernels].
  edges = nn.depthwise_conv2d(
      reflected, sobel_filter, strides=[1, 1, 1, 1], padding='VALID')

  # Split the last axis into [d, num_kernels] and restore the static shape.
  target_shape = array_ops.concat([dynamic_shape, [num_kernels]], 0)
  edges = array_ops.reshape(edges, shape=target_shape)
  edges.set_shape(static_shape.concatenate([num_kernels]))
  return edges
| |
| |
def resize_bicubic(images,
                   size,
                   align_corners=False,
                   name=None,
                   half_pixel_centers=False):
  # Pass-through to the generated `resize_bicubic` op wrapper. Every argument
  # is forwarded unchanged; only the parameter order differs (`name` comes
  # before `half_pixel_centers` here).
  options = dict(
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)
  return gen_image_ops.resize_bicubic(images, size, **options)
| |
| |
def resize_bilinear(images,
                    size,
                    align_corners=False,
                    name=None,
                    half_pixel_centers=False):
  # Pass-through to the generated `resize_bilinear` op wrapper. Every argument
  # is forwarded unchanged; only the parameter order differs (`name` comes
  # before `half_pixel_centers` here).
  options = dict(
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)
  return gen_image_ops.resize_bilinear(images, size, **options)
| |
| |
def resize_nearest_neighbor(images,
                            size,
                            align_corners=False,
                            name=None,
                            half_pixel_centers=False):
  # Pass-through to the generated `resize_nearest_neighbor` op wrapper. Every
  # argument is forwarded unchanged; only the parameter order differs (`name`
  # comes before `half_pixel_centers` here).
  options = dict(
      align_corners=align_corners,
      half_pixel_centers=half_pixel_centers,
      name=name)
  return gen_image_ops.resize_nearest_neighbor(images, size, **options)
| |
| |
# Export the legacy resize entry points under v1 names only. Each one is
# wrapped in a deprecation notice (with no removal date) that points callers
# to `tf.image.resize`.

# `resize_area` exports the generated op wrapper directly. Note that dispatch
# support is applied *inside* the deprecation wrapper here, unlike the three
# registrations that follow.
resize_area_deprecation = deprecation.deprecated(
    None, 'Use `tf.image.resize(...method=ResizeMethod.AREA...)` instead.')
tf_export(v1=['image.resize_area'])(
    resize_area_deprecation(
        dispatch.add_dispatch_support(gen_image_ops.resize_area)))

resize_bicubic_deprecation = deprecation.deprecated(
    None, 'Use `tf.image.resize(...method=ResizeMethod.BICUBIC...)` instead.')
tf_export(v1=['image.resize_bicubic'])(
    dispatch.add_dispatch_support(
        resize_bicubic_deprecation(resize_bicubic)))

resize_bilinear_deprecation = deprecation.deprecated(
    None, 'Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.')
tf_export(v1=['image.resize_bilinear'])(
    dispatch.add_dispatch_support(
        resize_bilinear_deprecation(resize_bilinear)))

resize_nearest_neighbor_deprecation = deprecation.deprecated(
    None,
    'Use `tf.image.resize(...method=ResizeMethod.NEAREST_NEIGHBOR...)` '
    'instead.')
tf_export(v1=['image.resize_nearest_neighbor'])(
    dispatch.add_dispatch_support(
        resize_nearest_neighbor_deprecation(resize_nearest_neighbor)))
| |
| |
@tf_export('image.crop_and_resize', v1=[])
@dispatch.add_dispatch_support
def crop_and_resize_v2(image,
                       boxes,
                       box_indices,
                       crop_size,
                       method='bilinear',
                       extrapolation_value=0,
                       name=None):
  """Extracts crops from the input image tensor and resizes them.

  Crops are taken from the input image tensor and resized, using bilinear or
  nearest neighbor sampling (possibly with aspect ratio change), to a common
  output size specified by `crop_size`. This is more general than the
  `crop_to_bounding_box` op, which extracts a fixed size slice from the input
  image and does not allow resizing or aspect ratio change.

  The returned tensor holds `crops` from the input `image` at the positions
  given by the bounding boxes in `boxes`. All cropped boxes are resized (with
  bilinear or nearest neighbor interpolation) to a fixed
  `size = [crop_height, crop_width]`, so the result is a 4-D tensor
  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner
  aligned. In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give
  identical results to using `tf.compat.v1.image.resize_bilinear()` or
  `tf.compat.v1.image.resize_nearest_neighbor()` (depending on the `method`
  argument) with `align_corners=True`.

  Args:
    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
      specifies the coordinates of a box in the `box_indices[i]` image and is
      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
      coordinate value of `y` is mapped to the image coordinate at `y *
      (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      `y1` > `y2` is allowed, in which case the sampled crop is an up-down
      flipped version of the original image. The width dimension is treated
      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
      in which case `extrapolation_value` is used to extrapolate the input
      image values.
    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
      batch)`. The value of `box_indices[i]` specifies the image that the
      `i`-th box refers to.
    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
      All cropped image patches are resized to this size. The aspect ratio of
      the image content is not preserved. Both `crop_height` and `crop_width`
      need to be positive.
    method: An optional string specifying the sampling method for resizing. It
      can be either `"bilinear"` or `"nearest"` and defaults to `"bilinear"`.
      Currently two sampling methods are supported: Bilinear and Nearest
      Neighbor.
    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
      extrapolation, when applicable.
    name: A name for the operation (optional).

  Returns:
    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.

  Example:

  ```python
  import tensorflow as tf
  BATCH_SIZE = 1
  NUM_BOXES = 5
  IMAGE_HEIGHT = 256
  IMAGE_WIDTH = 256
  CHANNELS = 3
  CROP_SIZE = (24, 24)

  image = tf.random.normal(shape=(BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH,
  CHANNELS) )
  boxes = tf.random.uniform(shape=(NUM_BOXES, 4))
  box_indices = tf.random.uniform(shape=(NUM_BOXES,), minval=0,
  maxval=BATCH_SIZE, dtype=tf.int32)
  output = tf.image.crop_and_resize(image, boxes, box_indices, CROP_SIZE)
  output.shape  #=> (5, 24, 24, 3)
  ```
  """
  # The generated op wrapper still names the index argument `box_ind`.
  return gen_image_ops.crop_and_resize(
      image=image,
      boxes=boxes,
      box_ind=box_indices,
      crop_size=crop_size,
      method=method,
      extrapolation_value=extrapolation_value,
      name=name)
| |
| |
@tf_export(v1=['image.crop_and_resize'])
@dispatch.add_dispatch_support
@deprecation.deprecated_args(None,
                             'box_ind is deprecated, use box_indices instead',
                             'box_ind')
def crop_and_resize_v1(  # pylint: disable=missing-docstring
    image,
    boxes,
    box_ind=None,
    crop_size=None,
    method='bilinear',
    extrapolation_value=0,
    name=None,
    box_indices=None):
  # Reconcile the deprecated `box_ind` argument with its replacement
  # `box_indices` before forwarding everything to the generated op wrapper.
  resolved_box_ind = deprecation.deprecated_argument_lookup(
      'box_indices', box_indices, 'box_ind', box_ind)
  return gen_image_ops.crop_and_resize(
      image=image,
      boxes=boxes,
      box_ind=resolved_box_ind,
      crop_size=crop_size,
      method=method,
      extrapolation_value=extrapolation_value,
      name=name)


# The v1 endpoint reuses the documentation of the generated op wrapper.
crop_and_resize_v1.__doc__ = gen_image_ops.crop_and_resize.__doc__
| |
| |
@tf_export(v1=['image.extract_glimpse'])
@dispatch.add_dispatch_support
def extract_glimpse(
    input,  # pylint: disable=redefined-builtin
    size,
    offsets,
    centered=True,
    normalized=True,
    uniform_noise=True,
    name=None):
  """Extracts a glimpse from the input tensor.

  Returns a set of windows called glimpses extracted at location
  `offsets` from the input tensor. If a window only partially
  overlaps the input, the non-overlapping areas will be filled with
  random noise.

  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
  glimpse_width, channels]`. The channels and batch dimensions are the
  same as that of the input tensor. The height and width of the output
  windows are specified in the `size` parameter.

  The arguments `normalized` and `centered` control how the windows are built:

  * If the coordinates are normalized but not centered, 0.0 and 1.0
    correspond to the minimum and maximum of each height and width
    dimension.
  * If the coordinates are both normalized and centered, they range from
    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
    left corner, the lower right corner is located at (1.0, 1.0) and the
    center is at (0, 0).
  * If the coordinates are not normalized they are interpreted as
    numbers of pixels.

  Usage Example:

  >>> x = [[[[0.0],
  ...           [1.0],
  ...           [2.0]],
  ...          [[3.0],
  ...           [4.0],
  ...           [5.0]],
  ...          [[6.0],
  ...           [7.0],
  ...           [8.0]]]]
  >>> tf.compat.v1.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
  ...                                    centered=False, normalized=False)
  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
  array([[[[0.],
           [1.]],
          [[3.],
           [4.]]]], dtype=float32)>

  Args:
    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
      `[batch_size, height, width, channels]`.
    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
      size of the glimpses to extract.  The glimpse height must be specified
      first, followed by the glimpse width.
    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
      `[batch_size, 2]` containing the y, x locations of the center of each
      window.
    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are centered relative to the image, in which case the (0, 0)
      offset is relative to the center of the input images. If false, the (0,0)
      offset corresponds to the upper left corner of the input images.
    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are normalized.
    uniform_noise: An optional `bool`. Defaults to `True`. Indicates if the
      noise should be generated using a uniform distribution or a Gaussian
      distribution.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  # Straight delegation to the generated op wrapper.
  glimpse_options = dict(
      centered=centered,
      normalized=normalized,
      uniform_noise=uniform_noise,
      name=name)
  return gen_image_ops.extract_glimpse(input, size, offsets, **glimpse_options)
| |
| |
@tf_export('image.extract_glimpse', v1=[])
@dispatch.add_dispatch_support
def extract_glimpse_v2(
    input,  # pylint: disable=redefined-builtin
    size,
    offsets,
    centered=True,
    normalized=True,
    noise='uniform',
    name=None):
  """Extracts a glimpse from the input tensor.

  Returns a set of windows called glimpses extracted at location
  `offsets` from the input tensor. If a window only partially
  overlaps the input, the non-overlapping areas will be filled with
  random noise.

  The result is a 4-D tensor of shape `[batch_size, glimpse_height,
  glimpse_width, channels]`. The channels and batch dimensions are the
  same as that of the input tensor. The height and width of the output
  windows are specified in the `size` parameter.

  The arguments `normalized` and `centered` control how the windows are built:

  * If the coordinates are normalized but not centered, 0.0 and 1.0
    correspond to the minimum and maximum of each height and width
    dimension.
  * If the coordinates are both normalized and centered, they range from
    -1.0 to 1.0. The coordinates (-1.0, -1.0) correspond to the upper
    left corner, the lower right corner is located at (1.0, 1.0) and the
    center is at (0, 0).
  * If the coordinates are not normalized they are interpreted as
    numbers of pixels.

  Usage Example:

  >>> x = [[[[0.0],
  ...           [1.0],
  ...           [2.0]],
  ...          [[3.0],
  ...           [4.0],
  ...           [5.0]],
  ...          [[6.0],
  ...           [7.0],
  ...           [8.0]]]]
  >>> tf.image.extract_glimpse(x, size=(2, 2), offsets=[[1, 1]],
  ...                         centered=False, normalized=False)
  <tf.Tensor: shape=(1, 2, 2, 1), dtype=float32, numpy=
  array([[[[4.],
           [5.]],
          [[7.],
           [8.]]]], dtype=float32)>

  Args:
    input: A `Tensor` of type `float32`. A 4-D float tensor of shape
      `[batch_size, height, width, channels]`.
    size: A `Tensor` of type `int32`. A 1-D tensor of 2 elements containing the
      size of the glimpses to extract.  The glimpse height must be specified
      first, followed by the glimpse width.
    offsets: A `Tensor` of type `float32`. A 2-D tensor of shape
      `[batch_size, 2]` containing the y, x locations of the center of each
      window.
    centered: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are centered relative to the image, in which case the (0, 0)
      offset is relative to the center of the input images. If false, the (0,0)
      offset corresponds to the upper left corner of the input images.
    normalized: An optional `bool`. Defaults to `True`. Indicates if the offset
      coordinates are normalized.
    noise: An optional `string`. Defaults to `uniform`. Indicates if the noise
      should be `uniform` (uniform distribution), `gaussian` (gaussian
      distribution), or `zero` (zero padding).
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  # The noise mode is selected through `noise`; `uniform_noise` is always
  # passed as False from this endpoint.
  glimpse_options = dict(
      centered=centered,
      normalized=normalized,
      noise=noise,
      uniform_noise=False,
      name=name)
  return gen_image_ops.extract_glimpse_v2(
      input, size, offsets, **glimpse_options)
| |
| |
@tf_export('image.combined_non_max_suppression')
@dispatch.add_dispatch_support
def combined_non_max_suppression(boxes,
                                 scores,
                                 max_output_size_per_class,
                                 max_total_size,
                                 iou_threshold=0.5,
                                 score_threshold=float('-inf'),
                                 pad_per_class=False,
                                 clip_boxes=True,
                                 name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  This operation performs non_max_suppression on the inputs per batch, across
  all classes.
  Boxes that have high intersection-over-union (IOU) overlap with previously
  selected boxes are pruned away.  Bounding boxes are supplied as
  [y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
  diagonal pair of box corners and the coordinates can be provided as normalized
  (i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
  is agnostic to where the origin is in the coordinate system. Also note that
  this algorithm is invariant to orthogonal transformations and translations
  of the coordinate system; thus translations or reflections of the coordinate
  system result in the same boxes being selected by the algorithm.
  The output of this operation is the final boxes, scores and classes tensor
  returned after performing non_max_suppression.

  Args:
    boxes: A 4-D float `Tensor` of shape `[batch_size, num_boxes, q, 4]`. If `q`
      is 1 then the same boxes are used for all classes; otherwise, if `q` is
      equal to the number of classes, class-specific boxes are used.
    scores: A 3-D float `Tensor` of shape `[batch_size, num_boxes, num_classes]`
      representing a single score corresponding to each box (each row of boxes).
    max_output_size_per_class: A scalar integer `Tensor` representing the
      maximum number of boxes to be selected by non-max suppression per class.
    max_total_size: An int32 scalar representing the maximum number of boxes
      retained over all classes. Note that setting this value to a large number
      may result in OOM error depending on the system workload.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    pad_per_class: If false, the output nmsed boxes, scores and classes are
      padded/clipped to `max_total_size`. If true, the output nmsed boxes,
      scores and classes are padded to be of length
      `max_output_size_per_class`*`num_classes`, unless it exceeds
      `max_total_size` in which case it is clipped to `max_total_size`.
      Defaults to false.
    clip_boxes: If true, the coordinates of output nmsed boxes will be clipped
      to [0, 1]. If false, output the box coordinates as they are. Defaults to
      true.
    name: A name for the operation (optional).

  Returns:
    'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'nmsed_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'valid_detections': A [batch_size] int32 tensor indicating the number of
      valid detections per batch item. Only the top valid_detections[i] entries
      in nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. The
      rest of the entries are zero paddings.
  """
  with ops.name_scope(name, 'combined_non_max_suppression'):
    # Both thresholds are handed to the op as float32 tensors.
    iou_threshold_t = ops.convert_to_tensor(
        iou_threshold, name='iou_threshold', dtype=dtypes.float32)
    score_threshold_t = ops.convert_to_tensor(
        score_threshold, name='score_threshold', dtype=dtypes.float32)

    # Convert `max_total_size` to tensor *without* setting the `dtype` param.
    # This allows us to catch `int32` overflow case with `max_total_size`
    # whose expected dtype is `int32` by the op registration. Any number within
    # `int32` will get converted to `int32` tensor. Anything larger will get
    # converted to `int64`. Passing in `int64` for `max_total_size` to the op
    # will throw dtype mismatch exception.
    # TODO(b/173251596): Once there is a more general solution to warn against
    # int overflow conversions, revisit this check.
    max_total_size_t = ops.convert_to_tensor(max_total_size)

    return gen_image_ops.combined_non_max_suppression(
        boxes,
        scores,
        max_output_size_per_class,
        max_total_size_t,
        iou_threshold_t,
        score_threshold_t,
        pad_per_class=pad_per_class,
        clip_boxes=clip_boxes)
| |
| |
def _bbox_overlap(boxes_a, boxes_b):
  """Computes the pairwise IoU between two batched sets of boxes.

  Args:
    boxes_a: a tensor with a shape of [batch_size, N, 4]. N is the number of
      boxes per image. The last dimension is the pixel coordinates in
      [ymin, xmin, ymax, xmax] form.
    boxes_b: a tensor with a shape of [batch_size, M, 4]. M is the number of
      boxes. The last dimension is the pixel coordinates in
      [ymin, xmin, ymax, xmax] form.

  Returns:
    intersection_over_union: a tensor with a shape of [batch_size, N, M],
    representing the ratio of intersection area over union area (IoU) between
    each box of `boxes_a` and each box of `boxes_b`.
  """
  with ops.name_scope('bbox_overlap'):
    # Split the coordinate axis into four [batch_size, *, 1] tensors.
    a_y_min, a_x_min, a_y_max, a_x_max = array_ops.split(boxes_a, 4, axis=2)
    b_y_min, b_x_min, b_y_max, b_x_max = array_ops.split(boxes_b, 4, axis=2)

    def as_row(values_b):
      # Swap the last two axes ([batch_size, M, 1] -> [batch_size, 1, M]) so
      # the values broadcast against the [batch_size, N, 1] values of boxes_a.
      return array_ops.transpose(values_b, [0, 2, 1])

    # Corners of the intersection rectangle for every (a, b) pair.
    overlap_x_min = math_ops.maximum(a_x_min, as_row(b_x_min))
    overlap_x_max = math_ops.minimum(a_x_max, as_row(b_x_max))
    overlap_y_min = math_ops.maximum(a_y_min, as_row(b_y_min))
    overlap_y_max = math_ops.minimum(a_y_max, as_row(b_y_max))

    # Disjoint boxes give negative extents; clamp them to zero area.
    overlap_width = math_ops.maximum(overlap_x_max - overlap_x_min, 0)
    overlap_height = math_ops.maximum(overlap_y_max - overlap_y_min, 0)
    intersection = overlap_width * overlap_height

    # Union = area(a) + area(b) - intersection, plus a small epsilon to avoid
    # divide-by-zero.
    area_a = (a_y_max - a_y_min) * (a_x_max - a_x_min)
    area_b = (b_y_max - b_y_min) * (b_x_max - b_x_min)
    epsilon = 1e-8
    union = area_a + as_row(area_b) - intersection + epsilon

    return intersection / union
| |
| |
def _self_suppression(iou, _, iou_sum, iou_threshold):
  """Runs one round of suppression among the boxes of a single tile.

  Boxes that no other box suppresses (`can_suppress_others`) are identified
  first and are then used to suppress boxes in the same tile. The argument
  and return layouts match so this can serve as a `while_loop` body.

  Args:
    iou: a tensor of pairwise intersection over union values. It is reduced
      over axes 1 and 2 here, so it is expected to look like
      [batch_size, num_boxes, num_boxes].
    _: unused; placeholder for the loop-condition slot of the loop state.
    iou_sum: per-batch sum of `iou` from the previous round.
    iou_threshold: a scalar tensor.

  Returns:
    iou_suppressed: `iou` with the rows of suppressed boxes zeroed out.
    iou_diff: a scalar boolean tensor, true if the iou sum of any batch entry
      dropped by more than `iou_threshold` in this step.
    iou_sum_new: a tensor of shape [batch_size] that represents the iou sum
      after suppression.
    iou_threshold: a scalar tensor, passed through unchanged.
  """
  batch_size = array_ops.shape(iou)[0]
  # Shape used to turn a per-box flag into a mask that multiplies whole rows.
  row_mask_shape = [batch_size, -1, 1]

  # A box can suppress others when every entry in its column is below the
  # threshold.
  below_threshold = math_ops.reduce_max(iou, 1) < iou_threshold
  can_suppress_others = math_ops.cast(
      array_ops.reshape(below_threshold, row_mask_shape), iou.dtype)

  # Recompute the column maxima using only rows of boxes that may suppress
  # others, then zero the rows of boxes that end up suppressed.
  still_below = (
      math_ops.reduce_max(can_suppress_others * iou, 1) < iou_threshold)
  keep_rows = array_ops.reshape(
      math_ops.cast(still_below, iou.dtype), row_mask_shape)
  iou_after_suppression = keep_rows * iou

  iou_sum_new = math_ops.reduce_sum(iou_after_suppression, [1, 2])
  changed = math_ops.reduce_any(iou_sum - iou_sum_new > iou_threshold)
  return [iou_after_suppression, changed, iou_sum_new, iou_threshold]
| |
| |
def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx, tile_size):
  """Suppresses the boxes of one tile using the boxes of another tile.

  Args:
    boxes: a tensor of shape [batch_size, num_boxes_with_padding, 4]
    box_slice: a tensor of shape [batch_size, tile_size, 4]
    iou_threshold: a scalar tensor
    inner_idx: a scalar tensor representing the tile index of the tile
      that is used to suppress box_slice
    tile_size: an integer representing the number of boxes in a tile

  Returns:
    boxes: unchanged boxes as input
    box_slice_after_suppression: box_slice after suppression
    iou_threshold: unchanged
    inner_idx: the input `inner_idx` incremented by one
  """
  batch_size = array_ops.shape(boxes)[0]

  # Cut out the tile at index `inner_idx`; it acts as the suppressor.
  tile_begin = [0, inner_idx * tile_size, 0]
  tile_extent = [batch_size, tile_size, 4]
  suppressor_tile = array_ops.slice(boxes, tile_begin, tile_extent)

  # A box in `box_slice` survives only if its IoU with every suppressor box
  # stays below the threshold; the other boxes are zeroed out.
  iou = _bbox_overlap(suppressor_tile, box_slice)
  survives = math_ops.reduce_all(iou < iou_threshold, [1])
  keep_mask = array_ops.expand_dims(
      math_ops.cast(survives, box_slice.dtype), 2)
  box_slice_after_suppression = keep_mask * box_slice

  return boxes, box_slice_after_suppression, iou_threshold, inner_idx + 1
| |
| |
def _suppression_loop_body(boxes, iou_threshold, output_size, idx, tile_size):
  """Runs one suppression step on the tile [idx*tile_size, (idx+1)*tile_size).

  The tile is first suppressed by every earlier tile (cross suppression), then
  by itself (self suppression); the surviving tile is written back into `boxes`
  and the per-image count of selected boxes is updated.

  Args:
    boxes: a tensor with a shape of [batch_size, anchors, 4]. The code reshapes
      it to [batch_size, num_tiles, tile_size, 4], so `anchors` has to be a
      multiple of `tile_size`.
    iou_threshold: a float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    output_size: an int32 tensor of size [batch_size]. Representing the number
      of selected boxes for each batch.
    idx: an integer scalar representing induction variable.
    tile_size: an integer representing the number of boxes in a tile.

  Returns:
    boxes: updated boxes.
    iou_threshold: pass down iou_threshold to the next iteration.
    output_size: the updated output_size.
    idx: the updated induction variable.
  """
  with ops.name_scope('suppression_loop_body'):
    num_tiles = array_ops.shape(boxes)[1] // tile_size
    batch_size = array_ops.shape(boxes)[0]

    def _earlier_tiles_remain(unused_boxes, unused_tile, unused_threshold,
                              tile_idx):
      return tile_idx < idx

    def _suppress_with_earlier_tile(all_boxes, tile, threshold, tile_idx):
      return _cross_suppression(all_boxes, tile, threshold, tile_idx,
                                tile_size)

    # Cross suppression: every tile with an index below `idx` gets a chance to
    # suppress the boxes of the current tile.
    current_tile = array_ops.slice(boxes, [0, idx * tile_size, 0],
                                   [batch_size, tile_size, 4])
    cross_loop_vars = [
        boxes, current_tile, iou_threshold,
        constant_op.constant(0)
    ]
    _, current_tile, _, _ = control_flow_ops.while_loop(
        _earlier_tiles_remain, _suppress_with_earlier_tile, cross_loop_vars)

    # Self suppression: only pairs whose second index is larger than the first
    # and whose IoU reaches the threshold are retained in the IoU matrix.
    pairwise_iou = _bbox_overlap(current_tile, current_tile)
    column_index = array_ops.reshape(math_ops.range(tile_size), [1, -1])
    row_index = array_ops.reshape(math_ops.range(tile_size), [-1, 1])
    upper_triangle = array_ops.expand_dims(column_index > row_index, 0)
    reaches_threshold = pairwise_iou >= iou_threshold
    pairwise_iou *= math_ops.cast(
        math_ops.logical_and(upper_triangle, reaches_threshold),
        pairwise_iou.dtype)
    self_loop_vars = [
        pairwise_iou,
        constant_op.constant(True),
        math_ops.reduce_sum(pairwise_iou, [1, 2]),
        iou_threshold,
    ]
    # The second loop variable is the continuation flag produced by
    # _self_suppression.
    suppressed_iou, _, _, _ = control_flow_ops.while_loop(
        lambda _iou, keep_going, _iou_sum, _threshold: keep_going,
        _self_suppression, self_loop_vars)
    was_suppressed = math_ops.reduce_sum(suppressed_iou, 1) > 0
    survivor_mask = array_ops.expand_dims(
        1.0 - math_ops.cast(was_suppressed, current_tile.dtype), 2)
    current_tile *= survivor_mask

    # Write the processed tile back: a one-hot selector over the tile axis
    # picks the new tile at position `idx` and the old content elsewhere.
    tile_selector = array_ops.reshape(
        math_ops.cast(
            math_ops.equal(math_ops.range(num_tiles), idx), boxes.dtype),
        [1, -1, 1, 1])
    replicated_tile = array_ops.tile(
        array_ops.expand_dims(current_tile, [1]), [1, num_tiles, 1, 1])
    replaced_part = replicated_tile * tile_selector
    untouched_part = array_ops.reshape(
        boxes, [batch_size, num_tiles, tile_size, 4]) * (1 - tile_selector)
    boxes = replaced_part + untouched_part
    boxes = array_ops.reshape(boxes, [batch_size, -1, 4])

    # A box counts as selected when at least one of its coordinates is > 0.
    has_positive_coordinate = math_ops.reduce_any(current_tile > 0, [2])
    output_size += math_ops.reduce_sum(
        math_ops.cast(has_positive_coordinate, dtypes.int32), [1])
  return boxes, iou_threshold, output_size, idx + 1
| |
| |
@tf_export('image.non_max_suppression_padded')
@dispatch.add_dispatch_support
def non_max_suppression_padded(boxes,
                               scores,
                               max_output_size,
                               iou_threshold=0.5,
                               score_threshold=float('-inf'),
                               pad_to_max_output_size=False,
                               name=None,
                               sorted_input=False,
                               canonicalized_coordinates=False,
                               tile_size=512):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
  with the addition of an optional parameter which zero-pads the output to
  be of size `max_output_size`.
  The output of this operation is a tuple containing the set of integers
  indexing into the input collection of bounding boxes representing the selected
  boxes and the number of valid indices in the index set. The bounding box
  coordinates corresponding to the selected indices can then be obtained using
  the `tf.slice` and `tf.gather` operations. For example:
    ```python
    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
        boxes, scores, max_output_size, iou_threshold,
        score_threshold, pad_to_max_output_size=True)
    selected_indices = tf.slice(
        selected_indices_padded, tf.constant([0]), num_valid)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
      Dimensions except the last two are batch dimensions.
    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
    max_output_size: a scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non max suppression.
    iou_threshold: a float representing the threshold for deciding whether boxes
      overlap too much with respect to IoU (intersection over union).
    score_threshold: a float representing the threshold for box scores. Boxes
      with a score that is not larger than this threshold will be suppressed.
    pad_to_max_output_size: whether to pad the output idx to max_output_size.
      Must be set to True when the input is a batch of images.
    name: name of operation.
    sorted_input: a boolean indicating whether the input boxes and scores
      are sorted in descending order by the score.
    canonicalized_coordinates: if box coordinates are given as
      `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant
      computation to canonicalize box coordinates.
    tile_size: an integer representing the number of boxes in a tile, i.e.,
      the maximum number of boxes per image that can be used to suppress other
      boxes in parallel; larger tile_size means larger parallelism and
      potentially more redundant work.

  Returns:
    idx: a tensor holding the indices selected by non-max suppression. With
      `pad_to_max_output_size=True` it is reshaped to
      [..., max_output_size], where the leading dimensions are the batch
      dimensions of `boxes`, and for each image only the first `num_valid[i]`
      entries are valid. With `pad_to_max_output_size=False` only the first
      `num_valid` indices of the single image are returned.
    num_valid: a tensor of rank 0 or higher with a shape of [...]
      representing the number of valid indices in idx. Its dimensions are the
      batch dimensions of the input boxes.

  Raises:
    ValueError: When set pad_to_max_output_size to False for batched input.
  """
  # When none of the newer arguments is used and the forward-compatibility
  # date of 2020/6/23 has not been reached, keep using the old kernel so that
  # TFLite conversion can be fixed after the TF 2.3 release.
  uses_only_legacy_arguments = (
      not sorted_input and not canonicalized_coordinates and tile_size == 512)
  if (uses_only_legacy_arguments and
      not compat.forward_compatible(2020, 6, 23)):
    return non_max_suppression_padded_v1(
        boxes, scores, max_output_size, iou_threshold, score_threshold,
        pad_to_max_output_size, name)

  with ops.name_scope(name, 'non_max_suppression_padded'):
    if not pad_to_max_output_size:
      # An unpadded result is only possible for a single image, i.e. boxes of
      # shape [num_boxes, 4]. This is a best-effort static check: when the
      # rank of `boxes` is unknown the computation is allowed to proceed.
      static_rank = boxes.get_shape().rank
      if static_rank is not None and static_rank > 2:
        raise ValueError(
            "'pad_to_max_output_size' (value {}) must be True for "
            'batched input'.format(pad_to_max_output_size))
    if name is None:
      # Kept as in the previous implementation; `name` is not read below.
      name = ''
    idx, num_valid = non_max_suppression_padded_v2(
        boxes, scores, max_output_size, iou_threshold, score_threshold,
        sorted_input, canonicalized_coordinates, tile_size)
    # def_function.function seems to lose shape information, so the result is
    # given its final shape here.
    if pad_to_max_output_size:
      padded_shape = array_ops.concat([
          array_ops.shape(boxes)[:-2],
          array_ops.expand_dims(max_output_size, 0)
      ], 0)
      idx = array_ops.reshape(idx, padded_shape)
    else:
      idx = idx[0, :num_valid]
    return idx, num_valid
| |
| |
| # TODO(b/158709815): Improve performance regression due to |
| # def_function.function. |
@def_function.function(
    experimental_implements='non_max_suppression_padded_v2')
def non_max_suppression_padded_v2(boxes,
                                  scores,
                                  max_output_size,
                                  iou_threshold=0.5,
                                  score_threshold=float('-inf'),
                                  sorted_input=False,
                                  canonicalized_coordinates=False,
                                  tile_size=512):
  """Non-maximum suppression.

  Prunes away boxes that have high intersection-over-union (IOU) overlap
  with previously selected boxes. Bounding boxes are supplied as
  `[y1, x1, y2, x2]`, where `(y1, x1)` and `(y2, x2)` are the coordinates of any
  diagonal pair of box corners and the coordinates can be provided as normalized
  (i.e., lying in the interval `[0, 1]`) or absolute. The bounding box
  coordinates are canonicalized to `[y_min, x_min, y_max, x_max]`,
  where `(y_min, x_min)` and `(y_max, x_max)` are the coordinates of the lower
  left and upper right corner. User may indicate the input box coordinates are
  already canonicalized to eliminate redundant work by setting
  canonicalized_coordinates to `True`. Note that this algorithm is agnostic to
  where the origin is in the coordinate system. Note that this algorithm is
  invariant to orthogonal transformations and translations of the coordinate
  system; thus translating or reflections of the coordinate system result in the
  same boxes being selected by the algorithm.

  Similar to tf.image.non_max_suppression, non_max_suppression_padded
  implements hard NMS but can operate on a batch of images and improves
  performance by tiling the bounding boxes. Non_max_suppression_padded should
  be preferred over tf.image.non_max_suppression when running on devices with
  abundant parallelism for higher computation speed. For soft NMS, refer to
  tf.image.non_max_suppression_with_scores.

  While a serial NMS algorithm iteratively uses the highest-scored unprocessed
  box to suppress boxes, this algorithm uses many boxes to suppress other boxes
  in parallel. The key idea is to partition boxes into tiles based on their
  score and suppresses boxes tile by tile, thus achieving parallelism within a
  tile. The tile size determines the degree of parallelism.

  In cross suppression (using boxes of tile A to suppress boxes of tile B),
  all boxes in A can independently suppress boxes in B.

  Self suppression (suppressing boxes of the same tile) needs to be iteratively
  applied until there's no more suppression. In each iteration, boxes that
  cannot be suppressed are used to suppress boxes in the same tile.

  boxes = boxes.pad_to_multiple_of(tile_size)
  num_tiles = len(boxes) // tile_size
  output_boxes = []
  for i in range(num_tiles):
    box_tile = boxes[i*tile_size : (i+1)*tile_size]
    for j in range(i):
      # in parallel suppress boxes in box_tile using boxes from suppressing_tile
      suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
      iou = _bbox_overlap(box_tile, suppressing_tile)
      # if the box is suppressed in iou, clear it to a dot
      box_tile *= _update_boxes(iou)
    # Iteratively handle the diagonal tile.
    iou = _bbox_overlap(box_tile, box_tile)
    iou_changed = True
    while iou_changed:
      # boxes that are not suppressed by anything else
      suppressing_boxes = _get_suppressing_boxes(iou)
      # boxes that are suppressed by suppressing_boxes
      suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
      # clear iou to 0 for boxes that are suppressed, as they cannot be used
      # to suppress other boxes any more
      new_iou = _clear_iou(iou, suppressed_boxes)
      iou_changed = (new_iou != iou)
      iou = new_iou
    # remaining boxes that can still suppress others, are selected boxes.
    output_boxes.append(_get_suppressing_boxes(iou))
    if len(output_boxes) >= max_output_size:
      break

  Args:
    boxes: a tensor of rank 2 or higher with a shape of [..., num_boxes, 4].
      Dimensions except the last two are batch dimensions. The last dimension
      represents box coordinates, given as [y_1, x_1, y_2, x_2]. The coordinates
      on each dimension can be given in any order
      (see also `canonicalized_coordinates`) but must describe a box with
      a positive area.
    scores: a tensor of rank 1 or higher with a shape of [..., num_boxes].
    max_output_size: a scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non max suppression.
    iou_threshold: a float representing the threshold for deciding whether boxes
      overlap too much with respect to IoU (intersection over union).
    score_threshold: a float representing the threshold for box scores. Boxes
      with a score that is not larger than this threshold will be suppressed.
    sorted_input: a boolean indicating whether the input boxes and scores
      are sorted in descending order by the score.
    canonicalized_coordinates: if box coordinates are given as
      `[y_min, x_min, y_max, x_max]`, setting to True eliminate redundant
      computation to canonicalize box coordinates.
    tile_size: an integer representing the number of boxes in a tile, i.e.,
      the maximum number of boxes per image that can be used to suppress other
      boxes in parallel; larger tile_size means larger parallelism and
      potentially more redundant work.
  Returns:
    idx: a tensor with a shape of [batch_size, max_output_size], where
      batch_size is the product of the batch dimensions of the input boxes,
      representing the indices selected by non-max suppression. For each image
      (i.e., idx[i]), only the first num_valid[i] indices
      (i.e., idx[i][:num_valid[i]]) are valid; the remaining entries are set
      to 0.
    num_valid: a tensor of rank 0 or higher with a shape of [...]
      representing the number of valid indices in idx. Its dimensions are the
      batch dimensions of the input boxes.
  """
  def _sort_scores_and_boxes(scores, boxes):
    """Sort boxes based their score from highest to lowest.

    Args:
      scores: a tensor with a shape of [batch_size, num_boxes] representing
        the scores of boxes.
      boxes: a tensor with a shape of [batch_size, num_boxes, 4] representing
        the boxes.
    Returns:
      sorted_scores: a tensor with a shape of [batch_size, num_boxes]
        representing the sorted scores.
      sorted_boxes: a tensor representing the sorted boxes.
      sorted_scores_indices: a tensor with a shape of [batch_size, num_boxes]
        representing the index of the scores in a sorted descending order.
    """
    with ops.name_scope('sort_scores_and_boxes'):
      batch_size = array_ops.shape(boxes)[0]
      num_boxes = array_ops.shape(boxes)[1]
      sorted_scores_indices = sort_ops.argsort(
          scores, axis=1, direction='DESCENDING')
      # Turn per-image indices into indices into the flattened
      # [batch_size * num_boxes] layout so one gather serves the whole batch.
      index_offsets = math_ops.range(batch_size) * num_boxes
      indices = array_ops.reshape(
          sorted_scores_indices + array_ops.expand_dims(index_offsets, 1), [-1])
      sorted_scores = array_ops.reshape(
          array_ops.gather(array_ops.reshape(scores, [-1]), indices),
          [batch_size, -1])
      sorted_boxes = array_ops.reshape(
          array_ops.gather(array_ops.reshape(boxes, [-1, 4]), indices),
          [batch_size, -1, 4])
    return sorted_scores, sorted_boxes, sorted_scores_indices

  # Collapse all leading batch dimensions into one; `batch_dims` is kept to
  # restore the shape of `num_valid` at the end.
  batch_dims = array_ops.shape(boxes)[:-2]
  num_boxes = array_ops.shape(boxes)[-2]
  boxes = array_ops.reshape(boxes, [-1, num_boxes, 4])
  scores = array_ops.reshape(scores, [-1, num_boxes])
  batch_size = array_ops.shape(boxes)[0]
  if score_threshold != float('-inf'):
    with ops.name_scope('filter_by_score'):
      # Boxes whose score is not above the threshold get both their score and
      # their coordinates multiplied by zero.
      score_mask = math_ops.cast(scores > score_threshold, scores.dtype)
      scores *= score_mask
      box_mask = array_ops.expand_dims(
          math_ops.cast(score_mask, boxes.dtype), 2)
      boxes *= box_mask

  if not canonicalized_coordinates:
    with ops.name_scope('canonicalize_coordinates'):
      y_1, x_1, y_2, x_2 = array_ops.split(
          value=boxes, num_or_size_splits=4, axis=2)
      # NOTE: the decision whether to swap the two coordinates of an axis is
      # taken from the first box of the first image only (index [0, 0, 0]) and
      # is then applied to every box.
      y_1_is_min = math_ops.reduce_all(
          math_ops.less_equal(y_1[0, 0, 0], y_2[0, 0, 0]))
      y_min, y_max = control_flow_ops.cond(
          y_1_is_min, lambda: (y_1, y_2), lambda: (y_2, y_1))
      x_1_is_min = math_ops.reduce_all(
          math_ops.less_equal(x_1[0, 0, 0], x_2[0, 0, 0]))
      x_min, x_max = control_flow_ops.cond(
          x_1_is_min, lambda: (x_1, x_2), lambda: (x_2, x_1))
      boxes = array_ops.concat([y_min, x_min, y_max, x_max], axis=2)

  if not sorted_input:
    scores, boxes, sorted_indices = _sort_scores_and_boxes(scores, boxes)
  else:
    # Default value required for Autograph.
    sorted_indices = array_ops.zeros_like(scores, dtype=dtypes.int32)

  # Pad the box axis up to the smallest multiple of tile_size that is at least
  # max(num_boxes, max_output_size).
  pad = math_ops.cast(
      math_ops.ceil(
          math_ops.cast(
              math_ops.maximum(num_boxes, max_output_size), dtypes.float32) /
          math_ops.cast(tile_size, dtypes.float32)),
      dtypes.int32) * tile_size - num_boxes
  boxes = array_ops.pad(
      math_ops.cast(boxes, dtypes.float32), [[0, 0], [0, pad], [0, 0]])
  scores = array_ops.pad(
      math_ops.cast(scores, dtypes.float32), [[0, 0], [0, pad]])
  num_boxes_after_padding = num_boxes + pad
  num_iterations = num_boxes_after_padding // tile_size
  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
    # Keep going while at least one image has fewer than max_output_size
    # selected boxes and there are tiles left to process.
    return math_ops.logical_and(
        math_ops.reduce_min(output_size) < max_output_size,
        idx < num_iterations)

  def suppression_loop_body(boxes, iou_threshold, output_size, idx):
    # Binds tile_size so the body matches the loop-variable signature.
    return _suppression_loop_body(
        boxes, iou_threshold, output_size, idx, tile_size)

  selected_boxes, _, output_size, _ = control_flow_ops.while_loop(
      _loop_cond,
      suppression_loop_body,
      [
          boxes, iou_threshold,
          array_ops.zeros([batch_size], dtypes.int32),
          constant_op.constant(0)
      ],
      shape_invariants=[
          tensor_shape.TensorShape([None, None, 4]),
          tensor_shape.TensorShape([]),
          tensor_shape.TensorShape([None]),
          tensor_shape.TensorShape([]),
      ],
  )
  num_valid = math_ops.minimum(output_size, max_output_size)
  # Each box that still has a coordinate > 0 is weighted by
  # (num_boxes_after_padding - position); every other box gets weight 0.
  # top_k therefore returns the earliest surviving positions first, and
  # subtracting the weights from num_boxes_after_padding recovers positions.
  idx = num_boxes_after_padding - math_ops.cast(
      nn_ops.top_k(
          math_ops.cast(math_ops.reduce_any(
              selected_boxes > 0, [2]), dtypes.int32) *
          array_ops.expand_dims(
              math_ops.range(num_boxes_after_padding, 0, -1), 0),
          max_output_size)[0], dtypes.int32)
  # Clamp so that entries produced by weight 0 or by padding stay within
  # [0, num_boxes).
  idx = math_ops.minimum(idx, num_boxes - 1)

  if not sorted_input:
    # Positions refer to the sorted order; map them back to the indices of the
    # caller's original ordering.
    index_offsets = math_ops.range(batch_size) * num_boxes
    gather_idx = array_ops.reshape(
        idx + array_ops.expand_dims(index_offsets, 1), [-1])
    idx = array_ops.reshape(
        array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
                         gather_idx),
        [batch_size, -1])
  # Entries at or beyond num_valid are replaced by 0.
  invalid_index = array_ops.fill([batch_size, max_output_size], 0)
  idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
  num_valid_expanded = array_ops.expand_dims(num_valid, 1)
  idx = array_ops.where(idx_index < num_valid_expanded,
                        idx, invalid_index)

  num_valid = array_ops.reshape(num_valid, batch_dims)
  return idx, num_valid
| |
| |
def non_max_suppression_padded_v1(boxes,
                                  scores,
                                  max_output_size,
                                  iou_threshold=0.5,
                                  score_threshold=float('-inf'),
                                  pad_to_max_output_size=False,
                                  name=None):
  """Greedily selects a subset of bounding boxes in descending order of score.

  Performs algorithmically equivalent operation to tf.image.non_max_suppression,
  with the addition of an optional parameter which zero-pads the output to
  be of size `max_output_size`.
  The output of this operation is a tuple containing the set of integers
  indexing into the input collection of bounding boxes representing the selected
  boxes and the number of valid indices in the index set. The bounding box
  coordinates corresponding to the selected indices can then be obtained using
  the `tf.slice` and `tf.gather` operations. For example:
    ```python
    selected_indices_padded, num_valid = tf.image.non_max_suppression_padded(
        boxes, scores, max_output_size, iou_threshold,
        score_threshold, pad_to_max_output_size=True)
    selected_indices = tf.slice(
        selected_indices_padded, tf.constant([0]), num_valid)
    selected_boxes = tf.gather(boxes, selected_indices)
    ```

  Args:
    boxes: A 2-D float `Tensor` of shape `[num_boxes, 4]`.
    scores: A 1-D float `Tensor` of shape `[num_boxes]` representing a single
      score corresponding to each box (each row of boxes).
    max_output_size: A scalar integer `Tensor` representing the maximum number
      of boxes to be selected by non-max suppression.
    iou_threshold: A float representing the threshold for deciding whether boxes
      overlap too much with respect to IOU.
    score_threshold: A float representing the threshold for deciding when to
      remove boxes based on score.
    pad_to_max_output_size: bool. If True, size of `selected_indices` output is
      padded to `max_output_size`.
    name: A name for the operation (optional).

  Returns:
    selected_indices: A 1-D integer `Tensor` of shape `[M]` representing the
      selected indices from the boxes tensor, where `M <= max_output_size`.
    valid_outputs: A scalar integer `Tensor` denoting how many elements in
      `selected_indices` are valid. Valid elements occur first, then padding.
  """
  with ops.name_scope(name, 'non_max_suppression_padded'):
    # Both thresholds are handed to the kernel as tensors.
    iou_threshold_tensor = ops.convert_to_tensor(
        iou_threshold, name='iou_threshold')
    score_threshold_tensor = ops.convert_to_tensor(
        score_threshold, name='score_threshold')
    selection = gen_image_ops.non_max_suppression_v4(
        boxes, scores, max_output_size, iou_threshold_tensor,
        score_threshold_tensor, pad_to_max_output_size)
  return selection
| |
| |
@tf_export('image.draw_bounding_boxes', v1=[])
@dispatch.add_dispatch_support
def draw_bounding_boxes_v2(images, boxes, colors, name=None):
  """Draw bounding boxes on a batch of images.

  Outputs a copy of `images` but draws on top of the pixels zero or more
  bounding boxes specified by the locations in `boxes`. The coordinates of the
  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and the height of the underlying image.

  For example, if an image is 100 x 200 pixels (height x width) and the bounding
  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).

  Parts of the bounding box may fall outside the image.

  Args:
    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
      4-D with shape `[batch, height, width, depth]`. A batch of images.
    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
      num_bounding_boxes, 4]` containing bounding boxes.
    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
      through for the boxes. When `None`, the op variant that takes no colors
      input is used instead.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `images`.

  Usage Example:

  >>> # create an empty image
  >>> img = tf.zeros([1, 3, 3, 3])
  >>> # draw a box around the image
  >>> box = np.array([0, 0, 1, 1])
  >>> boxes = box.reshape([1, 1, 4])
  >>> # alternate between red and blue
  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
  >>> tf.image.draw_bounding_boxes(img, boxes, colors)
  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
  array([[[[1., 0., 0.],
          [1., 0., 0.],
          [1., 0., 0.]],
         [[1., 0., 0.],
          [0., 0., 0.],
          [1., 0., 0.]],
         [[1., 0., 0.],
          [1., 0., 0.],
          [1., 0., 0.]]]], dtype=float32)>
  """
  if colors is not None:
    return gen_image_ops.draw_bounding_boxes_v2(images, boxes, colors, name)
  # No colors supplied: use the op that has no colors input.
  return gen_image_ops.draw_bounding_boxes(images, boxes, name)
| |
| |
@tf_export(v1=['image.draw_bounding_boxes'])
@dispatch.add_dispatch_support
def draw_bounding_boxes(images, boxes, name=None, colors=None):
  """Draw bounding boxes on a batch of images.

  Outputs a copy of `images` but draws on top of the pixels zero or more
  bounding boxes specified by the locations in `boxes`. The coordinates of the
  each bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`.
  The bounding box coordinates are floats in `[0.0, 1.0]` relative to the width
  and the height of the underlying image.

  For example, if an image is 100 x 200 pixels (height x width) and the bounding
  box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
  the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).

  Parts of the bounding box may fall outside the image.

  Note that in this signature `name` precedes `colors`, so `colors` is best
  passed by keyword.

  Args:
    images: A `Tensor`. Must be one of the following types: `float32`, `half`.
      4-D with shape `[batch, height, width, depth]`. A batch of images.
    boxes: A `Tensor` of type `float32`. 3-D with shape `[batch,
      num_bounding_boxes, 4]` containing bounding boxes.
    name: A name for the operation (optional).
    colors: A `Tensor` of type `float32`. 2-D. A list of RGBA colors to cycle
      through for the boxes.

  Returns:
    A `Tensor`. Has the same type as `images`.

  Usage Example:

  >>> # create an empty image
  >>> img = tf.zeros([1, 3, 3, 3])
  >>> # draw a box around the image
  >>> box = np.array([0, 0, 1, 1])
  >>> boxes = box.reshape([1, 1, 4])
  >>> # alternate between red and blue
  >>> colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
  >>> tf.image.draw_bounding_boxes(img, boxes, colors=colors)
  <tf.Tensor: shape=(1, 3, 3, 3), dtype=float32, numpy=
  array([[[[1., 0., 0.],
          [1., 0., 0.],
          [1., 0., 0.]],
         [[1., 0., 0.],
          [0., 0., 0.],
          [1., 0., 0.]],
         [[1., 0., 0.],
          [1., 0., 0.],
          [1., 0., 0.]]]], dtype=float32)>
  """
  # The target function takes `colors` before `name`; forwarding by keyword
  # keeps the two from being mixed up.
  return draw_bounding_boxes_v2(images, boxes, colors=colors, name=name)
| |
| |
@tf_export('image.generate_bounding_box_proposals')
@dispatch.add_dispatch_support
def generate_bounding_box_proposals(scores,
                                    bbox_deltas,
                                    image_info,
                                    anchors,
                                    nms_threshold=0.7,
                                    pre_nms_topn=6000,
                                    min_size=16,
                                    post_nms_topn=300,
                                    name=None):
  """Generate bounding box proposals from encoded bounding boxes.

  All arguments are forwarded unchanged to the
  `generate_bounding_box_proposals` op.

  Args:
    scores: A 4-D float `Tensor` of shape
      `[num_images, height, width, num_anchors]` containing scores of
      the boxes for given anchors, can be unsorted.
    bbox_deltas: A 4-D float `Tensor` of shape
      `[num_images, height, width, 4 x num_anchors]` encoding boxes
      with respect to each anchor. Coordinates are given
      in the form `[dy, dx, dh, dw]`.
    image_info: A 2-D float `Tensor` of shape `[num_images, 5]`
      containing image information Height, Width, Scale.
    anchors: A 2-D float `Tensor` of shape `[num_anchors, 4]`
      describing the anchor boxes.
      Boxes are formatted in the form `[y1, x1, y2, x2]`.
    nms_threshold: A scalar float `Tensor` for non-maximal-suppression
      threshold. Defaults to 0.7.
    pre_nms_topn: A scalar int `Tensor` for the number of
      top scoring boxes to be used as input. Defaults to 6000.
    min_size: A scalar float `Tensor`. Any box that has a smaller size
      than min_size will be discarded. Defaults to 16.
    post_nms_topn: An integer. Maximum number of rois in the output.
      Defaults to 300.
    name: A name for this operation (optional).

  Returns:
    rois: Region of interest boxes sorted by their scores.
    roi_probabilities: scores of the ROI boxes in the ROIs' `Tensor`.
  """
  proposal_arguments = {
      'scores': scores,
      'bbox_deltas': bbox_deltas,
      'image_info': image_info,
      'anchors': anchors,
      'nms_threshold': nms_threshold,
      'pre_nms_topn': pre_nms_topn,
      'min_size': min_size,
      'post_nms_topn': post_nms_topn,
      'name': name,
  }
  return gen_image_ops.generate_bounding_box_proposals(**proposal_arguments)