| # Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # ============================================================================== |
| # pylint: disable=invalid-name |
| """MobileNet v2 models for Keras. |
| |
| MobileNetV2 is a general architecture and can be used for multiple use cases. |
| Depending on the use case, it can use different input layer size and |
| different width factors. This allows different width models to reduce |
| the number of multiply-adds and thereby |
| reduce inference cost on mobile devices. |
| |
| MobileNetV2 is very similar to the original MobileNet, |
| except that it uses inverted residual blocks with |
| bottlenecking features. It has a drastically lower |
| parameter count than the original MobileNet. |
| MobileNets support any input size greater |
| than 32 x 32, with larger image sizes |
| offering better performance. |
| |
| The number of parameters and number of multiply-adds |
| can be modified by using the `alpha` parameter, |
| which increases/decreases the number of filters in each layer. |
| By altering the image size and `alpha` parameter, |
| all 22 models from the paper can be built, with ImageNet weights provided. |
| |
| The paper demonstrates the performance of MobileNets using `alpha` values of |
| 0.35, 0.5, 0.75, 1.0 (also called 100 % MobileNet), 1.3, and 1.4. |
| For each of these `alpha` values, weights for 5 different input image sizes |
| are provided (224, 192, 160, 128, and 96). |
| |
| The following table describes the performance of |
| MobileNet on various input sizes: |
| ------------------------------------------------------------------------ |
| MACs stands for Multiply Adds |
| Classification Checkpoint|MACs (M)|Parameters (M)|Top 1 Accuracy|Top 5 Accuracy |
| --------------------------|------------|---------------|---------|----|--------- |
| | [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 | |
| | [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 | |
| | [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 | |
| | [mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 | |
| | [mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 | |
| | [mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 | |
| | [mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 | |
| | [mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 | |
| | [mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 | |
| | [mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 | |
| | [mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 | |
| | [mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 | |
| | [mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 | |
| | [mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 | |
| | [mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 | |
| | [mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 | |
| | [mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 | |
| | [mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 | |
| | [mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 | |
| | [mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 | |
| | [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 | |
| | [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 | |
| |
| """ |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| |
| import os |
| |
| from tensorflow.python.keras import backend |
| from tensorflow.python.keras import layers |
| from tensorflow.python.keras.applications import imagenet_utils |
| from tensorflow.python.keras.engine import training |
| from tensorflow.python.keras.utils import data_utils |
| from tensorflow.python.keras.utils import layer_utils |
| from tensorflow.python.platform import tf_logging as logging |
| from tensorflow.python.util.tf_export import keras_export |
| |
| |
# Base URL under which the pre-trained MobileNetV2 weight files are hosted;
# individual weight file names are appended to it when weights are downloaded.
BASE_WEIGHT_PATH = (
    'https://storage.googleapis.com/tensorflow/keras-applications/'
    'mobilenet_v2/')
| |
| |
@keras_export('keras.applications.mobilenet_v2.MobileNetV2',
              'keras.applications.MobileNetV2')
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
  """Instantiates the MobileNetV2 architecture.

  Reference paper:
  - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]
  (https://arxiv.org/abs/1801.04381) (CVPR 2018)

  Optionally loads weights pre-trained on ImageNet.

  Arguments:
    input_shape: Optional shape tuple, to be specified if you would
      like to use a model with an input image resolution that is not
      (224, 224, 3).
      It should have exactly 3 input channels, e.g. `(160, 160, 3)`.
      You can also omit this option if you would like
      to infer input_shape from an input_tensor.
      If you choose to include both input_tensor and input_shape then
      input_shape will be used if they match; if the shapes
      do not match an error is raised.
    alpha: Positive float, controls the width of the network.
      This is known as the width multiplier in the MobileNetV2 paper,
      but the name is kept for consistency with `applications.MobileNetV1`
      model in Keras.
      - If `alpha` < 1.0, proportionally decreases the number
          of filters in each layer.
      - If `alpha` > 1.0, proportionally increases the number
          of filters in each layer.
      - If `alpha` = 1, default number of filters from the paper
          are used at each layer.
      When `weights='imagenet'`, `alpha` must be one of `0.35`, `0.50`,
      `0.75`, `1.0`, `1.3` or `1.4`.
    include_top: Boolean, whether to include the fully-connected
      layer at the top of the network. Defaults to `True`.
    weights: String, one of `None` (random initialization),
      'imagenet' (pre-training on ImageNet),
      or the path to the weights file to be loaded.
    input_tensor: Optional Keras tensor (i.e. output of
      `layers.Input()`)
      to use as image input for the model.
    pooling: String, optional pooling mode for feature extraction
      when `include_top` is `False`.
      - `None` means that the output of the model
          will be the 4D tensor output of the
          last convolutional block.
      - `avg` means that global average pooling
          will be applied to the output of the
          last convolutional block, and thus
          the output of the model will be a
          2D tensor.
      - `max` means that global max pooling will
          be applied.
    classes: Integer, optional number of classes to classify images
      into, only to be specified if `include_top` is True, and
      if no `weights` argument is specified.
    **kwargs: For backwards compatibility only. The only accepted key is
      `layers`, which replaces the module-level `layers` used to build
      the network.

  Returns:
    A `keras.Model` instance.

  Raises:
    ValueError: in case of unknown keyword arguments, an invalid argument
      for `weights`, `classes != 1000` together with ImageNet weights and
      `include_top`, an `input_tensor` that is not a Keras tensor or does
      not match `input_shape`, or an unsupported `alpha` when
      `weights='imagenet'`.
  """
  if 'layers' in kwargs:
    global layers
    layers = kwargs.pop('layers')
  if kwargs:
    raise ValueError('Unknown argument(s): %s' % (kwargs,))
  if not (weights in {'imagenet', None} or os.path.exists(weights)):
    raise ValueError('The `weights` argument should be either '
                     '`None` (random initialization), `imagenet` '
                     '(pre-training on ImageNet), '
                     'or the path to the weights file to be loaded.')

  if weights == 'imagenet' and include_top and classes != 1000:
    raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                     'as true, `classes` should be 1000')

  data_format = backend.image_data_format()
  # Square input resolutions for which ImageNet weights are published.
  pretrained_sizes = (96, 128, 160, 192, 224)

  # If both input_shape and input_tensor are used, they should match.
  if input_shape is not None and input_tensor is not None:
    try:
      is_input_t_tensor = backend.is_keras_tensor(input_tensor)
    except ValueError:
      try:
        is_input_t_tensor = backend.is_keras_tensor(
            layer_utils.get_source_inputs(input_tensor))
      except ValueError:
        raise ValueError('input_tensor: %s is not type input_tensor' %
                         (input_tensor,))
    if not is_input_t_tensor:
      raise ValueError('input_tensor specified: %s is not a keras tensor' %
                       (input_tensor,))
    # Index 2 of the batched tensor shape and index 1 of the unbatched
    # `input_shape` address the same spatial axis in both data formats
    # (rows for channels_first, cols for channels_last), so one comparison
    # covers both. The former channels_first branch never ran (it compared
    # the `image_data_format` function itself to a string) and would have
    # compared the channel count with a spatial size.
    if backend.int_shape(input_tensor)[2] != input_shape[1]:
      raise ValueError(
          'input_shape: %s and input_tensor: %s do not meet the same shape '
          'requirements' % (input_shape, input_tensor))

  # Determine the spatial size used to pick `default_size`: taken from
  # `input_shape` when given, otherwise inferred from a Keras `input_tensor`.
  rows = cols = None
  if input_shape is not None:
    if data_format == 'channels_first':
      rows, cols = input_shape[1], input_shape[2]
    else:
      rows, cols = input_shape[0], input_shape[1]
  elif input_tensor is not None:
    try:
      is_keras_input = backend.is_keras_tensor(input_tensor)
    except ValueError:
      raise ValueError(
          'input_tensor: %s is type: %s which is not a valid type' %
          (input_tensor, type(input_tensor)))
    if is_keras_input:
      tensor_shape = backend.int_shape(input_tensor)
      if data_format == 'channels_first':
        rows, cols = tensor_shape[2], tensor_shape[3]
      else:
        rows, cols = tensor_shape[1], tensor_shape[2]

  # Fall back to 224 for unknown, non-square or unsupported resolutions.
  if rows == cols and rows in pretrained_sizes:
    default_size = rows
  else:
    default_size = 224

  input_shape = imagenet_utils.obtain_input_shape(
      input_shape,
      default_size=default_size,
      min_size=32,
      data_format=data_format,
      require_flatten=include_top,
      weights=weights)

  if data_format == 'channels_last':
    row_axis, col_axis = (0, 1)
  else:
    row_axis, col_axis = (1, 2)
  rows = input_shape[row_axis]
  cols = input_shape[col_axis]

  if weights == 'imagenet':
    if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
      raise ValueError('If imagenet weights are being loaded, '
                       'alpha can be one of `0.35`, `0.50`, `0.75`, '
                       '`1.0`, `1.3` or `1.4` only.')

    if rows != cols or rows not in pretrained_sizes:
      # `rows` only selects the weight file / model name from here on.
      rows = 224
      logging.warning('`input_shape` is undefined or non-square, '
                      'or `rows` is not in [96, 128, 160, 192, 224].'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

  if input_tensor is None:
    img_input = layers.Input(shape=input_shape)
  elif not backend.is_keras_tensor(input_tensor):
    img_input = layers.Input(tensor=input_tensor, shape=input_shape)
  else:
    img_input = input_tensor

  channel_axis = 1 if data_format == 'channels_first' else -1

  # Stem: strided 3x3 convolution.
  first_block_filters = _make_divisible(32 * alpha, 8)
  x = layers.ZeroPadding2D(
      padding=imagenet_utils.correct_pad(img_input, 3),
      name='Conv1_pad')(img_input)
  x = layers.Conv2D(
      first_block_filters,
      kernel_size=3,
      strides=(2, 2),
      padding='valid',
      use_bias=False,
      name='Conv1')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x)
  x = layers.ReLU(6., name='Conv1_relu')(x)

  # (filters, stride, expansion) for blocks 0..16, in network order.
  block_specs = (
      (16, 1, 1),
      (24, 2, 6), (24, 1, 6),
      (32, 2, 6), (32, 1, 6), (32, 1, 6),
      (64, 2, 6), (64, 1, 6), (64, 1, 6), (64, 1, 6),
      (96, 1, 6), (96, 1, 6), (96, 1, 6),
      (160, 2, 6), (160, 1, 6), (160, 1, 6),
      (320, 1, 6),
  )
  for block_id, (filters, stride, expansion) in enumerate(block_specs):
    x = _inverted_res_block(
        x,
        filters=filters,
        alpha=alpha,
        stride=stride,
        expansion=expansion,
        block_id=block_id)

  # no alpha applied to last conv as stated in the paper:
  # if the width multiplier is greater than 1 we
  # increase the number of output channels
  if alpha > 1.0:
    last_block_filters = _make_divisible(1280 * alpha, 8)
  else:
    last_block_filters = 1280

  x = layers.Conv2D(
      last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(x)
  x = layers.BatchNormalization(
      axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x)
  x = layers.ReLU(6., name='out_relu')(x)

  if include_top:
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(
        classes, activation='softmax', use_bias=True, name='Logits')(x)
  elif pooling == 'avg':
    x = layers.GlobalAveragePooling2D()(x)
  elif pooling == 'max':
    x = layers.GlobalMaxPooling2D()(x)

  # Ensure that the model takes into account
  # any potential predecessors of `input_tensor`.
  if input_tensor is not None:
    inputs = layer_utils.get_source_inputs(input_tensor)
  else:
    inputs = img_input

  # Create model.
  model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

  # Load weights.
  if weights == 'imagenet':
    # `float(alpha)` keeps the file name valid when an integer such as `1`
    # is passed: it passes the membership check above but would otherwise
    # be rendered as '1' instead of '1.0'.
    suffix = '.h5' if include_top else '_no_top.h5'
    model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                  str(float(alpha)) + '_' + str(rows) + suffix)
    weights_path = data_utils.get_file(
        model_name, BASE_WEIGHT_PATH + model_name, cache_subdir='models')
    model.load_weights(weights_path)
  elif weights is not None:
    model.load_weights(weights)

  return model
| |
| |
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
  """Builds one inverted residual block on top of `inputs`.

  The block is: an optional 1x1 expansion convolution (skipped when
  `block_id` is falsy, i.e. for block 0), a 3x3 depthwise convolution and
  a linear 1x1 projection, each followed by batch normalization. A
  residual connection is added when the block keeps both resolution and
  channel count.

  Arguments:
    inputs: Input tensor of the block.
    expansion: Multiplier applied to the input channel count to get the
      width of the expansion convolution.
    stride: Stride of the depthwise convolution.
    alpha: Width multiplier applied to `filters`.
    filters: Base number of output channels before scaling by `alpha`.
    block_id: Index of the block; used to name the layers.

  Returns:
    Output tensor of the block.
  """
  if backend.image_data_format() == 'channels_first':
    channel_axis = 1
  else:
    channel_axis = -1

  input_depth = backend.int_shape(inputs)[channel_axis]
  # Scale the requested width by alpha, then snap it to a multiple of 8.
  output_depth = _make_divisible(int(filters * alpha), 8)

  def batch_norm(name):
    # All normalization layers in the block share these hyper-parameters.
    return layers.BatchNormalization(
        axis=channel_axis, epsilon=1e-3, momentum=0.999, name=name)

  net = inputs
  if not block_id:
    # The first block has no expansion stage and uses a dedicated prefix.
    prefix = 'expanded_conv_'
  else:
    prefix = 'block_{}_'.format(block_id)
    # Expand
    expand_conv = layers.Conv2D(
        expansion * input_depth,
        kernel_size=1,
        padding='same',
        use_bias=False,
        activation=None,
        name=prefix + 'expand')
    net = expand_conv(net)
    net = batch_norm(prefix + 'expand_BN')(net)
    net = layers.ReLU(6., name=prefix + 'expand_relu')(net)

  # Depthwise
  if stride == 2:
    # Explicit padding so the strided 'valid' convolution below is aligned.
    pad_layer = layers.ZeroPadding2D(
        padding=imagenet_utils.correct_pad(net, 3),
        name=prefix + 'pad')
    net = pad_layer(net)
  depthwise_padding = 'same' if stride == 1 else 'valid'
  depthwise_conv = layers.DepthwiseConv2D(
      kernel_size=3,
      strides=stride,
      activation=None,
      use_bias=False,
      padding=depthwise_padding,
      name=prefix + 'depthwise')
  net = depthwise_conv(net)
  net = batch_norm(prefix + 'depthwise_BN')(net)
  net = layers.ReLU(6., name=prefix + 'depthwise_relu')(net)

  # Project (no activation after the projection).
  project_conv = layers.Conv2D(
      output_depth,
      kernel_size=1,
      padding='same',
      use_bias=False,
      activation=None,
      name=prefix + 'project')
  net = project_conv(net)
  net = batch_norm(prefix + 'project_BN')(net)

  # Without a matching channel count and unit stride no skip is possible.
  if stride != 1 or input_depth != output_depth:
    return net
  return layers.Add(name=prefix + 'add')([inputs, net])
| |
| |
| def _make_divisible(v, divisor, min_value=None): |
| if min_value is None: |
| min_value = divisor |
| new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) |
| # Make sure that round down does not go down by more than 10%. |
| if new_v < 0.9 * v: |
| new_v += divisor |
| return new_v |
| |
| |
@keras_export('keras.applications.mobilenet_v2.preprocess_input')
def preprocess_input(x, data_format=None):
  """Preprocesses a batch of images for use with MobileNetV2.

  Delegates to `imagenet_utils.preprocess_input` using the `'tf'` mode.

  Arguments:
    x: The encoded batch of images to preprocess.
    data_format: Optional data format forwarded unchanged to
      `imagenet_utils.preprocess_input`.

  Returns:
    Whatever `imagenet_utils.preprocess_input` returns for `x`.
  """
  preprocessed = imagenet_utils.preprocess_input(
      x, data_format=data_format, mode='tf')
  return preprocessed
| |
| |
@keras_export('keras.applications.mobilenet_v2.decode_predictions')
def decode_predictions(preds, top=5):
  """Decodes the predictions produced by the model.

  Delegates to `imagenet_utils.decode_predictions`.

  Arguments:
    preds: Prediction result from the model.
    top: Forwarded as the `top` argument of
      `imagenet_utils.decode_predictions`; defaults to 5.

  Returns:
    Whatever `imagenet_utils.decode_predictions` returns for `preds`.
  """
  decoded = imagenet_utils.decode_predictions(preds, top=top)
  return decoded