Remove the legacy KPL (Keras preprocessing layers) directory as well as the preprocessing dependency for Keras.

Also update the related build targets and tests so they no longer use them.
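
The public API should be unaffected: the preprocessing layers are still exported
through the Keras package that backs tf.keras. A minimal sketch of the intended
usage going forward, assuming a TF 2.x install where tf.keras is provided by the
standalone keras pip package:

    import tensorflow as tf

    # Adapt a public preprocessing layer to example data and apply it.
    norm = tf.keras.layers.experimental.preprocessing.Normalization()
    norm.adapt([[1.0], [2.0], [3.0]])
    print(norm([[2.0]]))  # normalized with the mean/variance learned by adapt()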

PiperOrigin-RevId: 406052251
Change-Id: I8e4a0a7747434db997d3e34f82d22cf4423dcd5e
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index 3959e3d..7dea85f 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -40,7 +40,6 @@
         "//tensorflow/python/keras/mixed_precision:mixed_precision_experimental",
         "//tensorflow/python/keras/optimizer_v2",
         "//tensorflow/python/keras/premade",
-        "//tensorflow/python/keras/preprocessing",
         "//tensorflow/python/keras/saving",
         "//tensorflow/python/keras/utils",
         "//tensorflow/python/keras/wrappers",
diff --git a/tensorflow/python/keras/layers/BUILD b/tensorflow/python/keras/layers/BUILD
index b6e9279..f80301e 100644
--- a/tensorflow/python/keras/layers/BUILD
+++ b/tensorflow/python/keras/layers/BUILD
@@ -51,9 +51,9 @@
         ":recurrent_v2",
         ":rnn_cell_wrapper_v2",
         ":wrappers",
+        "//tensorflow/python/keras/engine",
         "//tensorflow/python/keras/feature_column",
         "//tensorflow/python/keras/layers/normalization",
-        "//tensorflow/python/keras/layers/preprocessing",
         "//tensorflow/python/keras/premade",
         "//tensorflow/python/keras/utils:tf_utils",
     ],
@@ -881,15 +881,3 @@
         "@absl_py//absl/testing:parameterized",
     ],
 )
-
-tf_py_test(
-    name = "layers_test",
-    size = "small",
-    srcs = ["layers_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":layers",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:tf2",
-    ],
-)
diff --git a/tensorflow/python/keras/layers/__init__.py b/tensorflow/python/keras/layers/__init__.py
index 4223f7d..7671c02 100644
--- a/tensorflow/python/keras/layers/__init__.py
+++ b/tensorflow/python/keras/layers/__init__.py
@@ -25,29 +25,6 @@
 from tensorflow.python.keras.engine.base_layer import Layer
 from tensorflow.python.keras.engine.base_preprocessing_layer import PreprocessingLayer
 
-# Image preprocessing layers.
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import CenterCrop
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomCrop
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomFlip
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomContrast
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomHeight
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomRotation
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomTranslation
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomWidth
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import RandomZoom
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import Resizing
-from tensorflow.python.keras.layers.preprocessing.image_preprocessing import Rescaling
-
-# Preprocessing layers.
-from tensorflow.python.keras.layers.preprocessing.category_crossing import CategoryCrossing
-from tensorflow.python.keras.layers.preprocessing.category_encoding import CategoryEncoding
-from tensorflow.python.keras.layers.preprocessing.discretization import Discretization
-from tensorflow.python.keras.layers.preprocessing.hashing import Hashing
-from tensorflow.python.keras.layers.preprocessing.integer_lookup import IntegerLookup
-from tensorflow.python.keras.layers.preprocessing.normalization import Normalization
-from tensorflow.python.keras.layers.preprocessing.string_lookup import StringLookup
-from tensorflow.python.keras.layers.preprocessing.text_vectorization import TextVectorization
-
 # Advanced activations.
 from tensorflow.python.keras.layers.advanced_activations import LeakyReLU
 from tensorflow.python.keras.layers.advanced_activations import PReLU
diff --git a/tensorflow/python/keras/layers/layers_test.py b/tensorflow/python/keras/layers/layers_test.py
deleted file mode 100644
index 60349b2..0000000
--- a/tensorflow/python/keras/layers/layers_test.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# pylint: disable=g-classes-have-attributes
-"""Tests for layers.__init__."""
-
-from tensorflow.python import tf2
-from tensorflow.python.keras import layers
-from tensorflow.python.platform import test
-
-
-class LayersTest(test.TestCase):
-
-  def test_keras_private_symbol(self):
-    if tf2.enabled():
-      normalization_parent = layers.Normalization.__module__.split('.')[-1]
-      self.assertEqual('normalization', normalization_parent)
-      self.assertTrue(layers.BatchNormalization._USE_V2_BEHAVIOR)
-    else:
-      self.assertFalse(layers.BatchNormalization._USE_V2_BEHAVIOR)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/BUILD b/tensorflow/python/keras/layers/preprocessing/BUILD
deleted file mode 100644
index 4459ca3..0000000
--- a/tensorflow/python/keras/layers/preprocessing/BUILD
+++ /dev/null
@@ -1,757 +0,0 @@
-# Description:
-#   Contains the Keras preprocess layers (internal TensorFlow version).
-
-load("//tensorflow:tensorflow.bzl", "tf_py_test")
-
-# buildifier: disable=same-origin-load
-load("//tensorflow:tensorflow.bzl", "cuda_py_test")
-load("//tensorflow/core/platform/default:distribute.bzl", "distribute_py_test")
-
-package(
-    default_visibility = [
-        "//tensorflow/python/keras:__subpackages__",
-        "//tensorflow/tools/pip_package:__pkg__",
-    ],
-    licenses = ["notice"],
-)
-
-filegroup(
-    name = "all_py_srcs",
-    srcs = glob(["*.py"]),
-    visibility = ["//tensorflow/python/keras/google/private_tf_api_test:__pkg__"],
-)
-
-py_library(
-    name = "preprocessing",
-    srcs = [
-        "__init__.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        ":category_crossing",
-        ":discretization",
-        ":hashing",
-        ":image_preprocessing",
-        ":integer_lookup",
-        ":normalization",
-        ":preprocessing_stage",
-        ":preprocessing_test_utils",
-        ":reduction",
-        ":string_lookup",
-        ":text_vectorization",
-    ],
-)
-
-py_library(
-    name = "discretization",
-    srcs = [
-        "discretization.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:boosted_trees_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:resources",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/utils:tf_utils",
-        "//tensorflow/python/ops/parallel_for:control_flow_ops",
-        "//tensorflow/python/ops/ragged:ragged_functional_ops",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "category_crossing",
-    srcs = [
-        "category_crossing.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/utils:tf_utils",
-        "//tensorflow/python/ops/ragged:ragged_array_ops",
-        "//tensorflow/python/ops/ragged:ragged_tensor",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "hashing",
-    srcs = [
-        "hashing.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python:tensor_util",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/ops/ragged:ragged_tensor",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "image_preprocessing",
-    srcs = [
-        "image_preprocessing.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:control_flow_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:image_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:stateful_random_ops",
-        "//tensorflow/python:stateless_random_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_util",
-        "//tensorflow/python:variables",
-        "//tensorflow/python/compat",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/engine:input_spec",
-        "//tensorflow/python/keras/preprocessing:image",
-        "//tensorflow/python/keras/utils:control_flow_util",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "index_lookup",
-    srcs = [
-        "index_lookup.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        ":category_encoding",
-        ":table_utils",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:lookup_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python:util",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/engine",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "normalization",
-    srcs = [
-        "normalization.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:util",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "integer_lookup",
-    srcs = [
-        "integer_lookup.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        ":index_lookup",
-        ":table_utils",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/util:tf_export",
-    ],
-)
-
-py_library(
-    name = "table_utils",
-    srcs = [
-        "table_utils.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python/keras/utils:tf_utils",
-        "//tensorflow/python/ops/ragged:ragged_functional_ops",
-        "//tensorflow/python/ops/ragged:ragged_tensor",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "text_vectorization",
-    srcs = [
-        "text_vectorization.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        ":category_encoding",
-        ":string_lookup",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:string_ops",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/utils:layer_utils",
-        "//tensorflow/python/keras/utils:tf_utils",
-        "//tensorflow/python/ops/ragged:ragged_functional_ops",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "category_encoding",
-    srcs = [
-        "category_encoding.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:bincount_ops",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:init_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python:util",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/engine:input_spec",
-        "//tensorflow/python/keras/utils:layer_utils",
-        "//tensorflow/python/ops/ragged:ragged_tensor",
-        "//tensorflow/python/util:tf_export",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "reduction",
-    srcs = [
-        "reduction.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform",
-        "//tensorflow/python/keras/engine:base_layer",
-    ],
-)
-
-py_library(
-    name = "string_lookup",
-    srcs = [
-        "string_lookup.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        ":index_lookup",
-        ":table_utils",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/util:tf_export",
-    ],
-)
-
-py_library(
-    name = "preprocessing_stage",
-    srcs = [
-        "preprocessing_stage.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:framework_ops",
-        "//tensorflow/python:util",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/utils:tf_utils",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "preprocessing_test_utils",
-    srcs = ["preprocessing_test_utils.py"],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:util",
-        "//third_party/py/numpy",
-    ],
-)
-
-cuda_py_test(
-    name = "category_crossing_test",
-    srcs = ["category_crossing_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    tags = [
-        "no_windows",  # b/149031156
-    ],
-    deps = [
-        ":category_crossing",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:sparse_ops",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras:testing_utils",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/ops/ragged:ragged_factory_ops",
-        "//tensorflow/python/ops/ragged:ragged_tensor",
-        "//third_party/py/numpy",
-    ],
-)
-
-tf_py_test(
-    name = "category_encoding_test",
-    srcs = ["category_encoding_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":category_encoding",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-distribute_py_test(
-    name = "category_encoding_distribution_test",
-    srcs = ["category_encoding_distribution_test.py"],
-    disable_mlir_bridge = False,
-    main = "category_encoding_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "multi_and_single_gpu",
-        "no_oss",  # b/189866692
-        "no_rocm",
-        "noguitar",  # b/190034522
-    ],
-    tpu_tags = [
-        "no_oss",  # b/155502591
-    ],
-    deps = [
-        ":category_encoding",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:config",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_test_combinations_lib",
-        "//tensorflow/python/compat:v2_compat",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/distribute:multi_process_runner",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-distribute_py_test(
-    name = "category_crossing_distribution_test",
-    srcs = ["category_crossing_distribution_test.py"],
-    main = "category_crossing_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "multi_and_single_gpu",
-    ],
-    tpu_tags = [
-        "no_oss",  # b/155502591
-    ],
-    deps = [
-        ":category_crossing",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:config",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_test_combinations_lib",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-distribute_py_test(
-    name = "image_preprocessing_distribution_test",
-    srcs = ["image_preprocessing_distribution_test.py"],
-    main = "image_preprocessing_distribution_test.py",
-    python_version = "PY3",
-    shard_count = 4,
-    tags = [
-        "multi_and_single_gpu",
-        "no_rocm",
-    ],
-    tpu_tags = [
-        "no_oss",
-        "noguitar",  # TODO(b/183957207)
-    ],
-    deps = [
-        ":image_preprocessing",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_test_combinations_lib",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-tf_py_test(
-    name = "discretization_test",
-    srcs = ["discretization_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    tags = ["no_rocm"],
-    deps = [
-        ":discretization",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-distribute_py_test(
-    name = "discretization_distribution_test",
-    srcs = ["discretization_distribution_test.py"],
-    main = "discretization_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "multi_and_single_gpu",
-        "no_oss",  # TODO(b/189956080)
-        "no_rocm",
-        "noguitar",  # b/190034522
-    ],
-    deps = [
-        ":discretization",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:config",
-        "//tensorflow/python:framework_test_combinations_lib",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-cuda_py_test(
-    name = "hashing_test",
-    srcs = ["hashing_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    deps = [
-        ":hashing",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:constant_op",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:sparse_tensor",
-        "//tensorflow/python:tensor_shape",
-        "//tensorflow/python:tensor_spec",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras:testing_utils",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/ops/ragged:ragged_factory_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-distribute_py_test(
-    name = "hashing_distribution_test",
-    srcs = ["hashing_distribution_test.py"],
-    disable_mlir_bridge = False,
-    main = "hashing_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "multi_and_single_gpu",
-    ],
-    deps = [
-        ":hashing",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-tf_py_test(
-    name = "index_lookup_test",
-    srcs = ["index_lookup_test.py"],
-    python_version = "PY3",
-    tags = ["noasan"],  # TODO(b/183961255)
-    deps = [
-        ":index_lookup",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-distribute_py_test(
-    name = "index_lookup_distribution_test",
-    srcs = ["index_lookup_distribution_test.py"],
-    disable_mlir_bridge = False,
-    main = "index_lookup_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "multi_and_single_gpu",
-        "no_rocm",
-    ],
-    tpu_tags = ["no_oss"],
-    deps = [
-        ":index_lookup",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-cuda_py_test(
-    name = "image_preprocessing_test",
-    srcs = ["image_preprocessing_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    tags = [
-        "no_windows",  # TODO(b/184424727): Re-enable this.
-    ],
-    deps = [
-        ":image_preprocessing",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:image_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:stateful_random_ops",
-        "//tensorflow/python:stateless_random_ops",
-        "//tensorflow/python/compat",
-        "//tensorflow/python/distribute:mirrored_strategy",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras:testing_utils",
-        "//tensorflow/python/keras/engine",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-tf_py_test(
-    name = "normalization_test",
-    srcs = ["normalization_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    tags = [
-        "broken",  # b/170974360
-        "noasan",  # TODO(b/337374867) fails with -fsanitize=null
-    ],
-    deps = [
-        ":normalization",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-tf_py_test(
-    name = "integer_lookup_test",
-    srcs = ["integer_lookup_test.py"],
-    python_version = "PY3",
-    tags = ["noasan"],  # TODO(b/183961255)
-    deps = [
-        ":integer_lookup",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-distribute_py_test(
-    name = "normalization_distribution_test",
-    srcs = ["normalization_distribution_test.py"],
-    main = "normalization_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "no_cuda_asan",
-        "no_oss",
-    ],
-    deps = [
-        ":normalization",
-        ":preprocessing_test_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework_test_combinations_lib",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-tf_py_test(
-    name = "table_utils_test",
-    srcs = ["table_utils_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":table_utils",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-tf_py_test(
-    name = "text_vectorization_test",
-    srcs = ["text_vectorization_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    deps = [
-        ":preprocessing_test_utils",
-        ":text_vectorization",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-distribute_py_test(
-    name = "text_vectorization_distribution_test",
-    srcs = ["text_vectorization_distribution_test.py"],
-    main = "text_vectorization_distribution_test.py",
-    python_version = "PY3",
-    tags = [
-        "multi_and_single_gpu",
-        "no_rocm",
-    ],
-    tpu_tags = [
-        "no_oss",  # b/155502591
-    ],
-    deps = [
-        ":preprocessing_test_utils",
-        ":text_vectorization",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:config",
-        "//tensorflow/python:dtypes",
-        "//tensorflow/python:framework_test_combinations_lib",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//tensorflow/python/distribute:combinations",
-        "//tensorflow/python/eager:context",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/distribute:strategy_combinations",
-    ],
-)
-
-tf_py_test(
-    name = "reduction_test",
-    srcs = ["reduction_test.py"],
-    python_version = "PY3",
-    shard_count = 4,
-    tags = ["notsan"],  # TODO(b/170783154)
-    deps = [
-        ":reduction",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-tf_py_test(
-    name = "string_lookup_test",
-    srcs = ["string_lookup_test.py"],
-    python_version = "PY3",
-    tags = [
-        "notsan",  #b/168758821
-    ],
-    deps = [
-        ":preprocessing_test_utils",
-        ":string_lookup",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/utils:generic_utils",
-        "//tensorflow/python/ops/ragged:ragged_string_ops",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
-
-tf_py_test(
-    name = "preprocessing_stage_test",
-    srcs = ["preprocessing_stage_test.py"],
-    python_version = "PY3",
-    tags = ["no_windows"],  # TODO(b/152991402)
-    deps = [
-        ":preprocessing_stage",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/keras",
-        "//third_party/py/numpy",
-        "@absl_py//absl/testing:parameterized",
-    ],
-)
diff --git a/tensorflow/python/keras/layers/preprocessing/__init__.py b/tensorflow/python/keras/layers/preprocessing/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/tensorflow/python/keras/layers/preprocessing/__init__.py
+++ /dev/null
diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing.py b/tensorflow/python/keras/layers/preprocessing/category_crossing.py
deleted file mode 100644
index 4e0b63e..0000000
--- a/tensorflow/python/keras/layers/preprocessing/category_crossing.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras category crossing preprocessing layers."""
-# pylint: disable=g-classes-have-attributes
-
-import itertools
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras.engine import base_layer
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops.ragged import ragged_array_ops
-from tensorflow.python.ops.ragged import ragged_tensor
-from tensorflow.python.util.tf_export import keras_export
-
-
-@keras_export('keras.layers.experimental.preprocessing.CategoryCrossing')
-class CategoryCrossing(base_layer.Layer):
-  """Category crossing layer.
-
-  This layer concatenates multiple categorical inputs into a single categorical
-  output (similar to Cartesian product). The output dtype is string.
-
-  Usage:
-  >>> inp_1 = ['a', 'b', 'c']
-  >>> inp_2 = ['d', 'e', 'f']
-  >>> layer = tf.keras.layers.experimental.preprocessing.CategoryCrossing()
-  >>> layer([inp_1, inp_2])
-  <tf.Tensor: shape=(3, 1), dtype=string, numpy=
-    array([[b'a_X_d'],
-           [b'b_X_e'],
-           [b'c_X_f']], dtype=object)>
-
-
-  >>> inp_1 = ['a', 'b', 'c']
-  >>> inp_2 = ['d', 'e', 'f']
-  >>> layer = tf.keras.layers.experimental.preprocessing.CategoryCrossing(
-  ...    separator='-')
-  >>> layer([inp_1, inp_2])
-  <tf.Tensor: shape=(3, 1), dtype=string, numpy=
-    array([[b'a-d'],
-           [b'b-e'],
-           [b'c-f']], dtype=object)>
-
-  Args:
-    depth: depth of input crossing. By default None, all inputs are crossed into
-      one output. It can also be an int or tuple/list of ints. Passing an
-      integer will create combinations of crossed outputs with depth up to that
-      integer, i.e., [1, 2, ..., `depth`], and passing a tuple of integers will
-      create crossed outputs with depth for the specified values in the tuple,
-      i.e., `depth`=(N1, N2) will create all possible crossed outputs with depth
-      equal to N1 or N2. Passing `None` means a single crossed output with all
-      inputs. For example, with inputs `a`, `b` and `c`, `depth=2` means the
-      output will be [a;b;c;cross(a, b);cross(b, c);cross(c, a)].
-    separator: A string added between each input being joined. Defaults to
-      '_X_'.
-    name: Name to give to the layer.
-    **kwargs: Keyword arguments to construct a layer.
-
-  Input shape: a list of string or int tensors or sparse tensors of shape
-    `[batch_size, d1, ..., dm]`
-
-  Output shape: a single string or int tensor or sparse tensor of shape
-    `[batch_size, d1, ..., dm]`
-
-  Returns:
-    If any input is `RaggedTensor`, the output is `RaggedTensor`.
-    Else, if any input is `SparseTensor`, the output is `SparseTensor`.
-    Otherwise, the output is `Tensor`.
-
-  Example: (`depth`=None)
-    If the layer receives three inputs:
-    `a=[[1], [4]]`, `b=[[2], [5]]`, `c=[[3], [6]]`
-    the output will be a string tensor:
-    `[[b'1_X_2_X_3'], [b'4_X_5_X_6']]`
-
-  Example: (`depth` is an integer)
-    With the same input above, and if `depth`=2,
-    the output will be a list of 6 string tensors:
-    `[[b'1'], [b'4']]`
-    `[[b'2'], [b'5']]`
-    `[[b'3'], [b'6']]`
-    `[[b'1_X_2'], [b'4_X_5']]`,
-    `[[b'2_X_3'], [b'5_X_6']]`,
-    `[[b'3_X_1'], [b'6_X_4']]`
-
-  Example: (`depth` is a tuple/list of integers)
-    With the same input above, and if `depth`=(2, 3)
-    the output will be a list of 4 string tensors:
-    `[[b'1_X_2'], [b'4_X_5']]`,
-    `[[b'2_X_3'], [b'5_X_6']]`,
-    `[[b'3_X_1'], [b'6_X_4']]`,
-    `[[b'1_X_2_X_3'], [b'4_X_5_X_6']]`
-  """
-
-  def __init__(self, depth=None, name=None, separator='_X_', **kwargs):
-    super(CategoryCrossing, self).__init__(name=name, **kwargs)
-    self.depth = depth
-    self.separator = separator
-    if isinstance(depth, (tuple, list)):
-      self._depth_tuple = depth
-    elif depth is not None:
-      self._depth_tuple = tuple([i for i in range(1, depth + 1)])
-
-  def partial_crossing(self, partial_inputs, ragged_out, sparse_out):
-    """Gets the crossed output from a partial list/tuple of inputs."""
-    # If ragged_out=True, convert output from sparse to ragged.
-    if ragged_out:
-      # TODO(momernick): Support separator with ragged_cross.
-      if self.separator != '_X_':
-        raise ValueError('Non-default separator with ragged input is not '
-                         'supported yet, given {}'.format(self.separator))
-      return ragged_array_ops.cross(partial_inputs)
-    elif sparse_out:
-      return sparse_ops.sparse_cross(partial_inputs, separator=self.separator)
-    else:
-      return sparse_ops.sparse_tensor_to_dense(
-          sparse_ops.sparse_cross(partial_inputs, separator=self.separator))
-
-  def _preprocess_input(self, inp):
-    if isinstance(inp, (list, tuple, np.ndarray)):
-      inp = ops.convert_to_tensor_v2_with_dispatch(inp)
-    if inp.shape.rank == 1:
-      inp = array_ops.expand_dims(inp, axis=-1)
-    return inp
-
-  def call(self, inputs):
-    inputs = [self._preprocess_input(inp) for inp in inputs]
-    depth_tuple = self._depth_tuple if self.depth else (len(inputs),)
-    ragged_out = sparse_out = False
-    if any(tf_utils.is_ragged(inp) for inp in inputs):
-      ragged_out = True
-    elif any(isinstance(inp, sparse_tensor.SparseTensor) for inp in inputs):
-      sparse_out = True
-
-    outputs = []
-    for depth in depth_tuple:
-      if len(inputs) < depth:
-        raise ValueError(
-            'Number of inputs cannot be less than depth, got {} input tensors, '
-            'and depth {}'.format(len(inputs), depth))
-      for partial_inps in itertools.combinations(inputs, depth):
-        partial_out = self.partial_crossing(
-            partial_inps, ragged_out, sparse_out)
-        outputs.append(partial_out)
-    if sparse_out:
-      return sparse_ops.sparse_concat_v2(axis=1, sp_inputs=outputs)
-    return array_ops.concat(outputs, axis=1)
-
-  def compute_output_shape(self, input_shape):
-    if not isinstance(input_shape, (tuple, list)):
-      raise ValueError('A `CategoryCrossing` layer should be called '
-                       'on a list of inputs.')
-    input_shapes = input_shape
-    batch_size = None
-    for inp_shape in input_shapes:
-      inp_tensor_shape = tensor_shape.TensorShape(inp_shape).as_list()
-      if len(inp_tensor_shape) != 2:
-        raise ValueError('Inputs must be rank 2, got {}'.format(input_shapes))
-      if batch_size is None:
-        batch_size = inp_tensor_shape[0]
-    # The second dimension is dynamic based on inputs.
-    output_shape = [batch_size, None]
-    return tensor_shape.TensorShape(output_shape)
-
-  def compute_output_signature(self, input_spec):
-    input_shapes = [x.shape for x in input_spec]
-    output_shape = self.compute_output_shape(input_shapes)
-    if any(
-        isinstance(inp_spec, ragged_tensor.RaggedTensorSpec)
-        for inp_spec in input_spec):
-      return tensor_spec.TensorSpec(shape=output_shape, dtype=dtypes.string)
-    elif any(
-        isinstance(inp_spec, sparse_tensor.SparseTensorSpec)
-        for inp_spec in input_spec):
-      return sparse_tensor.SparseTensorSpec(
-          shape=output_shape, dtype=dtypes.string)
-    return tensor_spec.TensorSpec(shape=output_shape, dtype=dtypes.string)
-
-  def get_config(self):
-    config = {
-        'depth': self.depth,
-        'separator': self.separator,
-    }
-    base_config = super(CategoryCrossing, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/category_crossing_distribution_test.py
deleted file mode 100644
index 4807f23..0000000
--- a/tensorflow/python/keras/layers/preprocessing/category_crossing_distribution_test.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.category_crossing."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.framework import config
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute.strategy_combinations import all_strategies
-from tensorflow.python.keras.layers.preprocessing import category_crossing
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.platform import test
-
-
-def batch_wrapper(dataset, batch_size, distribution, repeat=None):
-  if repeat:
-    dataset = dataset.repeat(repeat)
-  # TPUs currently require fully defined input shapes, drop_remainder ensures
-  # the input will have fully defined shapes.
-  if backend.is_tpu_strategy(distribution):
-    return dataset.batch(batch_size, drop_remainder=True)
-  else:
-    return dataset.batch(batch_size)
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        # Investigate why crossing is not supported with TPU.
-        distribution=all_strategies,
-        mode=['eager', 'graph']))
-class CategoryCrossingDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_distribution(self, distribution):
-    input_array_1 = np.array([['a', 'b'], ['c', 'd']])
-    input_array_2 = np.array([['e', 'f'], ['g', 'h']])
-    inp_dataset = dataset_ops.DatasetV2.from_tensor_slices(
-        {'input_1': input_array_1, 'input_2': input_array_2})
-    inp_dataset = batch_wrapper(inp_dataset, 2, distribution)
-
-    # pyformat: disable
-    expected_output = [[b'a_X_e', b'a_X_f', b'b_X_e', b'b_X_f'],
-                       [b'c_X_g', b'c_X_h', b'd_X_g', b'd_X_h']]
-    config.set_soft_device_placement(True)
-
-    with distribution.scope():
-      input_data_1 = keras.Input(shape=(2,), dtype=dtypes.string,
-                                 name='input_1')
-      input_data_2 = keras.Input(shape=(2,), dtype=dtypes.string,
-                                 name='input_2')
-      input_data = [input_data_1, input_data_2]
-      layer = category_crossing.CategoryCrossing()
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(inp_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py b/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py
deleted file mode 100644
index e65bccb..0000000
--- a/tensorflow/python/keras/layers/preprocessing/category_crossing_test.py
+++ /dev/null
@@ -1,259 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for categorical preprocessing layers."""
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.engine import input_layer
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.layers.preprocessing import category_crossing
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.ops.ragged import ragged_tensor
-from tensorflow.python.platform import test
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoryCrossingTest(keras_parameterized.TestCase):
-
-  def test_crossing_sparse_inputs(self):
-    layer = category_crossing.CategoryCrossing()
-    inputs_0 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [1, 1]],
-        values=['a', 'b', 'c'],
-        dense_shape=[2, 2])
-    inputs_1 = sparse_tensor.SparseTensor(
-        indices=[[0, 1], [1, 2]], values=['d', 'e'], dense_shape=[2, 3])
-    output = layer([inputs_0, inputs_1])
-    self.assertAllClose(np.asarray([[0, 0], [1, 0], [1, 1]]), output.indices)
-    self.assertAllEqual([b'a_X_d', b'b_X_e', b'c_X_e'], output.values)
-
-  def test_crossing_sparse_inputs_custom_sep(self):
-    layer = category_crossing.CategoryCrossing(separator='_Y_')
-    inputs_0 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [1, 1]],
-        values=['a', 'b', 'c'],
-        dense_shape=[2, 2])
-    inputs_1 = sparse_tensor.SparseTensor(
-        indices=[[0, 1], [1, 2]], values=['d', 'e'], dense_shape=[2, 3])
-    output = layer([inputs_0, inputs_1])
-    self.assertAllClose(np.asarray([[0, 0], [1, 0], [1, 1]]), output.indices)
-    self.assertAllEqual([b'a_Y_d', b'b_Y_e', b'c_Y_e'], output.values)
-
-  def test_crossing_sparse_inputs_empty_sep(self):
-    layer = category_crossing.CategoryCrossing(separator='')
-    inputs_0 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [1, 1]],
-        values=['a', 'b', 'c'],
-        dense_shape=[2, 2])
-    inputs_1 = sparse_tensor.SparseTensor(
-        indices=[[0, 1], [1, 2]], values=['d', 'e'], dense_shape=[2, 3])
-    output = layer([inputs_0, inputs_1])
-    self.assertAllClose(np.asarray([[0, 0], [1, 0], [1, 1]]), output.indices)
-    self.assertAllEqual([b'ad', b'be', b'ce'], output.values)
-
-  def test_crossing_sparse_inputs_depth_int(self):
-    layer = category_crossing.CategoryCrossing(depth=1)
-    inputs_0 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [2, 0]],
-        values=['a', 'b', 'c'],
-        dense_shape=[3, 1])
-    inputs_1 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [2, 0]],
-        values=['d', 'e', 'f'],
-        dense_shape=[3, 1])
-    output = layer([inputs_0, inputs_1])
-    self.assertIsInstance(output, sparse_tensor.SparseTensor)
-    output = sparse_ops.sparse_tensor_to_dense(output)
-    expected_out = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
-    self.assertAllEqual(expected_out, output)
-
-  def test_crossing_sparse_inputs_depth_tuple(self):
-    layer = category_crossing.CategoryCrossing(depth=(2, 3))
-    inputs_0 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [2, 0]],
-        values=['a', 'b', 'c'],
-        dense_shape=[3, 1])
-    inputs_1 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [2, 0]],
-        values=['d', 'e', 'f'],
-        dense_shape=[3, 1])
-    inputs_2 = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 0], [2, 0]],
-        values=['g', 'h', 'i'],
-        dense_shape=[3, 1])
-    inp_0_t = input_layer.Input(shape=(1,), sparse=True, dtype=dtypes.string)
-    inp_1_t = input_layer.Input(shape=(1,), sparse=True, dtype=dtypes.string)
-    inp_2_t = input_layer.Input(shape=(1,), sparse=True, dtype=dtypes.string)
-    out_t = layer([inp_0_t, inp_1_t, inp_2_t])
-    model = training.Model([inp_0_t, inp_1_t, inp_2_t], out_t)
-    output = model.predict([inputs_0, inputs_1, inputs_2])
-    self.assertIsInstance(output, sparse_tensor.SparseTensor)
-    output = sparse_ops.sparse_tensor_to_dense(output)
-    expected_outputs_0 = [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']]
-    expected_outputs_1 = [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']]
-    expected_outputs_2 = [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']]
-    expected_out = array_ops.concat(
-        [expected_outputs_0, expected_outputs_1, expected_outputs_2], axis=0)
-    self.assertAllEqual(expected_out, output)
-
-  def test_crossing_ragged_inputs(self):
-    inputs_0 = ragged_factory_ops.constant(
-        [['omar', 'skywalker'], ['marlo']],
-        dtype=dtypes.string)
-    inputs_1 = ragged_factory_ops.constant(
-        [['a'], ['b']],
-        dtype=dtypes.string)
-    inp_0_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    inp_1_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-
-    non_hashed_layer = category_crossing.CategoryCrossing()
-    out_t = non_hashed_layer([inp_0_t, inp_1_t])
-    model = training.Model(inputs=[inp_0_t, inp_1_t], outputs=out_t)
-    expected_output = [[b'omar_X_a', b'skywalker_X_a'], [b'marlo_X_b']]
-    self.assertAllEqual(expected_output, model.predict([inputs_0, inputs_1]))
-
-  def test_crossing_ragged_inputs_depth_int(self):
-    layer = category_crossing.CategoryCrossing(depth=1)
-    inputs_0 = ragged_factory_ops.constant([['a'], ['b'], ['c']])
-    inputs_1 = ragged_factory_ops.constant([['d'], ['e'], ['f']])
-    output = layer([inputs_0, inputs_1])
-    expected_output = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
-    self.assertIsInstance(output, ragged_tensor.RaggedTensor)
-    self.assertAllEqual(expected_output, output)
-
-    layer = category_crossing.CategoryCrossing(depth=2)
-    inp_0_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    inp_1_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    out_t = layer([inp_0_t, inp_1_t])
-    model = training.Model([inp_0_t, inp_1_t], out_t)
-    expected_output = [[b'a', b'd', b'a_X_d'], [b'b', b'e', b'b_X_e'],
-                       [b'c', b'f', b'c_X_f']]
-    self.assertAllEqual(expected_output, model.predict([inputs_0, inputs_1]))
-
-  def test_crossing_ragged_inputs_depth_tuple(self):
-    layer = category_crossing.CategoryCrossing(depth=[2, 3])
-    inputs_0 = ragged_factory_ops.constant([['a'], ['b'], ['c']])
-    inputs_1 = ragged_factory_ops.constant([['d'], ['e'], ['f']])
-    inputs_2 = ragged_factory_ops.constant([['g'], ['h'], ['i']])
-    inp_0_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    inp_1_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    inp_2_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    out_t = layer([inp_0_t, inp_1_t, inp_2_t])
-    model = training.Model([inp_0_t, inp_1_t, inp_2_t], out_t)
-    expected_output = [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g'],
-                       [b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h'],
-                       [b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']]
-    output = model.predict([inputs_0, inputs_1, inputs_2])
-    self.assertIsInstance(output, ragged_tensor.RaggedTensor)
-    self.assertAllEqual(expected_output, output)
-
-  def test_crossing_with_dense_inputs(self):
-    layer = category_crossing.CategoryCrossing()
-    inputs_0 = np.asarray([[1, 2]])
-    inputs_1 = np.asarray([[1, 3]])
-    output = layer([inputs_0, inputs_1])
-    self.assertAllEqual([[b'1_X_1', b'1_X_3', b'2_X_1', b'2_X_3']], output)
-
-  def test_crossing_with_list_inputs(self):
-    layer = category_crossing.CategoryCrossing()
-    inputs_0 = [[1, 2]]
-    inputs_1 = [[1, 3]]
-    output = layer([inputs_0, inputs_1])
-    self.assertAllEqual([[b'1_X_1', b'1_X_3', b'2_X_1', b'2_X_3']], output)
-
-    inputs_0 = [1, 2]
-    inputs_1 = [1, 3]
-    output = layer([inputs_0, inputs_1])
-    self.assertAllEqual([[b'1_X_1'], [b'2_X_3']], output)
-
-    inputs_0 = np.asarray([1, 2])
-    inputs_1 = np.asarray([1, 3])
-    output = layer([inputs_0, inputs_1])
-    self.assertAllEqual([[b'1_X_1'], [b'2_X_3']], output)
-
-  def test_crossing_dense_inputs_depth_int(self):
-    layer = category_crossing.CategoryCrossing(depth=1)
-    inputs_0 = constant_op.constant([['a'], ['b'], ['c']])
-    inputs_1 = constant_op.constant([['d'], ['e'], ['f']])
-    output = layer([inputs_0, inputs_1])
-    expected_output = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
-    self.assertAllEqual(expected_output, output)
-
-    layer = category_crossing.CategoryCrossing(depth=2)
-    inp_0_t = input_layer.Input(shape=(1,), dtype=dtypes.string)
-    inp_1_t = input_layer.Input(shape=(1,), dtype=dtypes.string)
-    out_t = layer([inp_0_t, inp_1_t])
-    model = training.Model([inp_0_t, inp_1_t], out_t)
-    crossed_output = [[b'a_X_d'], [b'b_X_e'], [b'c_X_f']]
-    expected_output = array_ops.concat([expected_output, crossed_output],
-                                       axis=1)
-    self.assertAllEqual(expected_output, model.predict([inputs_0, inputs_1]))
-
-  def test_crossing_dense_inputs_depth_tuple(self):
-    layer = category_crossing.CategoryCrossing(depth=[2, 3])
-    inputs_0 = constant_op.constant([['a'], ['b'], ['c']])
-    inputs_1 = constant_op.constant([['d'], ['e'], ['f']])
-    inputs_2 = constant_op.constant([['g'], ['h'], ['i']])
-    inp_0_t = input_layer.Input(shape=(1,), dtype=dtypes.string)
-    inp_1_t = input_layer.Input(shape=(1,), dtype=dtypes.string)
-    inp_2_t = input_layer.Input(shape=(1,), dtype=dtypes.string)
-    out_t = layer([inp_0_t, inp_1_t, inp_2_t])
-    model = training.Model([inp_0_t, inp_1_t, inp_2_t], out_t)
-    expected_outputs_0 = [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']]
-    expected_outputs_1 = [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']]
-    expected_outputs_2 = [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']]
-    expected_output = array_ops.concat(
-        [expected_outputs_0, expected_outputs_1, expected_outputs_2], axis=0)
-    self.assertAllEqual(expected_output,
-                        model.predict([inputs_0, inputs_1, inputs_2]))
-
-  def test_crossing_compute_output_signature(self):
-    input_shapes = [
-        tensor_shape.TensorShape([2, 2]),
-        tensor_shape.TensorShape([2, 3])
-    ]
-    input_specs = [
-        tensor_spec.TensorSpec(input_shape, dtypes.string)
-        for input_shape in input_shapes
-    ]
-    layer = category_crossing.CategoryCrossing()
-    output_spec = layer.compute_output_signature(input_specs)
-    self.assertEqual(output_spec.shape.dims[0], input_shapes[0].dims[0])
-    self.assertEqual(output_spec.dtype, dtypes.string)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = category_crossing.CategoryCrossing(depth=2, name='hashing')
-    config = layer.get_config()
-    layer_1 = category_crossing.CategoryCrossing.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-    layer = category_crossing.CategoryCrossing(name='hashing')
-    config = layer.get_config()
-    layer_1 = category_crossing.CategoryCrossing.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/category_encoding.py b/tensorflow/python/keras/layers/preprocessing/category_encoding.py
deleted file mode 100644
index fde5927..0000000
--- a/tensorflow/python/keras/layers/preprocessing/category_encoding.py
+++ /dev/null
@@ -1,274 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras CategoryEncoding preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.engine import base_layer
-from tensorflow.python.keras.utils import layer_utils
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import bincount_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util.tf_export import keras_export
-
-INT = "int"
-ONE_HOT = "one_hot"
-MULTI_HOT = "multi_hot"
-COUNT = "count"
-
-
-@keras_export("keras.layers.experimental.preprocessing.CategoryEncoding")
-class CategoryEncoding(base_layer.Layer):
-  """Category encoding layer.
-
-  This layer provides options for condensing data into a categorical encoding
-  when the total number of tokens is known in advance. It accepts integer
-  values as inputs and outputs a dense representation of those inputs (one
-  sample = a 1-D tensor of float values describing the sample's tokens).
-  For integer inputs where the total number of tokens is not known, see
-  `tf.keras.layers.experimental.preprocessing.IntegerLookup`.
-
-  Examples:
-
-  **One-hot encoding data**
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.CategoryEncoding(
-  ...           num_tokens=4, output_mode="one_hot")
-  >>> layer([3, 2, 0, 1])
-  <tf.Tensor: shape=(4, 4), dtype=float32, numpy=
-    array([[0., 0., 0., 1.],
-           [0., 0., 1., 0.],
-           [1., 0., 0., 0.],
-           [0., 1., 0., 0.]], dtype=float32)>
-
-  **Multi-hot encoding data**
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.CategoryEncoding(
-  ...           num_tokens=4, output_mode="multi_hot")
-  >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]])
-  <tf.Tensor: shape=(4, 4), dtype=float32, numpy=
-    array([[1., 1., 0., 0.],
-           [1., 0., 0., 0.],
-           [0., 1., 1., 0.],
-           [0., 1., 0., 1.]], dtype=float32)>
-
-  **Using weighted inputs in `"count"` mode**
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.CategoryEncoding(
-  ...           num_tokens=4, output_mode="count")
-  >>> count_weights = np.array([[.1, .2], [.1, .1], [.2, .3], [.4, .2]])
-  >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]], count_weights=count_weights)
-  <tf.Tensor: shape=(4, 4), dtype=float64, numpy=
-    array([[0.1, 0.2, 0. , 0. ],
-           [0.2, 0. , 0. , 0. ],
-           [0. , 0.2, 0.3, 0. ],
-           [0. , 0.2, 0. , 0.4]])>
-
-  Args:
-    num_tokens: The total number of tokens the layer should support. All inputs
-      to the layer must be integers in the range 0 <= value < num_tokens or an
-      error will be thrown.
-    output_mode: Specification for the output of the layer.
-      Defaults to `"multi_hot"`. Values can be `"one_hot"`, `"multi_hot"` or
-      `"count"`, configuring the layer as follows:
-        - `"one_hot"`: Encodes each individual element in the input into an
-          array of `num_tokens` size, containing a 1 at the element index. If
-          the last dimension is size 1, will encode on that dimension. If the
-          last dimension is not size 1, will append a new dimension for the
-          encoded output.
-        - `"multi_hot"`: Encodes each sample in the input into a single array
-          of `num_tokens` size, containing a 1 for each vocabulary term present
-          in the sample. Treats the last dimension as the sample dimension: if
-          the input shape is `(..., sample_length)`, the output shape will be
-          `(..., num_tokens)`.
-        - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-          number of times the token at that index appeared in the sample.
-    sparse: Boolean. If true, returns a `SparseTensor` instead of a dense
-      `Tensor`. Defaults to `False`.
-
-  Call arguments:
-    inputs: A 2D tensor `(samples, timesteps)`.
-    count_weights: A 2D tensor with the same shape as `inputs` indicating the
-      weight for each sample value when summing up in `"count"` mode. Not used
-      in `"multi_hot"` mode.
-  """
-
-  def __init__(self,
-               num_tokens=None,
-               output_mode=MULTI_HOT,
-               sparse=False,
-               **kwargs):
-    # max_tokens is a deprecated alias for the num_tokens arg that we continue
-    # to support because of existing usage.
-    if "max_tokens" in kwargs:
-      logging.warning(
-          "max_tokens is deprecated, please use num_tokens instead.")
-      num_tokens = kwargs["max_tokens"]
-      del kwargs["max_tokens"]
-
-    super(CategoryEncoding, self).__init__(**kwargs)
-
-    # Support deprecated names for output_modes.
-    if output_mode == "binary":
-      output_mode = MULTI_HOT
-    # 'output_mode' must be one of (COUNT, ONE_HOT, MULTI_HOT)
-    layer_utils.validate_string_arg(
-        output_mode,
-        allowable_strings=(COUNT, ONE_HOT, MULTI_HOT),
-        layer_name="CategoryEncoding",
-        arg_name="output_mode")
-
-    if num_tokens is None:
-      raise ValueError("num_tokens must be set to use this layer. If the "
-                       "number of tokens is not known beforehand, use the "
-                       "IntegerLookup layer instead.")
-    if num_tokens < 1:
-      raise ValueError("num_tokens must be >= 1.")
-
-    self.num_tokens = num_tokens
-    self.output_mode = output_mode
-    self.sparse = sparse
-
-  def compute_output_shape(self, input_shape):
-    if not input_shape:
-      return tensor_shape.TensorShape([self.num_tokens])
-    if self.output_mode == ONE_HOT and input_shape[-1] != 1:
-      return tensor_shape.TensorShape(input_shape + [self.num_tokens])
-    else:
-      return tensor_shape.TensorShape(input_shape[:-1] + [self.num_tokens])
-
-  def compute_output_signature(self, input_spec):
-    output_shape = self.compute_output_shape(input_spec.shape.as_list())
-    if self.sparse:
-      return sparse_tensor.SparseTensorSpec(
-          shape=output_shape, dtype=dtypes.int64)
-    else:
-      return tensor_spec.TensorSpec(shape=output_shape, dtype=dtypes.int64)
-
-  def get_config(self):
-    config = {
-        "num_tokens": self.num_tokens,
-        "output_mode": self.output_mode,
-        "sparse": self.sparse,
-    }
-    base_config = super(CategoryEncoding, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def call(self, inputs, count_weights=None):
-    if isinstance(inputs, (list, np.ndarray)):
-      inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
-
-    def expand_dims(inputs, axis):
-      if tf_utils.is_sparse(inputs):
-        return sparse_ops.sparse_expand_dims(inputs, axis)
-      else:
-        return array_ops.expand_dims(inputs, axis)
-
-    original_shape = inputs.shape
-    # In all cases, we should uprank scalar input to a single sample.
-    if inputs.shape.rank == 0:
-      inputs = expand_dims(inputs, -1)
-    # One-hot will uprank only if the final output dimension is not already 1.
-    if self.output_mode == ONE_HOT:
-      if inputs.shape[-1] != 1:
-        inputs = expand_dims(inputs, -1)
-
-    # TODO(b/190445202): remove output rank restriction.
-    if inputs.shape.rank > 2:
-      raise ValueError(
-          "Received input shape {}, which would result in output rank {}. "
-          "Currently only outputs up to rank 2 are supported.".format(
-              original_shape, inputs.shape.rank))
-
-    if count_weights is not None and self.output_mode != COUNT:
-      raise ValueError(
-          "`count_weights` is not used when `output_mode` is not `'count'`. "
-          "Received `count_weights={}`.".format(count_weights))
-
-    out_depth = self.num_tokens
-    binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
-    if isinstance(inputs, sparse_tensor.SparseTensor):
-      max_value = math_ops.reduce_max(inputs.values)
-      min_value = math_ops.reduce_min(inputs.values)
-    else:
-      max_value = math_ops.reduce_max(inputs)
-      min_value = math_ops.reduce_min(inputs)
-    condition = math_ops.logical_and(
-        math_ops.greater(
-            math_ops.cast(out_depth, max_value.dtype), max_value),
-        math_ops.greater_equal(
-            min_value, math_ops.cast(0, min_value.dtype)))
-    assertion = control_flow_ops.Assert(condition, [
-        "Input values must be in the range 0 <= values < num_tokens"
-        " with num_tokens={}".format(out_depth)
-    ])
-    with ops.control_dependencies([assertion]):
-      if self.sparse:
-        return sparse_bincount(inputs, out_depth, binary_output,
-                               count_weights)
-      else:
-        return dense_bincount(inputs, out_depth, binary_output,
-                              count_weights)
-
-
-def sparse_bincount(inputs, out_depth, binary_output, count_weights=None):
-  """Apply binary or count encoding to an input and return a sparse tensor."""
-  result = bincount_ops.sparse_bincount(
-      inputs,
-      weights=count_weights,
-      minlength=out_depth,
-      maxlength=out_depth,
-      axis=-1,
-      binary_output=binary_output)
-  if inputs.shape.rank == 1:
-    output_shape = (out_depth,)
-  else:
-    result = math_ops.cast(result, backend.floatx())
-    batch_size = array_ops.shape(result)[0]
-    output_shape = (batch_size, out_depth)
-  result = sparse_tensor.SparseTensor(
-      indices=result.indices,
-      values=result.values,
-      dense_shape=output_shape)
-  return result
-
-
-def dense_bincount(inputs, out_depth, binary_output, count_weights=None):
-  """Apply binary or count encoding to an input."""
-  result = bincount_ops.bincount(
-      inputs,
-      weights=count_weights,
-      minlength=out_depth,
-      maxlength=out_depth,
-      dtype=backend.floatx(),
-      axis=-1,
-      binary_output=binary_output)
-  if inputs.shape.rank == 1:
-    result.set_shape(tensor_shape.TensorShape((out_depth,)))
-  else:
-    batch_size = inputs.shape.as_list()[0]
-    result.set_shape(tensor_shape.TensorShape((batch_size, out_depth)))
-  return result
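As a rough reference for what the deleted layer computed, here is a minimal sketch of the multi-hot path using the public tf.math.bincount op (which the dense_bincount helper above wraps); it assumes a TF 2.x install where bincount accepts the axis and binary_output arguments.

import tensorflow as tf

inputs = tf.constant([[0, 1], [0, 0], [1, 2], [3, 1]])
num_tokens = 4

# Binary per-row counts: a 1.0 wherever a token index appears in the row.
multi_hot = tf.math.bincount(
    inputs, minlength=num_tokens, maxlength=num_tokens,
    dtype=tf.float32, axis=-1, binary_output=True)
print(multi_hot.numpy())
# [[1. 1. 0. 0.]
#  [1. 0. 0. 0.]
#  [0. 1. 1. 0.]
#  [0. 1. 0. 1.]]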
diff --git a/tensorflow/python/keras/layers/preprocessing/category_encoding_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/category_encoding_distribution_test.py
deleted file mode 100644
index e4e0d12..0000000
--- a/tensorflow/python/keras/layers/preprocessing/category_encoding_distribution_test.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.category_encoding."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.distribute import multi_process_runner
-from tensorflow.python.framework import config
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import strategy_combinations
-from tensorflow.python.keras.layers.preprocessing import category_encoding
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-
-
-def batch_wrapper(dataset, batch_size, strategy, repeat=None):
-  if repeat:
-    dataset = dataset.repeat(repeat)
-  # TPUs currently require fully defined input shapes; drop_remainder ensures
-  # the input will have fully defined shapes.
-  if backend.is_tpu_strategy(strategy):
-    return dataset.batch(batch_size, drop_remainder=True)
-  else:
-    return dataset.batch(batch_size)
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        # (b/156783625): Outside compilation failed for eager mode only.
-        strategy=strategy_combinations.strategies_minus_tpu +
-        strategy_combinations.multi_worker_mirrored_strategies,
-        mode=["eager", "graph"]))
-class CategoryEncodingDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_strategy(self, strategy):
-    input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]])
-    inp_dataset = dataset_ops.DatasetV2.from_tensor_slices(input_array)
-    inp_dataset = batch_wrapper(inp_dataset, 2, strategy)
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0, 0],
-                       [1, 1, 0, 1, 0, 0]]
-    # pyformat: enable
-    num_tokens = 6
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(4,), dtype=dtypes.int32)
-      layer = category_encoding.CategoryEncoding(
-          num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(inp_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-if __name__ == "__main__":
-  v2_compat.enable_v2_behavior()
-  multi_process_runner.test_main()
diff --git a/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py b/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py
deleted file mode 100644
index f955ee4..0000000
--- a/tensorflow/python/keras/layers/preprocessing/category_encoding_test.py
+++ /dev/null
@@ -1,505 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras text category_encoding preprocessing layer."""
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.layers.preprocessing import category_encoding
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import test
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoryEncodingInputTest(keras_parameterized.TestCase,
-                                preprocessing_test_utils.PreprocessingLayerTest
-                               ):
-
-  def test_dense_input_sparse_output(self):
-    input_array = constant_op.constant([[1, 2, 3], [3, 3, 0]])
-
-    # The expected output should be (X for missing value):
-    # [[X, 1, 1, 1, X, X]
-    #  [1, X, X, 2, X, X]]
-    expected_indices = [[0, 1], [0, 2], [0, 3], [1, 0], [1, 3]]
-    expected_values = [1, 1, 1, 1, 2]
-    num_tokens = 6
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32)
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True)
-    int_data = layer(input_data)
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    sp_output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_values, sp_output_dataset.values)
-    self.assertAllEqual(expected_indices, sp_output_dataset.indices)
-
-    # Assert sparse output is same as dense output.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens,
-        output_mode=category_encoding.COUNT,
-        sparse=False)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(
-        sparse_ops.sparse_tensor_to_dense(sp_output_dataset, default_value=0),
-        output_dataset)
-
-  def test_sparse_input(self):
-    input_array = np.array([[1, 2, 3, 0], [0, 3, 1, 0]], dtype=np.int64)
-    sparse_tensor_data = sparse_ops.from_dense(input_array)
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0, 0],
-                       [0, 1, 0, 1, 0, 0]]
-    # pyformat: enable
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(sparse_tensor_data, steps=1)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_sparse_input_with_weights(self):
-    input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 4]], dtype=np.int64)
-    weights_array = np.array([[.1, .2, .3, .4], [.2, .1, .4, .3]])
-    sparse_tensor_data = sparse_ops.from_dense(input_array)
-    sparse_weight_data = sparse_ops.from_dense(weights_array)
-
-    # pyformat: disable
-    expected_output = [[0, .1, .2, .3, .4, 0],
-                       [0, .4, 0, .1, .5, 0]]
-    # pyformat: enable
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    weight_data = keras.Input(shape=(None,), dtype=dtypes.float32, sparse=True)
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.COUNT)
-    int_data = layer(input_data, count_weights=weight_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=[input_data, weight_data], outputs=int_data)
-    output_dataset = model.predict([sparse_tensor_data, sparse_weight_data],
-                                   steps=1)
-    self.assertAllClose(expected_output, output_dataset)
-
-  def test_sparse_input_sparse_output(self):
-    sp_inp = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]],
-        values=[0, 2, 1, 1, 0],
-        dense_shape=[4, 2])
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-
-    # The expected output should be (X for missing value):
-    # [[1, X, X, X]
-    #  [X, X, 1, X]
-    #  [X, 2, X, X]
-    #  [1, X, X, X]]
-    expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]]
-    expected_values = [1, 1, 2, 1]
-    num_tokens = 6
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True)
-    int_data = layer(input_data)
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    sp_output_dataset = model.predict(sp_inp, steps=1)
-    self.assertAllEqual(expected_values, sp_output_dataset.values)
-    self.assertAllEqual(expected_indices, sp_output_dataset.indices)
-
-    # Assert sparse output is same as dense output.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens,
-        output_mode=category_encoding.COUNT,
-        sparse=False)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(sp_inp, steps=1)
-    self.assertAllEqual(
-        sparse_ops.sparse_tensor_to_dense(sp_output_dataset, default_value=0),
-        output_dataset)
-
-  def test_sparse_input_sparse_output_with_weights(self):
-    indices = [[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]]
-    sp_inp = sparse_tensor.SparseTensor(
-        indices=indices, values=[0, 2, 1, 1, 0], dense_shape=[4, 2])
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    sp_weight = sparse_tensor.SparseTensor(
-        indices=indices, values=[.1, .2, .4, .3, .2], dense_shape=[4, 2])
-    weight_data = keras.Input(shape=(None,), dtype=dtypes.float32, sparse=True)
-
-    # The expected output should be (X for missing value):
-    # [[1, X, X, X]
-    #  [X, X, 1, X]
-    #  [X, 2, X, X]
-    #  [1, X, X, X]]
-    expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]]
-    expected_values = [.1, .2, .7, .2]
-    num_tokens = 6
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True)
-    int_data = layer(input_data, count_weights=weight_data)
-
-    model = keras.Model(inputs=[input_data, weight_data], outputs=int_data)
-    sp_output_dataset = model.predict([sp_inp, sp_weight], steps=1)
-    self.assertAllClose(expected_values, sp_output_dataset.values)
-    self.assertAllEqual(expected_indices, sp_output_dataset.indices)
-
-  def test_ragged_input(self):
-    input_array = ragged_factory_ops.constant([[1, 2, 3], [3, 1]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0, 0],
-                       [0, 1, 0, 1, 0, 0]]
-    # pyformat: enable
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32, ragged=True)
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-    int_data = layer(input_data)
-
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_ragged_input_sparse_output(self):
-    input_array = ragged_factory_ops.constant([[1, 2, 3], [3, 3]])
-
-    # The expected output should be (X for missing value):
-    # [[X, 1, 1, 1]
-    #  [X, X, X, 2]]
-    expected_indices = [[0, 1], [0, 2], [0, 3], [1, 3]]
-    expected_values = [1, 1, 1, 2]
-    num_tokens = 6
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32, ragged=True)
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True)
-    int_data = layer(input_data)
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    sp_output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_values, sp_output_dataset.values)
-    self.assertAllEqual(expected_indices, sp_output_dataset.indices)
-
-    # Assert sparse output is same as dense output.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens,
-        output_mode=category_encoding.COUNT,
-        sparse=False)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(
-        sparse_ops.sparse_tensor_to_dense(sp_output_dataset, default_value=0),
-        output_dataset)
-
-  def test_sparse_output_and_dense_layer(self):
-    input_array = constant_op.constant([[1, 2, 3], [3, 3, 0]])
-
-    num_tokens = 4
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32)
-    encoding_layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True)
-    int_data = encoding_layer(input_data)
-    dense_layer = keras.layers.Dense(units=1)
-    output_data = dense_layer(int_data)
-
-    model = keras.Model(inputs=input_data, outputs=output_data)
-    _ = model.predict(input_array, steps=1)
-
-  def test_dense_oov_input(self):
-    valid_array = constant_op.constant([[0, 1, 2], [0, 1, 2]])
-    invalid_array = constant_op.constant([[0, 1, 2], [2, 3, 1]])
-    num_tokens = 3
-    expected_output_shape = [None, num_tokens]
-    encoder_layer = category_encoding.CategoryEncoding(num_tokens)
-    input_data = keras.Input(shape=(3,), dtype=dtypes.int32)
-    int_data = encoder_layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    # Call predict once on valid input to compile a graph and test control flow.
-    _ = model.predict(valid_array, steps=1)
-    with self.assertRaisesRegex(
-        errors.InvalidArgumentError,
-        ".*must be in the range 0 <= values < num_tokens.*"):
-      _ = model.predict(invalid_array, steps=1)
-
-  def test_dense_negative(self):
-    valid_array = constant_op.constant([[0, 1, 2], [0, 1, 2]])
-    invalid_array = constant_op.constant([[1, 2, 0], [2, 2, -1]])
-    num_tokens = 3
-    expected_output_shape = [None, num_tokens]
-    encoder_layer = category_encoding.CategoryEncoding(num_tokens)
-    input_data = keras.Input(shape=(3,), dtype=dtypes.int32)
-    int_data = encoder_layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    # Call predict once on valid input to compile a graph and test control flow.
-    _ = model.predict(valid_array, steps=1)
-    with self.assertRaisesRegex(
-        errors.InvalidArgumentError,
-        ".*must be in the range 0 <= values < num_tokens.*"):
-      _ = model.predict(invalid_array, steps=1)
-
-  def test_legacy_max_tokens_arg(self):
-    input_array = np.array([[1, 2, 3, 1]])
-    expected_output = [[0, 1, 1, 1, 0, 0]]
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32)
-    layer = category_encoding.CategoryEncoding(
-        max_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes
-class CategoryEncodingOutputTest(keras_parameterized.TestCase,
-                                 preprocessing_test_utils.PreprocessingLayerTest
-                                ):
-
-  def test_one_hot_output(self):
-    input_data = np.array([[3], [2], [0], [1]])
-    expected_output = [
-        [0, 0, 0, 1],
-        [0, 0, 1, 0],
-        [1, 0, 0, 0],
-        [0, 1, 0, 0],
-    ]
-    num_tokens = 4
-    expected_output_shape = [None, num_tokens]
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT)
-    inputs = keras.Input(shape=(1,), dtype=dtypes.int32)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-    output_dataset = model(input_data)
-    self.assertAllEqual(expected_output_shape, outputs.shape.as_list())
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_one_hot_output_rank_one_input(self):
-    input_data = np.array([3, 2, 0, 1])
-    expected_output = [
-        [0, 0, 0, 1],
-        [0, 0, 1, 0],
-        [1, 0, 0, 0],
-        [0, 1, 0, 0],
-    ]
-    num_tokens = 4
-    expected_output_shape = [None, num_tokens]
-
-    # Test call on layer directly.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT)
-    output_data = layer(input_data)
-    self.assertAllEqual(expected_output, output_data)
-
-    # Test call on model.
-    inputs = keras.Input(shape=(1,), dtype=dtypes.int32)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-    output_data = model(input_data)
-    self.assertAllEqual(expected_output_shape, outputs.shape.as_list())
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_one_hot_output_rank_zero_input(self):
-    input_data = np.array(3)
-    expected_output = [0, 0, 0, 1]
-    num_tokens = 4
-    expected_output_shape = [None, num_tokens]
-
-    # Test call on layer directly.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.ONE_HOT)
-    output_data = layer(input_data)
-    self.assertAllEqual(expected_output, output_data)
-
-    # Test call on model.
-    inputs = keras.Input(shape=(1,), dtype=dtypes.int32)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-    output_data = model(input_data)
-
-    self.assertAllEqual(expected_output_shape, outputs.shape.as_list())
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_one_hot_rank_3_output_fails(self):
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=4, output_mode=category_encoding.ONE_HOT)
-    with self.assertRaisesRegex(ValueError, "only outputs up to rank 2"):
-      _ = layer(keras.Input(shape=(4,), dtype=dtypes.int32))
-    with self.assertRaisesRegex(ValueError, "only outputs up to rank 2"):
-      _ = layer(np.array([[3, 2, 0, 1], [3, 2, 0, 1]]))
-
-  def test_multi_hot_output(self):
-    input_data = np.array([[1, 2, 3, 1], [0, 3, 1, 0]])
-    expected_output = [
-        [0, 1, 1, 1, 0, 0],
-        [1, 1, 0, 1, 0, 0],
-    ]
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-    inputs = keras.Input(shape=(None,), dtype=dtypes.int32)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-    output_data = model.predict(input_data)
-    self.assertAllEqual(expected_output_shape, outputs.shape.as_list())
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_multi_hot_output_rank_one_input(self):
-    input_data = np.array([3, 2, 0, 1])
-    expected_output = [1, 1, 1, 1, 0, 0]
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    # Test call on layer directly.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-    output_data = layer(input_data)
-    self.assertAllEqual(expected_output, output_data)
-
-    # Test call on model.
-    inputs = keras.Input(shape=(4,), dtype=dtypes.int32)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-    output_data = model(input_data)
-    self.assertAllEqual(expected_output_shape, outputs.shape.as_list())
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_multi_hot_output_rank_zero_input(self):
-    input_data = np.array(3)
-    expected_output = [0, 0, 0, 1, 0, 0]
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    # Test call on layer directly.
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=category_encoding.MULTI_HOT)
-    output_data = layer(input_data)
-    self.assertAllEqual(expected_output, output_data)
-
-    # Test call on model.
-    inputs = keras.Input(shape=(4,), dtype=dtypes.int32)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-    output_data = model(input_data)
-    self.assertAllEqual(expected_output_shape, outputs.shape.as_list())
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_multi_hot_rank_3_output_fails(self):
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=4, output_mode=category_encoding.ONE_HOT)
-    with self.assertRaisesRegex(ValueError, "only outputs up to rank 2"):
-      _ = layer(keras.Input(shape=(3, 4,), dtype=dtypes.int32))
-    with self.assertRaisesRegex(ValueError, "only outputs up to rank 2"):
-      _ = layer(np.array([[[3, 2, 0, 1], [3, 2, 0, 1]]]))
-
-  def test_count_output(self):
-    input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]])
-
-    # pyformat: disable
-    expected_output = [[0, 2, 1, 1, 0, 0],
-                       [2, 1, 0, 1, 0, 0]]
-    # pyformat: enable
-    num_tokens = 6
-    expected_output_shape = [None, num_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32)
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=6, output_mode=category_encoding.COUNT)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-class CategoryEncodingModelBuildingTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "count_output",
-          "num_tokens": 5,
-          "output_mode": category_encoding.COUNT
-      }, {
-          "testcase_name": "multi_hot_output",
-          "num_tokens": 5,
-          "output_mode": category_encoding.MULTI_HOT
-      })
-  def test_end_to_end_bagged_modeling(self, output_mode, num_tokens):
-    input_array = np.array([[1, 2, 3, 1], [0, 3, 1, 0]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32)
-    layer = category_encoding.CategoryEncoding(
-        num_tokens=num_tokens, output_mode=output_mode)
-
-    weights = []
-    if num_tokens is None:
-      layer.set_num_elements(5)
-    layer.set_weights(weights)
-
-    int_data = layer(input_data)
-    float_data = backend.cast(int_data, dtype="float32")
-    output_data = core.Dense(64)(float_data)
-    model = keras.Model(inputs=input_data, outputs=output_data)
-    _ = model.predict(input_array)
-
-
-if __name__ == "__main__":
-  test.main()
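The weighted-count tests removed above sum a per-value weight into each token's bucket instead of a plain count. A minimal sketch of that computation with the public tf.math.bincount op, for illustration only (assumes weights and axis support in the installed TF 2.x).

import tensorflow as tf

inputs = tf.constant([[1, 2, 3, 4], [4, 3, 1, 4]])
weights = tf.constant([[.1, .2, .3, .4], [.2, .1, .4, .3]])

# Each token index accumulates its weight rather than a count of 1.
counts = tf.math.bincount(
    inputs, weights=weights, minlength=6, maxlength=6, axis=-1)
print(counts.numpy())
# [[0.  0.1 0.2 0.3 0.4 0. ]
#  [0.  0.4 0.  0.1 0.5 0. ]]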
diff --git a/tensorflow/python/keras/layers/preprocessing/discretization.py b/tensorflow/python/keras/layers/preprocessing/discretization.py
deleted file mode 100644
index aaaaa51..0000000
--- a/tensorflow/python/keras/layers/preprocessing/discretization.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras discretization preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_math_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import script_ops
-from tensorflow.python.ops import sort_ops
-from tensorflow.python.ops.ragged import ragged_functional_ops
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util.tf_export import keras_export
-
-
-def summarize(values, epsilon):
-  """Reduce a 1D sequence of values to a summary.
-
-  This algorithm is based on numpy.quantile but modified to allow for
-  intermediate steps between multiple data sets. It first finds the target
-  number of bins as the reciprocal of epsilon and then takes the individual
-  values spaced at appropriate intervals to arrive at that target.
-  The final step is to return the corresponding counts between those values.
-  If the target num_bins is larger than the size of values, the whole array is
-  returned (with weights of 1).
-
-  Args:
-      values: 1-D `np.ndarray` to be summarized.
-      epsilon: A `'float32'` that determines the approximate desired precision.
-
-  Returns:
-      A 2-D `np.ndarray` that is a summary of the inputs. First column is the
-      interpolated partition values, the second is the weights (counts).
-  """
-
-  values = array_ops.reshape(values, [-1])
-  values = sort_ops.sort(values)
-  elements = math_ops.cast(array_ops.size(values), dtypes.float32)
-  num_buckets = 1. / epsilon
-  increment = math_ops.cast(elements / num_buckets, dtypes.int32)
-  start = increment
-  step = math_ops.maximum(increment, 1)
-  boundaries = values[start::step]
-  weights = array_ops.ones_like(boundaries)
-  weights = weights * math_ops.cast(step, dtypes.float32)
-  return array_ops.stack([boundaries, weights])
-
-
-def compress(summary, epsilon):
-  """Compress a summary to within `epsilon` accuracy.
-
-  The compression step is needed to keep the summary sizes small after merging,
-  and is also used to return the final target boundaries. It finds the new bins
-  based on interpolating cumulative weight percentages from the large summary.
-  Taking the difference of the cumulative weights from the previous bin's
-  cumulative weight will give the new weight for that bin.
-
-  Args:
-      summary: 2-D `np.ndarray` summary to be compressed.
-      epsilon: A `'float32'` that determines the approximate desired precision.
-
-  Returns:
-      A 2-D `np.ndarray` that is a compressed summary. First column is the
-      interpolated partition values, the second is the weights (counts).
-  """
-  # TODO(b/184863356): remove the numpy escape hatch here.
-  return script_ops.numpy_function(
-      lambda s: _compress_summary_numpy(s, epsilon), [summary], dtypes.float32)
-
-
-def _compress_summary_numpy(summary, epsilon):
-  """Compress a summary with numpy."""
-  if summary.shape[1] * epsilon < 1:
-    return summary
-
-  percents = epsilon + np.arange(0.0, 1.0, epsilon)
-  cum_weights = summary[1].cumsum()
-  cum_weight_percents = cum_weights / cum_weights[-1]
-  new_bins = np.interp(percents, cum_weight_percents, summary[0])
-  cum_weights = np.interp(percents, cum_weight_percents, cum_weights)
-  new_weights = cum_weights - np.concatenate((np.array([0]), cum_weights[:-1]))
-  summary = np.stack((new_bins, new_weights))
-  return summary.astype(np.float32)
-
-
-def merge_summaries(prev_summary, next_summary, epsilon):
-  """Weighted merge sort of summaries.
-
-  Given two summaries of distinct data, this function merges (and compresses)
-  them to stay within `epsilon` error tolerance.
-
-  Args:
-      prev_summary: 2-D `np.ndarray` summary to be merged with `next_summary`.
-      next_summary: 2-D `np.ndarray` summary to be merged with `prev_summary`.
-      epsilon: A float that determines the approximate desired precision.
-
-  Returns:
-      A 2-D `np.ndarray` that is a merged summary. First column is the
-      interpolated partition values, the second is the weights (counts).
-  """
-  merged = array_ops.concat((prev_summary, next_summary), axis=1)
-  merged = array_ops.gather_v2(merged, sort_ops.argsort(merged[0]), axis=1)
-  return compress(merged, epsilon)
-
-
-def get_bin_boundaries(summary, num_bins):
-  return compress(summary, 1.0 / num_bins)[0, :-1]
-
-
-@keras_export("keras.layers.experimental.preprocessing.Discretization")
-class Discretization(base_preprocessing_layer.PreprocessingLayer):
-  """Buckets data into discrete ranges.
-
-  This layer will place each element of its input data into one of several
-  contiguous ranges and output an integer index indicating which range each
-  element was placed in.
-
-  Input shape:
-    Any `tf.Tensor` or `tf.RaggedTensor` of dimension 2 or higher.
-
-  Output shape:
-    Same as input shape.
-
-  Attributes:
-    bin_boundaries: A list of bin boundaries. The leftmost and rightmost bins
-      will always extend to `-inf` and `inf`, so `bin_boundaries=[0., 1., 2.]`
-      generates bins `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`, and `[2., +inf)`. If
-      this option is set, `adapt` should not be called.
-    num_bins: The integer number of bins to compute. If this option is set,
-      `adapt` should be called to learn the bin boundaries.
-    epsilon: Error tolerance, typically a small fraction close to zero (e.g.
-      0.01). Higher values of epsilon loosen the quantile approximation, and
-      hence result in more unequal buckets, but can improve performance
-      and reduce resource consumption.
-
-  Examples:
-
-  Bucketize float values based on provided buckets.
-  >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
-  >>> layer = tf.keras.layers.experimental.preprocessing.Discretization(
-  ...          bin_boundaries=[0., 1., 2.])
-  >>> layer(input)
-  <tf.Tensor: shape=(2, 4), dtype=int64, numpy=
-  array([[0, 2, 3, 1],
-         [1, 3, 2, 1]])>
-
-  Bucketize float values based on a number of buckets to compute.
-  >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
-  >>> layer = tf.keras.layers.experimental.preprocessing.Discretization(
-  ...          num_bins=4, epsilon=0.01)
-  >>> layer.adapt(input)
-  >>> layer(input)
-  <tf.Tensor: shape=(2, 4), dtype=int64, numpy=
-  array([[0, 2, 3, 2],
-         [1, 3, 3, 1]])>
-  """
-
-  def __init__(self,
-               bin_boundaries=None,
-               num_bins=None,
-               epsilon=0.01,
-               **kwargs):
-    # bins is a deprecated arg for setting bin_boundaries or num_bins and is
-    # still supported because of existing usage.
-    if "bins" in kwargs:
-      logging.warning(
-          "bins is deprecated, please use bin_boundaries or num_bins instead.")
-      if isinstance(kwargs["bins"], int) and num_bins is None:
-        num_bins = kwargs["bins"]
-      elif bin_boundaries is None:
-        bin_boundaries = kwargs["bins"]
-      del kwargs["bins"]
-    super().__init__(streaming=True, **kwargs)
-    if num_bins is not None and num_bins < 0:
-      raise ValueError("`num_bins` must be must be greater than or equal to 0. "
-                       "You passed `num_bins={}`".format(num_bins))
-    if num_bins is not None and bin_boundaries is not None:
-      raise ValueError("Both `num_bins` and `bin_boundaries` should not be "
-                       "set. You passed `num_bins={}` and "
-                       "`bin_boundaries={}`".format(num_bins, bin_boundaries))
-    bin_boundaries = self._convert_to_list(bin_boundaries)
-    self.input_bin_boundaries = bin_boundaries
-    self.bin_boundaries = bin_boundaries if bin_boundaries is not None else []
-    self.num_bins = num_bins
-    self.epsilon = epsilon
-
-  def build(self, input_shape):
-    super().build(input_shape)
-
-    if self.input_bin_boundaries is not None:
-      return
-
-    # Summary contains two equal length vectors of bins at index 0 and weights
-    # at index 1.
-    self.summary = self.add_weight(
-        name="summary",
-        shape=(2, None),
-        dtype=dtypes.float32,
-        initializer=lambda shape, dtype: [[], []],  # pylint: disable=unused-argument
-        trainable=False)
-
-  def update_state(self, data):
-    if self.input_bin_boundaries is not None:
-      raise ValueError(
-          "Cannot adapt a Discretization layer that has been initialized with "
-          "`bin_boundaries`, use `num_bins` instead. You passed "
-          "`bin_boundaries={}`.".format(self.input_bin_boundaries))
-
-    if not self.built:
-      raise RuntimeError("`build` must be called before `update_state`.")
-
-    data = ops.convert_to_tensor_v2_with_dispatch(data)
-    if data.dtype != dtypes.float32:
-      data = math_ops.cast(data, dtypes.float32)
-    summary = summarize(data, self.epsilon)
-    self.summary.assign(merge_summaries(summary, self.summary, self.epsilon))
-
-  def merge_state(self, layers):
-    for l in layers + [self]:
-      if l.input_bin_boundaries is not None:
-        raise ValueError(
-            "Cannot merge Discretization layer {} that has been initialized "
-            "with `bin_boundaries`, use `num_bins` instead. You passed "
-            "`bin_boundaries={}`.".format(l.name, l.input_bin_boundaries))
-      if not l.built:
-        raise ValueError(
-            "Cannot merge Discretization layer {}, it has no state. You need "
-            "to call `adapt` on this layer before merging.".format(l.name))
-
-    summary = self.summary
-    for l in layers:
-      summary = merge_summaries(summary, l.summary, self.epsilon)
-    self.summary.assign(summary)
-    self.finalize_state()
-
-  def finalize_state(self):
-    if self.input_bin_boundaries is not None or not self.built:
-      return
-
-    # The bucketize op only supports list boundaries.
-    self.bin_boundaries = self._convert_to_list(
-        get_bin_boundaries(self.summary, self.num_bins))
-
-  def reset_state(self):  # pylint: disable=method-hidden
-    if self.input_bin_boundaries is not None or not self.built:
-      return
-
-    self.summary.assign([[], []])
-
-  def get_config(self):
-    config = super().get_config()
-    config.update({
-        "bin_boundaries": self.input_bin_boundaries,
-        "num_bins": self.num_bins,
-        "epsilon": self.epsilon,
-    })
-    return config
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def compute_output_signature(self, input_spec):
-    output_shape = self.compute_output_shape(input_spec.shape.as_list())
-    output_dtype = dtypes.int64
-    if isinstance(input_spec, sparse_tensor.SparseTensorSpec):
-      return sparse_tensor.SparseTensorSpec(
-          shape=output_shape, dtype=output_dtype)
-    return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
-
-  def call(self, inputs):
-    def bucketize(inputs):
-      return gen_math_ops.Bucketize(
-          input=inputs, boundaries=self.bin_boundaries)
-
-    if tf_utils.is_ragged(inputs):
-      integer_buckets = ragged_functional_ops.map_flat_values(bucketize, inputs)
-      # Ragged map_flat_values doesn't touch the non-values tensors in the
-      # ragged composite tensor. If this op is the only op in a Keras model,
-      # this can cause errors in Graph mode, so wrap the tensor in an identity.
-      return array_ops.identity(integer_buckets)
-    elif tf_utils.is_sparse(inputs):
-      return sparse_tensor.SparseTensor(
-          indices=array_ops.identity(inputs.indices),
-          values=bucketize(inputs.values),
-          dense_shape=array_ops.identity(inputs.dense_shape))
-    else:
-      return bucketize(inputs)
-
-  def _convert_to_list(self, inputs):
-    if tensor_util.is_tensor(inputs):
-      inputs = inputs.numpy()
-    if isinstance(inputs, np.ndarray):
-      inputs = inputs.tolist()
-      inputs = list(inputs)
-    return inputs
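The call path of the deleted Discretization layer above reduces to a left-inclusive bucket lookup against bin_boundaries. A minimal NumPy sketch of the same rule, reproducing the docstring example; np.digitize is used here as an assumed stand-in for the Bucketize op, for illustration only.

import numpy as np

x = np.array([[-1.5, 1.0, 3.4, 0.5], [0.0, 3.0, 1.3, 0.0]])
boundaries = [0., 1., 2.]  # buckets: (-inf, 0.), [0., 1.), [1., 2.), [2., +inf)

print(np.digitize(x, boundaries))
# [[0 2 3 1]
#  [1 3 2 1]]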
diff --git a/tensorflow/python/keras/layers/preprocessing/discretization_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/discretization_distribution_test.py
deleted file mode 100644
index a040ffc..0000000
--- a/tensorflow/python/keras/layers/preprocessing/discretization_distribution_test.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.discretization."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.distribute import multi_process_runner
-from tensorflow.python.framework import config
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import strategy_combinations
-from tensorflow.python.keras.layers.preprocessing import discretization
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        strategy=strategy_combinations.all_strategies +
-        strategy_combinations.multi_worker_mirrored_strategies,
-        mode=["eager", "graph"]))
-class DiscretizationDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_distribution(self, strategy):
-    input_array = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
-
-    expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]]
-    expected_output_shape = [None, 4]
-
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(4,))
-      layer = discretization.Discretization(bin_boundaries=[0., 1., 2.])
-      bucket_data = layer(input_data)
-      self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list())
-
-      model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-if __name__ == "__main__":
-  v2_compat.enable_v2_behavior()
-  multi_process_runner.test_main()
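The adapt tests deleted in the next hunk learn bin boundaries so that each bucket receives roughly the same number of elements. A minimal NumPy sketch of the underlying idea using exact quantiles (without the epsilon-bounded summary the deleted layer builds via summarize/compress), for illustration only.

import numpy as np

data = np.arange(300)
num_bins = 3

# Exact quantile boundaries; the deleted layer approximates these within an
# epsilon error tolerance.
boundaries = np.quantile(data, [i / num_bins for i in range(1, num_bins)])
buckets = np.digitize(data, boundaries)

print(boundaries)            # approximately [ 99.67 199.33]
print(np.bincount(buckets))  # roughly 100 elements per bucket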
diff --git a/tensorflow/python/keras/layers/preprocessing/discretization_test.py b/tensorflow/python/keras/layers/preprocessing/discretization_test.py
deleted file mode 100644
index 9680dfb..0000000
--- a/tensorflow/python/keras/layers/preprocessing/discretization_test.py
+++ /dev/null
@@ -1,373 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras discretization preprocessing layer."""
-
-import os
-
-from absl.testing import parameterized
-
-import numpy as np
-
-from tensorflow.python import keras
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers.preprocessing import discretization
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import load
-from tensorflow.python.saved_model import save
-
-
-@keras_parameterized.run_all_keras_modes
-class DiscretizationTest(keras_parameterized.TestCase,
-                         preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_bucketize_with_explicit_buckets_integer(self):
-    input_array = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
-
-    expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]]
-    expected_output_shape = [None, 4]
-
-    input_data = keras.Input(shape=(4,))
-    layer = discretization.Discretization(bin_boundaries=[0., 1., 2.])
-    bucket_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bucketize_with_explicit_buckets_int_input(self):
-    input_array = np.array([[-1, 1, 3, 0], [0, 3, 1, 0]], dtype=np.int64)
-
-    expected_output = [[0, 2, 3, 1], [1, 3, 2, 1]]
-    expected_output_shape = [None, 4]
-
-    input_data = keras.Input(shape=(4,), dtype=dtypes.int64)
-    layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5])
-    bucket_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bucketize_with_explicit_buckets_sparse_float_input(self):
-    indices = [[0, 1], [0, 2], [1, 1]]
-    input_array = sparse_tensor.SparseTensor(
-        indices=indices, values=[-1.5, 1.0, 3.4], dense_shape=[2, 3])
-    expected_output = [0, 2, 3]
-    input_data = keras.Input(shape=(3,), dtype=dtypes.float32, sparse=True)
-    layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5])
-    bucket_data = layer(input_data)
-
-    model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(indices, output_dataset.indices)
-    self.assertAllEqual(expected_output, output_dataset.values)
-
-  def test_bucketize_with_explicit_buckets_ragged_float_input(self):
-    input_array = ragged_factory_ops.constant([[-1.5, 1.0, 3.4, .5],
-                                               [0.0, 3.0, 1.3]])
-
-    expected_output = [[0, 2, 3, 1], [1, 3, 2]]
-    expected_output_shape = [None, None]
-
-    input_data = keras.Input(shape=(None,), ragged=True)
-    layer = discretization.Discretization(bin_boundaries=[0., 1., 2.])
-    bucket_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bucketize_with_explicit_buckets_ragged_int_input(self):
-    input_array = ragged_factory_ops.constant([[-1, 1, 3, 0], [0, 3, 1]],
-                                              dtype=dtypes.int64)
-
-    expected_output = [[0, 2, 3, 1], [1, 3, 2]]
-    expected_output_shape = [None, None]
-
-    input_data = keras.Input(shape=(None,), ragged=True, dtype=dtypes.int64)
-    layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5])
-    bucket_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, bucket_data.shape.as_list())
-    model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bucketize_with_explicit_buckets_sparse_int_input(self):
-    indices = [[0, 1], [0, 2], [1, 1]]
-    input_array = sparse_tensor.SparseTensor(
-        indices=indices, values=[-1, 1, 3], dense_shape=[2, 3])
-    expected_output = [0, 2, 3]
-    input_data = keras.Input(shape=(3,), dtype=dtypes.int32, sparse=True)
-    layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5])
-    bucket_data = layer(input_data)
-
-    model = keras.Model(inputs=input_data, outputs=bucket_data)
-    output_dataset = model.predict(input_array, steps=1)
-    self.assertAllEqual(indices, output_dataset.indices)
-    self.assertAllEqual(expected_output, output_dataset.values)
-
-  def test_output_shape(self):
-    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.int64)
-    layer = discretization.Discretization(bin_boundaries=[-.5, 0.5, 1.5])
-    output = layer(input_data)
-    self.assertAllEqual(output.shape.as_list(), [16, 4])
-
-  def test_num_bins_negative_fails(self):
-    with self.assertRaisesRegex(ValueError, "`num_bins` must be.*num_bins=-7"):
-      _ = discretization.Discretization(num_bins=-7)
-
-  def test_num_bins_and_bins_set_fails(self):
-    with self.assertRaisesRegex(
-        ValueError,
-        r"`num_bins` and `bin_boundaries` should not be set.*5.*\[1, 2\]"):
-      _ = discretization.Discretization(num_bins=5, bins=[1, 2])
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class DiscretizationAdaptTest(keras_parameterized.TestCase,
-                              preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters([
-      {
-          "testcase_name": "2d_single_element",
-          "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]]),
-          "test_data": np.array([[1.], [2.], [3.]]),
-          "use_dataset": True,
-          "expected": np.array([[1], [2], [3]]),
-          "num_bins": 5,
-          "epsilon": 0.01
-      }, {
-          "testcase_name": "2d_multi_element",
-          "adapt_data": np.array([[1., 6.], [2., 7.], [3., 8.], [4., 9.],
-                                  [5., 10.]]),
-          "test_data": np.array([[1., 10.], [2., 6.], [3., 8.]]),
-          "use_dataset": True,
-          "expected": np.array([[0, 4], [1, 3], [1, 4]]),
-          "num_bins": 5,
-          "epsilon": 0.01
-      }, {
-          "testcase_name": "1d_single_element",
-          "adapt_data": np.array([3., 2., 1., 5., 4.]),
-          "test_data": np.array([1., 2., 3.]),
-          "use_dataset": True,
-          "expected": np.array([1, 2, 3]),
-          "num_bins": 5,
-          "epsilon": 0.01
-      }, {
-          "testcase_name": "300_batch_1d_single_element_1",
-          "adapt_data": np.arange(300),
-          "test_data": np.arange(300),
-          "use_dataset": True,
-          "expected":
-              np.concatenate([np.zeros(101), np.ones(99), 2 * np.ones(100)]),
-          "num_bins": 3,
-          "epsilon": 0.01
-      }, {
-          "testcase_name": "300_batch_1d_single_element_2",
-          "adapt_data": np.arange(300) ** 2,
-          "test_data": np.arange(300) ** 2,
-          "use_dataset": True,
-          "expected":
-              np.concatenate([np.zeros(101), np.ones(99), 2 * np.ones(100)]),
-          "num_bins": 3,
-          "epsilon": 0.01
-      }, {
-          "testcase_name": "300_batch_1d_single_element_large_epsilon",
-          "adapt_data": np.arange(300),
-          "test_data": np.arange(300),
-          "use_dataset": True,
-          "expected": np.concatenate([np.zeros(136), np.ones(164)]),
-          "num_bins": 2,
-          "epsilon": 0.1
-      }])
-  def test_layer_computation(self, adapt_data, test_data, use_dataset,
-                             expected, num_bins=5, epsilon=0.01):
-
-    input_shape = tuple(list(test_data.shape)[1:])
-    np.random.shuffle(adapt_data)
-    if use_dataset:
-      # Keras APIs expect batched datasets
-      adapt_data = dataset_ops.Dataset.from_tensor_slices(adapt_data).batch(
-          test_data.shape[0] // 2)
-      test_data = dataset_ops.Dataset.from_tensor_slices(test_data).batch(
-          test_data.shape[0] // 2)
-
-    layer = discretization.Discretization(epsilon=epsilon, num_bins=num_bins)
-    layer.adapt(adapt_data)
-
-    input_data = keras.Input(shape=input_shape)
-    output = layer(input_data)
-    model = keras.Model(input_data, output)
-    model._run_eagerly = testing_utils.should_run_eagerly()
-    output_data = model.predict(test_data)
-    self.assertAllClose(expected, output_data)
-
-  def test_merge_state(self):
-    data = np.arange(300)
-    partial_ds_1 = dataset_ops.Dataset.from_tensor_slices(data[:100])
-    partial_ds_2 = dataset_ops.Dataset.from_tensor_slices(data[100:200])
-    partial_ds_3 = dataset_ops.Dataset.from_tensor_slices(data[200:])
-    full_ds = partial_ds_1.concatenate(partial_ds_2).concatenate(partial_ds_3)
-
-    # Use a small epsilon to minimize discrepancies from the quantile
-    # approximation.
-    full_layer = discretization.Discretization(num_bins=3, epsilon=0.001)
-    full_layer.adapt(full_ds.batch(2))
-
-    partial_layer_1 = discretization.Discretization(num_bins=3, epsilon=0.001)
-    partial_layer_1.adapt(partial_ds_1.batch(2))
-    partial_layer_2 = discretization.Discretization(num_bins=3, epsilon=0.001)
-    partial_layer_2.adapt(partial_ds_2.batch(2))
-    partial_layer_3 = discretization.Discretization(num_bins=3, epsilon=0.001)
-    partial_layer_3.adapt(partial_ds_3.batch(2))
-    partial_layer_1.merge_state([partial_layer_2, partial_layer_3])
-    merged_layer = partial_layer_1
-
-    data = np.arange(300)
-    self.assertAllClose(full_layer(data), merged_layer(data))
-
-  def test_merge_with_stateless_layers_fails(self):
-    layer1 = discretization.Discretization(num_bins=2, name="layer1")
-    layer1.adapt([1, 2, 3])
-    layer2 = discretization.Discretization(bin_boundaries=[0, 1], name="layer2")
-    with self.assertRaisesRegex(ValueError, "Cannot merge.*layer2"):
-      layer1.merge_state([layer2])
-
-  def test_merge_with_unadapted_layers_fails(self):
-    layer1 = discretization.Discretization(num_bins=2, name="layer1")
-    layer1.adapt([1, 2, 3])
-    layer2 = discretization.Discretization(num_bins=2, name="layer2")
-    with self.assertRaisesRegex(ValueError, "Cannot merge.*layer2"):
-      layer1.merge_state([layer2])
-
-  def test_multiple_adapts(self):
-    first_adapt = [[1], [2], [3]]
-    second_adapt = [[4], [5], [6]]
-    predict_input = [[2], [2]]
-    expected_first_output = [[2], [2]]
-    expected_second_output = [[0], [0]]
-
-    inputs = keras.Input(shape=(1,), dtype=dtypes.int32)
-    layer = discretization.Discretization(num_bins=3)
-    layer.adapt(first_adapt)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    actual_output = model.predict(predict_input)
-    self.assertAllClose(actual_output, expected_first_output)
-
-    # Re-adapt the layer on new inputs.
-    layer.adapt(second_adapt)
-    # Re-compile the model.
-    model.compile()
-    # `predict` should now use the new model state.
-    actual_output = model.predict(predict_input)
-    self.assertAllClose(actual_output, expected_second_output)
-
-  def test_saved_model_tf(self):
-    input_data = [[1], [2], [3]]
-    predict_data = [[0.5], [1.5], [2.5]]
-    expected_output = [[0], [1], [2]]
-
-    inputs = keras.Input(shape=(1,), dtype=dtypes.float32)
-    layer = discretization.Discretization(num_bins=3)
-    layer.adapt(input_data)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    output_data = model.predict(predict_data)
-    self.assertAllClose(output_data, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_saved_model")
-    save.save(model, output_path)
-    loaded_model = load.load(output_path)
-    f = loaded_model.signatures["serving_default"]
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_data = f(constant_op.constant(predict_data))["discretization"]
-    self.assertAllClose(new_output_data, expected_output)
-
-  def test_saved_model_keras(self):
-    input_data = [[1], [2], [3]]
-    predict_data = [[0.5], [1.5], [2.5]]
-    expected_output = [[0], [1], [2]]
-
-    cls = discretization.Discretization
-    inputs = keras.Input(shape=(1,), dtype=dtypes.float32)
-    layer = cls(num_bins=3)
-    layer.adapt(input_data)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    output_data = model.predict(predict_data)
-    self.assertAllClose(output_data, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"Discretization": cls})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_data = loaded_model.predict(predict_data)
-    self.assertAllClose(new_output_data, expected_output)
-
-  def test_saved_weights_keras(self):
-    input_data = [[1], [2], [3]]
-    predict_data = [[0.5], [1.5], [2.5]]
-    expected_output = [[0], [1], [2]]
-
-    cls = discretization.Discretization
-    inputs = keras.Input(shape=(1,), dtype=dtypes.float32)
-    layer = cls(num_bins=3)
-    layer.adapt(input_data)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    output_data = model.predict(predict_data)
-    self.assertAllClose(output_data, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_weights")
-    model.save_weights(output_path, save_format="tf")
-    new_model = keras.Model.from_config(
-        model.get_config(), custom_objects={"Discretization": cls})
-    new_model.load_weights(output_path)
-
-    # Validate correctness of the new model.
-    new_output_data = new_model.predict(predict_data)
-    self.assertAllClose(new_output_data, expected_output)
-
-
-if __name__ == "__main__":
-  test.main()
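For context, the deleted tests above all follow the same adapt-then-predict pattern; a minimal sketch of that flow, written against the public `tf.keras.layers.experimental.preprocessing.Discretization` path documented at the time (the layer itself continues to live in the standalone Keras codebase):

import numpy as np
import tensorflow as tf

# Sketch only: the adapt()-then-predict flow exercised by the removed tests.
layer = tf.keras.layers.experimental.preprocessing.Discretization(num_bins=3)
layer.adapt(np.arange(300.0))  # learn approximate quantile bin boundaries

inputs = tf.keras.Input(shape=(1,))
model = tf.keras.Model(inputs, layer(inputs))
print(model.predict(np.array([[10.0], [150.0], [290.0]])))  # bin indices 0..2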
diff --git a/tensorflow/python/keras/layers/preprocessing/hashing.py b/tensorflow/python/keras/layers/preprocessing/hashing.py
deleted file mode 100644
index 1a07b6d..0000000
--- a/tensorflow/python/keras/layers/preprocessing/hashing.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras hashing preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import functools
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras.engine import base_layer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.util.tf_export import keras_export
-
-# Default key from tf.sparse.cross_hashed
-_DEFAULT_SALT_KEY = [0xDECAFCAFFE, 0xDECAFCAFFE]
-
-
-@keras_export('keras.layers.experimental.preprocessing.Hashing')
-class Hashing(base_layer.Layer):
-  """Implements categorical feature hashing, also known as "hashing trick".
-
-  This layer transforms single or multiple categorical inputs to hashed output.
-  It converts a sequence of int or string to a sequence of int. The stable hash
-  function uses tensorflow::ops::Fingerprint to produce universal output that
-  is consistent across platforms.
-
-  This layer uses [FarmHash64](https://github.com/google/farmhash) by default,
-  which provides a consistent hashed output across different platforms and is
-  stable across invocations, regardless of device and context, by mixing the
-  input bits thoroughly.
-
-  If you want to obfuscate the hashed output, you can also pass a random `salt`
-  argument in the constructor. In that case, the layer will use the
-  [SipHash64](https://github.com/google/highwayhash) hash function, with
-  the `salt` value serving as additional input to the hash function.
-
-  Example (FarmHash64):
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3)
-  >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']]
-  >>> layer(inp)
-  <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
-    array([[1],
-           [0],
-           [1],
-           [1],
-           [2]])>
-
-  Example (FarmHash64) with a mask value:
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3,
-  ...    mask_value='')
-  >>> inp = [['A'], ['B'], [''], ['C'], ['D']]
-  >>> layer(inp)
-  <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
-    array([[1],
-           [1],
-           [0],
-           [2],
-           [2]])>
-
-  Example (SipHash64):
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3,
-  ...    salt=[133, 137])
-  >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']]
-  >>> layer(inp)
-  <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
-    array([[1],
-           [2],
-           [1],
-           [0],
-           [2]])>
-
-  Example (SipHash64) with a single integer, same as `salt=[133, 133]`:
-
-  >>> layer = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=3,
-  ...    salt=133)
-  >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']]
-  >>> layer(inp)
-  <tf.Tensor: shape=(5, 1), dtype=int64, numpy=
-    array([[0],
-           [0],
-           [2],
-           [1],
-           [0]])>
-
-  Reference: [SipHash with salt](https://www.131002.net/siphash/siphash.pdf)
-
-  Args:
-    num_bins: Number of hash bins. Note that this includes the `mask_value` bin,
-      so the effective number of bins is `(num_bins - 1)` if `mask_value` is
-      set.
-    mask_value: A value that represents masked inputs, which are mapped to
-      index 0. Defaults to None, meaning no mask term will be added and the
-      hashing will start at index 0.
-    salt: A single unsigned integer, a tuple/list of 2 unsigned integers, or
-      None. If passed, the hash function used will be SipHash64, with these
-      values used as an additional input (known as a "salt" in cryptography).
-      These should be non-zero. Defaults to `None`, in which case the
-      FarmHash64 hash function is used. See the reference paper for details on
-      the two-integer form.
-    **kwargs: Keyword arguments to construct a layer.
-
-  Input shape: A single or list of string, int32 or int64 `Tensor`,
-    `SparseTensor` or `RaggedTensor` of shape `[batch_size, ...,]`
-
-  Output shape: An int64 `Tensor`, `SparseTensor` or `RaggedTensor` of shape
-    `[batch_size, ...]`. If any input is `RaggedTensor` then output is
-    `RaggedTensor`, otherwise if any input is `SparseTensor` then output is
-    `SparseTensor`, otherwise the output is `Tensor`.
-
-  """
-
-  def __init__(self, num_bins, mask_value=None, salt=None, **kwargs):
-    if num_bins is None or num_bins <= 0:
-      raise ValueError('`num_bins` cannot be `None` or a non-positive value.')
-    super(Hashing, self).__init__(**kwargs)
-    self.num_bins = num_bins
-    self.mask_value = mask_value
-    self.strong_hash = salt is not None
-    if salt is not None:
-      if isinstance(salt, (tuple, list)) and len(salt) == 2:
-        self.salt = salt
-      elif isinstance(salt, int):
-        self.salt = [salt, salt]
-      else:
-        raise ValueError('`salt` can only be a tuple of size 2 integers, or a '
-                         'single integer, got {}'.format(salt))
-    else:
-      self.salt = _DEFAULT_SALT_KEY
-
-  def _preprocess_single_input(self, inp):
-    if isinstance(inp, (list, tuple, np.ndarray)):
-      inp = ops.convert_to_tensor_v2_with_dispatch(inp)
-    return inp
-
-  def _preprocess_inputs(self, inputs):
-    if isinstance(inputs, (tuple, list)):
-      # If any element is a tensor or ndarray, treat the input as a list.
-      if any(
-          tensor_util.is_tf_type(inp) or isinstance(inp, np.ndarray)
-          for inp in inputs):
-        return [self._preprocess_single_input(inp) for inp in inputs]
-    return self._preprocess_single_input(inputs)
-
-  def call(self, inputs):
-    inputs = self._preprocess_inputs(inputs)
-    if isinstance(inputs, sparse_tensor.SparseTensor):
-      return sparse_tensor.SparseTensor(
-          indices=inputs.indices,
-          values=self._hash_values_to_bins(inputs.values),
-          dense_shape=inputs.dense_shape)
-    return self._hash_values_to_bins(inputs)
-
-  def _hash_values_to_bins(self, values):
-    """Converts a non-sparse tensor of values to bin indices."""
-    str_to_hash_bucket = self._get_string_to_hash_bucket_fn()
-    num_available_bins = self.num_bins
-    mask = None
-    # If mask_value is set, the zeroth bin is reserved for it.
-    if self.mask_value is not None and num_available_bins > 1:
-      num_available_bins -= 1
-      mask = math_ops.equal(values, self.mask_value)
-    # Convert all values to strings before hashing.
-    if values.dtype.is_integer:
-      values = string_ops.as_string(values)
-    values = str_to_hash_bucket(values, num_available_bins, name='hash')
-    if mask is not None:
-      values = math_ops.add(values, array_ops.ones_like(values))
-      values = array_ops.where(mask, array_ops.zeros_like(values), values)
-    return values
-
-  def _get_string_to_hash_bucket_fn(self):
-    """Returns the string_to_hash_bucket op to use based on `hasher_key`."""
-    # string_to_hash_bucket_fast uses FarmHash64 as hash function.
-    if not self.strong_hash:
-      return string_ops.string_to_hash_bucket_fast
-    # string_to_hash_bucket_strong uses SipHash64 as hash function.
-    else:
-      return functools.partial(
-          string_ops.string_to_hash_bucket_strong, key=self.salt)
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def compute_output_signature(self, input_spec):
-    output_shape = self.compute_output_shape(input_spec.shape)
-    output_dtype = dtypes.int64
-    if isinstance(input_spec, sparse_tensor.SparseTensorSpec):
-      return sparse_tensor.SparseTensorSpec(
-          shape=output_shape, dtype=output_dtype)
-    else:
-      return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
-
-  def get_config(self):
-    config = {
-        'num_bins': self.num_bins,
-        'salt': self.salt,
-        'mask_value': self.mask_value,
-    }
-    base_config = super(Hashing, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
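The layer removed above is essentially a thin wrapper over the TF string-hashing ops. As a hedged sketch (the function name here is illustrative, not part of any API), the FarmHash64 path of `_hash_values_to_bins` boils down to:

import tensorflow as tf

def hash_to_bins(values, num_bins, mask_value=None):
  # Mirrors the deleted `_hash_values_to_bins`: reserve bin 0 for masked
  # inputs, hash everything else into the remaining bins.
  num_available_bins = num_bins
  mask = None
  if mask_value is not None and num_available_bins > 1:
    num_available_bins -= 1
    mask = tf.equal(values, mask_value)
  if values.dtype.is_integer:
    values = tf.strings.as_string(values)
  bins = tf.strings.to_hash_bucket_fast(values, num_available_bins)
  if mask is not None:
    bins = tf.where(mask, tf.zeros_like(bins), bins + 1)
  return bins

print(hash_to_bins(tf.constant(['omar', 'stringer', 'marlo']), num_bins=2))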
diff --git a/tensorflow/python/keras/layers/preprocessing/hashing_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/hashing_distribution_test.py
deleted file mode 100644
index d619b14..0000000
--- a/tensorflow/python/keras/layers/preprocessing/hashing_distribution_test.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for keras.layers.preprocessing.hashing."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.framework import config
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute.strategy_combinations import all_strategies
-from tensorflow.python.keras.layers.preprocessing import hashing
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.platform import test
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        distribution=all_strategies,
-        mode=["eager", "graph"]))
-class HashingDistributionTest(keras_parameterized.TestCase,
-                              preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_distribution(self, distribution):
-    input_data = np.asarray([["omar"], ["stringer"], ["marlo"], ["wire"]])
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_data).batch(
-        2, drop_remainder=True)
-    expected_output = [[0], [0], [1], [0]]
-
-    config.set_soft_device_placement(True)
-
-    with distribution.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = hashing.Hashing(num_bins=2)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-if __name__ == "__main__":
-  test.main()
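The distribution test deleted above reduces to building the preprocessing model inside a strategy scope; a hedged sketch of that pattern (the strategy choice is illustrative):

import numpy as np
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # stands in for `all_strategies`
words = np.asarray([["omar"], ["stringer"], ["marlo"], ["wire"]])
dataset = tf.data.Dataset.from_tensor_slices(words).batch(2, drop_remainder=True)

with strategy.scope():
  inputs = tf.keras.Input(shape=(None,), dtype=tf.string)
  hashed = tf.keras.layers.experimental.preprocessing.Hashing(num_bins=2)(inputs)
  model = tf.keras.Model(inputs, hashed)

print(model.predict(dataset))  # e.g. [[0], [0], [1], [0]] as asserted above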
diff --git a/tensorflow/python/keras/layers/preprocessing/hashing_test.py b/tensorflow/python/keras/layers/preprocessing/hashing_test.py
deleted file mode 100644
index 351160b..0000000
--- a/tensorflow/python/keras/layers/preprocessing/hashing_test.py
+++ /dev/null
@@ -1,279 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for hashing layer."""
-
-import numpy as np
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.engine import input_layer
-from tensorflow.python.keras.engine import training
-from tensorflow.python.keras.layers.preprocessing import hashing
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import test
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class HashingTest(keras_parameterized.TestCase):
-
-  def test_hash_single_bin(self):
-    layer = hashing.Hashing(num_bins=1)
-    inp = np.asarray([['A'], ['B'], ['C'], ['D'], ['E']])
-    output = layer(inp)
-    self.assertAllClose([[0], [0], [0], [0], [0]], output)
-
-  def test_hash_dense_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=2)
-    inp = np.asarray([['omar'], ['stringer'], ['marlo'], ['wire'],
-                      ['skywalker']])
-    output = layer(inp)
-    # Assert equal for hashed output that should be true on all platforms.
-    self.assertAllClose([[0], [0], [1], [0], [0]], output)
-
-  def test_hash_dense_input_mask_value_farmhash(self):
-    empty_mask_layer = hashing.Hashing(num_bins=3, mask_value='')
-    omar_mask_layer = hashing.Hashing(num_bins=3, mask_value='omar')
-    inp = np.asarray([['omar'], ['stringer'], ['marlo'], ['wire'],
-                      ['skywalker']])
-    empty_mask_output = empty_mask_layer(inp)
-    omar_mask_output = omar_mask_layer(inp)
-    # Outputs should be one more than test_hash_dense_input_farmhash (the zeroth
-    # bin is now reserved for masks).
-    self.assertAllClose([[1], [1], [2], [1], [1]], empty_mask_output)
-    # 'omar' should map to 0.
-    self.assertAllClose([[0], [1], [2], [1], [1]], omar_mask_output)
-
-  def test_hash_dense_list_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=2)
-    inp = [['omar'], ['stringer'], ['marlo'], ['wire'], ['skywalker']]
-    output = layer(inp)
-    # Assert equal for hashed output that should be true on all platforms.
-    self.assertAllClose([[0], [0], [1], [0], [0]], output)
-
-    inp = ['omar', 'stringer', 'marlo', 'wire', 'skywalker']
-    output = layer(inp)
-    # Assert equal for hashed output that should be true on all platforms.
-    self.assertAllClose([0, 0, 1, 0, 0], output)
-
-  def test_hash_dense_int_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=3)
-    inp = np.asarray([[0], [1], [2], [3], [4]])
-    output = layer(inp)
-    # Assert equal for hashed output that should be true on all platforms.
-    self.assertAllClose([[1], [0], [1], [0], [2]], output)
-
-  def test_hash_dense_input_siphash(self):
-    layer = hashing.Hashing(num_bins=2, salt=[133, 137])
-    inp = np.asarray([['omar'], ['stringer'], ['marlo'], ['wire'],
-                      ['skywalker']])
-    output = layer(inp)
-    # Assert equal for hashed output that should be true on all platforms.
-    # Note the result is different from FarmHash.
-    self.assertAllClose([[0], [1], [0], [1], [0]], output)
-
-    layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137])
-    output_2 = layer_2(inp)
-    # Note the result is different from (133, 137).
-    self.assertAllClose([[1], [0], [1], [0], [1]], output_2)
-
-  def test_hash_dense_int_input_siphash(self):
-    layer = hashing.Hashing(num_bins=3, salt=[133, 137])
-    inp = np.asarray([[0], [1], [2], [3], [4]])
-    output = layer(inp)
-    # Assert equal for hashed output that should be true on all platforms.
-    self.assertAllClose([[1], [1], [2], [0], [1]], output)
-
-  def test_hash_sparse_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=2)
-    indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]]
-    inp = sparse_tensor.SparseTensor(
-        indices=indices,
-        values=['omar', 'stringer', 'marlo', 'wire', 'skywalker'],
-        dense_shape=[3, 2])
-    output = layer(inp)
-    self.assertAllClose(indices, output.indices)
-    self.assertAllClose([0, 0, 1, 0, 0], output.values)
-
-  def test_hash_sparse_input_mask_value_farmhash(self):
-    empty_mask_layer = hashing.Hashing(num_bins=3, mask_value='')
-    omar_mask_layer = hashing.Hashing(num_bins=3, mask_value='omar')
-    indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]]
-    inp = sparse_tensor.SparseTensor(
-        indices=indices,
-        values=['omar', 'stringer', 'marlo', 'wire', 'skywalker'],
-        dense_shape=[3, 2])
-    empty_mask_output = empty_mask_layer(inp)
-    omar_mask_output = omar_mask_layer(inp)
-    self.assertAllClose(indices, omar_mask_output.indices)
-    self.assertAllClose(indices, empty_mask_output.indices)
-    # Outputs should be one more than test_hash_sparse_input_farmhash (the
-    # zeroth bin is now reserved for masks).
-    self.assertAllClose([1, 1, 2, 1, 1], empty_mask_output.values)
-    # 'omar' should map to 0.
-    self.assertAllClose([0, 1, 2, 1, 1], omar_mask_output.values)
-
-  def test_hash_sparse_int_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=3)
-    indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]]
-    inp = sparse_tensor.SparseTensor(
-        indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2])
-    output = layer(inp)
-    self.assertAllClose(indices, output.indices)
-    self.assertAllClose([1, 0, 1, 0, 2], output.values)
-
-  def test_hash_sparse_input_siphash(self):
-    layer = hashing.Hashing(num_bins=2, salt=[133, 137])
-    indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]]
-    inp = sparse_tensor.SparseTensor(
-        indices=indices,
-        values=['omar', 'stringer', 'marlo', 'wire', 'skywalker'],
-        dense_shape=[3, 2])
-    output = layer(inp)
-    self.assertAllClose(output.indices, indices)
-    # The result should be same with test_hash_dense_input_siphash.
-    self.assertAllClose([0, 1, 0, 1, 0], output.values)
-
-    layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137])
-    output = layer_2(inp)
-    # The result should be same with test_hash_dense_input_siphash.
-    self.assertAllClose([1, 0, 1, 0, 1], output.values)
-
-  def test_hash_sparse_int_input_siphash(self):
-    layer = hashing.Hashing(num_bins=3, salt=[133, 137])
-    indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]]
-    inp = sparse_tensor.SparseTensor(
-        indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2])
-    output = layer(inp)
-    self.assertAllClose(indices, output.indices)
-    self.assertAllClose([1, 1, 2, 0, 1], output.values)
-
-  def test_hash_ragged_string_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=2)
-    inp_data = ragged_factory_ops.constant(
-        [['omar', 'stringer', 'marlo', 'wire'], ['marlo', 'skywalker', 'wire']],
-        dtype=dtypes.string)
-    out_data = layer(inp_data)
-    # Same hashed output as test_hash_sparse_input_farmhash
-    expected_output = [[0, 0, 1, 0], [1, 0, 0]]
-    self.assertAllEqual(expected_output, out_data)
-
-    inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    out_t = layer(inp_t)
-    model = training.Model(inputs=inp_t, outputs=out_t)
-    self.assertAllClose(out_data, model.predict(inp_data))
-
-  def test_hash_ragged_input_mask_value(self):
-    empty_mask_layer = hashing.Hashing(num_bins=3, mask_value='')
-    omar_mask_layer = hashing.Hashing(num_bins=3, mask_value='omar')
-    inp_data = ragged_factory_ops.constant(
-        [['omar', 'stringer', 'marlo', 'wire'], ['marlo', 'skywalker', 'wire']],
-        dtype=dtypes.string)
-    empty_mask_output = empty_mask_layer(inp_data)
-    omar_mask_output = omar_mask_layer(inp_data)
-    # Outputs should be one more than test_hash_ragged_string_input_farmhash
-    # (the zeroth bin is now reserved for masks).
-    expected_output = [[1, 1, 2, 1], [2, 1, 1]]
-    self.assertAllClose(expected_output, empty_mask_output)
-    # 'omar' should map to 0.
-    expected_output = [[0, 1, 2, 1], [2, 1, 1]]
-    self.assertAllClose(expected_output, omar_mask_output)
-
-  def test_hash_ragged_int_input_farmhash(self):
-    layer = hashing.Hashing(num_bins=3)
-    inp_data = ragged_factory_ops.constant([[0, 1, 3, 4], [2, 1, 0]],
-                                           dtype=dtypes.int64)
-    out_data = layer(inp_data)
-    # Same hashed output as test_hash_sparse_int_input_farmhash
-    expected_output = [[1, 0, 0, 2], [1, 0, 1]]
-    self.assertAllEqual(expected_output, out_data)
-
-    inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.int64)
-    out_t = layer(inp_t)
-    model = training.Model(inputs=inp_t, outputs=out_t)
-    self.assertAllClose(out_data, model.predict(inp_data))
-
-  def test_hash_ragged_string_input_siphash(self):
-    layer = hashing.Hashing(num_bins=2, salt=[133, 137])
-    inp_data = ragged_factory_ops.constant(
-        [['omar', 'stringer', 'marlo', 'wire'], ['marlo', 'skywalker', 'wire']],
-        dtype=dtypes.string)
-    out_data = layer(inp_data)
-    # Same hashed output as test_hash_dense_input_siphash
-    expected_output = [[0, 1, 0, 1], [0, 0, 1]]
-    self.assertAllEqual(expected_output, out_data)
-
-    inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    out_t = layer(inp_t)
-    model = training.Model(inputs=inp_t, outputs=out_t)
-    self.assertAllClose(out_data, model.predict(inp_data))
-
-    layer_2 = hashing.Hashing(num_bins=2, salt=[211, 137])
-    out_data = layer_2(inp_data)
-    expected_output = [[1, 0, 1, 0], [1, 1, 0]]
-    self.assertAllEqual(expected_output, out_data)
-
-    out_t = layer_2(inp_t)
-    model = training.Model(inputs=inp_t, outputs=out_t)
-    self.assertAllClose(out_data, model.predict(inp_data))
-
-  def test_hash_ragged_int_input_siphash(self):
-    layer = hashing.Hashing(num_bins=3, salt=[133, 137])
-    inp_data = ragged_factory_ops.constant([[0, 1, 3, 4], [2, 1, 0]],
-                                           dtype=dtypes.int64)
-    out_data = layer(inp_data)
-    # Same hashed output as test_hash_sparse_int_input_siphash
-    expected_output = [[1, 1, 0, 1], [2, 1, 1]]
-    self.assertAllEqual(expected_output, out_data)
-
-    inp_t = input_layer.Input(shape=(None,), ragged=True, dtype=dtypes.int64)
-    out_t = layer(inp_t)
-    model = training.Model(inputs=inp_t, outputs=out_t)
-    self.assertAllClose(out_data, model.predict(inp_data))
-
-  def test_invalid_inputs(self):
-    with self.assertRaisesRegex(ValueError, 'cannot be `None`'):
-      _ = hashing.Hashing(num_bins=None)
-    with self.assertRaisesRegex(ValueError, 'cannot be `None`'):
-      _ = hashing.Hashing(num_bins=-1)
-    with self.assertRaisesRegex(ValueError, 'can only be a tuple of size 2'):
-      _ = hashing.Hashing(num_bins=2, salt='string')
-    with self.assertRaisesRegex(ValueError, 'can only be a tuple of size 2'):
-      _ = hashing.Hashing(num_bins=2, salt=[1])
-    with self.assertRaisesRegex(ValueError, 'can only be a tuple of size 2'):
-      _ = hashing.Hashing(num_bins=1, salt=constant_op.constant([133, 137]))
-
-  def test_hash_compute_output_signature(self):
-    input_shape = tensor_shape.TensorShape([2, 3])
-    input_spec = tensor_spec.TensorSpec(input_shape, dtypes.string)
-    layer = hashing.Hashing(num_bins=2)
-    output_spec = layer.compute_output_signature(input_spec)
-    self.assertEqual(output_spec.shape.dims, input_shape.dims)
-    self.assertEqual(output_spec.dtype, dtypes.int64)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = hashing.Hashing(num_bins=2, name='hashing')
-    config = layer.get_config()
-    layer_1 = hashing.Hashing.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py
deleted file mode 100644
index 32e8f39..0000000
--- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing.py
+++ /dev/null
@@ -1,1318 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras image preprocessing layers."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.engine import base_layer
-from tensorflow.python.keras.engine.input_spec import InputSpec
-from tensorflow.python.keras.preprocessing import image as image_preprocessing
-from tensorflow.python.keras.utils import control_flow_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import gen_image_ops
-from tensorflow.python.ops import image_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import stateful_random_ops
-from tensorflow.python.ops import stateless_random_ops
-from tensorflow.python.util.tf_export import keras_export
-
-ResizeMethod = image_ops.ResizeMethod
-
-_RESIZE_METHODS = {
-    'bilinear': ResizeMethod.BILINEAR,
-    'nearest': ResizeMethod.NEAREST_NEIGHBOR,
-    'bicubic': ResizeMethod.BICUBIC,
-    'area': ResizeMethod.AREA,
-    'lanczos3': ResizeMethod.LANCZOS3,
-    'lanczos5': ResizeMethod.LANCZOS5,
-    'gaussian': ResizeMethod.GAUSSIAN,
-    'mitchellcubic': ResizeMethod.MITCHELLCUBIC
-}
-
-H_AXIS = 1
-W_AXIS = 2
-
-
-def check_fill_mode_and_interpolation(fill_mode, interpolation):
-  if fill_mode not in {'reflect', 'wrap', 'constant', 'nearest'}:
-    raise NotImplementedError(
-        'Unknown `fill_mode` {}. Only `reflect`, `wrap`, '
-        '`constant` and `nearest` are supported.'.format(fill_mode))
-  if interpolation not in {'nearest', 'bilinear'}:
-    raise NotImplementedError('Unknown `interpolation` {}. Only `nearest` and '
-                              '`bilinear` are supported.'.format(interpolation))
-
-
-@keras_export('keras.layers.experimental.preprocessing.Resizing')
-class Resizing(base_layer.Layer):
-  """Image resizing layer.
-
-  Resize the batched image input to target height and width. The input should
-  be a 4-D tensor in the format of NHWC.
-
-  Args:
-    height: Integer, the height of the output shape.
-    width: Integer, the width of the output shape.
-    interpolation: String, the interpolation method. Defaults to `bilinear`.
-      Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
-      `gaussian`, `mitchellcubic`
-    crop_to_aspect_ratio: If True, resize the images without aspect
-      ratio distortion. When the original aspect ratio differs from the target
-      aspect ratio, the output image will be cropped so as to return the largest
-      possible window in the image (of size `(height, width)`) that matches
-      the target aspect ratio. By default (`crop_to_aspect_ratio=False`),
-      aspect ratio may not be preserved.
-  """
-
-  def __init__(self,
-               height,
-               width,
-               interpolation='bilinear',
-               crop_to_aspect_ratio=False,
-               **kwargs):
-    self.target_height = height
-    self.target_width = width
-    self.interpolation = interpolation
-    self.crop_to_aspect_ratio = crop_to_aspect_ratio
-    self._interpolation_method = get_interpolation(interpolation)
-    self.input_spec = InputSpec(ndim=4)
-    super(Resizing, self).__init__(**kwargs)
-
-  def call(self, inputs):
-    if self.crop_to_aspect_ratio:
-      outputs = image_preprocessing.smart_resize(
-          inputs,
-          size=[self.target_height, self.target_width],
-          interpolation=self._interpolation_method)
-    else:
-      outputs = image_ops.resize_images_v2(
-          inputs,
-          size=[self.target_height, self.target_width],
-          method=self._interpolation_method)
-    return outputs
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape(
-        [input_shape[0], self.target_height, self.target_width, input_shape[3]])
-
-  def get_config(self):
-    config = {
-        'height': self.target_height,
-        'width': self.target_width,
-        'interpolation': self.interpolation,
-        'crop_to_aspect_ratio': self.crop_to_aspect_ratio,
-    }
-    base_config = super(Resizing, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
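As the `call` method above shows, with `crop_to_aspect_ratio=False` the Resizing layer is a thin wrapper around the public resize op; a minimal functional sketch (shapes and method are illustrative):

import tensorflow as tf

images = tf.random.uniform([2, 100, 80, 3])  # NHWC batch
resized = tf.image.resize(images, size=[64, 64], method="bilinear")
print(resized.shape)  # (2, 64, 64, 3)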
-@keras_export('keras.layers.experimental.preprocessing.CenterCrop')
-class CenterCrop(base_layer.Layer):
-  """Crop the central portion of the images to target height and width.
-
-  Input shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Output shape:
-    4D tensor with shape:
-    `(samples, target_height, target_width, channels)`.
-
-  If the input height/width is even and the target height/width is odd (or
-  inversely), the input image is left-padded by 1 pixel.
-
-  Args:
-    height: Integer, the height of the output shape.
-    width: Integer, the width of the output shape.
-  """
-
-  def __init__(self, height, width, **kwargs):
-    self.target_height = height
-    self.target_width = width
-    self.input_spec = InputSpec(ndim=4)
-    super(CenterCrop, self).__init__(**kwargs)
-
-  def call(self, inputs):
-    inputs_shape = array_ops.shape(inputs)
-    img_hd = inputs_shape[H_AXIS]
-    img_wd = inputs_shape[W_AXIS]
-    img_hd_diff = img_hd - self.target_height
-    img_wd_diff = img_wd - self.target_width
-    checks = []
-    checks.append(
-        check_ops.assert_non_negative(
-            img_hd_diff,
-            message='The crop height {} should not be greater than input '
-            'height.'.format(self.target_height)))
-    checks.append(
-        check_ops.assert_non_negative(
-            img_wd_diff,
-            message='The crop width {} should not be greater than input '
-            'width.'.format(self.target_width)))
-    with ops.control_dependencies(checks):
-      bbox_h_start = math_ops.cast(img_hd_diff / 2, dtypes.int32)
-      bbox_w_start = math_ops.cast(img_wd_diff / 2, dtypes.int32)
-      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
-      bbox_size = array_ops.stack(
-          [-1, self.target_height, self.target_width, -1])
-      outputs = array_ops.slice(inputs, bbox_begin, bbox_size)
-      return outputs
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape(
-        [input_shape[0], self.target_height, self.target_width, input_shape[3]])
-
-  def get_config(self):
-    config = {
-        'height': self.target_height,
-        'width': self.target_width,
-    }
-    base_config = super(CenterCrop, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
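The crop offsets computed in CenterCrop's `call` are simply half of the height/width surplus; a minimal sketch of the same center crop using the public image op (sizes are illustrative):

import tensorflow as tf

images = tf.random.uniform([2, 100, 80, 3])
target_h, target_w = 64, 64
offset_h = (100 - target_h) // 2  # matches cast(img_hd_diff / 2, int32) above
offset_w = (80 - target_w) // 2
cropped = tf.image.crop_to_bounding_box(
    images, offset_h, offset_w, target_h, target_w)
print(cropped.shape)  # (2, 64, 64, 3)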
-
-@keras_export('keras.layers.experimental.preprocessing.RandomCrop')
-class RandomCrop(base_layer.Layer):
-  """Randomly crop the images to target height and width.
-
-  This layer will crop all the images in the same batch to the same cropping
-  location.
-  By default, random cropping is only applied during training. At inference
-  time, the images will be first rescaled to preserve the shorter side, and
-  center cropped. If you need to apply random cropping at inference time,
-  set `training` to True when calling the layer.
-
-  Input shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Output shape:
-    4D tensor with shape:
-    `(samples, target_height, target_width, channels)`.
-
-  Args:
-    height: Integer, the height of the output shape.
-    width: Integer, the width of the output shape.
-    seed: Integer. Used to create a random seed.
-  """
-
-  def __init__(self, height, width, seed=None, **kwargs):
-    self.height = height
-    self.width = width
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    self.input_spec = InputSpec(ndim=4)
-    super(RandomCrop, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_cropped_inputs():
-      """Cropped inputs with stateless random ops."""
-      input_shape = array_ops.shape(inputs)
-      crop_size = array_ops.stack(
-          [input_shape[0], self.height, self.width, input_shape[3]])
-      check = control_flow_ops.Assert(
-          math_ops.reduce_all(input_shape >= crop_size),
-          [self.height, self.width])
-      with ops.control_dependencies([check]):
-        limit = input_shape - crop_size + 1
-        offset = stateless_random_ops.stateless_random_uniform(
-            array_ops.shape(input_shape),
-            dtype=crop_size.dtype,
-            maxval=crop_size.dtype.max,
-            seed=self._rng.make_seeds()[:, 0]) % limit
-        return array_ops.slice(inputs, offset, crop_size)
-
-    # TODO(b/143885775): Share logic with Resize and CenterCrop.
-    def resize_and_center_cropped_inputs():
-      """Deterministically resize to shorter side and center crop."""
-      input_shape = array_ops.shape(inputs)
-      input_height_t = input_shape[H_AXIS]
-      input_width_t = input_shape[W_AXIS]
-      ratio_cond = (input_height_t / input_width_t > (self.height / self.width))
-      # pylint: disable=g-long-lambda
-      resized_height = control_flow_util.smart_cond(
-          ratio_cond,
-          lambda: math_ops.cast(self.width * input_height_t / input_width_t,
-                                input_height_t.dtype), lambda: self.height)
-      resized_width = control_flow_util.smart_cond(
-          ratio_cond, lambda: self.width,
-          lambda: math_ops.cast(self.height * input_width_t / input_height_t,
-                                input_width_t.dtype))
-      # pylint: enable=g-long-lambda
-      resized_inputs = image_ops.resize_images_v2(
-          images=inputs, size=array_ops.stack([resized_height, resized_width]))
-
-      img_hd_diff = resized_height - self.height
-      img_wd_diff = resized_width - self.width
-      bbox_h_start = math_ops.cast(img_hd_diff / 2, dtypes.int32)
-      bbox_w_start = math_ops.cast(img_wd_diff / 2, dtypes.int32)
-      bbox_begin = array_ops.stack([0, bbox_h_start, bbox_w_start, 0])
-      bbox_size = array_ops.stack([-1, self.height, self.width, -1])
-      outputs = array_ops.slice(resized_inputs, bbox_begin, bbox_size)
-      return outputs
-
-    output = control_flow_util.smart_cond(training, random_cropped_inputs,
-                                          resize_and_center_cropped_inputs)
-    original_shape = inputs.shape.as_list()
-    batch_size, num_channels = original_shape[0], original_shape[3]
-    output_shape = [batch_size] + [self.height, self.width] + [num_channels]
-    output.set_shape(output_shape)
-    return output
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape(
-        [input_shape[0], self.height, self.width, input_shape[3]])
-
-  def get_config(self):
-    config = {
-        'height': self.height,
-        'width': self.width,
-        'seed': self.seed,
-    }
-    base_config = super(RandomCrop, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-@keras_export('keras.layers.experimental.preprocessing.Rescaling')
-class Rescaling(base_layer.Layer):
-  """Multiply inputs by `scale` and adds `offset`.
-
-  For instance:
-
-  1. To rescale an input in the `[0, 255]` range
-  to be in the `[0, 1]` range, you would pass `scale=1./255`.
-
-  2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range,
-  you would pass `scale=1./127.5, offset=-1`.
-
-  The rescaling is applied both during training and inference.
-
-  Input shape:
-    Arbitrary.
-
-  Output shape:
-    Same as input.
-
-  Args:
-    scale: Float, the scale to apply to the inputs.
-    offset: Float, the offset to apply to the inputs.
-  """
-
-  def __init__(self, scale, offset=0., **kwargs):
-    self.scale = scale
-    self.offset = offset
-    super(Rescaling, self).__init__(**kwargs)
-
-  def call(self, inputs):
-    dtype = self._compute_dtype
-    scale = math_ops.cast(self.scale, dtype)
-    offset = math_ops.cast(self.offset, dtype)
-    return math_ops.cast(inputs, dtype) * scale + offset
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def get_config(self):
-    config = {
-        'scale': self.scale,
-        'offset': self.offset,
-    }
-    base_config = super(Rescaling, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
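A quick worked example of the affine map described in the Rescaling docstring, using the documented `[0, 255] -> [-1, 1]` configuration:

import tensorflow as tf

rescale = tf.keras.layers.experimental.preprocessing.Rescaling(
    scale=1. / 127.5, offset=-1.)
print(rescale(tf.constant([[0.], [127.5], [255.]])))  # -> [[-1.], [0.], [1.]]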
-
-HORIZONTAL = 'horizontal'
-VERTICAL = 'vertical'
-HORIZONTAL_AND_VERTICAL = 'horizontal_and_vertical'
-
-
-@keras_export('keras.layers.experimental.preprocessing.RandomFlip')
-class RandomFlip(base_layer.Layer):
-  """Randomly flip each image horizontally and vertically.
-
-  This layer will flip the images based on the `mode` attribute.
-  During inference time, the output will be identical to input. Call the layer
-  with `training=True` to flip the input.
-
-  Input shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Output shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Attributes:
-    mode: String indicating which flip mode to use. Can be "horizontal",
-      "vertical", or "horizontal_and_vertical". Defaults to
-      "horizontal_and_vertical". "horizontal" is a left-right flip and
-      "vertical" is a top-bottom flip.
-    seed: Integer. Used to create a random seed.
-  """
-
-  def __init__(self,
-               mode=HORIZONTAL_AND_VERTICAL,
-               seed=None,
-               **kwargs):
-    super(RandomFlip, self).__init__(**kwargs)
-    self.mode = mode
-    if mode == HORIZONTAL:
-      self.horizontal = True
-      self.vertical = False
-    elif mode == VERTICAL:
-      self.horizontal = False
-      self.vertical = True
-    elif mode == HORIZONTAL_AND_VERTICAL:
-      self.horizontal = True
-      self.vertical = True
-    else:
-      raise ValueError('RandomFlip layer {name} received an unknown mode '
-                       'argument {arg}'.format(name=self.name, arg=mode))
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    self.input_spec = InputSpec(ndim=4)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_flipped_inputs():
-      flipped_outputs = inputs
-      if self.horizontal:
-        flipped_outputs = image_ops.stateless_random_flip_left_right(
-            flipped_outputs,
-            self._rng.make_seeds()[:, 0])
-      if self.vertical:
-        flipped_outputs = image_ops.stateless_random_flip_up_down(
-            flipped_outputs,
-            self._rng.make_seeds()[:, 0])
-      return flipped_outputs
-
-    output = control_flow_util.smart_cond(training, random_flipped_inputs,
-                                          lambda: inputs)
-    output.set_shape(inputs.shape)
-    return output
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def get_config(self):
-    config = {
-        'mode': self.mode,
-        'seed': self.seed,
-    }
-    base_config = super(RandomFlip, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
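The flip itself delegates to the stateless image ops used in `call` above; a hedged sketch of the horizontal path (the seed value is arbitrary):

import tensorflow as tf

images = tf.random.uniform([2, 32, 32, 3])
flipped = tf.image.stateless_random_flip_left_right(images, seed=[1, 2])
print(flipped.shape)  # (2, 32, 32, 3); half the images flipped on average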
-
-# TODO(tanzheny): Add examples, here and everywhere.
-@keras_export('keras.layers.experimental.preprocessing.RandomTranslation')
-class RandomTranslation(base_layer.Layer):
-  """Randomly translate each image during training.
-
-  Args:
-    height_factor: a float represented as fraction of value, or a tuple of size
-      2 representing lower and upper bound for shifting vertically. A negative
-      value means shifting image up, while a positive value means shifting image
-      down. When represented as a single positive float, this value is used for
-      both the upper and lower bound. For instance, `height_factor=(-0.2, 0.3)`
-      results in an output shifted by a random amount in the range [-20%, +30%].
-      `height_factor=0.2` results in an output height shifted by a random amount
-      in the range [-20%, +20%].
-    width_factor: a float represented as fraction of value, or a tuple of size 2
-      representing lower and upper bound for shifting horizontally. A negative
-      value means shifting image left, while a positive value means shifting
-      image right. When represented as a single positive float, this value is
-      used for both the upper and lower bound. For instance,
-      `width_factor=(-0.2, 0.3)` results in an output shifted left by up to
-      20% or shifted right by up to 30%. `width_factor=0.2` results in an
-      output shifted left or right by a random amount in the range
-      [-20%, +20%].
-    fill_mode: Points outside the boundaries of the input are filled according
-      to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
-      - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by
-        reflecting about the edge of the last pixel.
-      - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by
-        filling all values beyond the edge with the same constant value k = 0.
-      - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
-        wrapping around to the opposite edge.
-      - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the
-        nearest pixel.
-    interpolation: Interpolation mode. Supported values: "nearest", "bilinear".
-    seed: Integer. Used to create a random seed.
-    fill_value: a float representing the value to be filled outside the
-      boundaries when `fill_mode` is "constant".
-  Input shape:
-    4D tensor with shape: `(samples, height, width, channels)`,
-      data_format='channels_last'.
-  Output shape:
-    4D tensor with shape: `(samples, height, width, channels)`,
-      data_format='channels_last'.
-  Raises:
-    ValueError: if either bound is not between [-1, 1], or the upper bound is
-      less than the lower bound.
-  """
-
-  def __init__(self,
-               height_factor,
-               width_factor,
-               fill_mode='reflect',
-               interpolation='bilinear',
-               seed=None,
-               fill_value=0.0,
-               **kwargs):
-    self.height_factor = height_factor
-    if isinstance(height_factor, (tuple, list)):
-      self.height_lower = height_factor[0]
-      self.height_upper = height_factor[1]
-    else:
-      self.height_lower = -height_factor
-      self.height_upper = height_factor
-    if self.height_upper < self.height_lower:
-      raise ValueError('`height_factor` cannot have upper bound less than '
-                       'lower bound, got {}'.format(height_factor))
-    if abs(self.height_lower) > 1. or abs(self.height_upper) > 1.:
-      raise ValueError('`height_factor` must have values between [-1, 1], '
-                       'got {}'.format(height_factor))
-
-    self.width_factor = width_factor
-    if isinstance(width_factor, (tuple, list)):
-      self.width_lower = width_factor[0]
-      self.width_upper = width_factor[1]
-    else:
-      self.width_lower = -width_factor
-      self.width_upper = width_factor
-    if self.width_upper < self.width_lower:
-      raise ValueError('`width_factor` cannot have upper bound less than '
-                       'lower bound, got {}'.format(width_factor))
-    if abs(self.width_lower) > 1. or abs(self.width_upper) > 1.:
-      raise ValueError('`width_factor` must have values between [-1, 1], '
-                       'got {}'.format(width_factor))
-
-    check_fill_mode_and_interpolation(fill_mode, interpolation)
-
-    self.fill_mode = fill_mode
-    self.fill_value = fill_value
-    self.interpolation = interpolation
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    self.input_spec = InputSpec(ndim=4)
-    super(RandomTranslation, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_translated_inputs():
-      """Translated inputs with random ops."""
-      inputs_shape = array_ops.shape(inputs)
-      batch_size = inputs_shape[0]
-      h_axis, w_axis = H_AXIS, W_AXIS
-      img_hd = math_ops.cast(inputs_shape[h_axis], dtypes.float32)
-      img_wd = math_ops.cast(inputs_shape[w_axis], dtypes.float32)
-      height_translate = self._rng.uniform(
-          shape=[batch_size, 1],
-          minval=self.height_lower,
-          maxval=self.height_upper,
-          dtype=dtypes.float32)
-      height_translate = height_translate * img_hd
-      width_translate = self._rng.uniform(
-          shape=[batch_size, 1],
-          minval=self.width_lower,
-          maxval=self.width_upper,
-          dtype=dtypes.float32)
-      width_translate = width_translate * img_wd
-      translations = math_ops.cast(
-          array_ops.concat([width_translate, height_translate], axis=1),
-          dtype=dtypes.float32)
-      return transform(
-          inputs,
-          get_translation_matrix(translations),
-          interpolation=self.interpolation,
-          fill_mode=self.fill_mode,
-          fill_value=self.fill_value)
-
-    output = control_flow_util.smart_cond(training, random_translated_inputs,
-                                          lambda: inputs)
-    output.set_shape(inputs.shape)
-    return output
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def get_config(self):
-    config = {
-        'height_factor': self.height_factor,
-        'width_factor': self.width_factor,
-        'fill_mode': self.fill_mode,
-        'fill_value': self.fill_value,
-        'interpolation': self.interpolation,
-        'seed': self.seed,
-    }
-    base_config = super(RandomTranslation, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-def get_translation_matrix(translations, name=None):
-  """Returns projective transform(s) for the given translation(s).
-
-  Args:
-    translations: A matrix of 2-element lists representing [dx, dy] to translate
-      for each image (for a batch of images).
-    name: The name of the op.
-
-  Returns:
-    A tensor of shape (num_images, 8) projective transforms which can be given
-      to `transform`.
-  """
-  with backend.name_scope(name or 'translation_matrix'):
-    num_translations = array_ops.shape(translations)[0]
-    # The translation matrix looks like:
-    #     [[1 0 -dx]
-    #      [0 1 -dy]
-    #      [0 0 1]]
-    # where the last entry is implicit.
-    # Translation matrices are always float32.
-    return array_ops.concat(
-        values=[
-            array_ops.ones((num_translations, 1), dtypes.float32),
-            array_ops.zeros((num_translations, 1), dtypes.float32),
-            -translations[:, 0, None],
-            array_ops.zeros((num_translations, 1), dtypes.float32),
-            array_ops.ones((num_translations, 1), dtypes.float32),
-            -translations[:, 1, None],
-            array_ops.zeros((num_translations, 2), dtypes.float32),
-        ],
-        axis=1)
-
-
-def transform(images,
-              transforms,
-              fill_mode='reflect',
-              fill_value=0.0,
-              interpolation='bilinear',
-              output_shape=None,
-              name=None):
-  """Applies the given transform(s) to the image(s).
-
-  Args:
-    images: A tensor of shape (num_images, num_rows, num_columns, num_channels)
-      (NHWC), (num_rows, num_columns, num_channels) (HWC), or (num_rows,
-      num_columns) (HW). The rank must be statically known (the shape is not
-      `TensorShape(None)`).
-    transforms: Projective transform matrix/matrices. A vector of length 8 or
-      tensor of size N x 8. If one row of transforms is [a0, a1, a2, b0, b1, b2,
-      c0, c1], then it maps the *output* point `(x, y)` to a transformed *input*
-      point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
-      `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the
-      transform mapping input points to output points. Note that gradients are
-      not backpropagated into transformation parameters.
-    fill_mode: Points outside the boundaries of the input are filled according
-      to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
-    fill_value: a float representing the value to be filled outside the boundaries
-      when `fill_mode` is "constant".
-    interpolation: Interpolation mode. Supported values: "nearest", "bilinear".
-    output_shape: Output dimension after the transform, [height, width]. If None,
-      output is the same size as input image.
-    name: The name of the op.
-
-  Fill mode:
-    Behavior for each valid value is as follows:
-    - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by
-      reflecting about the edge of the last pixel.
-    - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by
-      filling all values beyond the edge with the same constant value k = 0.
-    - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
-      wrapping around to the opposite edge.
-    - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the
-      nearest pixel.
-  Input shape:
-    4D tensor with shape: `(samples, height, width, channels)`,
-      data_format='channels_last'.
-  Output shape:
-    4D tensor with shape: `(samples, height, width, channels)`,
-      data_format='channels_last'.
-
-  Returns:
-    Image(s) with the same type and shape as `images`, with the given
-    transform(s) applied. Transformed coordinates outside of the input image
-    will be filled with zeros.
-
-  Raises:
-    TypeError: If `image` is an invalid type.
-    ValueError: If output shape is not a 1-D int32 Tensor.
-  """
-  with backend.name_scope(name or 'transform'):
-    if output_shape is None:
-      output_shape = array_ops.shape(images)[1:3]
-      if not context.executing_eagerly():
-        output_shape_value = tensor_util.constant_value(output_shape)
-        if output_shape_value is not None:
-          output_shape = output_shape_value
-
-    output_shape = ops.convert_to_tensor_v2_with_dispatch(
-        output_shape, dtypes.int32, name='output_shape')
-
-    if not output_shape.get_shape().is_compatible_with([2]):
-      raise ValueError('output_shape must be a 1-D Tensor of 2 elements: '
-                       'new_height, new_width, instead got '
-                       '{}'.format(output_shape))
-
-    fill_value = ops.convert_to_tensor_v2_with_dispatch(
-        fill_value, dtypes.float32, name='fill_value')
-
-    return gen_image_ops.ImageProjectiveTransformV3(
-        images=images,
-        output_shape=output_shape,
-        fill_value=fill_value,
-        transforms=transforms,
-        fill_mode=fill_mode.upper(),
-        interpolation=interpolation.upper())
-
-
-def get_rotation_matrix(angles, image_height, image_width, name=None):
-  """Returns projective transform(s) for the given angle(s).
-
-  Args:
-    angles: A scalar angle to rotate all images by, or (for batches of images) a
-      vector with an angle to rotate each image in the batch. The rank must be
-      statically known (the shape is not `TensorShape(None)`).
-    image_height: Height of the image(s) to be transformed.
-    image_width: Width of the image(s) to be transformed.
-    name: The name of the op.
-
-  Returns:
-    A tensor of shape (num_images, 8). Projective transforms which can be given
-      to operation `image_projective_transform_v2`. If one row of transforms is
-       [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
-       `(x, y)` to a transformed *input* point
-       `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
-       where `k = c0 x + c1 y + 1`.
-  """
-  with backend.name_scope(name or 'rotation_matrix'):
-    x_offset = ((image_width - 1) - (math_ops.cos(angles) *
-                                     (image_width - 1) - math_ops.sin(angles) *
-                                     (image_height - 1))) / 2.0
-    y_offset = ((image_height - 1) - (math_ops.sin(angles) *
-                                      (image_width - 1) + math_ops.cos(angles) *
-                                      (image_height - 1))) / 2.0
-    num_angles = array_ops.shape(angles)[0]
-    return array_ops.concat(
-        values=[
-            math_ops.cos(angles)[:, None],
-            -math_ops.sin(angles)[:, None],
-            x_offset[:, None],
-            math_ops.sin(angles)[:, None],
-            math_ops.cos(angles)[:, None],
-            y_offset[:, None],
-            array_ops.zeros((num_angles, 2), dtypes.float32),
-        ],
-        axis=1)
-
-
-@keras_export('keras.layers.experimental.preprocessing.RandomRotation')
-class RandomRotation(base_layer.Layer):
-  """Randomly rotate each image.
-
-  By default, random rotations are only applied during training.
-  At inference time, the layer does nothing. If you need to apply random
-  rotations at inference time, set `training` to True when calling the layer.
-
-  Input shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Output shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Attributes:
-    factor: a float represented as fraction of 2pi, or a tuple of size 2
-      representing lower and upper bound for rotating clockwise and
-      counter-clockwise. A positive value means rotating counter-clockwise,
-      while a negative value means rotating clockwise. When represented as a
-      single float, this value is used for both the upper and lower bound. For
-      instance, `factor=(-0.2, 0.3)` results in an output rotation by a random
-      amount in the range `[-20% * 2pi, 30% * 2pi]`. `factor=0.2` results in an
-      output rotated by a random amount in the range `[-20% * 2pi, 20% * 2pi]`.
-    fill_mode: Points outside the boundaries of the input are filled according
-      to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
-      - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by
-        reflecting about the edge of the last pixel.
-      - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by
-        filling all values beyond the edge with the same constant value k = 0.
-      - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
-        wrapping around to the opposite edge.
-      - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the
-        nearest pixel.
-    interpolation: Interpolation mode. Supported values: "nearest", "bilinear".
-    seed: Integer. Used to create a random seed.
-    fill_value: a float representing the value to be filled outside the boundaries
-      when `fill_mode` is "constant".
-  Raises:
-    ValueError: if either bound is not between [0, 1], or upper bound is less
-      than lower bound.
-  """
-
-  def __init__(self,
-               factor,
-               fill_mode='reflect',
-               interpolation='bilinear',
-               seed=None,
-               fill_value=0.0,
-               **kwargs):
-    self.factor = factor
-    if isinstance(factor, (tuple, list)):
-      self.lower = factor[0]
-      self.upper = factor[1]
-    else:
-      self.lower = -factor
-      self.upper = factor
-    if self.upper < self.lower:
-      raise ValueError('`factor` cannot have upper bound less than '
-                       'lower bound, got {}'.format(factor))
-    check_fill_mode_and_interpolation(fill_mode, interpolation)
-    self.fill_mode = fill_mode
-    self.fill_value = fill_value
-    self.interpolation = interpolation
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    self.input_spec = InputSpec(ndim=4)
-    super(RandomRotation, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_rotated_inputs():
-      """Rotated inputs with random ops."""
-      inputs_shape = array_ops.shape(inputs)
-      batch_size = inputs_shape[0]
-      img_hd = math_ops.cast(inputs_shape[H_AXIS], dtypes.float32)
-      img_wd = math_ops.cast(inputs_shape[W_AXIS], dtypes.float32)
-      min_angle = self.lower * 2. * np.pi
-      max_angle = self.upper * 2. * np.pi
-      angles = self._rng.uniform(
-          shape=[batch_size], minval=min_angle, maxval=max_angle)
-      return transform(
-          inputs,
-          get_rotation_matrix(angles, img_hd, img_wd),
-          fill_mode=self.fill_mode,
-          fill_value=self.fill_value,
-          interpolation=self.interpolation)
-
-    output = control_flow_util.smart_cond(training, random_rotated_inputs,
-                                          lambda: inputs)
-    output.set_shape(inputs.shape)
-    return output
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def get_config(self):
-    config = {
-        'factor': self.factor,
-        'fill_mode': self.fill_mode,
-        'fill_value': self.fill_value,
-        'interpolation': self.interpolation,
-        'seed': self.seed,
-    }
-    base_config = super(RandomRotation, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-@keras_export('keras.layers.experimental.preprocessing.RandomZoom')
-class RandomZoom(base_layer.Layer):
-  """Randomly zoom each image during training.
-
-  Args:
-    height_factor: a float represented as fraction of value, or a tuple of size
-      2 representing lower and upper bound for zooming vertically. When
-      represented as a single float, this value is used for both the upper and
-      lower bound. A positive value means zooming out, while a negative value
-      means zooming in. For instance, `height_factor=(0.2, 0.3)` results in an
-      output zoomed out by a random amount in the range [+20%, +30%].
-      `height_factor=(-0.3, -0.2)` results in an output zoomed in by a random
-      amount in the range [+20%, +30%].
-    width_factor: a float represented as fraction of value, or a tuple of size 2
-      representing lower and upper bound for zooming horizontally. When
-      represented as a single float, this value is used for both the upper and
-      lower bound. For instance, `width_factor=(0.2, 0.3)` results in an output
-      zooming out between 20% and 30%. `width_factor=(-0.3, -0.2)` results in an
-      output zooming in between 20% and 30%. Defaults to `None`, i.e., zooming in
-      both vertical and horizontal directions while preserving the aspect ratio.
-    fill_mode: Points outside the boundaries of the input are filled according
-      to the given mode (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
-      - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by
-        reflecting about the edge of the last pixel.
-      - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by
-        filling all values beyond the edge with the same constant value k = 0.
-      - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
-        wrapping around to the opposite edge.
-      - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the
-        nearest pixel.
-    interpolation: Interpolation mode. Supported values: "nearest", "bilinear".
-    seed: Integer. Used to create a random seed.
-    fill_value: a float representing the value to be filled outside the boundaries
-      when `fill_mode` is "constant".
-
-  Example:
-
-  >>> input_img = np.random.random((32, 224, 224, 3))
-  >>> layer = tf.keras.layers.experimental.preprocessing.RandomZoom(.5, .2)
-  >>> out_img = layer(input_img)
-  >>> out_img.shape
-  TensorShape([32, 224, 224, 3])
-
-  Input shape:
-    4D tensor with shape: `(samples, height, width, channels)`,
-      data_format='channels_last'.
-  Output shape:
-    4D tensor with shape: `(samples, height, width, channels)`,
-      data_format='channels_last'.
-  Raises:
-    ValueError: if `height_factor` has values outside of `[-1, 1]`, or if
-      `width_factor` has values less than -1.
-  """
-
-  def __init__(self,
-               height_factor,
-               width_factor=None,
-               fill_mode='reflect',
-               interpolation='bilinear',
-               seed=None,
-               fill_value=0.0,
-               **kwargs):
-    self.height_factor = height_factor
-    if isinstance(height_factor, (tuple, list)):
-      self.height_lower = height_factor[0]
-      self.height_upper = height_factor[1]
-    else:
-      self.height_lower = -height_factor
-      self.height_upper = height_factor
-
-    if abs(self.height_lower) > 1. or abs(self.height_upper) > 1.:
-      raise ValueError('`height_factor` must have values between [-1, 1], '
-                       'got {}'.format(height_factor))
-
-    self.width_factor = width_factor
-    if width_factor is not None:
-      if isinstance(width_factor, (tuple, list)):
-        self.width_lower = width_factor[0]
-        self.width_upper = width_factor[1]
-      else:
-        self.width_lower = -width_factor  # pylint: disable=invalid-unary-operand-type
-        self.width_upper = width_factor
-
-      if self.width_lower < -1. or self.width_upper < -1.:
-        raise ValueError('`width_factor` must have values larger than -1, '
-                         'got {}'.format(width_factor))
-
-    check_fill_mode_and_interpolation(fill_mode, interpolation)
-
-    self.fill_mode = fill_mode
-    self.fill_value = fill_value
-    self.interpolation = interpolation
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    self.input_spec = InputSpec(ndim=4)
-    super(RandomZoom, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_zoomed_inputs():
-      """Zoomed inputs with random ops."""
-      inputs_shape = array_ops.shape(inputs)
-      batch_size = inputs_shape[0]
-      img_hd = math_ops.cast(inputs_shape[H_AXIS], dtypes.float32)
-      img_wd = math_ops.cast(inputs_shape[W_AXIS], dtypes.float32)
-      height_zoom = self._rng.uniform(
-          shape=[batch_size, 1],
-          minval=1. + self.height_lower,
-          maxval=1. + self.height_upper)
-      if self.width_factor is not None:
-        width_zoom = self._rng.uniform(
-            shape=[batch_size, 1],
-            minval=1. + self.width_lower,
-            maxval=1. + self.width_upper)
-      else:
-        width_zoom = height_zoom
-      zooms = math_ops.cast(
-          array_ops.concat([width_zoom, height_zoom], axis=1),
-          dtype=dtypes.float32)
-      return transform(
-          inputs,
-          get_zoom_matrix(zooms, img_hd, img_wd),
-          fill_mode=self.fill_mode,
-          fill_value=self.fill_value,
-          interpolation=self.interpolation)
-
-    output = control_flow_util.smart_cond(training, random_zoomed_inputs,
-                                          lambda: inputs)
-    output.set_shape(inputs.shape)
-    return output
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def get_config(self):
-    config = {
-        'height_factor': self.height_factor,
-        'width_factor': self.width_factor,
-        'fill_mode': self.fill_mode,
-        'fill_value': self.fill_value,
-        'interpolation': self.interpolation,
-        'seed': self.seed,
-    }
-    base_config = super(RandomZoom, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-def get_zoom_matrix(zooms, image_height, image_width, name=None):
-  """Returns projective transform(s) for the given zoom(s).
-
-  Args:
-    zooms: A matrix of 2-element lists representing [zx, zy] to zoom for each
-      image (for a batch of images).
-    image_height: Height of the image(s) to be transformed.
-    image_width: Width of the image(s) to be transformed.
-    name: The name of the op.
-
-  Returns:
-    A tensor of shape (num_images, 8). Projective transforms which can be given
-      to operation `image_projective_transform_v2`. If one row of transforms is
-       [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
-       `(x, y)` to a transformed *input* point
-       `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
-       where `k = c0 x + c1 y + 1`.
-  """
-  with backend.name_scope(name or 'zoom_matrix'):
-    num_zooms = array_ops.shape(zooms)[0]
-    # The zoom matrix looks like:
-    #     [[zx 0 0]
-    #      [0 zy 0]
-    #      [0 0 1]]
-    # where the last entry is implicit.
-    # Zoom matrices are always float32.
-    x_offset = ((image_width - 1.) / 2.0) * (1.0 - zooms[:, 0, None])
-    y_offset = ((image_height - 1.) / 2.0) * (1.0 - zooms[:, 1, None])
-    return array_ops.concat(
-        values=[
-            zooms[:, 0, None],
-            array_ops.zeros((num_zooms, 1), dtypes.float32),
-            x_offset,
-            array_ops.zeros((num_zooms, 1), dtypes.float32),
-            zooms[:, 1, None],
-            y_offset,
-            array_ops.zeros((num_zooms, 2), dtypes.float32),
-        ],
-        axis=1)
-
-
-@keras_export('keras.layers.experimental.preprocessing.RandomContrast')
-class RandomContrast(base_layer.Layer):
-  """Adjust the contrast of an image or images by a random factor.
-
-  Contrast is adjusted independently for each channel of each image during
-  training.
-
-  For each channel, this layer computes the mean of the image pixels in the
-  channel and then adjusts each component `x` of each pixel to
-  `(x - mean) * contrast_factor + mean`.
-
-  Input shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Output shape:
-    4D tensor with shape:
-    `(samples, height, width, channels)`, data_format='channels_last'.
-
-  Attributes:
-    factor: a positive float represented as fraction of value, or a tuple of
-      size 2 representing lower and upper bound. When represented as a single
-      float, lower = upper. The contrast factor will be randomly picked between
-      [1.0 - lower, 1.0 + upper].
-    seed: Integer. Used to create a random seed.
-  Raises:
-    ValueError: if lower bound is not between [0, 1], or upper bound is
-      negative.
-  """
-
-  def __init__(self, factor, seed=None, **kwargs):
-    self.factor = factor
-    if isinstance(factor, (tuple, list)):
-      self.lower = factor[0]
-      self.upper = factor[1]
-    else:
-      self.lower = self.upper = factor
-    if self.lower < 0. or self.upper < 0. or self.lower > 1.:
-      raise ValueError('`factor` cannot have negative values or a lower bound '
-                       'greater than 1.0, got {}'.format(factor))
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    self.input_spec = InputSpec(ndim=4)
-    super(RandomContrast, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_contrasted_inputs():
-      return image_ops.stateless_random_contrast(inputs, 1. - self.lower,
-                                                 1. + self.upper,
-                                                 self._rng.make_seeds()[:, 0])
-
-    output = control_flow_util.smart_cond(training, random_contrasted_inputs,
-                                          lambda: inputs)
-    output.set_shape(inputs.shape)
-    return output
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def get_config(self):
-    config = {
-        'factor': self.factor,
-        'seed': self.seed,
-    }
-    base_config = super(RandomContrast, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-@keras_export('keras.layers.experimental.preprocessing.RandomHeight')
-class RandomHeight(base_layer.Layer):
-  """Randomly vary the height of a batch of images during training.
-
-  Adjusts the height of a batch of images by a random factor. The input
-  should be a 4-D tensor in the "channels_last" image data format.
-
-  By default, this layer is inactive during inference.
-
-  Args:
-    factor: A positive float (fraction of original height), or a tuple of size 2
-      representing lower and upper bound for resizing vertically. When
-      represented as a single float, this value is used for both the upper and
-      lower bound. For instance, `factor=(0.2, 0.3)` results in an output with
-      height changed by a random amount in the range `[20%, 30%]`.
-      `factor=(-0.2, 0.3)` results in an output with height changed by a random
-      amount in the range `[-20%, +30%]`. `factor=0.2` results in an output with
-      height changed by a random amount in the range `[-20%, +20%]`.
-    interpolation: String, the interpolation method. Defaults to `bilinear`.
-      Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
-      `gaussian`, `mitchellcubic`
-    seed: Integer. Used to create a random seed.
-  Input shape:
-    4D tensor with shape: `(samples, height, width, channels)`
-      (data_format='channels_last').
-  Output shape:
-    4D tensor with shape: `(samples, random_height, width, channels)`.
-  """
-
-  def __init__(self,
-               factor,
-               interpolation='bilinear',
-               seed=None,
-               **kwargs):
-    self.factor = factor
-    if isinstance(factor, (tuple, list)):
-      self.height_lower = factor[0]
-      self.height_upper = factor[1]
-    else:
-      self.height_lower = -factor
-      self.height_upper = factor
-
-    if self.height_upper < self.height_lower:
-      raise ValueError('`factor` cannot have upper bound less than '
-                       'lower bound, got {}'.format(factor))
-    if self.height_lower < -1. or self.height_upper < -1.:
-      raise ValueError('`factor` must have values larger than -1, '
-                       'got {}'.format(factor))
-    self.interpolation = interpolation
-    self._interpolation_method = get_interpolation(interpolation)
-    self.input_spec = InputSpec(ndim=4)
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    super(RandomHeight, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_height_inputs():
-      """Inputs height-adjusted with random ops."""
-      inputs_shape = array_ops.shape(inputs)
-      img_hd = math_ops.cast(inputs_shape[H_AXIS], dtypes.float32)
-      img_wd = inputs_shape[W_AXIS]
-      height_factor = self._rng.uniform(
-          shape=[],
-          minval=(1.0 + self.height_lower),
-          maxval=(1.0 + self.height_upper))
-      adjusted_height = math_ops.cast(height_factor * img_hd, dtypes.int32)
-      adjusted_size = array_ops.stack([adjusted_height, img_wd])
-      output = image_ops.resize_images_v2(
-          images=inputs, size=adjusted_size, method=self._interpolation_method)
-      original_shape = inputs.shape.as_list()
-      output_shape = [original_shape[0]] + [None] + original_shape[2:4]
-      output.set_shape(output_shape)
-      return output
-
-    return control_flow_util.smart_cond(training, random_height_inputs,
-                                        lambda: inputs)
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape(
-        [input_shape[0], None, input_shape[2], input_shape[3]])
-
-  def get_config(self):
-    config = {
-        'factor': self.factor,
-        'interpolation': self.interpolation,
-        'seed': self.seed,
-    }
-    base_config = super(RandomHeight, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-@keras_export('keras.layers.experimental.preprocessing.RandomWidth')
-class RandomWidth(base_layer.Layer):
-  """Randomly vary the width of a batch of images during training.
-
-  Adjusts the width of a batch of images by a random factor. The input
-  should be a 4-D tensor in the "channels_last" image data format.
-
-  By default, this layer is inactive during inference.
-
-  Args:
-    factor: A positive float (fraction of original width), or a tuple of size 2
-      representing lower and upper bound for resizing horizontally. When
-      represented as a single float, this value is used for both the upper and
-      lower bound. For instance, `factor=(0.2, 0.3)` results in an output with
-      width changed by a random amount in the range `[20%, 30%]`. `factor=(-0.2,
-      0.3)` results in an output with width changed by a random amount in the
-      range `[-20%, +30%]`. `factor=0.2` results in an output with width changed
-      by a random amount in the range `[-20%, +20%]`.
-    interpolation: String, the interpolation method. Defaults to `bilinear`.
-      Supports `bilinear`, `nearest`, `bicubic`, `area`, `lanczos3`, `lanczos5`,
-      `gaussian`, `mitchellcubic`
-    seed: Integer. Used to create a random seed.
-  Input shape:
-    4D tensor with shape: `(samples, height, width, channels)`
-      (data_format='channels_last').
-  Output shape:
-    4D tensor with shape: `(samples, height, random_width, channels)`.
-  """
-
-  def __init__(self,
-               factor,
-               interpolation='bilinear',
-               seed=None,
-               **kwargs):
-    self.factor = factor
-    if isinstance(factor, (tuple, list)):
-      self.width_lower = factor[0]
-      self.width_upper = factor[1]
-    else:
-      self.width_lower = -factor
-      self.width_upper = factor
-    if self.width_upper < self.width_lower:
-      raise ValueError('`factor` cannot have upper bound less than '
-                       'lower bound, got {}'.format(factor))
-    if self.width_lower < -1. or self.width_upper < -1.:
-      raise ValueError('`factor` must have values larger than -1, '
-                       'got {}'.format(factor))
-    self.interpolation = interpolation
-    self._interpolation_method = get_interpolation(interpolation)
-    self.input_spec = InputSpec(ndim=4)
-    self.seed = seed
-    self._rng = make_generator(self.seed)
-    super(RandomWidth, self).__init__(**kwargs)
-
-  def call(self, inputs, training=True):
-    if training is None:
-      training = backend.learning_phase()
-
-    def random_width_inputs():
-      """Inputs width-adjusted with random ops."""
-      inputs_shape = array_ops.shape(inputs)
-      img_hd = inputs_shape[H_AXIS]
-      img_wd = math_ops.cast(inputs_shape[W_AXIS], dtypes.float32)
-      width_factor = self._rng.uniform(
-          shape=[],
-          minval=(1.0 + self.width_lower),
-          maxval=(1.0 + self.width_upper))
-      adjusted_width = math_ops.cast(width_factor * img_wd, dtypes.int32)
-      adjusted_size = array_ops.stack([img_hd, adjusted_width])
-      output = image_ops.resize_images_v2(
-          images=inputs, size=adjusted_size, method=self._interpolation_method)
-      original_shape = inputs.shape.as_list()
-      output_shape = original_shape[0:2] + [None] + [original_shape[3]]
-      output.set_shape(output_shape)
-      return output
-
-    return control_flow_util.smart_cond(training, random_width_inputs,
-                                        lambda: inputs)
-
-  def compute_output_shape(self, input_shape):
-    input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    return tensor_shape.TensorShape(
-        [input_shape[0], input_shape[1], None, input_shape[3]])
-
-  def get_config(self):
-    config = {
-        'factor': self.factor,
-        'interpolation': self.interpolation,
-        'seed': self.seed,
-    }
-    base_config = super(RandomWidth, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-
-def make_generator(seed=None):
-  """Creates a random generator.
-
-  Args:
-    seed: the seed to initialize the generator. If None, the generator will be
-      initialized non-deterministically.
-
-  Returns:
-    A generator object.
-  """
-  if seed is not None:
-    return stateful_random_ops.Generator.from_seed(seed)
-  else:
-    return stateful_random_ops.Generator.from_non_deterministic_state()
-
-
-def get_interpolation(interpolation):
-  interpolation = interpolation.lower()
-  if interpolation not in _RESIZE_METHODS:
-    raise NotImplementedError(
-        'Value not recognized for `interpolation`: {}. Supported values '
-        'are: {}'.format(interpolation, _RESIZE_METHODS.keys()))
-  return _RESIZE_METHODS[interpolation]
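
For readers following the removed helpers above: `get_translation_matrix` flattens a 3x3 translation matrix into the 8-parameter row `[a0, a1, a2, b0, b1, b2, c0, c1]` that `transform` hands to `ImageProjectiveTransformV3`, and the offsets are negated because each row maps an *output* point back to the *input* point it samples from. A minimal, standalone NumPy sketch of that mapping (illustrative only; `apply_projective_transform` is a hypothetical helper, not part of the removed code):

    import numpy as np

    def apply_projective_transform(row, x, y):
        # Map an *output* pixel (x, y) to the *input* point it samples from,
        # following the convention documented for `transform` above.
        a0, a1, a2, b0, b1, b2, c0, c1 = row
        k = c0 * x + c1 * y + 1.0
        return np.array([(a0 * x + a1 * y + a2) / k,
                         (b0 * x + b1 * y + b2) / k])

    # A translation by (dx, dy) = (3, 2) is stored with negated offsets:
    row = [1.0, 0.0, -3.0, 0.0, 1.0, -2.0, 0.0, 0.0]
    print(apply_projective_transform(row, 10.0, 10.0))  # [7. 8.]: content shifts by +3, +2
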
diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py
deleted file mode 100644
index aa57a8f..0000000
--- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_distribution_test.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.image_preprocessing."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.distribute import multi_process_runner
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import strategy_combinations
-from tensorflow.python.keras.layers.preprocessing import image_preprocessing
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        strategy=strategy_combinations.all_strategies +
-        strategy_combinations.multi_worker_mirrored_strategies,
-        mode=["eager", "graph"]))
-class ImagePreprocessingDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_distribution(self, strategy):
-    if "CentralStorage" in type(strategy).__name__:
-      self.skipTest("Does not work with CentralStorageStrategy yet.")
-    # TODO(b/159738418): large image input causes OOM in ubuntu multi gpu.
-    np_images = np.random.random((32, 32, 32, 3)).astype(np.float32)
-    image_dataset = dataset_ops.Dataset.from_tensor_slices(np_images).batch(
-        16, drop_remainder=True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(32, 32, 3), dtype=dtypes.float32)
-      image_preprocessor = keras.Sequential([
-          image_preprocessing.Resizing(height=256, width=256),
-          image_preprocessing.RandomCrop(height=224, width=224),
-          image_preprocessing.RandomTranslation(.1, .1),
-          image_preprocessing.RandomRotation(.2),
-          image_preprocessing.RandomFlip(),
-          image_preprocessing.RandomZoom(.2, .2)])
-      preprocessed_image = image_preprocessor(input_data)
-      flatten_layer = keras.layers.Flatten(data_format="channels_last")
-      output = flatten_layer(preprocessed_image)
-      cls_layer = keras.layers.Dense(units=1, activation="sigmoid")
-      output = cls_layer(output)
-      model = keras.Model(inputs=input_data, outputs=output)
-    model.compile(loss="binary_crossentropy")
-    _ = model.predict(image_dataset)
-
-
-if __name__ == "__main__":
-  v2_compat.enable_v2_behavior()
-  multi_process_runner.test_main()
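
The deleted distribution test above composed the image layers into a small `keras.Sequential` preprocessing model. After this removal, the same pattern is still expressible through the public paths named by the `keras_export` decorators earlier in the diff; a minimal sketch, assuming a TF 2.x build where the `tf.keras.layers.experimental.preprocessing` symbols are exported:

    import numpy as np
    import tensorflow as tf

    preprocessing = tf.keras.layers.experimental.preprocessing
    augment = tf.keras.Sequential([
        preprocessing.Resizing(height=256, width=256),
        preprocessing.RandomCrop(height=224, width=224),
        preprocessing.RandomRotation(0.1),
        preprocessing.RandomZoom(0.2, 0.2),
    ])
    images = np.random.random((8, 300, 300, 3)).astype("float32")
    out = augment(images, training=True)  # shape: (8, 224, 224, 3)
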
diff --git a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py b/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py
deleted file mode 100644
index 0f03b69..0000000
--- a/tensorflow/python/keras/layers/preprocessing/image_preprocessing_test.py
+++ /dev/null
@@ -1,1493 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for image preprocessing layers."""
-
-import functools
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python.compat import compat
-from tensorflow.python.distribute.mirrored_strategy import MirroredStrategy
-from tensorflow.python.framework import errors
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.engine import sequential
-from tensorflow.python.keras.layers.preprocessing import image_preprocessing
-from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.ops import gen_stateful_random_ops
-from tensorflow.python.ops import gen_stateless_random_ops_v2
-from tensorflow.python.ops import image_ops_impl as image_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import stateless_random_ops
-from tensorflow.python.platform import test
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class ResizingTest(keras_parameterized.TestCase):
-
-  def _run_test(self, kwargs, expected_height, expected_width):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    kwargs.update({'height': expected_height, 'width': expected_width})
-    with testing_utils.use_gpu():
-      testing_utils.layer_test(
-          image_preprocessing.Resizing,
-          kwargs=kwargs,
-          input_shape=(num_samples, orig_height, orig_width, channels),
-          expected_output_shape=(None, expected_height, expected_width,
-                                 channels))
-
-  @parameterized.named_parameters(('down_sample_bilinear_2_by_2', {
-      'interpolation': 'bilinear'
-  }, 2, 2), ('down_sample_bilinear_3_by_2', {
-      'interpolation': 'bilinear'
-  }, 3, 2), ('down_sample_nearest_2_by_2', {
-      'interpolation': 'nearest'
-  }, 2, 2), ('down_sample_nearest_3_by_2', {
-      'interpolation': 'nearest'
-  }, 3, 2), ('down_sample_area_2_by_2', {
-      'interpolation': 'area'
-  }, 2, 2), ('down_sample_area_3_by_2', {
-      'interpolation': 'area'
-  }, 3, 2), ('down_sample_crop_to_aspect_ratio_3_by_2', {
-      'interpolation': 'bilinear',
-      'crop_to_aspect_ratio': True,
-  }, 3, 2))
-  def test_down_sampling(self, kwargs, expected_height, expected_width):
-    with CustomObjectScope({'Resizing': image_preprocessing.Resizing}):
-      self._run_test(kwargs, expected_height, expected_width)
-
-  @parameterized.named_parameters(('up_sample_bilinear_10_by_12', {
-      'interpolation': 'bilinear'
-  }, 10, 12), ('up_sample_bilinear_12_by_12', {
-      'interpolation': 'bilinear'
-  }, 12, 12), ('up_sample_nearest_10_by_12', {
-      'interpolation': 'nearest'
-  }, 10, 12), ('up_sample_nearest_12_by_12', {
-      'interpolation': 'nearest'
-  }, 12, 12), ('up_sample_area_10_by_12', {
-      'interpolation': 'area'
-  }, 10, 12), ('up_sample_area_12_by_12', {
-      'interpolation': 'area'
-  }, 12, 12), ('up_sample_crop_to_aspect_ratio_12_by_14', {
-      'interpolation': 'bilinear',
-      'crop_to_aspect_ratio': True,
-  }, 12, 14))
-  def test_up_sampling(self, kwargs, expected_height, expected_width):
-    with CustomObjectScope({'Resizing': image_preprocessing.Resizing}):
-      self._run_test(kwargs, expected_height, expected_width)
-
-  def test_down_sampling_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype(dtype)
-        layer = image_preprocessing.Resizing(
-            height=2, width=2, interpolation='nearest')
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [5, 7],
-            [13, 15]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 2, 2, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_up_sampling_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype(dtype)
-        layer = image_preprocessing.Resizing(
-            height=4, width=4, interpolation='nearest')
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 0, 1, 1],
-            [0, 0, 1, 1],
-            [2, 2, 3, 3],
-            [2, 2, 3, 3]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 4, 4, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  @parameterized.named_parameters(('reshape_bilinear_10_by_4', {
-      'interpolation': 'bilinear'
-  }, 10, 4))
-  def test_reshaping(self, kwargs, expected_height, expected_width):
-    with CustomObjectScope({'Resizing': image_preprocessing.Resizing}):
-      self._run_test(kwargs, expected_height, expected_width)
-
-  def test_invalid_interpolation(self):
-    with self.assertRaises(NotImplementedError):
-      image_preprocessing.Resizing(5, 5, 'invalid_interpolation')
-
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.Resizing(5, 5, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.Resizing.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-  def test_crop_to_aspect_ratio(self):
-    with testing_utils.use_gpu():
-      input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype('float32')
-      layer = image_preprocessing.Resizing(4, 2, crop_to_aspect_ratio=True)
-      output_image = layer(input_image)
-      expected_output = np.asarray([
-          [1, 2],
-          [5, 6],
-          [9, 10],
-          [13, 14]
-      ]).astype('float32')
-      expected_output = np.reshape(expected_output, (1, 4, 2, 1))
-      self.assertAllEqual(expected_output, output_image)
-
-
-def get_numpy_center_crop(images, expected_height, expected_width):
-  orig_height = images.shape[1]
-  orig_width = images.shape[2]
-  height_start = int((orig_height - expected_height) / 2)
-  width_start = int((orig_width - expected_width) / 2)
-  height_end = height_start + expected_height
-  width_end = width_start + expected_width
-  return images[:, height_start:height_end, width_start:width_end, :]
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CenterCropTest(keras_parameterized.TestCase):
-
-  def _run_test(self, expected_height, expected_width):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    kwargs = {'height': expected_height, 'width': expected_width}
-    input_images = np.random.random(
-        (num_samples, orig_height, orig_width, channels)).astype(np.float32)
-    expected_output = get_numpy_center_crop(input_images, expected_height,
-                                            expected_width)
-    with testing_utils.use_gpu():
-      testing_utils.layer_test(
-          image_preprocessing.CenterCrop,
-          kwargs=kwargs,
-          input_shape=(num_samples, orig_height, orig_width, channels),
-          input_data=input_images,
-          expected_output=expected_output,
-          expected_output_shape=(None, expected_height, expected_width,
-                                 channels))
-
-  @parameterized.named_parameters(('center_crop_3_by_4', 3, 4),
-                                  ('center_crop_3_by_2', 3, 2))
-  def test_center_crop_aligned(self, expected_height, expected_width):
-    with CustomObjectScope({'CenterCrop': image_preprocessing.CenterCrop}):
-      self._run_test(expected_height, expected_width)
-
-  @parameterized.named_parameters(('center_crop_4_by_5', 4, 5),
-                                  ('center_crop_4_by_3', 4, 3))
-  def test_center_crop_mis_aligned(self, expected_height, expected_width):
-    with CustomObjectScope({'CenterCrop': image_preprocessing.CenterCrop}):
-      self._run_test(expected_height, expected_width)
-
-  @parameterized.named_parameters(('center_crop_4_by_6', 4, 6),
-                                  ('center_crop_3_by_2', 3, 2))
-  def test_center_crop_half_mis_aligned(self, expected_height, expected_width):
-    with CustomObjectScope({'CenterCrop': image_preprocessing.CenterCrop}):
-      self._run_test(expected_height, expected_width)
-
-  @parameterized.named_parameters(('center_crop_5_by_12', 5, 12),
-                                  ('center_crop_10_by_8', 10, 8),
-                                  ('center_crop_10_by_12', 10, 12))
-  def test_invalid_center_crop(self, expected_height, expected_width):
-    # InternalError is raised by the tf.function MLIR lowering pass when TFRT
-    # is enabled.
-    with self.assertRaisesRegex(
-        (errors.InvalidArgumentError, errors.InternalError),
-        r'assertion failed|error: \'tf.Slice\' op'):
-      self._run_test(expected_height, expected_width)
-
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.CenterCrop(5, 5, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.CenterCrop.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomCropTest(keras_parameterized.TestCase):
-
-  def _run_test(self, expected_height, expected_width):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    kwargs = {'height': expected_height, 'width': expected_width}
-    with testing_utils.use_gpu():
-      testing_utils.layer_test(
-          image_preprocessing.RandomCrop,
-          kwargs=kwargs,
-          input_shape=(num_samples, orig_height, orig_width, channels),
-          expected_output_shape=(None, expected_height, expected_width,
-                                 channels))
-
-  @parameterized.named_parameters(('random_crop_5_by_12', 5, 12),
-                                  ('random_crop_10_by_8', 10, 8),
-                                  ('random_crop_10_by_12', 10, 12))
-  def test_invalid_random_crop(self, expected_height, expected_width):
-    # InternalError is raised by the tf.function MLIR lowering pass when TFRT
-    # is enabled.
-    with self.assertRaises((errors.InvalidArgumentError, errors.InternalError)):
-      with CustomObjectScope({'RandomCrop': image_preprocessing.RandomCrop}):
-        self._run_test(expected_height, expected_width)
-
-  def test_training_with_mock(self):
-    np.random.seed(1337)
-    height, width = 3, 4
-    height_offset = np.random.randint(low=0, high=3)
-    width_offset = np.random.randint(low=0, high=5)
-    mock_offset = [0, height_offset, width_offset, 0]
-    with test.mock.patch.object(
-        stateless_random_ops,
-        'stateless_random_uniform',
-        return_value=mock_offset):
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomCrop(height, width)
-        inp = np.random.random((12, 5, 8, 3))
-        actual_output = layer(inp, training=1)
-        expected_output = inp[:, height_offset:(height_offset + height),
-                              width_offset:(width_offset + width), :]
-        self.assertAllClose(expected_output, actual_output)
-
-  @parameterized.named_parameters(('random_crop_4_by_6', 4, 6),
-                                  ('random_crop_3_by_2', 3, 2))
-  def test_random_crop_output_shape(self, expected_height, expected_width):
-    with CustomObjectScope({'RandomCrop': image_preprocessing.RandomCrop}):
-      self._run_test(expected_height, expected_width)
-
-  def test_random_crop_full_height(self):
-    self._run_test(5, 2)
-
-  def test_random_crop_full_width(self):
-    self._run_test(3, 8)
-
-  def test_random_crop_full(self):
-    np.random.seed(1337)
-    height, width = 8, 16
-    inp = np.random.random((12, 8, 16, 3))
-    with testing_utils.use_gpu():
-      layer = image_preprocessing.RandomCrop(height, width)
-      actual_output = layer(inp, training=0)
-      self.assertAllClose(inp, actual_output)
-
-  def test_predicting_with_mock_longer_height(self):
-    np.random.seed(1337)
-    height, width = 3, 3
-    inp = np.random.random((12, 10, 6, 3))
-    with testing_utils.use_gpu():
-      layer = image_preprocessing.RandomCrop(height, width)
-      actual_output = layer(inp, training=0)
-      resized_inp = image_ops.resize_images_v2(inp, size=[5, 3])
-      expected_output = resized_inp[:, 1:4, :, :]
-      self.assertAllClose(expected_output, actual_output)
-
-  def test_predicting_with_mock_longer_width(self):
-    np.random.seed(1337)
-    height, width = 4, 6
-    inp = np.random.random((12, 8, 16, 3))
-    with testing_utils.use_gpu():
-      layer = image_preprocessing.RandomCrop(height, width)
-      actual_output = layer(inp, training=0)
-      resized_inp = image_ops.resize_images_v2(inp, size=[4, 8])
-      expected_output = resized_inp[:, :, 1:7, :]
-      self.assertAllClose(expected_output, actual_output)
-
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomCrop(5, 5, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomCrop.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-class RescalingTest(keras_parameterized.TestCase):
-
-  @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-  def test_rescaling_base(self):
-    kwargs = {'scale': 1. / 127.5, 'offset': -1.}
-    testing_utils.layer_test(
-        image_preprocessing.Rescaling,
-        kwargs=kwargs,
-        input_shape=(2, 5, 6, 3),
-        expected_output_shape=(None, 5, 6, 3))
-
-  @testing_utils.run_v2_only
-  def test_rescaling_correctness_float(self):
-    layer = image_preprocessing.Rescaling(scale=1. / 127.5, offset=-1.)
-    inputs = random_ops.random_uniform((2, 4, 5, 3))
-    outputs = layer(inputs)
-    self.assertAllClose(outputs.numpy(), inputs.numpy() * (1. / 127.5) - 1)
-
-  @testing_utils.run_v2_only
-  def test_rescaling_correctness_int(self):
-    layer = image_preprocessing.Rescaling(scale=1. / 127.5, offset=-1)
-    inputs = random_ops.random_uniform((2, 4, 5, 3), 0, 100, dtype='int32')
-    outputs = layer(inputs)
-    self.assertEqual(outputs.dtype.name, 'float32')
-    self.assertAllClose(outputs.numpy(), inputs.numpy() * (1. / 127.5) - 1)
-
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.Rescaling(0.5, name='rescaling')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.Rescaling.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomFlipTest(keras_parameterized.TestCase):
-
-  def _run_test(self, mode, expected_output=None, mock_random=None):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    if mock_random is None:
-      mock_random = [1 for _ in range(num_samples)]
-      mock_random = np.reshape(mock_random, [2, 1, 1, 1])
-    inp = np.random.random((num_samples, orig_height, orig_width, channels))
-    if expected_output is None:
-      expected_output = inp
-      if mode == 'horizontal' or mode == 'horizontal_and_vertical':
-        expected_output = np.flip(expected_output, axis=2)
-      if mode == 'vertical' or mode == 'horizontal_and_vertical':
-        expected_output = np.flip(expected_output, axis=1)
-    with test.mock.patch.object(
-        stateless_random_ops,
-        'stateless_random_uniform',
-        return_value=mock_random,
-    ):
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomFlip(mode)
-        actual_output = layer(inp, training=1)
-        self.assertAllClose(expected_output, actual_output)
-
-  @parameterized.named_parameters(
-      ('random_flip_horizontal', 'horizontal'),
-      ('random_flip_vertical', 'vertical'),
-      ('random_flip_both', 'horizontal_and_vertical'))
-  def test_random_flip(self, mode):
-    with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}):
-      self._run_test(mode)
-
-  def test_random_flip_horizontal_half(self):
-    with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}):
-      np.random.seed(1337)
-      mock_random = [1, 0]
-      mock_random = np.reshape(mock_random, [2, 1, 1, 1])
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images.copy()
-      expected_output[0, :, :, :] = np.flip(input_images[0, :, :, :], axis=1)
-      self._run_test('horizontal', expected_output, mock_random)
-
-  def test_random_flip_vertical_half(self):
-    with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}):
-      np.random.seed(1337)
-      mock_random = [1, 0]
-      mock_random = np.reshape(mock_random, [2, 1, 1, 1])
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images.copy()
-      expected_output[0, :, :, :] = np.flip(input_images[0, :, :, :], axis=0)
-      self._run_test('vertical', expected_output, mock_random)
-
-  def test_random_flip_inference(self):
-    with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomFlip()
-        actual_output = layer(input_images, training=0)
-        self.assertAllClose(expected_output, actual_output)
-
-  def test_random_flip_default(self):
-    with CustomObjectScope({'RandomFlip': image_preprocessing.RandomFlip}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = np.flip(np.flip(input_images, axis=1), axis=2)
-      mock_random = [1, 1]
-      mock_random = np.reshape(mock_random, [2, 1, 1, 1])
-      with test.mock.patch.object(
-          stateless_random_ops,
-          'stateless_random_uniform',
-          return_value=mock_random,
-      ):
-        with self.cached_session():
-          layer = image_preprocessing.RandomFlip()
-          actual_output = layer(input_images, training=1)
-          self.assertAllClose(expected_output, actual_output)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomFlip(name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomFlip.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomContrastTest(keras_parameterized.TestCase):
-
-  def _run_test(self, lower, upper, expected_output=None, mock_random=None):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    if mock_random is None:
-      mock_random = 0.2
-    inp = np.random.random((num_samples, orig_height, orig_width, channels))
-    if expected_output is None:
-      # reduce mean on height.
-      inp_mean = np.mean(inp, axis=1, keepdims=True)
-      # reduce mean on width.
-      inp_mean = np.mean(inp_mean, axis=2, keepdims=True)
-      expected_output = (inp - inp_mean) * mock_random + inp_mean
-    with test.mock.patch.object(
-        stateless_random_ops,
-        'stateless_random_uniform',
-        return_value=mock_random,
-    ):
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomContrast((lower, upper))
-        actual_output = layer(inp, training=True)
-        self.assertAllClose(expected_output, actual_output)
-
-  @parameterized.named_parameters(('random_contrast_2_by_5', 0.2, 0.5),
-                                  ('random_contrast_2_by_13', 0.2, 1.3),
-                                  ('random_contrast_5_by_2', 0.5, 0.2),
-                                  ('random_contrast_10_by_10', 1.0, 1.0))
-  def test_random_contrast(self, lower, upper):
-    with CustomObjectScope(
-        {'RandomContrast': image_preprocessing.RandomContrast}):
-      self._run_test(lower, upper)
-
-  @parameterized.named_parameters(('random_contrast_amplitude_2', 0.2),
-                                  ('random_contrast_amplitude_5', 0.5))
-  def test_random_contrast_amplitude(self, amplitude):
-    with CustomObjectScope(
-        {'RandomContrast': image_preprocessing.RandomContrast}):
-      input_images = np.random.random((2, 5, 8, 3))
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomContrast(amplitude)
-        layer(input_images)
-
-  def test_random_contrast_inference(self):
-    with CustomObjectScope(
-        {'RandomContrast': image_preprocessing.RandomContrast}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomContrast((0.1, 0.2))
-        actual_output = layer(input_images, training=False)
-        self.assertAllClose(expected_output, actual_output)
-
-  def test_random_contrast_int_dtype(self):
-    with CustomObjectScope(
-        {'RandomContrast': image_preprocessing.RandomContrast}):
-      input_images = np.random.randint(low=0, high=255, size=(2, 5, 8, 3))
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomContrast((0.1, 0.2))
-        layer(input_images)
-
-  def test_random_contrast_invalid_bounds(self):
-    with self.assertRaises(ValueError):
-      image_preprocessing.RandomContrast((-0.1, .5))
-
-    with self.assertRaises(ValueError):
-      image_preprocessing.RandomContrast((1.1, .5))
-
-    with self.assertRaises(ValueError):
-      image_preprocessing.RandomContrast((0.1, -0.2))
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomContrast((.5, .6), name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomContrast.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
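The contrast tests above all reduce to the same arithmetic: pull every pixel towards its per-image, per-channel spatial mean by the sampled contrast factor. A minimal NumPy sketch of that computation (the helper name is mine; this is not the layer's implementation):

    import numpy as np

    def adjust_contrast(images, factor):
        # images: (batch, height, width, channels); factor: the sampled
        # contrast factor (the tests above mock the random op to return 0.2).
        mean = images.mean(axis=(1, 2), keepdims=True)
        return (images - mean) * factor + mean

    images = np.random.random((2, 5, 8, 3)).astype(np.float32)
    out = adjust_contrast(images, 0.2)  # same formula as expected_output in _run_test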
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomTranslationTest(keras_parameterized.TestCase):
-
-  def _run_test(self, height_factor, width_factor):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    kwargs = {'height_factor': height_factor, 'width_factor': width_factor}
-    with testing_utils.use_gpu():
-      testing_utils.layer_test(
-          image_preprocessing.RandomTranslation,
-          kwargs=kwargs,
-          input_shape=(num_samples, orig_height, orig_width, channels),
-          expected_output_shape=(None, orig_height, orig_width, channels))
-
-  @parameterized.named_parameters(
-      ('random_translate_4_by_6', .4, .6), ('random_translate_3_by_2', .3, .2),
-      ('random_translate_tuple_factor', (-.5, .4), (.2, .3)))
-  def test_random_translation(self, height_factor, width_factor):
-    self._run_test(height_factor, width_factor)
-
-  def test_random_translation_up_numeric_reflect(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
-        # Shifting up by .2 * 5 = 1 pixel.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=(-.2, -.2), width_factor=0.)
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [5, 6, 7, 8, 9],
-            [10, 11, 12, 13, 14],
-            [15, 16, 17, 18, 19],
-            [20, 21, 22, 23, 24],
-            [20, 21, 22, 23, 24]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_up_numeric_constant(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
-        # Shifting up by .2 * 5 = 1 pixel.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=(-.2, -.2), width_factor=0., fill_mode='constant')
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [5, 6, 7, 8, 9],
-            [10, 11, 12, 13, 14],
-            [15, 16, 17, 18, 19],
-            [20, 21, 22, 23, 24],
-            [0, 0, 0, 0, 0]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_down_numeric_reflect(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
-        # Shifting down by .2 * 5 = 1 pixel.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=(.2, .2), width_factor=0.)
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 1, 2, 3, 4],
-            [0, 1, 2, 3, 4],
-            [5, 6, 7, 8, 9],
-            [10, 11, 12, 13, 14],
-            [15, 16, 17, 18, 19]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_asymmetric_size_numeric_reflect(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype(dtype)
-        # Shifting down by .5 * 8 = 4 pixels.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=(.5, .5), width_factor=0.)
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [6, 7],
-            [4, 5],
-            [2, 3],
-            [0, 1],
-            [0, 1],
-            [2, 3],
-            [4, 5],
-            [6, 7],
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 8, 2, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_down_numeric_constant(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
-        # Shifting down by .2 * 5 = 1 pixel.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=(.2, .2), width_factor=0., fill_mode='constant')
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 0, 0, 0, 0],
-            [0, 1, 2, 3, 4],
-            [5, 6, 7, 8, 9],
-            [10, 11, 12, 13, 14],
-            [15, 16, 17, 18, 19]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_left_numeric_reflect(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
-        # Shifting left by .2 * 5 = 1 pixel.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=0., width_factor=(-.2, -.2))
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [1, 2, 3, 4, 4],
-            [6, 7, 8, 9, 9],
-            [11, 12, 13, 14, 14],
-            [16, 17, 18, 19, 19],
-            [21, 22, 23, 24, 24]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_left_numeric_constant(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype)
-        # Shifting left by .2 * 5 = 1 pixel.
-        layer = image_preprocessing.RandomTranslation(
-            height_factor=0., width_factor=(-.2, -.2), fill_mode='constant')
-        output_image = layer(input_image)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [1, 2, 3, 4, 0],
-            [6, 7, 8, 9, 0],
-            [11, 12, 13, 14, 0],
-            [16, 17, 18, 19, 0],
-            [21, 22, 23, 24, 0]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_translation_inference(self):
-    with CustomObjectScope(
-        {'RandomTranslation': image_preprocessing.RandomTranslation}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomTranslation(.5, .5)
-        actual_output = layer(input_images, training=0)
-        self.assertAllClose(expected_output, actual_output)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomTranslation(.5, .6, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomTranslation.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
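As the numeric tests above illustrate, `height_factor` and `width_factor` are fractions of the image size, so a pinned factor translates directly into a pixel shift: negative values move content up/left, positive values move it down/right. A small arithmetic sketch of that reading (the helper name is mine):

    def shift_in_pixels(factor, size):
        # Pixel shift implied by a pinned translation factor.
        return factor * size

    print(shift_in_pixels(-0.2, 5))  # -1.0: content moves up by one row
    print(shift_in_pixels(0.2, 5))   #  1.0: content moves down by one row
    print(shift_in_pixels(0.5, 8))   #  4.0: content moves down by four rows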
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomTransformTest(keras_parameterized.TestCase):
-
-  def _run_random_transform_with_mock(self,
-                                      transform_matrix,
-                                      expected_output,
-                                      mode,
-                                      fill_value=0.0,
-                                      interpolation='bilinear'):
-    inp = np.arange(15).reshape((1, 5, 3, 1)).astype(np.float32)
-    with self.cached_session():
-      output = image_preprocessing.transform(
-          inp,
-          transform_matrix,
-          fill_mode=mode,
-          fill_value=fill_value,
-          interpolation=interpolation)
-    self.assertAllClose(expected_output, output)
-
-  def test_random_translation_reflect(self):
-    # reflected output is (dcba|abcd|dcba)
-
-    # Test down shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 1., 2.],
-         [0., 1., 2.],
-         [3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'reflect')
-
-    # Test up shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11.],
-         [12., 13., 14.],
-         [12., 13., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'reflect')
-
-    # Test left shift by 1.
-    # reflected output is (dcba|abcd|dcba)
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[1., 2., 2.],
-         [4., 5., 5.],
-         [7., 8., 8.],
-         [10., 11., 11.],
-         [13., 14., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'reflect')
-
-    # Test right shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 0., 1.],
-         [3., 3., 4],
-         [6., 6., 7.],
-         [9., 9., 10.],
-         [12., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'reflect')
-
-  def test_random_translation_wrap(self):
-    # wrapped output is (abcd|abcd|abcd)
-
-    # Test down shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[12., 13., 14.],
-         [0., 1., 2.],
-         [3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'wrap')
-
-    # Test up shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11.],
-         [12., 13., 14.],
-         [0., 1., 2.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'wrap')
-
-    # Test left shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[1., 2., 0.],
-         [4., 5., 3.],
-         [7., 8., 6.],
-         [10., 11., 9.],
-         [13., 14., 12.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'wrap')
-
-    # Test right shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[2., 0., 1.],
-         [5., 3., 4],
-         [8., 6., 7.],
-         [11., 9., 10.],
-         [14., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'wrap')
-
-  def test_random_translation_nearest(self):
-    # nearest output is (aaaa|abcd|dddd)
-
-    # Test down shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 1., 2.],
-         [0., 1., 2.],
-         [3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'nearest')
-
-    # Test up shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11.],
-         [12., 13., 14.],
-         [12., 13., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'nearest')
-
-    # Test left shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[1., 2., 2.],
-         [4., 5., 5.],
-         [7., 8., 8.],
-         [10., 11., 11.],
-         [13., 14., 14.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'nearest')
-
-    # Test right shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 0., 1.],
-         [3., 3., 4],
-         [6., 6., 7.],
-         [9., 9., 10.],
-         [12., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'nearest')
-
-  def test_random_translation_constant_0(self):
-    # constant output is (0000|abcd|0000)
-
-    # Test down shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 0., 0.],
-         [0., 1., 2.],
-         [3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'constant')
-
-    # Test up shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11.],
-         [12., 13., 14.],
-         [0., 0., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'constant')
-
-    # Test left shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[1., 2., 0.],
-         [4., 5., 0.],
-         [7., 8., 0.],
-         [10., 11., 0.],
-         [13., 14., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'constant')
-
-    # Test right shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 0., 1.],
-         [0., 3., 4],
-         [0., 6., 7.],
-         [0., 9., 10.],
-         [0., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(transform_matrix, expected_output,
-                                         'constant')
-
-  def test_random_translation_constant_1(self):
-    with compat.forward_compatibility_horizon(2020, 8, 6):
-      # constant output is (1111|abcd|1111)
-
-      # Test down shift by 1.
-      # pyformat: disable
-      expected_output = np.asarray(
-          [[1., 1., 1.],
-           [0., 1., 2.],
-           [3., 4., 5.],
-           [6., 7., 8],
-           [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32)
-      # pyformat: enable
-      transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]])
-      self._run_random_transform_with_mock(
-          transform_matrix, expected_output, 'constant', fill_value=1.0)
-
-      # Test up shift by 1.
-      # pyformat: disable
-      expected_output = np.asarray(
-          [[3., 4., 5.],
-           [6., 7., 8],
-           [9., 10., 11.],
-           [12., 13., 14.],
-           [1., 1., 1.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-      # pyformat: enable
-      transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]])
-      self._run_random_transform_with_mock(
-          transform_matrix, expected_output, 'constant', fill_value=1.0)
-
-      # Test left shift by 1.
-      # pyformat: disable
-      expected_output = np.asarray(
-          [[1., 2., 1.],
-           [4., 5., 1.],
-           [7., 8., 1.],
-           [10., 11., 1.],
-           [13., 14., 1.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-      # pyformat: enable
-      transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]])
-      self._run_random_transform_with_mock(
-          transform_matrix, expected_output, 'constant', fill_value=1.0)
-
-      # Test right shift by 1.
-      # pyformat: disable
-      expected_output = np.asarray(
-          [[1., 0., 1.],
-           [1., 3., 4],
-           [1., 6., 7.],
-           [1., 9., 10.],
-           [1., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-      # pyformat: enable
-      transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]])
-      self._run_random_transform_with_mock(
-          transform_matrix, expected_output, 'constant', fill_value=1.0)
-
-  def test_random_translation_nearest_interpolation(self):
-    # nearest interpolation with constant fill; output is (0000|abcd|0000)
-
-    # Test down shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 0., 0.],
-         [0., 1., 2.],
-         [3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., -1., 0., 0.]])
-    self._run_random_transform_with_mock(
-        transform_matrix,
-        expected_output,
-        mode='constant',
-        interpolation='nearest')
-
-    # Test up shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[3., 4., 5.],
-         [6., 7., 8],
-         [9., 10., 11.],
-         [12., 13., 14.],
-         [0., 0., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 0., 0., 1., 1., 0., 0.]])
-    self._run_random_transform_with_mock(
-        transform_matrix,
-        expected_output,
-        mode='constant',
-        interpolation='nearest')
-
-    # Test left shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[1., 2., 0.],
-         [4., 5., 0.],
-         [7., 8., 0.],
-         [10., 11., 0.],
-         [13., 14., 0.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., 1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(
-        transform_matrix,
-        expected_output,
-        mode='constant',
-        interpolation='nearest')
-
-    # Test right shift by 1.
-    # pyformat: disable
-    expected_output = np.asarray(
-        [[0., 0., 1.],
-         [0., 3., 4],
-         [0., 6., 7.],
-         [0., 9., 10.],
-         [0., 12., 13.]]).reshape((1, 5, 3, 1)).astype(np.float32)
-    # pyformat: enable
-    transform_matrix = np.asarray([[1., 0., -1., 0., 1., 0., 0., 0.]])
-    self._run_random_transform_with_mock(
-        transform_matrix,
-        expected_output,
-        mode='constant',
-        interpolation='nearest')
-
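Each `transform_matrix` row above is an 8-parameter projective transform `[a0, a1, a2, a3, a4, a5, a6, a7]`. As I read the expected outputs, an output pixel at `(x, y)` is sampled from the input location `((a0*x + a1*y + a2)/k, (a3*x + a4*y + a5)/k)` with `k = a6*x + a7*y + 1`, so `[1, 0, 1, 0, 1, 0, 0, 0]` reads one column to the right of each output pixel and therefore shifts content left by one pixel. A minimal NumPy sketch under that assumption, with nearest sampling and constant fill (the function is illustrative, not the layer's implementation):

    import numpy as np

    def apply_transform(image, t, fill_value=0.0):
        # image: (H, W); t: [a0, a1, a2, a3, a4, a5, a6, a7].
        h, w = image.shape
        out = np.full((h, w), fill_value, dtype=image.dtype)
        for y in range(h):
            for x in range(w):
                k = t[6] * x + t[7] * y + 1.0
                x_in = int(round((t[0] * x + t[1] * y + t[2]) / k))
                y_in = int(round((t[3] * x + t[4] * y + t[5]) / k))
                if 0 <= x_in < w and 0 <= y_in < h:
                    out[y, x] = image[y_in, x_in]
        return out

    img = np.arange(15, dtype=np.float32).reshape((5, 3))
    print(apply_transform(img, [1., 0., 1., 0., 1., 0., 0., 0.]))
    # First row is [1., 2., 0.], matching the 'constant' left-shift case above.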
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomRotationTest(keras_parameterized.TestCase):
-
-  def _run_test(self, factor):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    kwargs = {'factor': factor}
-    with testing_utils.use_gpu():
-      testing_utils.layer_test(
-          image_preprocessing.RandomRotation,
-          kwargs=kwargs,
-          input_shape=(num_samples, orig_height, orig_width, channels),
-          expected_output_shape=(None, orig_height, orig_width, channels))
-
-  @parameterized.named_parameters(('random_rotate_4', .4),
-                                  ('random_rotate_3', .3),
-                                  ('random_rotate_tuple_factor', (-.5, .4)))
-  def test_random_rotation(self, factor):
-    self._run_test(factor)
-
-  def test_random_rotation_inference(self):
-    with CustomObjectScope(
-        {'RandomRotation': image_preprocessing.RandomRotation}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomRotation(.5)
-        actual_output = layer(input_images, training=0)
-        self.assertAllClose(expected_output, actual_output)
-
-  def test_distribution_strategy(self):
-    """Tests that RandomRotation can be created within distribution strategies.
-    """
-    input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-    with testing_utils.use_gpu():
-      strat = MirroredStrategy(devices=['cpu', 'gpu'])
-      with strat.scope():
-        layer = image_preprocessing.RandomRotation(.5)
-        output = strat.run(lambda: layer(input_images, training=True))
-      values = output.values
-      self.assertAllEqual(2, len(values))
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomRotation(.5, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomRotation.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomZoomTest(keras_parameterized.TestCase):
-
-  def _run_test(self, height_factor, width_factor):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    kwargs = {'height_factor': height_factor, 'width_factor': width_factor}
-    with testing_utils.use_gpu():
-      testing_utils.layer_test(
-          image_preprocessing.RandomZoom,
-          kwargs=kwargs,
-          input_shape=(num_samples, orig_height, orig_width, channels),
-          expected_output_shape=(None, orig_height, orig_width, channels))
-
-  @parameterized.named_parameters(
-      ('random_zoom_4_by_6', -.4, -.6), ('random_zoom_2_by_3', -.2, -.3),
-      ('random_zoom_tuple_factor', (-.4, -.5), (-.2, -.3)))
-  def test_random_zoom_in(self, height_factor, width_factor):
-    self._run_test(height_factor, width_factor)
-
-  @parameterized.named_parameters(
-      ('random_zoom_4_by_6', .4, .6), ('random_zoom_2_by_3', .2, .3),
-      ('random_zoom_tuple_factor', (.4, .5), (.2, .3)))
-  def test_random_zoom_out(self, height_factor, width_factor):
-    self._run_test(height_factor, width_factor)
-
-  def test_random_zoom_in_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype)
-        layer = image_preprocessing.RandomZoom((-.5, -.5), (-.5, -.5),
-                                               interpolation='nearest')
-        output_image = layer(np.expand_dims(input_image, axis=0))
-        # pyformat: disable
-        expected_output = np.asarray([
-            [6, 7, 7, 8, 8],
-            [11, 12, 12, 13, 13],
-            [11, 12, 12, 13, 13],
-            [16, 17, 17, 18, 18],
-            [16, 17, 17, 18, 18]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_zoom_out_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype)
-        layer = image_preprocessing.RandomZoom((.5, .5), (.8, .8),
-                                               fill_mode='constant',
-                                               interpolation='nearest')
-        output_image = layer(np.expand_dims(input_image, axis=0))
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 0, 0, 0, 0],
-            [0, 5, 7, 9, 0],
-            [0, 10, 12, 14, 0],
-            [0, 20, 22, 24, 0],
-            [0, 0, 0, 0, 0]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_zoom_out_numeric_preserve_aspect_ratio(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype)
-        layer = image_preprocessing.RandomZoom((.5, .5),
-                                               fill_mode='constant',
-                                               interpolation='nearest')
-        output_image = layer(np.expand_dims(input_image, axis=0))
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 0, 0, 0, 0],
-            [0, 6, 7, 9, 0],
-            [0, 11, 12, 14, 0],
-            [0, 21, 22, 24, 0],
-            [0, 0, 0, 0, 0]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 5, 5, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_zoom_inference(self):
-    with CustomObjectScope({'RandomZoom': image_preprocessing.RandomZoom}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomZoom(.5, .5)
-        actual_output = layer(input_images, training=0)
-        self.assertAllClose(expected_output, actual_output)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomZoom(.5, .6, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomZoom.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
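A note on the factor convention the zoom tests above exercise: the content is rescaled by roughly `1 + factor`, so negative factors zoom in (a smaller central window fills the frame) and positive factors zoom out (the image shrinks and the border is filled per `fill_mode`), while the output keeps the input's spatial size. A tiny sketch of that reading (my interpretation of the tests, not layer code):

    def zoom_scale(factor):
        # Approximate rescaling applied to the image content.
        return 1.0 + factor

    print(zoom_scale(-0.5))  # 0.5: zoom in, as in test_random_zoom_in_numeric
    print(zoom_scale(0.5))   # 1.5: zoom out, as in test_random_zoom_out_numeric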
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomHeightTest(keras_parameterized.TestCase):
-
-  def _run_test(self, factor):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    with testing_utils.use_gpu():
-      img = np.random.random((num_samples, orig_height, orig_width, channels))
-      layer = image_preprocessing.RandomHeight(factor)
-      img_out = layer(img, training=True)
-      self.assertEqual(img_out.shape[0], 2)
-      self.assertEqual(img_out.shape[2], 8)
-      self.assertEqual(img_out.shape[3], 3)
-
-  @parameterized.named_parameters(('random_height_4_by_6', (.4, .6)),
-                                  ('random_height_3_by_2', (-.3, .2)),
-                                  ('random_height_3', .3))
-  def test_random_height_basic(self, factor):
-    self._run_test(factor)
-
-  def test_valid_random_height(self):
-    # Mock the random op to return 0 so the sampled factor is minval = -0.4,
-    # i.e. a resize ratio of 1 - 0.4 = 0.6.
-    mock_factor = 0
-    with test.mock.patch.object(
-        gen_stateful_random_ops, 'stateful_uniform', return_value=mock_factor):
-      with test.mock.patch.object(
-          gen_stateless_random_ops_v2,
-          'stateless_random_uniform_v2',
-          return_value=mock_factor):
-        with testing_utils.use_gpu():
-          img = np.random.random((12, 5, 8, 3))
-          layer = image_preprocessing.RandomHeight(.4)
-          img_out = layer(img, training=True)
-          self.assertEqual(img_out.shape[1], 3)
-
-  def test_random_height_longer_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 6), (2, 3, 1)).astype(dtype)
-        layer = image_preprocessing.RandomHeight(factor=(1., 1.))
-        # The output of RandomHeight() is float32 unless `interpolation` is
-        # set to `ResizeMethod.NEAREST_NEIGHBOR`; cast the output to the
-        # desired dtype before comparing.
-        output_image = math_ops.cast(
-            layer(np.expand_dims(input_image, axis=0)), dtype=dtype)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 1, 2],
-            [0.75, 1.75, 2.75],
-            [2.25, 3.25, 4.25],
-            [3, 4, 5]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 4, 3, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_height_shorter_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 8), (4, 2, 1)).astype(dtype)
-        layer = image_preprocessing.RandomHeight(
-            factor=(-.5, -.5), interpolation='nearest')
-        output_image = layer(np.expand_dims(input_image, axis=0))
-        # pyformat: disable
-        expected_output = np.asarray([
-            [2, 3],
-            [6, 7]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 2, 2, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_height_invalid_factor(self):
-    with self.assertRaises(ValueError):
-      image_preprocessing.RandomHeight((-1.5, .4))
-
-  def test_random_height_inference(self):
-    with CustomObjectScope({'RandomHeight': image_preprocessing.RandomHeight}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomHeight(.5)
-        actual_output = layer(input_images, training=0)
-        self.assertAllClose(expected_output, actual_output)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomHeight(.5, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomHeight.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
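The numeric tests above (and the RandomWidth tests below) are consistent with the varied dimension being resized to roughly `size * (1 + factor)` while the other dimensions are untouched. A minimal arithmetic sketch of that assumption (the helper name is mine):

    def adjusted_size(size, factor):
        # Size of the randomized dimension for a given sampled factor.
        return int(round(size * (1.0 + factor)))

    print(adjusted_size(2, 1.0))   # 4, as in test_random_height_longer_numeric
    print(adjusted_size(4, -0.5))  # 2, as in test_random_height_shorter_numeric
    print(adjusted_size(5, -0.4))  # 3, as in test_valid_random_height (mocked factor)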
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class RandomWidthTest(keras_parameterized.TestCase):
-
-  def _run_test(self, factor):
-    np.random.seed(1337)
-    num_samples = 2
-    orig_height = 5
-    orig_width = 8
-    channels = 3
-    with testing_utils.use_gpu():
-      img = np.random.random((num_samples, orig_height, orig_width, channels))
-      layer = image_preprocessing.RandomWidth(factor)
-      img_out = layer(img, training=True)
-      self.assertEqual(img_out.shape[0], 2)
-      self.assertEqual(img_out.shape[1], 5)
-      self.assertEqual(img_out.shape[3], 3)
-
-  @parameterized.named_parameters(('random_width_4_by_6', (.4, .6)),
-                                  ('random_width_3_by_2', (-.3, .2)),
-                                  ('random_width_3', .3))
-  def test_random_width_basic(self, factor):
-    self._run_test(factor)
-
-  def test_valid_random_width(self):
-    # Mock the random op to return 0 so the sampled factor is minval = -0.4,
-    # i.e. a resize ratio of 1 - 0.4 = 0.6.
-    mock_factor = 0
-    with test.mock.patch.object(
-        gen_stateful_random_ops, 'stateful_uniform', return_value=mock_factor):
-      with test.mock.patch.object(
-          gen_stateless_random_ops_v2,
-          'stateless_random_uniform_v2',
-          return_value=mock_factor):
-        with testing_utils.use_gpu():
-          img = np.random.random((12, 8, 5, 3))
-          layer = image_preprocessing.RandomWidth(.4)
-          img_out = layer(img, training=True)
-          self.assertEqual(img_out.shape[2], 3)
-
-  def test_random_width_longer_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 6), (3, 2, 1)).astype(dtype)
-        layer = image_preprocessing.RandomWidth(factor=(1., 1.))
-        # The output of RandomWidth() is float32 unless `interpolation` is
-        # set to `ResizeMethod.NEAREST_NEIGHBOR`; cast the output to the
-        # desired dtype before comparing.
-        output_image = math_ops.cast(
-            layer(np.expand_dims(input_image, axis=0)), dtype=dtype)
-        # pyformat: disable
-        expected_output = np.asarray([
-            [0, 0.25, 0.75, 1],
-            [2, 2.25, 2.75, 3],
-            [4, 4.25, 4.75, 5]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 3, 4, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_width_shorter_numeric(self):
-    for dtype in (np.int64, np.float32):
-      with testing_utils.use_gpu():
-        input_image = np.reshape(np.arange(0, 8), (2, 4, 1)).astype(dtype)
-        layer = image_preprocessing.RandomWidth(
-            factor=(-.5, -.5), interpolation='nearest')
-        output_image = layer(np.expand_dims(input_image, axis=0))
-        # pyformat: disable
-        expected_output = np.asarray([
-            [1, 3],
-            [5, 7]
-        ]).astype(dtype)
-        # pyformat: enable
-        expected_output = np.reshape(expected_output, (1, 2, 2, 1))
-        self.assertAllEqual(expected_output, output_image)
-
-  def test_random_width_invalid_factor(self):
-    with self.assertRaises(ValueError):
-      image_preprocessing.RandomWidth((-1.5, .4))
-
-  def test_random_width_inference(self):
-    with CustomObjectScope({'RandomWidth': image_preprocessing.RandomWidth}):
-      input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
-      expected_output = input_images
-      with testing_utils.use_gpu():
-        layer = image_preprocessing.RandomWidth(.5)
-        actual_output = layer(input_images, training=0)
-        self.assertAllClose(expected_output, actual_output)
-
-  @testing_utils.run_v2_only
-  def test_config_with_custom_name(self):
-    layer = image_preprocessing.RandomWidth(.5, name='image_preproc')
-    config = layer.get_config()
-    layer_1 = image_preprocessing.RandomWidth.from_config(config)
-    self.assertEqual(layer_1.name, layer.name)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class LearningPhaseTest(keras_parameterized.TestCase):
-
-  def test_plain_call(self):
-    layer = image_preprocessing.RandomWidth(.5, seed=123)
-    shape = (12, 12, 3)
-    img = np.random.random((12,) + shape)
-    out = layer(img)  # Default to training=True
-    self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape)
-
-    out = layer(img, training=True)
-    self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape)
-
-    out = layer(img, training=False)
-    self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape)
-
-  def test_call_in_container(self):
-    layer1 = image_preprocessing.RandomWidth(.5, seed=123)
-    layer2 = image_preprocessing.RandomHeight(.5, seed=123)
-    seq = sequential.Sequential([layer1, layer2])
-
-    shape = (12, 12, 3)
-    img = np.random.random((12,) + shape)
-    out = seq(img)  # Default to training=True
-    self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape)
-
-    out = seq(img, training=True)
-    self.assertNotEqual(tuple(int(i) for i in out.shape[1:]), shape)
-
-    out = seq(img, training=False)
-    self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape)
-
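The LearningPhaseTest above pins down the learning-phase contract: augmentation runs when `training=True` (the default for a direct call in these tests) and the layer is an identity when `training=False`. A usage sketch of the same contract, assuming the public `tf.keras.layers.RandomWidth` that supersedes this legacy copy behaves the same way:

    import numpy as np
    import tensorflow as tf

    layer = tf.keras.layers.RandomWidth(0.5, seed=123)
    img = np.random.random((4, 12, 12, 3)).astype(np.float32)

    print(layer(img).shape)                  # width may be resized (augmentation path)
    print(layer(img, training=False).shape)  # (4, 12, 12, 3): identity at inference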
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class DeterminismTest(keras_parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ('random_flip', image_preprocessing.RandomFlip),
-      ('random_contrast',
-       functools.partial(image_preprocessing.RandomContrast, factor=1.)),
-      ('random_crop',
-       functools.partial(image_preprocessing.RandomCrop, height=2, width=2)),
-      ('random_translation',
-       functools.partial(image_preprocessing.RandomTranslation, 0.3, 0.2)),
-      ('random_rotation',
-       functools.partial(image_preprocessing.RandomRotation, 0.5)),
-      ('random_zoom', functools.partial(image_preprocessing.RandomZoom, 0.2)),
-      ('random_height', functools.partial(image_preprocessing.RandomHeight,
-                                          0.4)),
-      ('random_width', functools.partial(image_preprocessing.RandomWidth, 0.3)),
-  )
-  def test_seed_constructor_arg(self, layer_cls):
-    input_image = np.random.random((2, 5, 8, 3)).astype(np.float32)
-
-    layer1 = layer_cls(seed=0.)
-    layer2 = layer_cls(seed=0.)
-    layer1_output = layer1(input_image)
-    layer2_output = layer2(input_image)
-
-    self.assertAllClose(layer1_output.numpy().tolist(),
-                        layer2_output.numpy().tolist())
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup.py b/tensorflow/python/keras/layers/preprocessing/index_lookup.py
deleted file mode 100644
index 08b14d2..0000000
--- a/tensorflow/python/keras/layers/preprocessing/index_lookup.py
+++ /dev/null
@@ -1,931 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras index lookup preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import collections
-import json
-import operator
-
-import numpy as np
-
-from tensorflow.python.eager import context
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.keras.layers.preprocessing import category_encoding
-from tensorflow.python.keras.layers.preprocessing import table_utils
-from tensorflow.python.keras.saving.saved_model import layer_serialization
-from tensorflow.python.keras.utils import layer_utils
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util import compat
-
-INT = "int"
-MULTI_HOT = "multi_hot"
-ONE_HOT = "one_hot"
-COUNT = "count"
-TF_IDF = "tf_idf"
-
-_VOCAB_NAME = "vocab"
-_IDF_WEIGHTS_NAME = "idf_weights"
-
-
-class _NullInitializer(lookup_ops.TextFileInitializer):
-  """A placeholder initializer for restoring this layer from a SavedModel."""
-
-  def __init__(self, key_dtype, value_dtype):
-    """Construct a table initializer object.
-
-    Args:
-      key_dtype: Type of the table keys.
-      value_dtype: Type of the table values.
-    """
-    self._key_dtype = dtypes.as_dtype(key_dtype)
-    self._value_dtype = dtypes.as_dtype(value_dtype)
-
-  @property
-  def key_dtype(self):
-    """The expected table key dtype."""
-    return self._key_dtype
-
-  @property
-  def value_dtype(self):
-    """The expected table value dtype."""
-    return self._value_dtype
-
-  def initialize(self, table):
-    """Returns the table initialization op."""
-    pass
-
-  @property
-  def _shared_name(self):
-    """Returns a shared name to be used by the table."""
-    shared_name = "NULL_INITIALIZER_"
-    if context.executing_eagerly():
-      # Ensure a unique name when eager execution is enabled to avoid spurious
-      # sharing issues.
-      shared_name += str(backend.get_uid(shared_name))
-    return shared_name
-
-
-class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
-  """Maps values from a vocabulary to integer indices.
-
-  This layer translates a set of arbitrary hashables into an integer output via
-  a table-based lookup, with optional out-of-vocabulary handling. This is the
-  basis layer for both IntegerLookup and StringLookup; it holds the common
-  logic but is not intended to be exported as part of the Keras API.
-
-  Args:
-    max_tokens: The maximum size of the vocabulary for this layer. If None,
-      there is no cap on the size of the vocabulary. Note that this size
-      includes the OOV and mask tokens.
-    num_oov_indices: The number of out-of-vocabulary tokens to use. If this
-      value is more than 1, OOV inputs are hashed to determine their OOV value.
-      If this value is 0, OOV inputs will cause an error when calling the layer.
-    mask_token: A token that represents masked inputs. When `output_mode` is
-      `"int"`, the token is included in vocabulary and mapped to index 0. In
-      other output modes, the token will not appear in the vocabulary and
-      instances of the mask token in the input will be dropped. If set to None,
-      no mask term will be added.
-    oov_token: Only used when `invert` is True. The token to return for OOV
-      indices.
-    vocabulary: An optional list of vocabulary terms. If the list contains the
-      same token multiple times, an error will be thrown.
-    invert: Only valid when `output_mode` is `"int"`. If True, this layer will
-      map indices to vocabulary items instead of mapping vocabulary items to
-      indices. Defaults to False.
-    output_mode: Specification for the output of the layer. Defaults to `"int"`.
-      Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or
-      `"tf_idf"` configuring the layer as follows:
-        - `"int"`: Return the raw integer indices of the input tokens.
-        - `"one_hot"`: Encodes each individual element in the input into an
-          array the same size as the vocabulary, containing a 1 at the element
-          index. If the last dimension is size 1, will encode on that dimension.
-          If the last dimension is not size 1, will append a new dimension for
-          the encoded output.
-        - `"multi_hot"`: Encodes each sample in the input into a single array
-          the same size as the vocabulary, containing a 1 for each vocabulary
-          term present in the sample. Treats the last dimension as the sample
-          dimension; if the input shape is (..., sample_length), the output
-          shape will be (..., num_tokens).
-        - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-          number of times the token at that index appeared in the sample.
-        - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to
-          find the value in each token slot.
-    pad_to_max_tokens: Only valid when `output_mode` is `"multi_hot"`,
-      `"count"`, or `"tf_idf"`. If True, the output will have its feature axis
-      padded to `max_tokens` even if the number of unique tokens in the
-      vocabulary is less than max_tokens, resulting in a tensor of shape
-      [batch_size, max_tokens] regardless of vocabulary size. Defaults to False.
-    sparse: Boolean. Only applicable to `"multi_hot"` and `"count"` output
-      modes. If True, returns a `SparseTensor` instead of a dense `Tensor`.
-      Defaults to False.
-  """
-
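The `output_mode` options documented above are exposed through the public lookup layers built on this class. An illustrative sketch using `tf.keras.layers.StringLookup` (the vocabulary, data, and exact index assignments are examples only and depend on `num_oov_indices`/`mask_token`):

    import tensorflow as tf

    vocab = ["a", "b", "c", "d"]
    data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])

    int_lookup = tf.keras.layers.StringLookup(vocabulary=vocab)  # output_mode="int"
    multi_hot = tf.keras.layers.StringLookup(vocabulary=vocab,
                                             output_mode="multi_hot")

    print(int_lookup(data))  # per-token integer indices; "z" maps to the OOV index
    print(multi_hot(data))   # one row per sample over the vocabulary (plus OOV slot)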
-  def __init__(self,
-               max_tokens,
-               num_oov_indices,
-               mask_token,
-               oov_token,
-               vocabulary=None,
-               invert=False,
-               output_mode=INT,
-               sparse=False,
-               pad_to_max_tokens=False,
-               **kwargs):
-    # If max_tokens is set, the value must be greater than 1 - otherwise we
-    # are creating a 0-element vocab, which doesn't make sense.
-    if max_tokens is not None and max_tokens <= 1:
-      raise ValueError("If set, `max_tokens` must be greater than 1. "
-                       "You passed {}".format(max_tokens))
-
-    if num_oov_indices < 0:
-      raise ValueError("`num_oov_indices` must be greater than or equal to 0. "
-                       "You passed {}".format(num_oov_indices))
-
-    # Support deprecated names for output_modes.
-    if output_mode == "binary":
-      output_mode = MULTI_HOT
-    if output_mode == "tf-idf":
-      output_mode = TF_IDF
-    # 'output_mode' must be one of (INT, ONE_HOT, MULTI_HOT, COUNT, TF_IDF)
-    layer_utils.validate_string_arg(
-        output_mode,
-        allowable_strings=(INT, ONE_HOT, MULTI_HOT, COUNT, TF_IDF),
-        layer_name=self.__class__.__name__,
-        arg_name="output_mode")
-
-    if invert and output_mode != INT:
-      raise ValueError("`output_mode` must be {} when `invert` is true. You "
-                       "passed {}".format(INT, output_mode))
-
-    self.invert = invert
-    self.max_tokens = max_tokens
-    self.num_oov_indices = num_oov_indices
-    self.output_mode = output_mode
-    self.sparse = sparse
-    self.pad_to_max_tokens = pad_to_max_tokens
-    self._called = False
-
-    # A note on vocab_size: we need to always keep a non-Tensor representation
-    # of vocab_size around to use in graph building. Because we might be
-    # in a tf.function, we can't rely on evaluating the actual tables to
-    # find the value either.
-    self._vocab_size = None
-    # We need to keep track our current vocab size outside of our layer weights
-    # to support a static output shape when `output_mode != INT`. The bincount
-    # ops do not set shape on their outputs, which means we have to set it
-    # ourselves. We persist the current vocab size as a hidden part of the
-    # config when serializing our model.
-    if "vocabulary_size" in kwargs:
-      self._vocab_size = kwargs["vocabulary_size"]
-      del kwargs["vocabulary_size"]
-
-    restore_from_static_table = kwargs.pop("has_static_table", False)
-
-    # Make sure the mask token and oov token are truly of the dtype we want. We
-    # can ignore strings here, because they have only one dtype.
-    dtype = kwargs["dtype"]
-    if dtype == dtypes.int32:
-      mask_token = None if mask_token is None else np.int32(mask_token)
-      oov_token = None if oov_token is None else np.int32(oov_token)
-    elif dtype == dtypes.int64:
-      mask_token = None if mask_token is None else np.int64(mask_token)
-      oov_token = None if oov_token is None else np.int64(oov_token)
-    self.mask_token = mask_token
-    self.oov_token = oov_token
-
-    if max_tokens is not None:
-      available_vocab_size = max_tokens - self._token_start_index()
-    else:
-      available_vocab_size = None
-
-    super(IndexLookup, self).__init__(
-        combiner=_IndexLookupCombiner(
-            vocab_size=available_vocab_size,
-            mask_value=mask_token,
-            oov_value=oov_token,
-            compute_idf=(output_mode == TF_IDF)),
-        **kwargs)
-
-    # We need to save the key dtype so that we know if we're expecting int64
-    # keys. If we are, we will cast int32 inputs to int64 as well.
-    if invert:
-      self._key_dtype = dtypes.int64
-      self._value_dtype = self.dtype
-      self._mask_key = 0
-      self._mask_value = mask_token
-      key_index = lookup_ops.TextFileIndex.LINE_NUMBER
-      value_index = lookup_ops.TextFileIndex.WHOLE_LINE
-      default_value = self.oov_token
-      oov_indices = None
-    else:
-      self._key_dtype = self.dtype
-      self._value_dtype = dtypes.int64
-      self._mask_key = mask_token
-      key_index = lookup_ops.TextFileIndex.WHOLE_LINE
-      value_index = lookup_ops.TextFileIndex.LINE_NUMBER
-      # Masks should map to 0 for int output and be dropped otherwise. Max ints
-      # will be dropped from the bincount op.
-      self._mask_value = 0 if self.output_mode == INT else dtypes.int64.max
-      oov_start = self._oov_start_index()
-      token_start = self._token_start_index()
-      if self.num_oov_indices == 0:
-        # If there are no OOV indices, we map OOV tokens to -1 and error out
-        # during call if we find a negative index.
-        default_value = -1
-        oov_indices = None
-      elif self.num_oov_indices == 1:
-        # If there is only one OOV index, we can set that index as the default
-        # value of the index_lookup table.
-        default_value = oov_start
-        oov_indices = None
-      else:
-        # If we have multiple OOV values, we need to do a further hashing step;
-        # to make this easier, we set the OOV value to -1. (This lets us do a
-        # vectorized add and cast to boolean to determine locations where we
-        # need to do extra hashing.)
-        default_value = -1
-        oov_indices = list(range(oov_start, token_start))
-
-    self._static_vocabulary_path = None
-    has_vocab_path = (vocabulary is not None and isinstance(vocabulary, str))
-    if has_vocab_path or restore_from_static_table:
-      self._has_static_table = True
-      if vocabulary is None:
-        # If we're restoring a layer that was saved with a static table
-        # initializer, we create a fake initializer object to let the code
-        # progress. The savedmodel restoration code will handle restoring
-        # the actual data.
-        initializer = _NullInitializer(self._key_dtype, self._value_dtype)
-      else:
-        if not gfile.Exists(vocabulary):
-          raise ValueError("Vocabulary file %s does not exist." % (vocabulary,))
-        self._static_vocabulary_path = vocabulary
-        num_tokens = table_utils.num_tokens_in_file(vocabulary)
-        self._vocab_size = self._token_start_index() + num_tokens
-
-        initializer = lookup_ops.TextFileInitializer(
-            filename=vocabulary,
-            key_dtype=self._key_dtype,
-            key_index=key_index,
-            value_dtype=self._value_dtype,
-            value_index=value_index,
-            value_index_offset=self._token_start_index())
-
-      self._table = lookup_ops.StaticHashTable(
-          initializer, default_value=default_value)
-      self._table_handler = table_utils.TableHandler(
-          table=self._table,
-          mask_token=self._mask_key if self.mask_token is not None else None,
-          mask_value=self._mask_value,
-          oov_tokens=oov_indices)
-
-      tracked_table = self._add_trackable(self._table, trainable=False)
-
-    else:
-      self._has_static_table = False
-      self._table = lookup_ops.MutableHashTable(
-          key_dtype=self._key_dtype,
-          value_dtype=self._value_dtype,
-          default_value=default_value,
-          name=(self._name + "_index_table"))
-      self._table_handler = table_utils.TableHandler(
-          table=self._table,
-          oov_tokens=oov_indices)
-      if vocabulary is not None:
-        self.set_vocabulary(vocabulary)
-      tracked_table = self._add_trackable(self._table, trainable=False)
-
-    if self.output_mode == TF_IDF:
-      # The TF-IDF weight may have a (None,) tensor shape. This creates a 1D
-      # variable with arbitrary shape, which we can assign any weight to as
-      # long as it has 1 dimension. To properly initialize this weight in
-      # Keras, we need to provide a custom callable initializer that does not
-      # depend on the shape of the weight (as all other initializers do),
-      # since the shape is not known yet. Hence the `lambda shape, dtype: [0]`.
-      if not self.pad_to_max_tokens or max_tokens is None:
-        initializer = lambda shape, dtype: [0]
-      else:
-        initializer = init_ops.zeros_initializer
-
-      # We are adding these here instead of in build() since they do not depend
-      # on the input shape at all.
-      idf_shape = (max_tokens,) if self.pad_to_max_tokens else (None,)
-      self.tf_idf_weights = self._add_state_variable(
-          name="idf",
-          shape=tensor_shape.TensorShape(idf_shape),
-          dtype=backend.floatx(),
-          initializer=initializer)
-
-    # This is a workaround for summary() on this layer. Because the table is
-    # not mutable during training, the effective number of parameters (and so
-    # the weight shape) is 0; we add this as an attr so that the parameter
-    # counting code in the Model object doesn't throw an attribute error.
-    tracked_table.shape = tensor_shape.TensorShape((0,))
-
-  def compute_output_shape(self, input_shape):
-    if self.output_mode == INT:
-      return input_shape
-    if self._vocab_size and not self.pad_to_max_tokens:
-      out_depth = self._vocab_size
-    else:
-      out_depth = self.max_tokens
-    return tensor_shape.TensorShape([input_shape[0], out_depth])
-
-  def compute_output_signature(self, input_spec):
-    output_shape = self.compute_output_shape(input_spec.shape.as_list())
-    output_dtype = (self._value_dtype if self.output_mode == INT
-                    else backend.floatx())
-    return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
-
-  def adapt(self, data, reset_state=True):
-    """Fits the state of the preprocessing layer to the dataset.
-
-    Overrides the default adapt method to apply relevant preprocessing to the
-    inputs before passing to the combiner.
-
-    Args:
-      data: The data to train on. It can be passed either as a tf.data Dataset,
-        or as a numpy array.
-      reset_state: Optional argument specifying whether to clear the state of
-        the layer at the start of the call to `adapt`. This must be True for
-        this layer, which does not support repeated calls to `adapt`.
-    """
-    if not reset_state:
-      raise ValueError("IndexLookup does not support streaming adapts.")
-    super(IndexLookup, self).adapt(data, reset_state)
-
-  def get_vocabulary(self, include_special_tokens=True):
-    """Returns the current vocabulary of the layer.
-
-    Args:
-      include_special_tokens: If True, the returned vocabulary will include mask
-        and OOV tokens, and a term's index in the vocabulary will equal the
-        term's index when calling the layer. If False, the returned vocabulary
-        will not include any mask or OOV tokens.
-    """
-    if self.vocabulary_size() is None:
-      return []
-
-    # The MutableHashTable data will not be sorted, so we create an inverted
-    # lookup here and use it to look up a range of indices [0, vocab_size).
-    keys, values = self._table.export()
-    vocab, indices = (values, keys) if self.invert else (keys, values)
-    lookup = collections.defaultdict(
-        lambda: self.oov_token,
-        zip(indices.numpy(), self._tensor_vocab_to_numpy(vocab)))
-    vocab = [lookup[x] for x in range(self.vocabulary_size())]
-    if self.mask_token is not None and self.output_mode == INT:
-      vocab[0] = self.mask_token
-    if not include_special_tokens:
-      vocab = vocab[self._token_start_index():]
-    return vocab
-
-  def vocabulary_size(self):
-    """Gets the current size of the layer's vocabulary.
-
-    Returns:
-      The integer size of the vocabulary, including optional mask and oov indices.
-    """
-    return self._vocab_size
-
-  def vocab_size(self):
-    logging.warning("vocab_size is deprecated, please use vocabulary_size.")
-    return self.vocabulary_size()
-
-  def get_config(self):
-    if self._has_static_table:
-      vocabulary_path = self._static_vocabulary_path
-    else:
-      vocabulary_path = None
-
-    config = {
-        "invert": self.invert,
-        "max_tokens": self.max_tokens,
-        "num_oov_indices": self.num_oov_indices,
-        "oov_token": self.oov_token,
-        "mask_token": self.mask_token,
-        "output_mode": self.output_mode,
-        "pad_to_max_tokens": self.pad_to_max_tokens,
-        "vocabulary_size": self.vocabulary_size(),
-        "vocabulary": vocabulary_path,
-    }
-    if self._has_static_table:
-      config["has_static_table"] = True
-
-    base_config = super(IndexLookup, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def count_params(self):
-    # This method counts the number of scalars in the weights of this layer.
-    # Since this layer doesn't have any /actual/ weights (nothing in this
-    # layer can be trained; we only use the weight abstraction for ease of
-    # saving), we return 0.
-    return 0
-
-  def set_vocabulary(self, vocabulary, idf_weights=None):
-    """Sets vocabulary (and optionally document frequency) data for this layer.
-
-    This method sets the vocabulary and idf weights for this layer directly,
-    instead of analyzing a dataset through `adapt`. It should be used whenever
-    the vocab (and optionally document frequency) information is already known.
-    If vocabulary data is already present in the layer, this method will replace
-    it.
-
-    Args:
-      vocabulary: An array, numpy array, or tensor of hashable tokens.
-      idf_weights: An array, numpy array, or tensor of inverse document
-        frequency weights with equal length to vocab. Only necessary if the
-        layer output_mode is TF_IDF.
-
-    Raises:
-      ValueError: If there are too many inputs, the inputs do not match, or
-        input data is missing.
-      RuntimeError: If the vocabulary cannot be set when this function is
-        called. This happens when the output mode is `"multi_hot"`, `"count"`,
-        or `"tfidf"`, `pad_to_max_tokens` is False, and the layer itself has
-        already been called.
-      RuntimeError: If a tensor vocabulary is passed outside of eager execution.
-    """
-    if self._has_static_table:
-      raise RuntimeError("Layer {} was created with a static file-based table "
-                         "because a file path was passed to the layer "
-                         "init. Layers created with static file-based tables "
-                         "do not support changing the vocabulary after "
-                         "creation.".format(self.name))
-
-    if self.output_mode != TF_IDF and idf_weights is not None:
-      raise ValueError("`idf_weights` should only be set if output_mode is "
-                       "TF_IDF. output_mode is {}.".format(self.output_mode))
-
-    if (self.output_mode in [MULTI_HOT, COUNT, TF_IDF] and self._called and
-        not self.pad_to_max_tokens):
-      raise RuntimeError("When using {} mode and `pad_to_max_tokens` is "
-                         "False, the vocabulary cannot be changed after the "
-                         "layer is called.".format(self.output_mode))
-
-    if not context.executing_eagerly() and (tensor_util.is_tensor(vocabulary) or
-                                            tensor_util.is_tensor(idf_weights)):
-      raise RuntimeError(
-          "Cannot set a tensor vocabulary on {} layer {} when not executing "
-          "eagerly. Create this layer or call `set_vocabulary` outside of "
-          "any `tf.function`s and with eager execution enabled.".format(
-              self.__class__.__name__, self.name))
-
-    # TODO(mattdangerw): for better performance we should rewrite this entire
-    # function to operate on tensors and convert vocabulary to a tensor here.
-    if tensor_util.is_tensor(vocabulary):
-      vocabulary = self._tensor_vocab_to_numpy(vocabulary)
-    if tensor_util.is_tensor(idf_weights):
-      idf_weights = idf_weights.numpy()
-
-    oov_start = self._oov_start_index()
-    token_start = self._token_start_index()
-    should_have_mask = (oov_start > 0)
-    has_mask = should_have_mask and vocabulary[0] == self.mask_token
-
-    should_have_oov = (self.num_oov_indices > 0)
-    expected_oov = [self.oov_token] * self.num_oov_indices
-    found_oov = vocabulary[oov_start:token_start]
-    has_oov = should_have_oov and found_oov == expected_oov
-    # If we get a numpy array, then has_oov may end up being a numpy array
-    # instead of a bool. Fix this by collapsing the variable if it's not bool.
-    if not isinstance(has_oov, bool):
-      has_oov = any(has_oov)
-
-    if all([should_have_mask, has_mask, should_have_oov]) and not has_oov:
-      raise ValueError(
-          "Invalid vocabulary format. The layer was created with "
-          "`mask_token={mask}` and `oov_token={oov}`. These tokens should be "
-          "included in the provided vocabulary. The passed vocabulary has the "
-          "correct mask token `{mask}` at index 0, but does not have the OOV "
-          "token `{oov}` in indices [{start}:{end}]. Instead, we found "
-          "`{found}`. Was this vocabulary generated by a layer with "
-          "incompatible settings?".format(
-              mask=self.mask_token,
-              oov=self.oov_token,
-              start=oov_start,
-              end=token_start,
-              found=found_oov))
-
-    if all([should_have_oov, has_oov, should_have_mask]) and not has_mask:
-      raise ValueError(
-          "Invalid vocabulary format. The layer was created with "
-          "`mask_token={mask}` and `oov_token={oov}`. These tokens should be "
-          "included in the provided vocabulary. The passed vocabulary has the "
-          "correct OOV token `{oov}` at indices [{start}:{end}], but does not "
-          "have the mask token `{mask}` in index 0. Instead, we found "
-          "`{found}`. Was this vocabulary generated by a layer with "
-          "incompatible settings?".format(
-              mask=self.mask_token,
-              oov=self.oov_token,
-              start=oov_start,
-              end=token_start,
-              found=vocabulary[0]))
-
-    found_special_tokens = has_oov or has_mask
-    if found_special_tokens:
-      tokens = vocabulary[token_start:]
-    else:
-      tokens = vocabulary
-
-    repeated_tokens = table_utils.find_repeated_tokens(tokens)
-    if repeated_tokens:
-      raise ValueError("The passed vocabulary has at least one repeated "
-                       "term. Please uniquify your dataset. The repeated terms "
-                       "are {}".format(repeated_tokens))
-
-    if self.mask_token in tokens:
-      raise ValueError("Reserved mask token {} was found in the passed "
-                       "vocabulary at index {}. Please either remove the "
-                       "reserved token from the vocabulary or change the "
-                       "mask token for this layer.".format(
-                           self.mask_token, tokens.index(self.mask_token)))
-    if self.oov_token in tokens:
-      raise ValueError("Reserved OOV token {} was found in the passed "
-                       "vocabulary at index {}. Please either remove the "
-                       "reserved token from the vocabulary or change the "
-                       "OOV token for this layer.".format(
-                           self.oov_token, tokens.index(self.oov_token)))
-
-    self._vocab_size = token_start + len(tokens)
-    if self.max_tokens is not None and self._vocab_size > self.max_tokens:
-      raise ValueError(
-          "Attempted to set a vocabulary larger than the maximum vocab size. "
-          "Passed vocab size is {}, max vocab size is {}.".format(
-              self._vocab_size, self.max_tokens))
-
-    if self.output_mode == TF_IDF:
-      if idf_weights is None:
-        raise ValueError("`idf_weights` must be set if output_mode is TF_IDF")
-      if len(vocabulary) != len(idf_weights):
-        raise ValueError("`idf_weights` must be the same length as vocabulary. "
-                         "len(idf_weights) is {}, len(vocabulary) is {}".format(
-                             len(vocabulary), len(idf_weights)))
-      idf_weights = self._convert_to_ndarray(idf_weights)
-      if idf_weights.ndim != 1:
-        raise ValueError(
-            "TF-IDF data must be a 1-dimensional array, but received "
-            "{}".format(type(idf_weights)))
-
-    # We add the non-special vocab tokens and optionally the mask_token to our
-    # hash table. OOV tokens are handled with the hash table default value and
-    # not added directly.
-    self._table_handler.clear()
-    indices = np.arange(token_start, len(tokens) + token_start, dtype=np.int64)
-    if self.invert:
-      self._table_handler.insert(indices, tokens)
-    else:
-      self._table_handler.insert(tokens, indices)
-    if self.mask_token is not None:
-      self._table_handler.insert([self._mask_key], [self._mask_value])
-
-    if self.output_mode == TF_IDF:
-      # If the passed vocabulary has no special tokens, we need to pad the front
-      # of idf_weights. We don't have real document frequencies for these tokens
-      # so we will use an average of all idf_weights passed in as a reasonable
-      # default.
-      if found_special_tokens:
-        front_padding = 0
-        front_padding_value = 0
-      else:
-        front_padding = token_start
-        front_padding_value = np.average(idf_weights)
-      # If pad_to_max_tokens is true, and max_tokens is greater than our total
-      # vocab size, we need to pad the back of idf_weights with zeros as well.
-      back_padding_value = 0
-      if self.pad_to_max_tokens and self.max_tokens is not None:
-        back_padding = self.max_tokens - front_padding - len(idf_weights)
-      else:
-        back_padding = 0
-      idf_weights = np.pad(
-          idf_weights, (front_padding, back_padding),
-          "constant",
-          constant_values=(front_padding_value, back_padding_value))
-      backend.set_value(self.tf_idf_weights, idf_weights)
-
-  def _set_state_variables(self, updates):
-    if not self.built:
-      raise RuntimeError("_set_state_variables() must be called after build().")
-    self.set_vocabulary(
-        updates[_VOCAB_NAME], idf_weights=updates[_IDF_WEIGHTS_NAME])
-
-  def call(self, inputs):
-    if isinstance(inputs, (list, tuple, np.ndarray)):
-      inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
-
-    if not self.max_tokens and self._vocab_size is None:
-      raise ValueError("You must set the layer's vocabulary before calling it. "
-                       "Either pass a `vocabulary` argument to the layer, or "
-                       "call `layer.adapt(dataset)` with some sample data.")
-    self._called = True
-    if self._key_dtype == dtypes.int64 and inputs.dtype == dtypes.int32:
-      inputs = math_ops.cast(inputs, dtypes.int64)
-    lookup_result = self._table_handler.lookup(inputs)
-
-    lookup_checks = []
-
-    if self.num_oov_indices == 0 and not self.invert:
-      if tf_utils.is_sparse(inputs):
-        lookup_values = lookup_result.values
-        input_values = inputs.values
-      elif tf_utils.is_ragged(inputs):
-        lookup_values = lookup_result.flat_values
-        input_values = inputs.flat_values
-      else:
-        lookup_values = lookup_result
-        input_values = inputs
-      oov_indices = array_ops.where_v2(math_ops.equal(lookup_values, -1))
-      oov_inputs = array_ops.gather_nd(input_values, oov_indices)
-      msg = string_ops.string_format(
-          "When `num_oov_indices=0` all inputs should be in vocabulary, "
-          "found OOV values {}, consider setting `num_oov_indices=1`.",
-          (oov_inputs,))
-      assertion = control_flow_ops.Assert(
-          math_ops.equal(array_ops.size(oov_indices), 0), [msg])
-      lookup_checks.append(assertion)
-
-    with ops.control_dependencies(lookup_checks):
-      if self.output_mode == INT:
-        return array_ops.identity(lookup_result)
-      else:
-        return self._encode_output(lookup_result)
-
-  def _encode_output(self, lookup_result):
-    def expand_dims(inputs, axis):
-      if tf_utils.is_sparse(inputs):
-        return sparse_ops.sparse_expand_dims(inputs, axis)
-      else:
-        return array_ops.expand_dims(inputs, axis)
-
-    original_shape = lookup_result.shape
-    # In all cases, we should uprank scalar input to a single sample.
-    if lookup_result.shape.rank == 0:
-      lookup_result = expand_dims(lookup_result, -1)
-    # One hot will uprank only if the final output dimension is not already 1.
-    if self.output_mode == ONE_HOT:
-      if lookup_result.shape[-1] != 1:
-        lookup_result = expand_dims(lookup_result, -1)
-
-    # TODO(b/190445202): remove output rank restriction.
-    if lookup_result.shape.rank > 2:
-      raise ValueError(
-          "Received input shape {}, which would result in output rank {}. "
-          "Currently only outputs up to rank 2 are supported for "
-          "`output_mode={}`.".format(original_shape, lookup_result.shape.rank,
-                                     self.output_mode))
-
-    binary_output = self.output_mode in (MULTI_HOT, ONE_HOT)
-    if self._vocab_size and not self.pad_to_max_tokens:
-      out_depth = self._vocab_size
-    else:
-      out_depth = self.max_tokens
-    if self.sparse:
-      bincounts = category_encoding.sparse_bincount(lookup_result, out_depth,
-                                                    binary_output)
-    else:
-      bincounts = category_encoding.dense_bincount(lookup_result, out_depth,
-                                                   binary_output)
-
-    if self.output_mode == TF_IDF:
-      return math_ops.multiply(bincounts, self.tf_idf_weights)
-
-    return bincounts
-
-  def _convert_to_ndarray(self, x):
-    return np.array(x) if isinstance(x, (list, tuple)) else x
-
-  def _oov_start_index(self):
-    return 1 if self.mask_token is not None and self.output_mode == INT else 0
-
-  def _token_start_index(self):
-    return self._oov_start_index() + self.num_oov_indices
-
-  @property
-  def _trackable_saved_model_saver(self):
-    return layer_serialization.IndexLookupLayerSavedModelSaver(self)
-
-  # Override points for IntegerLookup and StringLookup.
-  def _tensor_vocab_to_numpy(self, vocabulary):
-    """Converts a tensor vocabulary to a numpy vocabulary."""
-    return vocabulary.numpy()
-
-
-class _IndexLookupAccumulator(
-    collections.namedtuple("Accumulator",
-                           ["data", "count_dict", "per_doc_count_dict"])):
-  pass
-
-
-class _IndexLookupCombiner(base_preprocessing_layer.Combiner):
-  """Combiner for the IndexLookup preprocessing layer.
-
-  This class encapsulates the logic for computing a vocabulary based on the
-  frequency of each token.
-
-  Attributes:
-    vocab_size: (Optional) If set, only the top `vocab_size` tokens (based on
-      frequency across the dataset) are retained in the vocabulary. If None, or
-      set to a value greater than the total number of distinct tokens in the
-      dataset, all tokens are retained.
-  """
-
-  def __init__(self,
-               vocab_size=None,
-               mask_value=None,
-               oov_value=None,
-               compute_idf=False):
-    self._vocab_size = vocab_size
-    self._mask_value = mask_value
-    self._oov_value = oov_value
-    self._compute_idf = compute_idf
-
-  def compute(self, values, accumulator=None):
-    """Compute a step in this computation, returning a new accumulator."""
-    values = base_preprocessing_layer.convert_to_list(
-        values, sparse_default_value=self._mask_value)
-
-    if accumulator is None:
-      accumulator = self._create_accumulator()
-
-    # TODO(momernick): Benchmark improvements to this algorithm.
-    if not isinstance(values, list):
-      values = [values]
-    for document in values:
-      if not isinstance(document, list):
-        document = [document]
-      if self._compute_idf:
-        current_doc_id = accumulator.data["next_doc_id"]
-        accumulator.data["next_doc_id"] += 1
-      for token in document:
-        accumulator.count_dict[token] += 1
-        if self._compute_idf:
-          doc_count = accumulator.per_doc_count_dict[token]
-          if doc_count["last_doc_id"] != current_doc_id:
-            doc_count["count"] += 1
-            doc_count["last_doc_id"] = current_doc_id
-
-    return accumulator
-
-  def merge(self, accumulators):
-    """Merge several accumulators to a single accumulator."""
-    if not accumulators:
-      return accumulators
-
-    base_accumulator = accumulators[0]
-    for accumulator in accumulators[1:]:
-      for token, value in accumulator.count_dict.items():
-        base_accumulator.count_dict[token] += value
-
-      if self._compute_idf:
-        base_accumulator.data["next_doc_id"] += accumulator.data["next_doc_id"]
-        if self._compute_idf:
-          for token, value in accumulator.per_doc_count_dict.items():
-            # Any newly created token counts in `base_accumulator`'s
-            # per_doc_count_dict will have a last_doc_id of -1. This is always
-            # less than the next doc id (which is never negative), so any
-            # future occurrences are guaranteed to be counted.
-            base_accumulator.per_doc_count_dict[token]["count"] += value[
-                "count"]
-
-    return base_accumulator
-
-  def extract(self, accumulator):
-    """Convert an accumulator into a dict of output values.
-
-    Args:
-      accumulator: An accumulator aggregating over the full dataset.
-
-    Returns:
-      A dict of:
-        "vocab": A list of the retained items in the vocabulary.
-    """
-    vocab_counts = accumulator.count_dict
-
-    # Drop special tokens from our vocab.
-    if self._mask_value in vocab_counts:
-      del vocab_counts[self._mask_value]
-    if self._oov_value in vocab_counts:
-      del vocab_counts[self._oov_value]
-    # Data processed by the accumulator could be tensors, numpy arrays or lists.
-    # For tensor string input, values will have been converted into bytes. We
-    # need to check the bytes version of special tokens in this case.
-    if isinstance(self._mask_value, str):
-      mask_value_bytes = compat.as_bytes(self._mask_value)
-      if mask_value_bytes in vocab_counts:
-        del vocab_counts[mask_value_bytes]
-    if isinstance(self._oov_value, str):
-      oov_value_bytes = compat.as_bytes(self._oov_value)
-      if oov_value_bytes in vocab_counts:
-        del vocab_counts[oov_value_bytes]
-
-    sorted_counts = sorted(
-        vocab_counts.items(), key=operator.itemgetter(1, 0), reverse=True)
-    vocab_data = (
-        sorted_counts[:self._vocab_size] if self._vocab_size else sorted_counts)
-    vocab = [data[0] for data in vocab_data]
-
-    if self._compute_idf:
-      num_documents = accumulator.data["next_doc_id"]
-      document_counts = accumulator.per_doc_count_dict
-      doc_counts = [document_counts[token]["count"] for token in vocab]
-      idf_weights = self._inverse_document_frequency(doc_counts, num_documents)
-    else:
-      idf_weights = None
-
-    return {_VOCAB_NAME: vocab, _IDF_WEIGHTS_NAME: idf_weights}
-
-  def restore(self, output):
-    """Create an accumulator based on 'output'."""
-    raise NotImplementedError(
-        "IndexLookup does not restore or support streaming updates.")
-
-  def serialize(self, accumulator):
-    """Serialize an accumulator for a remote call."""
-    output_dict = {}
-    output_dict["vocab"] = list(accumulator.count_dict.keys())
-    output_dict["vocab_counts"] = list(accumulator.count_dict.values())
-
-    if self._compute_idf:
-      output_dict["data"] = accumulator.data
-      output_dict["idf_vocab"] = list(accumulator.per_doc_count_dict.keys())
-      output_dict["idf_counts"] = [
-          counter["count"]
-          for counter in accumulator.per_doc_count_dict.values()
-      ]
-    return compat.as_bytes(json.dumps(output_dict))
-
-  def deserialize(self, encoded_accumulator):
-    """Deserialize an accumulator received from 'serialize()'."""
-    accumulator_dict = json.loads(compat.as_text(encoded_accumulator))
-
-    accumulator = self._create_accumulator()
-    count_dict = dict(
-        zip(accumulator_dict["vocab"], accumulator_dict["vocab_counts"]))
-    accumulator.count_dict.update(count_dict)
-
-    if self._compute_idf:
-      accumulator.data = accumulator_dict["data"]
-      create_dict = lambda x: {"count": x, "last_doc_id": -1}
-      idf_count_dicts = [
-          create_dict(count) for count in accumulator_dict["idf_counts"]
-      ]
-      idf_dict = dict(zip(accumulator_dict["idf_vocab"], idf_count_dicts))
-      accumulator.per_doc_count_dict.update(idf_dict)
-    return accumulator
-
-  def _create_accumulator(self):
-    """Accumulate a sorted array of vocab tokens and corresponding counts."""
-
-    if self._compute_idf:
-      create_default_dict = lambda: {"count": 0, "last_doc_id": -1}
-      per_doc_count_dict = collections.defaultdict(create_default_dict)
-      data = {"next_doc_id": 0}
-    else:
-      per_doc_count_dict = None
-      data = None
-
-    count_dict = collections.defaultdict(int)
-    return _IndexLookupAccumulator(data, count_dict, per_doc_count_dict)
-
-  def _inverse_document_frequency(self, document_counts, num_documents):
-    """Computes the inverse-document-frequency (IDF) component of TF-IDF.
-
-    Uses the default weighting scheme described in
-    https://en.wikipedia.org/wiki/Tf%E2%80%93idf.
-
-    Args:
-      document_counts: An array of the # of documents each token appears in.
-      num_documents: An int representing the total number of documents.
-
-    Returns:
-      An array of "inverse document frequency" weights.
-    """
-    return np.log(1 + num_documents / (1 + np.array(document_counts)))
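
For reference, the inverse-document-frequency weighting that the deleted
combiner computed above follows the smoothed form
idf = log(1 + num_documents / (1 + document_count)). Below is a minimal
standalone sketch of that formula in plain NumPy, illustrative only and not
tied to any Keras API; the counts used are the ones from the deleted TF-IDF
test cases.

import numpy as np

def inverse_document_frequency(document_counts, num_documents):
  # Smoothed IDF, matching the deleted combiner's formula:
  # idf = log(1 + N / (1 + df)) for each token's document frequency df.
  return np.log(1 + num_documents / (1 + np.array(document_counts)))

# Ten single-token documents: "earth" x4, "wind" x3, "and" x2, "fire" x1.
print(inverse_document_frequency([4, 3, 2, 1], 10))
# -> approximately [1.0986, 1.2528, 1.4663, 1.7918], the idf values for the
#    in-vocabulary tokens in the deleted tf_idf end-to-end test cases.
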
diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/index_lookup_distribution_test.py
deleted file mode 100644
index fdbc232..0000000
--- a/tensorflow/python/keras/layers/preprocessing/index_lookup_distribution_test.py
+++ /dev/null
@@ -1,154 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.index_lookup."""
-
-import os
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.distribute import multi_process_runner
-from tensorflow.python.framework import config
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import strategy_combinations
-from tensorflow.python.keras.layers.preprocessing import index_lookup
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.platform import gfile
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        strategy=strategy_combinations.all_strategies +
-        strategy_combinations.multi_worker_mirrored_strategies,
-        mode=["eager"]))  # Eager-only, no graph: b/158793009
-class IndexLookupDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def _write_to_temp_file(self, file_name, vocab_list):
-    vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-    with gfile.GFile(vocab_path, "w") as writer:
-      for vocab in vocab_list:
-        writer.write(vocab + "\n")
-      writer.flush()
-      writer.close()
-    return vocab_path
-
-  def test_strategy(self, strategy):
-    # TODO(b/180614455): remove this check when MLIR bridge is always enabled.
-    if backend.is_tpu_strategy(strategy):
-      self.skipTest("This test needs MLIR bridge on TPU.")
-
-    vocab_data = [[
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ]]
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_array).batch(
-        2, drop_remainder=True)
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = index_lookup.IndexLookup(
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string)
-      layer.adapt(vocab_dataset)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-    model.compile(loss="mse")
-    output_dataset = model.predict(input_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_strategy_with_file(self, strategy):
-    # TODO(b/180614455): remove this check when MLIR bridge is always enabled.
-    if backend.is_tpu_strategy(strategy):
-      self.skipTest("This test needs MLIR bridge on TPU.")
-
-    vocab_data = ["earth", "wind", "and", "fire"]
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_array).batch(
-        2, drop_remainder=True)
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = index_lookup.IndexLookup(
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string,
-          vocabulary=vocab_file)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-    model.compile(loss="mse")
-    output_dataset = model.predict(input_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_tpu_with_multiple_oov(self, strategy):
-    # TODO(b/180614455): remove this check when MLIR bridge is always enabled.
-    if backend.is_tpu_strategy(strategy):
-      self.skipTest("This test needs MLIR bridge on TPU.")
-
-    vocab_data = [[
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ]]
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_array).batch(
-        2, drop_remainder=True)
-    expected_output = [[3, 4, 5, 6], [6, 5, 3, 1]]
-
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = index_lookup.IndexLookup(
-          max_tokens=None,
-          num_oov_indices=2,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string)
-      layer.adapt(vocab_dataset)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-    model.compile(loss="mse")
-    output_dataset = model.predict(input_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-if __name__ == "__main__":
-  v2_compat.enable_v2_behavior()
-  multi_process_runner.test_main()
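
The distribution test deleted above exercised the adapt-then-lookup flow end
to end: build a vocabulary from a dataset, wrap the layer in a model, run
predict. A rough equivalent using the public lookup layer is sketched below,
under the assumption of a recent TF release that exports it as
tf.keras.layers.StringLookup (older releases expose it under
tf.keras.layers.experimental.preprocessing); the distribution-strategy
scaffolding is omitted.

import numpy as np
import tensorflow as tf

# Vocabulary learned with adapt(), mirroring the deleted test's data:
# "earth" is most frequent, then "wind", then "and", then "fire".
vocab_dataset = tf.data.Dataset.from_tensors(
    [["earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
      "and", "fire"]])

layer = tf.keras.layers.StringLookup(mask_token="", oov_token="[OOV]")
layer.adapt(vocab_dataset)

inputs = tf.keras.Input(shape=(None,), dtype=tf.string)
model = tf.keras.Model(inputs, layer(inputs))

# "michigan" is out of vocabulary and maps to the OOV index.
print(model.predict(np.array([["earth", "wind", "and", "michigan"]])))
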
diff --git a/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py b/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py
deleted file mode 100644
index f27d04b..0000000
--- a/tensorflow/python/keras/layers/preprocessing/index_lookup_test.py
+++ /dev/null
@@ -1,2588 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras text vectorization preprocessing layer."""
-
-import itertools
-import os
-import random
-import string
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python import tf2
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import def_function
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers.preprocessing import index_lookup
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.ops import sparse_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.ops.ragged import ragged_tensor
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import load
-from tensorflow.python.saved_model import save
-
-
-def zip_and_sort(weight_values):
-  keys, values = weight_values
-  return sorted(zip(keys, values), key=lambda x: x[1])
-
-
-def _get_end_to_end_test_cases():
-  test_cases = (
-      {
-          "testcase_name":
-              "test_strings_soft_vocab_cap",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # accumulator is sorting by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-              "num_oov_indices": 1,
-              "mask_token": "",
-              "oov_token": "[OOV]",
-              "dtype": dtypes.string,
-          },
-          "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]],
-          "input_dtype":
-              dtypes.string
-      },
-      {
-          "testcase_name":
-              "test_inverse_strings_soft_vocab_cap",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # accumulator is sorting by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([[2], [3], [4], [1], [1], [4], [2], [5]]),
-          "kwargs": {
-              "max_tokens": None,
-              "num_oov_indices": 1,
-              "mask_token": "",
-              "oov_token": "[OOV]",
-              "dtype": dtypes.string,
-              "invert": True
-          },
-          "expected_output":
-              np.array([[b"earth"], [b"wind"], [b"and"], [b"[OOV]"], [b"[OOV]"],
-                        [b"and"], [b"earth"], [b"fire"]]),
-          "input_dtype":
-              dtypes.int64
-      },
-      {
-          "testcase_name":
-              "test_strings_with_special_tokens",
-          # Mask and oov values in the vocab data should be dropped, and mapped
-          # to 0 and 1 respectively when calling the layer.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        [""], [""], [""], ["[OOV]"], ["[OOV]"], ["[OOV]"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], [""], ["wind"], ["[OOV]"], ["and"], [""],
-                        ["fire"], ["and"], ["[OOV]"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-              "num_oov_indices": 1,
-              "mask_token": "",
-              "oov_token": "[OOV]",
-              "dtype": dtypes.string,
-          },
-          "expected_output": [[2], [0], [3], [1], [4], [0], [5], [4], [1], [1]],
-          "input_dtype":
-              dtypes.string
-      },
-      {
-          "testcase_name":
-              "test_ints_soft_vocab_cap",
-          # Create an array where 1138 is the most frequent term, followed by
-          # 1729, then 725, then 42. This ensures that the vocab accumulator
-          # is sorting by frequency.
-          "vocab_data":
-              np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729],
-                        [1729], [725], [725]],
-                       dtype=np.int64),
-          "input_data":
-              np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]],
-                       dtype=np.int64),
-          "kwargs": {
-              "max_tokens": None,
-              "num_oov_indices": 1,
-              "mask_token": 0,
-              "oov_token": -1,
-              "dtype": dtypes.int64,
-          },
-          "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]],
-          "input_dtype":
-              dtypes.int64
-      },
-      {
-          "testcase_name":
-              "test_ints_with_special_tokens",
-          # Mask and oov values in the vocab data should be dropped, and mapped
-          # to 0 and 1 respectively when calling the layer.
-          "vocab_data":
-              np.array([[42], [1138], [1138], [1138], [1138], [0], [0], [0],
-                        [-1], [-1], [-1], [1729], [1729], [1729], [725], [725]],
-                       dtype=np.int64),
-          "input_data":
-              np.array([[1138], [0], [1729], [-1], [725], [0], [42], [725],
-                        [-1], [4]],
-                       dtype=np.int64),
-          "kwargs": {
-              "max_tokens": None,
-              "num_oov_indices": 1,
-              "mask_token": 0,
-              "oov_token": -1,
-              "dtype": dtypes.int64,
-          },
-          "expected_output": [[2], [0], [3], [1], [4], [0], [5], [4], [1], [1]],
-          "input_dtype":
-              dtypes.int64
-      },
-      {
-          "testcase_name":
-              "test_strings_hard_vocab_cap",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # accumulator is sorting by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "num_oov_indices": 1,
-              "mask_token": "",
-              "oov_token": "[OOV]",
-              "dtype": dtypes.string,
-          },
-          "expected_output": [[2], [3], [4], [1], [1], [4], [2], [1]],
-          "input_dtype":
-              dtypes.string
-      },
-      {
-          "testcase_name":
-              "test_inverse_strings_hard_vocab_cap",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # accumulator is sorting by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([[2], [3], [4], [1], [1], [4], [2], [5]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "num_oov_indices": 1,
-              "mask_token": "",
-              "oov_token": "[OOV]",
-              "dtype": dtypes.string,
-              "invert": True
-          },
-          "expected_output":
-              np.array([[b"earth"], [b"wind"], [b"and"], [b"[OOV]"], [b"[OOV]"],
-                        [b"and"], [b"earth"], [b"[OOV]"]]),
-          "input_dtype":
-              dtypes.int64
-      },
-      {
-          "testcase_name":
-              "test_ints_hard_vocab_cap",
-          # Create an array where 1138 is the most frequent term, followed by
-          # 1729, then 725, then 42. This ensures that the vocab accumulator
-          # is sorting by frequency.
-          "vocab_data":
-              np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729],
-                        [1729], [725], [725]],
-                       dtype=np.int64),
-          "input_data":
-              np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]],
-                       dtype=np.int64),
-          "kwargs": {
-              "max_tokens": 5,
-              "num_oov_indices": 1,
-              "mask_token": 0,
-              "oov_token": -1,
-              "dtype": dtypes.int64,
-          },
-          "expected_output": [[2], [3], [4], [1], [1], [4], [2], [1]],
-          "input_dtype":
-              dtypes.int64
-      },
-      {
-          "testcase_name":
-              "test_ints_tf_idf_output",
-          "vocab_data":
-              np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729],
-                        [1729], [725], [725]]),
-          "input_data":
-              np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "num_oov_indices": 1,
-              "mask_token": 0,
-              "oov_token": -1,
-              "output_mode": index_lookup.TF_IDF,
-              "dtype": dtypes.int64,
-          },
-          "expected_output": [[0, 1.098612, 0, 0, 0], [0, 0, 1.252763, 0, 0],
-                              [0, 0, 0, 1.466337, 0], [0, 0, 0, 0, 1.7917595],
-                              [0, 0, 0, 0, 1.7917595], [0, 0, 0, 1.4663371, 0],
-                              [0, 1.098612, 0, 0, 0], [1.402368, 0, 0, 0, 0]],
-          "input_dtype":
-              dtypes.int64
-      },
-      {
-          "testcase_name":
-              "test_strings_tf_idf_output",
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "num_oov_indices": 1,
-              "mask_token": "",
-              "oov_token": "[OOV]",
-              "output_mode": index_lookup.TF_IDF,
-              "dtype": dtypes.string,
-          },
-          "expected_output": [[0, 1.098612, 0, 0, 0], [0, 0, 1.252763, 0, 0],
-                              [0, 0, 0, 1.466337, 0], [0, 0, 0, 0, 1.7917595],
-                              [0, 0, 0, 0, 1.7917595], [0, 0, 0, 1.4663371, 0],
-                              [0, 1.098612, 0, 0, 0], [1.402368, 0, 0, 0, 0]],
-          "input_dtype":
-              dtypes.string
-      },
-  )
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupLayerTest(keras_parameterized.TestCase,
-                           preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters(*_get_end_to_end_test_cases())
-  def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs,
-                                       use_dataset, expected_output,
-                                       input_dtype):
-    cls = index_lookup.IndexLookup
-    if "invert" in kwargs and kwargs["invert"]:
-      expected_output_dtype = kwargs["dtype"]
-    elif "output_mode" in kwargs and kwargs["output_mode"] != index_lookup.INT:
-      expected_output_dtype = dtypes.float32
-    else:
-      expected_output_dtype = dtypes.int64
-
-    input_shape = input_data.shape
-
-    if use_dataset:
-      # Keras APIs expect batched datasets.
-      # TODO(rachelim): `model.predict` predicts the result on each
-      # dataset batch separately, then tries to concatenate the results
-      # together. When the results have different shapes on the non-concat
-      # axis (which can happen in the output_mode = INT case for
-      # IndexLookup), the concatenation fails. In real use cases, this may
-      # not be an issue because users are likely to pipe the preprocessing layer
-      # into other keras layers instead of predicting it directly. A workaround
-      # for these unit tests is to have the dataset only contain one batch, so
-      # no concatenation needs to happen with the result. For consistency with
-      # numpy input, we should make `predict` join differently shaped results
-      # together sensibly, with 0 padding.
-      input_data = dataset_ops.Dataset.from_tensor_slices(input_data).batch(
-          input_shape[0])
-      vocab_data = dataset_ops.Dataset.from_tensor_slices(vocab_data).batch(
-          input_shape[0])
-
-    with CustomObjectScope({"IndexLookup": cls}):
-      output_data = testing_utils.layer_test(
-          cls,
-          kwargs=kwargs,
-          input_shape=input_shape,
-          input_data=input_data,
-          input_dtype=input_dtype,
-          expected_output_dtype=expected_output_dtype,
-          validate_training=False,
-          adapt_data=vocab_data)
-    if "invert" in kwargs and kwargs["invert"]:
-      self.assertAllEqual(expected_output, output_data)
-    else:
-      self.assertAllClose(expected_output, output_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingInputTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=["fire", "michigan"],
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string, sparse=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_sparse_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 32], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant(
-        [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_ragged_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]],
-                                              dtype=np.int64)
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int32_input_with_int64_keys(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]],
-                                              dtype=np.int32)
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int32, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingMultiOOVTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_string_input_multi_bucket(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]], values=["fire", "ohio"], dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [6, 2]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string, sparse=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=2,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_sparse_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 133], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [6, 2]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=2,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_string_input_multi_bucket(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant([["earth", "wind", "fire"],
-                                               ["fire", "and", "earth",
-                                                "ohio"]])
-    expected_output = [[3, 4, 6], [6, 5, 3, 2]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=2,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_ragged_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 133]],
-                                              dtype=np.int64)
-    expected_output = [[3, 4, 6], [6, 5, 3, 2]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=2,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingAdaptTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_adapt(self):
-    vocab_data = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [0, 1], [1, 2]],
-        values=["michigan", "fire", "michigan"],
-        dense_shape=[3, 4])
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.adapt(vocab_dataset)
-    expected_vocabulary = ["", "[OOV]", "michigan", "fire"]
-    self.assertAllEqual(expected_vocabulary, layer.get_vocabulary())
-
-  def test_ragged_adapt(self):
-    vocab_data = ragged_factory_ops.constant([["michigan"],
-                                              ["fire", "michigan"]])
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.adapt(vocab_dataset)
-    expected_vocabulary = ["", "[OOV]", "michigan", "fire"]
-    self.assertAllEqual(expected_vocabulary, layer.get_vocabulary())
-
-  def test_sparse_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 32], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant(
-        [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_ragged_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]],
-                                              dtype=np.int64)
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, ragged=True)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_single_string_generator_dataset(self):
-
-    def word_gen():
-      for _ in itertools.count(1):
-        yield "".join(random.choice(string.ascii_letters) for i in range(2))
-
-    ds = dataset_ops.Dataset.from_generator(word_gen, dtypes.string,
-                                            tensor_shape.TensorShape([]))
-    batched_ds = ds.take(2)
-    input_t = keras.Input(shape=(), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=10,
-        num_oov_indices=0,
-        mask_token=None,
-        oov_token=None,
-        dtype=dtypes.string)
-    _ = layer(input_t)
-    layer.adapt(batched_ds)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupOutputTest(keras_parameterized.TestCase,
-                            preprocessing_test_utils.PreprocessingLayerTest):
-
-  def _write_to_temp_file(self, file_name, vocab_list):
-    vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-    with gfile.GFile(vocab_path, "w") as writer:
-      for vocab in vocab_list:
-        writer.write(vocab + "\n")
-      writer.flush()
-      writer.close()
-    return vocab_path
-
-  def test_int_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_shape(self):
-    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=2,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    self.assertAllEqual(int_data.shape.as_list(), [16, 4])
-
-  def test_int_output_no_reserved_zero(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=None,
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_no_oov(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    valid_input = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", ""]])
-    invalid_input = np.array([["earth", "wind", "and", "michigan"],
-                              ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=0,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(valid_input)
-    self.assertAllEqual(expected_output, output_data)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "found OOV values.*michigan"):
-      _ = model.predict(invalid_input)
-
-  def test_int_output_no_oov_ragged(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    valid_input = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", ""]])
-    invalid_input = np.array([["earth", "wind", "and", "michigan"],
-                              ["fire", "and", "earth", "michigan"]])
-    valid_input = ragged_tensor.RaggedTensor.from_tensor(valid_input)
-    invalid_input = ragged_tensor.RaggedTensor.from_tensor(invalid_input)
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=0,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(valid_input)
-    self.assertAllEqual(expected_output, output_data)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "found OOV values.*michigan"):
-      _ = model.predict(invalid_input)
-
-  def test_int_output_no_oov_sparse(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    valid_input = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", ""]])
-    invalid_input = np.array([["earth", "wind", "and", "michigan"],
-                              ["fire", "and", "earth", "michigan"]])
-    valid_input = sparse_ops.from_dense(valid_input)
-    invalid_input = sparse_ops.from_dense(invalid_input)
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=0,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(valid_input)
-    self.assertAllEqual(expected_output,
-                        sparse_ops.sparse_tensor_to_dense(output_data))
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "found OOV values.*michigan"):
-      _ = model.predict(invalid_input)
-
-  def test_int_output_explicit_vocab(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_one_hot_output_hard_maximum(self):
-    """Check binary output when pad_to_max_tokens=True."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array(["earth", "wind", "and", "fire", "michigan", ""])
-    expected_output = [
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 1, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 1, 0],
-        [1, 0, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0, 0],
-    ]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=6,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.ONE_HOT,
-        pad_to_max_tokens=True,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    binary_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=binary_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_one_hot_output_soft_maximum(self):
-    """Check binary output when pad_to_max_tokens=False."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array(["earth", "wind", "and", "fire", "michigan", ""])
-    expected_output = [
-        [0, 1, 0, 0, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 1, 0],
-        [0, 0, 0, 0, 1],
-        [1, 0, 0, 0, 0],
-        [0, 0, 0, 0, 0],
-    ]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.ONE_HOT,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    binary_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=binary_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_one_hot_output_shape(self):
-    inputs = keras.Input(batch_size=16, shape=(1,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=["earth"],
-        max_tokens=2,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.ONE_HOT,
-        dtype=dtypes.string)
-    outputs = layer(inputs)
-    self.assertAllEqual(outputs.shape.as_list(), [16, 2])
-
-  def test_multi_hot_output_hard_maximum(self):
-    """Check binary output when pad_to_max_tokens=True."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire", ""],
-                            ["fire", "fire", "and", "earth", "michigan"]])
-    expected_output = [
-        [0, 1, 1, 1, 1, 0],
-        [1, 1, 0, 1, 1, 0],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=6,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.MULTI_HOT,
-        pad_to_max_tokens=True,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    binary_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=binary_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_multi_hot_output_no_oov(self):
-    """Check binary output when pad_to_max_tokens=True."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    valid_input = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", ""]])
-    invalid_input = np.array([["earth", "wind", "and", "michigan"],
-                              ["fire", "and", "earth", "michigan"]])
-    expected_output = [
-        [1, 1, 1, 1, 0],
-        [1, 0, 1, 1, 0],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=0,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.MULTI_HOT,
-        pad_to_max_tokens=True,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    binary_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=binary_data)
-    output_data = model.predict(valid_input)
-    self.assertAllEqual(expected_output, output_data)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "found OOV values.*michigan"):
-      _ = model.predict(invalid_input)
-
-  def test_multi_hot_output_hard_maximum_multiple_adapts(self):
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-    adapt_data = ["earth", "earth", "earth", "earth", "wind", "wind", "wind"]
-    first_expected_output = [
-        [1, 1, 1, 0, 0],
-        [1, 1, 0, 0, 0],
-    ]
-    second_adapt_data = [
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ]
-    second_expected_output = [
-        [0, 1, 1, 1, 0],
-        [1, 1, 0, 1, 0],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.MULTI_HOT,
-        pad_to_max_tokens=True,
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    # Test the first adapt
-    layer.adapt(adapt_data)
-    first_output = model.predict(input_array)
-    # Test the second adapt
-    layer.adapt(second_adapt_data)
-    second_output = model.predict(input_array)
-    self.assertAllEqual(first_expected_output, first_output)
-    self.assertAllEqual(second_expected_output, second_output)
-
-  def test_multi_hot_output_soft_maximum(self):
-    """Check multi_hot output when pad_to_max_tokens=False."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire", ""],
-                            ["fire", "and", "earth", "michigan", ""]])
-    expected_output = [
-        [0, 1, 1, 1, 1],
-        [1, 1, 0, 1, 1],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.MULTI_HOT,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    binary_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=binary_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_multi_hot_output_shape(self):
-    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=2,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.MULTI_HOT,
-        dtype=dtypes.string)
-    binary_data = layer(input_data)
-    self.assertAllEqual(binary_data.shape.as_list(), [16, 2])
-
-  def test_count_output_hard_maximum(self):
-    """Check count output when pad_to_max_tokens=True."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "wind", ""],
-                            ["fire", "fire", "fire", "michigan", ""]])
-    expected_output = [
-        [0, 1, 2, 1, 0, 0],
-        [1, 0, 0, 0, 3, 0],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=6,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.COUNT,
-        pad_to_max_tokens=True,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    count_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=count_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_count_output_soft_maximum(self):
-    """Check count output when pad_to_max_tokens=False."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "wind", ""],
-                            ["fire", "fire", "fire", "michigan", ""]])
-    expected_output = [
-        [0, 1, 2, 1, 0],
-        [1, 0, 0, 0, 3],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.COUNT,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    count_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=count_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_count_output_shape(self):
-    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=2,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.COUNT,
-        dtype=dtypes.string)
-    count_data = layer(input_data)
-    self.assertAllEqual(count_data.shape.as_list(), [16, 2])
-
-  def test_tfidf_output_hard_maximum(self):
-    """Check tf-idf output when pad_to_max_tokens=True."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # OOV idf weight (bucket 0) should be 0.5, the average of passed weights.
-    idf_weights = [.4, .25, .75, .6]
-    input_array = np.array([["earth", "wind", "and", "earth", ""],
-                            ["ohio", "fire", "earth", "michigan", ""]])
-    expected_output = [
-        [0.00, 0.80, 0.25, 0.75, 0.00, 0.00],
-        [1.00, 0.40, 0.00, 0.00, 0.60, 0.00],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=6,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.TF_IDF,
-        pad_to_max_tokens=True,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-    layer_output = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=layer_output)
-    output_dataset = model.predict(input_array)
-    self.assertAllClose(expected_output, output_dataset)
-
-  def test_tfidf_output_soft_maximum(self):
-    """Check tf-idf output when pad_to_max_tokens=False."""
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # OOV idf weight (bucket 0) should be 0.5, the average of passed weights.
-    idf_weights = [.4, .25, .75, .6]
-    input_array = np.array([["earth", "wind", "and", "earth", ""],
-                            ["ohio", "fire", "earth", "michigan", ""]])
-    expected_output = [
-        [0.00, 0.80, 0.25, 0.75, 0.00],
-        [1.00, 0.40, 0.00, 0.00, 0.60],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.TF_IDF,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-    layer_output = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=layer_output)
-    output_dataset = model.predict(input_array)
-    self.assertAllClose(expected_output, output_dataset)
-
-  def test_tfidf_output_shape(self):
-    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=2,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.COUNT,
-        dtype=dtypes.string)
-    layer_output = layer(input_data)
-    self.assertAllEqual(layer_output.shape.as_list(), [16, 2])
-
-  def test_int_output_file_vocab(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 0, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_non_int_output_file_vocab_in_tf_function(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = constant_op.constant(
-        [["earth", "wind", "and", "fire", ""],
-         ["fire", "and", "earth", "michigan", ""]],
-        dtype=dtypes.string)
-
-    expected_output = [
-        [0, 1, 1, 1, 1],
-        [1, 1, 0, 1, 1],
-    ]
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    @def_function.function
-    def compute(data):
-      layer = index_lookup.IndexLookup(
-          vocabulary=vocab_file,
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          output_mode=index_lookup.MULTI_HOT,
-          dtype=dtypes.string)
-      return layer(data)
-
-    output_dataset = compute(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_file_vocab_and_list_vocab_identical_attrs(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    file_layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    list_layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    expected_vocab = ["", "[OOV]", "earth", "wind", "and", "fire"]
-    self.assertAllEqual(expected_vocab, list_layer.get_vocabulary())
-    expected_vocab_size = 6
-    self.assertAllEqual(expected_vocab_size, list_layer.vocab_size())
-    self.assertAllEqual(list_layer.get_vocabulary(),
-                        file_layer.get_vocabulary())
-    self.assertAllEqual(list_layer.vocab_size(), file_layer.vocab_size())
-
-    # We expect the weights to be DIFFERENT in these cases.
-    expected_weights = (["", "earth", "wind", "and", "fire"], [0, 2, 3, 4, 5])
-    sorted_weights = zip_and_sort(expected_weights)
-    self.assertAllEqual(sorted_weights, zip_and_sort(list_layer.get_weights()))
-    self.assertAllEqual(0, len(file_layer.get_weights()))
-
-  def test_file_vocab_and_list_vocab_identical_attrs_multi_oov(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    file_layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        num_oov_indices=2,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    list_layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=2,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    expected_vocab = ["", "[OOV]", "[OOV]", "earth", "wind", "and", "fire"]
-    self.assertAllEqual(expected_vocab, list_layer.get_vocabulary())
-    expected_vocab_size = 7
-    self.assertAllEqual(expected_vocab_size, list_layer.vocab_size())
-    self.assertAllEqual(list_layer.get_vocabulary(),
-                        file_layer.get_vocabulary())
-    self.assertAllEqual(list_layer.vocab_size(), file_layer.vocab_size())
-
-    expected_weights = (["", "earth", "wind", "and", "fire"], [0, 3, 4, 5, 6])
-    sorted_weights = zip_and_sort(expected_weights)
-    self.assertAllEqual(sorted_weights, zip_and_sort(list_layer.get_weights()))
-    self.assertAllEqual(0, len(file_layer.get_weights()))
-
-  def test_file_vocab_and_list_vocab_identical_attrs_no_mask(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    file_layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        num_oov_indices=2,
-        mask_token=None,
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    list_layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=2,
-        mask_token=None,
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    expected_vocab = ["[OOV]", "[OOV]", "earth", "wind", "and", "fire"]
-    self.assertAllEqual(expected_vocab, list_layer.get_vocabulary())
-    expected_vocab_size = 6
-    self.assertAllEqual(expected_vocab_size, list_layer.vocab_size())
-    self.assertAllEqual(list_layer.get_vocabulary(),
-                        file_layer.get_vocabulary())
-    self.assertAllEqual(list_layer.vocab_size(), file_layer.vocab_size())
-
-    expected_weights = (["earth", "wind", "and", "fire"], [2, 3, 4, 5])
-    sorted_weights = zip_and_sort(expected_weights)
-    self.assertAllEqual(sorted_weights, zip_and_sort(list_layer.get_weights()))
-    self.assertAllEqual(0, len(file_layer.get_weights()))
-
-  def test_int_output_file_vocab_no_mask(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 0, 1, 0]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        mask_token=None,
-        num_oov_indices=1,
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_file_vocab_no_oov_or_mask(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "wind", "earth", "and"]])
-    expected_output = [[0, 1, 2, 3], [3, 1, 0, 2]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        mask_token=None,
-        num_oov_indices=0,
-        oov_token=None,
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_file_vocab_inversion(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([[1, 2, 3, 4], [4, 0, 1, 0]])
-    expected_output = [["earth", "wind", "and", "fire"],
-                       ["fire", "[OOV]", "earth", "[OOV]"]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-    idata = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        mask_token=None,
-        num_oov_indices=1,
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    _ = layer(idata)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-
-    invert_layer = index_lookup.IndexLookup(
-        vocabulary=layer.get_vocabulary(),
-        max_tokens=None,
-        oov_token="[OOV]",
-        mask_token=None,
-        num_oov_indices=1,
-        invert=True,
-        dtype=dtypes.string)
-    int_data = invert_layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_int_file_vocab(self):
-    vocab_data = ["10", "20", "30", "40"]
-    input_array = np.array([[10, 20, 30, 40], [40, 0, 10, 42]])
-    expected_output = [[2, 3, 4, 5], [5, 0, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_file_vocab_setting_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_file,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-
-    with self.assertRaisesRegex(RuntimeError, "file path"):
-      layer.set_vocabulary(vocab_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupVocabularyTest(keras_parameterized.TestCase,
-                                preprocessing_test_utils.PreprocessingLayerTest
-                               ):
-
-  def test_int_output_explicit_vocab(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_explicit_vocab_with_special_tokens(self):
-    vocab_data = ["", "[OOV]", "earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_get_vocabulary_no_special_tokens(self):
-    vocab_data = ["", "[OOV]", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary(include_special_tokens=False)
-    self.assertAllEqual(returned_vocab, ["wind", "and", "fire"])
-    self.assertAllEqual(layer.vocabulary_size(), 5)
-
-  def test_vocab_with_max_cap(self):
-    vocab_data = ["", "[OOV]", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-    self.assertAllEqual(layer.vocabulary_size(), 5)
-
-  def test_int_vocab_with_max_cap(self):
-    vocab_data = [0, -1, 42, 1276, 1138]
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-    self.assertAllEqual(layer.vocabulary_size(), 5)
-
-  def test_vocab_with_multiple_oov_indices(self):
-    vocab_data = ["", "[OOV]", "[OOV]", "[OOV]", "wind"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=3,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-
-  def test_int_vocab_with_multiple_oov_indices(self):
-    vocab_data = [0, -1, -1, -1, 42]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=3,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-
-  def test_non_unique_vocab_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire", "fire"]
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*fire.*"):
-      _ = index_lookup.IndexLookup(
-          vocabulary=vocab_data,
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string)
-
-  def test_vocab_with_oov_and_wrong_mask_fails(self):
-    vocab_data = ["custom_mask", "[OOV]", "earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError, ".*does not have the mask token.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_oov_and_no_mask_fails(self):
-    vocab_data = ["[OOV]", "earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError, ".*Reserved OOV.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_mask_but_no_oov_fails(self):
-    vocab_data = ["", "earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError, ".*does not have the OOV token.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_repeated_element_fails(self):
-    vocab_data = ["earth", "earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*earth.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_reserved_oov_element_fails(self):
-    vocab_data = ["earth", "test", "[OOV]", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError, ".*Reserved OOV.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_reserved_mask_element_fails(self):
-    vocab_data = ["earth", "mask_token", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="mask_token",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError, ".*Reserved mask.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_set_after_call_pad_to_max_false_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        pad_to_max_tokens=False,
-        output_mode=index_lookup.MULTI_HOT,
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    # Calling the layer should lock the vocabulary.
-    _ = layer([["earth"]])
-    with self.assertRaisesRegex(RuntimeError, "vocabulary cannot be changed"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_idf_weights_non_tfidf_output_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    weight_data = [1, 1, 1, 1, 1]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.MULTI_HOT,
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError,
-                                "`idf_weights` should only be set if"):
-      layer.set_vocabulary(vocab_data, idf_weights=weight_data)
-
-  def test_vocab_with_idf_weights_length_mismatch_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    weight_data = [1, 1, 1, 1, 1]  # too long
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.TF_IDF,
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(
-        ValueError, "`idf_weights` must be the same length as vocab"):
-      layer.set_vocabulary(vocab_data, idf_weights=weight_data)
-
-  def test_vocab_without_idf_weights_tfidf_output_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        output_mode=index_lookup.TF_IDF,
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(
-        ValueError, "`idf_weights` must be set if output_mode is TF_IDF"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_non_unique_int_vocab_fails(self):
-    vocab_data = [12, 13, 14, 15, 15]
-    with self.assertRaisesRegex(ValueError, "repeated term.*15"):
-      _ = index_lookup.IndexLookup(
-          vocabulary=vocab_data,
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token=0,
-          oov_token=-1,
-          dtype=dtypes.int64)
-
-  def test_int_vocab_with_oov_and_wrong_mask_fails(self):
-    vocab_data = [1234, -1, 11, 21, 13, 14]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    with self.assertRaisesRegex(ValueError, "does not have the mask token `0`"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_int_vocab_with_oov_and_no_mask_fails(self):
-    vocab_data = [-1, 11, 12, 13, 14]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    with self.assertRaisesRegex(ValueError, "Reserved OOV"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_int_vocab_with_mask_but_no_oov_fails(self):
-    vocab_data = [0, 11, 12, 13, 14]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    with self.assertRaisesRegex(ValueError, "does not have the OOV token `-1`"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_int_vocab_with_repeated_element_fails(self):
-    vocab_data = [11, 11, 34, 23, 124]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    with self.assertRaisesRegex(ValueError, "repeated term.*11"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_int_vocab_with_reserved_oov_element_fails(self):
-    vocab_data = [14, 38, -1, 34, 3, 84]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    with self.assertRaisesRegex(ValueError, "Reserved OOV"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_int_vocab_with_reserved_mask_element_fails(self):
-    vocab_data = [125, 0, 3, 4, 94]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64)
-    with self.assertRaisesRegex(ValueError, "Reserved mask"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_no_vocab_file_string_fails(self):
-    with self.assertRaisesRegex(ValueError, ".*non_existent_file.*"):
-      _ = index_lookup.IndexLookup(
-          vocabulary="non_existent_file",
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token=0,
-          oov_token=-1,
-          dtype=dtypes.int64)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupInverseVocabularyTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_int_output_explicit_vocab(self):
-    vocab_data = ["", "[OOV]", "earth", "wind", "and", "fire"]
-    input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 1]])
-    expected_output = np.array([["earth", "wind", "and", "fire"],
-                                ["fire", "and", "earth", "[OOV]"]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = index_lookup.IndexLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        invert=True)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_vocab_with_max_cap(self):
-    vocab_data = ["", "[OOV]", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        invert=True)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-
-  def test_int_vocab_with_max_cap(self):
-    vocab_data = [0, -1, 42, 1276, 1138]
-    layer = index_lookup.IndexLookup(
-        max_tokens=5,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64,
-        invert=True)
-    layer.set_vocabulary(vocab_data)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-
-  def test_non_unique_vocab_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire", "fire"]
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*fire.*"):
-      _ = index_lookup.IndexLookup(
-          vocabulary=vocab_data,
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string,
-          invert=True)
-
-  def test_non_int_output_fails(self):
-    with self.assertRaisesRegex(ValueError, "`output_mode` must be int"):
-      _ = index_lookup.IndexLookup(
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string,
-          output_mode=index_lookup.COUNT,
-          invert=True)
-
-  def test_vocab_with_repeated_element_fails(self):
-    vocab_data = ["earth", "earth", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        invert=True)
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*earth.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_vocab_with_reserved_mask_element_fails(self):
-    vocab_data = ["earth", "mask_token", "wind", "and", "fire"]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="mask_token",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        invert=True)
-    with self.assertRaisesRegex(ValueError, ".*Reserved mask.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_non_unique_int_vocab_fails(self):
-    vocab_data = [12, 13, 14, 15, 15]
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*15.*"):
-      _ = index_lookup.IndexLookup(
-          vocabulary=vocab_data,
-          max_tokens=None,
-          num_oov_indices=1,
-          mask_token=0,
-          oov_token=-1,
-          dtype=dtypes.int64,
-          invert=True)
-
-  def test_int_vocab_with_repeated_element_fails(self):
-    vocab_data = [11, 11, 34, 23, 124]
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token=0,
-        oov_token=-1,
-        dtype=dtypes.int64,
-        invert=True)
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*11.*"):
-      layer.set_vocabulary(vocab_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupErrorTest(keras_parameterized.TestCase,
-                           preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_too_long_vocab_fails_in_single_setting(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    layer = index_lookup.IndexLookup(
-        max_tokens=4,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    with self.assertRaisesRegex(ValueError,
-                                "vocabulary larger than the maximum vocab.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_zero_max_tokens_fails(self):
-    with self.assertRaisesRegex(ValueError, ".*max_tokens.*"):
-      _ = index_lookup.IndexLookup(
-          max_tokens=0,
-          num_oov_indices=1,
-          mask_token="",
-          oov_token="[OOV]",
-          dtype=dtypes.string)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupSavingTest(keras_parameterized.TestCase,
-                            preprocessing_test_utils.PreprocessingLayerTest):
-
-  def _write_to_temp_file(self, file_name, vocab_list):
-    vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-    with gfile.GFile(vocab_path, "w") as writer:
-      for vocab in vocab_list:
-        writer.write(vocab + "\n")
-      writer.flush()
-      writer.close()
-    return vocab_path
-
-  def test_vocabulary_persistence_across_saving(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_vocabulary_persistence_file_across_cloning(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Clone the model.
-    new_model = keras.models.clone_model(model)
-
-    # Ensure that the cloned model is a distinct object (so the clone is real).
-    self.assertIsNot(model, new_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = new_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_persistence_file_vocabs_tf_save_tf_load(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    save.save(obj=model, export_dir=output_path)
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = load.load(output_path)
-    f = loaded_model.signatures["serving_default"]
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = f(constant_op.constant(input_array))["index_lookup"]
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_vocabulary_persistence_file_vocab_keras_save_tf_load(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = load.load(output_path)
-    f = loaded_model.signatures["serving_default"]
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = f(constant_op.constant(input_array))["index_lookup"]
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_persistence_file_vocab_keras_save_keras_load(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-    gfile.Remove(vocab_file)
-
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-    # Try re-saving the layer. This simulates saving a layer contained in
-    # a hub Module.
-    input_data_2 = keras.Input(shape=(None,), dtype=dtypes.string)
-    output_2 = loaded_model(input_data_2)
-    model_2 = keras.Model(inputs=input_data_2, outputs=output_2)
-    new_output_dataset = model_2.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model_2")
-    model_2.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_persistence_file_vocab_keras_save_keras_load_tf_save_tf_load(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-    gfile.Remove(vocab_file)
-
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-    # Try re-saving the layer. This simulates saving a layer contained in
-    # a hub Module.
-    input_data_2 = keras.Input(shape=(None,), dtype=dtypes.string)
-    output_2 = loaded_model(input_data_2)
-    model_2 = keras.Model(inputs=input_data_2, outputs=output_2)
-    new_output_dataset = model_2.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model_2")
-    save.save(model_2, output_path)
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = load.load(output_path)
-    f = loaded_model.signatures["serving_default"]
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = f(constant_op.constant(input_array))["model"]
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_persistence_file_vocab_keras_save_keras_load_keras_save_keras_load(
-      self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = index_lookup.IndexLookup(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-    gfile.Remove(vocab_file)
-
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-    # Try re-saving the layer. This simulates saving a layer contained in
-    # a hub Module.
-    input_data_2 = keras.Input(shape=(None,), dtype=dtypes.string)
-    output_2 = loaded_model(input_data_2)
-    model_2 = keras.Model(inputs=input_data_2, outputs=output_2)
-    new_output_dataset = model_2.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model_2")
-    model_2.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"IndexLookup": index_lookup.IndexLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-  def test_static_table_config_weight_data_transfer_succeeds(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    vocab_file = self._write_to_temp_file("temp", vocab_data)
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    # Build and validate a golden model.
-    layer_cls = index_lookup.IndexLookup
-    layer = layer_cls(
-        max_tokens=None,
-        num_oov_indices=1,
-        mask_token="",
-        oov_token="[OOV]",
-        dtype=dtypes.string,
-        vocabulary=vocab_file)
-    config = layer.get_config()
-    weights = layer.get_weights()
-
-    layer = layer_cls.from_config(config)
-    layer.set_weights(weights)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    output = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=output)
-
-    new_output_dataset = model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupStringCombinerTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def compare_text_accumulators(self, a, b, msg=None):
-    if a is None or b is None:
-      self.assertAllEqual(a, b, msg=msg)
-      return
-
-    self.assertAllEqual(a.count_dict, b.count_dict, msg=msg)
-
-  compare_accumulators = compare_text_accumulators
-
-  def update_accumulator(self, accumulator, data):
-    accumulator.count_dict.update(dict(zip(data["vocab"], data["counts"])))
-
-    return accumulator
-
-  def test_combiner_api_compatibility_int_mode(self):
-    data = np.array([["earth", "wind", "and", "fire"],
-                     ["earth", "wind", "and", "michigan"]])
-    combiner = index_lookup._IndexLookupCombiner()
-    expected_accumulator_output = {
-        "vocab": np.array(["and", "earth", "wind", "fire", "michigan"]),
-        "counts": np.array([2, 2, 2, 1, 1]),
-    }
-    expected_extract_output = {
-        "vocab": np.array(["wind", "earth", "and", "michigan", "fire"]),
-        "idf_weights": None,
-    }
-    expected_accumulator = combiner._create_accumulator()
-    expected_accumulator = self.update_accumulator(expected_accumulator,
-                                                   expected_accumulator_output)
-    self.validate_accumulator_serialize_and_deserialize(combiner, data,
-                                                        expected_accumulator)
-    self.validate_accumulator_uniqueness(combiner, data)
-    self.validate_accumulator_extract(combiner, data, expected_extract_output)
-
-  # TODO(askerryryan): Add tests confirming equivalence to behavior of
-  # existing tf.keras.preprocessing.text.Tokenizer.
-  @parameterized.named_parameters(
-      {
-          "testcase_name":
-              "top_k_smaller_than_full_vocab",
-          "data":
-              np.array([["earth", "wind"], ["fire", "wind"], ["and"],
-                        ["fire", "wind"]]),
-          "vocab_size":
-              3,
-          "expected_accumulator_output": {
-              "vocab": np.array(["wind", "fire", "earth", "and"]),
-              "counts": np.array([3, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array(["wind", "fire", "earth"]),
-              "idf_weights": None,
-          },
-      },
-      {
-          "testcase_name":
-              "top_k_larger_than_full_vocab",
-          "data":
-              np.array([["earth", "wind"], ["fire", "wind"], ["and"],
-                        ["fire", "wind"]]),
-          "vocab_size":
-              10,
-          "expected_accumulator_output": {
-              "vocab": np.array(["wind", "fire", "earth", "and"]),
-              "counts": np.array([3, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array(["wind", "fire", "earth", "and"]),
-              "idf_weights": None,
-          },
-      },
-      {
-          "testcase_name":
-              "no_top_k",
-          "data":
-              np.array([["earth", "wind"], ["fire", "wind"], ["and"],
-                        ["fire", "wind"]]),
-          "vocab_size":
-              None,
-          "expected_accumulator_output": {
-              "vocab": np.array(["wind", "fire", "earth", "and"]),
-              "counts": np.array([3, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array(["wind", "fire", "earth", "and"]),
-              "idf_weights": None,
-          },
-      },
-      {
-          "testcase_name": "single_element_per_row",
-          "data": np.array([["earth"], ["wind"], ["fire"], ["wind"], ["and"]]),
-          "vocab_size": 3,
-          "expected_accumulator_output": {
-              "vocab": np.array(["wind", "and", "earth", "fire"]),
-              "counts": np.array([2, 1, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array(["wind", "fire", "earth"]),
-              "idf_weights": None,
-          },
-      },
-      # Which tokens are retained is based on global frequency, and is thus
-      # sensitive to frequency within a document. In contrast, because idf only
-      # considers the presence of a token in a document, it is insensitive
-      # to the frequency of the token within the document.
-      {
-          "testcase_name":
-              "retained_tokens_sensitive_to_within_document_frequency",
-          "data":
-              np.array([["earth", "earth"], ["wind", "wind"], ["fire", "fire"],
-                        ["wind", "wind"], ["and", "michigan"]]),
-          "vocab_size":
-              3,
-          "expected_accumulator_output": {
-              "vocab": np.array(["wind", "earth", "fire", "and", "michigan"]),
-              "counts": np.array([4, 2, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array(["wind", "fire", "earth"]),
-              "idf_weights": None,
-          },
-      })
-  def test_combiner_computation(self, data, vocab_size,
-                                expected_accumulator_output,
-                                expected_extract_output):
-    combiner = index_lookup._IndexLookupCombiner(vocab_size=vocab_size)
-    expected_accumulator = combiner._create_accumulator()
-    expected_accumulator = self.update_accumulator(expected_accumulator,
-                                                   expected_accumulator_output)
-    self.validate_accumulator_computation(combiner, data, expected_accumulator)
-    self.validate_accumulator_extract(combiner, data, expected_extract_output)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupIntCombinerTest(keras_parameterized.TestCase,
-                                 preprocessing_test_utils.PreprocessingLayerTest
-                                ):
-
-  def compare_text_accumulators(self, a, b, msg=None):
-    if a is None or b is None:
-      self.assertAllEqual(a, b, msg=msg)
-      return
-
-    self.assertAllEqual(a.count_dict, b.count_dict, msg=msg)
-
-  compare_accumulators = compare_text_accumulators
-
-  def update_accumulator(self, accumulator, data):
-    accumulator.count_dict.update(dict(zip(data["vocab"], data["counts"])))
-
-    return accumulator
-
-  def test_combiner_api_compatibility_int_mode(self):
-    data = np.array([[42, 1138, 725, 1729], [42, 1138, 725, 203]])
-    combiner = index_lookup._IndexLookupCombiner()
-    expected_accumulator_output = {
-        "vocab": np.array([1138, 725, 42, 1729, 203]),
-        "counts": np.array([2, 2, 2, 1, 1]),
-    }
-    expected_extract_output = {
-        "vocab": np.array([1138, 725, 42, 1729, 203]),
-        "idf_weights": None,
-    }
-    expected_accumulator = combiner._create_accumulator()
-    expected_accumulator = self.update_accumulator(expected_accumulator,
-                                                   expected_accumulator_output)
-    self.validate_accumulator_serialize_and_deserialize(combiner, data,
-                                                        expected_accumulator)
-    self.validate_accumulator_uniqueness(combiner, data)
-    self.validate_accumulator_extract(combiner, data, expected_extract_output)
-
-  # TODO(askerryryan): Add tests confirming equivalence to behavior of
-  # existing tf.keras.preprocessing.text.Tokenizer.
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "top_k_smaller_than_full_vocab",
-          "data": np.array([[42, 1138], [1729, 1138], [725], [1729, 1138]]),
-          "vocab_size": 3,
-          "expected_accumulator_output": {
-              "vocab": np.array([1138, 1729, 725, 42]),
-              "counts": np.array([3, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array([1138, 1729, 725]),
-              "idf_weights": None,
-          },
-      },
-      {
-          "testcase_name": "top_k_larger_than_full_vocab",
-          "data": np.array([[42, 1138], [1729, 1138], [725], [1729, 1138]]),
-          "vocab_size": 10,
-          "expected_accumulator_output": {
-              "vocab": np.array([1138, 1729, 725, 42]),
-              "counts": np.array([3, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array([1138, 1729, 725, 42]),
-              "idf_weights": None,
-          },
-      },
-      {
-          "testcase_name": "no_top_k",
-          "data": np.array([[42, 1138], [1729, 1138], [725], [1729, 1138]]),
-          "vocab_size": None,
-          "expected_accumulator_output": {
-              "vocab": np.array([1138, 1729, 725, 42]),
-              "counts": np.array([3, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array([1138, 1729, 725, 42]),
-              "idf_weights": None,
-          },
-      },
-      {
-          "testcase_name": "single_element_per_row",
-          "data": np.array([[42], [1138], [1729], [1138], [725]]),
-          "vocab_size": 3,
-          "expected_accumulator_output": {
-              "vocab": np.array([1138, 1729, 725, 42]),
-              "counts": np.array([2, 1, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array([1138, 1729, 725]),
-              "idf_weights": None,
-          },
-      },
-      # Which tokens are retained is based on global frequency, and is thus
-      # sensitive to frequency within a document. In contrast, because idf only
-      # considers the presence of a token in a document, it is insensitive
-      # to the frequency of the token within the document.
-      {
-          "testcase_name":
-              "retained_tokens_sensitive_to_within_document_frequency",
-          "data":
-              np.array([[42, 42], [1138, 1138], [1729, 1729], [1138, 1138],
-                        [725, 203]]),
-          "vocab_size":
-              3,
-          "expected_accumulator_output": {
-              "vocab": np.array([1138, 42, 1729, 725, 203]),
-              "counts": np.array([4, 2, 2, 1, 1]),
-          },
-          "expected_extract_output": {
-              "vocab": np.array([1138, 1729, 42]),
-              "idf_weights": None,
-          },
-      })
-  def test_combiner_computation(self, data, vocab_size,
-                                expected_accumulator_output,
-                                expected_extract_output):
-    combiner = index_lookup._IndexLookupCombiner(vocab_size=vocab_size)
-    expected_accumulator = combiner._create_accumulator()
-    expected_accumulator = self.update_accumulator(expected_accumulator,
-                                                   expected_accumulator_output)
-    self.validate_accumulator_computation(combiner, data, expected_accumulator)
-    self.validate_accumulator_extract(combiner, data, expected_extract_output)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/integer_lookup.py b/tensorflow/python/keras/layers/preprocessing/integer_lookup.py
deleted file mode 100644
index d6ce3b7..0000000
--- a/tensorflow/python/keras/layers/preprocessing/integer_lookup.py
+++ /dev/null
@@ -1,358 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras string lookup preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.keras.layers.preprocessing import index_lookup
-from tensorflow.python.keras.layers.preprocessing import table_utils
-from tensorflow.python.platform import tf_logging as logging
-from tensorflow.python.util.tf_export import keras_export
-
-
-@keras_export("keras.layers.experimental.preprocessing.IntegerLookup", v1=[])
-class IntegerLookup(index_lookup.IndexLookup):
-  """Reindex integer inputs to be in a contiguous range, via a dict lookup.
-
-  This layer maps a set of arbitrary integer input tokens into indexed
-  integer output via a table-based vocabulary lookup. The layer's output indices
-  will be contiguously arranged up to the maximum vocab size, even if the input
-  tokens are non-contiguous or unbounded. The layer supports multiple options
-  for encoding the output via `output_mode`, and has optional support for
-  out-of-vocabulary (OOV) tokens and masking.
-
-  The vocabulary for the layer can be supplied on construction or learned via
-  `adapt()`. During `adapt()`, the layer will analyze a data set, determine the
-  frequency of individual integer tokens, and create a vocabulary from them. If
-  the vocabulary is capped in size, the most frequent tokens will be used to
-  create the vocabulary and all others will be treated as OOV.
-
-  There are two possible output modes for the layer.
-  When `output_mode` is `"int"`,
-  input integers are converted to their index in the vocabulary (an integer).
-  When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input integers
-  are encoded into an array where each dimension corresponds to an element in
-  the vocabulary.
-
-  The vocabulary can optionally contain a mask token as well as an OOV token
-  (which can optionally occupy multiple indices in the vocabulary, as set
-  by `num_oov_indices`).
-  The position of these tokens in the vocabulary is fixed. When `output_mode` is
-  `"int"`, the vocabulary will begin with the mask token at index 0, followed by
-  OOV indices, followed by the rest of the vocabulary. When `output_mode` is
-  `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with OOV
-  indices and instances of the mask token will be dropped.
-
-  Args:
-    max_tokens: The maximum size of the vocabulary for this layer. If None,
-      there is no cap on the size of the vocabulary. Note that this size
-      includes the OOV and mask tokens. Defaults to None.
-    num_oov_indices: The number of out-of-vocabulary tokens to use. If this
-      value is more than 1, OOV inputs are modulated to determine their OOV
-      value. If this value is 0, OOV inputs will cause an error when calling the
-      layer. Defaults to 1.
-    mask_token: An integer token that represents masked inputs. When
-      `output_mode` is `"int"`, the token is included in vocabulary and mapped
-      to index 0. In other output modes, the token will not appear in the
-      vocabulary and instances of the mask token in the input will be dropped.
-      If set to None, no mask term will be added. Defaults to None.
-    oov_token: Only used when `invert` is True. The token to return for OOV
-      indices. Defaults to -1.
-    vocabulary: An optional list of integer tokens, or a path to a text file
-      containing a vocabulary to load into this layer. The file should contain
-      one integer token per line. If the list or file contains the same token
-      multiple times, an error will be thrown.
-    invert: Only valid when `output_mode` is `"int"`. If True, this layer will
-      map indices to vocabulary items instead of mapping vocabulary items to
-      indices. Defaults to False.
-    output_mode: Specification for the output of the layer. Defaults to `"int"`.
-      Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or
-      `"tf_idf"` configuring the layer as follows:
-        - `"int"`: Return the vocabulary indices of the input tokens.
-        - `"one_hot"`: Encodes each individual element in the input into an
-          array the same size as the vocabulary, containing a 1 at the element
-          index. If the last dimension is size 1, will encode on that dimension.
-          If the last dimension is not size 1, will append a new dimension for
-          the encoded output.
-        - `"multi_hot"`: Encodes each sample in the input into a single array
-          the same size as the vocabulary, containing a 1 for each vocabulary
-          term present in the sample. Treats the last dimension as the sample
-          dimension, if input shape is (..., sample_length), output shape will
-          be (..., num_tokens).
-        - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-          number of times the token at that index appeared in the sample.
-        - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to
-          find the value in each token slot.
-    pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`,
-      `"count"`, or `"tf_idf"`. If True, the output will have its feature axis
-      padded to `max_tokens` even if the number of unique tokens in the
-      vocabulary is less than max_tokens, resulting in a tensor of shape
-      [batch_size, max_tokens] regardless of vocabulary size. Defaults to False.
-    sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`,
-      `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a
-      dense `Tensor`. Defaults to False.
-
-  Examples:
-
-  **Creating a lookup layer with a known vocabulary**
-
-  This example creates a lookup layer with a pre-existing vocabulary.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])  # Note OOV tokens
-  >>> layer = IntegerLookup(vocabulary=vocab)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[1, 3, 4],
-         [4, 0, 2]])>
-
-  **Creating a lookup layer with an adapted vocabulary**
-
-  This example creates a lookup layer and generates the vocabulary by analyzing
-  the dataset.
-
-  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
-  >>> layer = IntegerLookup()
-  >>> layer.adapt(data)
-  >>> layer.get_vocabulary()
-  [-1, 42, 1138, 1000, 36, 12]
-
-  Note that the OOV token -1 has been added to the vocabulary. The remaining
-  tokens are sorted by frequency (42, which has 2 occurrences, is first) then
-  by inverse sort order.
-
-  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
-  >>> layer = IntegerLookup()
-  >>> layer.adapt(data)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[5, 2, 1],
-         [1, 3, 4]])>
-
-
-  **Lookups with multiple OOV indices**
-
-  This example demonstrates how to use a lookup layer with multiple OOV indices.
-  When a layer is created with more than one OOV index, any OOV tokens are
-  hashed into the number of OOV buckets, distributing OOV tokens in a
-  deterministic fashion across the set.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([[12, 1138, 42], [37, 1000, 36]])
-  >>> layer = IntegerLookup(vocabulary=vocab, num_oov_indices=2)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[2, 4, 5],
-         [1, 0, 3]])>
-
-  Note that the output for OOV token 37 is 1, while the output for OOV token
-  1000 is 0. The in-vocab terms have their output index increased by 1 from
-  earlier examples (12 maps to 2, etc.) in order to make space for the extra OOV
-  token.
-
-  **One-hot output**
-
-  Configure the layer with `output_mode='one_hot'`. Note that the first
-  `num_oov_indices` dimensions in the one_hot encoding represent OOV values.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([12, 36, 1138, 42, 7]) # Note OOV tokens
-  >>> layer = IntegerLookup(vocabulary=vocab, output_mode='one_hot')
-  >>> layer(data)
-  <tf.Tensor: shape=(5, 5), dtype=float32, numpy=
-    array([[0., 1., 0., 0., 0.],
-           [0., 0., 1., 0., 0.],
-           [0., 0., 0., 1., 0.],
-           [0., 0., 0., 0., 1.],
-           [1., 0., 0., 0., 0.]], dtype=float32)>
-
-  **Multi-hot output**
-
-  Configure the layer with `output_mode='multi_hot'`. Note that the first
-  `num_oov_indices` dimensions in the multi_hot encoding represent OOV tokens.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens
-  >>> layer = IntegerLookup(vocabulary=vocab, output_mode='multi_hot')
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0., 1., 0., 1., 1.],
-           [1., 0., 1., 0., 1.]], dtype=float32)>
-
-  **Token count output**
-
-  Configure the layer with `output_mode='count'`. As with multi_hot output, the
-  first `num_oov_indices` dimensions in the output represent OOV tokens.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens
-  >>> layer = IntegerLookup(vocabulary=vocab, output_mode='count')
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0., 1., 0., 1., 2.],
-           [2., 0., 1., 0., 1.]], dtype=float32)>
-
-  **TF-IDF output**
-
-  Configure the layer with `output_mode='tf_idf'`. As with multi_hot output, the
-  first `num_oov_indices` dimensions in the output represent OOV tokens.
-
-  Each token bin will output `token_count * idf_weight`, where the idf weights
-  are the inverse document frequency weights per token. These should be provided
-  along with the vocabulary. Note that the `idf_weight` for OOV tokens will
-  default to the average of all idf weights passed in.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> idf_weights = [0.25, 0.75, 0.6, 0.4]
-  >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens
-  >>> layer = IntegerLookup(output_mode='tf_idf')
-  >>> layer.set_vocabulary(vocab, idf_weights=idf_weights)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0.  , 0.25, 0.  , 0.6 , 0.8 ],
-           [1.0 , 0.  , 0.75, 0.  , 0.4 ]], dtype=float32)>
-
-  To specify the idf weights for OOV tokens, you will need to pass the entire
-  vocabulary, including the leading OOV token.
-
-  >>> vocab = [-1, 12, 36, 1138, 42]
-  >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4]
-  >>> data = tf.constant([[12, 1138, 42, 42], [42, 7, 36, 7]]) # Note OOV tokens
-  >>> layer = IntegerLookup(output_mode='tf_idf')
-  >>> layer.set_vocabulary(vocab, idf_weights=idf_weights)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0.  , 0.25, 0.  , 0.6 , 0.8 ],
-           [1.8 , 0.  , 0.75, 0.  , 0.4 ]], dtype=float32)>
-
-  When adapting the layer in tf_idf mode, each input sample will be considered a
-  document, and idf weight per token will be calculated as
-  `log(1 + num_documents / (1 + token_document_count))`.
-
-  **Inverse lookup**
-
-  This example demonstrates how to map indices to tokens using this layer. (You
-  can also use adapt() with invert=True, but for simplicity we'll pass the
-  vocab in this example.)
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([[1, 3, 4], [4, 0, 2]])
-  >>> layer = IntegerLookup(vocabulary=vocab, invert=True)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[  12, 1138,   42],
-         [  42,   -1,   36]])>
-
-  Note that the first index corresponds to the OOV token by default.
-
-
-  **Forward and inverse lookup pairs**
-
-  This example demonstrates how to use the vocabulary of a standard lookup
-  layer to create an inverse lookup layer.
-
-  >>> vocab = [12, 36, 1138, 42]
-  >>> data = tf.constant([[12, 1138, 42], [42, 1000, 36]])
-  >>> layer = IntegerLookup(vocabulary=vocab)
-  >>> i_layer = IntegerLookup(vocabulary=layer.get_vocabulary(), invert=True)
-  >>> int_data = layer(data)
-  >>> i_layer(int_data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[  12, 1138,   42],
-         [  42,   -1,   36]])>
-
-  In this example, the input token 1000 resulted in an output of -1, since
-  1000 was not in the vocabulary; it was represented as an OOV token, and all
-  OOV tokens are returned as -1 in the inverse layer. Also, note that for the
-  inverse to work, you must have already set the forward layer vocabulary
-  either directly or via `adapt()` before calling `get_vocabulary()`.
-  """
-
-  def __init__(self,
-               max_tokens=None,
-               num_oov_indices=1,
-               mask_token=None,
-               oov_token=-1,
-               vocabulary=None,
-               invert=False,
-               output_mode=index_lookup.INT,
-               sparse=False,
-               pad_to_max_tokens=False,
-               **kwargs):
-    allowed_dtypes = [dtypes.int64]
-
-    # Support deprecated args for this layer.
-    if "max_values" in kwargs:
-      logging.log_first_n(logging.WARN,
-                          "max_values is deprecated, use max_tokens instead.",
-                          1)
-      max_tokens = kwargs["max_values"]
-      del kwargs["max_values"]
-    if "mask_value" in kwargs:
-      logging.log_first_n(logging.WARN,
-                          "mask_value is deprecated, use mask_token instead.",
-                          1)
-      mask_token = kwargs["mask_value"]
-      del kwargs["mask_value"]
-    if "oov_value" in kwargs:
-      logging.log_first_n(logging.WARN,
-                          "oov_value is deprecated, use oov_token instead.", 1)
-      oov_token = kwargs["oov_value"]
-      del kwargs["oov_value"]
-
-    if "dtype" in kwargs and kwargs["dtype"] not in allowed_dtypes:
-      raise ValueError("The value of the dtype argument for IntegerLookup may "
-                       "only be one of %s." % (allowed_dtypes,))
-
-    if "dtype" not in kwargs:
-      kwargs["dtype"] = dtypes.int64
-
-    # If max_tokens is set, it must be greater than 1; otherwise we are
-    # creating a 0-element vocab, which doesn't make sense.
-    if max_tokens is not None and max_tokens <= 1:
-      raise ValueError("If set, max_tokens must be greater than 1. "
-                       "You passed %s" % (max_tokens,))
-
-    if num_oov_indices < 0:
-      raise ValueError(
-          "num_oov_indices must be greater than or equal to 0. You passed %s" %
-          (num_oov_indices,))
-
-    super(IntegerLookup, self).__init__(
-        max_tokens=max_tokens,
-        num_oov_indices=num_oov_indices,
-        mask_token=mask_token,
-        oov_token=oov_token,
-        vocabulary=vocabulary,
-        invert=invert,
-        output_mode=output_mode,
-        sparse=sparse,
-        pad_to_max_tokens=pad_to_max_tokens,
-        **kwargs)
-
-  def set_vocabulary(self, vocabulary, idf_weights=None):
-    if isinstance(vocabulary, str):
-      if self.output_mode == index_lookup.TF_IDF:
-        raise RuntimeError(
-            "Setting vocabulary directly from a file is not "
-            "supported in TF-IDF mode, since this layer cannot "
-            "read files containing TF-IDF weight data. Please "
-            "read the file using Python and set the vocabulary "
-            "and weights by passing lists or arrays to the "
-            "set_vocabulary function's `vocabulary` and `idf_weights` "
-            "args.")
-      vocabulary = table_utils.get_vocabulary_from_file(vocabulary)
-      vocabulary = [int(v) for v in vocabulary]
-    super().set_vocabulary(vocabulary, idf_weights=idf_weights)
diff --git a/tensorflow/python/keras/layers/preprocessing/integer_lookup_test.py b/tensorflow/python/keras/layers/preprocessing/integer_lookup_test.py
deleted file mode 100644
index d659cd9..0000000
--- a/tensorflow/python/keras/layers/preprocessing/integer_lookup_test.py
+++ /dev/null
@@ -1,632 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras text vectorization preprocessing layer."""
-
-import gc
-import itertools
-import os
-import random
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python import tf2
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import def_function
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import errors_impl
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers.preprocessing import integer_lookup
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-
-
-def _get_end_to_end_test_cases():
-  test_cases = (
-      {
-          "testcase_name":
-              "test_ints_soft_vocab_cap",
-          # Create an array where 1138 is the most frequent term, followed by
-          # 1729, then 725, then 42. This ensures that the vocab accumulator
-          # is sorting by frequency.
-          "vocab_data":
-              np.array([[42], [1138], [1138], [1138], [1138], [1729], [1729],
-                        [1729], [725], [725]],
-                       dtype=np.int64),
-          "input_data":
-              np.array([[1138], [1729], [725], [42], [42], [725], [1138], [4]],
-                       dtype=np.int64),
-          "kwargs": {
-              "max_tokens": None,
-              "dtype": dtypes.int64,
-          },
-          "expected_output": [[1], [2], [3], [4], [4], [3], [1], [0]],
-          "input_dtype":
-              dtypes.int64
-      },)
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IntegerLookupLayerTest(keras_parameterized.TestCase,
-                             preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters(*_get_end_to_end_test_cases())
-  def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs,
-                                       use_dataset, expected_output,
-                                       input_dtype):
-    cls = integer_lookup.IntegerLookup
-    expected_output_dtype = dtypes.int64
-    input_shape = input_data.shape
-
-    if use_dataset:
-      # Keras APIs expect batched datasets.
-      # TODO(rachelim): `model.predict` predicts the result on each
-      # dataset batch separately, then tries to concatenate the results
-      # together. When the results have different shapes on the non-concat
-      # axis (which can happen in the output_mode = INT case for
-      # IntegerLookup), the concatenation fails. In real use cases, this may
-      # not be an issue because users are likely to pipe the preprocessing layer
-      # into other keras layers instead of predicting it directly. A workaround
-      # for these unit tests is to have the dataset only contain one batch, so
-      # no concatenation needs to happen with the result. For consistency with
-      # numpy input, we should make `predict` join differently shaped results
-      # together sensibly, with 0 padding.
-      input_data = dataset_ops.Dataset.from_tensor_slices(input_data).batch(
-          input_shape[0])
-      vocab_data = dataset_ops.Dataset.from_tensor_slices(vocab_data).batch(
-          input_shape[0])
-
-    with CustomObjectScope({"IntegerLookup": cls}):
-      output_data = testing_utils.layer_test(
-          cls,
-          kwargs=kwargs,
-          input_shape=input_shape,
-          input_data=input_data,
-          input_dtype=input_dtype,
-          expected_output_dtype=expected_output_dtype,
-          validate_training=False,
-          adapt_data=vocab_data)
-    self.assertAllClose(expected_output, output_data)
-
-  def test_layer_with_list_input(self):
-    vocab = [12, 36, 1138, 42]
-    data = [[12, 1138, 42], [42, 1000, 36]]  # Note OOV tokens
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab)
-    output = layer(data)
-    expected_output = np.array([[1, 3, 4], [4, 0, 2]])
-    self.assertEqual(output.numpy().tolist(), expected_output.tolist())
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingInputTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 32], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [4, 0]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    layer = integer_lookup.IntegerLookup(max_tokens=None)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]],
-                                              dtype=np.int64)
-    expected_output = [[1, 2, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, ragged=True)
-    layer = integer_lookup.IntegerLookup(max_tokens=None)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingMultiOOVTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 133], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [6, 2]
-    expected_dense_shape = [3, 4]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, sparse=True)
-    layer = integer_lookup.IntegerLookup(
-        max_tokens=None,
-        dtype=dtypes.int64,
-        num_oov_indices=2,
-        mask_token=0,
-        oov_token=-1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array, steps=1)
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 133]],
-                                              dtype=np.int64)
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64, ragged=True)
-    layer = integer_lookup.IntegerLookup(max_tokens=None, num_oov_indices=2)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingAdaptTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_adapt(self):
-    vocab_data = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [0, 1], [1, 2]],
-        values=[203, 1729, 203],
-        dense_shape=[3, 4])
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-
-    layer = integer_lookup.IntegerLookup()
-    layer.adapt(vocab_dataset)
-    expected_vocabulary = [-1, 203, 1729]
-    self.assertAllEqual(expected_vocabulary, layer.get_vocabulary())
-
-  def test_ragged_adapt(self):
-    vocab_data = ragged_factory_ops.constant([[203], [1729, 203]])
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-
-    layer = integer_lookup.IntegerLookup()
-    layer.adapt(vocab_dataset)
-    expected_vocabulary = [-1, 203, 1729]
-    self.assertAllEqual(expected_vocabulary, layer.get_vocabulary())
-
-  def test_single_int_generator_dataset(self):
-
-    def word_gen():
-      for _ in itertools.count(1):
-        yield random.randint(0, 100)
-
-    ds = dataset_ops.Dataset.from_generator(word_gen, dtypes.int64,
-                                            tensor_shape.TensorShape([]))
-    batched_ds = ds.take(2)
-    input_t = keras.Input(shape=(), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        max_tokens=10, num_oov_indices=0, mask_token=None, oov_token=None)
-    _ = layer(input_t)
-    layer.adapt(batched_ds)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IntegerLookupOutputTest(keras_parameterized.TestCase,
-                              preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_int_output(self):
-    vocab_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup()
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_output_shape(self):
-    input_data = keras.Input(shape=(4,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(max_tokens=2, num_oov_indices=1)
-    int_data = layer(input_data)
-    self.assertAllEqual(int_data.shape[1:], input_data.shape[1:])
-
-  def test_int_output_with_mask(self):
-    vocab_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(max_tokens=None, mask_token=0)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_explicit_vocab(self):
-    vocab_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-    )
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_explicit_vocab_with_special_tokens(self):
-    vocab_data = [0, -1, 42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data,
-        max_tokens=None,
-        mask_token=0,
-    )
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_no_oov(self):
-    vocab_data = [42, 1138, 725, 1729]
-    valid_input = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 0]])
-    invalid_input = np.array([[42, 1138, 725, 203], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data, mask_token=0, num_oov_indices=0)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(valid_input)
-    self.assertAllEqual(expected_output, output_data)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "found OOV values.*203"):
-      _ = model.predict(invalid_input)
-
-  def test_inverse_output(self):
-    vocab_data = [-1, 42, 1138, 725, 1729]
-    input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]])
-    expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(invert=True)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_forward_backward_explicit_vocab(self):
-    vocab_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab_data)
-    inverse_layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data, invert=True)
-    int_data = layer(input_data)
-    inverse_data = inverse_layer(int_data)
-    model = keras.Model(inputs=input_data, outputs=inverse_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_forward_backward_adapted_vocab(self):
-    adapt_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = np.array([[42, 1138, 725, 1729], [1729, 725, 42, -1]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup()
-    layer.adapt(adapt_data)
-    inverse_layer = integer_lookup.IntegerLookup(
-        vocabulary=layer.get_vocabulary(), invert=True)
-    int_data = layer(input_data)
-    inverse_data = inverse_layer(int_data)
-    model = keras.Model(inputs=input_data, outputs=inverse_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IntegerLookupVocabularyTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def _write_to_temp_file(self, file_name, vocab_list):
-    vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-    with gfile.GFile(vocab_path, "w") as writer:
-      for vocab in vocab_list:
-        writer.write(str(vocab) + "\n")
-      writer.flush()
-      writer.close()
-    return vocab_path
-
-  def test_int_output_explicit_vocab(self):
-    vocab_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_no_vocab(self):
-    with self.assertRaisesRegex(ValueError,
-                                "You must set the layer's vocabulary"):
-      layer = integer_lookup.IntegerLookup()
-      layer([[1]])
-
-  def test_one_hot_output(self):
-    vocab_data = [2, 3, 4, 5]
-    input_array = np.array([2, 3, 4, 5, 6])
-    expected_output = [
-        [0, 1, 0, 0, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 1, 0],
-        [0, 0, 0, 0, 1],
-        [1, 0, 0, 0, 0],
-    ]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data, output_mode="one_hot")
-    res = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=res)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_multi_hot_output(self):
-    vocab_data = [2, 3, 4, 5]
-    input_array = np.array([[2, 2, 3, 4], [0, 1, 5, 2]])
-    expected_output = [[0, 1, 1, 1, 0], [1, 1, 0, 0, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data, output_mode="multi_hot")
-    res = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=res)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_count_output(self):
-    vocab_data = [2, 3, 4, 5]
-    input_array = np.array([[2, 2, 3, 4], [0, 1, 5, 6]])
-    expected_output = [[0, 2, 1, 1, 0], [3, 0, 0, 0, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data, output_mode="count")
-    res = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=res)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_sparse_output(self):
-    vocab_data = [2, 3, 4, 5]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_data, output_mode="multi_hot", sparse=True)
-    res = layer(input_data)
-    self.assertEqual(res.__class__.__name__, "SparseKerasTensor")
-
-  def test_get_vocab_returns_int(self):
-    vocab_data = [42, 1138, 725, 1729]
-    expected_vocab = [-1, 42, 1138, 725, 1729]
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab_data)
-    layer_vocab = layer.get_vocabulary()
-    self.assertAllEqual(expected_vocab, layer_vocab)
-    self.assertIsInstance(layer_vocab[0], np.int64)
-
-  def test_int_output_explicit_vocab_from_file(self):
-    vocab_list = [42, 1138, 725, 1729]
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
-
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_inverted_vocab_from_file(self):
-    vocab_list = [42, 1138, 725, 1729]
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
-
-    input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]])
-    expected_output = [[42, 1138, 725, 1729], [1729, 725, 42, -1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab_path, invert=True)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_inverted_vocab_from_file_with_mask(self):
-    vocab_list = [42, 1138, 725, 1729]
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
-
-    input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]])
-    expected_output = [[42, 1138, 725, 1729], [1729, 725, 42, -10]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(
-        vocabulary=vocab_path, invert=True, mask_value=-10)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_explicit_vocab_from_file_via_setter(self):
-    vocab_list = [42, 1138, 725, 1729]
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
-
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup()
-    layer.set_vocabulary(vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_non_unique_vocab_fails(self):
-    vocab_data = [42, 1138, 725, 1729, 1729]
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*1729.*"):
-      _ = integer_lookup.IntegerLookup(vocabulary=vocab_data)
-
-  def test_non_unique_vocab_from_file_fails(self):
-    vocab_list = [42, 1138, 725, 1729, 42]
-    vocab_path = self._write_to_temp_file("repeat_vocab_file", vocab_list)
-    with self.assertRaisesRegex(
-        errors_impl.FailedPreconditionError,
-        ".*HashTable has different value for same key.*42.*"):
-      _ = integer_lookup.IntegerLookup(vocabulary=vocab_path)
-
-  def test_tensor_vocab(self):
-    vocab_data = [-1, 42, 1138, 725, 1729]
-    vocab_tensor = constant_op.constant(vocab_data, dtypes.int64)
-    layer = integer_lookup.IntegerLookup(vocabulary=vocab_tensor)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-    self.assertAllEqual(layer.vocabulary_size(), 5)
-    fn = def_function.function(lambda: layer.set_vocabulary(vocab_tensor))
-    with self.assertRaisesRegex(RuntimeError, "Cannot set a tensor vocabulary"):
-      fn()
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IntegerLookupErrorTest(keras_parameterized.TestCase,
-                             preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_too_long_vocab_fails_in_single_setting(self):
-    vocab_data = [42, 1138, 725, 1729]
-
-    layer = integer_lookup.IntegerLookup(max_tokens=4, num_oov_indices=1)
-    with self.assertRaisesRegex(ValueError,
-                                "vocabulary larger than the maximum vocab.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_zero_max_tokens_fails(self):
-    with self.assertRaisesRegex(ValueError, ".*max_tokens.*"):
-      _ = integer_lookup.IntegerLookup(max_tokens=0, num_oov_indices=1)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IntegerLookupSavingTest(keras_parameterized.TestCase,
-                              preprocessing_test_utils.PreprocessingLayerTest):
-
-  def tearDown(self):
-    keras.backend.clear_session()
-    gc.collect()
-    super(IntegerLookupSavingTest, self).tearDown()
-
-  def test_vocabulary_persistence_across_saving(self):
-    vocab_data = [42, 1138, 725, 1729]
-    input_array = np.array([[42, 1138, 725, 1729], [1729, 725, 42, 203]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = integer_lookup.IntegerLookup(max_tokens=None, num_oov_indices=1)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = keras.models.load_model(
-        output_path,
-        custom_objects={"IntegerLookup": integer_lookup.IntegerLookup})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllEqual(new_output_dataset, expected_output)
-
-
-if __name__ == "__main__":
-  test.main()
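The removed tests above pin down IntegerLookup's index, multi-hot and count
output modes. For reference, a minimal sketch of the multi-hot behavior they
covered, written against the standalone Keras package that now owns these
layers (assuming a TF 2.6+ release where the layer is exposed as
tf.keras.layers.IntegerLookup):

import numpy as np
import tensorflow as tf

vocab = [2, 3, 4, 5]
layer = tf.keras.layers.IntegerLookup(vocabulary=vocab, output_mode="multi_hot")
# With the default num_oov_indices=1, index 0 is reserved for OOV tokens,
# so the multi-hot output has len(vocab) + 1 columns.
out = layer(np.array([[2, 2, 3, 4], [0, 1, 5, 2]]))
print(out.numpy())  # [[0, 1, 1, 1, 0], [1, 1, 0, 0, 1]], as in the deleted test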
diff --git a/tensorflow/python/keras/layers/preprocessing/normalization.py b/tensorflow/python/keras/layers/preprocessing/normalization.py
deleted file mode 100644
index a83742f..0000000
--- a/tensorflow/python/keras/layers/preprocessing/normalization.py
+++ /dev/null
@@ -1,302 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Normalization preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_impl
-from tensorflow.python.ops import variables
-from tensorflow.python.util.tf_export import keras_export
-
-
-@keras_export('keras.layers.experimental.preprocessing.Normalization')
-class Normalization(base_preprocessing_layer.PreprocessingLayer):
-  """Feature-wise normalization of the data.
-
-  This layer will coerce its inputs into a distribution centered around 0 with
-  standard deviation 1. It accomplishes this by precomputing the mean and
-  variance of the data, and computing `(input - mean) / sqrt(var)` at runtime.
-
-  What happens in `adapt`: Compute mean and variance of the data and store them
-    as the layer's weights. `adapt` should be called before `fit`, `evaluate`,
-    or `predict`.
-
-  Args:
-      axis: Integer or tuple of integers, the axis or axes that should be
-        "kept". These axes are not summed over when calculating the
-        normalization statistics. By default the last axis, the `features`
-        axis, is kept and any `space` or `time` axes are summed. Each element
-        in the kept axes is normalized independently. If `axis` is set to
-        `None`, the layer will perform scalar normalization (dividing the
-        input by a single scalar value). The `batch` axis, 0, is always summed
-        over (`axis=0` is not allowed).
-      mean: The mean value(s) to use during normalization. The passed value(s)
-        will be broadcast to the shape of the kept axes above; if the value(s)
-        cannot be broadcast, an error will be raised when this layer's build()
-        method is called.
-      variance: The variance value(s) to use during normalization. The passed
-        value(s) will be broadcast to the shape of the kept axes above; if the
-        value(s) cannot be broadcast, an error will be raised when this layer's
-        build() method is called.
-
-  Examples:
-
-  Calculate the mean and variance by analyzing the dataset in `adapt`.
-
-  >>> adapt_data = np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32)
-  >>> input_data = np.array([[1.], [2.], [3.]], np.float32)
-  >>> layer = Normalization()
-  >>> layer.adapt(adapt_data)
-  >>> layer(input_data)
-  <tf.Tensor: shape=(3, 1), dtype=float32, numpy=
-  array([[-1.4142135 ],
-         [-0.70710677],
-         [ 0.        ]], dtype=float32)>
-
-  Pass the mean and variance directly.
-
-  >>> input_data = np.array([[1.], [2.], [3.]], np.float32)
-  >>> layer = Normalization(mean=3., variance=2.)
-  >>> layer(input_data)
-  <tf.Tensor: shape=(3, 1), dtype=float32, numpy=
-  array([[-1.4142135 ],
-         [-0.70710677],
-         [ 0.        ]], dtype=float32)>
-  """
-
-  def __init__(self, axis=-1, mean=None, variance=None, **kwargs):
-    super().__init__(streaming=True, **kwargs)
-
-    # Standardize `axis` to a tuple.
-    if axis is None:
-      axis = ()
-    elif isinstance(axis, int):
-      axis = (axis,)
-    else:
-      axis = tuple(axis)
-    if 0 in axis:
-      raise ValueError('The argument \'axis\' may not be 0.')
-    self.axis = axis
-
-    # Set `mean` and `variance` if passed.
-    if isinstance(mean, variables.Variable):
-      raise ValueError('Normalization does not support passing a Variable '
-                       'for the `mean` init arg.')
-    if isinstance(variance, variables.Variable):
-      raise ValueError('Normalization does not support passing a Variable '
-                       'for the `variance` init arg.')
-    if (mean is not None) != (variance is not None):
-      raise ValueError(
-          'When setting values directly, both `mean` and `variance` '
-          'must be set. Got mean: {} and variance: {}'.format(mean, variance))
-    self.input_mean = mean
-    self.input_variance = variance
-
-  def build(self, input_shape):
-    super().build(input_shape)
-
-    input_shape = tensor_shape.TensorShape(input_shape).as_list()
-    if len(input_shape) == 1:
-      input_shape = input_shape + [1]
-    ndim = len(input_shape)
-
-    if any(a < 1 - ndim or a >= ndim for a in self.axis):
-      raise ValueError('All `axis` values must be in the range '
-                       '[1 - ndim, ndim - 1]. Found '
-                       'ndim: `{}`, axis: {}'.format(ndim, self.axis))
-
-    # Axes to be kept, replacing negative values with positive equivalents.
-    # Sorted to avoid transposing axes.
-    self._keep_axis = sorted([d if d >= 0 else d + ndim for d in self.axis])
-    # Axes to be reduced.
-    self._reduce_axis = [d for d in range(ndim) if d not in self._keep_axis]
-    # 1 if an axis should be reduced, 0 otherwise.
-    self._reduce_axis_mask = [
-        0 if d in self._keep_axis else 1 for d in range(ndim)
-    ]
-    # Broadcast any reduced axes.
-    self._broadcast_shape = [
-        input_shape[d] if d in self._keep_axis else 1 for d in range(ndim)
-    ]
-    mean_and_var_shape = tuple(input_shape[d] for d in self._keep_axis)
-
-    if self.input_mean is None:
-      self.adapt_mean = self.add_weight(
-          name='mean',
-          shape=mean_and_var_shape,
-          dtype=self.dtype,
-          initializer=init_ops.zeros_initializer,
-          trainable=False)
-      self.adapt_variance = self.add_weight(
-          name='variance',
-          shape=mean_and_var_shape,
-          dtype=self.dtype,
-          initializer=init_ops.ones_initializer,
-          trainable=False)
-      self.count = self.add_weight(
-          name='count',
-          shape=(),
-          dtype=dtypes.int64,
-          initializer=init_ops.zeros_initializer,
-          trainable=False)
-      self.finalize_state()
-    else:
-      # In the no adapt case, make constant tensors for mean and variance with
-      # proper broadcast shape for use during call.
-      mean = self.input_mean * np.ones(mean_and_var_shape)
-      variance = self.input_variance * np.ones(mean_and_var_shape)
-      mean = array_ops.reshape(mean, self._broadcast_shape)
-      variance = array_ops.reshape(variance, self._broadcast_shape)
-      self.mean = math_ops.cast(mean, self.compute_dtype)
-      self.variance = math_ops.cast(variance, self.compute_dtype)
-
-  def update_state(self, data):
-    if self.input_mean is not None:
-      raise ValueError(
-          'Cannot `adapt` a Normalization layer that is initialized with '
-          'static `mean` and `variance`, you passed mean {} and variance {}.'
-          .format(self.input_mean, self.input_variance))
-
-    if not self.built:
-      raise RuntimeError('`build` must be called before `update_state`.')
-
-    data = self._standardize_inputs(data)
-    data = math_ops.cast(data, self.adapt_mean.dtype)
-    batch_mean, batch_variance = nn_impl.moments_v2(
-        data, axes=self._reduce_axis)
-    batch_shape = array_ops.shape(data, out_type=self.count.dtype)
-    batch_reduce_shape = array_ops.gather(batch_shape, self._reduce_axis)
-    batch_count = math_ops.reduce_prod(batch_reduce_shape)
-
-    total_count = batch_count + self.count
-    batch_weight = (
-        math_ops.cast(batch_count, dtype=self.dtype) /
-        math_ops.cast(total_count, dtype=self.dtype))
-    existing_weight = 1. - batch_weight
-
-    total_mean = self.adapt_mean * existing_weight + batch_mean * batch_weight
-    # The variance is computed using the lack-of-fit sum of squares
-    # formula (see https://en.wikipedia.org/wiki/Lack-of-fit_sum_of_squares).
-    total_variance = ((self.adapt_variance +
-                       (self.adapt_mean - total_mean)**2) * existing_weight +
-                      (batch_variance +
-                       (batch_mean - total_mean)**2) * batch_weight)
-    self.adapt_mean.assign(total_mean)
-    self.adapt_variance.assign(total_variance)
-    self.count.assign(total_count)
-
-  def merge_state(self, layers):
-    layers = layers + [self]
-    for l in layers:
-      if l.input_mean is not None:
-        raise ValueError(
-            'Cannot merge Normalization layer {} that has been initialized '
-            'with `mean` and `variance`, you passed `mean={}` and '
-            '`variance={}`.'.format(l.name, l.input_mean, l.input_variance))
-      if not l.built:
-        raise ValueError(
-            'Cannot merge Normalization layer {}, it has no state. You need to '
-            'call `adapt` on this layer before merging.'.format(l.name))
-
-    layer_counts = [l.count for l in layers]
-    layer_means = [l.adapt_mean for l in layers]
-    layer_variances = [l.adapt_variance for l in layers]
-
-    total_count = math_ops.reduce_sum(layer_counts)
-    layer_weightings = (
-        math_ops.cast(layer_counts, self.dtype) /
-        math_ops.cast(total_count, self.dtype))
-    layer_weightings = array_ops.reshape(
-        layer_weightings,
-        shape=[len(layers)] + [1] * self.adapt_mean.shape.rank)
-
-    total_mean = math_ops.reduce_sum(layer_means * layer_weightings, axis=0)
-    inter_layer_variances = (layer_means - total_mean)**2
-    total_variance = math_ops.reduce_sum(
-        ((layer_variances + inter_layer_variances) * layer_weightings), axis=0)
-
-    self.adapt_mean.assign(total_mean)
-    self.adapt_variance.assign(total_variance)
-    self.count.assign(total_count)
-    self.finalize_state()
-
-  def reset_state(self):  # pylint: disable=method-hidden
-    if self.input_mean is not None or not self.built:
-      return
-
-    self.adapt_mean.assign(array_ops.zeros_like(self.adapt_mean))
-    self.adapt_variance.assign(array_ops.ones_like(self.adapt_variance))
-    self.count.assign(array_ops.zeros_like(self.count))
-
-  def finalize_state(self):
-    if self.input_mean is not None or not self.built:
-      return
-
-    # In the adapt case, we make constant tensors for mean and variance with
-    # proper broadcast shape and dtype each time `finalize_state` is called.
-    self.mean = array_ops.reshape(self.adapt_mean, self._broadcast_shape)
-    self.mean = math_ops.cast(self.mean, self.compute_dtype)
-    self.variance = array_ops.reshape(self.adapt_variance,
-                                      self._broadcast_shape)
-    self.variance = math_ops.cast(self.variance, self.compute_dtype)
-
-  def call(self, inputs):
-    inputs = self._standardize_inputs(inputs)
-    # The base layer automatically casts floating-point inputs, but we
-    # explicitly cast here to also allow integer inputs to be passed
-    inputs = math_ops.cast(inputs, self.compute_dtype)
-    return ((inputs - self.mean) /
-            math_ops.maximum(math_ops.sqrt(self.variance), backend.epsilon()))
-
-  def compute_output_shape(self, input_shape):
-    return input_shape
-
-  def compute_output_signature(self, input_spec):
-    return input_spec
-
-  def get_config(self):
-    config = super().get_config()
-    config.update({
-        'axis': self.axis,
-        'mean': self._convert_to_list(self.input_mean),
-        'variance': self._convert_to_list(self.input_variance),
-    })
-    return config
-
-  def _standardize_inputs(self, inputs):
-    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
-    if inputs.shape.rank == 0:
-      inputs = array_ops.reshape(inputs, [1, 1])
-    elif inputs.shape.rank == 1:
-      inputs = array_ops.expand_dims(inputs, 1)
-    return inputs
-
-  def _convert_to_list(self, inputs):
-    if tensor_util.is_tensor(inputs):
-      inputs = inputs.numpy()
-    if isinstance(inputs, (np.ndarray)):
-      inputs = inputs.tolist()
-      inputs = list(inputs)
-    return inputs
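The `update_state` and `merge_state` methods above rely on the same weighted
combination of moments (the lack-of-fit sum-of-squares form of the variance).
A small standalone sketch in plain NumPy, independent of the layer, showing
that this arithmetic reproduces the exact moments of the combined data:

import numpy as np

def merge_moments(mean_a, var_a, count_a, mean_b, var_b, count_b):
    # Combine the moments of two disjoint batches into the moments of their
    # union.
    total = count_a + count_b
    w_b = count_b / total
    w_a = 1.0 - w_b
    mean = w_a * mean_a + w_b * mean_b
    # Each partial variance is shifted by the squared offset of its mean from
    # the combined mean before the weighted sum (lack-of-fit sum of squares).
    var = (w_a * (var_a + (mean_a - mean) ** 2) +
           w_b * (var_b + (mean_b - mean) ** 2))
    return mean, var, total

data = np.arange(10, dtype=np.float64)
mean, var, _ = merge_moments(data[:4].mean(), data[:4].var(), 4,
                             data[4:].mean(), data[4:].var(), 6)
assert np.isclose(mean, data.mean()) and np.isclose(var, data.var())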
diff --git a/tensorflow/python/keras/layers/preprocessing/normalization_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/normalization_distribution_test.py
deleted file mode 100644
index 6050e9e6..0000000
--- a/tensorflow/python/keras/layers/preprocessing/normalization_distribution_test.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.normalization."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.distribute import multi_process_runner
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import strategy_combinations
-from tensorflow.python.keras.layers.preprocessing import normalization
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-
-
-def _get_layer_computation_test_cases():
-  test_cases = ({
-      "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32),
-      "axis": -1,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element"
-  }, {
-      "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32),
-      "axis": None,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element_none_axis"
-  }, {
-      "adapt_data": np.array([[1., 2., 3., 4., 5.]], dtype=np.float32),
-      "axis": None,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element_none_axis_flat_data"
-  }, {
-      "adapt_data":
-          np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]],
-                   np.float32),
-      "axis":
-          1,
-      "test_data":
-          np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]],
-                   np.float32),
-      "expected":
-          np.array([[[-1.549193, -0.774597, 0.], [-1.549193, -0.774597, 0.]],
-                    [[0., 0.774597, 1.549193], [0., 0.774597, 1.549193]]],
-                   np.float32),
-      "testcase_name":
-          "3d_internal_axis"
-  }, {
-      "adapt_data":
-          np.array(
-              [[[1., 0., 3.], [2., 3., 4.]], [[3., -1., 5.], [4., 5., 8.]]],
-              np.float32),
-      "axis": (1, 2),
-      "test_data":
-          np.array(
-              [[[3., 1., -1.], [2., 5., 4.]], [[3., 0., 5.], [2., 5., 8.]]],
-              np.float32),
-      "expected":
-          np.array(
-              [[[1., 3., -5.], [-1., 1., -1.]], [[1., 1., 1.], [-1., 1., 1.]]],
-              np.float32),
-      "testcase_name":
-          "3d_multiple_axis"
-  })
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@ds_combinations.generate(
-    combinations.times(
-        combinations.combine(
-            strategy=strategy_combinations.all_strategies +
-            strategy_combinations.multi_worker_mirrored_strategies,
-            mode=["eager"]), _get_layer_computation_test_cases()))
-class NormalizationTest(keras_parameterized.TestCase,
-                        preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_layer_computation(self, strategy, adapt_data, axis, test_data,
-                             use_dataset, expected):
-    input_shape = tuple([None for _ in range(test_data.ndim - 1)])
-    if use_dataset:
-      # Keras APIs expect batched datasets
-      adapt_data = dataset_ops.Dataset.from_tensor_slices(adapt_data).batch(
-          test_data.shape[0] // 2)
-      test_data = dataset_ops.Dataset.from_tensor_slices(test_data).batch(
-          test_data.shape[0] // 2)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=input_shape)
-      layer = normalization.Normalization(axis=axis)
-      layer.adapt(adapt_data)
-      output = layer(input_data)
-      model = keras.Model(input_data, output)
-      output_data = model.predict(test_data)
-    self.assertAllClose(expected, output_data)
-
-
-if __name__ == "__main__":
-  v2_compat.enable_v2_behavior()
-  multi_process_runner.test_main()
diff --git a/tensorflow/python/keras/layers/preprocessing/normalization_test.py b/tensorflow/python/keras/layers/preprocessing/normalization_test.py
deleted file mode 100644
index 2e35d46..0000000
--- a/tensorflow/python/keras/layers/preprocessing/normalization_test.py
+++ /dev/null
@@ -1,408 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for keras.layers.preprocessing.normalization."""
-
-import os
-
-from absl.testing import parameterized
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers.preprocessing import normalization
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test
-from tensorflow.python.saved_model import load
-from tensorflow.python.saved_model import save
-
-
-def _get_layer_computation_test_cases():
-  test_cases = ({
-      "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32),
-      "axis": -1,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element"
-  }, {
-      "adapt_data": np.array([[1], [2], [3], [4], [5]], dtype=np.int32),
-      "axis": -1,
-      "test_data": np.array([[1], [2], [3]], np.int32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_int_data"
-  }, {
-      "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32),
-      "axis": None,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element_none_axis"
-  }, {
-      "adapt_data": np.array([[1., 2., 3., 4., 5.]], dtype=np.float32),
-      "axis": None,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element_none_axis_flat_data"
-  }, {
-      "adapt_data":
-          np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]],
-                   np.float32),
-      "axis":
-          1,
-      "test_data":
-          np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]],
-                   np.float32),
-      "expected":
-          np.array([[[-1.549193, -0.774597, 0.], [-1.549193, -0.774597, 0.]],
-                    [[0., 0.774597, 1.549193], [0., 0.774597, 1.549193]]],
-                   np.float32),
-      "testcase_name":
-          "3d_internal_axis"
-  }, {
-      "adapt_data":
-          np.array(
-              [[[1., 0., 3.], [2., 3., 4.]], [[3., -1., 5.], [4., 5., 8.]]],
-              np.float32),
-      "axis": (1, 2),
-      "test_data":
-          np.array(
-              [[[3., 1., -1.], [2., 5., 4.]], [[3., 0., 5.], [2., 5., 8.]]],
-              np.float32),
-      "expected":
-          np.array(
-              [[[1., 3., -5.], [-1., 1., -1.]], [[1., 1., 1.], [-1., 1., 1.]]],
-              np.float32),
-      "testcase_name":
-          "3d_multiple_axis"
-  }, {
-      "adapt_data":
-          np.zeros((3, 4)),
-      "axis": -1,
-      "test_data":
-          np.zeros((3, 4)),
-      "expected":
-          np.zeros((3, 4)),
-      "testcase_name":
-          "zero_variance"
-  })
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@keras_parameterized.run_all_keras_modes
-class NormalizationTest(keras_parameterized.TestCase,
-                        preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_broadcasting_during_direct_setting(self):
-    layer = normalization.Normalization(axis=-1, mean=[1.0], variance=[1.0])
-    output = layer(np.array([[1., 2.]]))
-    expected_output = [[0., 1.]]
-    self.assertAllClose(output, expected_output)
-    self.assertAllClose(layer.get_weights(), [])
-
-  def test_broadcasting_during_direct_setting_with_tensors(self):
-    if not context.executing_eagerly():
-      self.skipTest("Only supported in TF2.")
-
-    layer = normalization.Normalization(
-        axis=-1,
-        mean=constant_op.constant([1.0]),
-        variance=constant_op.constant([1.0]))
-    output = layer(np.array([[1., 2.]]))
-    expected_output = [[0., 1.]]
-    self.assertAllClose(output, expected_output)
-    self.assertAllClose(layer.get_weights(), [])
-
-  def test_broadcasting_during_direct_setting_with_variables_fails(self):
-    with self.assertRaisesRegex(ValueError, "passing a Variable"):
-      _ = normalization.Normalization(
-          axis=-1,
-          mean=variables.Variable([1.0]),
-          variance=variables.Variable([2.0]))
-
-  @parameterized.parameters(
-      {"axis": 0},
-      {"axis": (-1, 0)},
-  )
-  def test_zeros_fail_init(self, axis):
-    with self.assertRaisesRegex(ValueError,
-                                "The argument 'axis' may not be 0."):
-      normalization.Normalization(axis=axis)
-
-  @parameterized.parameters(
-      # Out of bounds
-      {"axis": 3},
-      {"axis": -3},
-      # In a tuple
-      {"axis": (1, 3)},
-      {"axis": (1, -3)},
-  )
-  def test_bad_axis_fail_build(self, axis):
-    layer = normalization.Normalization(axis=axis)
-    with self.assertRaisesRegex(ValueError, r"in the range"):
-      layer.build([None, 2, 3])
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class NormalizationAdaptTest(keras_parameterized.TestCase,
-                             preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_layer_api_compatibility(self):
-    cls = normalization.Normalization
-    with CustomObjectScope({"Normalization": cls}):
-      output_data = testing_utils.layer_test(
-          cls,
-          kwargs={"axis": -1},
-          input_shape=(None, 3),
-          input_data=np.array([[3, 1, 2], [6, 5, 4]], dtype=np.float32),
-          validate_training=False,
-          adapt_data=np.array([[1, 2, 1], [2, 3, 4], [1, 2, 1], [2, 3, 4]]))
-    expected = np.array([[3., -3., -0.33333333], [9., 5., 1.]])
-    self.assertAllClose(expected, output_data)
-
-  @parameterized.named_parameters(*_get_layer_computation_test_cases())
-  def test_layer_computation(self, adapt_data, axis, test_data, use_dataset,
-                             expected):
-    input_shape = tuple([test_data.shape[i] for i in range(1, test_data.ndim)])
-    if use_dataset:
-      # Keras APIs expect batched datasets
-      adapt_data = dataset_ops.Dataset.from_tensor_slices(adapt_data).batch(
-          test_data.shape[0] // 2)
-      test_data = dataset_ops.Dataset.from_tensor_slices(test_data).batch(
-          test_data.shape[0] // 2)
-
-    layer = normalization.Normalization(axis=axis)
-    layer.adapt(adapt_data)
-
-    input_data = keras.Input(shape=input_shape)
-    output = layer(input_data)
-    model = keras.Model(input_data, output)
-    model._run_eagerly = testing_utils.should_run_eagerly()
-    output_data = model.predict(test_data)
-    self.assertAllClose(expected, output_data)
-
-  def test_1d_data(self):
-    data = [0, 2, 0, 2]
-    layer = normalization.Normalization(axis=-1)
-    layer.adapt(data)
-    output = layer(data)
-    self.assertListEqual(output.shape.as_list(), [4, 1])
-    if context.executing_eagerly():
-      self.assertAllClose(output.numpy(), [[-1], [1], [-1], [1]])
-
-  def test_0d_data(self):
-    if not context.executing_eagerly():
-      self.skipTest("Only supported in TF2.")
-
-    data = [0, 2, 0, 2]
-    layer = normalization.Normalization(axis=-1)
-    layer.adapt(data)
-    output = layer(0.)
-    self.assertListEqual(output.shape.as_list(), [1, 1])
-    self.assertAllClose(output.numpy(), [[-1]])
-
-  @parameterized.parameters(
-      # Results should be identical no matter how the axes are specified (3d).
-      {"axis": (1, 2)},
-      {"axis": (2, 1)},
-      {"axis": (1, -1)},
-      {"axis": (-1, 1)},
-  )
-  def test_axis_permutations(self, axis):
-    layer = normalization.Normalization(axis=axis)
-    # data.shape = [2, 2, 3]
-    data = np.array([[[0., 1., 2.], [0., 2., 6.]],
-                     [[2., 3., 4.], [3., 6., 10.]]])
-    expect = np.array([[[-1., -1., -1.], [-1., -1., -1.]],
-                       [[1., 1., 1.], [1., 1., 1.]]])
-    layer.adapt(data)
-    self.assertAllClose(expect, layer(data))
-
-  def test_model_summary_after_layer_adapt(self):
-    data = np.array([[[0., 1., 2.], [0., 2., 6.]],
-                     [[2., 3., 4.], [3., 6., 10.]]])
-    layer = normalization.Normalization(axis=-1)
-    layer.adapt(data)
-    model = keras.Sequential(
-        [layer,
-         keras.layers.Dense(64, activation="relu"),
-         keras.layers.Dense(1)])
-    model.summary()
-
-  def test_merge_state(self):
-    data = np.random.rand(30, 10, 2)
-    ds = dataset_ops.Dataset.from_tensor_slices(data).batch(2)
-    norm = normalization.Normalization(axis=(1, 2))
-    norm.adapt(ds)
-
-    partial_ds_1 = ds.shard(3, 0)
-    partial_ds_2 = ds.shard(3, 1)
-    partial_ds_3 = ds.shard(3, 2)
-
-    norm_1 = normalization.Normalization(axis=(1, 2))
-    norm_2 = normalization.Normalization(axis=(1, 2))
-    norm_3 = normalization.Normalization(axis=(1, 2))
-
-    norm_1.adapt(partial_ds_1)
-    norm_2.adapt(partial_ds_2)
-    norm_3.adapt(partial_ds_3)
-
-    norm_1.merge_state([norm_2, norm_3])
-    merged_norm = norm_1
-
-    self.assertAllClose(norm(data), merged_norm(data))
-
-  def test_multiple_adapts(self):
-    first_adapt = [[0], [2], [0], [2]]
-    second_adapt = [[2], [4], [2], [4]]
-    predict_input = [[2], [2]]
-    expected_first_output = [[1], [1]]
-    expected_second_output = [[-1], [-1]]
-
-    inputs = keras.Input(shape=(1,), dtype=dtypes.int32)
-    layer = normalization.Normalization(axis=-1)
-    layer.adapt(first_adapt)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    actual_output = model.predict(predict_input)
-    self.assertAllClose(actual_output, expected_first_output)
-
-    # Re-adapt the layer on new inputs.
-    layer.adapt(second_adapt)
-    # Re-compile the model.
-    model.compile()
-    # `predict` should now use the new model state.
-    actual_output = model.predict(predict_input)
-    self.assertAllClose(actual_output, expected_second_output)
-
-  @parameterized.parameters(
-      {"adapted": True},
-      {"adapted": False},
-  )
-  def test_saved_model_tf(self, adapted):
-    input_data = [[0.], [2.], [0.], [2.]]
-    expected_output = [[-1.], [1.], [-1.], [1.]]
-
-    inputs = keras.Input(shape=(1,), dtype=dtypes.float32)
-    if adapted:
-      layer = normalization.Normalization(axis=-1)
-      layer.adapt(input_data)
-    else:
-      layer = normalization.Normalization(mean=1., variance=1.)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    output_data = model.predict(input_data)
-    self.assertAllClose(output_data, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_saved_model")
-    save.save(model, output_path)
-    loaded_model = load.load(output_path)
-    f = loaded_model.signatures["serving_default"]
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_data = f(constant_op.constant(input_data))["normalization"]
-    self.assertAllClose(new_output_data, expected_output)
-
-  @parameterized.parameters(
-      {"adapted": True},
-      {"adapted": False},
-  )
-  def test_saved_model_keras(self, adapted):
-    input_data = [[0.], [2.], [0.], [2.]]
-    expected_output = [[-1.], [1.], [-1.], [1.]]
-
-    cls = normalization.Normalization
-    inputs = keras.Input(shape=(1,), dtype=dtypes.float32)
-    if adapted:
-      layer = cls(axis=-1)
-      layer.adapt(input_data)
-    else:
-      layer = cls(mean=1., variance=1.)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    output_data = model.predict(input_data)
-    self.assertAllClose(output_data, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-    loaded_model = keras.models.load_model(
-        output_path, custom_objects={"Normalization": cls})
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_data = loaded_model.predict(input_data)
-    self.assertAllClose(new_output_data, expected_output)
-
-  @parameterized.parameters(
-      {"adapted": True},
-      {"adapted": False},
-  )
-  def test_saved_weights_keras(self, adapted):
-    input_data = [[0.], [2.], [0.], [2.]]
-    expected_output = [[-1.], [1.], [-1.], [1.]]
-
-    cls = normalization.Normalization
-    inputs = keras.Input(shape=(1,), dtype=dtypes.float32)
-    if adapted:
-      layer = cls(axis=-1)
-      layer.adapt(input_data)
-    else:
-      layer = cls(mean=1., variance=1.)
-    outputs = layer(inputs)
-    model = keras.Model(inputs=inputs, outputs=outputs)
-
-    output_data = model.predict(input_data)
-    self.assertAllClose(output_data, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_weights")
-    model.save_weights(output_path, save_format="tf")
-    new_model = keras.Model.from_config(
-        model.get_config(), custom_objects={"Normalization": cls})
-    new_model.load_weights(output_path)
-
-    # Validate correctness of the new model.
-    new_output_data = new_model.predict(input_data)
-    self.assertAllClose(new_output_data, expected_output)
-
-
-if __name__ == "__main__":
-  test.main()
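One behavior worth noting from the deleted tests (`test_multiple_adapts`):
after re-adapting a layer that is already wired into a compiled model, the
model is re-compiled so that `predict` picks up the new layer state. A minimal
sketch of that flow, assuming tf.keras.layers.Normalization (TF 2.6+):

import numpy as np
import tensorflow as tf

inputs = tf.keras.Input(shape=(1,))
norm = tf.keras.layers.Normalization(axis=-1)
norm.adapt(np.array([[0.], [2.], [0.], [2.]]))  # mean 1, variance 1
model = tf.keras.Model(inputs, norm(inputs))
print(model.predict([[2.], [2.]]))  # approx [[1.], [1.]]

norm.adapt(np.array([[2.], [4.], [2.], [4.]]))  # mean 3, variance 1
model.compile()  # re-compile, as the deleted test does, before predicting again
print(model.predict([[2.], [2.]]))  # approx [[-1.], [-1.]]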
diff --git a/tensorflow/python/keras/layers/preprocessing/normalization_tpu_test.py b/tensorflow/python/keras/layers/preprocessing/normalization_tpu_test.py
deleted file mode 100644
index 50684fe..0000000
--- a/tensorflow/python/keras/layers/preprocessing/normalization_tpu_test.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for keras.layers.preprocessing.normalization."""
-
-from absl.testing import parameterized
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import tpu_strategy_test_utils
-from tensorflow.python.keras.layers.preprocessing import normalization
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.platform import test
-
-
-def _get_layer_computation_test_cases():
-  test_cases = ({
-      "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32),
-      "axis": -1,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element"
-  }, {
-      "adapt_data": np.array([[1.], [2.], [3.], [4.], [5.]], dtype=np.float32),
-      "axis": None,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element_none_axis"
-  }, {
-      "adapt_data": np.array([[1., 2., 3., 4., 5.]], dtype=np.float32),
-      "axis": None,
-      "test_data": np.array([[1.], [2.], [3.]], np.float32),
-      "expected": np.array([[-1.414214], [-.707107], [0]], np.float32),
-      "testcase_name": "2d_single_element_none_axis_flat_data"
-  }, {
-      "adapt_data":
-          np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]],
-                   np.float32),
-      "axis":
-          1,
-      "test_data":
-          np.array([[[1., 2., 3.], [2., 3., 4.]], [[3., 4., 5.], [4., 5., 6.]]],
-                   np.float32),
-      "expected":
-          np.array([[[-1.549193, -0.774597, 0.], [-1.549193, -0.774597, 0.]],
-                    [[0., 0.774597, 1.549193], [0., 0.774597, 1.549193]]],
-                   np.float32),
-      "testcase_name":
-          "3d_internal_axis"
-  }, {
-      "adapt_data":
-          np.array(
-              [[[1., 0., 3.], [2., 3., 4.]], [[3., -1., 5.], [4., 5., 8.]]],
-              np.float32),
-      "axis": (1, 2),
-      "test_data":
-          np.array(
-              [[[3., 1., -1.], [2., 5., 4.]], [[3., 0., 5.], [2., 5., 8.]]],
-              np.float32),
-      "expected":
-          np.array(
-              [[[1., 3., -5.], [-1., 1., -1.]], [[1., 1., 1.], [-1., 1., 1.]]],
-              np.float32),
-      "testcase_name":
-          "3d_multiple_axis"
-  })
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@keras_parameterized.run_all_keras_modes(
-    always_skip_v1=True, always_skip_eager=True)
-class NormalizationTest(keras_parameterized.TestCase,
-                        preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters(*_get_layer_computation_test_cases())
-  def test_layer_computation(self, adapt_data, axis, test_data, use_dataset,
-                             expected):
-    input_shape = tuple([None for _ in range(test_data.ndim - 1)])
-    if use_dataset:
-      # Keras APIs expect batched datasets
-      adapt_data = dataset_ops.Dataset.from_tensor_slices(adapt_data).batch(
-          test_data.shape[0] // 2)
-      test_data = dataset_ops.Dataset.from_tensor_slices(test_data).batch(
-          test_data.shape[0] // 2)
-
-    strategy = tpu_strategy_test_utils.get_tpu_strategy()
-
-    with strategy.scope():
-      input_data = keras.Input(shape=input_shape)
-      layer = normalization.Normalization(axis=axis)
-      layer.adapt(adapt_data)
-      output = layer(input_data)
-      model = keras.Model(input_data, output)
-      output_data = model.predict(test_data)
-    self.assertAllClose(expected, output_data)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage.py b/tensorflow/python/keras/layers/preprocessing/preprocessing_stage.py
deleted file mode 100644
index 525d5b4..0000000
--- a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Preprocessing stage."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import ops
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.keras.engine import functional
-from tensorflow.python.keras.engine import sequential
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.util import nest
-
-
-# Sequential methods should take precedence.
-class PreprocessingStage(sequential.Sequential,
-                         base_preprocessing_layer.PreprocessingLayer):
-  """A sequential preprocessing stage.
-
-  This preprocessing stage wraps a list of preprocessing layers into a
-  Sequential-like object that enables you to `adapt()` the whole list via
-  a single `adapt()` call on the preprocessing stage.
-
-  Args:
-    layers: List of layers. Can include layers that aren't preprocessing layers.
-    name: String. Optional name for the preprocessing stage object.
-  """
-
-  def adapt(self, data, reset_state=True):
-    """Adapt the state of the layers of the preprocessing stage to the data.
-
-    Args:
-      data: A batched Dataset object, or a NumPy array, or an EagerTensor.
-        Data to be iterated over to adapt the state of the layers in this
-        preprocessing stage.
-      reset_state: Whether this call to `adapt` should reset the state of
-        the layers in this preprocessing stage.
-    """
-    if not isinstance(data,
-                      (dataset_ops.DatasetV2, np.ndarray, ops.EagerTensor)):
-      raise ValueError(
-          '`adapt()` requires a batched Dataset, an EagerTensor, '
-          'or a Numpy array as input, '
-          'got {}'.format(type(data)))
-    if isinstance(data, dataset_ops.DatasetV2):
-      # Validate the datasets to try and ensure we haven't been passed one with
-      # infinite size. That would cause an infinite loop here.
-      if tf_utils.dataset_is_infinite(data):
-        raise ValueError(
-            'The dataset passed to `adapt()` has an infinite number of '
-            'elements. Please use dataset.take(...) to make the number '
-            'of elements finite.')
-
-    for current_layer_index in range(0, len(self.layers)):
-      if not hasattr(self.layers[current_layer_index], 'adapt'):
-        # Skip any layer that does not need adapting.
-        continue
-
-      def map_fn(x):
-        """Maps `PreprocessingStage` inputs to inputs at `current_layer_index`.
-
-        Args:
-          x: Batch of inputs seen in entry of the `PreprocessingStage` instance.
-
-        Returns:
-          Batch of inputs to be processed by layer
-            `self.layers[current_layer_index]`
-        """
-        if current_layer_index == 0:  # pylint: disable=cell-var-from-loop
-          return x
-        for i in range(current_layer_index):  # pylint: disable=cell-var-from-loop
-          x = self.layers[i](x)
-        return x
-
-      if isinstance(data, dataset_ops.DatasetV2):
-        current_layer_data = data.map(map_fn)
-      else:
-        current_layer_data = map_fn(data)
-      self.layers[current_layer_index].adapt(current_layer_data,
-                                             reset_state=reset_state)
-
-
-# Functional methods should take precedence.
-class FunctionalPreprocessingStage(functional.Functional,
-                                   base_preprocessing_layer.PreprocessingLayer):
-  """A functional preprocessing stage.
-
-  This preprocessing stage wraps a graph of preprocessing layers into a
-  Functional-like object that enables you to `adapt()` the whole graph via
-  a single `adapt()` call on the preprocessing stage.
-
-  A preprocessing stage is not a complete model, so it cannot be trained with
-  `fit()`. However, it is possible to add regular layers that may be trainable
-  to a preprocessing stage.
-
-  A functional preprocessing stage is created in the same way as `Functional`
-  models. A stage can be instantiated by passing two arguments to
-  `__init__`. The first argument is the `keras.Input` tensor(s) that represent
-  the inputs to the stage. The second argument specifies the output
-  tensors that represent the outputs of this stage. Both arguments can be a
-  nested structure of tensors.
-
-  Example:
-
-  >>> inputs = {'x2': tf.keras.Input(shape=(5,)),
-  ...           'x1': tf.keras.Input(shape=(1,))}
-  >>> norm_layer = tf.keras.layers.experimental.preprocessing.Normalization()
-  >>> y = norm_layer(inputs['x2'])
-  >>> y, z = tf.keras.layers.Lambda(lambda x: (x, x))(inputs['x1'])
-  >>> outputs = [inputs['x1'], [y, z]]
-  >>> stage = FunctionalPreprocessingStage(inputs, outputs)
-
-  Args:
-    inputs: An input tensor (must be created via `tf.keras.Input()`), or a list,
-      a dict, or a nested structure of input tensors.
-    outputs: An output tensor, or a list, a dict or a nested structure of output
-      tensors.
-    name: String, optional. Name of the preprocessing stage.
-  """
-
-  def fit(self, *args, **kwargs):
-    raise ValueError(
-        'Preprocessing stage is not a complete model, and hence should not be '
-        '`fit`. Instead, you may feed data to `adapt` the stage to set '
-        'appropriate states of the layers in the stage.')
-
-  def adapt(self, data, reset_state=True):
-    """Adapt the state of the layers of the preprocessing stage to the data.
-
-    Args:
-      data: A batched Dataset object, a NumPy array, an EagerTensor, or a list,
-        dict or nested structure of NumPy arrays or EagerTensors. The elements
-        of the Dataset object need to conform to the inputs of the stage. The
-        first dimension of NumPy arrays or EagerTensors is understood to be the
-        batch dimension. Data to be iterated over to adapt the state of the
-        layers in this preprocessing stage.
-      reset_state: Whether this call to `adapt` should reset the state of the
-        layers in this preprocessing stage.
-
-    Examples:
-
-    >>> # For a stage with dict input
-    >>> inputs = {'x2': tf.keras.Input(shape=(5,)),
-    ...           'x1': tf.keras.Input(shape=(1,))}
-    >>> outputs = [inputs['x1'], inputs['x2']]
-    >>> stage = FunctionalPreprocessingStage(inputs, outputs)
-    >>> ds = tf.data.Dataset.from_tensor_slices({'x1': tf.ones((4,5)),
-    ...                                          'x2': tf.ones((4,1))})
-    >>> sorted(ds.element_spec.items()) # Check element_spec
-    [('x1', TensorSpec(shape=(5,), dtype=tf.float32, name=None)),
-     ('x2', TensorSpec(shape=(1,), dtype=tf.float32, name=None))]
-    >>> stage.adapt(ds)
-    >>> data_np = {'x1': np.ones((4, 5)), 'x2': np.ones((4, 1))}
-    >>> stage.adapt(data_np)
-
-    """
-    if not isinstance(data, dataset_ops.Dataset):
-      data = self._flatten_to_reference_inputs(data)
-      if any(not isinstance(datum, (np.ndarray, ops.EagerTensor))
-             for datum in data):
-        raise ValueError(
-            '`adapt()` requires a batched Dataset, a list of EagerTensors '
-            'or Numpy arrays as input, got {}'.format(type(data)))
-      ds_input = [
-          dataset_ops.Dataset.from_tensor_slices(x).batch(1) for x in data
-      ]
-
-    if isinstance(data, dataset_ops.Dataset):
-      # Validate the datasets to try and ensure we haven't been passed one with
-      # infinite size. That would cause an infinite loop here.
-      if tf_utils.dataset_is_infinite(data):
-        raise ValueError(
-            'The dataset passed to `adapt()` has an infinite number of '
-            'elements. Please use dataset.take(...) to make the number '
-            'of elements finite.')
-      # Unzip dataset object to a list of single input dataset.
-      ds_input = _unzip_dataset(data)
-
-    # Dictionary mapping reference tensors to datasets
-    ds_dict = {}
-    tensor_usage_count = self._tensor_usage_count
-    for x, y in zip(self.inputs, ds_input):
-      x_id = str(id(x))
-      ds_dict[x_id] = [y] * tensor_usage_count[x_id]
-
-    nodes_by_depth = self._nodes_by_depth
-    depth_keys = sorted(nodes_by_depth.keys(), reverse=True)
-
-    def build_map_fn(node, args, kwargs):
-      if not isinstance(args.element_spec, tuple):
-
-        def map_fn(*x):
-          return nest.flatten(node.layer(*x, **kwargs))
-      else:
-
-        def map_fn(*x):
-          return nest.flatten(node.layer(x, **kwargs))
-
-      return map_fn
-
-    for depth in depth_keys:
-      for node in nodes_by_depth[depth]:
-        # Input node
-        if node.is_input:
-          continue
-
-        # Node with input not computed yet
-        if any(t_id not in ds_dict for t_id in node.flat_input_ids):
-          continue
-
-        args, kwargs = node.map_arguments(ds_dict)
-        args = dataset_ops.Dataset.zip(nest.list_to_tuple(*args))
-
-        if node.layer.stateful and hasattr(node.layer, 'adapt'):
-          node.layer.adapt(args, reset_state=reset_state)
-
-        map_fn = build_map_fn(node, args, kwargs)
-        outputs = args.map(map_fn)
-        outputs = _unzip_dataset(outputs)
-
-        # Update ds_dict.
-        for x_id, y in zip(node.flat_output_ids, outputs):
-          ds_dict[x_id] = [y] * tensor_usage_count[x_id]
-
-
-def _unzip_dataset(ds):
-  """Unzip dataset into a list of single element datasets.
-
-  Args:
-    ds: A Dataset object.
-
-  Returns:
-    A list of Dataset objects, each corresponding to one component of the
-    `element_spec` of the input Dataset object.
-
-  Example:
-
-  >>> ds1 = tf.data.Dataset.from_tensor_slices([1, 2, 3])
-  >>> ds2 = tf.data.Dataset.from_tensor_slices([4, 5, 6])
-  >>> ds_zipped_tuple = tf.data.Dataset.zip((ds1, ds2))
-  >>> ds_unzipped_tuple = _unzip_dataset(ds_zipped_tuple)
-  >>> ds_zipped_dict = tf.data.Dataset.zip({'ds1': ds1, 'ds2': ds2})
-  >>> ds_unzipped_dict = _unzip_dataset(ds_zipped_dict)
-
-  Then the two elements of `ds_unzipped_tuple` and `ds_unzipped_dict` are both
-  the same as `ds1` and `ds2`.
-  """
-  element_count = len(nest.flatten(ds.element_spec))
-  ds_unzipped = []
-  for i in range(element_count):
-
-    def map_fn(*x, j=i):
-      return nest.flatten(x)[j]
-
-    ds_unzipped.append(ds.map(map_fn))
-  return ds_unzipped
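The core idea of the removed `PreprocessingStage.adapt` above is to adapt each
adaptable layer on the data after it has been mapped through all preceding
layers. A standalone sketch of that pattern using the surviving Keras
preprocessing layers (assuming TF 2.6+, where `Rescaling` and `Normalization`
live under tf.keras.layers):

import tensorflow as tf

layers = [
    tf.keras.layers.Rescaling(1. / 255),     # stateless, nothing to adapt
    tf.keras.layers.Normalization(axis=-1),  # stateful, needs adapt()
]
data = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform((8, 3), 0, 255)).batch(2)

for i, layer in enumerate(layers):
    if not hasattr(layer, 'adapt'):
        continue  # skip layers that do not need adapting
    # Feed the raw data through every layer that precedes this one.
    upstream = data
    for prev in layers[:i]:
        upstream = upstream.map(prev)
    layer.adapt(upstream)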
diff --git a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_functional_test.py b/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_functional_test.py
deleted file mode 100644
index cfff5d0..0000000
--- a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_functional_test.py
+++ /dev/null
@@ -1,442 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functional preprocessing stage tests."""
-# pylint: disable=g-classes-have-attributes
-
-import time
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.keras.engine.input_layer import Input
-from tensorflow.python.keras.layers import convolutional
-from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.layers import merge
-from tensorflow.python.keras.layers.preprocessing import image_preprocessing
-from tensorflow.python.keras.layers.preprocessing import normalization
-from tensorflow.python.keras.layers.preprocessing import preprocessing_stage
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import test
-
-
-class PL(base_preprocessing_layer.PreprocessingLayer):
-
-  def __init__(self, **kwargs):
-    self.adapt_time = None
-    self.adapt_count = 0
-    super(PL, self).__init__(**kwargs)
-
-  def adapt(self, data, reset_state=True):
-    self.adapt_time = time.time()
-    self.adapt_count += 1
-
-  def call(self, inputs):
-    return inputs + 1
-
-
-class PLMerge(PL):
-
-  def call(self, inputs):
-    return inputs[0] + inputs[1]
-
-
-class PLSplit(PL):
-
-  def call(self, inputs):
-    return inputs + 1, inputs - 1
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class PreprocessingStageTest(keras_parameterized.TestCase,
-                             preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_adapt_preprocessing_stage_with_single_input_output(self):
-
-    x = Input(shape=(3,))
-
-    l0 = PL()
-    y = l0(x)
-
-    l1 = PL()
-    z = l1(y)
-
-    stage = preprocessing_stage.FunctionalPreprocessingStage(x, z)
-    stage.compile()
-
-    # Test with NumPy array
-    one_array = np.ones((4, 3), dtype='float32')
-    stage.adapt(one_array)
-    self.assertEqual(l0.adapt_count, 1)
-    self.assertEqual(l1.adapt_count, 1)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-
-    # Check call
-    z = stage(array_ops.ones((4, 3), dtype='float32'))
-    self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 2.)
-
-    # Test with dataset
-    adapt_data = dataset_ops.Dataset.from_tensor_slices(one_array)
-    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples
-
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 2)
-    self.assertEqual(l1.adapt_count, 2)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-
-    # Test error with bad data
-    with self.assertRaisesRegex(ValueError, 'requires a '):
-      stage.adapt(None)
-
-    # Disallow calling fit
-    with self.assertRaisesRegex(ValueError, 'Preprocessing stage'):
-      stage.fit(None)
-
-  def test_adapt_preprocessing_stage_with_list_input(self):
-
-    x0 = Input(shape=(3,))
-    x1 = Input(shape=(3,))
-    x2 = Input(shape=(3,))
-
-    l0 = PLMerge()
-    y = l0([x0, x1])
-
-    l1 = PLMerge()
-    y = l1([y, x2])
-
-    l2 = PLSplit()
-    z, y = l2(y)
-
-    stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1, x2],
-                                                             [y, z])
-    stage.compile()
-
-    # Test with NumPy array
-    one_array = np.ones((4, 3), dtype='float32')
-    stage.adapt([one_array, one_array, one_array])
-    self.assertEqual(l0.adapt_count, 1)
-    self.assertEqual(l1.adapt_count, 1)
-    self.assertEqual(l2.adapt_count, 1)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Check call
-    y, z = stage([
-        array_ops.ones((4, 3), dtype='float32'),
-        array_ops.ones((4, 3), dtype='float32'),
-        array_ops.ones((4, 3), dtype='float32')
-    ])
-    self.assertAllClose(y, np.ones((4, 3), dtype='float32') + 1.)
-    self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 3.)
-
-    # Test with dataset
-    adapt_data = dataset_ops.Dataset.from_tensor_slices(
-        (one_array, one_array, one_array))
-    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples
-
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 2)
-    self.assertEqual(l1.adapt_count, 2)
-    self.assertEqual(l2.adapt_count, 2)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Test error with bad data
-    with self.assertRaisesRegex(ValueError, 'requires a '):
-      stage.adapt(None)
-
-  def test_adapt_preprocessing_stage_with_dict_input(self):
-    x0 = Input(shape=(3,), name='x0')
-    x1 = Input(shape=(4,), name='x1')
-    x2 = Input(shape=(3, 5), name='x2')
-
-    # Dimensions will mismatch if x1 is incorrectly placed.
-    x1_sum = core.Lambda(
-        lambda x: math_ops.reduce_sum(x, axis=-1, keepdims=True))(
-            x1)
-    x2_sum = core.Lambda(lambda x: math_ops.reduce_sum(x, axis=-1))(x2)
-
-    l0 = PLMerge()
-    y = l0([x0, x1_sum])
-
-    l1 = PLMerge()
-    y = l1([y, x2_sum])
-
-    l2 = PLSplit()
-    z, y = l2(y)
-    stage = preprocessing_stage.FunctionalPreprocessingStage(
-        {
-            'x2': x2,
-            'x0': x0,
-            'x1': x1
-        }, [y, z])
-    stage.compile()
-
-    # Test with dict of NumPy array
-    one_array0 = np.ones((4, 3), dtype='float32')
-    one_array1 = np.ones((4, 4), dtype='float32')
-    one_array2 = np.ones((4, 3, 5), dtype='float32')
-    adapt_data = {'x1': one_array1, 'x0': one_array0, 'x2': one_array2}
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 1)
-    self.assertEqual(l1.adapt_count, 1)
-    self.assertEqual(l2.adapt_count, 1)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Check call
-    y, z = stage({
-        'x1': array_ops.constant(one_array1),
-        'x2': array_ops.constant(one_array2),
-        'x0': array_ops.constant(one_array0)
-    })
-    self.assertAllClose(y, np.zeros((4, 3), dtype='float32') + 9.)
-    self.assertAllClose(z, np.zeros((4, 3), dtype='float32') + 11.)
-
-    # Test with list of NumPy array
-    adapt_data = [one_array0, one_array1, one_array2]
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 2)
-    self.assertEqual(l1.adapt_count, 2)
-    self.assertEqual(l2.adapt_count, 2)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Test with flattened dataset
-    adapt_data = dataset_ops.Dataset.from_tensor_slices(
-        (one_array0, one_array1, one_array2))
-    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples
-
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 3)
-    self.assertEqual(l1.adapt_count, 3)
-    self.assertEqual(l2.adapt_count, 3)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Test with dataset in dict shape
-    adapt_data = dataset_ops.Dataset.from_tensor_slices({
-        'x0': one_array0,
-        'x2': one_array2,
-        'x1': one_array1
-    })
-    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 4)
-    self.assertEqual(l1.adapt_count, 4)
-    self.assertEqual(l2.adapt_count, 4)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Test error with bad data
-    with self.assertRaisesRegex(ValueError, 'requires a '):
-      stage.adapt(None)
-
-  def test_adapt_preprocessing_stage_with_dict_output(self):
-    x = Input(shape=(3,), name='x')
-
-    l0 = PLSplit()
-    y0, y1 = l0(x)
-
-    l1 = PLSplit()
-    z0, z1 = l1(y0)
-    stage = preprocessing_stage.FunctionalPreprocessingStage({'x': x}, {
-        'y1': y1,
-        'z1': z1,
-        'y0': y0,
-        'z0': z0
-    })
-    stage.compile()
-
-    # Test with NumPy array
-    one_array = np.ones((4, 3), dtype='float32')
-    adapt_data = {'x': one_array}
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 1)
-    self.assertEqual(l1.adapt_count, 1)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-
-    # Check call
-    outputs = stage({'x': array_ops.constant(one_array)})
-    self.assertEqual(set(outputs.keys()), {'y0', 'y1', 'z0', 'z1'})
-    self.assertAllClose(outputs['y0'], np.ones((4, 3), dtype='float32') + 1.)
-    self.assertAllClose(outputs['y1'], np.ones((4, 3), dtype='float32') - 1.)
-    self.assertAllClose(outputs['z0'], np.ones((4, 3), dtype='float32') + 2.)
-    self.assertAllClose(outputs['z1'], np.ones((4, 3), dtype='float32'))
-
-  def test_preprocessing_stage_with_nested_input(self):
-    # Test with NumPy array
-    x0 = Input(shape=(3,))
-    x1 = Input(shape=(3,))
-    x2 = Input(shape=(3,))
-
-    l0 = PLMerge()
-    y = l0([x0, x1])
-
-    l1 = PLMerge()
-    y = l1([y, x2])
-
-    l2 = PLSplit()
-    z, y = l2(y)
-
-    stage = preprocessing_stage.FunctionalPreprocessingStage([x0, [x1, x2]],
-                                                             [y, z])
-    stage.compile()
-    one_array = np.ones((4, 3), dtype='float32')
-    stage.adapt([one_array, [one_array, one_array]])
-    self.assertEqual(l0.adapt_count, 1)
-    self.assertEqual(l1.adapt_count, 1)
-    self.assertEqual(l2.adapt_count, 1)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Check call
-    y, z = stage([
-        array_ops.ones((4, 3), dtype='float32'),
-        [
-            array_ops.ones((4, 3), dtype='float32'),
-            array_ops.ones((4, 3), dtype='float32')
-        ]
-    ])
-    self.assertAllClose(y, np.ones((4, 3), dtype='float32') + 1.)
-    self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 3.)
-
-    # Test with dataset
-    adapt_data = dataset_ops.Dataset.from_tensor_slices(
-        (one_array, (one_array, one_array)))
-    adapt_data = adapt_data.batch(2)  # 2 batches of 2 samples
-
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 2)
-    self.assertEqual(l1.adapt_count, 2)
-    self.assertEqual(l2.adapt_count, 2)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-    self.assertLessEqual(l1.adapt_time, l2.adapt_time)
-
-    # Test error with bad data
-    with self.assertRaisesRegex(ValueError, 'requires a '):
-      stage.adapt(None)
-
-  def test_include_layers_with_dict_input(self):
-
-    class PLMergeDict(PLMerge):
-
-      def call(self, inputs):
-        return inputs['a'] + inputs['b']
-
-    x0 = Input(shape=(3,))
-    x1 = Input(shape=(3,))
-
-    l0 = PLMergeDict()
-    y = l0({'a': x0, 'b': x1})
-
-    l1 = PLSplit()
-    z, y = l1(y)
-
-    stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1], [y, z])
-    stage.compile()
-
-    one_array = np.ones((4, 3), dtype='float32')
-    adapt_data = dataset_ops.Dataset.from_tensor_slices((one_array, one_array))
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 1)
-    self.assertEqual(l1.adapt_count, 1)
-    self.assertLessEqual(l0.adapt_time, l1.adapt_time)
-
-    # Check call
-    y, z = stage([
-        array_ops.ones((4, 3), dtype='float32'),
-        array_ops.ones((4, 3), dtype='float32')
-    ])
-    self.assertAllClose(y, np.ones((4, 3), dtype='float32'))
-    self.assertAllClose(z, np.ones((4, 3), dtype='float32') + 2.)
-
-  def test_include_layers_with_nested_input(self):
-
-    class PLMergeNest(PLMerge):
-
-      def call(self, inputs):
-        a = inputs[0]
-        b = inputs[1][0]
-        c = inputs[1][1]
-        return a + b + c
-
-    x0 = Input(shape=(3,))
-    x1 = Input(shape=(3,))
-    x2 = Input(shape=(3,))
-
-    l0 = PLMergeNest()
-    y = l0([x0, [x1, x2]])
-
-    stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1, x2], y)
-    stage.compile()
-
-    one_array = np.ones((4, 3), dtype='float32')
-    adapt_data = dataset_ops.Dataset.from_tensor_slices((one_array,) * 3)
-    stage.adapt(adapt_data)
-    self.assertEqual(l0.adapt_count, 1)
-
-    # Check call
-    y = stage([
-        array_ops.ones((4, 3), dtype='float32'),
-        array_ops.ones((4, 3), dtype='float32'),
-        array_ops.ones((4, 3), dtype='float32')
-    ])
-    self.assertAllClose(y, np.ones((4, 3), dtype='float32') + 2.)
-
-  def test_mixing_preprocessing_and_regular_layers(self):
-    x0 = Input(shape=(10, 10, 3))
-    x1 = Input(shape=(10, 10, 3))
-    x2 = Input(shape=(10, 10, 3))
-
-    y0 = merge.Add()([x0, x1])
-    y1 = image_preprocessing.CenterCrop(8, 8)(x2)
-    y1 = convolutional.ZeroPadding2D(padding=1)(y1)
-
-    z = merge.Add()([y0, y1])
-    z = normalization.Normalization()(z)
-    z = convolutional.Conv2D(4, 3)(z)
-
-    stage = preprocessing_stage.FunctionalPreprocessingStage([x0, x1, x2], z)
-
-    data = [
-        np.ones((12, 10, 10, 3), dtype='float32'),
-        np.ones((12, 10, 10, 3), dtype='float32'),
-        np.ones((12, 10, 10, 3), dtype='float32')
-    ]
-
-    stage.adapt(data)
-    _ = stage(data)
-    stage.compile('rmsprop', 'mse')
-    with self.assertRaisesRegex(ValueError, 'Preprocessing stage'):
-      stage.fit(data, np.ones((12, 8, 8, 4)))
-
-    ds_x0 = dataset_ops.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3)))
-    ds_x1 = dataset_ops.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3)))
-    ds_x2 = dataset_ops.Dataset.from_tensor_slices(np.ones((12, 10, 10, 3)))
-    ds_x = dataset_ops.Dataset.zip((ds_x0, ds_x1, ds_x2))
-    ds_y = dataset_ops.Dataset.from_tensor_slices(np.ones((12, 8, 8, 4)))
-    dataset = dataset_ops.Dataset.zip((ds_x, ds_y)).batch(4)
-
-    with self.assertRaisesRegex(ValueError, 'Preprocessing stage'):
-      stage.fit(dataset)
-    _ = stage.evaluate(data, np.ones((12, 8, 8, 4)))
-    _ = stage.predict(data)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_test.py b/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_test.py
deleted file mode 100644
index 7da6e62..0000000
--- a/tensorflow/python/keras/layers/preprocessing/preprocessing_stage_test.py
+++ /dev/null
@@ -1,85 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Preprocessing stage tests."""
-# pylint: disable=g-classes-have-attributes
-
-import time
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.keras.layers.preprocessing import preprocessing_stage
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class PreprocessingStageTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_adapt(self):
-
-    class PL(base_preprocessing_layer.PreprocessingLayer):
-
-      def __init__(self, **kwargs):
-        self.adapt_time = None
-        self.adapt_count = 0
-        super(PL, self).__init__(**kwargs)
-
-      def adapt(self, data, reset_state=True):
-        self.adapt_time = time.time()
-        self.adapt_count += 1
-
-      def call(self, inputs):
-        return inputs + 1.
-
-    # Test with NumPy array
-    stage = preprocessing_stage.PreprocessingStage([
-        PL(),
-        PL(),
-        PL(),
-    ])
-    stage.adapt(np.ones((3, 4)))
-    self.assertEqual(stage.layers[0].adapt_count, 1)
-    self.assertEqual(stage.layers[1].adapt_count, 1)
-    self.assertEqual(stage.layers[2].adapt_count, 1)
-    self.assertLessEqual(stage.layers[0].adapt_time, stage.layers[1].adapt_time)
-    self.assertLessEqual(stage.layers[1].adapt_time, stage.layers[2].adapt_time)
-
-    # Check call
-    y = stage(array_ops.ones((3, 4)))
-    self.assertAllClose(y, np.ones((3, 4)) + 3.)
-
-    # Test with dataset
-    adapt_data = dataset_ops.Dataset.from_tensor_slices(np.ones((3, 10)))
-    adapt_data = adapt_data.batch(2)  # 2 batches (2 and 1 samples)
-
-    stage.adapt(adapt_data)
-    self.assertEqual(stage.layers[0].adapt_count, 2)
-    self.assertEqual(stage.layers[1].adapt_count, 2)
-    self.assertEqual(stage.layers[2].adapt_count, 2)
-    self.assertLess(stage.layers[0].adapt_time, stage.layers[1].adapt_time)
-    self.assertLess(stage.layers[1].adapt_time, stage.layers[2].adapt_time)
-
-    # Test error with bad data
-    with self.assertRaisesRegex(ValueError, 'requires a '):
-      stage.adapt(None)
-
-
-if __name__ == '__main__':
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py b/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py
deleted file mode 100644
index 86278d3..0000000
--- a/tensorflow/python/keras/layers/preprocessing/preprocessing_test_utils.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras' base preprocessing layer."""
-
-import collections
-import numpy as np
-
-from tensorflow.python.platform import test
-
-
-class PreprocessingLayerTest(test.TestCase):
-  """Base test class for preprocessing layer API validation."""
-  # TODO(b/137303934): Consider incorporating something like this Close vs All
-  # behavior into core tf.test.TestCase.
-
-  def assertAllCloseOrEqual(self, a, b, msg=None):
-    """Asserts that elements are close (if numeric) or equal (if string)."""
-    if a is None or b is None:
-      self.assertAllEqual(a, b, msg=msg)
-    elif isinstance(a, (list, tuple)):
-      self.assertEqual(len(a), len(b))
-      for a_value, b_value in zip(a, b):
-        self.assertAllCloseOrEqual(a_value, b_value, msg=msg)
-    elif isinstance(a, collections.abc.Mapping):
-      self.assertEqual(len(a), len(b))
-      for key, a_value in a.items():
-        b_value = b[key]
-        error_message = "{} ({})".format(msg, key) if msg else None
-        self.assertAllCloseOrEqual(a_value, b_value, error_message)
-    elif (isinstance(a, float) or
-          hasattr(a, "dtype") and np.issubdtype(a.dtype, np.number)):
-      self.assertAllClose(a, b, msg=msg)
-    else:
-      self.assertAllEqual(a, b, msg=msg)
-
-  def assert_extracted_output_equal(self, combiner, acc1, acc2, msg=None):
-    data_1 = combiner.extract(acc1)
-    data_2 = combiner.extract(acc2)
-    self.assertAllCloseOrEqual(data_1, data_2, msg=msg)
-
-  # This is an injection seam so that tests like TextVectorizationTest can
-  # define their own methods for asserting that accumulators are equal.
-  compare_accumulators = assertAllCloseOrEqual
-
-  def validate_accumulator_computation(self, combiner, data, expected):
-    """Validate that various combinations of compute and merge are identical."""
-    if len(data) < 4:
-      raise AssertionError("Data must have at least 4 elements.")
-    data_0 = np.array([data[0]])
-    data_1 = np.array([data[1]])
-    data_2 = np.array(data[2:])
-
-    single_compute = combiner.compute(data)
-
-    all_merge = combiner.merge([
-        combiner.compute(data_0),
-        combiner.compute(data_1),
-        combiner.compute(data_2)
-    ])
-
-    self.compare_accumulators(
-        single_compute,
-        all_merge,
-        msg="Sharding data should not change the data output.")
-
-    unordered_all_merge = combiner.merge([
-        combiner.compute(data_1),
-        combiner.compute(data_2),
-        combiner.compute(data_0)
-    ])
-    self.compare_accumulators(
-        all_merge,
-        unordered_all_merge,
-        msg="The order of merge arguments should not change the data "
-        "output.")
-
-    hierarchical_merge = combiner.merge([
-        combiner.compute(data_1),
-        combiner.merge([combiner.compute(data_2),
-                        combiner.compute(data_0)])
-    ])
-    self.compare_accumulators(
-        all_merge,
-        hierarchical_merge,
-        msg="Nesting merge arguments should not change the data output.")
-
-    nested_compute = combiner.compute(
-        data_0, combiner.compute(data_1, combiner.compute(data_2)))
-    self.compare_accumulators(
-        all_merge,
-        nested_compute,
-        msg="Nesting compute arguments should not change the data output.")
-
-    mixed_compute = combiner.merge([
-        combiner.compute(data_0),
-        combiner.compute(data_1, combiner.compute(data_2))
-    ])
-    self.compare_accumulators(
-        all_merge,
-        mixed_compute,
-        msg="Mixing merge and compute calls should not change the data "
-        "output.")
-
-    single_merge = combiner.merge([
-        combiner.merge([combiner.compute(data_0)]),
-        combiner.compute(data_1, combiner.compute(data_2))
-    ])
-    self.compare_accumulators(
-        all_merge,
-        single_merge,
-        msg="Calling merge with a data length of 1 should not change the data "
-        "output.")
-
-    self.compare_accumulators(
-        expected,
-        all_merge,
-        msg="Calculated accumulators "
-        "did not match expected accumulator.")
-
-  def validate_accumulator_extract(self, combiner, data, expected):
-    """Validate that the expected results of computing and extracting."""
-    acc = combiner.compute(data)
-    extracted_data = combiner.extract(acc)
-    self.assertAllCloseOrEqual(expected, extracted_data)
-
-  def validate_accumulator_extract_and_restore(self, combiner, data, expected):
-    """Validate that the extract<->restore loop loses no data."""
-    acc = combiner.compute(data)
-    extracted_data = combiner.extract(acc)
-    restored_acc = combiner.restore(extracted_data)
-    self.assert_extracted_output_equal(combiner, acc, restored_acc)
-    self.assertAllCloseOrEqual(expected, combiner.extract(restored_acc))
-
-  def validate_accumulator_serialize_and_deserialize(self, combiner, data,
-                                                     expected):
-    """Validate that the serialize<->deserialize loop loses no data."""
-    acc = combiner.compute(data)
-    serialized_data = combiner.serialize(acc)
-    deserialized_data = combiner.deserialize(serialized_data)
-    self.compare_accumulators(acc, deserialized_data)
-    self.compare_accumulators(expected, deserialized_data)
-
-  def validate_accumulator_uniqueness(self, combiner, data):
-    """Validate that every call to compute creates a unique accumulator."""
-    acc = combiner.compute(data)
-    acc2 = combiner.compute(data)
-    self.assertIsNot(acc, acc2)
-    self.compare_accumulators(acc, acc2)
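The helpers above exercise a generic combiner contract: `compute` folds a shard of data into a new or existing accumulator, `merge` combines accumulators order-independently, and `extract`/`restore` and `serialize`/`deserialize` must round-trip without losing data. As a rough, hypothetical sketch of a combiner that would satisfy those checks (the class name and accumulator layout are illustrative, not part of the deleted code):

import json

import numpy as np


class SumCountCombiner(object):
  """Toy combiner whose accumulator is a (sum, count) pair."""

  def compute(self, data, accumulator=None):
    # Fold one shard of data into a new or existing accumulator.
    data = np.asarray(data, dtype=np.float64)
    acc = (float(data.sum()), int(data.size))
    if accumulator is not None:
      acc = (acc[0] + accumulator[0], acc[1] + accumulator[1])
    return acc

  def merge(self, accumulators):
    # Summation is associative and commutative, so merge order and nesting
    # cannot change the result, as the validation helpers require.
    return (sum(a[0] for a in accumulators),
            sum(a[1] for a in accumulators))

  def extract(self, accumulator):
    # Expose the accumulator in a user-facing form.
    return {"sum": accumulator[0], "count": accumulator[1]}

  def restore(self, output):
    # Exact inverse of extract, so the extract<->restore loop loses no data.
    return (output["sum"], output["count"])

  def serialize(self, accumulator):
    return json.dumps(accumulator).encode("utf-8")

  def deserialize(self, encoded_accumulator):
    return tuple(json.loads(encoded_accumulator))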
diff --git a/tensorflow/python/keras/layers/preprocessing/reduction.py b/tensorflow/python/keras/layers/preprocessing/reduction.py
deleted file mode 100644
index 9d8c4f5..0000000
--- a/tensorflow/python/keras/layers/preprocessing/reduction.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras reduction layer."""
-# pylint: disable=g-classes-have-attributes
-
-from tensorflow.python.keras.engine.base_layer import Layer
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import tf_logging as logging
-
-
-def get_reduce_op(reduction_str):
-  """Translate a reduction string name to a reduction op."""
-  if reduction_str == "max":
-    return math_ops.reduce_max
-  elif reduction_str == "mean":
-    return math_ops.reduce_mean
-  elif reduction_str == "min":
-    return math_ops.reduce_min
-  elif reduction_str == "prod":
-    return math_ops.reduce_prod
-  elif reduction_str == "sum":
-    return math_ops.reduce_sum
-  else:
-    raise ValueError("Reduction %s is not supported for unweighted inputs." %
-                     reduction_str)
-
-
-class Reduction(Layer):
-  """Performs an optionally-weighted reduction.
-
-  This layer performs a reduction across one axis of its input data. The data
-  may optionally be weighted by passing in a float tensor of the same shape.
-
-  Args:
-    reduction: The type of reduction to perform. Can be one of the following:
-      "max", "mean", "min", "prod", or "sum". This layer uses the Tensorflow
-      reduce op which corresponds to that reduction (so, for "mean", we use
-      "reduce_mean").
-    axis: The axis to reduce along. Defaults to '-2', which is usually the axis
-      that contains embeddings (but is not within the embedding itself).
-
-  Input shape:
-    A tensor of 2 or more dimensions of any numeric dtype.
-
-  Output:
-    A tensor with one fewer dimension than the input tensor, of the same dtype.
-
-  Call arguments:
-    inputs: The data to reduce.
-    weights: An optional tensor or constant of the same shape as inputs that
-      will weight the input data before it is reduced.
-  """
-  # TODO(momernick): Add example here.
-
-  def __init__(self, reduction, axis=-2, **kwargs):
-    self.reduction = reduction
-    self.axis = axis
-    # We temporarily turn off autocasting, as it does not apply to named call
-    # kwargs.
-    super(Reduction, self).__init__(**kwargs)
-
-  def call(self, inputs, weights=None):
-    # If we are not weighting the inputs we can immediately reduce the data
-    # and return it.
-    if weights is None:
-      return get_reduce_op(self.reduction)(inputs, axis=self.axis)
-
-    # TODO(momernick): Add checks for this and a decent error message if the
-    # weight shape isn't compatible.
-    if weights.shape.rank + 1 == inputs.shape.rank:
-      weights = array_ops.expand_dims(weights, -1)
-
-    weighted_inputs = math_ops.multiply(inputs, weights)
-
-    # Weighted sum and prod can be expressed as reductions over the weighted
-    # values, as can min and max.
-    if self.reduction in ("sum", "prod", "min", "max"):
-      return get_reduce_op(self.reduction)(weighted_inputs, axis=self.axis)
-
-    # Weighted mean is a bit more complicated: we have to do a sum of the
-    # weighted values and divide by the sum of the weights.
-    if self.reduction == "mean":
-      input_sum = math_ops.reduce_sum(weighted_inputs, axis=self.axis)
-      weight_sum = math_ops.reduce_sum(weights, axis=self.axis)
-      return math_ops.divide(input_sum, weight_sum)
-
-    # sqrtn is also more complicated: it's like mean but with a normalized
-    # divisor.
-    if self.reduction == "sqrtn":
-      logging.warning("Reduction `sqrtn` is deprecated and will be removed "
-                      "2021-01-01. Please use the `sum` reduction and divide "
-                      "the output by the normalized weights instead.")
-      input_sum = math_ops.reduce_sum(weighted_inputs, axis=self.axis)
-      squared_weights = math_ops.pow(weights, 2)
-      squared_weights_sum = math_ops.reduce_sum(squared_weights, axis=self.axis)
-      sqrt_weights_sum = math_ops.sqrt(squared_weights_sum)
-      return math_ops.divide(input_sum, sqrt_weights_sum)
-
-    raise ValueError("%s is not a supported weighted reduction." %
-                     self.reduction)
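For reference, the weighted "mean" branch above computes sum(inputs * weights) / sum(weights) along the reduction axis, and "sqrtn" divides by the L2 norm of the weights instead. A small NumPy sketch of that arithmetic (NumPy stands in for the TensorFlow ops purely to illustrate the math):

import numpy as np

inputs = np.array([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]]])   # shape (1, 3, 2)
weights = np.array([[[1.0, 1.0], [2.0, 2.0], [1.0, 1.0]]])  # same shape
axis = -2  # the layer's default reduction axis

weighted = inputs * weights

mean = weighted.sum(axis=axis) / weights.sum(axis=axis)
# -> [[2., 2.]], the weighted mean over the axis of length 3.

sqrtn = weighted.sum(axis=axis) / np.sqrt((weights ** 2).sum(axis=axis))
# -> [[3.265986, 3.265986]], matching the "sqrtn" expected values in the
# weighted reduction tests below.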
diff --git a/tensorflow/python/keras/layers/preprocessing/reduction_test.py b/tensorflow/python/keras/layers/preprocessing/reduction_test.py
deleted file mode 100644
index dd8bfa8..0000000
--- a/tensorflow/python/keras/layers/preprocessing/reduction_test.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for keras.layers.preprocessing.reduction."""
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.layers.preprocessing import reduction
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import test
-
-
-@keras_parameterized.run_all_keras_modes
-class ReductionTest(keras_parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "max",
-          "reduction_str": "max",
-          "expected_output": [[3.0, 3.0], [3.0, 2.0]]
-      }, {
-          "testcase_name": "mean",
-          "reduction_str": "mean",
-          "expected_output": [[2.0, 2.0], [2.0, 1.5]]
-      }, {
-          "testcase_name": "min",
-          "reduction_str": "min",
-          "expected_output": [[1.0, 1.0], [1.0, 1.0]]
-      }, {
-          "testcase_name": "prod",
-          "reduction_str": "prod",
-          "expected_output": [[6.0, 6.0], [3.0, 2.0]]
-      }, {
-          "testcase_name": "sum",
-          "reduction_str": "sum",
-          "expected_output": [[6.0, 6.0], [4.0, 3.0]]
-      })
-  def test_unweighted_ragged_reduction(self, reduction_str, expected_output):
-    data = ragged_factory_ops.constant([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
-                                        [[3.0, 1.0], [1.0, 2.0]]])
-    input_tensor = keras.Input(shape=(None, None), ragged=True)
-
-    output_tensor = reduction.Reduction(reduction=reduction_str)(input_tensor)
-    model = keras.Model(input_tensor, output_tensor)
-
-    output = model.predict(data)
-
-    self.assertAllClose(expected_output, output)
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "max",
-          "reduction_str": "max",
-          "expected_output": [[4.0, 4.0], [1.5, 6.0]]
-      }, {
-          "testcase_name": "mean",
-          "reduction_str": "mean",
-          "expected_output": [[2.0, 2.0], [1.666667, 1.75]]
-      }, {
-          "testcase_name": "min",
-          "reduction_str": "min",
-          "expected_output": [[1.0, 1.0], [1.0, 1.0]]
-      }, {
-          "testcase_name": "prod",
-          "reduction_str": "prod",
-          "expected_output": [[12.0, 12.0], [1.5, 6.0]]
-      }, {
-          "testcase_name": "sum",
-          "reduction_str": "sum",
-          "expected_output": [[8.0, 8.0], [2.5, 7.0]]
-      }, {
-          "testcase_name": "sqrtn",
-          "reduction_str": "sqrtn",
-          "expected_output": [[3.265986, 3.265986], [2.236067, 2.213594]]
-      })
-  def test_weighted_ragged_reduction(self, reduction_str, expected_output):
-    data = ragged_factory_ops.constant([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
-                                        [[3.0, 1.0], [1.0, 2.0]]])
-    input_tensor = keras.Input(shape=(None, None), ragged=True)
-
-    weights = ragged_factory_ops.constant([[[1.0, 1.0], [2.0, 2.0], [1.0, 1.0]],
-                                           [[0.5, 1.0], [1.0, 3.0]]])
-    weight_input_tensor = keras.Input(shape=(None, None), ragged=True)
-
-    output_tensor = reduction.Reduction(reduction=reduction_str)(
-        input_tensor, weights=weight_input_tensor)
-    model = keras.Model([input_tensor, weight_input_tensor], output_tensor)
-
-    output = model.predict([data, weights])
-    self.assertAllClose(expected_output, output)
-
-  def test_weighted_ragged_reduction_with_different_dimensionality(self):
-    data = ragged_factory_ops.constant([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
-                                        [[3.0, 1.0], [1.0, 2.0]]])
-    input_tensor = keras.Input(shape=(None, None), ragged=True)
-
-    weights = ragged_factory_ops.constant([[1.0, 2.0, 1.0], [1.0, 1.0]])
-    weight_input_tensor = keras.Input(shape=(None,), ragged=True)
-
-    output_tensor = reduction.Reduction(reduction="mean")(
-        input_tensor, weights=weight_input_tensor)
-    model = keras.Model([input_tensor, weight_input_tensor], output_tensor)
-
-    output = model.predict([data, weights])
-    expected_output = [[2.0, 2.0], [2.0, 1.5]]
-    self.assertAllClose(expected_output, output)
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "max",
-          "reduction_str": "max",
-          "expected_output": [[3.0, 3.0], [3.0, 2.0]]
-      }, {
-          "testcase_name": "mean",
-          "reduction_str": "mean",
-          "expected_output": [[2.0, 2.0], [1.333333, 1.0]]
-      }, {
-          "testcase_name": "min",
-          "reduction_str": "min",
-          "expected_output": [[1.0, 1.0], [0.0, 0.0]]
-      }, {
-          "testcase_name": "prod",
-          "reduction_str": "prod",
-          "expected_output": [[6.0, 6.0], [0.0, 0.0]]
-      }, {
-          "testcase_name": "sum",
-          "reduction_str": "sum",
-          "expected_output": [[6.0, 6.0], [4.0, 3.0]]
-      })
-  def test_unweighted_dense_reduction(self, reduction_str, expected_output):
-    data = np.array([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
-                     [[3.0, 1.0], [1.0, 2.0], [0.0, 0.0]]])
-    input_tensor = keras.Input(shape=(None, None))
-
-    output_tensor = reduction.Reduction(reduction=reduction_str)(input_tensor)
-    model = keras.Model(input_tensor, output_tensor)
-
-    output = model.predict(data)
-
-    self.assertAllClose(expected_output, output)
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "max",
-          "reduction_str": "max",
-          "expected_output": [[4.0, 4.0], [1.5, 6.0]]
-      }, {
-          "testcase_name": "mean",
-          "reduction_str": "mean",
-          "expected_output": [[2.0, 2.0], [1.666667, 1.75]]
-      }, {
-          "testcase_name": "min",
-          "reduction_str": "min",
-          "expected_output": [[1.0, 1.0], [0.0, 0.0]]
-      }, {
-          "testcase_name": "prod",
-          "reduction_str": "prod",
-          "expected_output": [[12.0, 12.0], [0.0, 0.0]]
-      }, {
-          "testcase_name": "sum",
-          "reduction_str": "sum",
-          "expected_output": [[8.0, 8.0], [2.5, 7.0]]
-      }, {
-          "testcase_name": "sqrtn",
-          "reduction_str": "sqrtn",
-          "expected_output": [[3.265986, 3.265986], [2.236067, 2.213594]]
-      })
-  def test_weighted_dense_reduction(self, reduction_str, expected_output):
-    data = np.array([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
-                     [[3.0, 1.0], [1.0, 2.0], [0.0, 0.0]]])
-    input_tensor = keras.Input(shape=(None, None))
-
-    weights = np.array([[[1.0, 1.0], [2.0, 2.0], [1.0, 1.0]],
-                        [[0.5, 1.0], [1.0, 3.0], [0.0, 0.0]]])
-    weight_input_tensor = keras.Input(shape=(None, None))
-
-    output_tensor = reduction.Reduction(reduction=reduction_str)(
-        input_tensor, weights=weight_input_tensor)
-    model = keras.Model([input_tensor, weight_input_tensor], output_tensor)
-
-    output = model.predict([data, weights])
-
-    self.assertAllClose(expected_output, output)
-
-  def test_weighted_dense_reduction_with_different_dimensionality(self):
-    data = np.array([[[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]],
-                     [[3.0, 1.0], [1.0, 2.0], [0.0, 0.0]]])
-    input_tensor = keras.Input(shape=(None, None))
-
-    weights = np.array([[1.0, 2.0, 1.0], [1.0, 1.0, 0.0]])
-    weight_input_tensor = keras.Input(shape=(None,))
-
-    output_tensor = reduction.Reduction(reduction="mean")(
-        input_tensor, weights=weight_input_tensor)
-    model = keras.Model([input_tensor, weight_input_tensor], output_tensor)
-
-    output = model.predict([data, weights])
-    expected_output = [[2.0, 2.0], [2.0, 1.5]]
-    self.assertAllClose(expected_output, output)
-
-  def test_sqrtn_fails_on_unweighted_ragged(self):
-    input_tensor = keras.Input(shape=(None, None), ragged=True)
-    with self.assertRaisesRegex(ValueError, ".*sqrtn.*"):
-      _ = reduction.Reduction(reduction="sqrtn")(input_tensor)
-
-  def test_sqrtn_fails_on_unweighted_dense(self):
-    input_tensor = keras.Input(shape=(None, None))
-    with self.assertRaisesRegex(ValueError, ".*sqrtn.*"):
-      _ = reduction.Reduction(reduction="sqrtn")(input_tensor)
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/keras/layers/preprocessing/string_lookup.py b/tensorflow/python/keras/layers/preprocessing/string_lookup.py
deleted file mode 100644
index 0c5c130..0000000
--- a/tensorflow/python/keras/layers/preprocessing/string_lookup.py
+++ /dev/null
@@ -1,341 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras string lookup preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.keras.layers.preprocessing import index_lookup
-from tensorflow.python.keras.layers.preprocessing import table_utils
-from tensorflow.python.util import compat
-from tensorflow.python.util.tf_export import keras_export
-
-
-@keras_export("keras.layers.experimental.preprocessing.StringLookup", v1=[])
-class StringLookup(index_lookup.IndexLookup):
-  """Maps strings from a vocabulary to integer indices.
-
-  This layer translates a set of arbitrary strings into an integer output via a
-  table-based vocabulary lookup.
-
-  The vocabulary for the layer can be supplied on construction or learned via
-  `adapt()`. During `adapt()`, the layer will analyze a data set, determine the
-  frequency of individual string tokens, and create a vocabulary from them. If
-  the vocabulary is capped in size, the most frequent tokens will be used to
-  create the vocabulary and all others will be treated as out-of-vocabulary
-  (OOV).
-
-  There are two possible output modes for the layer.
-  When `output_mode` is `"int"`,
-  input strings are converted to their index in the vocabulary (an integer).
-  When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input strings
-  are encoded into an array where each dimension corresponds to an element in
-  the vocabulary.
-
-  The vocabulary can optionally contain a mask token as well as an OOV token
-  (which can optionally occupy multiple indices in the vocabulary, as set
-  by `num_oov_indices`).
-  The position of these tokens in the vocabulary is fixed. When `output_mode` is
-  `"int"`, the vocabulary will begin with the mask token (if set), followed by
-  OOV indices, followed by the rest of the vocabulary. When `output_mode` is
-  `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with OOV
-  indices and instances of the mask token will be dropped.
-
-  Args:
-    max_tokens: The maximum size of the vocabulary for this layer. If None,
-      there is no cap on the size of the vocabulary. Note that this size
-      includes the OOV and mask tokens. Defaults to None.
-    num_oov_indices: The number of out-of-vocabulary tokens to use. If this
-      value is more than 1, OOV inputs are hashed to determine their OOV value.
-      If this value is 0, OOV inputs will cause an error when calling the layer.
-      Defaults to 1.
-    mask_token: A token that represents masked inputs. When `output_mode` is
-      `"int"`, the token is included in vocabulary and mapped to index 0. In
-      other output modes, the token will not appear in the vocabulary and
-      instances of the mask token in the input will be dropped. If set to None,
-      no mask term will be added. Defaults to `None`.
-    oov_token: Only used when `invert` is True. The token to return for OOV
-      indices. Defaults to `"[UNK]"`.
-    vocabulary: An optional list of tokens, or a path to a text file containing
-      a vocabulary to load into this layer. The file should contain one token
-      per line. If the list or file contains the same token multiple times, an
-      error will be thrown.
-    invert: Only valid when `output_mode` is `"int"`. If True, this layer will
-      map indices to vocabulary items instead of mapping vocabulary items to
-      indices. Defaults to False.
-    output_mode: Specification for the output of the layer. Defaults to `"int"`.
-      Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or
-      `"tf_idf"` configuring the layer as follows:
-        - `"int"`: Return the raw integer indices of the input tokens.
-        - `"one_hot"`: Encodes each individual element in the input into an
-          array the same size as the vocabulary, containing a 1 at the element
-          index. If the last dimension is size 1, will encode on that dimension.
-          If the last dimension is not size 1, will append a new dimension for
-          the encoded output.
-        - `"multi_hot"`: Encodes each sample in the input into a single array
-          the same size as the vocabulary, containing a 1 for each vocabulary
-          term present in the sample. Treats the last dimension as the sample
-          dimension, if input shape is (..., sample_length), output shape will
-          be (..., num_tokens).
-        - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-          number of times the token at that index appeared in the sample.
-        - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to
-          find the value in each token slot.
-    pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`,
-      `"count"`, or `"tf_idf"`. If True, the output will have its feature axis
-      padded to `max_tokens` even if the number of unique tokens in the
-      vocabulary is less than max_tokens, resulting in a tensor of shape
-      [batch_size, max_tokens] regardless of vocabulary size. Defaults to False.
-    sparse: Boolean. Only applicable when `output_mode` is `"multi_hot"`,
-      `"count"`, or `"tf_idf"`. If True, returns a `SparseTensor` instead of a
-      dense `Tensor`. Defaults to False.
-
-  Examples:
-
-  **Creating a lookup layer with a known vocabulary**
-
-  This example creates a lookup layer with a pre-existing vocabulary.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
-  >>> layer = StringLookup(vocabulary=vocab)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[1, 3, 4],
-         [4, 0, 2]])>
-
-  **Creating a lookup layer with an adapted vocabulary**
-
-  This example creates a lookup layer and generates the vocabulary by analyzing
-  the dataset.
-
-  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
-  >>> layer = StringLookup()
-  >>> layer.adapt(data)
-  >>> layer.get_vocabulary()
-  ['[UNK]', 'd', 'z', 'c', 'b', 'a']
-
-  Note that the OOV token [UNK] has been added to the vocabulary. The remaining
-  tokens are sorted by frequency ('d', which has 2 occurrences, is first) then
-  by inverse sort order.
-
-  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
-  >>> layer = StringLookup()
-  >>> layer.adapt(data)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[5, 3, 1],
-         [1, 2, 4]])>
-
-  **Lookups with multiple OOV indices**
-
-  This example demonstrates how to use a lookup layer with multiple OOV indices.
-  When a layer is created with more than one OOV index, any OOV values are
-  hashed into the number of OOV buckets, distributing OOV values in a
-  deterministic fashion across the set.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant([["a", "c", "d"], ["m", "z", "b"]])
-  >>> layer = StringLookup(vocabulary=vocab, num_oov_indices=2)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=int64, numpy=
-  array([[2, 4, 5],
-         [0, 1, 3]])>
-
-  Note that the output for OOV value 'm' is 0, while the output for OOV value
-  'z' is 1. The in-vocab terms have their output index increased by 1 from
-  earlier examples (a maps to 2, etc) in order to make space for the extra OOV
-  value.
-
-  **One-hot output**
-
-  Configure the layer with `output_mode='one_hot'`. Note that the first
-  `num_oov_indices` dimensions in the one_hot encoding represent OOV values.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant(["a", "b", "c", "d", "z"])
-  >>> layer = StringLookup(vocabulary=vocab, output_mode='one_hot')
-  >>> layer(data)
-  <tf.Tensor: shape=(5, 5), dtype=float32, numpy=
-    array([[0., 1., 0., 0., 0.],
-           [0., 0., 1., 0., 0.],
-           [0., 0., 0., 1., 0.],
-           [0., 0., 0., 0., 1.],
-           [1., 0., 0., 0., 0.]], dtype=float32)>
-
-  **Multi-hot output**
-
-  Configure the layer with `output_mode='multi_hot'`. Note that the first
-  `num_oov_indices` dimensions in the multi_hot encoding represent OOV values.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]])
-  >>> layer = StringLookup(vocabulary=vocab, output_mode='multi_hot')
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0., 1., 0., 1., 1.],
-           [1., 0., 1., 0., 1.]], dtype=float32)>
-
-  **Token count output**
-
-  Configure the layer with `output_mode='count'`. As with multi_hot output, the
-  first `num_oov_indices` dimensions in the output represent OOV values.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]])
-  >>> layer = StringLookup(vocabulary=vocab, output_mode='count')
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0., 1., 0., 1., 2.],
-           [2., 0., 1., 0., 1.]], dtype=float32)>
-
-  **TF-IDF output**
-
-  Configure the layer with `output_mode='tf_idf'`. As with multi_hot output, the
-  first `num_oov_indices` dimensions in the output represent OOV values.
-
-  Each token bin will output `token_count * idf_weight`, where the idf weights
-  are the inverse document frequency weights per token. These should be provided
-  along with the vocabulary. Note that the `idf_weight` for OOV values will
-  default to the average of all idf weights passed in.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> idf_weights = [0.25, 0.75, 0.6, 0.4]
-  >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]])
-  >>> layer = StringLookup(output_mode='tf_idf')
-  >>> layer.set_vocabulary(vocab, idf_weights=idf_weights)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0.  , 0.25, 0.  , 0.6 , 0.8 ],
-           [1.0 , 0.  , 0.75, 0.  , 0.4 ]], dtype=float32)>
-
-  To specify the idf weights for OOV values, you will need to pass the entire
-  vocabulary, including the leading OOV token.
-
-  >>> vocab = ["[UNK]", "a", "b", "c", "d"]
-  >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4]
-  >>> data = tf.constant([["a", "c", "d", "d"], ["d", "z", "b", "z"]])
-  >>> layer = StringLookup(output_mode='tf_idf')
-  >>> layer.set_vocabulary(vocab, idf_weights=idf_weights)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 5), dtype=float32, numpy=
-    array([[0.  , 0.25, 0.  , 0.6 , 0.8 ],
-           [1.8 , 0.  , 0.75, 0.  , 0.4 ]], dtype=float32)>
-
-  When adapting the layer in tf_idf mode, each input sample will be considered a
-  document, and idf weight per token will be calculated as
-  `log(1 + num_documents / (1 + token_document_count))`.
-
-  **Inverse lookup**
-
-  This example demonstrates how to map indices to strings using this layer. (You
-  can also use adapt() with invert=True, but for simplicity we'll pass the
-  vocab in this example.)
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant([[1, 3, 4], [4, 0, 2]])
-  >>> layer = StringLookup(vocabulary=vocab, invert=True)
-  >>> layer(data)
-  <tf.Tensor: shape=(2, 3), dtype=string, numpy=
-  array([[b'a', b'c', b'd'],
-         [b'd', b'[UNK]', b'b']], dtype=object)>
-
-  Note that the first index corresponds to the OOV token by default.
-
-
-  **Forward and inverse lookup pairs**
-
-  This example demonstrates how to use the vocabulary of a standard lookup
-  layer to create an inverse lookup layer.
-
-  >>> vocab = ["a", "b", "c", "d"]
-  >>> data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
-  >>> layer = StringLookup(vocabulary=vocab)
-  >>> i_layer = StringLookup(vocabulary=vocab, invert=True)
-  >>> int_data = layer(data)
-  >>> i_layer(int_data)
-  <tf.Tensor: shape=(2, 3), dtype=string, numpy=
-  array([[b'a', b'c', b'd'],
-         [b'd', b'[UNK]', b'b']], dtype=object)>
-
-  In this example, the input value 'z' resulted in an output of '[UNK]', since
-  'z' was not in the vocabulary - it got represented as an OOV, and all OOV
-  values are returned as '[UNK]' in the inverse layer. Also, note that for the
-  inverse to work, you must have already set the forward layer vocabulary
-  either directly or via adapt() before calling get_vocabulary().
-  """
-
-  def __init__(self,
-               max_tokens=None,
-               num_oov_indices=1,
-               mask_token=None,
-               oov_token="[UNK]",
-               vocabulary=None,
-               encoding=None,
-               invert=False,
-               output_mode=index_lookup.INT,
-               sparse=False,
-               pad_to_max_tokens=False,
-               **kwargs):
-    allowed_dtypes = [dtypes.string]
-
-    if "dtype" in kwargs and kwargs["dtype"] not in allowed_dtypes:
-      raise ValueError("The value of the dtype argument for StringLookup may "
-                       "only be one of %s." % (allowed_dtypes,))
-
-    if "dtype" not in kwargs:
-      kwargs["dtype"] = dtypes.string
-
-    if encoding is None:
-      encoding = "utf-8"
-
-    self.encoding = encoding
-
-    super(StringLookup, self).__init__(
-        max_tokens=max_tokens,
-        num_oov_indices=num_oov_indices,
-        mask_token=mask_token,
-        oov_token=oov_token,
-        vocabulary=vocabulary,
-        invert=invert,
-        output_mode=output_mode,
-        sparse=sparse,
-        pad_to_max_tokens=pad_to_max_tokens,
-        **kwargs)
-
-  def get_config(self):
-    config = {"encoding": self.encoding}
-    base_config = super(StringLookup, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def set_vocabulary(self, vocabulary, idf_weights=None):
-    if isinstance(vocabulary, str):
-      if self.output_mode == index_lookup.TF_IDF:
-        raise RuntimeError("Setting vocabulary directly from a file is not "
-                           "supported in TF-IDF mode, since this layer cannot "
-                           "read files containing TF-IDF weight data. Please "
-                           "read the file using Python and set the vocabulary "
-                           "and weights by passing lists or arrays to the "
-                           "set_vocabulary function's `vocabulary` and "
-                           "`idf_weights` args.")
-      vocabulary = table_utils.get_vocabulary_from_file(vocabulary,
-                                                        self.encoding)
-    super().set_vocabulary(vocabulary, idf_weights=idf_weights)
-
-  # Overridden methods from IndexLookup.
-  def _tensor_vocab_to_numpy(self, vocabulary):
-    vocabulary = vocabulary.numpy()
-    return np.array([compat.as_text(x, self.encoding) for x in vocabulary])
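The `tf_idf` documentation above defines the per-token idf weight computed during `adapt()` as `log(1 + num_documents / (1 + token_document_count))`. A quick sketch of that formula in NumPy, using made-up document counts rather than anything from this file:

import numpy as np

num_documents = 4
# Hypothetical document counts for an adapted vocabulary ["a", "b", "c", "d"]:
# "a" appears in every document, "d" in only one.
token_document_counts = np.array([4.0, 3.0, 2.0, 1.0])

idf_weights = np.log(1.0 + num_documents / (1.0 + token_document_counts))
# idf("a") = log(1 + 4/5) ~= 0.588, idf("d") = log(1 + 4/2) ~= 1.099, so rarer
# tokens get larger weights, as expected for TF-IDF.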
diff --git a/tensorflow/python/keras/layers/preprocessing/string_lookup_test.py b/tensorflow/python/keras/layers/preprocessing/string_lookup_test.py
deleted file mode 100644
index cdd8cfb..0000000
--- a/tensorflow/python/keras/layers/preprocessing/string_lookup_test.py
+++ /dev/null
@@ -1,401 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras text vectorization preprocessing layer."""
-
-import os
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import def_function
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import errors_impl
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.layers.preprocessing import string_lookup
-from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-
-
-def _get_end_to_end_test_cases():
-  test_cases = (
-      {
-          "testcase_name": "test_strings_soft_vocab_cap",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # accumulator is sorting by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-          },
-          "expected_output": [[1], [2], [3], [4], [4], [3], [1], [0]],
-          "input_dtype":
-              dtypes.string
-      },
-  )
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class StringLookupLayerTest(keras_parameterized.TestCase,
-                            preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters(*_get_end_to_end_test_cases())
-  def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs,
-                                       use_dataset, expected_output,
-                                       input_dtype):
-    cls = string_lookup.StringLookup
-    expected_output_dtype = dtypes.int64
-    input_shape = input_data.shape
-
-    if use_dataset:
-      # Keras APIs expect batched datasets.
-      # TODO(rachelim): `model.predict` predicts the result on each
-      # dataset batch separately, then tries to concatenate the results
-      # together. When the results have different shapes on the non-concat
-      # axis (which can happen in the output_mode = INT case for
-      # StringLookup), the concatenation fails. In real use cases, this may
-      # not be an issue because users are likely to pipe the preprocessing layer
-      # into other keras layers instead of predicting it directly. A workaround
-      # for these unit tests is to have the dataset only contain one batch, so
-      # no concatenation needs to happen with the result. For consistency with
-      # numpy input, we should make `predict` join differently shaped results
-      # together sensibly, with 0 padding.
-      input_data = dataset_ops.Dataset.from_tensor_slices(input_data).batch(
-          input_shape[0])
-      vocab_data = dataset_ops.Dataset.from_tensor_slices(vocab_data).batch(
-          input_shape[0])
-
-    with CustomObjectScope({"StringLookup": cls}):
-      output_data = testing_utils.layer_test(
-          cls,
-          kwargs=kwargs,
-          input_shape=input_shape,
-          input_data=input_data,
-          input_dtype=input_dtype,
-          expected_output_dtype=expected_output_dtype,
-          validate_training=False,
-          adapt_data=vocab_data)
-    self.assertAllClose(expected_output, output_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class StringLookupVocabularyTest(keras_parameterized.TestCase,
-                                 preprocessing_test_utils.PreprocessingLayerTest
-                                ):
-
-  def _write_to_temp_file(self, file_name, vocab_list):
-    vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-    with gfile.GFile(vocab_path, "w") as writer:
-      for vocab in vocab_list:
-        writer.write(vocab + "\n")
-    return vocab_path
-
-  def test_int_output_explicit_vocab(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(vocabulary=vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_int_output_explicit_vocab_with_special_tokens(self):
-    vocab_data = ["", "[UNK]", "earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(vocabulary=vocab_data, mask_token="")
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_int_output_no_oov(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    valid_input = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", ""]])
-    invalid_input = np.array([["earth", "wind", "and", "michigan"],
-                              ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, mask_token="", num_oov_indices=0)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(valid_input)
-    self.assertAllEqual(expected_output, output_data)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "found OOV values.*michigan"):
-      _ = model.predict(invalid_input)
-
-  def test_no_vocab(self):
-    with self.assertRaisesRegex(
-        ValueError, "You must set the layer's vocabulary"):
-      layer = string_lookup.StringLookup()
-      layer([["a"]])
-
-  def test_one_hot_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array(["earth", "wind", "and", "fire", "michigan"])
-    expected_output = [
-        [0, 1, 0, 0, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 1, 0],
-        [0, 0, 0, 0, 1],
-        [1, 0, 0, 0, 0],
-    ]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, output_mode="one_hot")
-    res = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=res)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_multi_hot_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[0, 1, 1, 1, 1], [1, 1, 0, 1, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, output_mode="multi_hot")
-    res = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=res)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_count_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "earth", "fire", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[0, 2, 0, 0, 2], [1, 1, 0, 1, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, output_mode="count")
-    res = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=res)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_sparse_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, output_mode="multi_hot", sparse=True)
-    res = layer(input_data)
-    self.assertEqual(res.__class__.__name__, "SparseKerasTensor")
-
-  def test_get_vocab_returns_str(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    expected_vocab = ["[UNK]", "earth", "wind", "and", "fire"]
-    layer = string_lookup.StringLookup(vocabulary=vocab_data)
-    layer_vocab = layer.get_vocabulary()
-    self.assertAllEqual(expected_vocab, layer_vocab)
-    self.assertIsInstance(layer_vocab[0], str)
-
-    inverse_layer = string_lookup.StringLookup(
-        vocabulary=layer.get_vocabulary(), invert=True)
-    layer_vocab = inverse_layer.get_vocabulary()
-    self.assertAllEqual(expected_vocab, layer_vocab)
-    self.assertIsInstance(layer_vocab[0], str)
-
-  def test_int_output_explicit_vocab_from_file(self):
-    vocab_list = ["earth", "wind", "and", "fire"]
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
-
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(vocabulary=vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_int_output_explicit_vocab_from_file_via_setter(self):
-    vocab_list = ["earth", "wind", "and", "fire"]
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_list)
-
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, 0]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup()
-    layer.set_vocabulary(vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_non_unique_vocab_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire", "fire"]
-    with self.assertRaisesRegex(ValueError, ".*repeated term.*fire.*"):
-      _ = string_lookup.StringLookup(vocabulary=vocab_data)
-
-  def test_non_unique_vocab_from_file_fails(self):
-    vocab_list = ["earth", "wind", "and", "fire", "earth"]
-    vocab_path = self._write_to_temp_file("repeat_vocab_file", vocab_list)
-    with self.assertRaisesRegex(
-        errors_impl.FailedPreconditionError,
-        "HashTable has different value for same key.*earth"):
-      _ = string_lookup.StringLookup(vocabulary=vocab_path)
-
-  def test_inverse_layer(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]])
-    expected_output = np.array([["earth", "wind", "and", "fire"],
-                                ["fire", "and", "earth", ""]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, invert=True, mask_token="")
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_inverse_layer_from_file(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([[1, 2, 3, 4], [4, 3, 1, 0]])
-    expected_output = np.array([["earth", "wind", "and", "fire"],
-                                ["fire", "and", "earth", "[UNK]"]])
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_data)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = string_lookup.StringLookup(vocabulary=vocab_path, invert=True)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_inverse_layer_from_file_with_mask(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([[2, 3, 4, 5], [5, 4, 2, 0]])
-    expected_output = np.array([["earth", "wind", "and", "fire"],
-                                ["fire", "and", "earth", "[M]"]])
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_data)
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.int64)
-    layer = string_lookup.StringLookup(
-        vocabulary=vocab_path, invert=True, mask_token="[M]")
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_forward_backward_explicit_vocab(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = np.array([["earth", "wind", "and", "fire"],
-                                ["fire", "and", "earth", "[UNK]"]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup(vocabulary=vocab_data)
-    invert_layer = string_lookup.StringLookup(
-        vocabulary=vocab_data, invert=True)
-    int_data = layer(input_data)
-    out_data = invert_layer(int_data)
-    model = keras.Model(inputs=input_data, outputs=out_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_forward_backward_adapted_vocab(self):
-    adapt_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = np.array([["earth", "wind", "and", "fire"],
-                                ["fire", "and", "earth", "[UNK]"]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = string_lookup.StringLookup()
-    layer.adapt(adapt_data)
-    invert_layer = string_lookup.StringLookup(
-        vocabulary=layer.get_vocabulary(), invert=True)
-    int_data = layer(input_data)
-    out_data = invert_layer(int_data)
-    model = keras.Model(inputs=input_data, outputs=out_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_ragged_string_input_multi_bucket(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant([["earth", "wind", "fire"],
-                                               ["fire", "and", "earth",
-                                                "ohio"]])
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string, ragged=True)
-    layer = string_lookup.StringLookup(num_oov_indices=2)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_data = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_tensor_vocab(self):
-    vocab_data = ["[UNK]", "wind", "and", "fire"]
-    vocab_tensor = constant_op.constant(vocab_data)
-    layer = string_lookup.StringLookup(vocabulary=vocab_tensor)
-    returned_vocab = layer.get_vocabulary()
-    self.assertAllEqual(vocab_data, returned_vocab)
-    self.assertAllEqual(layer.vocabulary_size(), 4)
-    fn = def_function.function(lambda: layer.set_vocabulary(vocab_tensor))
-    with self.assertRaisesRegex(RuntimeError, "Cannot set a tensor vocabulary"):
-      fn()
-
-if __name__ == "__main__":
-  test.main()
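
The forward and inverse lookups exercised by the removed tests are unchanged in the
maintained Keras copy of the layer. A minimal sketch, assuming a TF release that exports
it as tf.keras.layers.StringLookup (earlier releases expose it under
tf.keras.layers.experimental.preprocessing):

    import tensorflow as tf

    vocab = ["earth", "wind", "and", "fire"]
    lookup = tf.keras.layers.StringLookup(vocabulary=vocab)            # index 0 is the OOV slot
    inverse = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True)

    ids = lookup([["earth", "wind", "and", "fire"],
                  ["fire", "and", "earth", "michigan"]])               # [[1, 2, 3, 4], [4, 3, 1, 0]]
    tokens = inverse(ids)                                              # OOV id 0 maps back to "[UNK]"
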
diff --git a/tensorflow/python/keras/layers/preprocessing/table_utils.py b/tensorflow/python/keras/layers/preprocessing/table_utils.py
deleted file mode 100644
index 264e6a1..0000000
--- a/tensorflow/python/keras/layers/preprocessing/table_utils.py
+++ /dev/null
@@ -1,209 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities for working with tf.lookup tables in Keras."""
-
-import collections
-import os
-import numpy as np
-
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.ops.ragged import ragged_functional_ops
-from tensorflow.python.ops.ragged import ragged_tensor
-from tensorflow.python.ops.ragged import ragged_tensor_value
-from tensorflow.python.platform import gfile
-
-
-class TableHandler(object):
-  """Wrapper object that holds a lookup table and provides accessors."""
-
-  def __init__(self,
-               table,
-               oov_tokens=None,
-               mask_token=None,
-               mask_value=0):
-    self.table = table
-    self.mutable = isinstance(table, lookup_ops.MutableHashTable)
-    self.mask_token = mask_token
-    self.mask_value = mask_value
-
-    if oov_tokens is None:
-      self.oov_tokens = oov_tokens
-    else:
-      if not isinstance(oov_tokens, (list, tuple, np.ndarray)):
-        oov_tokens = [oov_tokens]
-      self.oov_tokens = math_ops.cast(oov_tokens, table._value_dtype)  # pylint: disable=protected-access
-
-  def table_size(self):
-    return self.table.size().numpy()
-
-  def clear(self):
-    if not self.mutable:
-      raise RuntimeError("Unable to clear a statically-backed table.")
-
-    keys, _ = self.table.export()
-    self.table.remove(keys)
-
-  def insert(self, keys, values):
-    """Insert values into the backed table."""
-    if not self.mutable:
-      raise RuntimeError("Unable to insert into a statically-backed table.")
-
-    if len(values) != len(keys):
-      raise RuntimeError("Size mismatch between values and key arrays. "
-                         "Keys had size %s, values had size %s." %
-                         (len(keys), len(values)))
-    keys = ops.convert_to_tensor_v2_with_dispatch(
-        keys, dtype=self.table._key_dtype)  # pylint: disable=protected-access
-    values = ops.convert_to_tensor_v2_with_dispatch(
-        values, dtype=self.table._value_dtype)  # pylint: disable=protected-access
-    if values.shape.ndims != 1:
-      raise ValueError("`values` must be 1-dimensional, got an input with "
-                       "%s dimensions." % values.shape.ndims)
-    self.table.insert(keys, values)
-
-  def _replace_oov_buckets(self, inputs, lookups):
-    """Replace the default OOV value with one of the OOV bucket values."""
-    if self.oov_tokens is None:
-      return lookups
-
-    num_oov_elements = self.oov_tokens.shape.num_elements()
-    if inputs.dtype.is_integer:
-      oov_indices = math_ops.floormod(inputs, num_oov_elements)
-    else:
-      oov_indices = string_ops.string_to_hash_bucket_fast(
-          inputs, num_buckets=num_oov_elements)
-
-    oov_values = array_ops.gather(self.oov_tokens, oov_indices)
-    oov_locations = math_ops.equal(lookups, self.table._default_value)  # pylint: disable=protected-access
-
-    return array_ops.where(oov_locations, oov_values, lookups)
-
-  def _lookup_and_mask(self, inputs):
-    """Return a lookup with any location with the mask_token masked to 0."""
-    lookups = self.table.lookup(inputs)
-    # If we don't need to handle masking, return the lookup values directly.
-    if self.mask_token is None:
-      return lookups
-
-    # Inject 0s wherever the mask token was in the inputs.
-    mask_locations = math_ops.equal(inputs, self.mask_token)
-    return array_ops.where_v2(
-        mask_locations,
-        math_ops.cast(self.mask_value, self.table._value_dtype),  # pylint: disable=protected-access
-        lookups)
-
-  def _ragged_lookup(self, inputs):
-    """Perform a table lookup on a ragged tensor."""
-    # The table lookup ops don't natively support ragged tensors, so if we have
-    # a RT we need to use map_flat_values to look up every element.
-    indexed_data = ragged_functional_ops.map_flat_values(
-        self._lookup_and_mask, inputs)
-    indexed_data = ragged_functional_ops.map_flat_values(
-        self._replace_oov_buckets, inputs, indexed_data)
-    # table.lookup is not shape-preserving, so we need to set the shape here.
-    indexed_data._set_shape(inputs.shape)  # pylint: disable=protected-access
-    # Composite tensors can pass tensor values through, which will cause
-    # errors if all operations in the TF graph do so. We can break this chain
-    # with an identity here.
-    return array_ops.identity(indexed_data)
-
-  def _sparse_lookup(self, inputs):
-    """Perform a table lookup on a sparse tensor."""
-    values = self._lookup_and_mask(inputs.values)
-    values = self._replace_oov_buckets(inputs.values, values)
-    indexed_data = sparse_tensor.SparseTensor(inputs.indices, values,
-                                              inputs.dense_shape)
-    # Composite tensors can pass tensor values through, which will cause
-    # errors if all operations in the TF graph do so. We can break this chain
-    # with an identity here.
-    return array_ops.identity(indexed_data)
-
-  def _tensor_lookup(self, inputs):
-    """Perform a table lookup on a tf.tensor."""
-    values = self._lookup_and_mask(inputs)
-    indexed_data = self._replace_oov_buckets(inputs, values)
-    # (b/149446477): output does not preserve input shape.
-    indexed_data.set_shape(inputs.shape)
-    return indexed_data
-
-  def lookup(self, inputs):
-    """Perform a table lookup."""
-    # Sparse tensors don't play nicely with tensor conversion, so we handle
-    # them before attempting to convert lists or arrays to tensors.
-    if isinstance(
-        inputs, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
-      return self._sparse_lookup(inputs)
-
-    if tf_utils.is_ragged(inputs):
-      if isinstance(inputs, ragged_tensor_value.RaggedTensorValue):
-        flat_values = ops.convert_to_tensor_v2_with_dispatch(
-            value=inputs.flat_values, name="flat_values")
-        inputs = ragged_tensor.RaggedTensor.from_nested_row_splits(
-            flat_values, inputs.nested_row_splits, validate=False)
-      return self._ragged_lookup(inputs)
-
-    # For normal tensor inputs
-    inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
-    return self._tensor_lookup(inputs)
-
-
-def num_tokens_in_file(vocabulary_path):
-  """Count the number of lines in a vocab file to get the number of tokens."""
-  num_tokens = 0
-  with gfile.GFile(vocabulary_path, "r") as reader:
-    text = reader.readline()
-    while text:
-      num_tokens += 1
-      text = reader.readline()
-
-  return num_tokens
-
-
-def get_vocabulary_from_file(vocabulary_path, encoding="utf-8"):
-  """Read a vocabulary in from a file."""
-  vocab = []
-  with gfile.GFile(vocabulary_path, "r") as reader:
-    while True:
-      # Get the next line (incl. \n), and break if nothing is left to read.
-      text = reader.readline()
-      if not text:
-        break
-
-      # Convert the raw text and strip whitespace.
-      if isinstance(text, str):
-        token = text
-      elif isinstance(text, bytes):
-        token = text.decode(encoding, "ignore")
-      token = token.rstrip(os.linesep)
-      vocab.append(token)
-  return vocab
-
-
-def find_repeated_tokens(vocabulary):
-  """Return all repeated tokens in a vocabulary."""
-  vocabulary_set = set(vocabulary)
-  if len(vocabulary) != len(vocabulary_set):
-    return [
-        item for item, count in collections.Counter(vocabulary).items()
-        if count > 1
-    ]
-  else:
-    return []
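
The multi-OOV routing in _replace_oov_buckets above boils down to a stable hash over the
OOV slots for string keys (and a plain modulo for integer keys). A small sketch using the
public op it wraps; the concrete assignments shown are the ones the removed tests below
expect:

    import tensorflow as tf

    oov_tokens = tf.constant([1, 2], dtype=tf.int64)            # two OOV ids
    unknown = tf.constant(["michigan", "ohio"])
    buckets = tf.strings.to_hash_bucket_fast(unknown, num_buckets=2)
    oov_ids = tf.gather(oov_tokens, buckets)                     # "michigan" -> 1, "ohio" -> 2
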
diff --git a/tensorflow/python/keras/layers/preprocessing/table_utils_test.py b/tensorflow/python/keras/layers/preprocessing/table_utils_test.py
deleted file mode 100644
index 4a46ce0..0000000
--- a/tensorflow/python/keras/layers/preprocessing/table_utils_test.py
+++ /dev/null
@@ -1,439 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras lookup table utils."""
-
-import os
-import tempfile
-
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import sparse_tensor
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.layers.preprocessing import table_utils
-from tensorflow.python.ops import lookup_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-
-
-def get_table(dtype=dtypes.string, oov_tokens=None):
-  table = lookup_ops.MutableHashTable(
-      key_dtype=dtype,
-      value_dtype=dtypes.int64,
-      default_value=-7,
-      name="index_table")
-  return table_utils.TableHandler(table, oov_tokens)
-
-
-def get_static_table(tmpdir,
-                     vocab_list,
-                     mask_token=None,
-                     dtype=dtypes.string,
-                     oov_tokens=None):
-  vocabulary_file = os.path.join(tmpdir, "tmp_vocab.txt")
-
-  if dtype == dtypes.string:
-    with open(vocabulary_file, "w") as f:
-      f.write("\n".join(vocab_list) + "\n")
-  else:
-    with open(vocabulary_file, "w") as f:
-      f.write("\n".join([str(v) for v in vocab_list]) + "\n")
-
-  offset = ((0 if mask_token is None else 1) +
-            (len(oov_tokens) if oov_tokens is not None else 0))
-  init = lookup_ops.TextFileInitializer(
-      vocabulary_file,
-      dtype,
-      lookup_ops.TextFileIndex.WHOLE_LINE,
-      dtypes.int64,
-      lookup_ops.TextFileIndex.LINE_NUMBER,
-      value_index_offset=offset)
-  table = lookup_ops.StaticHashTable(init, default_value=-7)
-  return table_utils.TableHandler(
-      table,
-      oov_tokens,
-      mask_token=mask_token)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingInputTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=["fire", "michigan"],
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    table = get_table(oov_tokens=[1])
-    table.insert(vocab_data, range(2, len(vocab_data) + 2))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_sparse_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 32], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    table = get_table(dtype=dtypes.int64, oov_tokens=[1])
-    table.insert(vocab_data, range(2, len(vocab_data) + 2))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant(
-        [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    table = get_table(oov_tokens=[1])
-    table.insert(vocab_data, range(2, len(vocab_data) + 2))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_ragged_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]],
-                                              dtype=np.int64)
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    table = get_table(dtype=dtypes.int64, oov_tokens=[1])
-    table.insert(vocab_data, range(2, len(vocab_data) + 2))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_tensor_multi_dim_values_fails(self):
-    key_data = np.array([0, 1], dtype=np.int64)
-    value_data = np.array([[11, 12], [21, 22]])
-
-    table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2])
-
-    with self.assertRaisesRegex(ValueError, "must be 1-dimensional"):
-      table.insert(key_data, value_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingMultiOOVTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_string_input_multi_bucket(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]], values=["fire", "ohio"], dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [6, 2]
-    expected_dense_shape = [3, 4]
-
-    table = get_table(oov_tokens=[1, 2])
-    table.insert(vocab_data, range(3, len(vocab_data) + 3))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_sparse_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 132], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [6, 1]
-    expected_dense_shape = [3, 4]
-
-    table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2])
-    table.insert(vocab_data, range(3, len(vocab_data) + 3))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_string_input_multi_bucket(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant([["earth", "wind", "fire"],
-                                               ["fire", "and", "earth",
-                                                "ohio"]])
-    expected_output = [[3, 4, 6], [6, 5, 3, 2]]
-
-    table = get_table(oov_tokens=[1, 2])
-    table.insert(vocab_data, range(3, len(vocab_data) + 3))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_ragged_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 132]],
-                                              dtype=np.int64)
-    expected_output = [[3, 4, 6], [6, 5, 3, 1]]
-
-    table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2])
-    table.insert(vocab_data, range(3, len(vocab_data) + 3))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_tensor_int_input_multi_bucket(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = np.array([[13, 132], [13, 133]], dtype=np.int64)
-    expected_values = [[6, 1], [6, 2]]
-
-    table = get_table(dtype=dtypes.int64, oov_tokens=[1, 2])
-    table.insert(vocab_data, range(3, len(vocab_data) + 3))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_values, output_data)
-
-  def test_tensor_string_input_multi_bucket(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = [["earth", "wind", "fire", "michigan"],
-                   ["fire", "and", "earth", "ohio"]]
-    expected_output = [[3, 4, 6, 1], [6, 5, 3, 2]]
-
-    table = get_table(oov_tokens=[1, 2])
-    table.insert(vocab_data, range(3, len(vocab_data) + 3))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class IndexLookupOutputTest(keras_parameterized.TestCase,
-                            preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_int_output_default_lookup_value(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, -7]]
-
-    table = get_table(oov_tokens=None)
-    table.insert(vocab_data, range(1, len(vocab_data) + 1))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_output_shape(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-
-    table = get_table()
-    table.insert(vocab_data, range(1, len(vocab_data) + 1))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(input_array.shape[1:], output_data.shape[1:])
-
-  def test_int_output_no_reserved_zero_default_lookup_value(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[0, 1, 2, 3], [3, 2, 0, -7]]
-
-    table = get_table(oov_tokens=None)
-    table.insert(vocab_data, range(len(vocab_data)))
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class StaticIndexLookupOutputTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_int_output_default_lookup_value(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[1, 2, 3, 4], [4, 3, 1, -7]]
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(),
-        vocab_list=vocab_data,
-        mask_token="",
-        oov_tokens=None)
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_output_shape(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(), vocab_list=vocab_data, oov_tokens=None)
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(input_array.shape[1:], output_data.shape[1:])
-
-  def test_int_output_no_reserved_zero_default_lookup_value(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[0, 1, 2, 3], [3, 2, 0, -7]]
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(), vocab_list=vocab_data, oov_tokens=None)
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class CategoricalEncodingStaticInputTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_sparse_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=["fire", "michigan"],
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(),
-        vocab_list=vocab_data,
-        mask_token="",
-        oov_tokens=[1])
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_sparse_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = sparse_tensor.SparseTensor(
-        indices=[[0, 0], [1, 2]],
-        values=np.array([13, 32], dtype=np.int64),
-        dense_shape=[3, 4])
-
-    expected_indices = [[0, 0], [1, 2]]
-    expected_values = [5, 1]
-    expected_dense_shape = [3, 4]
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(),
-        vocab_list=vocab_data,
-        dtype=dtypes.int64,
-        mask_token=0,
-        oov_tokens=[1])
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_indices, output_data.indices)
-    self.assertAllEqual(expected_values, output_data.values)
-    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
-
-  def test_ragged_string_input(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = ragged_factory_ops.constant(
-        [["earth", "wind", "fire"], ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(),
-        vocab_list=vocab_data,
-        mask_token="",
-        oov_tokens=[1])
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-  def test_ragged_int_input(self):
-    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
-    input_array = ragged_factory_ops.constant([[10, 11, 13], [13, 12, 10, 42]],
-                                              dtype=np.int64)
-    expected_output = [[2, 3, 5], [5, 4, 2, 1]]
-
-    table = get_static_table(
-        tmpdir=self.get_temp_dir(),
-        vocab_list=vocab_data,
-        dtype=dtypes.int64,
-        mask_token=0,
-        oov_tokens=[1])
-    output_data = table.lookup(input_array)
-
-    self.assertAllEqual(expected_output, output_data)
-
-
-class GetVocabularyFromFileTest(test.TestCase):
-
-  def setUp(self):
-    super(GetVocabularyFromFileTest, self).setUp()
-    dir_path = tempfile.mkdtemp(prefix=test.get_temp_dir())
-    self._vocab_path = os.path.join(dir_path, "vocab")
-
-  def test_only_line_separator_is_stripped(self):
-    expected = ["foo", " foo", "foo ", " foo "]
-    with gfile.GFile(self._vocab_path, "w") as writer:
-      for word in expected:
-        writer.write(word)
-        writer.write(os.linesep)
-
-    actual = table_utils.get_vocabulary_from_file(self._vocab_path)
-    self.assertAllEqual(expected, actual)
-
-  def test_linux_file(self):
-    content = b"line1\nline2\nline3"
-    with gfile.GFile(self._vocab_path, "wb") as writer:
-      writer.write(content)
-
-    actual = table_utils.get_vocabulary_from_file(self._vocab_path)
-    self.assertAllEqual(["line1", "line2", "line3"], actual)
-
-  def test_windows_file(self):
-    content = b"line1\r\nline2\r\nline3"
-    with gfile.GFile(self._vocab_path, "wb") as writer:
-      writer.write(content)
-
-    actual = table_utils.get_vocabulary_from_file(self._vocab_path)
-    self.assertAllEqual(["line1", "line2", "line3"], actual)
-
-if __name__ == "__main__":
-  test.main()
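
What these tests pin down about vocabulary files: one token per line, only the trailing
line break is stripped, leading/trailing spaces inside a line are preserved, and both \n
and \r\n files are accepted. A plain-Python sketch of that contract (the helper name is
illustrative only):

    def read_vocab(path, encoding="utf-8"):
        # One token per line; strip only the line break, never other whitespace.
        with open(path, "rb") as f:
            return [line.decode(encoding, "ignore").rstrip("\r\n") for line in f]
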
diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization.py
deleted file mode 100644
index 4aac6c2..0000000
--- a/tensorflow/python/keras/layers/preprocessing/text_vectorization.py
+++ /dev/null
@@ -1,572 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras text vectorization preprocessing layer."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.engine import base_preprocessing_layer
-from tensorflow.python.keras.layers.preprocessing import index_lookup
-from tensorflow.python.keras.layers.preprocessing import string_lookup
-from tensorflow.python.keras.utils import layer_utils
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_string_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.ops.ragged import ragged_functional_ops
-from tensorflow.python.ops.ragged import ragged_string_ops
-from tensorflow.python.util.tf_export import keras_export
-
-LOWER_AND_STRIP_PUNCTUATION = "lower_and_strip_punctuation"
-
-SPLIT_ON_WHITESPACE = "whitespace"
-
-TF_IDF = index_lookup.TF_IDF
-INT = index_lookup.INT
-MULTI_HOT = index_lookup.MULTI_HOT
-COUNT = index_lookup.COUNT
-
-# This is an explicit regex of all the tokens that will be stripped if
-# LOWER_AND_STRIP_PUNCTUATION is set. If an application requires other
-# stripping, a Callable should be passed into the 'standardize' arg.
-DEFAULT_STRIP_REGEX = r'[!"#$%&()\*\+,-\./:;<=>?@\[\\\]^_`{|}~\']'
-
-# The string tokens in the extracted vocabulary
-_VOCAB_NAME = "vocab"
-# The inverse-document-frequency weights
-_IDF_NAME = "idf"
-# The IDF data for the OOV token
-_OOV_IDF_NAME = "oov_idf"
-
-# The string tokens in the full vocabulary
-_ACCUMULATOR_VOCAB_NAME = "vocab"
-# The total counts of each token in the vocabulary
-_ACCUMULATOR_COUNTS_NAME = "counts"
-# The number of documents / examples that each token appears in.
-_ACCUMULATOR_DOCUMENT_COUNTS = "document_counts"
-# The total number of documents / examples in the dataset.
-_ACCUMULATOR_NUM_DOCUMENTS = "num_documents"
-
-
-@keras_export(
-    "keras.layers.experimental.preprocessing.TextVectorization", v1=[])
-class TextVectorization(base_preprocessing_layer.CombinerPreprocessingLayer):
-  """Text vectorization layer.
-
-  This layer has basic options for managing text in a Keras model. It
-  transforms a batch of strings (one example = one string) into either a list of
-  token indices (one example = 1D tensor of integer token indices) or a dense
-  representation (one example = 1D tensor of float values representing data
-  about the example's tokens).
-
-  If desired, the user can call this layer's adapt() method on a dataset.
-  When this layer is adapted, it will analyze the dataset, determine the
-  frequency of individual string values, and create a 'vocabulary' from them.
-  This vocabulary can have unlimited size or be capped, depending on the
-  configuration options for this layer; if there are more unique values in the
-  input than the maximum vocabulary size, the most frequent terms will be used
-  to create the vocabulary.
-
-  The processing of each example contains the following steps:
-
-    1. standardize each example (usually lowercasing + punctuation stripping)
-    2. split each example into substrings (usually words)
-    3. recombine substrings into tokens (usually ngrams)
-    4. index tokens (associate a unique int value with each token)
-    5. transform each example using this index, either into a vector of ints or
-       a dense float vector.
-
-  Some notes on passing Callables to customize splitting and normalization for
-  this layer:
-
-    1. Any callable can be passed to this Layer, but if you want to serialize
-       this object you should only pass functions that are registered Keras
-       serializables (see `tf.keras.utils.register_keras_serializable` for more
-       details).
-    2. When using a custom callable for `standardize`, the data received
-       by the callable will be exactly as passed to this layer. The callable
-       should return a tensor of the same shape as the input.
-    3. When using a custom callable for `split`, the data received by the
-       callable will have the 1st dimension squeezed out - instead of
-       `[["string to split"], ["another string to split"]]`, the Callable will
-       see `["string to split", "another string to split"]`. The callable should
-       return a Tensor with the first dimension containing the split tokens -
-       in this example, we should see something like `[["string", "to",
-       "split"], ["another", "string", "to", "split"]]`. This makes the callable
-       site natively compatible with `tf.strings.split()`.
-
-  Args:
-    max_tokens: The maximum size of the vocabulary for this layer. If None,
-      there is no cap on the size of the vocabulary. Note that this vocabulary
-      includes the OOV token (plus the mask token when `output_mode` is
-      `"int"`), so the effective number of tokens is `max_tokens - 1 -
-      (1 if output_mode == "int" else 0)`.
-    standardize: Optional specification for standardization to apply to the
-      input text. Values can be None (no standardization),
-      `"lower_and_strip_punctuation"` (lowercase and remove punctuation) or a
-      Callable. Default is `"lower_and_strip_punctuation"`.
-    split: Optional specification for splitting the input text. Values can be
-      None (no splitting), `"whitespace"` (split on ASCII whitespace), or a
-      Callable. The default is `"whitespace"`.
-    ngrams: Optional specification for ngrams to create from the possibly-split
-      input text. Values can be None, an integer or tuple of integers; passing
-      an integer will create ngrams up to that integer, and passing a tuple of
-      integers will create ngrams for the specified values in the tuple. Passing
-      None means that no ngrams will be created.
-    output_mode: Optional specification for the output of the layer. Values can
-      be `"int"`, `"multi_hot"`, `"count"` or `"tf_idf"`, configuring the layer
-      as follows:
-        - `"int"`: Outputs integer indices, one integer index per split string
-          token. When output == `"int"`, 0 is reserved for masked locations;
-          this reduces the vocab size to max_tokens-2 instead of max_tokens-1
-        - `"multi_hot"`: Outputs a single int array per batch, of either
-          vocab_size or max_tokens size, containing 1s in all elements where the
-          token mapped to that index exists at least once in the batch item.
-        - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-          number of times the token at that index appeared in the batch item.
-        - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to
-          find the value in each token slot.
-    output_sequence_length: Only valid in INT mode. If set, the output will have
-      its time dimension padded or truncated to exactly `output_sequence_length`
-      values, resulting in a tensor of shape [batch_size,
-      output_sequence_length] regardless of how many tokens resulted from the
-      splitting step. Defaults to None.
-    pad_to_max_tokens: Only valid in `"multi_hot"`, `"count"`, and `"tf_idf"`
-      modes. If True, the output will have its feature axis padded to
-      `max_tokens` even if the number of unique tokens in the vocabulary is less
-      than max_tokens, resulting in a tensor of shape [batch_size, max_tokens]
-      regardless of vocabulary size. Defaults to False.
-    vocabulary: An optional list of vocabulary terms, or a path to a text file
-      containing a vocabulary to load into this layer. The file should contain
-      one token per line. If the list or file contains the same token multiple
-      times, an error will be thrown.
-
-  Example:
-
-  This example instantiates a TextVectorization layer that lowercases text,
-  splits on whitespace, strips punctuation, and outputs integer vocab indices.
-
-  >>> text_dataset = tf.data.Dataset.from_tensor_slices(["foo", "bar", "baz"])
-  >>> max_features = 5000  # Maximum vocab size.
-  >>> max_len = 4  # Sequence length to pad the outputs to.
-  >>>
-  >>> # Create the layer.
-  >>> vectorize_layer = TextVectorization(
-  ...  max_tokens=max_features,
-  ...  output_mode='int',
-  ...  output_sequence_length=max_len)
-  >>>
-  >>> # Now that the vocab layer has been created, call `adapt` on the text-only
-  >>> # dataset to create the vocabulary. You don't have to batch, but for large
-  >>> # datasets this means we're not keeping spare copies of the dataset.
-  >>> vectorize_layer.adapt(text_dataset.batch(64))
-  >>>
-  >>> # Create the model that uses the vectorize text layer
-  >>> model = tf.keras.models.Sequential()
-  >>>
-  >>> # Start by creating an explicit input layer. It needs to have a shape of
-  >>> # (1,) (because we need to guarantee that there is exactly one string
-  >>> # input per batch), and the dtype needs to be 'string'.
-  >>> model.add(tf.keras.Input(shape=(1,), dtype=tf.string))
-  >>>
-  >>> # The first layer in our model is the vectorization layer. After this
-  >>> # layer, we have a tensor of shape (batch_size, max_len) containing vocab
-  >>> # indices.
-  >>> model.add(vectorize_layer)
-  >>>
-  >>> # Now, the model can map strings to integers, and you can add an embedding
-  >>> # layer to map these integers to learned embeddings.
-  >>> input_data = [["foo qux bar"], ["qux baz"]]
-  >>> model.predict(input_data)
-  array([[2, 1, 4, 0],
-         [1, 3, 0, 0]])
-
-  Example:
-
-  This example instantiates a TextVectorization layer by passing a list
-  of vocabulary terms to the layer's __init__ method.
-
-  >>> vocab_data = ["earth", "wind", "and", "fire"]
-  >>> max_len = 4  # Sequence length to pad the outputs to.
-  >>>
-  >>> # Create the layer, passing the vocab directly. You can also pass the
-  >>> # vocabulary arg a path to a file containing one vocabulary word per
-  >>> # line.
-  >>> vectorize_layer = TextVectorization(
-  ...  max_tokens=max_features,
-  ...  output_mode='int',
-  ...  output_sequence_length=max_len,
-  ...  vocabulary=vocab_data)
-  >>>
-  >>> # Because we've passed the vocabulary directly, we don't need to adapt
-  >>> # the layer - the vocabulary is already set. The vocabulary contains the
-  >>> # padding token ('') and OOV token ('[UNK]') as well as the passed tokens.
-  >>> vectorize_layer.get_vocabulary()
-  ['', '[UNK]', 'earth', 'wind', 'and', 'fire']
-
-  """
-  # TODO(momernick): Add an examples section to the docstring.
-
-  def __init__(self,
-               max_tokens=None,
-               standardize=LOWER_AND_STRIP_PUNCTUATION,
-               split=SPLIT_ON_WHITESPACE,
-               ngrams=None,
-               output_mode=INT,
-               output_sequence_length=None,
-               pad_to_max_tokens=False,
-               vocabulary=None,
-               **kwargs):
-
-    # This layer only applies to string processing, and so should only have
-    # a dtype of 'string'.
-    if "dtype" in kwargs and kwargs["dtype"] != dtypes.string:
-      raise ValueError("TextVectorization may only have a dtype of string.")
-    elif "dtype" not in kwargs:
-      kwargs["dtype"] = dtypes.string
-
-    # 'standardize' must be one of (None, LOWER_AND_STRIP_PUNCTUATION, callable)
-    layer_utils.validate_string_arg(
-        standardize,
-        allowable_strings=(LOWER_AND_STRIP_PUNCTUATION,),
-        layer_name="TextVectorization",
-        arg_name="standardize",
-        allow_none=True,
-        allow_callables=True)
-
-    # 'split' must be one of (None, SPLIT_ON_WHITESPACE, callable)
-    layer_utils.validate_string_arg(
-        split,
-        allowable_strings=(SPLIT_ON_WHITESPACE,),
-        layer_name="TextVectorization",
-        arg_name="split",
-        allow_none=True,
-        allow_callables=True)
-
-    # Support deprecated names for output_modes.
-    if output_mode == "binary":
-      output_mode = MULTI_HOT
-    if output_mode == "tf-idf":
-      output_mode = TF_IDF
-    # 'output_mode' must be one of (None, INT, COUNT, MULTI_HOT, TF_IDF)
-    layer_utils.validate_string_arg(
-        output_mode,
-        allowable_strings=(INT, COUNT, MULTI_HOT, TF_IDF),
-        layer_name="TextVectorization",
-        arg_name="output_mode",
-        allow_none=True)
-
-    # 'ngrams' must be one of (None, int, tuple(int))
-    if not (ngrams is None or
-            isinstance(ngrams, int) or
-            isinstance(ngrams, tuple) and
-            all(isinstance(item, int) for item in ngrams)):
-      raise ValueError(("`ngrams` must be None, an integer, or a tuple of "
-                        "integers. Got %s") % (ngrams,))
-
-    # 'output_sequence_length' must be one of (None, int) and is only
-    # set if output_mode is INT.
-    if (output_mode == INT and not (isinstance(output_sequence_length, int) or
-                                    (output_sequence_length is None))):
-      raise ValueError("`output_sequence_length` must be either None or an "
-                       "integer when `output_mode` is 'int'. "
-                       "Got %s" % output_sequence_length)
-
-    if output_mode != INT and output_sequence_length is not None:
-      raise ValueError("`output_sequence_length` must not be set if "
-                       "`output_mode` is not 'int'.")
-
-    self._max_tokens = max_tokens
-    self._standardize = standardize
-    self._split = split
-    self._ngrams_arg = ngrams
-    if isinstance(ngrams, int):
-      self._ngrams = tuple(range(1, ngrams + 1))
-    else:
-      self._ngrams = ngrams
-
-    self._output_mode = output_mode
-    self._output_sequence_length = output_sequence_length
-    vocabulary_size = 0
-    # IndexLookup needs to keep track of the current vocab size outside of its
-    # layer weights. We persist it as a hidden part of the config during
-    # serialization.
-    if "vocabulary_size" in kwargs:
-      vocabulary_size = kwargs["vocabulary_size"]
-      del kwargs["vocabulary_size"]
-
-    super(TextVectorization, self).__init__(
-        combiner=None,
-        **kwargs)
-
-    self._index_lookup_layer = string_lookup.StringLookup(
-        max_tokens=max_tokens,
-        vocabulary=vocabulary,
-        pad_to_max_tokens=pad_to_max_tokens,
-        mask_token="",
-        output_mode=output_mode if output_mode is not None else INT,
-        vocabulary_size=vocabulary_size)
-
-  def _assert_same_type(self, expected_type, values, value_name):
-    if dtypes.as_dtype(expected_type) != dtypes.as_dtype(values.dtype):
-      raise RuntimeError("Expected %s type %s, got %s" %
-                         (value_name, expected_type, values.dtype))
-
-  def compute_output_shape(self, input_shape):
-    if self._output_mode != INT:
-      return tensor_shape.TensorShape([input_shape[0], self._max_tokens])
-
-    if self._output_mode == INT and self._split is None:
-      if len(input_shape) <= 1:
-        input_shape = tuple(input_shape) + (1,)
-      return tensor_shape.TensorShape(input_shape)
-
-    if self._output_mode == INT and self._split is not None:
-      input_shape = list(input_shape)
-      if len(input_shape) <= 1:
-        input_shape = input_shape + [self._output_sequence_length]
-      else:
-        input_shape[1] = self._output_sequence_length
-      return tensor_shape.TensorShape(input_shape)
-
-  def compute_output_signature(self, input_spec):
-    output_shape = self.compute_output_shape(input_spec.shape.as_list())
-    output_dtype = (dtypes.int64 if self._output_mode == INT
-                    else backend.floatx())
-    return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
-
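
Worked through the branches of compute_output_shape above; the layer construction shown
here is illustrative:

    int_layer = TextVectorization(output_mode="int", output_sequence_length=4)
    int_layer.compute_output_shape((32, 1))            # TensorShape([32, 4])

    bag_layer = TextVectorization(max_tokens=1000, output_mode="multi_hot")
    bag_layer.compute_output_shape((32, 1))            # TensorShape([32, 1000])
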
-  def adapt(self, data, reset_state=True):
-    """Fits the state of the preprocessing layer to the dataset.
-
-    Overrides the default adapt method to apply relevant preprocessing to the
-    inputs before passing to the combiner.
-
-    Args:
-      data: The data to train on. It can be passed as a tf.data Dataset, a
-        NumPy array, a string tensor, or a list of texts.
-      reset_state: Optional argument specifying whether to clear the state of
-        the layer at the start of the call to `adapt`. This must be True for
-        this layer, which does not support repeated calls to `adapt`.
-    """
-    if not reset_state:
-      raise ValueError("TextVectorization does not support streaming adapts.")
-
-    # Build the layer explicitly with the original data shape instead of relying
-    # on an implicit call to `build` in the base layer's `adapt`, since
-    # preprocessing changes the input shape.
-    if isinstance(data, (list, tuple, np.ndarray)):
-      data = ops.convert_to_tensor_v2_with_dispatch(data)
-
-    if isinstance(data, ops.Tensor):
-      if data.shape.rank == 1:
-        data = array_ops.expand_dims(data, axis=-1)
-      self.build(data.shape)
-      preprocessed_inputs = self._preprocess(data)
-    elif isinstance(data, dataset_ops.DatasetV2):
-      # TODO(momernick): Replace this with a more V2-friendly API.
-      shape = dataset_ops.get_legacy_output_shapes(data)
-      if not isinstance(shape, tensor_shape.TensorShape):
-        raise ValueError("The dataset passed to 'adapt' must contain a single "
-                         "tensor value.")
-      if shape.rank == 0:
-        data = data.map(lambda tensor: array_ops.expand_dims(tensor, 0))
-        shape = dataset_ops.get_legacy_output_shapes(data)
-      if shape.rank == 1:
-        data = data.map(lambda tensor: array_ops.expand_dims(tensor, -1))
-      self.build(dataset_ops.get_legacy_output_shapes(data))
-      preprocessed_inputs = data.map(self._preprocess)
-    else:
-      raise ValueError(
-          "adapt() requires a Dataset or an array as input, got {}".format(
-              type(data)))
-
-    self._index_lookup_layer.adapt(preprocessed_inputs)
-
-  def get_vocabulary(self, include_special_tokens=True):
-    """Returns the current vocabulary of the layer.
-
-    Args:
-      include_special_tokens: If True, the returned vocabulary will include
-        the padding and OOV tokens, and a term's index in the vocabulary will
-        equal the term's index when calling the layer. If False, the returned
-        vocabulary will not include any padding or OOV tokens.
-    """
-    return self._index_lookup_layer.get_vocabulary(include_special_tokens)
-
-  def vocabulary_size(self):
-    """Gets the current size of the layer's vocabulary.
-
-    Returns:
-      The integer size of the vocabulary, including optional mask and OOV indices.
-    """
-    return self._index_lookup_layer.vocabulary_size()
-
-  def get_config(self):
-    # This does not include the 'vocabulary' arg, since if the vocab was passed
-    # at init time it's now stored in variable state - we don't need to
-    # pull it off disk again.
-    config = {
-        "max_tokens": self._index_lookup_layer.max_tokens,
-        "standardize": self._standardize,
-        "split": self._split,
-        "ngrams": self._ngrams_arg,
-        "output_mode": self._output_mode,
-        "output_sequence_length": self._output_sequence_length,
-        "pad_to_max_tokens": self._index_lookup_layer.pad_to_max_tokens,
-        "vocabulary_size": self._index_lookup_layer.vocabulary_size(),
-    }
-    base_config = super(TextVectorization, self).get_config()
-    return dict(list(base_config.items()) + list(config.items()))
-
-  def count_params(self):
-    # This method counts the number of scalars in the weights of this layer.
-    # Since this layer doesn't have any /actual/ weights (in that there's
-    # nothing in this layer that can be trained - we only use the weight
-    # abstraction for ease of saving!) we return 0.
-    return 0
-
-  def set_vocabulary(self, vocabulary, idf_weights=None):
-    """Sets vocabulary (and optionally document frequency) data for this layer.
-
-    This method sets the vocabulary and idf weights for this layer directly,
-    instead of analyzing a dataset through 'adapt'. It should be used whenever
-    the vocab (and optionally document frequency) information is already known.
-    If vocabulary data is already present in the layer, this method will replace
-    it.
-
-    Args:
-      vocabulary: An array of string tokens, or a path to a file containing one
-        token per line.
-      idf_weights: An array of document frequency data, equal in length to the
-        vocabulary. Only necessary if the layer output_mode is TF_IDF.
-
-    Raises:
-      ValueError: If there are too many inputs, the inputs do not match, or
-        input data is missing.
-      RuntimeError: If the vocabulary cannot be set when this function is
-        called. This happens in `"multi_hot"`, `"count"`, and `"tfidf"` modes
-        if `pad_to_max_tokens` is False and the layer itself has already been
-        called.
-    """
-    self._index_lookup_layer.set_vocabulary(vocabulary, idf_weights=idf_weights)
-
-  def build(self, input_shape):
-    # We have to use 'and not ==' here, because for undefined shape axes the
-    # comparison input_shape[-1] == 1 (or != 1) can yield None. With 'and !=',
-    # the expression would then evaluate to False instead of True; it needs to
-    # evaluate to True when the shape is undefined.
-    if self._split is not None:
-      if input_shape.ndims > 1 and not input_shape[-1] == 1:  # pylint: disable=g-comparison-negation
-        raise RuntimeError(
-            "When using TextVectorization to tokenize strings, the innermost "
-            "dimension of the input array must be 1, got shape "
-            "{}".format(input_shape))
-
-    super(TextVectorization, self).build(input_shape)
-
-  def _set_state_variables(self, updates):
-    if not self.built:
-      raise RuntimeError("_set_state_variables() must be called after build().")
-    if self._output_mode == TF_IDF:
-      self.set_vocabulary(updates[_VOCAB_NAME], idf_weights=updates[_IDF_NAME])
-    else:
-      self.set_vocabulary(updates[_VOCAB_NAME])
-
-  def _preprocess(self, inputs):
-    if self._standardize == LOWER_AND_STRIP_PUNCTUATION:
-      if tf_utils.is_ragged(inputs):
-        lowercase_inputs = ragged_functional_ops.map_flat_values(
-            gen_string_ops.string_lower, inputs)
-        # Depending on configuration, we may never touch the non-data tensor
-        # in the ragged inputs tensor. If that is the case, and this is the
-        # only layer in the keras model, running it will throw an error.
-        # To get around this, we wrap the result in an identity.
-        lowercase_inputs = array_ops.identity(lowercase_inputs)
-      else:
-        lowercase_inputs = gen_string_ops.string_lower(inputs)
-      inputs = string_ops.regex_replace(lowercase_inputs, DEFAULT_STRIP_REGEX,
-                                        "")
-    elif callable(self._standardize):
-      inputs = self._standardize(inputs)
-    elif self._standardize is not None:
-      raise ValueError(("%s is not a supported standardization. "
-                        "TextVectorization supports the following options "
-                        "for `standardize`: None, "
-                        "'lower_and_strip_punctuation', or a "
-                        "Callable.") % self._standardize)
-
-    if self._split is not None:
-      # If we are splitting, we validate that the last axis is of dimension 1
-      # and so can be squeezed out. We do this here instead of after splitting
-      # for performance reasons - it's more expensive to squeeze a ragged tensor.
-      if inputs.shape.ndims > 1:
-        inputs = array_ops.squeeze(inputs, axis=-1)
-      if self._split == SPLIT_ON_WHITESPACE:
-        # This treats multiple whitespaces as one whitespace, and strips leading
-        # and trailing whitespace.
-        inputs = ragged_string_ops.string_split_v2(inputs)
-      elif callable(self._split):
-        inputs = self._split(inputs)
-      else:
-        raise ValueError(
-            ("%s is not a supported splitting."
-             "TextVectorization supports the following options "
-             "for `split`: None, 'whitespace', or a Callable.") % self._split)
-
-    # Note that 'inputs' here can be either ragged or dense depending on the
-    # configuration choices for this Layer. The strings.ngrams op, however, does
-    # support both ragged and dense inputs.
-    if self._ngrams is not None:
-      inputs = ragged_string_ops.ngrams(
-          inputs, ngram_width=self._ngrams, separator=" ")
-
-    return inputs
-
-  def call(self, inputs):
-    if isinstance(inputs, (list, tuple, np.ndarray)):
-      inputs = ops.convert_to_tensor_v2_with_dispatch(inputs)
-
-    inputs = self._preprocess(inputs)
-
-    # If we're not doing any output processing, return right away.
-    if self._output_mode is None:
-      return inputs
-
-    lookup_data = self._index_lookup_layer(inputs)
-    if self._output_mode == INT:
-
-      # Maybe trim the output (NOOP if self._output_sequence_length is None).
-      output_tensor = lookup_data[..., :self._output_sequence_length]
-
-      output_shape = output_tensor.shape.as_list()
-      output_shape[-1] = self._output_sequence_length
-
-      # If it is a ragged tensor, convert it to dense with correct shape.
-      if tf_utils.is_ragged(output_tensor):
-        return output_tensor.to_tensor(default_value=0, shape=output_shape)
-
-      if self._output_sequence_length is None:
-        return output_tensor
-
-      padding, _ = array_ops.required_space_to_batch_paddings(
-          output_tensor.shape, output_shape)
-      return array_ops.pad(output_tensor, padding)
-
-    return lookup_data
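For reference, a minimal usage sketch of the surviving public layer, assuming TF 2.6+ where it is exported as tf.keras.layers.TextVectorization (the corpus and printed shape below are illustrative, not part of this change):

import numpy as np
import tensorflow as tf

# adapt() builds the vocabulary from a corpus; calling the layer then maps
# strings to integer ids (0 is reserved for padding, 1 for OOV).
corpus = np.array(["earth wind and fire", "fire and earth michigan"])
layer = tf.keras.layers.TextVectorization(
    standardize="lower_and_strip_punctuation",
    split="whitespace",
    output_mode="int",
    output_sequence_length=4)
layer.adapt(corpus)
print(layer(np.array(["earth wind and fire"])))  # int64 ids, shape (1, 4)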
diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization_distribution_test.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization_distribution_test.py
deleted file mode 100644
index c71d3c5..0000000
--- a/tensorflow/python/keras/layers/preprocessing/text_vectorization_distribution_test.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Distribution tests for keras.layers.preprocessing.text_vectorization."""
-
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import combinations as ds_combinations
-from tensorflow.python.distribute import multi_process_runner
-from tensorflow.python.framework import config
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import test_combinations as combinations
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.distribute import strategy_combinations
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.layers.preprocessing import text_vectorization
-from tensorflow.python.platform import test
-
-
-@ds_combinations.generate(
-    combinations.combine(
-        strategy=strategy_combinations.all_strategies +
-        strategy_combinations.multi_worker_mirrored_strategies,
-        mode=["eager"]))
-class TextVectorizationDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_distribution_strategy_output(self, strategy):
-    # TODO(b/180614455): remove this check when MLIR bridge is always enabled.
-    if backend.is_tpu_strategy(strategy):
-      self.skipTest("This test needs MLIR bridge on TPU.")
-
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_array).batch(
-        2, drop_remainder=True)
-
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = text_vectorization.TextVectorization(
-          max_tokens=None,
-          standardize=None,
-          split=None,
-          output_mode=text_vectorization.INT)
-      layer.set_vocabulary(vocab_data)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_distribution_strategy_output_with_adapt(self, strategy):
-    # TODO(b/180614455): remove this check when MLIR bridge is always enabled.
-    if backend.is_tpu_strategy(strategy):
-      self.skipTest("This test needs MLIR bridge on TPU.")
-    if test.is_built_with_rocm():
-      self.skipTest("MultiworkerMirroredGPU2x fails with ROCm")
-    vocab_data = [[
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ]]
-    vocab_dataset = dataset_ops.Dataset.from_tensors(vocab_data)
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    input_dataset = dataset_ops.Dataset.from_tensor_slices(input_array).batch(
-        2, drop_remainder=True)
-
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    config.set_soft_device_placement(True)
-
-    with strategy.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = text_vectorization.TextVectorization(
-          max_tokens=None,
-          standardize=None,
-          split=None,
-          output_mode=text_vectorization.INT)
-      layer.adapt(vocab_dataset)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_dataset)
-    self.assertAllEqual(expected_output, output_dataset)
-
-if __name__ == "__main__":
-  v2_compat.enable_v2_behavior()
-  multi_process_runner.test_main()
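The deleted distribution test above boils down to building the model under a strategy scope; a sketch of the same flow with the public API, assuming tf.keras.layers.TextVectorization behaves like the removed copy (OneDeviceStrategy keeps the sketch runnable on CPU):

import numpy as np
import tensorflow as tf

strategy = tf.distribute.OneDeviceStrategy("/cpu:0")
with strategy.scope():
  inputs = tf.keras.Input(shape=(None,), dtype=tf.string)
  layer = tf.keras.layers.TextVectorization(
      standardize=None, split=None, output_mode="int")
  # Vocabulary is set before the layer is called, as in the deleted test.
  layer.set_vocabulary(["earth", "wind", "and", "fire"])
  model = tf.keras.Model(inputs, layer(inputs))

print(model.predict(np.array([["earth", "wind", "and", "fire"]])))
# -> [[2 3 4 5]] (0 is padding, 1 is OOV)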
diff --git a/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py b/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py
deleted file mode 100644
index e985cf0..0000000
--- a/tensorflow/python/keras/layers/preprocessing/text_vectorization_test.py
+++ /dev/null
@@ -1,1768 +0,0 @@
-# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Keras text vectorization preprocessing layer."""
-
-import gc
-import os
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python import tf2
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.distribute import one_device_strategy
-from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.layers import convolutional
-from tensorflow.python.keras.layers import core
-from tensorflow.python.keras.layers import embeddings
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
-from tensorflow.python.keras.layers.preprocessing import text_vectorization
-from tensorflow.python.keras.utils import generic_utils
-from tensorflow.python.ops import gen_string_ops
-from tensorflow.python.ops.ragged import ragged_factory_ops
-from tensorflow.python.ops.ragged import ragged_string_ops
-from tensorflow.python.platform import gfile
-from tensorflow.python.platform import test
-
-
-def _get_end_to_end_test_cases():
-  test_cases = (
-      {
-          "testcase_name":
-              "test_simple_tokens_int_mode",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # is sorted by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-              "standardize": None,
-              "split": None,
-              "output_mode": text_vectorization.INT
-          },
-          "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]],
-      },
-      {
-          "testcase_name":
-              "test_simple_tokens_int_mode_hard_cap",
-          # Create an array where 'earth' is the most frequent term, followed by
-          # 'wind', then 'and', then 'fire'. This ensures that the vocab
-          # is sorted by frequency.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": 6,
-              "standardize": None,
-              "split": None,
-              "output_mode": text_vectorization.INT
-          },
-          "expected_output": [[2], [3], [4], [5], [5], [4], [2], [1]],
-      },
-      {
-          "testcase_name":
-              "test_special_tokens_int_mode",
-          # Mask tokens in the vocab data should be ignored, and mapped to 0
-          # when they appear in the input data.
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        [""], [""], [""], ["[UNK]"], ["[UNK]"], ["[UNK]"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], [""], ["wind"], ["[UNK]"], ["and"], [""],
-                        ["fire"], ["and"], ["[UNK]"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-              "standardize": None,
-              "split": None,
-              "output_mode": text_vectorization.INT
-          },
-          "expected_output": [[2], [0], [3], [1], [4], [0], [5], [4], [1], [1]],
-      },
-      {
-          "testcase_name":
-              "test_documents_int_mode",
-          "vocab_data":
-              np.array([["fire earth earth"], ["earth earth"], ["wind wind"],
-                        ["and wind and"]]),
-          "input_data":
-              np.array([["earth wind and"], ["fire fire"], ["and earth"],
-                        ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-              "standardize": None,
-              "split": text_vectorization.SPLIT_ON_WHITESPACE,
-              "output_mode": text_vectorization.INT
-          },
-          "expected_output": [[2, 3, 4], [5, 5, 0], [4, 2, 0], [1, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_documents_1d_input_int_mode",
-          "vocab_data":
-              np.array([
-                  "fire earth earth", "earth earth", "wind wind", "and wind and"
-              ]),
-          "input_data":
-              np.array([["earth wind and"], ["fire fire"], ["and earth"],
-                        ["michigan"]]),
-          "kwargs": {
-              "max_tokens": None,
-              "standardize": None,
-              "split": text_vectorization.SPLIT_ON_WHITESPACE,
-              "output_mode": text_vectorization.INT
-          },
-          "expected_output": [[2, 3, 4], [5, 5, 0], [4, 2, 0], [1, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_simple_tokens_binary_mode",
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "standardize": None,
-              "split": None,
-              "output_mode": text_vectorization.MULTI_HOT
-          },
-          "expected_output": [[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0],
-                              [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
-                              [0, 1, 0, 0, 0], [1, 0, 0, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_documents_binary_mode",
-          "vocab_data":
-              np.array([["fire earth earth"], ["earth earth"], ["wind wind"],
-                        ["and wind and"]]),
-          "input_data":
-              np.array([["earth wind"], ["and"], ["fire fire"],
-                        ["earth michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "standardize": None,
-              "split": text_vectorization.SPLIT_ON_WHITESPACE,
-              "output_mode": text_vectorization.MULTI_HOT
-          },
-          "expected_output": [[0, 1, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1],
-                              [1, 1, 0, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_simple_tokens_count_mode",
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "standardize": None,
-              "split": None,
-              "output_mode": text_vectorization.COUNT
-          },
-          "expected_output": [[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0],
-                              [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
-                              [0, 1, 0, 0, 0], [1, 0, 0, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_documents_count_mode",
-          "vocab_data":
-              np.array([["fire earth earth"], ["earth earth"], ["wind wind"],
-                        ["and wind and"]]),
-          "input_data":
-              np.array([["earth wind"], ["and"], ["fire fire"],
-                        ["earth michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "standardize": None,
-              "split": text_vectorization.SPLIT_ON_WHITESPACE,
-              "output_mode": text_vectorization.COUNT
-          },
-          "expected_output": [[0, 1, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 2],
-                              [1, 1, 0, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_tokens_idf_mode",
-          "vocab_data":
-              np.array([["fire"], ["earth"], ["earth"], ["earth"], ["earth"],
-                        ["wind"], ["wind"], ["wind"], ["and"], ["and"]]),
-          "input_data":
-              np.array([["earth"], ["wind"], ["and"], ["fire"], ["fire"],
-                        ["and"], ["earth"], ["michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "standardize": None,
-              "split": None,
-              "output_mode": text_vectorization.TF_IDF
-          },
-          "expected_output": [[0, 1.098612, 0, 0, 0], [0, 0, 1.252763, 0, 0],
-                              [0, 0, 0, 1.466337, 0], [0, 0, 0, 0, 1.7917595],
-                              [0, 0, 0, 0, 1.7917595], [0, 0, 0, 1.4663371, 0],
-                              [0, 1.098612, 0, 0, 0], [1.402368, 0, 0, 0, 0]],
-      },
-      {
-          "testcase_name":
-              "test_documents_idf_mode",
-          "vocab_data":
-              np.array([["fire earth earth"], ["earth earth"], ["wind wind"],
-                        ["and wind and"]]),
-          "input_data":
-              np.array([["earth wind"], ["and"], ["fire fire"],
-                        ["earth michigan"]]),
-          "kwargs": {
-              "max_tokens": 5,
-              "standardize": None,
-              "split": text_vectorization.SPLIT_ON_WHITESPACE,
-              "output_mode": text_vectorization.TF_IDF
-          },
-          "expected_output": [[0., 0.847298, 0.847298, 0., 0.],
-                              [0., 0., 0., 1.098612, 0.],
-                              [0., 0., 0., 0., 2.197225],
-                              [0.972955, 0.847298, 0., 0., 0.]],
-      },
-  )
-
-  crossed_test_cases = []
-  # Cross above test cases with use_dataset in (True, False)
-  for use_dataset in (True, False):
-    for case in test_cases:
-      case = case.copy()
-      if use_dataset:
-        case["testcase_name"] = case["testcase_name"] + "_with_dataset"
-      case["use_dataset"] = use_dataset
-      crossed_test_cases.append(case)
-
-  return crossed_test_cases
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationLayerTest(keras_parameterized.TestCase,
-                                 preprocessing_test_utils.PreprocessingLayerTest
-                                ):
-
-  @parameterized.named_parameters(*_get_end_to_end_test_cases())
-  def test_layer_end_to_end_with_adapt(self, vocab_data, input_data, kwargs,
-                                       use_dataset, expected_output):
-    cls = text_vectorization.TextVectorization
-    if kwargs.get("output_mode") == text_vectorization.INT:
-      expected_output_dtype = dtypes.int64
-    else:
-      expected_output_dtype = dtypes.float32
-    input_shape = input_data.shape
-
-    if use_dataset:
-      # Keras APIs expect batched datasets.
-      # TODO(rachelim): `model.predict` predicts the result on each
-      # dataset batch separately, then tries to concatenate the results
-      # together. When the results have different shapes on the non-concat
-      # axis (which can happen in the output_mode = INT case for
-      # TextVectorization), the concatenation fails. In real use cases, this may
-      # not be an issue because users are likely to pipe the preprocessing layer
-      # into other keras layers instead of predicting it directly. A workaround
-      # for these unit tests is to have the dataset only contain one batch, so
-      # no concatenation needs to happen with the result. For consistency with
-      # numpy input, we should make `predict` join differently shaped results
-      # together sensibly, with 0 padding.
-      input_data = dataset_ops.Dataset.from_tensor_slices(input_data).batch(
-          input_shape[0])
-      vocab_data = dataset_ops.Dataset.from_tensor_slices(vocab_data).batch(
-          input_shape[0])
-
-    output_data = testing_utils.layer_test(
-        cls,
-        kwargs=kwargs,
-        input_shape=input_shape,
-        input_data=input_data,
-        input_dtype=dtypes.string,
-        expected_output_dtype=expected_output_dtype,
-        validate_training=False,
-        adapt_data=vocab_data)
-    self.assertAllClose(expected_output, output_data)
-
-  def test_scalar_input_int_mode_no_len_limit(self):
-    vocab_data = [
-        "fire earth earth", "earth earth", "wind wind", "and wind and"
-    ]
-    input_data = "earth wind and fire fire and earth michigan"
-    layer = text_vectorization.TextVectorization()
-    layer.adapt(vocab_data)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1])
-    layer.set_vocabulary(["earth", "wind", "and", "fire"])
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1])
-
-  def test_scalar_input_int_mode_trim_to_len_limit(self):
-    vocab_data = [
-        "fire earth earth", "earth earth", "wind wind", "and wind and"
-    ]
-    input_data = "earth wind and fire fire and earth michigan"
-    layer = text_vectorization.TextVectorization(output_sequence_length=3)
-    layer.adapt(vocab_data)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [2, 3, 4])
-    layer.set_vocabulary(["earth", "wind", "and", "fire"])
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [2, 3, 4])
-
-  def test_scalar_input_int_pad_to_len_limit(self):
-    vocab_data = [
-        "fire earth earth", "earth earth", "wind wind", "and wind and"
-    ]
-    input_data = "earth wind and fire fire and earth michigan"
-    layer = text_vectorization.TextVectorization(output_sequence_length=10)
-    layer.adapt(vocab_data)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1, 0, 0])
-    layer.set_vocabulary(["earth", "wind", "and", "fire"])
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [2, 3, 4, 5, 5, 4, 2, 1, 0, 0])
-
-  def test_list_inputs_1d(self):
-    vocab_data = ["two two two", "two three three", "three four four five"]
-    input_data = ["two three", "four five"]
-    layer = text_vectorization.TextVectorization()
-    layer.adapt(vocab_data)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-    layer.set_vocabulary(["two", "three", "four", "five"])
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-
-  def test_tensor_inputs(self):
-    vocab_data = constant_op.constant(
-        ["two two two", "two three three", "three four four five"])
-    input_data = constant_op.constant(["two three", "four five"])
-    layer = text_vectorization.TextVectorization()
-    layer.adapt(vocab_data)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-    layer.set_vocabulary(["two", "three", "four", "five"])
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-
-  def test_list_inputs_2d(self):
-    vocab_data = [
-        ["two two two"], ["two three three"], ["three four four five"]]
-    input_data = [["two three"], ["four five"]]
-    layer = text_vectorization.TextVectorization()
-    layer.adapt(vocab_data)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-    layer.set_vocabulary(["two", "three", "four", "five"])
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-
-  def test_dataset_of_single_strings(self):
-    vocab_data = ["two two two", "two three three", "three four four five"]
-    input_data = ["two three", "four five"]
-    vocab_ds = dataset_ops.Dataset.from_tensor_slices(vocab_data)  # unbatched
-    layer = text_vectorization.TextVectorization()
-    layer.adapt(vocab_ds)
-    out = layer(input_data)
-    if context.executing_eagerly():
-      self.assertAllClose(out.numpy(), [[2, 3], [4, 5]])
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "1d",
-          "data": ["0", "a", "b", "c", "d", "e", "a", "b", "c", "d", "f"],
-          "expected": [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1]
-      },
-      {
-          "testcase_name": "2d",
-          "data": [["0", "a", "b", "c", "d"], ["e", "a", "b", "c", "d"], ["f"]],
-          "expected": [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 0, 0, 0, 0]]
-      },
-      {
-          "testcase_name":
-              "3d",
-          "data": [[["0", "a", "b"], ["c", "d"]], [["e", "a"], ["b", "c", "d"]],
-                   [["f"]]],
-          "expected": [[[1, 2, 3], [4, 5, 0]], [[1, 2, 0], [3, 4, 5]],
-                       [[1, 0, 0], [0, 0, 0]]]
-      },
-  )
-  def test_layer_dimensionality_handling(self, data, expected):
-    vocab = ["a", "b", "c", "d"]
-    vectorization = text_vectorization.TextVectorization(
-        max_tokens=None, standardize=None, split=None, pad_to_max_tokens=False)
-    vectorization.set_vocabulary(vocab)
-    output = vectorization(ragged_factory_ops.constant(data))
-    self.assertAllEqual(expected, output)
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "1d",
-          "data": ["0 a b c d e a b c d f"],
-          "expected": [[1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1]]
-      },
-      {
-          "testcase_name":
-              "3d",
-          "data": [[["0 a b"], ["c d"]], [["e a"], ["b c d"]], [["f"]]],
-          "expected": [[[1, 2, 3], [4, 5, 0]], [[1, 2, 0], [3, 4, 5]],
-                       [[1, 0, 0], [0, 0, 0]]]
-      },
-  )
-  def test_layer_dimensionality_handling_with_split(self, data, expected):
-    vocab = ["a", "b", "c", "d"]
-    vectorization = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        pad_to_max_tokens=False)
-    vectorization.set_vocabulary(vocab)
-    output = vectorization(ragged_factory_ops.constant(data, inner_shape=(1,)))
-    self.assertAllEqual(expected, output)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationPreprocessingTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def _write_to_temp_file(self, file_name, vocab_list):
-    vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-    with gfile.GFile(vocab_path, "w") as writer:
-      for vocab in vocab_list:
-        writer.write(vocab + "\n")
-      writer.flush()
-      writer.close()
-    return vocab_path
-
-  def test_summary_before_adapt(self):
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION,
-        split=None,
-        ngrams=None,
-        output_mode=text_vectorization.TF_IDF)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    # We are testing that model.summary() can be called without erroring out.
-    # (b/145726907)
-    model.summary()
-
-  def test_normalization(self):
-    input_array = np.array([["Earth", "wInD", "aNd", "firE"],
-                            ["fire|", "an<>d", "{earth}", "michigan@%$"]])
-    expected_output = np.array([[b"earth", b"wind", b"and", b"fire"],
-                                [b"fire", b"and", b"earth", b"michigan"]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION,
-        split=None,
-        ngrams=None,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_normalization_ragged_inputs(self):
-    input_array = ragged_factory_ops.constant([["Earth", "wInD", "aNd", "firE"],
-                                               ["fire|", "an<>d", "{earth}"]])
-    expected_output = [[b"earth", b"wind", b"and", b"fire"],
-                       [b"fire", b"and", b"earth"]]
-
-    input_data = keras.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION,
-        split=None,
-        ngrams=None,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_custom_normalization(self):
-    input_array = np.array([["Earth", "wInD", "aNd", "firE"],
-                            ["fire|", "an<>d", "{earth}", "michigan@%$"]])
-    expected_output = np.array(
-        [[b"earth", b"wind", b"and", b"fire"],
-         [b"fire|", b"an<>d", b"{earth}", b"michigan@%$"]])
-
-    custom_standardization = gen_string_ops.string_lower
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=custom_standardization,
-        split=None,
-        ngrams=None,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_string_splitting(self):
-    input_array = np.array([["earth wind and fire"],
-                            ["\tfire\tand\nearth    michigan  "]])
-    expected_output = [[b"earth", b"wind", b"and", b"fire"],
-                       [b"fire", b"and", b"earth", b"michigan"]]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        ngrams=None,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_custom_string_splitting(self):
-    input_array = np.array([["earth>wind>and fire"],
-                            ["\tfire>and\nearth>michigan"]])
-    expected_output = [[b"earth", b"wind", b"and fire"],
-                       [b"\tfire", b"and\nearth", b"michigan"]]
-
-    custom_split = lambda x: ragged_string_ops.string_split_v2(x, sep=">")
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=custom_split,
-        ngrams=None,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_single_ngram_value_ragged_inputs(self):
-    input_array = ragged_factory_ops.constant([["earth", "wind", "and", "fire"],
-                                               ["fire", "and", "earth"]])
-    # pyformat: disable
-    expected_output = [[b"earth", b"wind", b"and", b"fire",
-                        b"earth wind", b"wind and", b"and fire",
-                        b"earth wind and", b"wind and fire"],
-                       [b"fire", b"and", b"earth",
-                        b"fire and", b"and earth",
-                        b"fire and earth"]]
-    # pyformat: enable
-
-    input_data = keras.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        ngrams=3,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_single_ngram_value(self):
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    # pyformat: disable
-    expected_output = [[b"earth", b"wind", b"and", b"fire",
-                        b"earth wind", b"wind and", b"and fire",
-                        b"earth wind and", b"wind and fire"],
-                       [b"fire", b"and", b"earth", b"michigan",
-                        b"fire and", b"and earth", b"earth michigan",
-                        b"fire and earth", b"and earth michigan"]]
-    # pyformat: enable
-
-    input_data = keras.Input(shape=(4,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        ngrams=3,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_multiple_ngram_values(self):
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    # pyformat: disable
-    expected_output = [[b"earth wind", b"wind and", b"and fire",
-                        b"earth wind and", b"wind and fire"],
-                       [b"fire and", b"and earth", b"earth michigan",
-                        b"fire and earth", b"and earth michigan"]]
-    # pyformat: enable
-
-    input_data = keras.Input(shape=(4,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        ngrams=(2, 3),
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_string_multiple_preprocessing_steps(self):
-    input_array = np.array([["earth wInD and firE"],
-                            ["\tfire\tand\nearth!!    michig@n  "]])
-    expected_output = [[
-        b"earth",
-        b"wind",
-        b"and",
-        b"fire",
-        b"earth wind",
-        b"wind and",
-        b"and fire",
-    ],
-                       [
-                           b"fire",
-                           b"and",
-                           b"earth",
-                           b"michign",
-                           b"fire and",
-                           b"and earth",
-                           b"earth michign",
-                       ]]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=text_vectorization.LOWER_AND_STRIP_PUNCTUATION,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        ngrams=2,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_string_splitting_with_non_1d_array_fails(self):
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=None)
-    with self.assertRaisesRegex(RuntimeError,
-                                ".*tokenize strings, the innermost dime.*"):
-      _ = layer(input_data)
-
-  def test_string_splitting_with_non_1d_raggedarray_fails(self):
-    input_data = keras.Input(shape=(None,), ragged=True, dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        vocabulary=["a"],
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=None)
-    with self.assertRaisesRegex(RuntimeError,
-                                ".*tokenize strings, the innermost dime.*"):
-      _ = layer(input_data)
-
-  def test_standardization_with_invalid_standardize_arg(self):
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(vocabulary=["a"])
-    layer._standardize = "unsupported"
-    with self.assertRaisesRegex(ValueError,
-                                ".*is not a supported standardization.*"):
-      _ = layer(input_data)
-
-  def test_splitting_with_invalid_split_arg(self):
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(vocabulary=["a"])
-    layer._split = "unsupported"
-    with self.assertRaisesRegex(ValueError, ".*is not a supported splitting.*"):
-      _ = layer(input_data)
-
-  def test_vocab_setting_via_init(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT,
-        vocabulary=vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_vocab_setting_via_init_file(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_data)
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT,
-        vocabulary=vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_vocab_setting_via_setter(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_data)
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT)
-    layer.set_vocabulary(vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_vocab_setting_with_oov_via_setter(self):
-    vocab_data = ["", "[UNK]", "earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    vocab_path = self._write_to_temp_file("vocab_file", vocab_data)
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT)
-    layer.set_vocabulary(vocab_path)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
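The custom standardize/split tests above pass user callables straight through to the preprocessing step; a sketch of the same behaviour against the public layer, assuming it matches the removed copy (the ">"-separated input mirrors test_custom_string_splitting):

import tensorflow as tf

layer = tf.keras.layers.TextVectorization(
    standardize=None,
    # Any callable returning a RaggedTensor can replace whitespace splitting.
    split=lambda x: tf.strings.split(x, sep=">"),
    output_mode="int",
    vocabulary=["earth", "wind", "and", "fire"])
print(layer(tf.constant(["earth>wind>and>fire", "fire>and>earth>michigan"])))
# -> [[2 3 4 5]
#     [5 4 2 1]]   (1 is the OOV id)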
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationDistributionTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_distribution_strategy_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    strategy = one_device_strategy.OneDeviceStrategy("/cpu:0")
-    with strategy.scope():
-      input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-      layer = text_vectorization.TextVectorization(
-          max_tokens=None,
-          standardize=None,
-          split=None,
-          output_mode=text_vectorization.INT)
-      layer.set_vocabulary(vocab_data)
-      int_data = layer(input_data)
-      model = keras.Model(inputs=input_data, outputs=int_data)
-
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationOutputTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_int_output(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_densifies_with_zeros(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # Create an input array that has 5 elements in the first example and 4 in
-    # the second. This should output a 2x5 tensor with a padding value in the
-    # second example.
-    input_array = np.array([["earth wind and also fire"],
-                            ["fire and earth michigan"]])
-    expected_output = [[2, 3, 4, 1, 5], [5, 4, 2, 1, 0]]
-
-    # This test doesn't explicitly set an output shape, so the 2nd dimension
-    # should stay 'None'.
-    expected_output_shape = [None, None]
-
-    # The input shape here is explicitly 1 because we're tokenizing.
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=text_vectorization.INT)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_densifies_with_zeros_and_pads(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # Create an input array that has 5 elements in the first example and 4 in
-    # the second. This should output a 2x6 tensor with a padding value in the
-    # second example, since output_sequence_length is set to 6.
-    input_array = np.array([["earth wind and also fire"],
-                            ["fire and earth michigan"]])
-    expected_output = [[2, 3, 4, 1, 5, 0], [5, 4, 2, 1, 0, 0]]
-
-    output_sequence_length = 6
-    expected_output_shape = [None, output_sequence_length]
-
-    # The input shape here is explicitly 1 because we're tokenizing.
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=text_vectorization.INT,
-        output_sequence_length=output_sequence_length)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_densifies_with_zeros_and_strips(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # Create an input array that has 5 elements in the first example and 4 in
-    # the second. Both examples should be truncated to a 2x3 tensor, since
-    # output_sequence_length is set to 3.
-    input_array = np.array([["earth wind and also fire"],
-                            ["fire and earth michigan"]])
-    expected_output = [[2, 3, 4], [5, 4, 2]]
-    output_sequence_length = 3
-    expected_output_shape = [None, output_sequence_length]
-
-    # The input shape here is explicitly 1 because we're tokenizing.
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=text_vectorization.INT,
-        output_sequence_length=output_sequence_length)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_int_output_dynamically_strips_and_pads(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # Create an input array that has 5 elements in the first example and 4 in
-    # the second. Both examples should be truncated to a 2x3 tensor, since
-    # output_sequence_length is set to 3; padding is exercised further below.
-    input_array = np.array([["earth wind and also fire"],
-                            ["fire and earth michigan"]])
-    expected_output = [[2, 3, 4], [5, 4, 2]]
-    output_sequence_length = 3
-    expected_output_shape = [None, output_sequence_length]
-
-    # The input shape here is explicitly 1 because we're tokenizing.
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=text_vectorization.INT,
-        output_sequence_length=output_sequence_length)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-    # Create an input array that has 1 element in the first example and 2 in
-    # the second. This should output a 2x3 tensor with a padding value in the
-    # second example, since output_sequence_length is set to 3.
-    input_array_2 = np.array([["wind"], ["fire and"]])
-    expected_output_2 = [[3, 0, 0], [5, 4, 0]]
-    output_dataset = model.predict(input_array_2)
-    self.assertAllEqual(expected_output_2, output_dataset)
-
-  def test_binary_output_hard_maximum(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0, 0],
-                       [1, 1, 0, 1, 0, 0]]
-    # pyformat: enable
-    max_tokens = 6
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=max_tokens,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=True)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_binary_output_soft_maximum(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0],
-                       [1, 1, 0, 1, 0]]
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=False)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bag_output_hard_maximum_set_vocabulary_after_build(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0],
-                       [1, 1, 0, 1, 0]]
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=max_tokens,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=True)
-    int_data = layer(input_data)
-    layer.set_vocabulary(vocab_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bag_output_hard_maximum_adapt_after_build(self):
-    vocab_data = np.array([
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ])
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0],
-                       [1, 1, 0, 1, 0]]
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=max_tokens,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=True)
-    int_data = layer(input_data)
-    layer.adapt(vocab_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bag_output_hard_maximum_set_state_variables_after_build(self):
-    state_variables = {
-        text_vectorization._VOCAB_NAME: ["earth", "wind", "and", "fire"]
-    }
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0],
-                       [1, 1, 0, 1, 0]]
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=max_tokens,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=True)
-    int_data = layer(input_data)
-    layer._set_state_variables(state_variables)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bag_output_hard_maximum_multiple_adapts(self):
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-    adapt_data = ["earth", "earth", "earth", "earth", "wind", "wind", "wind"]
-    first_expected_output = [
-        [1, 1, 1, 0, 0],
-        [1, 1, 0, 0, 0],
-    ]
-    second_adapt_data = [
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ]
-    second_expected_output = [
-        [0, 1, 1, 1, 0],
-        [1, 1, 0, 1, 0],
-    ]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=5,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=True)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    # Test the first adapt
-    layer.adapt(adapt_data)
-    first_output = model.predict(input_array)
-    # Test the second adapt
-    layer.adapt(second_adapt_data)
-    second_output = model.predict(input_array)
-    self.assertAllEqual(first_expected_output, first_output)
-    self.assertAllEqual(second_expected_output, second_output)
-
-  def test_bag_output_soft_maximum_set_state_after_build(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 1, 1, 1, 0],
-                       [1, 1, 0, 1, 0]]
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=False)
-    layer.build(input_data.shape)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_bag_output_soft_maximum_set_vocabulary_after_call_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=False)
-    layer.adapt(vocab_data)
-    _ = layer(input_data)
-    with self.assertRaisesRegex(RuntimeError, "vocabulary cannot be changed"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_bag_output_soft_maximum_set_state_variables_after_call_fails(self):
-    state_variables = {
-        text_vectorization._VOCAB_NAME: ["earth", "wind", "and", "fire"]
-    }
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT,
-        pad_to_max_tokens=False)
-    layer.adapt(["earth", "wind"])
-    _ = layer(input_data)
-    with self.assertRaisesRegex(RuntimeError, "vocabulary cannot be changed"):
-      layer._set_state_variables(state_variables)
-
-  def test_count_output_hard_maximum(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 2, 1, 1, 0, 0],
-                       [2, 1, 0, 1, 0, 0]]
-    # pyformat: enable
-    max_tokens = 6
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=6,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.COUNT,
-        pad_to_max_tokens=True)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_count_output_soft_maximum(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[0, 2, 1, 1, 0],
-                       [2, 1, 0, 1, 0]]
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.COUNT,
-        pad_to_max_tokens=False)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-  def test_tfidf_output_hard_maximum(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # OOV idf weight (bucket 0) should be 0.5, the average of passed weights.
-    idf_weights = [.4, .25, .75, .6]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "fire", "earth", "michigan"]])
-
-    # pyformat: disable
-    # pylint: disable=bad-whitespace
-    expected_output = [[ 0, .8, .25, .75,  0, 0],
-                       [ 1, .4,   0,   0, .6, 0]]
-    # pylint: enable=bad-whitespace
-    # pyformat: enable
-    max_tokens = 6
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=6,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.TF_IDF,
-        pad_to_max_tokens=True)
-    layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllClose(expected_output, output_dataset)
-
-  def test_tfidf_output_soft_maximum(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # OOV idf weight (bucket 0) should be 0.5, the average of passed weights.
-    idf_weights = [.4, .25, .75, .6]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "fire", "earth", "michigan"]])
-
-    # pyformat: disable
-    # pylint: disable=bad-whitespace
-    expected_output = [[ 0, .8, .25, .75,  0],
-                       [ 1, .4,   0,   0, .6]]
-    # pylint: enable=bad-whitespace
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.TF_IDF,
-        pad_to_max_tokens=False)
-    layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllClose(expected_output, output_dataset)
-
-  def test_tfidf_output_set_oov_weight(self):
-    vocab_data = ["[UNK]", "earth", "wind", "and", "fire"]
-    idf_weights = [.1, .4, .25, .75, .6]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "fire", "earth", "michigan"]])
-
-    # pyformat: disable
-    # pylint: disable=bad-whitespace
-    expected_output = [[  0, .8, .25, .75,  0],
-                       [ .2, .4,   0,   0, .6]]
-    # pylint: enable=bad-whitespace
-    # pyformat: enable
-    max_tokens = 5
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.TF_IDF,
-        pad_to_max_tokens=False)
-    layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-    int_data = layer(input_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllClose(expected_output, output_dataset)
-
-  def test_accept_1D_input(self):
-    input_array = np.array(["earth wind and fire",
-                            "fire and earth michigan"])
-    layer = text_vectorization.TextVectorization(
-        standardize=None, split=None, output_mode="int")
-    layer.adapt(input_array)
-    _ = layer(input_array)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationModelBuildingTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  @parameterized.named_parameters(
-      {
-          "testcase_name": "count_hard_max",
-          "pad_to_max_tokens": True,
-          "output_mode": text_vectorization.COUNT
-      }, {
-          "testcase_name": "count_soft_max",
-          "pad_to_max_tokens": False,
-          "output_mode": text_vectorization.COUNT
-      }, {
-          "testcase_name": "binary_hard_max",
-          "pad_to_max_tokens": True,
-          "output_mode": text_vectorization.MULTI_HOT
-      }, {
-          "testcase_name": "binary_soft_max",
-          "pad_to_max_tokens": False,
-          "output_mode": text_vectorization.MULTI_HOT
-      }, {
-          "testcase_name": "tfidf_hard_max",
-          "pad_to_max_tokens": True,
-          "output_mode": text_vectorization.TF_IDF
-      }, {
-          "testcase_name": "tfidf_soft_max",
-          "pad_to_max_tokens": False,
-          "output_mode": text_vectorization.TF_IDF
-      })
-  def test_end_to_end_bagged_modeling(self, output_mode, pad_to_max_tokens):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    idf_weights = [.5, .25, .2, .125]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=10,
-        standardize=None,
-        split=None,
-        output_mode=output_mode,
-        pad_to_max_tokens=pad_to_max_tokens)
-    if output_mode == text_vectorization.TF_IDF:
-      layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-    else:
-      layer.set_vocabulary(vocab_data)
-
-    int_data = layer(input_data)
-    float_data = backend.cast(int_data, dtype="float32")
-    output_data = core.Dense(64)(float_data)
-    model = keras.Model(inputs=input_data, outputs=output_data)
-    _ = model.predict(input_array)
-
-  def test_end_to_end_vocab_modeling(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth wind and also fire"],
-                            ["fire and earth michigan"]])
-    output_sequence_length = 6
-    max_tokens = 5
-
-    # The input shape here is explicitly 1 because we're tokenizing.
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=text_vectorization.SPLIT_ON_WHITESPACE,
-        output_mode=text_vectorization.INT,
-        output_sequence_length=output_sequence_length)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    embedded_data = embeddings.Embedding(
-        input_dim=max_tokens + 1, output_dim=32)(
-            int_data)
-    output_data = convolutional.Conv1D(
-        250, 3, padding="valid", activation="relu", strides=1)(
-            embedded_data)
-
-    model = keras.Model(inputs=input_data, outputs=output_data)
-    _ = model.predict(input_array)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationVocbularyTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest,
-):
-
-  def test_get_vocabulary(self):
-    vocab = ["earth", "wind", "and", "fire"]
-
-    layer = text_vectorization.TextVectorization(vocabulary=vocab)
-    self.assertAllEqual(layer.get_vocabulary(),
-                        ["", "[UNK]", "earth", "wind", "and", "fire"])
-
-  def test_get_vocabulary_adapt(self):
-    vocab = np.array([["earth earth earth earth wind wind wind and and fire"]])
-
-    layer = text_vectorization.TextVectorization()
-    layer.adapt(vocab)
-    self.assertAllEqual(layer.get_vocabulary(),
-                        ["", "[UNK]", "earth", "wind", "and", "fire"])
-
-  def test_get_vocabulary_no_special_tokens(self):
-    vocab = ["earth", "wind", "and", "fire"]
-
-    layer = text_vectorization.TextVectorization(vocabulary=vocab)
-    self.assertAllEqual(
-        layer.get_vocabulary(include_special_tokens=False),
-        ["earth", "wind", "and", "fire"])
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationErrorTest(keras_parameterized.TestCase,
-                                 preprocessing_test_utils.PreprocessingLayerTest
-                                ):
-
-  def test_too_long_vocab_fails_in_single_setting(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    layer = text_vectorization.TextVectorization(
-        max_tokens=4,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT)
-    with self.assertRaisesRegex(ValueError,
-                                "vocabulary larger than the maximum vocab.*"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_setting_vocab_without_idf_weights_fails_in_tfidf_mode(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-
-    layer = text_vectorization.TextVectorization(
-        max_tokens=5,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.TF_IDF)
-    with self.assertRaisesRegex(
-        ValueError, "`idf_weights` must be set if output_mode is TF_IDF"):
-      layer.set_vocabulary(vocab_data)
-
-  def test_idf_weights_length_mismatch_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    idf_weights = [1, 2, 3]
-    layer = text_vectorization.TextVectorization(
-        max_tokens=5,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.TF_IDF)
-    with self.assertRaisesRegex(
-        ValueError, "`idf_weights` must be the same length as vocab"):
-      layer.set_vocabulary(vocab_data, idf_weights)
-
-  def test_set_tfidf_in_non_tfidf_fails(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    idf_weights = [1, 2, 3, 4]
-    layer = text_vectorization.TextVectorization(
-        max_tokens=5,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.MULTI_HOT)
-    with self.assertRaisesRegex(ValueError,
-                                "`idf_weights` should only be set if"):
-      layer.set_vocabulary(vocab_data, idf_weights)
-
-  def test_zero_max_tokens_fails(self):
-    with self.assertRaisesRegex(ValueError, "max_tokens.*"):
-      _ = text_vectorization.TextVectorization(max_tokens=0)
-
-  def test_non_string_dtype_fails(self):
-    with self.assertRaisesRegex(ValueError, "dtype of string.*"):
-      _ = text_vectorization.TextVectorization(dtype=dtypes.int64)
-
-  def test_unknown_standardize_arg_fails(self):
-    with self.assertRaisesRegex(ValueError,
-                                "standardize arg.*unsupported_value"):
-      _ = text_vectorization.TextVectorization(standardize="unsupported_value")
-
-  def test_unknown_split_arg_fails(self):
-    with self.assertRaisesRegex(ValueError, "split arg.*unsupported_value"):
-      _ = text_vectorization.TextVectorization(split="unsupported_value")
-
-  def test_unknown_output_mode_arg_fails(self):
-    with self.assertRaisesRegex(ValueError,
-                                "output_mode arg.*unsupported_value"):
-      _ = text_vectorization.TextVectorization(output_mode="unsupported_value")
-
-  def test_unknown_ngrams_arg_fails(self):
-    with self.assertRaisesRegex(ValueError, "ngrams.*unsupported_value"):
-      _ = text_vectorization.TextVectorization(ngrams="unsupported_value")
-
-  def test_float_ngrams_arg_fails(self):
-    with self.assertRaisesRegex(ValueError, "ngrams.*2.9"):
-      _ = text_vectorization.TextVectorization(ngrams=2.9)
-
-  def test_float_tuple_ngrams_arg_fails(self):
-    with self.assertRaisesRegex(ValueError, "ngrams.*(1.3, 2.9)"):
-      _ = text_vectorization.TextVectorization(ngrams=(1.3, 2.9))
-
-  def test_non_int_output_sequence_length_dtype_fails(self):
-    with self.assertRaisesRegex(ValueError, "output_sequence_length.*2.0"):
-      _ = text_vectorization.TextVectorization(
-          output_mode="int", output_sequence_length=2.0)
-
-  def test_non_none_output_sequence_length_fails_if_output_type_not_int(self):
-    with self.assertRaisesRegex(ValueError,
-                                "`output_sequence_length` must not be set"):
-      _ = text_vectorization.TextVectorization(
-          output_mode="count", output_sequence_length=2)
-
-
-# Custom functions for the custom callable serialization test. Declared here
-# to avoid multiple registrations from run_all_keras_modes().
-@generic_utils.register_keras_serializable(package="Test")
-def custom_standardize_fn(x):
-  return gen_string_ops.string_lower(x)
-
-
-@generic_utils.register_keras_serializable(package="Test")
-def custom_split_fn(x):
-  return ragged_string_ops.string_split_v2(x, sep=">")
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationSavingTest(
-    keras_parameterized.TestCase,
-    preprocessing_test_utils.PreprocessingLayerTest):
-
-  def tearDown(self):
-    keras.backend.clear_session()
-    gc.collect()
-    super(TextVectorizationSavingTest, self).tearDown()
-
-  def test_saving(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-
-    model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = keras.models.load_model(output_path)
-    self.assertAllEqual(loaded_model.predict(input_array), expected_output)
-
-  def test_saving_when_nested(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    input_array = np.array([["earth", "wind", "and", "fire"],
-                            ["fire", "and", "earth", "michigan"]])
-    expected_output = [[2, 3, 4, 5], [5, 4, 2, 1]]
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.INT)
-    layer.set_vocabulary(vocab_data)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-
-    outer_input = keras.Input(shape=(None,), dtype=dtypes.string)
-    outer_output = model(outer_input)
-    outer_model = keras.Model(inputs=outer_input, outputs=outer_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    outer_model.save(output_path, save_format="tf")
-
-    # Delete the session and graph to ensure that the loaded model is generated
-    # from scratch.
-    # TODO(b/149526183): Can't clear session when TF2 is disabled.
-    if tf2.enabled():
-      keras.backend.clear_session()
-
-    loaded_model = keras.models.load_model(output_path)
-    self.assertAllEqual(loaded_model.predict(input_array), expected_output)
-
-  def test_saving_with_tfidf(self):
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # OOV idf weight (bucket 0) should be 0.5, the average of passed weights.
-    idf_weights = [.4, .25, .75, .6]
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "fire", "earth", "michigan"]])
-
-    # pyformat: disable
-    # pylint: disable=bad-whitespace
-    expected_output = [[ 0, .8, .25, .75,  0],
-                       [ 1, .4,   0,   0, .6]]
-    vocab_data = ["earth", "wind", "and", "fire"]
-    # pylint: enable=bad-whitespace
-    # pyformat: enable
-
-    # Build and validate a golden model.
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=5,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.TF_IDF)
-    layer.set_vocabulary(vocab_data, idf_weights=idf_weights)
-
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllClose(output_dataset, expected_output)
-
-    # Save the model to disk.
-    output_path = os.path.join(self.get_temp_dir(), "tf_keras_saved_model")
-    model.save(output_path, save_format="tf")
-    loaded_model = keras.models.load_model(output_path)
-
-    # Ensure that the loaded model is unique (so that the save/load is real)
-    self.assertIsNot(model, loaded_model)
-
-    # Validate correctness of the new model.
-    new_output_dataset = loaded_model.predict(input_array)
-    self.assertAllClose(new_output_dataset, expected_output)
-
-  def test_serialization_with_custom_callables(self):
-    input_array = np.array([["earth>wind>and Fire"],
-                            ["\tfire>And\nearth>michigan"]])
-    expected_output = [[b"earth", b"wind", b"and fire"],
-                       [b"\tfire", b"and\nearth", b"michigan"]]
-
-    input_data = keras.Input(shape=(1,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=None,
-        standardize=custom_standardize_fn,
-        split=custom_split_fn,
-        ngrams=None,
-        output_mode=None)
-    int_data = layer(input_data)
-    model = keras.Model(inputs=input_data, outputs=int_data)
-    output_dataset = model.predict(input_array)
-    self.assertAllEqual(expected_output, output_dataset)
-
-    serialized_model_data = model.get_config()
-    new_model = keras.Model.from_config(serialized_model_data)
-    new_output_dataset = new_model.predict(input_array)
-    self.assertAllEqual(expected_output, new_output_dataset)
-
-
-@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
-class TextVectorizationE2ETest(keras_parameterized.TestCase,
-                               preprocessing_test_utils.PreprocessingLayerTest):
-
-  def test_keras_vocab_trimming_example(self):
-    vocab_data = np.array([
-        "earth", "earth", "earth", "earth", "wind", "wind", "wind", "and",
-        "and", "fire"
-    ])
-    input_array = np.array([["earth", "wind", "and", "earth"],
-                            ["ohio", "and", "earth", "michigan"]])
-
-    # pyformat: disable
-    expected_output = [[1, 2, 1],
-                       [3, 1, 0]]
-    # pyformat: enable
-    max_tokens = 3
-    expected_output_shape = [None, max_tokens]
-
-    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
-    layer = text_vectorization.TextVectorization(
-        max_tokens=max_tokens,
-        standardize=None,
-        split=None,
-        output_mode=text_vectorization.COUNT,
-        pad_to_max_tokens=True)
-    int_data = layer(input_data)
-    layer.adapt(vocab_data)
-    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
-    model = keras.Model(input_data, int_data)
-    output = model.predict(input_array)
-    self.assertAllEqual(expected_output, output)
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/python/keras/layers/serialization.py b/tensorflow/python/keras/layers/serialization.py
index f623084..fe7f46a 100644
--- a/tensorflow/python/keras/layers/serialization.py
+++ b/tensorflow/python/keras/layers/serialization.py
@@ -43,15 +43,6 @@
 from tensorflow.python.keras.layers.normalization import batch_normalization
 from tensorflow.python.keras.layers.normalization import batch_normalization_v1
 from tensorflow.python.keras.layers.normalization import layer_normalization
-from tensorflow.python.keras.layers.preprocessing import category_crossing
-from tensorflow.python.keras.layers.preprocessing import category_encoding
-from tensorflow.python.keras.layers.preprocessing import discretization
-from tensorflow.python.keras.layers.preprocessing import hashing
-from tensorflow.python.keras.layers.preprocessing import image_preprocessing
-from tensorflow.python.keras.layers.preprocessing import integer_lookup
-from tensorflow.python.keras.layers.preprocessing import normalization as preprocessing_normalization
-from tensorflow.python.keras.layers.preprocessing import string_lookup
-from tensorflow.python.keras.layers.preprocessing import text_vectorization
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import tf_inspect as inspect
 from tensorflow.python.util.tf_export import keras_export
@@ -60,10 +51,7 @@
                convolutional_recurrent, core, cudnn_recurrent, dense_attention,
                embeddings, einsum_dense, local, merge, noise,
                batch_normalization_v1, layer_normalization,
-               pooling, image_preprocessing, recurrent, wrappers, hashing,
-               category_crossing, category_encoding, discretization,
-               multi_head_attention, integer_lookup,
-               preprocessing_normalization, string_lookup, text_vectorization)
+               pooling, recurrent, wrappers, multi_head_attention)
 ALL_V2_MODULES = (rnn_cell_wrapper_v2, batch_normalization, layer_normalization,
                   recurrent_v2)
 # ALL_OBJECTS is meant to be a global mutable. Hence we need to make it
diff --git a/tensorflow/python/keras/mixed_precision/layer_correctness_test.py b/tensorflow/python/keras/mixed_precision/layer_correctness_test.py
index 82349b5..e6a9e3f 100644
--- a/tensorflow/python/keras/mixed_precision/layer_correctness_test.py
+++ b/tensorflow/python/keras/mixed_precision/layer_correctness_test.py
@@ -39,8 +39,6 @@
 from tensorflow.python.keras.layers import wrappers
 from tensorflow.python.keras.layers.normalization import batch_normalization
 from tensorflow.python.keras.layers.normalization import layer_normalization
-from tensorflow.python.keras.layers.preprocessing import image_preprocessing
-from tensorflow.python.keras.layers.preprocessing import normalization
 from tensorflow.python.keras.mixed_precision import policy
 from tensorflow.python.platform import test
 
@@ -51,19 +49,6 @@
   return mirrored_strategy.MirroredStrategy(['cpu:0', 'cpu:1'])
 
 
-def _create_normalization_layer_with_adapt():
-  layer = normalization.Normalization()
-  layer.adapt(np.random.normal(size=(10, 4)))
-  return layer
-
-
-def _create_normalization_layer_without_adapt():
-  return normalization.Normalization(
-      mean=np.random.normal(size=(4,)),
-      variance=np.random.uniform(0.5, 2., size=(4,))
-  )
-
-
 class LayerCorrectnessTest(keras_parameterized.TestCase):
 
   def setUp(self):
@@ -159,13 +144,6 @@
        lambda: dense_attention.AdditiveAttention(causal=True), [(2, 3, 4),
                                                                 (2, 3, 4),
                                                                 (2, 3, 4)]),
-      ('NormalizationAdapt', _create_normalization_layer_with_adapt, (4, 4)),
-      ('NormalizationNoAdapt', _create_normalization_layer_without_adapt,
-       (4, 4)),
-      ('Resizing', lambda: image_preprocessing.Resizing(3, 3), (2, 5, 5, 1)),
-      ('Rescaling', lambda: image_preprocessing.Rescaling(2., 1.), (6, 6)),
-      ('CenterCrop', lambda: image_preprocessing.CenterCrop(3, 3),
-       (2, 5, 5, 1))
   )
   def test_layer(self, f32_layer_fn, input_shape, rtol=2e-3, atol=2e-3,
                  input_data=None):
diff --git a/tensorflow/python/keras/preprocessing/BUILD b/tensorflow/python/keras/preprocessing/BUILD
deleted file mode 100644
index 876a2c3..0000000
--- a/tensorflow/python/keras/preprocessing/BUILD
+++ /dev/null
@@ -1,162 +0,0 @@
-# Description:
-#   Contains the Keras preprocessing layers (internal TensorFlow version).
-
-load("//tensorflow:tensorflow.bzl", "tf_py_test")
-
-package(
-    default_visibility = [
-        "//tensorflow/python/keras:__subpackages__",
-    ],
-    licenses = ["notice"],
-)
-
-filegroup(
-    name = "all_py_srcs",
-    srcs = glob(["*.py"]),
-    visibility = ["//tensorflow/python/keras/google/private_tf_api_test:__pkg__"],
-)
-
-py_library(
-    name = "preprocessing",
-    srcs = [
-        "__init__.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        ":image",
-        ":sequence",
-        ":text",
-        ":timeseries",
-        "//tensorflow/python/keras/utils:all_utils",
-    ],
-)
-
-py_library(
-    name = "image",
-    srcs = [
-        "dataset_utils.py",
-        "image.py",
-        "image_dataset.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:util",
-        "//tensorflow/python/keras:backend",
-        "//tensorflow/python/keras/utils:data_utils",
-    ],
-)
-
-py_library(
-    name = "sequence",
-    srcs = [
-        "sequence.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:util",
-        "//tensorflow/python/keras/utils:data_utils",
-    ],
-)
-
-py_library(
-    name = "timeseries",
-    srcs = [
-        "timeseries.py",
-    ],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python/data/ops:dataset_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_library(
-    name = "text",
-    srcs = [
-        "dataset_utils.py",
-        "text.py",
-        "text_dataset.py",
-    ],
-    srcs_version = "PY3",
-    deps = ["//tensorflow/python:util"],
-)
-
-tf_py_test(
-    name = "image_test",
-    size = "medium",
-    srcs = ["image_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":image",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/compat:v2_compat",
-        "//tensorflow/python/keras",
-        "//third_party/py/numpy",
-    ],
-)
-
-tf_py_test(
-    name = "image_dataset_test",
-    size = "small",
-    srcs = ["image_dataset_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":image",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/compat:v2_compat",
-        "//tensorflow/python/keras",
-        "//third_party/py/numpy",
-    ],
-)
-
-tf_py_test(
-    name = "sequence_test",
-    size = "small",
-    srcs = ["sequence_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":sequence",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-tf_py_test(
-    name = "text_test",
-    size = "small",
-    srcs = ["text_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":text",
-        "//tensorflow/python:client_testlib",
-        "//third_party/py/numpy",
-    ],
-)
-
-tf_py_test(
-    name = "text_dataset_test",
-    size = "small",
-    srcs = ["text_dataset_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":text",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/compat:v2_compat",
-        "//tensorflow/python/keras",
-    ],
-)
-
-tf_py_test(
-    name = "timeseries_test",
-    size = "small",
-    srcs = ["timeseries_test.py"],
-    python_version = "PY3",
-    deps = [
-        ":timeseries",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python/compat:v2_compat",
-        "//third_party/py/numpy",
-    ],
-)
diff --git a/tensorflow/python/keras/preprocessing/__init__.py b/tensorflow/python/keras/preprocessing/__init__.py
deleted file mode 100644
index 42151dc..0000000
--- a/tensorflow/python/keras/preprocessing/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Provides keras data preprocessing utils to pre-process tf.data.Datasets before they are fed to the model."""
-# pylint: disable=g-import-not-at-top
-# TODO(mihaimaruseac): remove the import of keras_preprocessing and injecting
-# once we update to latest version of keras_preprocessing
-import keras_preprocessing
-
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.preprocessing import image
-from tensorflow.python.keras.preprocessing import sequence
-from tensorflow.python.keras.preprocessing import text
-from tensorflow.python.keras.preprocessing import timeseries
-from tensorflow.python.keras.utils import all_utils as utils
-
-# This exists for compatibility with prior version of keras_preprocessing.
-keras_preprocessing.set_keras_submodules(backend=backend, utils=utils)
diff --git a/tensorflow/python/keras/preprocessing/dataset_utils.py b/tensorflow/python/keras/preprocessing/dataset_utils.py
deleted file mode 100644
index a6fbaee..0000000
--- a/tensorflow/python/keras/preprocessing/dataset_utils.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras image dataset loading utilities."""
-# pylint: disable=g-classes-have-attributes
-
-import multiprocessing
-import os
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-
-
-def index_directory(directory,
-                    labels,
-                    formats,
-                    class_names=None,
-                    shuffle=True,
-                    seed=None,
-                    follow_links=False):
-  """Make list of all files in the subdirs of `directory`, with their labels.
-
-  Args:
-    directory: The target directory (string).
-    labels: Either "inferred"
-        (labels are generated from the directory structure),
-        None (no labels),
-        or a list/tuple of integer labels of the same size as the number of
-        valid files found in the directory. Labels should be sorted according
-        to the alphanumeric order of the image file paths
-        (obtained via `os.walk(directory)` in Python).
-    formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt").
-    class_names: Only valid if "labels" is "inferred". This is the explicit
-        list of class names (must match names of subdirectories). Used
-        to control the order of the classes
-        (otherwise alphanumerical order is used).
-    shuffle: Whether to shuffle the data. Default: True.
-        If set to False, sorts the data in alphanumeric order.
-    seed: Optional random seed for shuffling.
-    follow_links: Whether to visit subdirectories pointed to by symlinks.
-
-  Returns:
-    tuple (file_paths, labels, class_names).
-      file_paths: list of file paths (strings).
-      labels: list of matching integer labels (same length as file_paths)
-      class_names: names of the classes corresponding to these labels, in order.
-  """
-  if labels is None:
-    # in the no-label case, index from the parent directory down.
-    subdirs = ['']
-    class_names = subdirs
-  else:
-    subdirs = []
-    for subdir in sorted(os.listdir(directory)):
-      if os.path.isdir(os.path.join(directory, subdir)):
-        subdirs.append(subdir)
-    if not class_names:
-      class_names = subdirs
-    else:
-      if set(class_names) != set(subdirs):
-        raise ValueError(
-            'The `class_names` passed did not match the '
-            'names of the subdirectories of the target directory. '
-            'Expected: %s, but received: %s' %
-            (subdirs, class_names))
-  class_indices = dict(zip(class_names, range(len(class_names))))
-
-  # Build an index of the files
-  # in the different class subfolders.
-  pool = multiprocessing.pool.ThreadPool()
-  results = []
-  filenames = []
-
-  for dirpath in (os.path.join(directory, subdir) for subdir in subdirs):
-    results.append(
-        pool.apply_async(index_subdirectory,
-                         (dirpath, class_indices, follow_links, formats)))
-  labels_list = []
-  for res in results:
-    partial_filenames, partial_labels = res.get()
-    labels_list.append(partial_labels)
-    filenames += partial_filenames
-  if labels not in ('inferred', None):
-    if len(labels) != len(filenames):
-      raise ValueError('Expected the lengths of `labels` to match the number '
-                       'of files in the target directory. len(labels) is %s '
-                       'while we found %s files in %s.' % (
-                           len(labels), len(filenames), directory))
-  else:
-    i = 0
-    labels = np.zeros((len(filenames),), dtype='int32')
-    for partial_labels in labels_list:
-      labels[i:i + len(partial_labels)] = partial_labels
-      i += len(partial_labels)
-
-  if labels is None:
-    print('Found %d files.' % (len(filenames),))
-  else:
-    print('Found %d files belonging to %d classes.' %
-          (len(filenames), len(class_names)))
-  pool.close()
-  pool.join()
-  file_paths = [os.path.join(directory, fname) for fname in filenames]
-
-  if shuffle:
-    # Shuffle globally to erase macro-structure
-    if seed is None:
-      seed = np.random.randint(1e6)
-    rng = np.random.RandomState(seed)
-    rng.shuffle(file_paths)
-    rng = np.random.RandomState(seed)
-    rng.shuffle(labels)
-  return file_paths, labels, class_names
-
-
-def iter_valid_files(directory, follow_links, formats):
-  walk = os.walk(directory, followlinks=follow_links)
-  for root, _, files in sorted(walk, key=lambda x: x[0]):
-    for fname in sorted(files):
-      if fname.lower().endswith(formats):
-        yield root, fname
-
-
-def index_subdirectory(directory, class_indices, follow_links, formats):
-  """Recursively walks directory and list image paths and their class index.
-
-  Args:
-    directory: string, target directory.
-    class_indices: dict mapping class names to their index.
-    follow_links: boolean, whether to recursively follow subdirectories
-      (if False, we only list top-level images in `directory`).
-    formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt").
-
-  Returns:
-    tuple `(filenames, labels)`. `filenames` is a list of relative file
-      paths, and `labels` is a list of integer labels corresponding to these
-      files.
-  """
-  dirname = os.path.basename(directory)
-  valid_files = iter_valid_files(directory, follow_links, formats)
-  labels = []
-  filenames = []
-  for root, fname in valid_files:
-    labels.append(class_indices[dirname])
-    absolute_path = os.path.join(root, fname)
-    relative_path = os.path.join(
-        dirname, os.path.relpath(absolute_path, directory))
-    filenames.append(relative_path)
-  return filenames, labels
-
-
-def get_training_or_validation_split(samples, labels, validation_split, subset):
-  """Potentially restict samples & labels to a training or validation split.
-
-  Args:
-    samples: List of elements.
-    labels: List of corresponding labels.
-    validation_split: Float, fraction of data to reserve for validation.
-    subset: Subset of the data to return.
-      Either "training", "validation", or None. If None, we return all of the
-      data.
-
-  Returns:
-    tuple (samples, labels), potentially restricted to the specified subset.
-  """
-  if not validation_split:
-    return samples, labels
-
-  num_val_samples = int(validation_split * len(samples))
-  if subset == 'training':
-    print('Using %d files for training.' % (len(samples) - num_val_samples,))
-    samples = samples[:-num_val_samples]
-    labels = labels[:-num_val_samples]
-  elif subset == 'validation':
-    print('Using %d files for validation.' % (num_val_samples,))
-    samples = samples[-num_val_samples:]
-    labels = labels[-num_val_samples:]
-  else:
-    raise ValueError('`subset` must be either "training" '
-                     'or "validation", received: %s' % (subset,))
-  return samples, labels
-
-
-def labels_to_dataset(labels, label_mode, num_classes):
-  """Create a tf.data.Dataset from the list/tuple of labels.
-
-  Args:
-    labels: list/tuple of labels to be converted into a tf.data.Dataset.
-    label_mode:
-    - 'binary' indicates that the labels (there can be only 2) are encoded as
-      `float32` scalars with values 0 or 1 (e.g. for `binary_crossentropy`).
-    - 'categorical' means that the labels are mapped into a categorical vector.
-      (e.g. for `categorical_crossentropy` loss).
-    num_classes: number of classes of labels.
-  """
-  label_ds = dataset_ops.Dataset.from_tensor_slices(labels)
-  if label_mode == 'binary':
-    label_ds = label_ds.map(
-        lambda x: array_ops.expand_dims(math_ops.cast(x, 'float32'), axis=-1))
-  elif label_mode == 'categorical':
-    label_ds = label_ds.map(lambda x: array_ops.one_hot(x, num_classes))
-  return label_ds
-
-
-def check_validation_split_arg(validation_split, subset, shuffle, seed):
-  """Raise errors in case of invalid argument values.
-
-  Args:
-    shuffle: Whether to shuffle the data. Either True or False.
-    seed: random seed for shuffling and transformations.
-    validation_split: float between 0 and 1, fraction of data to reserve for
-      validation.
-    subset: One of "training" or "validation". Only used if `validation_split`
-      is set.
-  """
-  if validation_split and not 0 < validation_split < 1:
-    raise ValueError(
-        '`validation_split` must be between 0 and 1, received: %s' %
-        (validation_split,))
-  if (validation_split or subset) and not (validation_split and subset):
-    raise ValueError(
-        'If `subset` is set, `validation_split` must be set, and inversely.')
-  if subset not in ('training', 'validation', None):
-    raise ValueError('`subset` must be either "training" '
-                     'or "validation", received: %s' % (subset,))
-  if validation_split and shuffle and seed is None:
-    raise ValueError(
-        'If using `validation_split` and shuffling the data, you must provide '
-        'a `seed` argument, to make sure that there is no overlap between the '
-        'training and validation subset.')
diff --git a/tensorflow/python/keras/preprocessing/image.py b/tensorflow/python/keras/preprocessing/image.py
deleted file mode 100644
index 6c875e1..0000000
--- a/tensorflow/python/keras/preprocessing/image.py
+++ /dev/null
@@ -1,1152 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-# pylint: disable=invalid-name
-# pylint: disable=g-import-not-at-top
-# pylint: disable=g-classes-have-attributes
-"""Set of tools for real-time data augmentation on image data."""
-
-from keras_preprocessing import image
-import numpy as np
-try:
-  from scipy import linalg  # pylint: disable=unused-import
-  from scipy import ndimage  # pylint: disable=unused-import
-except ImportError:
-  pass
-
-from tensorflow.python.framework import ops
-from tensorflow.python.keras import backend
-from tensorflow.python.keras.preprocessing.image_dataset import image_dataset_from_directory  # pylint: disable=unused-import
-from tensorflow.python.keras.utils import data_utils
-from tensorflow.python.keras.utils import tf_inspect
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import image_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import tf_logging
-from tensorflow.python.util.tf_export import keras_export
-
-random_rotation = image.random_rotation
-random_shift = image.random_shift
-random_shear = image.random_shear
-random_zoom = image.random_zoom
-apply_channel_shift = image.apply_channel_shift
-random_channel_shift = image.random_channel_shift
-apply_brightness_shift = image.apply_brightness_shift
-random_brightness = image.random_brightness
-apply_affine_transform = image.apply_affine_transform
-
-
-@keras_export('keras.preprocessing.image.smart_resize', v1=[])
-def smart_resize(x, size, interpolation='bilinear'):
-  """Resize images to a target size without aspect ratio distortion.
-
-  TensorFlow image datasets typically yield images that each have a different
-  size. However, these images need to be batched before they can be
-  processed by Keras layers. To be batched, images need to share the same height
-  and width.
-
-  You could simply do:
-
-  ```python
-  size = (200, 200)
-  ds = ds.map(lambda img: tf.image.resize(img, size))
-  ```
-
-  However, if you do this, you distort the aspect ratio of your images, since
-  in general they do not all have the same aspect ratio as `size`. This is
-  fine in many cases, but not always (e.g. for GANs this can be a problem).
-
-  Note that passing the argument `preserve_aspect_ratio=True` to `resize`
-  will preserve the aspect ratio, but at the cost of no longer respecting the
-  provided target size. Because `tf.image.resize` doesn't crop images,
-  your output images will still have different sizes.
-
-  This calls for:
-
-  ```python
-  size = (200, 200)
-  ds = ds.map(lambda img: smart_resize(img, size))
-  ```
-
-  Your output images will actually be `(200, 200)`, and will not be distorted.
-  Instead, the parts of the image that do not fit within the target size
-  get cropped out.
-
-  The resizing process is:
-
-  1. Take the largest centered crop of the image that has the same aspect ratio
-  as the target size. For instance, if `size=(200, 200)` and the input image has
-  size `(340, 500)`, we take a crop of `(340, 340)` centered along the width.
-  2. Resize the cropped image to the target size. In the example above,
-  we resize the `(340, 340)` crop to `(200, 200)`.
-
-  Args:
-    x: Input image or batch of images (as a tensor or NumPy array).
-      Must be in format `(height, width, channels)` or
-      `(batch_size, height, width, channels)`.
-    size: Tuple of `(height, width)` integers. Target size.
-    interpolation: String, interpolation to use for resizing.
-      Defaults to `'bilinear'`. Supports `bilinear`, `nearest`, `bicubic`,
-      `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic`.
-
-  Returns:
-    Array with shape `(size[0], size[1], channels)`. If the input image was a
-    NumPy array, the output is a NumPy array, and if it was a TF tensor,
-    the output is a TF tensor.
-  """
-  if len(size) != 2:
-    raise ValueError('Expected `size` to be a tuple of 2 integers, '
-                     'but got: %s' % (size,))
-  img = ops.convert_to_tensor_v2_with_dispatch(x)
-  if img.shape.rank is not None:
-    if img.shape.rank < 3 or img.shape.rank > 4:
-      raise ValueError(
-          'Expected an image array with shape `(height, width, channels)`, '
-          'or `(batch_size, height, width, channels)` but '
-          'got input with incorrect rank, of shape %s' % (img.shape,))
-  shape = array_ops.shape(img)
-  if img.shape.rank == 4:
-    height, width = shape[1], shape[2]
-    static_num_channels = img.shape[-1]
-  else:
-    height, width = shape[0], shape[1]
-  target_height, target_width = size
-
-  crop_height = math_ops.cast(
-      math_ops.cast(width * target_height, 'float32') / target_width, 'int32')
-  crop_width = math_ops.cast(
-      math_ops.cast(height * target_width, 'float32') / target_height, 'int32')
-
-  # Set back to input height / width if crop_height / crop_width is not smaller.
-  crop_height = math_ops.minimum(height, crop_height)
-  crop_width = math_ops.minimum(width, crop_width)
-
-  crop_box_hstart = math_ops.cast(
-      math_ops.cast(height - crop_height, 'float32') / 2, 'int32')
-  crop_box_wstart = math_ops.cast(
-      math_ops.cast(width - crop_width, 'float32') / 2, 'int32')
-
-  if img.shape.rank == 4:
-    crop_box_start = array_ops.stack([0, crop_box_hstart, crop_box_wstart, 0])
-    crop_box_size = array_ops.stack([-1, crop_height, crop_width, -1])
-  else:
-    crop_box_start = array_ops.stack([crop_box_hstart, crop_box_wstart, 0])
-    crop_box_size = array_ops.stack([crop_height, crop_width, -1])
-
-  img = array_ops.slice(img, crop_box_start, crop_box_size)
-  img = image_ops.resize_images_v2(
-      images=img,
-      size=size,
-      method=interpolation)
-  if img.shape.rank == 4:
-    # Apparent bug in resize_images_v2 may cause shape to be lost
-    img.set_shape((None, None, None, static_num_channels))
-  if isinstance(x, np.ndarray):
-    return img.numpy()
-  return img
-
-
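As an aside for reviewers, the two-step crop-then-resize behaviour documented in the `smart_resize` docstring above can be reproduced with public TF2 ops. The sketch below is illustrative only, not the removed implementation; the helper name `smart_resize_sketch` is ours:

```python
import tensorflow as tf


def smart_resize_sketch(img, size, interpolation='bilinear'):
  # Assumes a rank-3 `(height, width, channels)` image tensor.
  target_height, target_width = size
  shape = tf.shape(img)
  height, width = shape[0], shape[1]

  # Step 1: largest centered crop with the target aspect ratio.
  crop_height = tf.cast(
      tf.cast(width * target_height, 'float32') / target_width, 'int32')
  crop_width = tf.cast(
      tf.cast(height * target_width, 'float32') / target_height, 'int32')
  crop_height = tf.minimum(height, crop_height)
  crop_width = tf.minimum(width, crop_width)
  h_start = (height - crop_height) // 2
  w_start = (width - crop_width) // 2
  img = img[h_start:h_start + crop_height, w_start:w_start + crop_width, :]

  # Step 2: resize the crop to the requested size.
  return tf.image.resize(img, size, method=interpolation)
```

For a `(340, 500, 3)` input and `size=(200, 200)`, this takes the `(340, 340)` centered crop described in the docstring and then resizes it to `(200, 200)`.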
-@keras_export('keras.utils.array_to_img',
-              'keras.preprocessing.image.array_to_img')
-def array_to_img(x, data_format=None, scale=True, dtype=None):
-  """Converts a 3D Numpy array to a PIL Image instance.
-
-  Usage:
-
-  ```python
-  import numpy as np
-  import tensorflow as tf
-
-  img = np.random.random(size=(100, 100, 3))
-  pil_img = tf.keras.preprocessing.image.array_to_img(img)
-  ```
-
-
-  Args:
-      x: Input data, in any form that can be converted to a Numpy array.
-      data_format: Image data format, can be either "channels_first" or
-        "channels_last". Defaults to `None`, in which case the global setting
-        `tf.keras.backend.image_data_format()` is used (unless you changed it,
-        it defaults to "channels_last").
-      scale: Whether to rescale the image such that minimum and maximum values
-        are 0 and 255 respectively. Defaults to `True`.
-      dtype: Dtype to use. Defaults to `None`, in which case the global setting
-        `tf.keras.backend.floatx()` is used (unless you changed it, it defaults
-        to "float32").
-
-  Returns:
-      A PIL Image instance.
-
-  Raises:
-      ImportError: if PIL is not available.
-      ValueError: if invalid `x` or `data_format` is passed.
-  """
-
-  if data_format is None:
-    data_format = backend.image_data_format()
-  kwargs = {}
-  if 'dtype' in tf_inspect.getfullargspec(image.array_to_img)[0]:
-    if dtype is None:
-      dtype = backend.floatx()
-    kwargs['dtype'] = dtype
-  return image.array_to_img(x, data_format=data_format, scale=scale, **kwargs)
-
-
-@keras_export('keras.utils.img_to_array',
-              'keras.preprocessing.image.img_to_array')
-def img_to_array(img, data_format=None, dtype=None):
-  """Converts a PIL Image instance to a Numpy array.
-
-  Usage:
-
-  ```python
-  import numpy as np
-  import tensorflow as tf
-
-  img_data = np.random.random(size=(100, 100, 3))
-  img = tf.keras.preprocessing.image.array_to_img(img_data)
-  array = tf.keras.preprocessing.image.img_to_array(img)
-  ```
-
-
-  Args:
-      img: Input PIL Image instance.
-      data_format: Image data format, can be either "channels_first" or
-        "channels_last". Defaults to `None`, in which case the global setting
-        `tf.keras.backend.image_data_format()` is used (unless you changed it,
-        it defaults to "channels_last").
-      dtype: Dtype to use. Defaults to `None`, in which case the global setting
-        `tf.keras.backend.floatx()` is used (unless you changed it, it defaults
-        to "float32").
-
-  Returns:
-      A 3D Numpy array.
-
-  Raises:
-      ValueError: if invalid `img` or `data_format` is passed.
-  """
-
-  if data_format is None:
-    data_format = backend.image_data_format()
-  kwargs = {}
-  if 'dtype' in tf_inspect.getfullargspec(image.img_to_array)[0]:
-    if dtype is None:
-      dtype = backend.floatx()
-    kwargs['dtype'] = dtype
-  return image.img_to_array(img, data_format=data_format, **kwargs)
-
-
-@keras_export('keras.utils.save_img',
-              'keras.preprocessing.image.save_img')
-def save_img(path,
-             x,
-             data_format=None,
-             file_format=None,
-             scale=True,
-             **kwargs):
-  """Saves an image stored as a Numpy array to a path or file object.
-
-  Args:
-      path: Path or file object.
-      x: Numpy array.
-      data_format: Image data format,
-          either "channels_first" or "channels_last".
-      file_format: Optional file format override. If omitted, the
-          format to use is determined from the filename extension.
-          If a file object was used instead of a filename, this
-          parameter should always be used.
-      scale: Whether to rescale image values to be within `[0, 255]`.
-      **kwargs: Additional keyword arguments passed to `PIL.Image.save()`.
-  """
-  if data_format is None:
-    data_format = backend.image_data_format()
-  image.save_img(path,
-                 x,
-                 data_format=data_format,
-                 file_format=file_format,
-                 scale=scale, **kwargs)
-
-
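`save_img` ships without a usage snippet; a minimal sketch (the output path is arbitrary, and PIL must be installed):

```python
import numpy as np
import tensorflow as tf

x = np.random.random(size=(100, 100, 3))
# The format is inferred from the file extension unless `file_format` is given.
tf.keras.preprocessing.image.save_img('/tmp/example.png', x)
```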
-@keras_export('keras.utils.load_img',
-              'keras.preprocessing.image.load_img')
-def load_img(path, grayscale=False, color_mode='rgb', target_size=None,
-             interpolation='nearest'):
-  """Loads an image into PIL format.
-
-  Usage:
-
-  ```python
-  image = tf.keras.preprocessing.image.load_img(image_path)
-  input_arr = tf.keras.preprocessing.image.img_to_array(image)
-  input_arr = np.array([input_arr])  # Convert single image to a batch.
-  predictions = model.predict(input_arr)
-  ```
-
-  Args:
-      path: Path to image file.
-      grayscale: DEPRECATED: use `color_mode="grayscale"` instead.
-      color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb".
-          The desired image format.
-      target_size: Either `None` (default to original size)
-          or tuple of ints `(img_height, img_width)`.
-      interpolation: Interpolation method used to resample the image if the
-          target size is different from that of the loaded image.
-          Supported methods are "nearest", "bilinear", and "bicubic".
-          If PIL version 1.1.3 or newer is installed, "lanczos" is also
-          supported. If PIL version 3.4.0 or newer is installed, "box" and
-          "hamming" are also supported. By default, "nearest" is used.
-
-  Returns:
-      A PIL Image instance.
-
-  Raises:
-      ImportError: if PIL is not available.
-      ValueError: if interpolation method is not supported.
-  """
-  return image.load_img(path, grayscale=grayscale, color_mode=color_mode,
-                        target_size=target_size, interpolation=interpolation)
-
-
-@keras_export('keras.preprocessing.image.Iterator')
-class Iterator(image.Iterator, data_utils.Sequence):
-  pass
-
-
-@keras_export('keras.preprocessing.image.DirectoryIterator')
-class DirectoryIterator(image.DirectoryIterator, Iterator):  # pylint: disable=inconsistent-mro
-  """Iterator capable of reading images from a directory on disk.
-
-  Args:
-      directory: Path to the directory to read images from.
-          Each subdirectory in this directory will be
-          considered to contain images from one class,
-          or alternatively you could specify class subdirectories
-          via the `classes` argument.
-      image_data_generator: Instance of `ImageDataGenerator`
-          to use for random transformations and normalization.
-      target_size: tuple of integers, dimensions to resize input images to.
-      color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`.
-          Color mode to read images.
-      classes: Optional list of strings, names of subdirectories
-          containing images from each class (e.g. `["dogs", "cats"]`).
-          It will be computed automatically if not set.
-      class_mode: Mode for yielding the targets:
-          - `"binary"`: binary targets (if there are only two classes),
-          - `"categorical"`: categorical targets,
-          - `"sparse"`: integer targets,
-          - `"input"`: targets are images identical to input images (mainly
-              used to work with autoencoders),
-          - `None`: no targets get yielded (only input images are yielded).
-      batch_size: Integer, size of a batch.
-      shuffle: Boolean, whether to shuffle the data between epochs.
-      seed: Random seed for data shuffling.
-      data_format: String, one of `channels_first`, `channels_last`.
-      save_to_dir: Optional directory where to save the pictures
-          being yielded, in a viewable format. This is useful
-          for visualizing the random transformations being
-          applied, for debugging purposes.
-      save_prefix: String prefix to use for saving sample
-          images (if `save_to_dir` is set).
-      save_format: Format to use for saving sample images
-          (if `save_to_dir` is set).
-      subset: Subset of data (`"training"` or `"validation"`) if
-          validation_split is set in ImageDataGenerator.
-      interpolation: Interpolation method used to resample the image if the
-          target size is different from that of the loaded image.
-          Supported methods are "nearest", "bilinear", and "bicubic".
-          If PIL version 1.1.3 or newer is installed, "lanczos" is also
-          supported. If PIL version 3.4.0 or newer is installed, "box" and
-          "hamming" are also supported. By default, "nearest" is used.
-      dtype: Dtype to use for generated arrays.
-  """
-
-  def __init__(self, directory, image_data_generator,
-               target_size=(256, 256),
-               color_mode='rgb',
-               classes=None,
-               class_mode='categorical',
-               batch_size=32,
-               shuffle=True,
-               seed=None,
-               data_format=None,
-               save_to_dir=None,
-               save_prefix='',
-               save_format='png',
-               follow_links=False,
-               subset=None,
-               interpolation='nearest',
-               dtype=None):
-    if data_format is None:
-      data_format = backend.image_data_format()
-    kwargs = {}
-    if 'dtype' in tf_inspect.getfullargspec(
-        image.ImageDataGenerator.__init__)[0]:
-      if dtype is None:
-        dtype = backend.floatx()
-      kwargs['dtype'] = dtype
-    super(DirectoryIterator, self).__init__(
-        directory, image_data_generator,
-        target_size=target_size,
-        color_mode=color_mode,
-        classes=classes,
-        class_mode=class_mode,
-        batch_size=batch_size,
-        shuffle=shuffle,
-        seed=seed,
-        data_format=data_format,
-        save_to_dir=save_to_dir,
-        save_prefix=save_prefix,
-        save_format=save_format,
-        follow_links=follow_links,
-        subset=subset,
-        interpolation=interpolation,
-        **kwargs)
-
-
-@keras_export('keras.preprocessing.image.NumpyArrayIterator')
-class NumpyArrayIterator(image.NumpyArrayIterator, Iterator):
-  """Iterator yielding data from a Numpy array.
-
-  Args:
-      x: Numpy array of input data or tuple.
-          If tuple, the second element is either
-          another numpy array or a list of numpy arrays,
-          each of which gets passed
-          through as an output without any modifications.
-      y: Numpy array of targets data.
-      image_data_generator: Instance of `ImageDataGenerator`
-          to use for random transformations and normalization.
-      batch_size: Integer, size of a batch.
-      shuffle: Boolean, whether to shuffle the data between epochs.
-      sample_weight: Numpy array of sample weights.
-      seed: Random seed for data shuffling.
-      data_format: String, one of `channels_first`, `channels_last`.
-      save_to_dir: Optional directory where to save the pictures
-          being yielded, in a viewable format. This is useful
-          for visualizing the random transformations being
-          applied, for debugging purposes.
-      save_prefix: String prefix to use for saving sample
-          images (if `save_to_dir` is set).
-      save_format: Format to use for saving sample images
-          (if `save_to_dir` is set).
-      subset: Subset of data (`"training"` or `"validation"`) if
-          validation_split is set in ImageDataGenerator.
-      dtype: Dtype to use for the generated arrays.
-  """
-
-  def __init__(self, x, y, image_data_generator,
-               batch_size=32,
-               shuffle=False,
-               sample_weight=None,
-               seed=None,
-               data_format=None,
-               save_to_dir=None,
-               save_prefix='',
-               save_format='png',
-               subset=None,
-               dtype=None):
-    if data_format is None:
-      data_format = backend.image_data_format()
-    kwargs = {}
-    if 'dtype' in tf_inspect.getfullargspec(
-        image.NumpyArrayIterator.__init__)[0]:
-      if dtype is None:
-        dtype = backend.floatx()
-      kwargs['dtype'] = dtype
-    super(NumpyArrayIterator, self).__init__(
-        x, y, image_data_generator,
-        batch_size=batch_size,
-        shuffle=shuffle,
-        sample_weight=sample_weight,
-        seed=seed,
-        data_format=data_format,
-        save_to_dir=save_to_dir,
-        save_prefix=save_prefix,
-        save_format=save_format,
-        subset=subset,
-        **kwargs)
-
-
-class DataFrameIterator(image.DataFrameIterator, Iterator):  # pylint: disable=inconsistent-mro
-  """Iterator capable of reading images from a directory on disk as a dataframe.
-
-  Args:
-      dataframe: Pandas dataframe containing the filepaths relative to
-        `directory` (or absolute paths if `directory` is None) of the images in
-        a string column. It should include other column/s depending on the
-        `class_mode`:
-          - if `class_mode` is `"categorical"` (default value) it must include
-              the `y_col` column with the class/es of each image. Values in
-              the column can be a string (single class) or a list/tuple
-              (multiple classes).
-          - if `class_mode` is `"binary"` or `"sparse"` it must include the
-              given `y_col` column with class values as strings.
-          - if `class_mode` is `"raw"` or `"multi_output"` it should contain the
-              columns specified in `y_col`.
-          - if `class_mode` is `"input"` or `None` no extra column is needed.
-      directory: string, path to the directory to read images from. If `None`,
-        data in `x_col` column should be absolute paths.
-      image_data_generator: Instance of `ImageDataGenerator` to use for random
-        transformations and normalization. If None, no transformations and
-        normalizations are made.
-      x_col: string, column in `dataframe` that contains the filenames (or
-        absolute paths if `directory` is `None`).
-      y_col: string or list, column/s in `dataframe` that has the target data.
-      weight_col: string, column in `dataframe` that contains the sample
-          weights. Default: `None`.
-      target_size: tuple of integers, dimensions to resize input images to.
-      color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. Color mode to read
-        images.
-      classes: Optional list of strings, classes to use (e.g. `["dogs",
-        "cats"]`). If None, all classes in `y_col` will be used.
-      class_mode: one of "binary", "categorical", "input", "multi_output",
-        "raw", "sparse" or None. Default: "categorical".
-        Mode for yielding the targets:
-          - `"binary"`: 1D numpy array of binary labels,
-          - `"categorical"`: 2D numpy array of one-hot encoded labels. Supports
-            multi-label output.
-          - `"input"`: images identical to input images (mainly used to work
-            with autoencoders),
-          - `"multi_output"`: list with the values of the different columns,
-          - `"raw"`: numpy array of values in `y_col` column(s),
-          - `"sparse"`: 1D numpy array of integer labels,
-          - `None`, no targets are returned (the generator will only yield
-            batches of image data, which is useful to use in `model.predict()`).
-      batch_size: Integer, size of a batch.
-      shuffle: Boolean, whether to shuffle the data between epochs.
-      seed: Random seed for data shuffling.
-      data_format: String, one of `channels_first`, `channels_last`.
-      save_to_dir: Optional directory where to save the pictures being yielded,
-        in a viewable format. This is useful for visualizing the random
-        transformations being applied, for debugging purposes.
-      save_prefix: String prefix to use for saving sample images (if
-        `save_to_dir` is set).
-      save_format: Format to use for saving sample images (if `save_to_dir` is
-        set).
-      subset: Subset of data (`"training"` or `"validation"`) if
-        validation_split is set in ImageDataGenerator.
-      interpolation: Interpolation method used to resample the image if the
-        target size is different from that of the loaded image. Supported
-        methods are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3
-        or newer is installed, "lanczos" is also supported. If PIL version 3.4.0
-        or newer is installed, "box" and "hamming" are also supported. By
-        default, "nearest" is used.
-      dtype: Dtype to use for the generated arrays.
-      validate_filenames: Boolean, whether to validate image filenames in
-        `x_col`. If `True`, invalid images will be ignored. Disabling this
-        option can speed up the instantiation of this class. Default: `True`.
-  """
-
-  def __init__(
-      self,
-      dataframe,
-      directory=None,
-      image_data_generator=None,
-      x_col='filename',
-      y_col='class',
-      weight_col=None,
-      target_size=(256, 256),
-      color_mode='rgb',
-      classes=None,
-      class_mode='categorical',
-      batch_size=32,
-      shuffle=True,
-      seed=None,
-      data_format='channels_last',
-      save_to_dir=None,
-      save_prefix='',
-      save_format='png',
-      subset=None,
-      interpolation='nearest',
-      dtype='float32',
-      validate_filenames=True):
-    super(DataFrameIterator, self).__init__(
-        dataframe=dataframe,
-        directory=directory,
-        image_data_generator=image_data_generator,
-        x_col=x_col,
-        y_col=y_col,
-        weight_col=weight_col,
-        target_size=target_size,
-        color_mode=color_mode,
-        classes=classes,
-        class_mode=class_mode,
-        batch_size=batch_size,
-        shuffle=shuffle,
-        seed=seed,
-        data_format=data_format,
-        save_to_dir=save_to_dir,
-        save_prefix=save_prefix,
-        save_format=save_format,
-        subset=subset,
-        interpolation=interpolation,
-        dtype=dtype,
-        validate_filenames=validate_filenames
-    )
-
-
-@keras_export('keras.preprocessing.image.ImageDataGenerator')
-class ImageDataGenerator(image.ImageDataGenerator):
-  """Generate batches of tensor image data with real-time data augmentation.
-
-   The data will be looped over (in batches).
-
-  Args:
-      featurewise_center: Boolean.
-          Set input mean to 0 over the dataset, feature-wise.
-      samplewise_center: Boolean. Set each sample mean to 0.
-      featurewise_std_normalization: Boolean.
-          Divide inputs by std of the dataset, feature-wise.
-      samplewise_std_normalization: Boolean. Divide each input by its std.
-      zca_epsilon: epsilon for ZCA whitening. Default is 1e-6.
-      zca_whitening: Boolean. Apply ZCA whitening.
-      rotation_range: Int. Degree range for random rotations.
-      width_shift_range: Float, 1-D array-like or int
-          - float: fraction of total width, if < 1, or pixels if >= 1.
-          - 1-D array-like: random elements from the array.
-          - int: integer number of pixels from interval
-              `(-width_shift_range, +width_shift_range)`
-          - With `width_shift_range=2` possible values
-              are integers `[-1, 0, +1]`,
-              same as with `width_shift_range=[-1, 0, +1]`,
-              while with `width_shift_range=1.0` possible values are floats
-              in the interval [-1.0, +1.0).
-      height_shift_range: Float, 1-D array-like or int
-          - float: fraction of total height, if < 1, or pixels if >= 1.
-          - 1-D array-like: random elements from the array.
-          - int: integer number of pixels from interval
-              `(-height_shift_range, +height_shift_range)`
-          - With `height_shift_range=2` possible values
-              are integers `[-1, 0, +1]`,
-              same as with `height_shift_range=[-1, 0, +1]`,
-              while with `height_shift_range=1.0` possible values are floats
-              in the interval [-1.0, +1.0).
-      brightness_range: Tuple or list of two floats. Range for picking
-          a brightness shift value from.
-      shear_range: Float. Shear intensity
-          (shear angle in counter-clockwise direction, in degrees).
-      zoom_range: Float or [lower, upper]. Range for random zoom.
-          If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`.
-      channel_shift_range: Float. Range for random channel shifts.
-      fill_mode: One of {"constant", "nearest", "reflect" or "wrap"}.
-          Default is 'nearest'.
-          Points outside the boundaries of the input are filled
-          according to the given mode:
-          - 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k)
-          - 'nearest':  aaaaaaaa|abcd|dddddddd
-          - 'reflect':  abcddcba|abcd|dcbaabcd
-          - 'wrap':  abcdabcd|abcd|abcdabcd
-      cval: Float or Int.
-          Value used for points outside the boundaries
-          when `fill_mode = "constant"`.
-      horizontal_flip: Boolean. Randomly flip inputs horizontally.
-      vertical_flip: Boolean. Randomly flip inputs vertically.
-      rescale: rescaling factor. Defaults to None.
-          If None or 0, no rescaling is applied,
-          otherwise we multiply the data by the value provided
-          (after applying all other transformations).
-      preprocessing_function: function that will be applied on each input.
-          The function will run after the image is resized and augmented.
-          The function should take one argument:
-          one image (Numpy tensor with rank 3),
-          and should output a Numpy tensor with the same shape.
-      data_format: Image data format,
-          either "channels_first" or "channels_last".
-          "channels_last" mode means that the images should have shape
-          `(samples, height, width, channels)`,
-          "channels_first" mode means that the images should have shape
-          `(samples, channels, height, width)`.
-          It defaults to the `image_data_format` value found in your
-          Keras config file at `~/.keras/keras.json`.
-          If you never set it, then it will be "channels_last".
-      validation_split: Float. Fraction of images reserved for validation
-          (strictly between 0 and 1).
-      dtype: Dtype to use for the generated arrays.
-
-  Raises:
-    ValueError: If the value of the `data_format` argument is other than
-          `"channels_last"` or `"channels_first"`.
-    ValueError: If the value of the `validation_split` argument is greater
-          than 1 or less than 0.
-
-  Examples:
-
-  Example of using `.flow(x, y)`:
-
-  ```python
-  (x_train, y_train), (x_test, y_test) = cifar10.load_data()
-  y_train = utils.to_categorical(y_train, num_classes)
-  y_test = utils.to_categorical(y_test, num_classes)
-  datagen = ImageDataGenerator(
-      featurewise_center=True,
-      featurewise_std_normalization=True,
-      rotation_range=20,
-      width_shift_range=0.2,
-      height_shift_range=0.2,
-      horizontal_flip=True,
-      validation_split=0.2)
-  # compute quantities required for featurewise normalization
-  # (std, mean, and principal components if ZCA whitening is applied)
-  datagen.fit(x_train)
-  # fits the model on batches with real-time data augmentation:
-  model.fit(
-      datagen.flow(x_train, y_train, batch_size=32, subset='training'),
-      validation_data=datagen.flow(
-          x_train, y_train, batch_size=8, subset='validation'),
-      steps_per_epoch=len(x_train) / 32, epochs=epochs)
-  # here's a more "manual" example
-  for e in range(epochs):
-      print('Epoch', e)
-      batches = 0
-      for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32):
-          model.fit(x_batch, y_batch)
-          batches += 1
-          if batches >= len(x_train) / 32:
-              # we need to break the loop by hand because
-              # the generator loops indefinitely
-              break
-  ```
-
-  Example of using `.flow_from_directory(directory)`:
-
-  ```python
-  train_datagen = ImageDataGenerator(
-          rescale=1./255,
-          shear_range=0.2,
-          zoom_range=0.2,
-          horizontal_flip=True)
-  test_datagen = ImageDataGenerator(rescale=1./255)
-  train_generator = train_datagen.flow_from_directory(
-          'data/train',
-          target_size=(150, 150),
-          batch_size=32,
-          class_mode='binary')
-  validation_generator = test_datagen.flow_from_directory(
-          'data/validation',
-          target_size=(150, 150),
-          batch_size=32,
-          class_mode='binary')
-  model.fit(
-          train_generator,
-          steps_per_epoch=2000,
-          epochs=50,
-          validation_data=validation_generator,
-          validation_steps=800)
-  ```
-
-  Example of transforming images and masks together.
-
-  ```python
-  # we create two instances with the same arguments
-  data_gen_args = dict(featurewise_center=True,
-                       featurewise_std_normalization=True,
-                       rotation_range=90,
-                       width_shift_range=0.1,
-                       height_shift_range=0.1,
-                       zoom_range=0.2)
-  image_datagen = ImageDataGenerator(**data_gen_args)
-  mask_datagen = ImageDataGenerator(**data_gen_args)
-  # Provide the same seed and keyword arguments to the fit and flow methods
-  seed = 1
-  image_datagen.fit(images, augment=True, seed=seed)
-  mask_datagen.fit(masks, augment=True, seed=seed)
-  image_generator = image_datagen.flow_from_directory(
-      'data/images',
-      class_mode=None,
-      seed=seed)
-  mask_generator = mask_datagen.flow_from_directory(
-      'data/masks',
-      class_mode=None,
-      seed=seed)
-  # combine generators into one which yields image and masks
-  train_generator = zip(image_generator, mask_generator)
-  model.fit(
-      train_generator,
-      steps_per_epoch=2000,
-      epochs=50)
-  ```
-  """
-
-  def __init__(self,
-               featurewise_center=False,
-               samplewise_center=False,
-               featurewise_std_normalization=False,
-               samplewise_std_normalization=False,
-               zca_whitening=False,
-               zca_epsilon=1e-6,
-               rotation_range=0,
-               width_shift_range=0.,
-               height_shift_range=0.,
-               brightness_range=None,
-               shear_range=0.,
-               zoom_range=0.,
-               channel_shift_range=0.,
-               fill_mode='nearest',
-               cval=0.,
-               horizontal_flip=False,
-               vertical_flip=False,
-               rescale=None,
-               preprocessing_function=None,
-               data_format=None,
-               validation_split=0.0,
-               dtype=None):
-    if data_format is None:
-      data_format = backend.image_data_format()
-    kwargs = {}
-    if 'dtype' in tf_inspect.getfullargspec(
-        image.ImageDataGenerator.__init__)[0]:
-      if dtype is None:
-        dtype = backend.floatx()
-      kwargs['dtype'] = dtype
-    super(ImageDataGenerator, self).__init__(
-        featurewise_center=featurewise_center,
-        samplewise_center=samplewise_center,
-        featurewise_std_normalization=featurewise_std_normalization,
-        samplewise_std_normalization=samplewise_std_normalization,
-        zca_whitening=zca_whitening,
-        zca_epsilon=zca_epsilon,
-        rotation_range=rotation_range,
-        width_shift_range=width_shift_range,
-        height_shift_range=height_shift_range,
-        brightness_range=brightness_range,
-        shear_range=shear_range,
-        zoom_range=zoom_range,
-        channel_shift_range=channel_shift_range,
-        fill_mode=fill_mode,
-        cval=cval,
-        horizontal_flip=horizontal_flip,
-        vertical_flip=vertical_flip,
-        rescale=rescale,
-        preprocessing_function=preprocessing_function,
-        data_format=data_format,
-        validation_split=validation_split,
-        **kwargs)
-
-  def flow(self,
-           x,
-           y=None,
-           batch_size=32,
-           shuffle=True,
-           sample_weight=None,
-           seed=None,
-           save_to_dir=None,
-           save_prefix='',
-           save_format='png',
-           subset=None):
-    """Takes data & label arrays, generates batches of augmented data.
-
-    Args:
-        x: Input data. Numpy array of rank 4 or a tuple. If tuple, the first
-          element should contain the images and the second element another numpy
-          array or a list of numpy arrays that gets passed to the output without
-          any modifications. Can be used to feed the model miscellaneous data
-          along with the images. In case of grayscale data, the channels axis of
-          the image array should have value 1, in case of RGB data, it should
-          have value 3, and in case of RGBA data, it should have value 4.
-        y: Labels.
-        batch_size: Int (default: 32).
-        shuffle: Boolean (default: True).
-        sample_weight: Sample weights.
-        seed: Int (default: None).
-        save_to_dir: None or str (default: None). This allows you to optionally
-          specify a directory to which to save the augmented pictures being
-          generated (useful for visualizing what you are doing).
-        save_prefix: Str (default: `''`). Prefix to use for filenames of saved
-          pictures (only relevant if `save_to_dir` is set).
-        save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif",
-            "tif", "jpg"
-            (only relevant if `save_to_dir` is set). Default: "png".
-        subset: Subset of data (`"training"` or `"validation"`) if
-          `validation_split` is set in `ImageDataGenerator`.
-
-    Returns:
-        An `Iterator` yielding tuples of `(x, y)`
-            where `x` is a numpy array of image data
-            (in the case of a single image input) or a list
-            of numpy arrays (in the case with
-            additional inputs) and `y` is a numpy array
-            of corresponding labels. If 'sample_weight' is not None,
-            the yielded tuples are of the form `(x, y, sample_weight)`.
-            If `y` is None, only the numpy array `x` is returned.
-    Raises:
-      ValueError: If the value of the `subset` argument is other than
-            `"training"` or `"validation"`.
-
-    """
-    return NumpyArrayIterator(
-        x,
-        y,
-        self,
-        batch_size=batch_size,
-        shuffle=shuffle,
-        sample_weight=sample_weight,
-        seed=seed,
-        data_format=self.data_format,
-        save_to_dir=save_to_dir,
-        save_prefix=save_prefix,
-        save_format=save_format,
-        subset=subset)
-
-  def flow_from_directory(self,
-                          directory,
-                          target_size=(256, 256),
-                          color_mode='rgb',
-                          classes=None,
-                          class_mode='categorical',
-                          batch_size=32,
-                          shuffle=True,
-                          seed=None,
-                          save_to_dir=None,
-                          save_prefix='',
-                          save_format='png',
-                          follow_links=False,
-                          subset=None,
-                          interpolation='nearest'):
-    """Takes the path to a directory & generates batches of augmented data.
-
-    Args:
-        directory: string, path to the target directory. It should contain one
-          subdirectory per class. Any PNG, JPG, BMP, PPM or TIF images inside
-          each of the subdirectories' directory trees will be included in the
-          generator. See [this script](
-            https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d)
-              for more details.
-        target_size: Tuple of integers `(height, width)`, defaults to `(256,
-          256)`. The dimensions to which all images found will be resized.
-        color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". Whether
-          the images will be converted to have 1, 3, or 4 channels.
-        classes: Optional list of class subdirectories
-            (e.g. `['dogs', 'cats']`). Default: None. If not provided, the list
-              of classes will be automatically inferred from the subdirectory
-              names/structure under `directory`, where each subdirectory will be
-              treated as a different class (and the order of the classes, which
-              will map to the label indices, will be alphanumeric). The
-              dictionary containing the mapping from class names to class
-              indices can be obtained via the attribute `class_indices`.
-        class_mode: One of "categorical", "binary", "sparse",
-            "input", or None. Default: "categorical".
-            Determines the type of label arrays that are returned:
-            - "categorical" will be 2D one-hot encoded labels,
-            - "binary" will be 1D binary labels,
-            - "sparse" will be 1D integer labels,
-            - "input"  will be images identical to input images (mainly used to
-              work with autoencoders).
-            - If None, no labels are returned (the generator will only yield
-              batches of image data, which is useful to use with
-              `model.predict()`).
-            Please note that in case of class_mode None, the data still needs to
-            reside in a subdirectory of `directory` for it to work correctly.
-        batch_size: Size of the batches of data (default: 32).
-        shuffle: Whether to shuffle the data (default: True). If set to False,
-          sorts the data in alphanumeric order.
-        seed: Optional random seed for shuffling and transformations.
-        save_to_dir: None or str (default: None). This allows you to optionally
-          specify a directory to which to save the augmented pictures being
-          generated (useful for visualizing what you are doing).
-        save_prefix: Str. Prefix to use for filenames of saved pictures (only
-          relevant if `save_to_dir` is set).
-        save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif",
-            "tif", "jpg"
-            (only relevant if `save_to_dir` is set). Default: "png".
-        follow_links: Whether to follow symlinks inside
-            class subdirectories (default: False).
-        subset: Subset of data (`"training"` or `"validation"`) if
-          `validation_split` is set in `ImageDataGenerator`.
-        interpolation: Interpolation method used to resample the image if the
-          target size is different from that of the loaded image. Supported
-          methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version
-          1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL
-          version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also
-          supported. By default, `"nearest"` is used.
-
-    Returns:
-        A `DirectoryIterator` yielding tuples of `(x, y)`
-            where `x` is a numpy array containing a batch
-            of images with shape `(batch_size, *target_size, channels)`
-            and `y` is a numpy array of corresponding labels.
-    """
-    return DirectoryIterator(
-        directory,
-        self,
-        target_size=target_size,
-        color_mode=color_mode,
-        classes=classes,
-        class_mode=class_mode,
-        data_format=self.data_format,
-        batch_size=batch_size,
-        shuffle=shuffle,
-        seed=seed,
-        save_to_dir=save_to_dir,
-        save_prefix=save_prefix,
-        save_format=save_format,
-        follow_links=follow_links,
-        subset=subset,
-        interpolation=interpolation)
-
-  def flow_from_dataframe(self,
-                          dataframe,
-                          directory=None,
-                          x_col='filename',
-                          y_col='class',
-                          weight_col=None,
-                          target_size=(256, 256),
-                          color_mode='rgb',
-                          classes=None,
-                          class_mode='categorical',
-                          batch_size=32,
-                          shuffle=True,
-                          seed=None,
-                          save_to_dir=None,
-                          save_prefix='',
-                          save_format='png',
-                          subset=None,
-                          interpolation='nearest',
-                          validate_filenames=True,
-                          **kwargs):
-    """Takes the dataframe and the path to a directory + generates batches.
-
-    The generated batches contain augmented/normalized data.
-
-    A simple tutorial can be found
-    [here](http://bit.ly/keras_flow_from_dataframe).
-
-    Args:
-        dataframe: Pandas dataframe containing the filepaths relative to
-          `directory` (or absolute paths if `directory` is None) of the images
-          in a string column. It should include other column/s
-            depending on the `class_mode`:
-            - if `class_mode` is `"categorical"` (default value) it must include
-              the `y_col` column with the class/es of each image. Values in
-              the column can be a string (single class) or a list/tuple
-              (multiple classes).
-            - if `class_mode` is `"binary"` or `"sparse"` it must include the
-              given `y_col` column with class values as strings.
-            - if `class_mode` is `"raw"` or `"multi_output"` it should contain
-              the columns specified in `y_col`.
-            - if `class_mode` is `"input"` or `None` no extra column is needed.
-        directory: string, path to the directory to read images from. If `None`,
-          data in `x_col` column should be absolute paths.
-        x_col: string, column in `dataframe` that contains the filenames (or
-          absolute paths if `directory` is `None`).
-        y_col: string or list, column/s in `dataframe` that has the target data.
-        weight_col: string, column in `dataframe` that contains the sample
-            weights. Default: `None`.
-        target_size: tuple of integers `(height, width)`, default: `(256, 256)`.
-          The dimensions to which all images found will be resized.
-        color_mode: one of "grayscale", "rgb", "rgba". Default: "rgb". Whether
-          the images will be converted to have 1, 3, or 4 color channels.
-        classes: optional list of classes (e.g. `['dogs', 'cats']`). Default is
-          None. If not provided, the list of classes will be automatically
-          inferred from `y_col`, and the order of the classes (which will map
-          to the label indices) will be alphanumeric. The dictionary containing
-          the mapping from class names to class indices can be obtained via the
-          attribute `class_indices`.
-        class_mode: one of "binary", "categorical", "input", "multi_output",
-            "raw", sparse" or None. Default: "categorical".
-            Mode for yielding the targets:
-            - `"binary"`: 1D numpy array of binary labels,
-            - `"categorical"`: 2D numpy array of one-hot encoded labels.
-              Supports multi-label output.
-            - `"input"`: images identical to input images (mainly used to work
-              with autoencoders),
-            - `"multi_output"`: list with the values of the different columns,
-            - `"raw"`: numpy array of values in `y_col` column(s),
-            - `"sparse"`: 1D numpy array of integer labels,
-            - `None`, no targets are returned (the generator will only yield
-              batches of image data, which is useful to use in
-              `model.predict()`).
-        batch_size: size of the batches of data (default: 32).
-        shuffle: whether to shuffle the data (default: True)
-        seed: optional random seed for shuffling and transformations.
-        save_to_dir: None or str (default: None). This allows you to optionally
-          specify a directory to which to save the augmented pictures being
-          generated (useful for visualizing what you are doing).
-        save_prefix: str. Prefix to use for filenames of saved pictures (only
-          relevant if `save_to_dir` is set).
-        save_format: one of "png", "jpeg", "bmp", "pdf", "ppm", "gif",
-            "tif", "jpg"
-            (only relevant if `save_to_dir` is set). Default: "png".
-        subset: Subset of data (`"training"` or `"validation"`) if
-          `validation_split` is set in `ImageDataGenerator`.
-        interpolation: Interpolation method used to resample the image if the
-          target size is different from that of the loaded image. Supported
-          methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. If PIL version
-          1.1.3 or newer is installed, `"lanczos"` is also supported. If PIL
-          version 3.4.0 or newer is installed, `"box"` and `"hamming"` are also
-          supported. By default, `"nearest"` is used.
-        validate_filenames: Boolean, whether to validate image filenames in
-          `x_col`. If `True`, invalid images will be ignored. Disabling this
-          option can lead to speed-up in the execution of this function.
-          Defaults to `True`.
-        **kwargs: legacy arguments for raising deprecation warnings.
-
-    Returns:
-        A `DataFrameIterator` yielding tuples of `(x, y)`
-        where `x` is a numpy array containing a batch
-        of images with shape `(batch_size, *target_size, channels)`
-        and `y` is a numpy array of corresponding labels.
-    """
-    if 'has_ext' in kwargs:
-      tf_logging.warning(
-          'has_ext is deprecated, filenames in the dataframe have '
-          'to match the exact filenames on disk.', DeprecationWarning)
-    if 'sort' in kwargs:
-      tf_logging.warning(
-          'sort is deprecated, batches will be created in the '
-          'same order as the filenames provided if shuffle '
-          'is set to False.', DeprecationWarning)
-    if class_mode == 'other':
-      tf_logging.warning(
-          '`class_mode` "other" is deprecated, please use '
-          '`class_mode` "raw".', DeprecationWarning)
-      class_mode = 'raw'
-    if 'drop_duplicates' in kwargs:
-      tf_logging.warning(
-          'drop_duplicates is deprecated, you can drop duplicates '
-          'by using the pandas.DataFrame.drop_duplicates method.',
-          DeprecationWarning)
-
-    return DataFrameIterator(
-        dataframe,
-        directory,
-        self,
-        x_col=x_col,
-        y_col=y_col,
-        weight_col=weight_col,
-        target_size=target_size,
-        color_mode=color_mode,
-        classes=classes,
-        class_mode=class_mode,
-        data_format=self.data_format,
-        batch_size=batch_size,
-        shuffle=shuffle,
-        seed=seed,
-        save_to_dir=save_to_dir,
-        save_prefix=save_prefix,
-        save_format=save_format,
-        subset=subset,
-        interpolation=interpolation,
-        validate_filenames=validate_filenames)
-
-
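For completeness, `flow_from_dataframe` is the only `flow_*` method above without an inline example. The sketch below is illustrative only; the dataframe contents and `directory` path are made up and not part of the removed code:

```python
import pandas as pd
import tensorflow as tf

# Hypothetical dataframe: one row per image, filenames relative to `directory`.
df = pd.DataFrame({
    'filename': ['cat_001.jpg', 'dog_001.jpg'],
    'class': ['cats', 'dogs'],
})

datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
train_generator = datagen.flow_from_dataframe(
    df,
    directory='data/images',
    x_col='filename',
    y_col='class',
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical')
```

Each yielded batch is a tuple `(x, y)` with `x` of shape `(batch_size, 150, 150, 3)` and `y` one-hot encoded, as described in the `DataFrameIterator` docstring above.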
-keras_export('keras.preprocessing.image.random_rotation')(random_rotation)
-keras_export('keras.preprocessing.image.random_shift')(random_shift)
-keras_export('keras.preprocessing.image.random_shear')(random_shear)
-keras_export('keras.preprocessing.image.random_zoom')(random_zoom)
-keras_export(
-    'keras.preprocessing.image.apply_channel_shift')(apply_channel_shift)
-keras_export(
-    'keras.preprocessing.image.random_channel_shift')(random_channel_shift)
-keras_export(
-    'keras.preprocessing.image.apply_brightness_shift')(apply_brightness_shift)
-keras_export('keras.preprocessing.image.random_brightness')(random_brightness)
-keras_export(
-    'keras.preprocessing.image.apply_affine_transform')(apply_affine_transform)
diff --git a/tensorflow/python/keras/preprocessing/image_dataset.py b/tensorflow/python/keras/preprocessing/image_dataset.py
deleted file mode 100644
index e87159f..0000000
--- a/tensorflow/python/keras/preprocessing/image_dataset.py
+++ /dev/null
@@ -1,263 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras image dataset loading utilities."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.keras.layers.preprocessing import image_preprocessing
-from tensorflow.python.keras.preprocessing import dataset_utils
-from tensorflow.python.keras.preprocessing import image as keras_image_ops
-from tensorflow.python.ops import image_ops
-from tensorflow.python.ops import io_ops
-from tensorflow.python.util.tf_export import keras_export
-
-
-ALLOWLIST_FORMATS = ('.bmp', '.gif', '.jpeg', '.jpg', '.png')
-
-
-@keras_export('keras.utils.image_dataset_from_directory',
-              'keras.preprocessing.image_dataset_from_directory',
-              v1=[])
-def image_dataset_from_directory(directory,
-                                 labels='inferred',
-                                 label_mode='int',
-                                 class_names=None,
-                                 color_mode='rgb',
-                                 batch_size=32,
-                                 image_size=(256, 256),
-                                 shuffle=True,
-                                 seed=None,
-                                 validation_split=None,
-                                 subset=None,
-                                 interpolation='bilinear',
-                                 follow_links=False,
-                                 crop_to_aspect_ratio=False,
-                                 **kwargs):
-  """Generates a `tf.data.Dataset` from image files in a directory.
-
-  If your directory structure is:
-
-  ```
-  main_directory/
-  ...class_a/
-  ......a_image_1.jpg
-  ......a_image_2.jpg
-  ...class_b/
-  ......b_image_1.jpg
-  ......b_image_2.jpg
-  ```
-
-  Then calling `image_dataset_from_directory(main_directory, labels='inferred')`
-  will return a `tf.data.Dataset` that yields batches of images from
-  the subdirectories `class_a` and `class_b`, together with labels
-  0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`).
-
-  Supported image formats: jpeg, png, bmp, gif.
-  Animated gifs are truncated to the first frame.
-
-  Args:
-    directory: Directory where the data is located.
-        If `labels` is "inferred", it should contain
-        subdirectories, each containing images for a class.
-        Otherwise, the directory structure is ignored.
-    labels: Either "inferred"
-        (labels are generated from the directory structure),
-        None (no labels),
-        or a list/tuple of integer labels of the same size as the number of
-        image files found in the directory. Labels should be sorted according
-        to the alphanumeric order of the image file paths
-        (obtained via `os.walk(directory)` in Python).
-    label_mode:
-        - 'int': means that the labels are encoded as integers
-            (e.g. for `sparse_categorical_crossentropy` loss).
-        - 'categorical' means that the labels are
-            encoded as a categorical vector
-            (e.g. for `categorical_crossentropy` loss).
-        - 'binary' means that the labels (there can be only 2)
-            are encoded as `float32` scalars with values 0 or 1
-            (e.g. for `binary_crossentropy`).
-        - None (no labels).
-    class_names: Only valid if "labels" is "inferred". This is the explicit
-        list of class names (must match names of subdirectories). Used
-        to control the order of the classes
-        (otherwise alphanumerical order is used).
-    color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb".
-        Whether the images will be converted to
-        have 1, 3, or 4 channels.
-    batch_size: Size of the batches of data. Default: 32.
-    image_size: Size to resize images to after they are read from disk.
-        Defaults to `(256, 256)`.
-        Since the pipeline processes batches of images that must all have
-        the same size, this must be provided.
-    shuffle: Whether to shuffle the data. Default: True.
-        If set to False, sorts the data in alphanumeric order.
-    seed: Optional random seed for shuffling and transformations.
-    validation_split: Optional float between 0 and 1,
-        fraction of data to reserve for validation.
-    subset: One of "training" or "validation".
-        Only used if `validation_split` is set.
-    interpolation: String, the interpolation method used when resizing images.
-      Defaults to `bilinear`. Supports `bilinear`, `nearest`, `bicubic`,
-      `area`, `lanczos3`, `lanczos5`, `gaussian`, `mitchellcubic`.
-    follow_links: Whether to visit subdirectories pointed to by symlinks.
-        Defaults to False.
-    crop_to_aspect_ratio: If True, resize the images without aspect
-      ratio distortion. When the original aspect ratio differs from the target
-      aspect ratio, the output image will be cropped so as to return the largest
-      possible window in the image (of size `image_size`) that matches
-      the target aspect ratio. By default (`crop_to_aspect_ratio=False`),
-      aspect ratio may not be preserved.
-    **kwargs: Legacy keyword arguments.
-
-  Returns:
-    A `tf.data.Dataset` object.
-      - If `label_mode` is None, it yields `float32` tensors of shape
-        `(batch_size, image_size[0], image_size[1], num_channels)`,
-        encoding images (see below for rules regarding `num_channels`).
-      - Otherwise, it yields a tuple `(images, labels)`, where `images`
-        has shape `(batch_size, image_size[0], image_size[1], num_channels)`,
-        and `labels` follows the format described below.
-
-  Rules regarding labels format:
-    - if `label_mode` is `int`, the labels are an `int32` tensor of shape
-      `(batch_size,)`.
-    - if `label_mode` is `binary`, the labels are a `float32` tensor of
-      1s and 0s of shape `(batch_size, 1)`.
-    - if `label_mode` is `categorical`, the labels are a `float32` tensor
-      of shape `(batch_size, num_classes)`, representing a one-hot
-      encoding of the class index.
-
-  Rules regarding number of channels in the yielded images:
-    - if `color_mode` is `grayscale`,
-      there's 1 channel in the image tensors.
-    - if `color_mode` is `rgb`,
-      there are 3 channels in the image tensors.
-    - if `color_mode` is `rgba`,
-      there are 4 channels in the image tensors.
-  """
-  if 'smart_resize' in kwargs:
-    crop_to_aspect_ratio = kwargs.pop('smart_resize')
-  if kwargs:
-    raise TypeError(f'Unknown keyword argument(s): {tuple(kwargs.keys())}')
-  if labels not in ('inferred', None):
-    if not isinstance(labels, (list, tuple)):
-      raise ValueError(
-          '`labels` argument should be a list/tuple of integer labels, of '
-          'the same size as the number of image files in the target '
-          'directory. If you wish to infer the labels from the subdirectory '
-          'names in the target directory, pass `labels="inferred"`. '
-          'If you wish to get a dataset that only contains images '
-          '(no labels), pass `label_mode=None`.')
-    if class_names:
-      raise ValueError('You can only pass `class_names` if the labels are '
-                       'inferred from the subdirectory names in the target '
-                       'directory (`labels="inferred"`).')
-  if label_mode not in {'int', 'categorical', 'binary', None}:
-    raise ValueError(
-        '`label_mode` argument must be one of "int", "categorical", "binary", '
-        'or None. Received: %s' % (label_mode,))
-  if labels is None or label_mode is None:
-    labels = None
-    label_mode = None
-  if color_mode == 'rgb':
-    num_channels = 3
-  elif color_mode == 'rgba':
-    num_channels = 4
-  elif color_mode == 'grayscale':
-    num_channels = 1
-  else:
-    raise ValueError(
-        '`color_mode` must be one of {"rgb", "rgba", "grayscale"}. '
-        'Received: %s' % (color_mode,))
-  interpolation = image_preprocessing.get_interpolation(interpolation)
-  dataset_utils.check_validation_split_arg(
-      validation_split, subset, shuffle, seed)
-
-  if seed is None:
-    seed = np.random.randint(1e6)
-  image_paths, labels, class_names = dataset_utils.index_directory(
-      directory,
-      labels,
-      formats=ALLOWLIST_FORMATS,
-      class_names=class_names,
-      shuffle=shuffle,
-      seed=seed,
-      follow_links=follow_links)
-
-  if label_mode == 'binary' and len(class_names) != 2:
-    raise ValueError(
-        'When passing `label_mode="binary"`, there must be exactly 2 classes. '
-        'Found the following classes: %s' % (class_names,))
-
-  image_paths, labels = dataset_utils.get_training_or_validation_split(
-      image_paths, labels, validation_split, subset)
-  if not image_paths:
-    raise ValueError('No images found.')
-
-  dataset = paths_and_labels_to_dataset(
-      image_paths=image_paths,
-      image_size=image_size,
-      num_channels=num_channels,
-      labels=labels,
-      label_mode=label_mode,
-      num_classes=len(class_names),
-      interpolation=interpolation,
-      crop_to_aspect_ratio=crop_to_aspect_ratio)
-  if shuffle:
-    # Shuffle locally at each iteration
-    dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed)
-  dataset = dataset.batch(batch_size)
-  # Users may need to reference `class_names`.
-  dataset.class_names = class_names
-  # Include file paths for images as attribute.
-  dataset.file_paths = image_paths
-  return dataset
-
-
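For reference, a minimal call of the deleted loader against the `main_directory` layout shown in the docstring (the path and seed are illustrative); the same entry point is also exported as `tf.keras.utils.image_dataset_from_directory`:

```python
import tensorflow as tf

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    'main_directory',
    labels='inferred',
    label_mode='int',
    image_size=(256, 256),
    batch_size=32,
    validation_split=0.2,
    subset='training',
    seed=123)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    'main_directory',
    labels='inferred',
    label_mode='int',
    image_size=(256, 256),
    batch_size=32,
    validation_split=0.2,
    subset='validation',
    seed=123)

class_names = train_ds.class_names  # Attached as an attribute, see above.
```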
-def paths_and_labels_to_dataset(image_paths,
-                                image_size,
-                                num_channels,
-                                labels,
-                                label_mode,
-                                num_classes,
-                                interpolation,
-                                crop_to_aspect_ratio=False):
-  """Constructs a dataset of images and labels."""
-  # TODO(fchollet): consider making num_parallel_calls settable
-  path_ds = dataset_ops.Dataset.from_tensor_slices(image_paths)
-  args = (image_size, num_channels, interpolation, crop_to_aspect_ratio)
-  img_ds = path_ds.map(
-      lambda x: load_image(x, *args))
-  if label_mode:
-    label_ds = dataset_utils.labels_to_dataset(labels, label_mode, num_classes)
-    img_ds = dataset_ops.Dataset.zip((img_ds, label_ds))
-  return img_ds
-
-
-def load_image(path, image_size, num_channels, interpolation,
-               crop_to_aspect_ratio=False):
-  """Load an image from a path and resize it."""
-  img = io_ops.read_file(path)
-  img = image_ops.decode_image(
-      img, channels=num_channels, expand_animations=False)
-  if crop_to_aspect_ratio:
-    img = keras_image_ops.smart_resize(img, image_size,
-                                       interpolation=interpolation)
-  else:
-    img = image_ops.resize_images_v2(img, image_size, method=interpolation)
-  img.set_shape((image_size[0], image_size[1], num_channels))
-  return img
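The private helpers `paths_and_labels_to_dataset` and `load_image` above have no public counterpart. A rough equivalent built only from public `tf.data` and `tf.image` ops might look like the following sketch (the file names and label values are placeholders):

```python
import tensorflow as tf


def load_image(path, image_size=(256, 256), num_channels=3):
  # Read, decode, and resize a single image, mirroring the deleted helper.
  img = tf.io.read_file(path)
  img = tf.io.decode_image(img, channels=num_channels, expand_animations=False)
  img = tf.image.resize(img, image_size, method='bilinear')
  img.set_shape((image_size[0], image_size[1], num_channels))
  return img


image_paths = ['class_a/a_image_1.jpg', 'class_b/b_image_1.jpg']  # Placeholders.
labels = [0, 1]

img_ds = tf.data.Dataset.from_tensor_slices(image_paths).map(load_image)
label_ds = tf.data.Dataset.from_tensor_slices(labels)
ds = tf.data.Dataset.zip((img_ds, label_ds)).batch(32)
```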
diff --git a/tensorflow/python/keras/preprocessing/image_dataset_test.py b/tensorflow/python/keras/preprocessing/image_dataset_test.py
deleted file mode 100644
index 51f2dc6..0000000
--- a/tensorflow/python/keras/preprocessing/image_dataset_test.py
+++ /dev/null
@@ -1,354 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for image_dataset."""
-
-import os
-import shutil
-
-import numpy as np
-
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.eager import def_function
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.preprocessing import image as image_preproc
-from tensorflow.python.keras.preprocessing import image_dataset
-from tensorflow.python.platform import test
-
-try:
-  import PIL  # pylint:disable=g-import-not-at-top
-except ImportError:
-  PIL = None
-
-
-class ImageDatasetFromDirectoryTest(keras_parameterized.TestCase):
-
-  def _get_images(self, count=16, color_mode='rgb'):
-    width = height = 24
-    imgs = []
-    for _ in range(count):
-      if color_mode == 'grayscale':
-        img = np.random.randint(0, 256, size=(height, width, 1))
-      elif color_mode == 'rgba':
-        img = np.random.randint(0, 256, size=(height, width, 4))
-      else:
-        img = np.random.randint(0, 256, size=(height, width, 3))
-      img = image_preproc.array_to_img(img)
-      imgs.append(img)
-    return imgs
-
-  def _prepare_directory(self,
-                         num_classes=2,
-                         grayscale=False,
-                         nested_dirs=False,
-                         color_mode='rgb',
-                         count=16):
-    # Get a unique temp directory
-    temp_dir = os.path.join(self.get_temp_dir(), str(np.random.randint(1e6)))
-    os.mkdir(temp_dir)
-    self.addCleanup(shutil.rmtree, temp_dir)
-
-    # Generate paths to class subdirectories
-    paths = []
-    for class_index in range(num_classes):
-      class_directory = 'class_%s' % (class_index,)
-      if nested_dirs:
-        class_paths = [
-            class_directory, os.path.join(class_directory, 'subfolder_1'),
-            os.path.join(class_directory, 'subfolder_2'), os.path.join(
-                class_directory, 'subfolder_1', 'sub-subfolder')
-        ]
-      else:
-        class_paths = [class_directory]
-      for path in class_paths:
-        os.mkdir(os.path.join(temp_dir, path))
-      paths += class_paths
-
-    # Save images to the paths
-    i = 0
-    for img in self._get_images(color_mode=color_mode, count=count):
-      path = paths[i % len(paths)]
-      if color_mode == 'rgb':
-        ext = 'jpg'
-      else:
-        ext = 'png'
-      filename = os.path.join(path, 'image_%s.%s' % (i, ext))
-      img.save(os.path.join(temp_dir, filename))
-      i += 1
-    return temp_dir
-
-  def test_image_dataset_from_directory_standalone(self):
-    # Test retrieving images without labels from a directory and its subdirs.
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    # Save a few extra images in the parent directory.
-    directory = self._prepare_directory(count=7, num_classes=2)
-    for i, img in enumerate(self._get_images(3)):
-      filename = 'image_%s.jpg' % (i,)
-      img.save(os.path.join(directory, filename))
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=5, image_size=(18, 18), labels=None)
-    batch = next(iter(dataset))
-    # We return plain images
-    self.assertEqual(batch.shape, (5, 18, 18, 3))
-    self.assertEqual(batch.dtype.name, 'float32')
-    # Count samples
-    batch_count = 0
-    sample_count = 0
-    for batch in dataset:
-      batch_count += 1
-      sample_count += batch.shape[0]
-    self.assertEqual(batch_count, 2)
-    self.assertEqual(sample_count, 10)
-
-  def test_image_dataset_from_directory_binary(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=2)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode='int')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-    self.assertEqual(batch[1].shape, (8,))
-    self.assertEqual(batch[1].dtype.name, 'int32')
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode='binary')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-    self.assertEqual(batch[1].shape, (8, 1))
-    self.assertEqual(batch[1].dtype.name, 'float32')
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode='categorical')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-    self.assertEqual(batch[1].shape, (8, 2))
-    self.assertEqual(batch[1].dtype.name, 'float32')
-
-  def test_static_shape_in_graph(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=2)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode='int')
-    test_case = self
-
-    @def_function.function
-    def symbolic_fn(ds):
-      for x, _ in ds.take(1):
-        test_case.assertListEqual(x.shape.as_list(), [None, 18, 18, 3])
-
-    symbolic_fn(dataset)
-
-  def test_sample_count(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=4, count=15)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode=None)
-    sample_count = 0
-    for batch in dataset:
-      sample_count += batch.shape[0]
-    self.assertEqual(sample_count, 15)
-
-  def test_image_dataset_from_directory_multiclass(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=4, count=15)
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode=None)
-    batch = next(iter(dataset))
-    self.assertEqual(batch.shape, (8, 18, 18, 3))
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode=None)
-    sample_count = 0
-    iterator = iter(dataset)
-    for batch in dataset:
-      sample_count += next(iterator).shape[0]
-    self.assertEqual(sample_count, 15)
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode='int')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-    self.assertEqual(batch[1].shape, (8,))
-    self.assertEqual(batch[1].dtype.name, 'int32')
-
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode='categorical')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-    self.assertEqual(batch[1].shape, (8, 4))
-    self.assertEqual(batch[1].dtype.name, 'float32')
-
-  def test_image_dataset_from_directory_color_modes(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=4, color_mode='rgba')
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), color_mode='rgba')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 4))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-
-    directory = self._prepare_directory(num_classes=4, color_mode='grayscale')
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), color_mode='grayscale')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 1))
-    self.assertEqual(batch[0].dtype.name, 'float32')
-
-  def test_image_dataset_from_directory_validation_split(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=2, count=10)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=10, image_size=(18, 18),
-        validation_split=0.2, subset='training', seed=1337)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8, 18, 18, 3))
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=10, image_size=(18, 18),
-        validation_split=0.2, subset='validation', seed=1337)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (2, 18, 18, 3))
-
-  def test_image_dataset_from_directory_manual_labels(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=2, count=2)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18),
-        labels=[0, 1], shuffle=False)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertAllClose(batch[1], [0, 1])
-
-  def test_image_dataset_from_directory_follow_links(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=2, count=25,
-                                        nested_dirs=True)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=8, image_size=(18, 18), label_mode=None,
-        follow_links=True)
-    sample_count = 0
-    for batch in dataset:
-      sample_count += batch.shape[0]
-    self.assertEqual(sample_count, 25)
-
-  def test_image_dataset_from_directory_no_images(self):
-    directory = self._prepare_directory(num_classes=2, count=0)
-    with self.assertRaisesRegex(ValueError, 'No images found.'):
-      _ = image_dataset.image_dataset_from_directory(directory)
-
-  def test_image_dataset_from_directory_crop_to_aspect_ratio(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=2, count=5)
-    dataset = image_dataset.image_dataset_from_directory(
-        directory, batch_size=5, image_size=(18, 18), crop_to_aspect_ratio=True)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (5, 18, 18, 3))
-
-  def test_image_dataset_from_directory_errors(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    directory = self._prepare_directory(num_classes=3, count=5)
-
-    with self.assertRaisesRegex(ValueError, '`labels` argument should be'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, labels='other')
-
-    with self.assertRaisesRegex(ValueError, '`label_mode` argument must be'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, label_mode='other')
-
-    with self.assertRaisesRegex(ValueError, '`color_mode` must be one of'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, color_mode='other')
-
-    with self.assertRaisesRegex(
-        ValueError, 'only pass `class_names` if the labels are inferred'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, labels=[0, 0, 1, 1, 1],
-          class_names=['class_0', 'class_1', 'class_2'])
-
-    with self.assertRaisesRegex(
-        ValueError,
-        'Expected the lengths of `labels` to match the number of files'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, labels=[0, 0, 1, 1])
-
-    with self.assertRaisesRegex(
-        ValueError, '`class_names` passed did not match'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, class_names=['class_0', 'class_2'])
-
-    with self.assertRaisesRegex(ValueError, 'there must be exactly 2 classes'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, label_mode='binary')
-
-    with self.assertRaisesRegex(ValueError,
-                                '`validation_split` must be between 0 and 1'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, validation_split=2)
-
-    with self.assertRaisesRegex(ValueError,
-                                '`subset` must be either "training" or'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, validation_split=0.2, subset='other')
-
-    with self.assertRaisesRegex(ValueError, '`validation_split` must be set'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, validation_split=0, subset='training')
-
-    with self.assertRaisesRegex(ValueError, 'must provide a `seed`'):
-      _ = image_dataset.image_dataset_from_directory(
-          directory, validation_split=0.2, subset='training')
-
-
-if __name__ == '__main__':
-  v2_compat.enable_v2_behavior()
-  test.main()
diff --git a/tensorflow/python/keras/preprocessing/image_test.py b/tensorflow/python/keras/preprocessing/image_test.py
deleted file mode 100644
index 464f166..0000000
--- a/tensorflow/python/keras/preprocessing/image_test.py
+++ /dev/null
@@ -1,449 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for image preprocessing utils."""
-
-import os
-import shutil
-import tempfile
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import layers
-from tensorflow.python.keras.engine import sequential
-from tensorflow.python.keras.preprocessing import image as preprocessing_image
-from tensorflow.python.platform import test
-
-try:
-  import PIL  # pylint:disable=g-import-not-at-top
-except ImportError:
-  PIL = None
-
-
-def _generate_test_images():
-  img_w = img_h = 20
-  rgb_images = []
-  gray_images = []
-  for _ in range(8):
-    bias = np.random.rand(img_w, img_h, 1) * 64
-    variance = np.random.rand(img_w, img_h, 1) * (255 - 64)
-    imarray = np.random.rand(img_w, img_h, 3) * variance + bias
-    im = preprocessing_image.array_to_img(imarray, scale=False)
-    rgb_images.append(im)
-
-    imarray = np.random.rand(img_w, img_h, 1) * variance + bias
-    im = preprocessing_image.array_to_img(imarray, scale=False)
-    gray_images.append(im)
-
-  return [rgb_images, gray_images]
-
-
-class TestImage(keras_parameterized.TestCase):
-
-  def test_smart_resize(self):
-    test_input = np.random.random((20, 40, 3))
-    output = preprocessing_image.smart_resize(test_input, size=(50, 50))
-    self.assertIsInstance(output, np.ndarray)
-    self.assertListEqual(list(output.shape), [50, 50, 3])
-    output = preprocessing_image.smart_resize(test_input, size=(10, 10))
-    self.assertListEqual(list(output.shape), [10, 10, 3])
-    output = preprocessing_image.smart_resize(test_input, size=(100, 50))
-    self.assertListEqual(list(output.shape), [100, 50, 3])
-    output = preprocessing_image.smart_resize(test_input, size=(5, 15))
-    self.assertListEqual(list(output.shape), [5, 15, 3])
-
-  @parameterized.named_parameters(
-      ('size1', (50, 50)),
-      ('size2', (10, 10)),
-      ('size3', (100, 50)),
-      ('size4', (5, 15)))
-  def test_smart_resize_tf_dataset(self, size):
-    test_input_np = np.random.random((2, 20, 40, 3))
-    test_ds = dataset_ops.Dataset.from_tensor_slices(test_input_np)
-
-    resize = lambda img: preprocessing_image.smart_resize(img, size=size)
-    test_ds = test_ds.map(resize)
-    for sample in test_ds.as_numpy_iterator():
-      self.assertIsInstance(sample, np.ndarray)
-      self.assertListEqual(list(sample.shape), [size[0], size[1], 3])
-
-  def test_smart_resize_batch(self):
-    img = np.random.random((2, 20, 40, 3))
-    out = preprocessing_image.smart_resize(img, size=(20, 20))
-    self.assertListEqual(list(out.shape), [2, 20, 20, 3])
-    self.assertAllClose(out, img[:, :, 10:-10, :])
-
-  def test_smart_resize_errors(self):
-    with self.assertRaisesRegex(ValueError, 'a tuple of 2 integers'):
-      preprocessing_image.smart_resize(
-          np.random.random((20, 20, 2)), size=(10, 5, 3))
-    with self.assertRaisesRegex(ValueError, 'incorrect rank'):
-      preprocessing_image.smart_resize(np.random.random((2, 4)), size=(10, 5))
-    with self.assertRaisesRegex(ValueError, 'incorrect rank'):
-      preprocessing_image.smart_resize(
-          np.random.random((2, 4, 4, 5, 3)), size=(10, 5))
-
-  def test_image_data_generator(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    for test_images in _generate_test_images():
-      img_list = []
-      for im in test_images:
-        img_list.append(preprocessing_image.img_to_array(im)[None, ...])
-
-      images = np.vstack(img_list)
-      generator = preprocessing_image.ImageDataGenerator(
-          featurewise_center=True,
-          samplewise_center=True,
-          featurewise_std_normalization=True,
-          samplewise_std_normalization=True,
-          zca_whitening=True,
-          rotation_range=90.,
-          width_shift_range=0.1,
-          height_shift_range=0.1,
-          shear_range=0.5,
-          zoom_range=0.2,
-          channel_shift_range=0.,
-          brightness_range=(1, 5),
-          fill_mode='nearest',
-          cval=0.5,
-          horizontal_flip=True,
-          vertical_flip=True)
-      # Basic test before fit
-      x = np.random.random((32, 10, 10, 3))
-      generator.flow(x)
-
-      # Fit
-      generator.fit(images, augment=True)
-
-      for x, _ in generator.flow(
-          images,
-          np.arange(images.shape[0]),
-          shuffle=True):
-        self.assertEqual(x.shape[1:], images.shape[1:])
-        break
-
-  def test_image_data_generator_with_split_value_error(self):
-    with self.assertRaises(ValueError):
-      preprocessing_image.ImageDataGenerator(validation_split=5)
-
-  def test_image_data_generator_invalid_data(self):
-    generator = preprocessing_image.ImageDataGenerator(
-        featurewise_center=True,
-        samplewise_center=True,
-        featurewise_std_normalization=True,
-        samplewise_std_normalization=True,
-        zca_whitening=True,
-        data_format='channels_last')
-
-    # Test fit with invalid data
-    with self.assertRaises(ValueError):
-      x = np.random.random((3, 10, 10))
-      generator.fit(x)
-    # Test flow with invalid data
-    with self.assertRaises(ValueError):
-      generator.flow(np.arange(5))
-    # Invalid number of channels: will work but raise a warning
-    x = np.random.random((32, 10, 10, 5))
-    generator.flow(x)
-
-    with self.assertRaises(ValueError):
-      generator = preprocessing_image.ImageDataGenerator(
-          data_format='unknown')
-
-    generator = preprocessing_image.ImageDataGenerator(zoom_range=(2., 2.))
-
-  def test_image_data_generator_fit(self):
-    generator = preprocessing_image.ImageDataGenerator(
-        featurewise_center=True,
-        samplewise_center=True,
-        featurewise_std_normalization=True,
-        samplewise_std_normalization=True,
-        zca_whitening=True,
-        data_format='channels_last')
-    # Test grayscale
-    x = np.random.random((32, 10, 10, 1))
-    generator.fit(x)
-    # Test RGB
-    x = np.random.random((32, 10, 10, 3))
-    generator.fit(x)
-    generator = preprocessing_image.ImageDataGenerator(
-        featurewise_center=True,
-        samplewise_center=True,
-        featurewise_std_normalization=True,
-        samplewise_std_normalization=True,
-        zca_whitening=True,
-        data_format='channels_first')
-    # Test grayscale
-    x = np.random.random((32, 1, 10, 10))
-    generator.fit(x)
-    # Test RGB
-    x = np.random.random((32, 3, 10, 10))
-    generator.fit(x)
-
-  def test_directory_iterator(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    num_classes = 2
-
-    temp_dir = self.get_temp_dir()
-    self.addCleanup(shutil.rmtree, temp_dir)
-
-    # create folders and subfolders
-    paths = []
-    for cl in range(num_classes):
-      class_directory = 'class-{}'.format(cl)
-      classpaths = [
-          class_directory, os.path.join(class_directory, 'subfolder-1'),
-          os.path.join(class_directory, 'subfolder-2'), os.path.join(
-              class_directory, 'subfolder-1', 'sub-subfolder')
-      ]
-      for path in classpaths:
-        os.mkdir(os.path.join(temp_dir, path))
-      paths.append(classpaths)
-
-    # save the images in the paths
-    count = 0
-    filenames = []
-    for test_images in _generate_test_images():
-      for im in test_images:
-        # rotate image class
-        im_class = count % num_classes
-        # rotate subfolders
-        classpaths = paths[im_class]
-        filename = os.path.join(classpaths[count % len(classpaths)],
-                                'image-{}.jpg'.format(count))
-        filenames.append(filename)
-        im.save(os.path.join(temp_dir, filename))
-        count += 1
-
-    # Test image loading util
-    fname = os.path.join(temp_dir, filenames[0])
-    _ = preprocessing_image.load_img(fname)
-    _ = preprocessing_image.load_img(fname, grayscale=True)
-    _ = preprocessing_image.load_img(fname, target_size=(10, 10))
-    _ = preprocessing_image.load_img(fname, target_size=(10, 10),
-                                     interpolation='bilinear')
-
-    # create iterator
-    generator = preprocessing_image.ImageDataGenerator()
-    dir_iterator = generator.flow_from_directory(temp_dir)
-
-    # check number of classes and images
-    self.assertEqual(len(dir_iterator.class_indices), num_classes)
-    self.assertEqual(len(dir_iterator.classes), count)
-    self.assertEqual(set(dir_iterator.filenames), set(filenames))
-
-    def preprocessing_function(x):
-      """This will fail if not provided by a Numpy array.
-
-      Note: This is made to enforce backward compatibility.
-
-      Args:
-          x: A numpy array.
-
-      Returns:
-          An array of zeros with the same shape as the given array.
-      """
-      self.assertEqual(x.shape, (26, 26, 3))
-      self.assertIs(type(x), np.ndarray)
-      return np.zeros_like(x)
-
-    # Test usage as Sequence
-    generator = preprocessing_image.ImageDataGenerator(
-        preprocessing_function=preprocessing_function)
-    dir_seq = generator.flow_from_directory(
-        str(temp_dir),
-        target_size=(26, 26),
-        color_mode='rgb',
-        batch_size=3,
-        class_mode='categorical')
-    self.assertEqual(len(dir_seq), count // 3 + 1)
-    x1, y1 = dir_seq[1]
-    self.assertEqual(x1.shape, (3, 26, 26, 3))
-    self.assertEqual(y1.shape, (3, num_classes))
-    x1, y1 = dir_seq[5]
-    self.assertTrue((x1 == 0).all())
-
-  def directory_iterator_with_validation_split_test_helper(
-      self, validation_split):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    num_classes = 2
-    tmp_folder = tempfile.mkdtemp(prefix='test_images')
-
-    # create folders and subfolders
-    paths = []
-    for cl in range(num_classes):
-      class_directory = 'class-{}'.format(cl)
-      classpaths = [
-          class_directory,
-          os.path.join(class_directory, 'subfolder-1'),
-          os.path.join(class_directory, 'subfolder-2'),
-          os.path.join(class_directory, 'subfolder-1', 'sub-subfolder')
-      ]
-      for path in classpaths:
-        os.mkdir(os.path.join(tmp_folder, path))
-      paths.append(classpaths)
-
-    # save the images in the paths
-    count = 0
-    filenames = []
-    for test_images in _generate_test_images():
-      for im in test_images:
-        # rotate image class
-        im_class = count % num_classes
-        # rotate subfolders
-        classpaths = paths[im_class]
-        filename = os.path.join(classpaths[count % len(classpaths)],
-                                'image-{}.jpg'.format(count))
-        filenames.append(filename)
-        im.save(os.path.join(tmp_folder, filename))
-        count += 1
-
-    # create iterator
-    generator = preprocessing_image.ImageDataGenerator(
-        validation_split=validation_split)
-
-    with self.assertRaises(ValueError):
-      generator.flow_from_directory(tmp_folder, subset='foo')
-
-    num_validation = int(count * validation_split)
-    num_training = count - num_validation
-    train_iterator = generator.flow_from_directory(
-        tmp_folder, subset='training')
-    self.assertEqual(train_iterator.samples, num_training)
-
-    valid_iterator = generator.flow_from_directory(
-        tmp_folder, subset='validation')
-    self.assertEqual(valid_iterator.samples, num_validation)
-
-    # check number of classes and images
-    self.assertEqual(len(train_iterator.class_indices), num_classes)
-    self.assertEqual(len(train_iterator.classes), num_training)
-    self.assertEqual(
-        len(set(train_iterator.filenames) & set(filenames)), num_training)
-
-    model = sequential.Sequential([layers.Flatten(), layers.Dense(2)])
-    model.compile(optimizer='sgd', loss='mse')
-    model.fit(train_iterator, epochs=1)
-
-    shutil.rmtree(tmp_folder)
-
-  @keras_parameterized.run_all_keras_modes
-  def test_directory_iterator_with_validation_split_25_percent(self):
-    self.directory_iterator_with_validation_split_test_helper(0.25)
-
-  @keras_parameterized.run_all_keras_modes
-  def test_directory_iterator_with_validation_split_40_percent(self):
-    self.directory_iterator_with_validation_split_test_helper(0.40)
-
-  @keras_parameterized.run_all_keras_modes
-  def test_directory_iterator_with_validation_split_50_percent(self):
-    self.directory_iterator_with_validation_split_test_helper(0.50)
-
-  def test_img_utils(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    height, width = 10, 8
-
-    # Test channels_first data format
-    x = np.random.random((3, height, width))
-    img = preprocessing_image.array_to_img(
-        x, data_format='channels_first')
-    self.assertEqual(img.size, (width, height))
-    x = preprocessing_image.img_to_array(
-        img, data_format='channels_first')
-    self.assertEqual(x.shape, (3, height, width))
-    # Test 2D
-    x = np.random.random((1, height, width))
-    img = preprocessing_image.array_to_img(
-        x, data_format='channels_first')
-    self.assertEqual(img.size, (width, height))
-    x = preprocessing_image.img_to_array(
-        img, data_format='channels_first')
-    self.assertEqual(x.shape, (1, height, width))
-
-    # Test channels_last data format
-    x = np.random.random((height, width, 3))
-    img = preprocessing_image.array_to_img(x, data_format='channels_last')
-    self.assertEqual(img.size, (width, height))
-    x = preprocessing_image.img_to_array(img, data_format='channels_last')
-    self.assertEqual(x.shape, (height, width, 3))
-    # Test 2D
-    x = np.random.random((height, width, 1))
-    img = preprocessing_image.array_to_img(x, data_format='channels_last')
-    self.assertEqual(img.size, (width, height))
-    x = preprocessing_image.img_to_array(img, data_format='channels_last')
-    self.assertEqual(x.shape, (height, width, 1))
-
-  def test_batch_standardize(self):
-    if PIL is None:
-      return  # Skip test if PIL is not available.
-
-    # ImageDataGenerator.standardize should work on batches
-    for test_images in _generate_test_images():
-      img_list = []
-      for im in test_images:
-        img_list.append(preprocessing_image.img_to_array(im)[None, ...])
-
-      images = np.vstack(img_list)
-      generator = preprocessing_image.ImageDataGenerator(
-          featurewise_center=True,
-          samplewise_center=True,
-          featurewise_std_normalization=True,
-          samplewise_std_normalization=True,
-          zca_whitening=True,
-          rotation_range=90.,
-          width_shift_range=0.1,
-          height_shift_range=0.1,
-          shear_range=0.5,
-          zoom_range=0.2,
-          channel_shift_range=0.,
-          brightness_range=(1, 5),
-          fill_mode='nearest',
-          cval=0.5,
-          horizontal_flip=True,
-          vertical_flip=True)
-      generator.fit(images, augment=True)
-
-      transformed = np.copy(images)
-      for i, im in enumerate(transformed):
-        transformed[i] = generator.random_transform(im)
-      transformed = generator.standardize(transformed)
-
-  def test_img_transforms(self):
-    x = np.random.random((3, 200, 200))
-    _ = preprocessing_image.random_rotation(x, 20)
-    _ = preprocessing_image.random_shift(x, 0.2, 0.2)
-    _ = preprocessing_image.random_shear(x, 2.)
-    _ = preprocessing_image.random_zoom(x, (0.5, 0.5))
-    _ = preprocessing_image.apply_channel_shift(x, 2, 2)
-    _ = preprocessing_image.apply_affine_transform(x, 2)
-    with self.assertRaises(ValueError):
-      preprocessing_image.random_zoom(x, (0, 0, 0))
-    _ = preprocessing_image.random_channel_shift(x, 2.)
-
-
-if __name__ == '__main__':
-  v2_compat.enable_v2_behavior()
-  test.main()
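
For orientation, `smart_resize` (exercised above) differs from a plain resize in that it first center-crops the input to the target aspect ratio, so the output is never distorted. A short sketch of that behaviour, using the `tf.keras.preprocessing.image` re-export:

```python
import numpy as np
import tensorflow as tf

img = np.random.random((2, 20, 40, 3)).astype('float32')   # batch of 20x40 images
out = tf.keras.preprocessing.image.smart_resize(img, size=(20, 20))
print(np.shape(out))                   # (2, 20, 20, 3)

# The 40-pixel-wide input is center-cropped to 20 columns before resizing,
# which is exactly what test_smart_resize_batch asserts above.
np.testing.assert_allclose(out, img[:, :, 10:-10, :], rtol=1e-6, atol=1e-6)
```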
diff --git a/tensorflow/python/keras/preprocessing/sequence.py b/tensorflow/python/keras/preprocessing/sequence.py
deleted file mode 100644
index 51a4110..0000000
--- a/tensorflow/python/keras/preprocessing/sequence.py
+++ /dev/null
@@ -1,158 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities for preprocessing sequence data."""
-# pylint: disable=invalid-name
-
-from keras_preprocessing import sequence
-
-from tensorflow.python.keras.utils import data_utils
-from tensorflow.python.util.tf_export import keras_export
-
-make_sampling_table = sequence.make_sampling_table
-skipgrams = sequence.skipgrams
-# TODO(fchollet): consider making `_remove_long_seq` public.
-_remove_long_seq = sequence._remove_long_seq  # pylint: disable=protected-access
-
-
-@keras_export('keras.preprocessing.sequence.TimeseriesGenerator')
-class TimeseriesGenerator(sequence.TimeseriesGenerator, data_utils.Sequence):
-  """Utility class for generating batches of temporal data.
-
-  This class takes in a sequence of data-points gathered at
-  equal intervals, along with time series parameters such as
-  stride, length of history, etc., to produce batches for
-  training/validation.
-  # Arguments
-      data: Indexable generator (such as list or Numpy array)
-          containing consecutive data points (timesteps).
-          The data should be at least 2D, and axis 0 is expected
-          to be the time dimension.
-      targets: Targets corresponding to timesteps in `data`.
-          It should have same length as `data`.
-      length: Length of the output sequences (in number of timesteps).
-      sampling_rate: Period between successive individual timesteps
-          within sequences. For rate `r`, timesteps
-          `data[i]`, `data[i-r]`, ... `data[i - length]`
-          are used to create a sample sequence.
-      stride: Period between successive output sequences.
-          For stride `s`, consecutive output samples would
-          be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
-      start_index: Data points earlier than `start_index` will not be used
-          in the output sequences. This is useful to reserve part of the
-          data for test or validation.
-      end_index: Data points later than `end_index` will not be used
-          in the output sequences. This is useful to reserve part of the
-          data for test or validation.
-      shuffle: Whether to shuffle output samples,
-          or instead draw them in chronological order.
-      reverse: Boolean: if `True`, timesteps in each output sample will be
-          in reverse chronological order.
-      batch_size: Number of timeseries samples in each batch
-          (except maybe the last one).
-  # Returns
-      A [Sequence](https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence) instance.
-  # Examples
-  ```python
-  from keras.preprocessing.sequence import TimeseriesGenerator
-  import numpy as np
-  data = np.array([[i] for i in range(50)])
-  targets = np.array([[i] for i in range(50)])
-  data_gen = TimeseriesGenerator(data, targets,
-                                 length=10, sampling_rate=2,
-                                 batch_size=2)
-  assert len(data_gen) == 20
-  batch_0 = data_gen[0]
-  x, y = batch_0
-  assert np.array_equal(x,
-                        np.array([[[0], [2], [4], [6], [8]],
-                                  [[1], [3], [5], [7], [9]]]))
-  assert np.array_equal(y,
-                        np.array([[10], [11]]))
-  ```
-  """
-  pass
-
-
-@keras_export('keras.preprocessing.sequence.pad_sequences')
-def pad_sequences(sequences, maxlen=None, dtype='int32',
-                  padding='pre', truncating='pre', value=0.):
-  """Pads sequences to the same length.
-
-  This function transforms a list (of length `num_samples`)
-  of sequences (lists of integers)
-  into a 2D Numpy array of shape `(num_samples, num_timesteps)`.
-  `num_timesteps` is either the `maxlen` argument if provided,
-  or the length of the longest sequence in the list.
-
-  Sequences that are shorter than `num_timesteps`
-  are padded with `value` until they are `num_timesteps` long.
-
-  Sequences longer than `num_timesteps` are truncated
-  so that they fit the desired length.
-
-  The position where padding or truncation happens is determined by
-  the arguments `padding` and `truncating`, respectively.
-  Pre-padding or removing values from the beginning of the sequence is the
-  default.
-
-  >>> sequence = [[1], [2, 3], [4, 5, 6]]
-  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence)
-  array([[0, 0, 1],
-         [0, 2, 3],
-         [4, 5, 6]], dtype=int32)
-
-  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, value=-1)
-  array([[-1, -1,  1],
-         [-1,  2,  3],
-         [ 4,  5,  6]], dtype=int32)
-
-  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, padding='post')
-  array([[1, 0, 0],
-         [2, 3, 0],
-         [4, 5, 6]], dtype=int32)
-
-  >>> tf.keras.preprocessing.sequence.pad_sequences(sequence, maxlen=2)
-  array([[0, 1],
-         [2, 3],
-         [5, 6]], dtype=int32)
-
-  Args:
-      sequences: List of sequences (each sequence is a list of integers).
-      maxlen: Optional Int, maximum length of all sequences. If not provided,
-          sequences will be padded to the length of the longest individual
-          sequence.
-      dtype: (Optional, defaults to int32). Type of the output sequences.
-          To pad sequences with variable length strings, you can use `object`.
-      padding: String, 'pre' or 'post' (optional, defaults to 'pre'):
-          pad either before or after each sequence.
-      truncating: String, 'pre' or 'post' (optional, defaults to 'pre'):
-          remove values from sequences larger than
-          `maxlen`, either at the beginning or at the end of the sequences.
-      value: Float or String, padding value. (Optional, defaults to 0.)
-
-  Returns:
-      Numpy array with shape `(len(sequences), maxlen)`
-
-  Raises:
-      ValueError: In case of invalid values for `truncating` or `padding`,
-          or in case of invalid shape for a `sequences` entry.
-  """
-  return sequence.pad_sequences(
-      sequences, maxlen=maxlen, dtype=dtype,
-      padding=padding, truncating=truncating, value=value)
-
-keras_export(
-    'keras.preprocessing.sequence.make_sampling_table')(make_sampling_table)
-keras_export('keras.preprocessing.sequence.skipgrams')(skipgrams)
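
`skipgrams` and `make_sampling_table` are re-exported above without docstrings of their own; a short sketch of how they are typically used for word2vec-style training pairs (the token ids here are made up for illustration):

```python
import tensorflow as tf

vocabulary_size = 8
tokens = [1, 4, 2, 3, 5, 1, 6]          # an already integer-encoded sentence

# (target, context) pairs labelled 1, plus negatively sampled pairs labelled 0.
couples, labels = tf.keras.preprocessing.sequence.skipgrams(
    tokens, vocabulary_size, window_size=2)
print(list(zip(couples, labels))[:4])

# For large vocabularies, a Zipf-based probability table can down-sample very
# frequent words by passing it as `sampling_table=` to `skipgrams`.
table = tf.keras.preprocessing.sequence.make_sampling_table(50000)
print(table.shape, table[:3])
```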
diff --git a/tensorflow/python/keras/preprocessing/sequence_test.py b/tensorflow/python/keras/preprocessing/sequence_test.py
deleted file mode 100644
index eeb84b6..0000000
--- a/tensorflow/python/keras/preprocessing/sequence_test.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for sequence data preprocessing utils."""
-
-from math import ceil
-
-import numpy as np
-
-from tensorflow.python.keras.preprocessing import sequence as preprocessing_sequence
-from tensorflow.python.platform import test
-
-
-class TestSequence(test.TestCase):
-
-  def test_pad_sequences(self):
-    a = [[1], [1, 2], [1, 2, 3]]
-
-    # test padding
-    b = preprocessing_sequence.pad_sequences(a, maxlen=3, padding='pre')
-    self.assertAllClose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]])
-    b = preprocessing_sequence.pad_sequences(a, maxlen=3, padding='post')
-    self.assertAllClose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]])
-
-    # test truncating
-    b = preprocessing_sequence.pad_sequences(
-        a, maxlen=2, truncating='pre')
-    self.assertAllClose(b, [[0, 1], [1, 2], [2, 3]])
-    b = preprocessing_sequence.pad_sequences(
-        a, maxlen=2, truncating='post')
-    self.assertAllClose(b, [[0, 1], [1, 2], [1, 2]])
-
-    # test value
-    b = preprocessing_sequence.pad_sequences(a, maxlen=3, value=1)
-    self.assertAllClose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]])
-
-  def test_pad_sequences_vector(self):
-    a = [[[1, 1]], [[2, 1], [2, 2]], [[3, 1], [3, 2], [3, 3]]]
-
-    # test padding
-    b = preprocessing_sequence.pad_sequences(a, maxlen=3, padding='pre')
-    self.assertAllClose(b, [[[0, 0], [0, 0], [1, 1]], [[0, 0], [2, 1], [2, 2]],
-                            [[3, 1], [3, 2], [3, 3]]])
-    b = preprocessing_sequence.pad_sequences(a, maxlen=3, padding='post')
-    self.assertAllClose(b, [[[1, 1], [0, 0], [0, 0]], [[2, 1], [2, 2], [0, 0]],
-                            [[3, 1], [3, 2], [3, 3]]])
-
-    # test truncating
-    b = preprocessing_sequence.pad_sequences(
-        a, maxlen=2, truncating='pre')
-    self.assertAllClose(b, [[[0, 0], [1, 1]], [[2, 1], [2, 2]], [[3, 2], [3,
-                                                                          3]]])
-
-    b = preprocessing_sequence.pad_sequences(
-        a, maxlen=2, truncating='post')
-    self.assertAllClose(b, [[[0, 0], [1, 1]], [[2, 1], [2, 2]], [[3, 1], [3,
-                                                                          2]]])
-
-    # test value
-    b = preprocessing_sequence.pad_sequences(a, maxlen=3, value=1)
-    self.assertAllClose(b, [[[1, 1], [1, 1], [1, 1]], [[1, 1], [2, 1], [2, 2]],
-                            [[3, 1], [3, 2], [3, 3]]])
-
-  def test_make_sampling_table(self):
-    a = preprocessing_sequence.make_sampling_table(3)
-    self.assertAllClose(
-        a, np.asarray([0.00315225, 0.00315225, 0.00547597]), rtol=.1)
-
-  def test_skipgrams(self):
-    # test with no window size and binary labels
-    couples, labels = preprocessing_sequence.skipgrams(
-        np.arange(3), vocabulary_size=3)
-    for couple in couples:
-      self.assertIn(couple[0], [0, 1, 2])
-      self.assertIn(couple[1], [0, 1, 2])
-
-    # test window size and categorical labels
-    couples, labels = preprocessing_sequence.skipgrams(
-        np.arange(5), vocabulary_size=5, window_size=1, categorical=True)
-    for couple in couples:
-      self.assertLessEqual(couple[0] - couple[1], 3)
-    for l in labels:
-      self.assertEqual(len(l), 2)
-
-  def test_remove_long_seq(self):
-    a = [[[1, 1]], [[2, 1], [2, 2]], [[3, 1], [3, 2], [3, 3]]]
-
-    new_seq, new_label = preprocessing_sequence._remove_long_seq(
-        maxlen=3, seq=a, label=['a', 'b', ['c', 'd']])
-    self.assertEqual(new_seq, [[[1, 1]], [[2, 1], [2, 2]]])
-    self.assertEqual(new_label, ['a', 'b'])
-
-  def test_TimeseriesGenerator(self):
-    data = np.array([[i] for i in range(50)])
-    targets = np.array([[i] for i in range(50)])
-
-    data_gen = preprocessing_sequence.TimeseriesGenerator(
-        data, targets, length=10, sampling_rate=2, batch_size=2)
-    self.assertEqual(len(data_gen), 20)
-    self.assertAllClose(data_gen[0][0],
-                        np.array([[[0], [2], [4], [6], [8]], [[1], [3], [5],
-                                                              [7], [9]]]))
-    self.assertAllClose(data_gen[0][1], np.array([[10], [11]]))
-    self.assertAllClose(data_gen[1][0],
-                        np.array([[[2], [4], [6], [8], [10]], [[3], [5], [7],
-                                                               [9], [11]]]))
-    self.assertAllClose(data_gen[1][1], np.array([[12], [13]]))
-
-    data_gen = preprocessing_sequence.TimeseriesGenerator(
-        data, targets, length=10, sampling_rate=2, reverse=True, batch_size=2)
-    self.assertEqual(len(data_gen), 20)
-    self.assertAllClose(data_gen[0][0],
-                        np.array([[[8], [6], [4], [2], [0]], [[9], [7], [5],
-                                                              [3], [1]]]))
-    self.assertAllClose(data_gen[0][1], np.array([[10], [11]]))
-
-    data_gen = preprocessing_sequence.TimeseriesGenerator(
-        data, targets, length=10, sampling_rate=2, shuffle=True, batch_size=1)
-    batch = data_gen[0]
-    r = batch[1][0][0]
-    self.assertAllClose(batch[0],
-                        np.array([[[r - 10], [r - 8], [r - 6], [r - 4],
-                                   [r - 2]]]))
-    self.assertAllClose(batch[1], np.array([
-        [r],
-    ]))
-
-    data_gen = preprocessing_sequence.TimeseriesGenerator(
-        data, targets, length=10, sampling_rate=2, stride=2, batch_size=2)
-    self.assertEqual(len(data_gen), 10)
-    self.assertAllClose(data_gen[1][0],
-                        np.array([[[4], [6], [8], [10], [12]], [[6], [8], [10],
-                                                                [12], [14]]]))
-    self.assertAllClose(data_gen[1][1], np.array([[14], [16]]))
-
-    data_gen = preprocessing_sequence.TimeseriesGenerator(
-        data,
-        targets,
-        length=10,
-        sampling_rate=2,
-        start_index=10,
-        end_index=30,
-        batch_size=2)
-    self.assertEqual(len(data_gen), 6)
-    self.assertAllClose(data_gen[0][0],
-                        np.array([[[10], [12], [14], [16], [18]],
-                                  [[11], [13], [15], [17], [19]]]))
-    self.assertAllClose(data_gen[0][1], np.array([[20], [21]]))
-
-    data = np.array([np.random.random_sample((1, 2, 3, 4)) for i in range(50)])
-    targets = np.array([np.random.random_sample((3, 2, 1)) for i in range(50)])
-    data_gen = preprocessing_sequence.TimeseriesGenerator(
-        data,
-        targets,
-        length=10,
-        sampling_rate=2,
-        start_index=10,
-        end_index=30,
-        batch_size=2)
-
-    self.assertEqual(len(data_gen), 6)
-    self.assertAllClose(data_gen[0][0],
-                        np.array(
-                            [np.array(data[10:19:2]),
-                             np.array(data[11:20:2])]))
-    self.assertAllClose(data_gen[0][1], np.array([targets[20], targets[21]]))
-
-    with self.assertRaises(ValueError) as context:
-      preprocessing_sequence.TimeseriesGenerator(data, targets, length=50)
-    error = str(context.exception)
-    self.assertIn('`start_index+length=50 > end_index=49` is disallowed', error)
-
-  def test_TimeSeriesGenerator_doesnt_miss_any_sample(self):
-    x = np.array([[i] for i in range(10)])
-
-    for length in range(3, 10):
-      g = preprocessing_sequence.TimeseriesGenerator(
-          x, x, length=length, batch_size=1)
-      expected = max(0, len(x) - length)
-      actual = len(g)
-      self.assertEqual(expected, actual)
-
-      if actual > 0:
-        # All elements in range(length, 10) should be used as current step
-        expected = np.arange(length, 10).reshape(-1, 1)
-
-        y = np.concatenate([g[ix][1] for ix in range(len(g))], axis=0)
-        self.assertAllClose(y, expected)
-
-    x = np.array([[i] for i in range(23)])
-
-    strides = (1, 1, 5, 7, 3, 5, 3)
-    lengths = (3, 3, 4, 3, 1, 3, 7)
-    batch_sizes = (6, 6, 6, 5, 6, 6, 6)
-    shuffles = (False, True, True, False, False, False, False)
-
-    for stride, length, batch_size, shuffle in zip(strides, lengths,
-                                                   batch_sizes, shuffles):
-      g = preprocessing_sequence.TimeseriesGenerator(
-          x,
-          x,
-          length=length,
-          sampling_rate=1,
-          stride=stride,
-          start_index=0,
-          end_index=None,
-          shuffle=shuffle,
-          reverse=False,
-          batch_size=batch_size)
-      if shuffle:
-        # all batches have the same size when shuffle is True.
-        expected_sequences = ceil(
-            (23 - length) / float(batch_size * stride)) * batch_size
-      else:
-        # last batch will be different if `(samples - length) / stride`
-        # is not a multiple of `batch_size`.
-        expected_sequences = ceil((23 - length) / float(stride))
-
-      expected_batches = ceil(expected_sequences / float(batch_size))
-
-      y = [g[ix][1] for ix in range(len(g))]
-
-      actual_sequences = sum(len(iy) for iy in y)
-      actual_batches = len(y)
-
-      self.assertEqual(expected_sequences, actual_sequences)
-      self.assertEqual(expected_batches, actual_batches)
-
-
-if __name__ == '__main__':
-  test.main()
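
The batch-count reasoning in the last test above is easy to verify by hand; a worked instance for one of its parameter sets (23 samples, length=4, stride=5, batch_size=6):

```python
from math import ceil

samples, length, stride, batch_size = 23, 4, 5, 6

# With shuffle=True every batch is full, so sequences come in whole batches.
shuffled_sequences = ceil((samples - length) / float(batch_size * stride)) * batch_size
# With shuffle=False the last batch may be short.
ordered_sequences = ceil((samples - length) / float(stride))

print(shuffled_sequences, ordered_sequences)            # 6 4
print(ceil(shuffled_sequences / float(batch_size)),     # 1 batch either way
      ceil(ordered_sequences / float(batch_size)))
```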
diff --git a/tensorflow/python/keras/preprocessing/text.py b/tensorflow/python/keras/preprocessing/text.py
deleted file mode 100644
index d36a9a5..0000000
--- a/tensorflow/python/keras/preprocessing/text.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities for text input preprocessing."""
-# pylint: disable=invalid-name
-
-from keras_preprocessing import text
-
-from tensorflow.python.keras.preprocessing.text_dataset import text_dataset_from_directory  # pylint: disable=unused-import
-from tensorflow.python.util.tf_export import keras_export
-
-hashing_trick = text.hashing_trick
-Tokenizer = text.Tokenizer
-
-
-@keras_export('keras.preprocessing.text.text_to_word_sequence')
-def text_to_word_sequence(input_text,
-                          filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
-                          lower=True,
-                          split=' '):
-  """Converts a text to a sequence of words (or tokens).
-
-  This function transforms a string of text into a list of words
-  while ignoring `filters` which include punctuations by default.
-
-  >>> sample_text = 'This is a sample sentence.'
-  >>> tf.keras.preprocessing.text.text_to_word_sequence(sample_text)
-  ['this', 'is', 'a', 'sample', 'sentence']
-
-  Args:
-      input_text: Input text (string).
-      filters: list (or concatenation) of characters to filter out, such as
-          punctuation. Default: ``'!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n'``,
-            includes basic punctuation, tabs, and newlines.
-      lower: boolean. Whether to convert the input to lowercase.
-      split: str. Separator for word splitting.
-
-  Returns:
-      A list of words (or tokens).
-  """
-  return text.text_to_word_sequence(
-      input_text, filters=filters, lower=lower, split=split)
-
-
-@keras_export('keras.preprocessing.text.one_hot')
-def one_hot(input_text,
-            n,
-            filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
-            lower=True,
-            split=' '):
-  r"""One-hot encodes a text into a list of word indexes of size `n`.
-
-  This function receives as input a string of text and returns a
-  list of encoded integers each corresponding to a word (or token)
-  in the given input string.
-
-  Args:
-      input_text: Input text (string).
-      n: int. Size of vocabulary.
-      filters: list (or concatenation) of characters to filter out, such as
-        punctuation. Default:
-        ```
-        '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~\t\n
-        ```,
-        includes basic punctuation, tabs, and newlines.
-      lower: boolean. Whether to set the text to lowercase.
-      split: str. Separator for word splitting.
-
-  Returns:
-      List of integers in `[1, n]`. Each integer encodes a word
-      (uniqueness is not guaranteed).
-  """
-  return text.one_hot(input_text, n, filters=filters, lower=lower, split=split)
-
-
-# text.tokenizer_from_json is only available if keras_preprocessing >= 1.1.0
-try:
-  tokenizer_from_json = text.tokenizer_from_json
-  keras_export('keras.preprocessing.text.tokenizer_from_json')(
-      tokenizer_from_json)
-except AttributeError:
-  pass
-
-keras_export('keras.preprocessing.text.hashing_trick')(hashing_trick)
-keras_export('keras.preprocessing.text.Tokenizer')(Tokenizer)
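
The `Tokenizer` class re-exported above carries no docstring in this shim; a minimal sketch of its usual fit/transform cycle (the corpus is made up for illustration):

```python
import tensorflow as tf

corpus = ['This is a sample sentence.', 'This is another one.']

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=100, oov_token='<OOV>')
tokenizer.fit_on_texts(corpus)                 # builds the word index from frequencies

print(tokenizer.word_index)                    # {'<OOV>': 1, 'this': 2, 'is': 3, ...}
print(tokenizer.texts_to_sequences(['This is a brand new sentence.']))
# Unseen words ('brand', 'new') map to the <OOV> index.
```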
diff --git a/tensorflow/python/keras/preprocessing/text_dataset.py b/tensorflow/python/keras/preprocessing/text_dataset.py
deleted file mode 100644
index f29a7f6..0000000
--- a/tensorflow/python/keras/preprocessing/text_dataset.py
+++ /dev/null
@@ -1,195 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras text dataset generation utilities."""
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.keras.preprocessing import dataset_utils
-from tensorflow.python.ops import io_ops
-from tensorflow.python.ops import string_ops
-from tensorflow.python.util.tf_export import keras_export
-
-
-@keras_export('keras.utils.text_dataset_from_directory',
-              'keras.preprocessing.text_dataset_from_directory',
-              v1=[])
-def text_dataset_from_directory(directory,
-                                labels='inferred',
-                                label_mode='int',
-                                class_names=None,
-                                batch_size=32,
-                                max_length=None,
-                                shuffle=True,
-                                seed=None,
-                                validation_split=None,
-                                subset=None,
-                                follow_links=False):
-  """Generates a `tf.data.Dataset` from text files in a directory.
-
-  If your directory structure is:
-
-  ```
-  main_directory/
-  ...class_a/
-  ......a_text_1.txt
-  ......a_text_2.txt
-  ...class_b/
-  ......b_text_1.txt
-  ......b_text_2.txt
-  ```
-
-  Then calling `text_dataset_from_directory(main_directory, labels='inferred')`
-  will return a `tf.data.Dataset` that yields batches of texts from
-  the subdirectories `class_a` and `class_b`, together with labels
-  0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`).
-
-  Only `.txt` files are supported at this time.
-
-  Args:
-    directory: Directory where the data is located.
-        If `labels` is "inferred", it should contain
-        subdirectories, each containing text files for a class.
-        Otherwise, the directory structure is ignored.
-    labels: Either "inferred"
-        (labels are generated from the directory structure),
-        None (no labels),
-        or a list/tuple of integer labels of the same size as the number of
-        text files found in the directory. Labels should be sorted according
-        to the alphanumeric order of the text file paths
-        (obtained via `os.walk(directory)` in Python).
-    label_mode:
-        - 'int': means that the labels are encoded as integers
-            (e.g. for `sparse_categorical_crossentropy` loss).
-        - 'categorical' means that the labels are
-            encoded as a categorical vector
-            (e.g. for `categorical_crossentropy` loss).
-        - 'binary' means that the labels (there can be only 2)
-            are encoded as `float32` scalars with values 0 or 1
-            (e.g. for `binary_crossentropy`).
-        - None (no labels).
-    class_names: Only valid if "labels" is "inferred". This is the explicit
-        list of class names (must match names of subdirectories). Used
-        to control the order of the classes
-        (otherwise alphanumerical order is used).
-    batch_size: Size of the batches of data. Default: 32.
-    max_length: Maximum size of a text string. Texts longer than this will
-      be truncated to `max_length`.
-    shuffle: Whether to shuffle the data. Default: True.
-        If set to False, sorts the data in alphanumeric order.
-    seed: Optional random seed for shuffling and transformations.
-    validation_split: Optional float between 0 and 1,
-        fraction of data to reserve for validation.
-    subset: One of "training" or "validation".
-        Only used if `validation_split` is set.
-    follow_links: Whether to visit subdirectories pointed to by symlinks.
-        Defaults to False.
-
-  Returns:
-    A `tf.data.Dataset` object.
-      - If `label_mode` is None, it yields `string` tensors of shape
-        `(batch_size,)`, containing the contents of a batch of text files.
-      - Otherwise, it yields a tuple `(texts, labels)`, where `texts`
-        has shape `(batch_size,)` and `labels` follows the format described
-        below.
-
-  Rules regarding labels format:
-    - if `label_mode` is `int`, the labels are an `int32` tensor of shape
-      `(batch_size,)`.
-    - if `label_mode` is `binary`, the labels are a `float32` tensor of
-      1s and 0s of shape `(batch_size, 1)`.
-    - if `label_mode` is `categorical`, the labels are a `float32` tensor
-      of shape `(batch_size, num_classes)`, representing a one-hot
-      encoding of the class index.
-  """
-  if labels not in ('inferred', None):
-    if not isinstance(labels, (list, tuple)):
-      raise ValueError(
-          '`labels` argument should be a list/tuple of integer labels, of '
-          'the same size as the number of text files in the target '
-          'directory. If you wish to infer the labels from the subdirectory '
-          'names in the target directory, pass `labels="inferred"`. '
-          'If you wish to get a dataset that only contains text samples '
-          '(no labels), pass `labels=None`.')
-    if class_names:
-      raise ValueError('You can only pass `class_names` if the labels are '
-                       'inferred from the subdirectory names in the target '
-                       'directory (`labels="inferred"`).')
-  if label_mode not in {'int', 'categorical', 'binary', None}:
-    raise ValueError(
-        '`label_mode` argument must be one of "int", "categorical", "binary", '
-        'or None. Received: %s' % (label_mode,))
-  if labels is None or label_mode is None:
-    labels = None
-    label_mode = None
-  dataset_utils.check_validation_split_arg(
-      validation_split, subset, shuffle, seed)
-
-  if seed is None:
-    seed = np.random.randint(1e6)
-  file_paths, labels, class_names = dataset_utils.index_directory(
-      directory,
-      labels,
-      formats=('.txt',),
-      class_names=class_names,
-      shuffle=shuffle,
-      seed=seed,
-      follow_links=follow_links)
-
-  if label_mode == 'binary' and len(class_names) != 2:
-    raise ValueError(
-        'When passing `label_mode="binary"`, there must be exactly 2 classes. '
-        'Found the following classes: %s' % (class_names,))
-
-  file_paths, labels = dataset_utils.get_training_or_validation_split(
-      file_paths, labels, validation_split, subset)
-  if not file_paths:
-    raise ValueError('No text files found.')
-
-  dataset = paths_and_labels_to_dataset(
-      file_paths=file_paths,
-      labels=labels,
-      label_mode=label_mode,
-      num_classes=len(class_names),
-      max_length=max_length)
-  if shuffle:
-    # Shuffle locally at each iteration
-    dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed)
-  dataset = dataset.batch(batch_size)
-  # Users may need to reference `class_names`.
-  dataset.class_names = class_names
-  return dataset
-
-
-def paths_and_labels_to_dataset(file_paths,
-                                labels,
-                                label_mode,
-                                num_classes,
-                                max_length):
-  """Constructs a dataset of text strings and labels."""
-  path_ds = dataset_ops.Dataset.from_tensor_slices(file_paths)
-  string_ds = path_ds.map(
-      lambda x: path_to_string_content(x, max_length))
-  if label_mode:
-    label_ds = dataset_utils.labels_to_dataset(labels, label_mode, num_classes)
-    string_ds = dataset_ops.Dataset.zip((string_ds, label_ds))
-  return string_ds
-
-
-def path_to_string_content(path, max_length):
-  txt = io_ops.read_file(path)
-  if max_length is not None:
-    txt = string_ops.substr(txt, 0, max_length)
-  return txt
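For orientation, here is a minimal usage sketch of the public entry point this module backed, assuming a TF 2.x build where the symbol is still exported as `tf.keras.preprocessing.text_dataset_from_directory` and a `main_directory/` laid out as in the docstring above (the path and split values are illustrative, not taken from the source):

```python
import tensorflow as tf

# Hypothetical directory laid out as main_directory/class_a/*.txt, class_b/*.txt.
train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'main_directory',
    labels='inferred',
    label_mode='int',
    batch_size=32,
    max_length=512,        # each file's contents are truncated to 512 bytes
    validation_split=0.2,
    subset='training',
    seed=1337)

print(train_ds.class_names)  # e.g. ['class_a', 'class_b']
for texts, labels in train_ds.take(1):
  # texts: tf.string tensor of shape (batch_size,);
  # labels: int32 tensor of shape (batch_size,)
  print(texts.shape, labels.shape)
```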
diff --git a/tensorflow/python/keras/preprocessing/text_dataset_test.py b/tensorflow/python/keras/preprocessing/text_dataset_test.py
deleted file mode 100644
index a40364d..0000000
--- a/tensorflow/python/keras/preprocessing/text_dataset_test.py
+++ /dev/null
@@ -1,255 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for text_dataset."""
-
-import os
-import random
-import shutil
-import string
-
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras.preprocessing import text_dataset
-from tensorflow.python.platform import test
-
-
-class TextDatasetFromDirectoryTest(keras_parameterized.TestCase):
-
-  def _prepare_directory(self,
-                         num_classes=2,
-                         nested_dirs=False,
-                         count=16,
-                         length=20):
-    # Get a unique temp directory
-    temp_dir = os.path.join(self.get_temp_dir(), str(random.randint(0, 1e6)))
-    os.mkdir(temp_dir)
-    self.addCleanup(shutil.rmtree, temp_dir)
-
-    # Generate paths to class subdirectories
-    paths = []
-    for class_index in range(num_classes):
-      class_directory = 'class_%s' % (class_index,)
-      if nested_dirs:
-        class_paths = [
-            class_directory, os.path.join(class_directory, 'subfolder_1'),
-            os.path.join(class_directory, 'subfolder_2'), os.path.join(
-                class_directory, 'subfolder_1', 'sub-subfolder')
-        ]
-      else:
-        class_paths = [class_directory]
-      for path in class_paths:
-        os.mkdir(os.path.join(temp_dir, path))
-      paths += class_paths
-
-    for i in range(count):
-      path = paths[i % len(paths)]
-      filename = os.path.join(path, 'text_%s.txt' % (i,))
-      f = open(os.path.join(temp_dir, filename), 'w')
-      text = ''.join([random.choice(string.printable) for _ in range(length)])
-      f.write(text)
-      f.close()
-    return temp_dir
-
-  def test_text_dataset_from_directory_standalone(self):
-    # Test retrieving txt files without labels from a directory and its subdirs.
-    # Save a few extra files in the parent directory.
-    directory = self._prepare_directory(count=7, num_classes=2)
-    for i in range(3):
-      filename = 'text_%s.txt' % (i,)
-      f = open(os.path.join(directory, filename), 'w')
-      text = ''.join([random.choice(string.printable) for _ in range(20)])
-      f.write(text)
-      f.close()
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=5, label_mode=None, max_length=10)
-    batch = next(iter(dataset))
-    # We just return the texts, no labels
-    self.assertEqual(batch.shape, (5,))
-    self.assertEqual(batch.dtype.name, 'string')
-    # Count samples
-    batch_count = 0
-    sample_count = 0
-    for batch in dataset:
-      batch_count += 1
-      sample_count += batch.shape[0]
-    self.assertEqual(batch_count, 2)
-    self.assertEqual(sample_count, 10)
-
-  def test_text_dataset_from_directory_binary(self):
-    directory = self._prepare_directory(num_classes=2)
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode='int', max_length=10)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8,))
-    self.assertEqual(batch[0].dtype.name, 'string')
-    self.assertEqual(len(batch[0].numpy()[0]), 10)  # Test max_length
-    self.assertEqual(batch[1].shape, (8,))
-    self.assertEqual(batch[1].dtype.name, 'int32')
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode='binary')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8,))
-    self.assertEqual(batch[0].dtype.name, 'string')
-    self.assertEqual(batch[1].shape, (8, 1))
-    self.assertEqual(batch[1].dtype.name, 'float32')
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode='categorical')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8,))
-    self.assertEqual(batch[0].dtype.name, 'string')
-    self.assertEqual(batch[1].shape, (8, 2))
-    self.assertEqual(batch[1].dtype.name, 'float32')
-
-  def test_sample_count(self):
-    directory = self._prepare_directory(num_classes=4, count=15)
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode=None)
-    sample_count = 0
-    for batch in dataset:
-      sample_count += batch.shape[0]
-    self.assertEqual(sample_count, 15)
-
-  def test_text_dataset_from_directory_multiclass(self):
-    directory = self._prepare_directory(num_classes=4, count=15)
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode=None)
-    batch = next(iter(dataset))
-    self.assertEqual(batch.shape, (8,))
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode=None)
-    sample_count = 0
-    iterator = iter(dataset)
-    for batch in dataset:
-      sample_count += next(iterator).shape[0]
-    self.assertEqual(sample_count, 15)
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode='int')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8,))
-    self.assertEqual(batch[0].dtype.name, 'string')
-    self.assertEqual(batch[1].shape, (8,))
-    self.assertEqual(batch[1].dtype.name, 'int32')
-
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode='categorical')
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8,))
-    self.assertEqual(batch[0].dtype.name, 'string')
-    self.assertEqual(batch[1].shape, (8, 4))
-    self.assertEqual(batch[1].dtype.name, 'float32')
-
-  def test_text_dataset_from_directory_validation_split(self):
-    directory = self._prepare_directory(num_classes=2, count=10)
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=10, validation_split=0.2, subset='training',
-        seed=1337)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (8,))
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=10, validation_split=0.2, subset='validation',
-        seed=1337)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertEqual(batch[0].shape, (2,))
-
-  def test_text_dataset_from_directory_manual_labels(self):
-    directory = self._prepare_directory(num_classes=2, count=2)
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, labels=[0, 1], shuffle=False)
-    batch = next(iter(dataset))
-    self.assertLen(batch, 2)
-    self.assertAllClose(batch[1], [0, 1])
-
-  def test_text_dataset_from_directory_follow_links(self):
-    directory = self._prepare_directory(num_classes=2, count=25,
-                                        nested_dirs=True)
-    dataset = text_dataset.text_dataset_from_directory(
-        directory, batch_size=8, label_mode=None, follow_links=True)
-    sample_count = 0
-    for batch in dataset:
-      sample_count += batch.shape[0]
-    self.assertEqual(sample_count, 25)
-
-  def test_text_dataset_from_directory_no_files(self):
-    directory = self._prepare_directory(num_classes=2, count=0)
-    with self.assertRaisesRegex(ValueError, 'No text files found.'):
-      _ = text_dataset.text_dataset_from_directory(directory)
-
-  def test_text_dataset_from_directory_errors(self):
-    directory = self._prepare_directory(num_classes=3, count=5)
-
-    with self.assertRaisesRegex(ValueError, '`labels` argument should be'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, labels='other')
-
-    with self.assertRaisesRegex(ValueError, '`label_mode` argument must be'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, label_mode='other')
-
-    with self.assertRaisesRegex(
-        ValueError, 'only pass `class_names` if the labels are inferred'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, labels=[0, 0, 1, 1, 1],
-          class_names=['class_0', 'class_1', 'class_2'])
-
-    with self.assertRaisesRegex(
-        ValueError,
-        'Expected the lengths of `labels` to match the number of files'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, labels=[0, 0, 1, 1])
-
-    with self.assertRaisesRegex(
-        ValueError, '`class_names` passed did not match'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, class_names=['class_0', 'class_2'])
-
-    with self.assertRaisesRegex(ValueError,
-                                'there must be exactly 2 classes'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, label_mode='binary')
-
-    with self.assertRaisesRegex(ValueError,
-                                '`validation_split` must be between 0 and 1'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, validation_split=2)
-
-    with self.assertRaisesRegex(ValueError,
-                                '`subset` must be either "training" or'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, validation_split=0.2, subset='other')
-
-    with self.assertRaisesRegex(ValueError, '`validation_split` must be set'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, validation_split=0, subset='training')
-
-    with self.assertRaisesRegex(ValueError, 'must provide a `seed`'):
-      _ = text_dataset.text_dataset_from_directory(
-          directory, validation_split=0.2, subset='training')
-
-
-if __name__ == '__main__':
-  v2_compat.enable_v2_behavior()
-  test.main()
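As a reading aid, the tree that `_prepare_directory(num_classes=2, count=16)` above builds looks roughly like this (files alternate between the class folders, so each ends up with 8; a sketch of the helper's behaviour, not captured output):

```
<temp_dir>/
...class_0/
......text_0.txt, text_2.txt, ..., text_14.txt
...class_1/
......text_1.txt, text_3.txt, ..., text_15.txt
```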
diff --git a/tensorflow/python/keras/preprocessing/text_test.py b/tensorflow/python/keras/preprocessing/text_test.py
deleted file mode 100644
index abe99d7..0000000
--- a/tensorflow/python/keras/preprocessing/text_test.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for text data preprocessing utils."""
-
-import numpy as np
-
-from tensorflow.python.keras.preprocessing import text as preprocessing_text
-from tensorflow.python.platform import test
-
-
-class TestText(test.TestCase):
-
-  def test_one_hot(self):
-    text = 'The cat sat on the mat.'
-    encoded = preprocessing_text.one_hot(text, 5)
-    self.assertEqual(len(encoded), 6)
-    self.assertLessEqual(np.max(encoded), 4)
-    self.assertGreaterEqual(np.min(encoded), 0)
-
-    # Test on unicode.
-    text = u'The cat sat on the mat.'
-    encoded = preprocessing_text.one_hot(text, 5)
-    self.assertEqual(len(encoded), 6)
-    self.assertLessEqual(np.max(encoded), 4)
-    self.assertGreaterEqual(np.min(encoded), 0)
-
-  def test_tokenizer(self):
-    texts = [
-        'The cat sat on the mat.',
-        'The dog sat on the log.',
-        'Dogs and cats living together.'
-    ]
-    tokenizer = preprocessing_text.Tokenizer(num_words=10)
-    tokenizer.fit_on_texts(texts)
-
-    sequences = []
-    for seq in tokenizer.texts_to_sequences_generator(texts):
-      sequences.append(seq)
-    self.assertLess(np.max(np.max(sequences)), 10)
-    self.assertEqual(np.min(np.min(sequences)), 1)
-
-    tokenizer.fit_on_sequences(sequences)
-
-    for mode in ['binary', 'count', 'tfidf', 'freq']:
-      matrix = tokenizer.texts_to_matrix(texts, mode)
-      self.assertEqual(matrix.shape, (3, 10))
-
-  def test_hashing_trick_hash(self):
-    text = 'The cat sat on the mat.'
-    encoded = preprocessing_text.hashing_trick(text, 5)
-    self.assertEqual(len(encoded), 6)
-    self.assertLessEqual(np.max(encoded), 4)
-    self.assertGreaterEqual(np.min(encoded), 1)
-
-  def test_hashing_trick_md5(self):
-    text = 'The cat sat on the mat.'
-    encoded = preprocessing_text.hashing_trick(
-        text, 5, hash_function='md5')
-    self.assertEqual(len(encoded), 6)
-    self.assertLessEqual(np.max(encoded), 4)
-    self.assertGreaterEqual(np.min(encoded), 1)
-
-  def test_tokenizer_oov_flag(self):
-    x_train = ['This text has only known words']
-    x_test = ['This text has some unknown words']  # 2 OOVs: some, unknown
-
-    # Default, without OOV flag
-    tokenizer = preprocessing_text.Tokenizer()
-    tokenizer.fit_on_texts(x_train)
-    x_test_seq = tokenizer.texts_to_sequences(x_test)
-    self.assertEqual(len(x_test_seq[0]), 4)  # discards 2 OOVs
-
-    # With OOV feature
-    tokenizer = preprocessing_text.Tokenizer(oov_token='<unk>')
-    tokenizer.fit_on_texts(x_train)
-    x_test_seq = tokenizer.texts_to_sequences(x_test)
-    self.assertEqual(len(x_test_seq[0]), 6)  # OOVs marked in place
-
-  def test_sequential_fit(self):
-    texts = [
-        'The cat sat on the mat.', 'The dog sat on the log.',
-        'Dogs and cats living together.'
-    ]
-    word_sequences = [['The', 'cat', 'is', 'sitting'],
-                      ['The', 'dog', 'is', 'standing']]
-    tokenizer = preprocessing_text.Tokenizer()
-    tokenizer.fit_on_texts(texts)
-    tokenizer.fit_on_texts(word_sequences)
-
-    self.assertEqual(tokenizer.document_count, 5)
-
-    tokenizer.texts_to_matrix(texts)
-    tokenizer.texts_to_matrix(word_sequences)
-
-  def test_text_to_word_sequence(self):
-    text = 'hello! ? world!'
-    seq = preprocessing_text.text_to_word_sequence(text)
-    self.assertEqual(seq, ['hello', 'world'])
-
-  def test_text_to_word_sequence_multichar_split(self):
-    text = 'hello!stop?world!'
-    seq = preprocessing_text.text_to_word_sequence(text, split='stop')
-    self.assertEqual(seq, ['hello', 'world'])
-
-  def test_text_to_word_sequence_unicode(self):
-    text = u'ali! veli? kırk dokuz elli'
-    seq = preprocessing_text.text_to_word_sequence(text)
-    self.assertEqual(seq, [u'ali', u'veli', u'kırk', u'dokuz', u'elli'])
-
-  def test_text_to_word_sequence_unicode_multichar_split(self):
-    text = u'ali!stopveli?stopkırkstopdokuzstopelli'
-    seq = preprocessing_text.text_to_word_sequence(text, split='stop')
-    self.assertEqual(seq, [u'ali', u'veli', u'kırk', u'dokuz', u'elli'])
-
-  def test_tokenizer_unicode(self):
-    texts = [
-        u'ali veli kırk dokuz elli', u'ali veli kırk dokuz elli veli kırk dokuz'
-    ]
-    tokenizer = preprocessing_text.Tokenizer(num_words=5)
-    tokenizer.fit_on_texts(texts)
-
-    self.assertEqual(len(tokenizer.word_counts), 5)
-
-
-if __name__ == '__main__':
-  test.main()
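A compact illustration of the standalone text utilities exercised above, written against their public `tf.keras.preprocessing.text` exports (a sketch only; the example sentences are arbitrary):

```python
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence

# Basic word splitting: lowercases and strips punctuation.
print(text_to_word_sequence('The cat sat on the mat.'))
# ['the', 'cat', 'sat', 'on', 'the', 'mat']

# Tokenizer with an explicit OOV token, as in test_tokenizer_oov_flag above.
tokenizer = Tokenizer(num_words=10, oov_token='<unk>')
tokenizer.fit_on_texts(['The cat sat on the mat.', 'The dog sat on the log.'])

# Unseen words ("bird") map to the OOV index instead of being dropped.
print(tokenizer.texts_to_sequences(['The bird sat on the mat.']))

# Bag-of-words style matrix: one row per text, num_words columns.
print(tokenizer.texts_to_matrix(['The cat sat on the mat.'], mode='count').shape)
# (1, 10)
```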
diff --git a/tensorflow/python/keras/preprocessing/timeseries.py b/tensorflow/python/keras/preprocessing/timeseries.py
deleted file mode 100644
index abe0418..0000000
--- a/tensorflow/python/keras/preprocessing/timeseries.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras timeseries dataset utilities."""
-# pylint: disable=g-classes-have-attributes
-
-import numpy as np
-
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.util.tf_export import keras_export
-
-
-@keras_export('keras.utils.timeseries_dataset_from_array',
-              'keras.preprocessing.timeseries_dataset_from_array',
-              v1=[])
-def timeseries_dataset_from_array(
-    data,
-    targets,
-    sequence_length,
-    sequence_stride=1,
-    sampling_rate=1,
-    batch_size=128,
-    shuffle=False,
-    seed=None,
-    start_index=None,
-    end_index=None):
-  """Creates a dataset of sliding windows over a timeseries provided as array.
-
-  This function takes in a sequence of data-points gathered at
-  equal intervals, along with time series parameters such as
-  length of the sequences/windows, spacing between two sequences/windows, etc.,
-  to produce batches of timeseries inputs and targets.
-
-  Args:
-    data: Numpy array or eager tensor
-      containing consecutive data points (timesteps).
-      Axis 0 is expected to be the time dimension.
-    targets: Targets corresponding to timesteps in `data`.
-      `targets[i]` should be the target
-      corresponding to the window that starts at index `i`
-      (see example 2 below).
-      Pass None if you don't have target data (in this case the dataset will
-      only yield the input data).
-    sequence_length: Length of the output sequences (in number of timesteps).
-    sequence_stride: Period between successive output sequences.
-      For stride `s`, output samples would
-      start at index `data[i]`, `data[i + s]`, `data[i + 2 * s]`, etc.
-    sampling_rate: Period between successive individual timesteps
-      within sequences. For rate `r`, timesteps
-      `data[i], data[i + r], ... data[i + sequence_length]`
-      are used to create a sample sequence.
-    batch_size: Number of timeseries samples in each batch
-      (except maybe the last one).
-    shuffle: Whether to shuffle output samples,
-      or instead draw them in chronological order.
-    seed: Optional int; random seed for shuffling.
-    start_index: Optional int; data points earlier (exclusive)
-      than `start_index` will not be used
-      in the output sequences. This is useful to reserve part of the
-      data for test or validation.
-    end_index: Optional int; data points later (exclusive) than `end_index`
-      will not be used in the output sequences.
-      This is useful to reserve part of the data for test or validation.
-
-  Returns:
-    A tf.data.Dataset instance. If `targets` was passed, the dataset yields
-    tuple `(batch_of_sequences, batch_of_targets)`. If not, the dataset yields
-    only `batch_of_sequences`.
-
-  Example 1:
-
-  Consider indices `[0, 1, ... 99]`.
-  With `sequence_length=10,  sampling_rate=2, sequence_stride=3`,
-  `shuffle=False`, the dataset will yield batches of sequences
-  composed of the following indices:
-
-  ```
-  First sequence:  [0  2  4  6  8 10 12 14 16 18]
-  Second sequence: [3  5  7  9 11 13 15 17 19 21]
-  Third sequence:  [6  8 10 12 14 16 18 20 22 24]
-  ...
-  Last sequence:   [78 80 82 84 86 88 90 92 94 96]
-  ```
-
-  In this case the last 3 data points are discarded since no full sequence
-  can be generated to include them (the next sequence would have started
-  at index 81, and thus its last step would have gone over 99).
-
-  Example 2: Temporal regression.
-
-  Consider an array `data` of scalar values, of shape `(steps,)`.
-  To generate a dataset that uses the past 10
-  timesteps to predict the next timestep, you would use:
-
-  ```python
-  input_data = data[:-10]
-  targets = data[10:]
-  dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
-      input_data, targets, sequence_length=10)
-  for batch in dataset:
-    inputs, targets = batch
-    assert np.array_equal(inputs[0], data[:10])  # First sequence: steps [0-9]
-    assert np.array_equal(targets[0], data[10])  # Corresponding target: step 10
-    break
-  ```
-
-  Example 3: Temporal regression for many-to-many architectures.
-
-  Consider two arrays of scalar values `X` and `Y`,
-  both of shape `(100,)`. The resulting dataset should consist of samples with
-  20 timestamps each. The samples should not overlap.
-  To generate a dataset that uses the current timestamp
-  to predict the corresponding target timestep, you would use:
-
-  ```python
-  X = np.arange(100)
-  Y = X*2
-
-  sample_length = 20
-  input_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
-    X, None, sequence_length=sample_length, sequence_stride=sample_length)
-  target_dataset = tf.keras.preprocessing.timeseries_dataset_from_array(
-    Y, None, sequence_length=sample_length, sequence_stride=sample_length)
-
-  for batch in zip(input_dataset, target_dataset):
-    inputs, targets = batch
-    assert np.array_equal(inputs[0], X[:sample_length])
-
-    # second sample equals output timestamps 20-40
-    assert np.array_equal(targets[1], Y[sample_length:2*sample_length])
-    break
-  ```
-  """
-  if start_index and (start_index < 0 or start_index >= len(data)):
-    raise ValueError('start_index must be higher than 0 and lower than the '
-                     'length of the data. Got: start_index=%s '
-                     'for data of length %s.' % (start_index, len(data)))
-  if end_index:
-    if start_index and end_index <= start_index:
-      raise ValueError('end_index must be higher than start_index. Got: '
-                       'start_index=%s, end_index=%s.' %
-                       (start_index, end_index))
-    if end_index >= len(data):
-      raise ValueError('end_index must be lower than the length of the data. '
-                       'Got: end_index=%s' % (end_index,))
-    if end_index <= 0:
-      raise ValueError('end_index must be higher than 0. '
-                       'Got: end_index=%s' % (end_index,))
-
-  # Validate strides
-  if sampling_rate <= 0 or sampling_rate >= len(data):
-    raise ValueError(
-        'sampling_rate must be higher than 0 and lower than '
-        'the length of the data. Got: '
-        'sampling_rate=%s for data of length %s.' % (sampling_rate, len(data)))
-  if sequence_stride <= 0 or sequence_stride >= len(data):
-    raise ValueError(
-        'sequence_stride must be higher than 0 and lower than '
-        'the length of the data. Got: sequence_stride=%s '
-        'for data of length %s.' % (sequence_stride, len(data)))
-
-  if start_index is None:
-    start_index = 0
-  if end_index is None:
-    end_index = len(data)
-
-  # Determine the lowest dtype to store start positions (to lower memory usage).
-  num_seqs = end_index - start_index - (sequence_length * sampling_rate) + 1
-  if targets is not None:
-    num_seqs = min(num_seqs, len(targets))
-  if num_seqs < 2147483647:
-    index_dtype = 'int32'
-  else:
-    index_dtype = 'int64'
-
-  # Generate start positions
-  start_positions = np.arange(0, num_seqs, sequence_stride, dtype=index_dtype)
-  if shuffle:
-    if seed is None:
-      seed = np.random.randint(1e6)
-    rng = np.random.RandomState(seed)
-    rng.shuffle(start_positions)
-
-  sequence_length = math_ops.cast(sequence_length, dtype=index_dtype)
-  sampling_rate = math_ops.cast(sampling_rate, dtype=index_dtype)
-
-  positions_ds = dataset_ops.Dataset.from_tensors(start_positions).repeat()
-
-  # For each initial window position, generates indices of the window elements
-  indices = dataset_ops.Dataset.zip(
-      (dataset_ops.Dataset.range(len(start_positions)), positions_ds)).map(
-          lambda i, positions: math_ops.range(  # pylint: disable=g-long-lambda
-              positions[i],
-              positions[i] + sequence_length * sampling_rate,
-              sampling_rate),
-          num_parallel_calls=dataset_ops.AUTOTUNE)
-
-  dataset = sequences_from_indices(data, indices, start_index, end_index)
-  if targets is not None:
-    indices = dataset_ops.Dataset.zip(
-        (dataset_ops.Dataset.range(len(start_positions)), positions_ds)).map(
-            lambda i, positions: positions[i],
-            num_parallel_calls=dataset_ops.AUTOTUNE)
-    target_ds = sequences_from_indices(
-        targets, indices, start_index, end_index)
-    dataset = dataset_ops.Dataset.zip((dataset, target_ds))
-  if shuffle:
-    # Shuffle locally at each iteration
-    dataset = dataset.shuffle(buffer_size=batch_size * 8, seed=seed)
-  dataset = dataset.batch(batch_size)
-  return dataset
-
-
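The window construction above is, at bottom, index arithmetic plus a gather; a NumPy rendition for intuition (mirrors Example 1's parameters from the docstring, not the tf.data implementation itself):

```python
import numpy as np

data = np.arange(100)
sequence_length, sampling_rate, sequence_stride = 10, 2, 3

# Same start-position formula as used above.
num_seqs = len(data) - sequence_length * sampling_rate + 1      # 81
starts = np.arange(0, num_seqs, sequence_stride)                # 0, 3, ..., 78

# Each window gathers sequence_length points, sampling_rate apart.
windows = np.stack([
    data[s : s + sequence_length * sampling_rate : sampling_rate]
    for s in starts])

assert windows.shape == (27, 10)
assert list(windows[0]) == [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
assert list(windows[-1]) == [78, 80, 82, 84, 86, 88, 90, 92, 94, 96]
```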
-def sequences_from_indices(array, indices_ds, start_index, end_index):
-  dataset = dataset_ops.Dataset.from_tensors(array[start_index : end_index])
-  dataset = dataset_ops.Dataset.zip((dataset.repeat(), indices_ds)).map(
-      lambda steps, inds: array_ops.gather(steps, inds),  # pylint: disable=unnecessary-lambda
-      num_parallel_calls=dataset_ops.AUTOTUNE)
-  return dataset
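The `targets` path can be checked the same way through the public export, reproducing the docstring's Example 2 (a minimal check, assuming `tf.keras.preprocessing.timeseries_dataset_from_array` is available in the build):

```python
import numpy as np
import tensorflow as tf

# Past 10 steps predict the next step, as in Example 2 of the docstring above.
data = np.arange(20)
ds = tf.keras.preprocessing.timeseries_dataset_from_array(
    data[:-10], data[10:], sequence_length=10, batch_size=4, shuffle=False)

inputs, targets = next(iter(ds))
assert list(inputs.numpy()[0]) == list(data[:10])   # steps 0..9
assert targets.numpy()[0] == data[10]               # step 10
```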
diff --git a/tensorflow/python/keras/preprocessing/timeseries_test.py b/tensorflow/python/keras/preprocessing/timeseries_test.py
deleted file mode 100644
index 1099ed9..0000000
--- a/tensorflow/python/keras/preprocessing/timeseries_test.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for timeseries."""
-
-import numpy as np
-
-from tensorflow.python.compat import v2_compat
-from tensorflow.python.keras.preprocessing import timeseries
-from tensorflow.python.platform import test
-
-
-class TimeseriesDatasetTest(test.TestCase):
-
-  def test_basics(self):
-    # Test ordering, targets, sequence length, batch size
-    data = np.arange(100)
-    targets = data * 2
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, targets, sequence_length=9, batch_size=5)
-    # Expect 19 batches
-    for i, batch in enumerate(dataset):
-      self.assertLen(batch, 2)
-      inputs, targets = batch
-      if i < 18:
-        self.assertEqual(inputs.shape, (5, 9))
-      if i == 18:
-        # Last batch: size 2
-        self.assertEqual(inputs.shape, (2, 9))
-      # Check target values
-      self.assertAllClose(targets, inputs[:, 0] * 2)
-      for j in range(min(5, len(inputs))):
-        # Check each sample in the batch
-        self.assertAllClose(inputs[j], np.arange(i * 5 + j, i * 5 + j + 9))
-
-  def test_timeseries_regression(self):
-    # Test simple timeseries regression use case
-    data = np.arange(10)
-    offset = 3
-    targets = data[offset:]
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, targets, sequence_length=offset, batch_size=1)
-    i = 0
-    for batch in dataset:
-      self.assertLen(batch, 2)
-      inputs, targets = batch
-      self.assertEqual(inputs.shape, (1, 3))
-      # Check values
-      self.assertAllClose(targets[0], data[offset + i])
-      self.assertAllClose(inputs[0], data[i : i + offset])
-      i += 1
-    self.assertEqual(i, 7)  # Expect 7 batches
-
-  def test_no_targets(self):
-    data = np.arange(50)
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, None, sequence_length=10, batch_size=5)
-    # Expect 9 batches
-    i = None
-    for i, batch in enumerate(dataset):
-      if i < 8:
-        self.assertEqual(batch.shape, (5, 10))
-      elif i == 8:
-        self.assertEqual(batch.shape, (1, 10))
-      for j in range(min(5, len(batch))):
-        # Check each sample in the batch
-        self.assertAllClose(batch[j], np.arange(i * 5 + j, i * 5 + j + 10))
-    self.assertEqual(i, 8)
-
-  def test_shuffle(self):
-    # Test cross-epoch random order and seed determinism
-    data = np.arange(10)
-    targets = data * 2
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, targets, sequence_length=5, batch_size=1, shuffle=True, seed=123)
-    first_seq = None
-    for x, y in dataset.take(1):
-      self.assertNotAllClose(x, np.arange(0, 5))
-      self.assertAllClose(x[:, 0] * 2, y)
-      first_seq = x
-    # Check that a new iteration with the same dataset yields different results
-    for x, _ in dataset.take(1):
-      self.assertNotAllClose(x, first_seq)
-    # Check determinism with the same seed
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, targets, sequence_length=5, batch_size=1, shuffle=True, seed=123)
-    for x, _ in dataset.take(1):
-      self.assertAllClose(x, first_seq)
-
-  def test_sampling_rate(self):
-    data = np.arange(100)
-    targets = data * 2
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, targets, sequence_length=9, batch_size=5, sampling_rate=2)
-    for i, batch in enumerate(dataset):
-      self.assertLen(batch, 2)
-      inputs, targets = batch
-      if i < 16:
-        self.assertEqual(inputs.shape, (5, 9))
-      if i == 16:
-        # Last batch: size 3
-        self.assertEqual(inputs.shape, (3, 9))
-      # Check target values
-      self.assertAllClose(inputs[:, 0] * 2, targets)
-      for j in range(min(5, len(inputs))):
-        # Check each sample in the batch
-        start_index = i * 5 + j
-        end_index = start_index + 9 * 2
-        self.assertAllClose(inputs[j], np.arange(start_index, end_index, 2))
-
-  def test_sequence_stride(self):
-    data = np.arange(100)
-    targets = data * 2
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, targets, sequence_length=9, batch_size=5, sequence_stride=3)
-    for i, batch in enumerate(dataset):
-      self.assertLen(batch, 2)
-      inputs, targets = batch
-      if i < 6:
-        self.assertEqual(inputs.shape, (5, 9))
-      if i == 6:
-        # Last batch: size 1
-        self.assertEqual(inputs.shape, (1, 9))
-      # Check target values
-      self.assertAllClose(inputs[:, 0] * 2, targets)
-      for j in range(min(5, len(inputs))):
-        # Check each sample in the batch
-        start_index = i * 5 * 3 + j * 3
-        end_index = start_index + 9
-        self.assertAllClose(inputs[j],
-                            np.arange(start_index, end_index))
-
-  def test_start_and_end_index(self):
-    data = np.arange(100)
-    dataset = timeseries.timeseries_dataset_from_array(
-        data, None,
-        sequence_length=9, batch_size=5, sequence_stride=3, sampling_rate=2,
-        start_index=10, end_index=90)
-    for batch in dataset:
-      self.assertAllLess(batch[0], 90)
-      self.assertAllGreater(batch[0], 9)
-
-  def test_errors(self):
-    # bad start index
-    with self.assertRaisesRegex(ValueError, 'start_index must be '):
-      _ = timeseries.timeseries_dataset_from_array(
-          np.arange(10), None, 3, start_index=-1)
-    with self.assertRaisesRegex(ValueError, 'start_index must be '):
-      _ = timeseries.timeseries_dataset_from_array(
-          np.arange(10), None, 3, start_index=11)
-    # bad end index
-    with self.assertRaisesRegex(ValueError, 'end_index must be '):
-      _ = timeseries.timeseries_dataset_from_array(
-          np.arange(10), None, 3, end_index=-1)
-    with self.assertRaisesRegex(ValueError, 'end_index must be '):
-      _ = timeseries.timeseries_dataset_from_array(
-          np.arange(10), None, 3, end_index=11)
-    # bad sampling_rate
-    with self.assertRaisesRegex(ValueError, 'sampling_rate must be '):
-      _ = timeseries.timeseries_dataset_from_array(
-          np.arange(10), None, 3, sampling_rate=0)
-    # bad sequence stride
-    with self.assertRaisesRegex(ValueError, 'sequence_stride must be '):
-      _ = timeseries.timeseries_dataset_from_array(
-          np.arange(10), None, 3, sequence_stride=0)
-
-
-if __name__ == '__main__':
-  v2_compat.enable_v2_behavior()
-  test.main()
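The batch counts asserted in `test_basics` above follow from simple window arithmetic; spelled out for reference (an illustrative calculation, not part of the test):

```python
# 100 points with sequence_length=9 yield 100 - 9 + 1 = 92 windows;
# batch_size=5 splits them into 18 full batches plus one final batch of 2,
# matching the "Expect 19 batches" comment in test_basics.
data_len, sequence_length, batch_size = 100, 9, 5
num_windows = data_len - sequence_length + 1                # 92
full_batches, last_batch = divmod(num_windows, batch_size)  # 18, 2
assert full_batches + (1 if last_batch else 0) == 19
```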
diff --git a/tensorflow/python/keras/utils/BUILD b/tensorflow/python/keras/utils/BUILD
index 2df988d..af9568f 100644
--- a/tensorflow/python/keras/utils/BUILD
+++ b/tensorflow/python/keras/utils/BUILD
@@ -56,16 +56,6 @@
 )
 
 py_library(
-    name = "kpl_test_utils",
-    srcs = ["kpl_test_utils.py"],
-    srcs_version = "PY3",
-    deps = [
-        "//tensorflow/python/keras",
-        "//tensorflow/python/keras/layers/preprocessing:string_lookup",
-    ],
-)
-
-py_library(
     name = "data_utils",
     srcs = ["data_utils.py"],
     srcs_version = "PY3",
diff --git a/tensorflow/python/keras/utils/kpl_test_utils.py b/tensorflow/python/keras/utils/kpl_test_utils.py
deleted file mode 100644
index 57696a7..0000000
--- a/tensorflow/python/keras/utils/kpl_test_utils.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test related utilities for KPL + tf.distribute."""
-
-import random
-import tempfile
-
-from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
-from tensorflow.python.eager import def_function
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import tensor_spec
-from tensorflow.python.keras.layers.preprocessing import string_lookup
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import test
-
-
-class DistributeKplTestUtils(test.TestCase):
-  """Utils for test of tf.distribute + KPL."""
-  FEATURE_VOCAB = [
-      "avenger", "ironman", "batman", "hulk", "spiderman", "kingkong",
-      "wonder_woman"
-  ]
-  LABEL_VOCAB = ["yes", "no"]
-
-  def define_kpls_for_training(self, use_adapt):
-    """Function that defines KPL used for unit tests of tf.distribute.
-
-    Args:
-      use_adapt: whether `adapt` will be called. If False, precomputed
-        vocabularies are passed to the lookup layers instead.
-
-    Returns:
-      feature_mapper: a simple keras model with one keras StringLookup layer
-      which maps feature to index.
-      label_mapper: similar to feature_mapper, but maps label to index.
-
-    """
-    if use_adapt:
-      feature_lookup_layer = (
-          string_lookup.StringLookup(
-              num_oov_indices=1))
-      feature_lookup_layer.adapt(self.FEATURE_VOCAB)
-      label_lookup_layer = (
-          string_lookup.StringLookup(
-              num_oov_indices=0, mask_token=None))
-      label_lookup_layer.adapt(self.LABEL_VOCAB)
-    else:
-      feature_lookup_layer = (
-          string_lookup.StringLookup(
-              vocabulary=self.FEATURE_VOCAB, num_oov_indices=1))
-      label_lookup_layer = (
-          string_lookup.StringLookup(
-              vocabulary=self.LABEL_VOCAB, num_oov_indices=0, mask_token=None))
-
-    raw_feature_input = keras.layers.Input(
-        shape=(3,), dtype=dtypes.string, name="feature", ragged=True)
-    feature_id_input = feature_lookup_layer(raw_feature_input)
-    feature_mapper = keras.Model({"features": raw_feature_input},
-                                 feature_id_input)
-
-    raw_label_input = keras.layers.Input(
-        shape=(1,), dtype=dtypes.string, name="label")
-    label_id_input = label_lookup_layer(raw_label_input)
-    label_mapper = keras.Model({"label": raw_label_input}, label_id_input)
-
-    return feature_mapper, label_mapper
-
-  def dataset_fn(self, feature_mapper, label_mapper):
-    """Function that generates dataset for test of tf.distribute + KPL.
-
-    Args:
-      feature_mapper: a simple keras model with one keras StringLookup layer
-        which maps feature to index.
-      label_mapper: similar to feature_mapper, but maps label to index.
-
-    Returns:
-      Generated dataset for test of tf.distribute + KPL.
-
-    """
-
-    def feature_and_label_gen():
-      # Generator of dataset.
-      while True:
-        features = random.sample(self.FEATURE_VOCAB, 3)
-        label = ["yes"] if self.FEATURE_VOCAB[0] in features else ["no"]
-        yield {"features": features, "label": label}
-
-    raw_dataset = dataset_ops.Dataset.from_generator(
-        feature_and_label_gen,
-        output_signature={
-            "features": tensor_spec.TensorSpec([3], dtypes.string),
-            "label": tensor_spec.TensorSpec([1], dtypes.string)
-        }).shuffle(100).batch(32)
-
-    train_dataset = raw_dataset.map(lambda x: (  # pylint: disable=g-long-lambda
-        {
-            "features": feature_mapper(x["features"])
-        }, label_mapper(x["label"])))
-    return train_dataset
-
-  def define_model(self):
-    """A simple model for test of tf.distribute + KPL."""
-    # Create the model. The input needs to be compatible with KPLs.
-    model_input = keras.layers.Input(
-        shape=(3,), dtype=dtypes.int64, name="model_input")
-
-    # input_dim includes a mask token and an oov token.
-    emb_output = keras.layers.Embedding(
-        input_dim=len(self.FEATURE_VOCAB) + 2, output_dim=20)(
-            model_input)
-    emb_output = math_ops.reduce_mean(emb_output, axis=1)
-    dense_output = keras.layers.Dense(
-        units=1, activation="sigmoid")(
-            emb_output)
-    model = keras.Model({"features": model_input}, dense_output)
-    return model
-
-  def define_reverse_lookup_layer(self):
-    """Create string reverse lookup layer for serving."""
-
-    label_inverse_lookup_layer = string_lookup.StringLookup(
-        num_oov_indices=0,
-        mask_token=None,
-        vocabulary=self.LABEL_VOCAB,
-        invert=True)
-    return label_inverse_lookup_layer
-
-  def create_serving_signature(self, model, feature_mapper,
-                               label_inverse_lookup_layer):
-    """Create serving signature for the given model."""
-
-    @def_function.function
-    def serve_fn(raw_features):
-      raw_features = array_ops.expand_dims(raw_features, axis=0)
-      transformed_features = model.feature_mapper(raw_features)
-      outputs = model(transformed_features)
-      outputs = array_ops.squeeze(outputs, axis=0)
-      outputs = math_ops.cast(math_ops.greater(outputs, 0.5), dtypes.int64)
-      decoded_outputs = model.label_inverse_lookup_layer(outputs)
-      return array_ops.squeeze(decoded_outputs, axis=0)
-
-    model.feature_mapper = feature_mapper
-    model.label_inverse_lookup_layer = label_inverse_lookup_layer
-    # The serving input does NOT have a batch dimension.
-    return serve_fn.get_concrete_function(
-        tensor_spec.TensorSpec(
-            shape=(3), dtype=dtypes.string, name="example"))
-
-  def test_save_load_serving_model(self, model, feature_mapper,
-                                   label_inverse_lookup_layer):
-    """Test save/load/serving model."""
-
-    serving_fn = self.create_serving_signature(model, feature_mapper,
-                                               label_inverse_lookup_layer)
-
-    saved_model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
-    model.save(saved_model_dir, save_format="tf",
-               signatures={"serving_default": serving_fn})
-
-    # Test the saved_model.
-    loaded_serving_fn = keras.saving.save.load_model(
-        saved_model_dir).signatures["serving_default"]
-
-    # Check the result with and without "avenger".
-    prediction0 = loaded_serving_fn(
-        constant_op.constant(["avenger", "ironman", "avenger"]))["output_0"]
-    self.assertIn(prediction0.numpy().decode("UTF-8"), ("yes", "no"))
-
-    prediction1 = loaded_serving_fn(
-        constant_op.constant(["ironman", "ironman", "unkonwn"]))["output_0"]
-    self.assertIn(prediction1.numpy().decode("UTF-8"), ("yes", "no"))
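For orientation, the helpers above were meant to be chained roughly as follows; a hedged sketch in plain eager mode, without the distribution strategy scope and training loop that the real distribute tests wrapped around them (instantiation and call order here are illustrative only):

```python
import tensorflow as tf

# Hedged sketch: assumes DistributeKplTestUtils (defined above) is importable.
utils = DistributeKplTestUtils()

# Build the feature/label lookup models from precomputed vocabularies.
feature_mapper, label_mapper = utils.define_kpls_for_training(use_adapt=False)

# Raw strings in, integer ids out (the same calls dataset_fn makes per batch).
feature_ids = feature_mapper(tf.constant([["avenger", "ironman", "hulk"]]))
label_ids = label_mapper(tf.constant([["yes"]]))

# The generated dataset already has both mappers applied to its elements.
train_dataset = utils.dataset_fn(feature_mapper, label_mapper)

# The trainable model consumes the already-mapped feature ids.
model = utils.define_model()
```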
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 0707e39..f63cbbc 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -129,12 +129,10 @@
     "//tensorflow/python/distribute:multi_process_runner",
     "//tensorflow/python/eager:eager_pip",
     "//tensorflow/python/keras:combinations",
-    "//tensorflow/python/keras/layers/preprocessing:preprocessing_test_utils",
     "//tensorflow/python/keras/distribute:distribute_test_lib_pip",
     "//tensorflow/python/keras/distribute:strategy_combinations",
     "//tensorflow/python/keras/mixed_precision:test_util",
     "//tensorflow/python/keras/utils:dataset_creator",
-    "//tensorflow/python/keras/utils:kpl_test_utils",
     "//tensorflow/python/kernel_tests:cudnn_deterministic_base",
     "//tensorflow/python/kernel_tests:bias_op_base",
     "//tensorflow/python/kernel_tests:sparse_xent_op_test_base",