[tf.data] Graduating `tf.data.experimental.AUTOTUNE` to the core API.

PiperOrigin-RevId: 330845881
Change-Id: I0480031d39753f5115e9dae5f5a6ae5b42e19bb0
diff --git a/RELEASE.md b/RELEASE.md
index f35c2ed..6c9f5b1 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -107,6 +107,8 @@
     * `tf.data.Dataset.from_generator` now supports Ragged and Sparse tensors
       with a new `output_signature` argument, which allows `from_generator` to
       produce any type describable by a `tf.TypeSpec`.
+    * `tf.data.experimental.AUTOTUNE` is now available in the core API as
+      `tf.data.AUTOTUNE`.
 * `tf.image`:
     * Added deterministic `tf.image.stateless_random_*` functions for each
       `tf.image.random_*` function. Added a new op
diff --git a/tensorflow/python/data/__init__.py b/tensorflow/python/data/__init__.py
index 39cbd3d..04ea1e6 100644
--- a/tensorflow/python/data/__init__.py
+++ b/tensorflow/python/data/__init__.py
@@ -23,6 +23,7 @@
 
 # pylint: disable=unused-import
 from tensorflow.python.data import experimental
+from tensorflow.python.data.ops.dataset_ops import AUTOTUNE
 from tensorflow.python.data.ops.dataset_ops import Dataset
 from tensorflow.python.data.ops.dataset_ops import INFINITE as INFINITE_CARDINALITY
 from tensorflow.python.data.ops.dataset_ops import make_initializable_iterator
diff --git a/tensorflow/python/data/experimental/ops/batching.py b/tensorflow/python/data/experimental/ops/batching.py
index 4d2bfbb..3d04de8 100644
--- a/tensorflow/python/data/experimental/ops/batching.py
+++ b/tensorflow/python/data/experimental/ops/batching.py
@@ -176,7 +176,7 @@
     num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
       representing the number of elements to process in parallel. If not
       specified, `batch_size * num_parallel_batches` elements will be processed
-      in parallel. If the value `tf.data.experimental.AUTOTUNE` is used, then
+      in parallel. If the value `tf.data.AUTOTUNE` is used, then
       the number of parallel calls is set dynamically based on available CPU.
 
   Returns:
@@ -237,7 +237,7 @@
     num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
       representing the number of elements to process in parallel. If not
       specified, `batch_size * num_parallel_batches` elements will be processed
-      in parallel. If the value `tf.data.experimental.AUTOTUNE` is used, then
+      in parallel. If the value `tf.data.AUTOTUNE` is used, then
       the number of parallel calls is set dynamically based on available CPU.
 
   Returns:
diff --git a/tensorflow/python/data/experimental/ops/interleave_ops.py b/tensorflow/python/data/experimental/ops/interleave_ops.py
index a3cc75a..4c16d35 100644
--- a/tensorflow/python/data/experimental/ops/interleave_ops.py
+++ b/tensorflow/python/data/experimental/ops/interleave_ops.py
@@ -37,7 +37,7 @@
 @deprecation.deprecated(
     None,
     "Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, "
-    "num_parallel_calls=tf.data.experimental.AUTOTUNE)` instead. If sloppy "
+    "num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy "
     "execution is desired, use `tf.data.Options.experimental_deterministic`.")
 @tf_export("data.experimental.parallel_interleave")
 def parallel_interleave(map_func,
diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py
index e3cce9c..5e1981c 100644
--- a/tensorflow/python/data/ops/dataset_ops.py
+++ b/tensorflow/python/data/ops/dataset_ops.py
@@ -94,6 +94,8 @@
 
 # A constant that can be used to enable auto-tuning.
 AUTOTUNE = -1
+tf_export("data.AUTOTUNE").export_constant(__name__, "AUTOTUNE")
+# TODO(b/168128531): Deprecate and remove the experimental alias below.
 tf_export("data.experimental.AUTOTUNE").export_constant(__name__, "AUTOTUNE")
 
 # Constants representing infinite and unknown cardinalities.
@@ -1700,7 +1702,7 @@
 
     >>> dataset = Dataset.range(1, 6)  # ==> [ 1, 2, 3, 4, 5 ]
     >>> dataset = dataset.map(lambda x: x + 1,
-    ...     num_parallel_calls=tf.data.experimental.AUTOTUNE,
+    ...     num_parallel_calls=tf.data.AUTOTUNE,
     ...     deterministic=False)
 
     Args:
@@ -1708,7 +1710,7 @@
       num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
         representing the number elements to process asynchronously in parallel.
         If not specified, elements will be processed sequentially. If the value
-        `tf.data.experimental.AUTOTUNE` is used, then the number of parallel
+        `tf.data.AUTOTUNE` is used, then the number of parallel
         calls is set dynamically based on available CPU.
       deterministic: (Optional.) A boolean controlling whether determinism
         should be traded for performance by allowing elements to be produced out
@@ -1821,7 +1823,7 @@
     ...              "/var/data/file3.txt", "/var/data/file4.txt"]
     >>> dataset = tf.data.Dataset.from_tensor_slices(filenames)
     >>> dataset = dataset.interleave(lambda x: tf.data.TFRecordDataset(x),
-    ...     cycle_length=4, num_parallel_calls=tf.data.experimental.AUTOTUNE,
+    ...     cycle_length=4, num_parallel_calls=tf.data.AUTOTUNE,
     ...     deterministic=False)
 
     Args:
@@ -1829,7 +1831,7 @@
       cycle_length: (Optional.) The number of input elements that will be
         processed concurrently. If not set, the tf.data runtime decides what it
         should be based on available CPU. If `num_parallel_calls` is set to
-        `tf.data.experimental.AUTOTUNE`, the `cycle_length` argument identifies
+        `tf.data.AUTOTUNE`, the `cycle_length` argument identifies
         the maximum degree of parallelism.
       block_length: (Optional.) The number of consecutive elements to produce
         from each input element before cycling to another input element. If not
@@ -1838,7 +1840,7 @@
         threadpool, which is used to fetch inputs from cycle elements
         asynchronously and in parallel. The default behavior is to fetch inputs
         from cycle elements synchronously with no parallelism. If the value
-        `tf.data.experimental.AUTOTUNE` is used, then the number of parallel
+        `tf.data.AUTOTUNE` is used, then the number of parallel
         calls is set dynamically based on available CPU.
       deterministic: (Optional.) A boolean controlling whether determinism
         should be traded for performance by allowing elements to be produced out
@@ -2574,7 +2576,7 @@
       num_parallel_calls: (Optional.) A `tf.int32` scalar `tf.Tensor`,
         representing the number elements to process asynchronously in parallel.
         If not specified, elements will be processed sequentially. If the value
-        `tf.data.experimental.AUTOTUNE` is used, then the number of parallel
+        `tf.data.AUTOTUNE` is used, then the number of parallel
         calls is set dynamically based on available CPU.
       deterministic: (Optional.) A boolean controlling whether determinism
         should be traded for performance by allowing elements to be produced out
diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
index 1d2af01..6b64ad6 100644
--- a/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v1/tensorflow.data.pbtxt
@@ -1,6 +1,10 @@
 path: "tensorflow.data"
 tf_module {
   member {
+    name: "AUTOTUNE"
+    mtype: "<type \'int\'>"
+  }
+  member {
     name: "Dataset"
     mtype: "<type \'type\'>"
   }
diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
index b6ac6f8..ead3f80 100644
--- a/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
+++ b/tensorflow/tools/api/golden/v2/tensorflow.data.pbtxt
@@ -1,6 +1,10 @@
 path: "tensorflow.data"
 tf_module {
   member {
+    name: "AUTOTUNE"
+    mtype: "<type \'int\'>"
+  }
+  member {
     name: "Dataset"
     mtype: "<type \'type\'>"
   }