# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python wrappers for tf.data writers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.compat import compat
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.util import convert
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_spec
from tensorflow.python.ops import gen_experimental_dataset_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export("data.experimental.TFRecordWriter")
class TFRecordWriter(object):
"""Writes data to a TFRecord file.
To write a `dataset` to a single TFRecord file:
```python
dataset = ... # dataset to be written
writer = tf.data.experimental.TFRecordWriter(PATH)
writer.write(dataset)
```
To shard a `dataset` across multiple TFRecord files:
```python
dataset = ... # dataset to be written
def reduce_func(key, dataset):
filename = tf.strings.join([PATH_PREFIX, tf.strings.as_string(key)])
writer = tf.data.experimental.TFRecordWriter(filename)
writer.write(dataset.map(lambda _, x: x))
return tf.data.Dataset.from_tensors(filename)
dataset = dataset.enumerate()
dataset = dataset.apply(tf.data.experimental.group_by_window(
lambda i, _: i % NUM_SHARDS, reduce_func, tf.int64.max
))
```
"""

  def __init__(self, filename, compression_type=None):
    """Creates a `TFRecordWriter`.

    Args:
      filename: a string path indicating where to write the TFRecord data.
      compression_type: (Optional.) a string, one of `""` (no compression),
        `"ZLIB"`, or `"GZIP"`. Defaults to no compression.
    """
    self._filename = ops.convert_to_tensor(
        filename, dtypes.string, name="filename")
    self._compression_type = convert.optional_param_to_tensor(
        "compression_type",
        compression_type,
        argument_default="",
        argument_dtype=dtypes.string)

  def write(self, dataset):
    """Returns a `tf.Operation` to write a dataset to a file.

    Args:
      dataset: a `tf.data.Dataset` whose elements are to be written to a file.

    Returns:
      A `tf.Operation` that, when run, writes contents of `dataset` to a file.

    Raises:
      TypeError: if `dataset` is not a `tf.data.Dataset` whose elements are
        scalar `tf.string` tensors.
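
    Example (a minimal sketch; assumes TF 1.x graph mode, where the returned
    operation must be run explicitly, with `PATH` as a placeholder):

    ```python
    dataset = tf.data.Dataset.from_tensor_slices([b"a", b"b"])
    writer = tf.data.experimental.TFRecordWriter(PATH)
    write_op = writer.write(dataset)
    with tf.compat.v1.Session() as sess:
      sess.run(write_op)
    ```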
"""
    if not isinstance(dataset, dataset_ops.DatasetV2):
      raise TypeError("`dataset` must be a `tf.data.Dataset` object.")
    if not dataset_ops.get_structure(dataset).is_compatible_with(
        tensor_spec.TensorSpec([], dtypes.string)):
      raise TypeError(
          "`dataset` must produce scalar `DT_STRING` tensors whereas it "
          "produces shape {0} and types {1}".format(
              dataset_ops.get_legacy_output_shapes(dataset),
              dataset_ops.get_legacy_output_types(dataset)))
    # Prefer the non-experimental `DatasetToTFRecord` op once the
    # forward-compatibility window (ending 2019-08-03) guarantees the runtime
    # provides it; otherwise fall back to the older
    # `ExperimentalDatasetToTFRecord` op.
    if compat.forward_compatible(2019, 8, 3):
      return gen_experimental_dataset_ops.dataset_to_tf_record(
          dataset._variant_tensor, self._filename, self._compression_type)  # pylint: disable=protected-access
    else:
      return gen_experimental_dataset_ops.experimental_dataset_to_tf_record(
          dataset._variant_tensor, self._filename, self._compression_type)  # pylint: disable=protected-access
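

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library module above):
# round-trips a couple of records through `TFRecordWriter` and reads them back
# with `tf.data.TFRecordDataset`. Assumes TF 1.x graph mode; the output path
# is a placeholder chosen for this example.
if __name__ == "__main__":
  import tensorflow as tf

  to_write = tf.data.Dataset.from_tensor_slices([b"record-0", b"record-1"])
  demo_writer = tf.data.experimental.TFRecordWriter("/tmp/demo.tfrecord")
  write_op = demo_writer.write(to_write)  # a `tf.Operation` in graph mode
  with tf.compat.v1.Session() as sess:
    sess.run(write_op)
  restored = tf.data.TFRecordDataset("/tmp/demo.tfrecord")
  print(restored)  # a dataset of the serialized records written above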