| /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_ |
| #define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_ |
| |
| #include <string> |
| #include <unordered_map> |
| #include <vector> |
| |
| #include "tensorflow/core/example/example.pb.h" |
| #include "tensorflow/core/framework/allocator.h" |
| #include "tensorflow/core/framework/graph.pb.h" |
| #include "tensorflow/core/framework/op_kernel.h" |
| #include "tensorflow/core/framework/partial_tensor_shape.h" |
| #include "tensorflow/core/framework/tensor.h" |
| #include "tensorflow/core/framework/types.h" |
| #include "tensorflow/core/lib/gtl/array_slice.h" |
| #include "tensorflow/core/platform/types.h" |
| #include "tensorflow/core/util/sparse/sparse_tensor.h" |
| |
| namespace tensorflow { |
| namespace example { |
| |
| // FastParseExampleConfig defines how to parse features in Example. |
| // Each sub-config is responsible for one feature identified with feautre_name. |
| // FastParseExampleConfig can't have two sub-configs with the same feature_name. |
| // dtype identifies the type of output vector and the kind of Feature expected |
| // in Example. |
| struct FastParseExampleConfig { |
| struct Dense { |
| string feature_name; |
| DataType dtype; |
| // These 2 fields correspond exactly to dense_shapes and dense_defaults in |
| // ParseExample op. |
| // Documentation is available in: tensorflow/core/ops/parsing_ops.cc |
| PartialTensorShape shape; |
| Tensor default_value; |
| bool variable_length; |
| std::size_t elements_per_stride; |
| }; |
| |
| struct Sparse { |
| string feature_name; |
| DataType dtype; |
| }; |
| |
| std::vector<Dense> dense; |
| std::vector<Sparse> sparse; |
| |
| // If `true`, `Result::feature_stats` will contain one |
| // `PerExampleFeatureStats` for each serialized example in the input. |
| bool collect_feature_stats = false; |
| }; |
| |
// Per-example statistics about the features seen by
// `FastParse[Single]Example()`.
//
// TODO(b/111553342): The statistics gathered today have two known limitations:
//   * A feature name that appears more than once is counted multiple times.
//   * The feature values count only covers features that were requested in
//     the `FastParseExampleConfig`.
// Both could be addressed with additional work at runtime.
struct PerExampleFeatureStats {
  // How many feature names the example contains.
  std::size_t features_count{0};

  // Total number of values, summed over every feature that is parsed.
  std::size_t feature_values_count{0};
};
| |
| // This is exactly the output of TF's ParseExample Op. |
| // Documentation is available in: tensorflow/core/ops/parsing_ops.cc |
| struct Result { |
| std::vector<Tensor> sparse_indices; |
| std::vector<Tensor> sparse_values; |
| std::vector<Tensor> sparse_shapes; |
| std::vector<Tensor> dense_values; |
| |
| // This vector will be populated with one element per example if |
| // `FastParseExampleConfig::collect_feature_stats` is set to `true`. |
| std::vector<PerExampleFeatureStats> feature_stats; |
| }; |
| |
| // Parses a batch of serialized Example protos and converts them into result |
| // according to given config. |
| // Given example names have to either be empty or the same size as serialized. |
| // example_names are used only for error messages. |
| Status FastParseExample(const FastParseExampleConfig& config, |
| gtl::ArraySlice<string> serialized, |
| gtl::ArraySlice<string> example_names, |
| thread::ThreadPool* thread_pool, Result* result); |
| |
| // TODO(mrry): Move the hash table construction into the config object. |
| typedef FastParseExampleConfig FastParseSingleExampleConfig; |
| |
| Status FastParseSingleExample(const FastParseSingleExampleConfig& config, |
| const string& serialized, Result* result); |
| |
| // Parses a batch of serialized SequenceExample protos and converts them into |
| // result according to given config. |
| // Given example names have to either be empty or the same size as serialized. |
| // example_names are used only for error messages. |
| Status FastParseSequenceExample( |
| const example::FastParseExampleConfig& context_config, |
| const example::FastParseExampleConfig& feature_list_config, |
| gtl::ArraySlice<string> serialized, gtl::ArraySlice<string> example_names, |
| thread::ThreadPool* thread_pool, example::Result* context_result, |
| example::Result* feature_list_result, |
| std::vector<Tensor>* dense_feature_lengths); |
| |
| // This function parses serialized Example and populates given example. |
| // It uses the same specialized parser as FastParseExample which is efficient. |
| // But then constructs Example which is relatively slow. |
| // It is exported here as a convenient API to test parser part separately. |
| bool TestFastParse(const string& serialized, Example* example); |
| |
| } // namespace example |
| } // namespace tensorflow |
| |
| #endif // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_FAST_PARSING_H_ |