// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace tflite;
// TFLite metadata contains both human-readable and machine-readable information
// about what the model does and how to use the model. It can be used as a
// README file, which elaborates the details of the model, each input/output
// tensor, and each associated file.
//
// An important use case of TFLite metadata is the TFLite codegen tool, which
// automatically generates the model interface based on the properties of the
// model and the tensors. The model interface provides high-level APIs to
// interact with the model, such as preprocessing the input data and running
// inferences.
//
// Entries marked with "<Codegen usage>" are used by the TFLite codegen tool to
// generate the model interface. It is recommended to fill in at least those
// entries to get the best results from the codegen tool.
// The Metadata schema is versioned by the Semantic versioning number, such as
// MAJOR.MINOR.PATCH. It tracks the schema changes according to the rules below:
// * Bump up the MAJOR number when making potentially backwards incompatible
// changes. It must be incremented if the new changes break backwards
// compatibility. It may also include minor and patch level changes as
// needed. The true backwards compatibility is indicated by the file
// identifier.
// * Bump up the MINOR number when making backwards compatible updates for
// major features, such as supporting new content types or adding new
// processing units.
// * Bump up the PATCH number when making small backwards compatible changes,
// such as adding new fields or deprecating certain fields (not deleting
// them).
//
// ModelMetadata.min_parser_version indicates the minimum necessary metadata
// parser version to fully understand all fields in a given metadata flatbuffer.
//
// New fields and types will have associated comments with the schema version
// for which they were added.
//
// LINT.IfChange
// Schema Semantic version: 1.2.1
// LINT.ThenChange(//tensorflow_lite_support/\
// metadata/java/src/java/org/tensorflow/lite/support/metadata/\
// MetadataParser.java)
// This indicates the flatbuffer compatibility. The number will bump up when a
// breaking change is applied to the schema, such as removing fields or adding
// new fields in the middle of a table.
file_identifier "M001";
// History:
// 1.0.1 - Added VOCABULARY type to AssociatedFileType.
// 1.1.0 - Added BertTokenizerOptions to ProcessUnitOptions.
//         Added SentencePieceTokenizerOptions to ProcessUnitOptions.
//         Added input_process_units to SubGraphMetadata.
//         Added output_process_units to SubGraphMetadata.
// 1.2.0 - Added input_tensor_groups to SubGraphMetadata.
//         Added output_tensor_groups to SubGraphMetadata.
// 1.2.1 - Added RegexTokenizerOptions to ProcessUnitOptions.
// File extension of any written files.
file_extension "tflitemeta";
// LINT.IfChange
enum AssociatedFileType : byte {
UNKNOWN = 0,
// Files such as readme.txt.
DESCRIPTIONS = 1,
// Contains labels that annotate a certain axis of the tensor. For example,
// the label file in image classification. Those labels annotate the
// output tensor, such that each value in the output tensor is the
// probability of the corresponding category specified by the label.
//
// <Codegen usage>:
// If an output tensor has an associated file as TENSOR_AXIS_LABELS, return
// the output as a mapping between the labels and probabilities in the model
// interface.
// If multiple files of the same type are present, the first one is used by
// default; additional ones are to be distinguished from one another by their
// specified locale.
TENSOR_AXIS_LABELS = 2,
// Contains labels that tensor values correspond to. For example, in
// the object detection model, one of the output tensors is the detected
// classes. And each value in the tensor refers to the index of a label in the
// category label file.
//
// <Codegen usage>:
// If an output tensor has an associated file as TENSOR_VALUE_LABELS, convert
// the tensor values into labels, and return a list of strings as the output.
// If multiple files of the same type are present, the first one is used by
// default; additional ones are to be distinguished from one another by their
// specified locale.
TENSOR_VALUE_LABELS = 3,
// Contains sigmoid-based score calibration parameters, formatted as CSV.
// Each line contains, for the corresponding index of an output tensor, the
// scale, slope, offset and (optional) min_score parameters to be used for
// sigmoid fitting (in this order and in `strtof`-compatible [1] format).
// A line may be left empty to default calibrated scores for this index to
// default_score.
// In summary, each line should thus contain 0, 3 or 4 comma-separated values.
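//
// For illustration, a hypothetical file for a 3-class output could be:
//   0.9,0.5,0.1
//
//   1.0,0.3,0.2,0.5
// where the empty second line makes the calibrated score for class index 1
// default to default_score, and class index 2 additionally sets min_score.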
//
// See documentation for ScoreCalibrationOptions for details.
//
// [1]: https://en.cppreference.com/w/c/string/byte/strtof
TENSOR_AXIS_SCORE_CALIBRATION = 4,
// Contains a list of unique words (one word per line, separated by "\n")
// that helps to convert natural language words to embedding vectors.
// Added in: 1.0.1
VOCABULARY = 5,
}
table AssociatedFile {
// Name of this file. It needs to be exactly the same as the name of the
// actual file packed into the TFLite model as a zip file.
//
// <Codegen usage>:
// Locates the actual file in the TFLite model.
name:string;
// A description of what the file is.
description:string;
// Type of the associated file. There may be special pre/post processing for
// some types. For example, in image classification, a label file of the output
// will be used to convert the object index into a string.
//
// <Codegen usage>:
// Determines how to process the corresponding tensor.
type:AssociatedFileType;
// An optional locale for this associated file (if applicable). It is
// recommended to use an ISO 639-1 two-letter code (e.g. "en" for English),
// optionally completed by a two-letter region code (e.g. "en-US" for US
// English and "en-CA" for Canadian English).
// Leverage this in order to specify e.g. multiple label files translated into
// different languages.
locale:string;
}
// The basic content type for all tensors.
//
// <Codegen usage>:
// Input feature tensors:
// 1. Generates the method to load data from a TensorBuffer.
// 2. Creates the preprocessing logic. The default processing pipeline is:
// [NormalizeOp, QuantizeOp].
// Output feature tensors:
// 1. Generates the method to return the output data to a TensorBuffer.
// 2. Creates the post-processing logic. The default processing pipeline is:
// [DeQuantizeOp].
table FeatureProperties {
}
// The type of color space of an image.
enum ColorSpaceType : byte {
UNKNOWN = 0,
RGB = 1,
GRAYSCALE = 2,
}
table ImageSize {
width:uint;
height:uint;
}
// The properties for image tensors.
//
// <Codegen usage>:
// Input image tensors:
// 1. Generates the method to load an image from a TensorImage.
// 2. Creates the preprocessing logic. The default processing pipeline is:
// [ResizeOp, NormalizeOp, QuantizeOp].
// Output image tensors:
// 1. Generates the method to return the output data to a TensorImage.
// 2. Creates the post-processing logic. The default processing pipeline is:
// [DeQuantizeOp].
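//
// A minimal Python sketch of the default input pipeline above (the function
// is illustrative; normalization and quantization parameters come from
// NormalizationOptions and the tensor's quantization parameters):
//
//   import numpy as np
//
//   def preprocess(img, mean, std, scale, zero_point):
//     # img is assumed to be already resized to the expected size (ResizeOp).
//     img = (np.asarray(img, np.float32) - mean) / std     # NormalizeOp
//     return np.round(img / scale + zero_point).astype(np.uint8)  # QuantizeOp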
table ImageProperties {
// The color space of the image.
//
// <Codegen usage>:
// Determines how to convert the color space of an image provided by users.
color_space:ColorSpaceType;
// Indicates the default value of image width and height if the tensor shape
// is dynamic. For fixed-size tensors, this size will be consistent with the
// expected size.
default_size:ImageSize;
}
// The properties for tensors representing bounding boxes.
//
// <Codegen usage>:
// Input image tensors: NA.
// Output image tensors: parses the values into a data structure that represents
// bounding boxes. For example, in the generated wrapper for Android, it returns
// the output as android.graphics.Rect objects.
enum BoundingBoxType : byte {
UNKNOWN = 0,
// Represents the bounding box by using the combination of boundaries,
// {left, top, right, bottom}.
// The default order is {left, top, right, bottom}. Other orders can be
// indicated by BoundingBoxProperties.index.
BOUNDARIES = 1,
// Represents the bounding box by using the upper_left corner, width and
// height.
// The default order is {upper_left_x, upper_left_y, width, height}. Other
// orders can be indicated by BoundingBoxProperties.index.
UPPER_LEFT = 2,
// Represents the bounding box by using the center of the box, width and
// height. The default order is {center_x, center_y, width, height}. Other
// orders can be indicated by BoundingBoxProperties.index.
CENTER = 3,
}
enum CoordinateType : byte {
// The coordinates are float values from 0 to 1.
RATIO = 0,
// The coordinates are integers.
PIXEL = 1,
}
table BoundingBoxProperties {
// Denotes the order of the elements defined in each bounding box type. An
// empty index array represents the default order of each bounding box type.
// For example, to denote the default order of BOUNDARIES, {left, top, right,
// bottom}, the index should be {0, 1, 2, 3}. To denote the order {left,
// right, top, bottom}, the index should be {0, 2, 1, 3}.
//
// The index array can be applied to all bounding box types to adjust the
// order of their corresponding underlying elements.
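//
// A minimal Python sketch of the reordering this implies (variable names are
// illustrative): with BOUNDARIES and index = {0, 2, 1, 3},
//
//   left, top, right, bottom = (raw_box[i] for i in index)
//
// recovers the default {left, top, right, bottom} order from a tensor stored
// in the order {left, right, top, bottom}.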
//
// <Codegen usage>:
// Indicates how to parse the bounding box values.
index:[uint];
// <Codegen usage>:
// Indicates how to parse the bounding box values.
type:BoundingBoxType;
// <Codegen usage>:
// Indicates how to convert the bounding box back to the original image in
// pixels.
coordinate_type:CoordinateType;
}
union ContentProperties {
FeatureProperties,
ImageProperties,
BoundingBoxProperties,
}
table ValueRange {
min:int;
max:int;
}
table Content {
// The properties that the content may have, indicating the type of the
// Content.
//
// <Codegen usage>:
// Indicates how to process the tensor.
content_properties:ContentProperties;
// The range of dimensions that the content corresponds to. A NULL
// "range" indicates that the content uses up all dimensions,
// except the batch axis if present.
//
// Here are all the possible situations of how a tensor is composed.
// Case 1: The tensor is a single object, such as an image.
// For example, the input of an image classifier
// (https://www.tensorflow.org/lite/models/image_classification/overview),
// a tensor of shape [1, 224, 224, 3]. Dimensions 1 to 3 correspond to the
// image. Since dimension 0 is a batch axis, which can be ignored,
// "range" can be left as NULL.
//
// Case 2: The tensor contains multiple instances of the same object.
// For example, the output tensor of detected bounding boxes of an object
// detection model
// (https://www.tensorflow.org/lite/models/object_detection/overview).
// The tensor shape is [1, 10, 4]. Here is what the three dimensions
// represent:
// dimension 0: the batch axis.
// dimension 1: the 10 objects detected with the highest confidence.
// dimension 2: the bounding boxes of the 10 detected objects.
// The tensor is essentially 10 bounding boxes. In this case,
// "range" should be {min=2; max=2;}.
//
// The output tensor of scores of the above object detection model has shape
// [1, 10], where
// dimension 0: the batch axis;
// dimension 1: the scores of the 10 detected objects.
// Set "range" to the number of dimensions which is {min=2; max=2;} to denote
// that every element in the tensor is an individual content object, i.e. a
// score in this example.
//
// Another example is the pose estimation model
// (https://www.tensorflow.org/lite/models/pose_estimation/overview).
// The output tensor of heatmaps is in the shape of [1, 9, 9, 17].
// Here is what the four dimensions represent:
// dimension 0: the batch axis.
// dimension 1/2: the heatmap image.
// dimension 3: 17 body parts of a person.
// Even though the last axis is body part, the real content of this tensor is
// the heatmap. "range" should be {min=1; max=2;}.
//
// Case 3: The tensor contains multiple different objects. (Not supported by
// Content at this point).
// Sometimes a tensor may contain multiple different objects, thus different
// contents. It is very common for regression models. For example, a model
// to predict the fuel efficiency
// (https://www.tensorflow.org/tutorials/keras/regression).
// The input tensor has shape [1, 9], consisting of 9 features, such as
// "Cylinders", "Displacement", "Weight", etc. In this case, dimension 1
// contains 9 different contents. However, since these sub-dimension objects
// rarely need to be processed separately, their contents are not recorded
// in the metadata. Though, the name of each dimension can be set through
// TensorMetadata.dimension_names.
//
// Note that if it is not case 3, a tensor can only have one content type.
//
// <Codegen usage>:
// Case 1: return a single processed object of a certain content type.
// Case 2: return a list of processed objects of a certain content type. The
// generated model interface has APIs to randomly access those objects from
// the output.
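//
// For illustration, a Python sketch of populating "range" for Case 2 above,
// using the object API generated from this schema (assuming the flatbuffers
// --gen-object-api naming convention):
//
//   value_range = ValueRangeT()
//   value_range.min = 2
//   value_range.max = 2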
range:ValueRange;
}
// Parameters that are used when normalizing the tensor.
table NormalizationOptions{
// mean and std are normalization parameters. Tensor values are normalized
// on a per-channel basis, by the formula
// (x - mean) / std.
// If there is only one value in mean or std, we'll propagate the value to
// all channels.
//
// Quantized models share the same normalization parameters as their
// corresponding float models. For example, an image input tensor may have
// the normalization parameter of
// mean = 127.5f and std = 127.5f.
// The image value will be normalized from [0, 255] to [-1, 1].
// Then, for quantized models, the image data should be further quantized
// according to the quantization parameters. In the case of uint8, the image
// data will be scaled back to [0, 255], while for int8, the image data will
// be scaled to [-128, 127].
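//
// A minimal Python sketch of the two steps for a uint8 quantized model (the
// quantization scale and zero_point values below are illustrative):
//
//   x = 200.0                              # raw pixel value in [0, 255]
//   normalized = (x - 127.5) / 127.5       # ~0.569, within [-1, 1]
//   quantized = round(normalized / 0.00784 + 128)  # scaled back to [0, 255]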
//
// Both the normalization parameters and quantization parameters can be
// retrieved through the metadata extractor library.
// TODO(b/156644598): add link for the metadata extractor library.
// Per-channel mean of the possible values used in normalization.
//
// <Codegen usage>:
// Apply normalization to input tensors accordingly.
mean:[float];
// Per-channel standard deviation of the possible values used in normalization.
//
// <Codegen usage>:
// Apply normalization to input tensors accordingly.
std:[float];
}
// The different possible score transforms to apply to uncalibrated scores
// before applying score calibration.
enum ScoreTransformationType : byte {
// Identity function: g(x) = x.
IDENTITY = 0,
// Log function: g(x) = log(x).
LOG = 1,
// Inverse logistic function: g(x) = log(x) - log(1-x).
INVERSE_LOGISTIC = 2,
}
// Options to perform score calibration on an output tensor through sigmoid
// functions. One of the main purposes of score calibration is to make scores
// across classes comparable, so that a common threshold can be used for all
// output classes. This is meant for models producing class predictions as
// output, e.g. image classification or detection models.
//
// For each index in the output tensor, this applies:
// * `f(x) = scale / (1 + e^-(slope*g(x)+offset))` if `x > min_score` or if no
// `min_score` has been specified,
// * `f(x) = default_score` otherwise or if no scale, slope and offset have been
// specified.
// Where:
// * scale, slope, offset and (optional) min_score are index-specific parameters
// * g(x) is an index-independent transform among those defined in
// ScoreTransformationType
// * default_score is an index-independent parameter.
// An AssociatedFile with type TENSOR_AXIS_SCORE_CALIBRATION specifying the
// index-specific parameters must be associated with the corresponding
// TensorMetadata for score calibration to be applied.
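//
// A minimal Python sketch of the per-index formula above (math only, not an
// API of this library):
//
//   import math
//
//   def calibrate(x, scale, slope, offset, min_score, default_score):
//     g = math.log(x) - math.log(1 - x)  # e.g. INVERSE_LOGISTIC transform
//     if min_score is None or x > min_score:
//       return scale / (1 + math.exp(-(slope * g + offset)))
//     return default_score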
table ScoreCalibrationOptions {
// The function to use for transforming the uncalibrated score before
// applying score calibration.
score_transformation:ScoreTransformationType;
// The default calibrated score to apply if the uncalibrated score is
// below min_score or if no parameters were specified for a given index.
default_score:float;
}
// Performs thresholding on output tensor values, in order to filter out
// low-confidence results.
table ScoreThresholdingOptions {
// The recommended global threshold below which results are considered
// low-confidence and should be filtered out.
global_score_threshold:float;
}
// Performs Bert tokenization as in tf.text.BertTokenizer
// (https://github.com/tensorflow/text/blob/3599f6fcd2b780a2dc413b90fb9315464f10b314/docs/api_docs/python/text/BertTokenizer.md)
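//
// A minimal Python sketch of the equivalent tokenization (assuming a recent
// tensorflow_text release that accepts a vocab file path directly):
//
//   import tensorflow_text as tf_text
//   tokenizer = tf_text.BertTokenizer("vocab.txt", lower_case=True)
//   token_ids = tokenizer.tokenize(["hello world"])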
// Added in: 1.1.0
table BertTokenizerOptions {
// The vocabulary files used in the BertTokenizer.
vocab_file:[AssociatedFile];
}
// Performs SentencePiece tokenization as in tf.text.SentencepieceTokenizer
// (https://github.com/tensorflow/text/blob/3599f6fcd2b780a2dc413b90fb9315464f10b314/docs/api_docs/python/text/SentencepieceTokenizer.md).
// Added in: 1.1.0
table SentencePieceTokenizerOptions {
// The SentencePiece model files used in the SentencePieceTokenizer.
sentencePiece_model:[AssociatedFile];
// The optional vocabulary model files used in the SentencePieceTokenizer.
vocab_file:[AssociatedFile];
}
// Splits strings by the occurrences of delim_regex_pattern and converts the
// tokens into ids. For example, given
// delim_regex_pattern: "\W+",
// string: "Words, words, words.",
// the tokens after split are: "Words", "words", "words", "".
// The tokens can then be converted into ids according to the vocab_file.
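//
// A minimal Python sketch of this behavior (the vocab mapping and unknown_id
// are assumed to be loaded from vocab_file):
//
//   import re
//   tokens = re.split(r"\W+", "Words, words, words.")
//   # -> ['Words', 'words', 'words', '']
//   ids = [vocab.get(t, unknown_id) for t in tokens]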
// Added in: 1.2.1
table RegexTokenizerOptions {
delim_regex_pattern:string;
// The vocabulary files used to convert the tokens into ids.
vocab_file:[AssociatedFile];
}
// Options that are used when processing the tensor.
union ProcessUnitOptions {
NormalizationOptions,
ScoreCalibrationOptions,
ScoreThresholdingOptions,
// Added in: 1.1.0
BertTokenizerOptions,
// Added in: 1.1.0
SentencePieceTokenizerOptions,
// Added in: 1.2.1
RegexTokenizerOptions
}
// A process unit that is used to process the tensor out-of-graph.
table ProcessUnit {
options:ProcessUnitOptions;
}
// Statistics to describe a tensor.
table Stats {
// Max and min are not currently used in tflite.support codegen. They mainly
// serve as references for users to better understand the model. They can also
// be used to validate model pre/post processing results.
// If there is only one value in max or min, we'll propagate the value to
// all channels.
// Per-channel maximum value of the tensor.
max:[float];
// Per-channel minimum value of the tensor.
min:[float];
}
// Metadata of a group of tensors. It may contain several tensors that will be
// grouped together in codegen. For example, the TFLite object detection model
// example (https://www.tensorflow.org/lite/models/object_detection/overview)
// has four outputs: classes, scores, bounding boxes, and number of detections.
// If the four outputs are bundled together using TensorGroup (for example,
// named as "detection result"), the codegen tool will generate the class,
// `DetectionResult`, which contains the class, score, and bounding box. And
// the outputs of the model will be converted to a list of `DetectionResults`
// and the number of detections. Note that the number of detections is a
// single number, and therefore is inappropriate for the list of
// `DetectionResult`.
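//
// A minimal Python sketch of the grouping concept (variable names are
// illustrative, following the example above):
//
//   detection_results = [
//       {"category": classes[i], "score": scores[i], "box": boxes[i]}
//       for i in range(int(num_detections))
//   ]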
// Added in: 1.2.0
table TensorGroup {
// Name of the tensor group.
//
// <codegen usage>:
// Name of the joint class of the tensor group.
name:string;
// Names of the tensors to group together, corresponding to
// TensorMetadata.name.
//
// <codegen usage>:
// Determines which tensors will be added to this group. All tensors in the
// group should have the same number of elements specified by Content.range.
tensor_names:[string];
}
// Detailed information of an input or output tensor.
table TensorMetadata {
// Name of the tensor.
//
// <Codegen usage>:
// The name of this tensor in the generated model interface.
name:string;
// A description of the tensor.
description:string;
// A list of names of the dimensions in this tensor. The length of
// dimension_names needs to match the number of dimensions in this tensor.
//
// <Codegen usage>:
// The name of each dimension in the generated model interface. See "Case 2"
// in the comments of Content.range.
dimension_names:[string];
// The content that represents this tensor.
//
// <Codegen usage>:
// Determines how to process this tensor. See each item in ContentProperties
// for the default process units that will be applied to the tensor.
content:Content;
// The process units that are used to process the tensor out-of-graph.
//
// <Codegen usage>:
// Contains the parameters of the default processing pipeline for each content
// type, such as the normalization parameters in all content types. See the
// items under ContentProperties for the details of the default processing
// pipeline.
process_units:[ProcessUnit];
// The statistics of the tensor values.
stats:Stats;
// A list of associated files of this tensor.
//
// <Codegen usage>:
// Contains processing parameters of this tensor, such as normalization.
associated_files:[AssociatedFile];
}
table SubGraphMetadata {
// Name of the subgraph.
//
// Note that, since TFLite only supports one subgraph at this moment, the
// codegen tool will use the name in ModelMetadata in the generated model
// interface.
name:string;
// A description of what the subgraph does.
description:string;
// Metadata of all input tensors used in this subgraph. It matches exactly
// the input tensors specified by `SubGraph.inputs` in the TFLite
// schema.fbs file[2]. The number of `TensorMetadata` in the array should
// equal the number of indices in `SubGraph.inputs`.
//
// [2]: tensorflow/lite/schema/schema.fbs
// <Codegen usage>:
// Determines how to process the inputs.
input_tensor_metadata:[TensorMetadata];
// Metadata of all output tensors used in this subgraph. It matches exactly
// the output tensors specified by `SubGraph.outputs` in the TFLite
// schema.fbs file[2]. The number of `TensorMetadata` in the array should
// equal the number of indices in `SubGraph.outputs`.
//
// <Codegen usage>:
// Determines how to process the outputs.
output_tensor_metadata:[TensorMetadata];
// A list of associated files of this subgraph.
associated_files:[AssociatedFile];
// Input process units of the subgraph. Some models may have complex pre- and
// post-processing logic where the process units do not work on one tensor at
// a time, but in a way similar to a TFLite graph. For example, in the
// MobileBert model (https://www.tensorflow.org/lite/models/bert_qa/overview),
// the inputs are: ids / mask / segment ids;
// the outputs are: end logits / start logits.
// The preprocessing converts the query string and the context string to the
// model inputs, and the post-processing converts the model outputs to the
// answer string.
// Added in: 1.1.0
input_process_units:[ProcessUnit];
// Output process units of the subgraph.
// Added in: 1.1.0
output_process_units:[ProcessUnit];
// Metadata of all input tensor groups used in this subgraph.
//
// <codegen usage>:
// Bundles the corresponding elements of the underlying input tensors together
// into a class, and converts those individual tensors into a list of the
// class objects.
// Added in: 1.2.0
input_tensor_groups:[TensorGroup];
// Metadata of all output tensor groups used in this subgraph.
//
// <codegen usage>:
// Bundles the corresponding elements of the underlying output tensors
// together into a class, and converts those individual tensors into a list of
// the class objects.
// Added in: 1.2.0
output_tensor_groups:[TensorGroup];
}
table ModelMetadata {
// Name of the model.
//
// <Codegen usage>:
// The name of the model in the generated model interface.
name:string;
// Model description in schema.
description:string;
// Version of the model, as specified by the model creators.
version:string;
// Note that the minimum required TFLite runtime version that the model is
// compatible with has already been added as a metadata entry in the TFLite
// schema. We'll decide later if we want to move it here, and keep it with
// other metadata entries.
// Metadata of all the subgraphs of the model. The 0th is assumed to be the
// main subgraph.
//
// <Codegen usage>:
// Determines how to process the inputs and outputs.
subgraph_metadata:[SubGraphMetadata];
// The person who created this model.
author:string;
// Licenses that may apply to this model.
license:string;
// A list of associated files of this model.
associated_files:[AssociatedFile];
// The minimum metadata parser version that can fully understand the fields in
// the metadata flatbuffer. The version is effectively the largest version
// number among the versions of all the fields populated and the smallest
// compatible version indicated by the file identifier.
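//
// For example, a metadata flatbuffer that populates input_process_units
// (added in 1.1.0) and no later fields would get min_parser_version "1.1.0".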
//
// This field is automatically populated by the MetadataPopulator when
// the metadata is populated into a TFLite model.
min_parser_version:string;
}
// LINT.ThenChange(//tensorflow_lite_support/\
// metadata/cc/metadata_version.cc)
root_type ModelMetadata;