// Copyright 2022 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // TensorFlow's mobile inference model.
  // third_party/tensorflow/lite/schema/schema.fbs
  TFLITE = 2;

  // GraphViz
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations. These
// parameters are not normally encoded in model files and in general are not
// properties of models; rather, they describe how models are to be processed
// in the context of the present tooling job.
//
// Next ID to use: 51.
message TocoFlags {
  // Input file format
  optional FileFormat input_format = 1;

  // Output file format
  optional FileFormat output_format = 2;
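
  // For instance, a typical GraphDef-to-TFLite conversion job would set the
  // two fields above as follows (an illustrative text-format snippet of this
  // message, not a default):
  //
  //   input_format: TENSORFLOW_GRAPHDEF
  //   output_format: TFLITE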

  // Similar to inference_type, but allows controlling the quantization of
  // input arrays specifically, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By
  // "real-number" we mean float arrays and quantized arrays. This excludes
  // plain integer arrays, string arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap as
  // input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e.
  // the uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value and std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-number representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  // - If FLOAT, then real-number arrays will be of type float in
  //   the output file. If they were quantized in the input file, then
  //   they get dequantized.
  // - If QUANTIZED_UINT8, then real-number arrays will be quantized
  //   as uint8 in the output file. If they were float in the input file,
  //   then they get quantized.
  // - If not set, then all real-number arrays retain the same type in the
  //   output file as they have in the input file.
  optional IODataType inference_type = 4;
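
  // As an illustration of how inference_type and inference_input_type
  // combine (an illustrative text-format snippet; the values are examples,
  // not defaults): a vision model that takes uint8 bitmaps as input but runs
  // floating-point inference could use
  //
  //   inference_type: FLOAT
  //   inference_input_type: QUANTIZED_UINT8
  //
  // whereas a fully quantized model would typically set both to
  // QUANTIZED_UINT8.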

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activation array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;
  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;
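
  // For example, to experiment with quantized inference on a model whose
  // activation arrays lack (min, max) information, one might set
  // (illustrative values only; as noted above, the resulting quantized model
  // will be inaccurate):
  //
  //   default_ranges_min: 0.0
  //   default_ranges_max: 6.0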

  // Ignore and discard FakeQuant nodes. For instance, this can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations that prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary workaround, setting this reorder_across_fake_quant flag
  // allows toco to perform the necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported TensorFlow ops.
  optional bool allow_custom_ops = 10;

  // Applies only when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //   - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //   - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly
  // annotated with FakeQuant* ops on at least the edges and may contain
  // additional ops on the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation,
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging such weights to 1 in order to proceed, with
  // moderate inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size of constant arrays to deduplicate; arrays smaller than this
  // will not be deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost
  // of accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false];

  // Full filepath of the folder in which to dump GraphViz .dot files of the
  // graph at various stages of processing. Preferred over
  // --output_format=GRAPHVIZ_DOT, so that the requested output format is
  // still honored for the output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider renaming the following 2 flags and not calling
  // them "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool force_select_tf_ops = 28 [default = false];
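
  // As noted above, these flags are meant to be used together. An
  // illustrative configuration that lets unsupported TensorFlow ops fall
  // back to select TensorFlow (Flex) ops would be:
  //
  //   allow_custom_ops: true
  //   enable_select_tf_ops: true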

  // Boolean indicating whether to convert float32 constant buffers to
  // float16. This is typically done to reduce model size. Delegates may also
  // wish to implement kernels on reduced precision floats for performance
  // gains.
  optional bool quantize_to_float16 = 29 [default = false];
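
  // One way this flag is typically combined with post_training_quantize for
  // float16 weight quantization (an illustrative snippet, not a prescribed
  // recipe):
  //
  //   post_training_quantize: true
  //   quantize_to_float16: true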

  // Boolean flag indicating whether the converter should allow models with
  // dynamic Tensor shapes. When set to false, the converter will generate
  // runtime memory offsets for activation Tensors (with 128-bit alignment)
  // and error out on models with undetermined Tensor shapes. (Default: true)
  optional bool allow_dynamic_tensors = 30 [default = true];

  // Full filepath of the folder to dump conversion logs. This includes a
  // global view of the conversion process, and users can choose to submit
  // those logs.
  optional string conversion_summary_dir = 31;

  // String representing the custom ops OpDefs that are included in the
  // GraphDef.
  // Deprecated; do not use.
  repeated string custom_opdefs = 32 [deprecated = true];

  // Names of user-defined TensorFlow ops required in the TensorFlow Lite
  // runtime. These ops will be supported as select TensorFlow ops.
  repeated string select_user_tf_ops = 33;

  // Whether to enable TFLite resource variables during conversion.
  // Note: This is an experimental feature.
  optional bool enable_tflite_resource_variables = 34 [default = true];

  // Whether to unfold tf.BatchMatMul to a set of tfl.fully_connected ops. If
  // not, translate to tfl.batch_matmul.
  // WARNING: Experimental interface, subject to change.
  optional bool unfold_batchmatmul = 35 [default = true];

  // Whether to lower static Tensor List ops to builtin ops. If not, use Flex
  // tensor list ops.
  // WARNING: Experimental interface, subject to change.
  optional bool lower_tensor_list_ops = 36 [default = true];

  // The accumulation type to use when quantize_to_float16 is true. Typical
  // choices would be either float16 or float32.
  optional IODataType accumulation_type = 37;

  // Whether this model supports inference in bfloat16.
  // Note: This is an experimental feature.
  optional bool allow_bfloat16 = 38 [default = false];

  // If true, automatically adds all TF ops into the model as select
  // TensorFlow ops.
  optional bool allow_all_select_tf_ops = 39;

  // Whether to unfold large splat constant tensors in the flatbuffer to
  // reduce model size.
  optional bool unfold_large_splat_constant = 40 [default = false];

  // Names of the TFLite backends against which compatibility should be
  // checked.
  // WARNING: Experimental interface, subject to change.
  repeated string supported_backends = 41;

  // Whether to force a batch size of one when the batch size is None while
  // lowering tensor list ops.
  optional bool default_to_single_batch_in_tensor_list_ops = 42
      [default = false];

  // Disable per-channel quantization for dynamic range quantization.
  // Note: This is an experimental feature.
  optional bool disable_per_channel_quantization = 43 [default = false];

  // If true, the new MLIR dynamic range quantizer is used; if false, the old
  // TOCO dynamic range quantization is used.
  // Note: This is an experimental feature.
  optional bool enable_mlir_dynamic_range_quantizer = 44 [default = false];

  // When the output model is used for TF Quantization, this flag indicates
  // the mode of TF Quantization, e.g. DEFAULT, LEGACY_INTEGER, ...
  optional string tf_quantization_mode = 45;

  // Disable inferring tensor ranges for quantization.
  // Note: This is an experimental feature.
  optional bool disable_infer_tensor_range = 46 [default = false];

  // Enable using the num_bits set in FakeQuant attributes for quantization.
  // Note: This is an experimental feature.
  optional bool use_fake_quant_num_bits = 47 [default = false];

  // Enable converting to the DynamicUpdateSlice op (for ops like
  // TensorListSetItem).
  // Note: This is an experimental feature.
  optional bool enable_dynamic_update_slice = 48 [default = false];

  // Whether to preserve `TF::AssertOp`.
  optional bool preserve_assert_op = 49 [default = false];

  // Whether to ensure each function has a single use.
  optional bool guarantee_all_funcs_one_use = 50 [default = false];
}