| // Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // This schema defines how to configure TFLite for delegation. These |
| // definitions can be used in multiple ways: as output of a compatibility list, |
| // in benchmarking tools and to decouple delegate instantiation from code. |
| // |
| // The schema is work-in-progress, covering the most broadly used delegates and |
| // options. |
| |
| syntax = "proto2"; |
| |
| package tflite.proto; |
| |
| // ExecutionPreference is used to match accelerators against the preferences of |
| // the current application or use case. Some of the values here can appear both |
| // in the compatibility list and as input, some only as input. |
| // |
| // These are separate from NNAPIExecutionPreference - the compatibility list |
| // design doesn't assume a one-to-one mapping between the use cases that |
| // compatibility list entries have been developed for and the settings used |
| // for NNAPI. |
| enum ExecutionPreference { |
| // Match any selected preference. Allowlist (semantically - value is same as |
| // on input). |
| ANY = 0; |
| // Match low latency preference. Both compatibility list and input. |
| LOW_LATENCY = 1; |
| // Match low power preference. Both compatibility list and input. |
| LOW_POWER = 2; |
| // Never accelerate. Can be used for input to compatibility list or for |
| // standalone Acceleration configuration. |
| FORCE_CPU = 3; |
| } |
| |
| // TFLite accelerator (delegate) to use; see TFLiteSettings.delegate. |
| enum Delegate { |
| NONE = 0; |
| |
| NNAPI = 1; |
| GPU = 2; |
| HEXAGON = 3; |
| XNNPACK = 4; |
| // The EdgeTpu in Pixel devices. |
| EDGETPU = 5; |
| // The Coral EdgeTpu Dev Board / USB accelerator. |
| EDGETPU_CORAL = 6; |
| } |
| |
| // NNAPI execution preference; set via NNAPISettings.execution_preference. |
| enum NNAPIExecutionPreference { |
| // Undefined. |
| UNDEFINED = 0; |
| // Prefer executing in a way that minimizes battery drain. |
| NNAPI_LOW_POWER = 1; |
| // Prefer returning a single answer as fast as possible, even if this causes |
| // more power consumption. |
| NNAPI_FAST_SINGLE_ANSWER = 2; |
| // Prefer maximizing the throughput of successive frames, for example when |
| // processing successive frames coming from the camera. |
| NNAPI_SUSTAINED_SPEED = 3; |
| } |
| |
| // NNAPI execution priority; set via NNAPISettings.execution_priority. |
| enum NNAPIExecutionPriority { |
| NNAPI_PRIORITY_UNDEFINED = 0; |
| NNAPI_PRIORITY_LOW = 1; |
| NNAPI_PRIORITY_MEDIUM = 2; |
| NNAPI_PRIORITY_HIGH = 3; |
| } |
| |
| // One possible acceleration configuration. |
| message ComputeSettings { |
| // Which preference to use this accelerator for. |
| optional ExecutionPreference preference = 1; |
| // How to configure TFLite. |
| optional TFLiteSettings tflite_settings = 2; |
| // Model namespace and identifier to use for instrumentation and telemetry. |
| optional string model_namespace_for_statistics = 3; |
| optional string model_identifier_for_statistics = 4; |
| } |
| |
| // NNAPI delegate settings. |
| message NNAPISettings { |
| // Which instance (NNAPI accelerator) to use. One driver may provide several |
| // accelerators (though a driver may also hide several back-ends behind one |
| // name, at the choice of the driver vendor). |
| // Note that driver introspection is only available in Android Q and later. |
| optional string accelerator_name = 1; |
| |
| // NNAPI model compilation caching settings to be passed to |
| // tflite::StatefulNnApiDelegate. |
| optional string cache_directory = 2; |
| optional string model_token = 3; |
| |
| // NNAPI execution preference to pass. See |
| // https://developer.android.com/ndk/reference/group/neural-networks.html |
| optional NNAPIExecutionPreference execution_preference = 4; |
| |
| // Number of instances to cache for the same model (for input size |
| // changes). This is mandatory for getting reasonable performance in that |
| // case. |
| optional int32 no_of_nnapi_instances_to_cache = 5; |
| |
| // Deprecated; use the fallback_settings in TFLiteSettings. |
| // |
| // Whether to automatically fall back to TFLite CPU path. |
| optional FallbackSettings fallback_settings = 6 [deprecated = true]; |
| |
| // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android |
| // 10+ when an accelerator name is not specified. The NNAPI CPU typically |
| // performs less well than the TfLite built-in kernels, but allowing it lets a |
| // model be partially accelerated, which may be a win. |
| optional bool allow_nnapi_cpu_on_android_10_plus = 7; |
| |
| optional NNAPIExecutionPriority execution_priority = 8; |
| |
| // Whether to allow dynamic dimension sizes without re-compilation. |
| // A tensor with a dynamic dimension must have a valid dims_signature |
| // defined. |
| // Only supported in NNAPI 1.1 and newer versions. |
| // WARNING: Setting this flag to true may result in the model being rejected |
| // by the accelerator. This should only be enabled if the target device |
| // supports dynamic dimensions of the model. |
| // By default this is set to false. |
| optional bool allow_dynamic_dimensions = 9; |
| |
| // Whether to allow the NNAPI accelerator to optionally use lower-precision |
| // float16 (16-bit floating point) arithmetic when doing calculations on |
| // float32 (32-bit floating point). |
| optional bool allow_fp16_precision_for_fp32 = 10; |
| } |
| |
| // Which GPU backend to select. Default behaviour on Android is to try OpenCL |
| // and, if it's not available, fall back to OpenGL. |
| enum GPUBackend { |
| UNSET = 0; |
| OPENCL = 1; |
| OPENGL = 2; |
| // Not yet supported (left commented out): |
| // VULKAN = 3; |
| // METAL = 4; |
| } |
| |
| // GPU delegate settings. Only enable_quantized_inference declares an explicit |
| // default (true); force_backend selects a GPUBackend. |
| // See |
| // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h |
| message GPUSettings { |
| optional bool is_precision_loss_allowed = 1; |
| optional bool enable_quantized_inference = 2 [default = true]; |
| optional GPUBackend force_backend = 3; |
| // TODO(b/152019007): add remaining options. |
| } |
| |
| // Hexagon delegate settings; the linked header declares the delegate options. |
| // |
| // See |
| // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h |
| message HexagonSettings { |
| optional int32 debug_level = 1; |
| optional int32 powersave_level = 2; |
| optional bool print_graph_profile = 3; |
| optional bool print_graph_debug = 4; |
| } |
| |
| // XNNPack delegate settings (currently a single num_threads option). |
| // |
| // See |
| // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h |
| message XNNPackSettings { |
| optional int32 num_threads = 1; |
| } |
| |
| // EdgeTPU device spec. |
| // Used by EdgeTpuSettings.edgetpu_device_spec to create the EdgeTpu device. |
| message EdgeTpuDeviceSpec { |
| // EdgeTPU platform types. |
| enum PlatformType { |
| MMIO = 0; |
| REFERENCE = 1; |
| SIMULATOR = 2; |
| REMOTE_SIMULATOR = 3; |
| } |
| |
| // Execution platform for the EdgeTPU device. |
| optional PlatformType platform_type = 1; |
| |
| // Number of chips to use for the EdgeTPU device. |
| optional int32 num_chips = 2; |
| |
| // Paths to the EdgeTPU devices. |
| repeated string device_paths = 3; |
| |
| // Chip family used by the EdgeTpu device. |
| optional int32 chip_family = 4; |
| } |
| |
| // Generic definitions of EdgeTPU power states. |
| enum EdgeTpuPowerState { |
| // Undefined power state. |
| UNDEFINED_POWERSTATE = 0; |
| |
| // TPU core is off but control cluster is on. |
| TPU_CORE_OFF = 1; |
| |
| // A non-active low-power state that has a much smaller transition time to |
| // active compared to off. |
| READY = 2; |
| |
| // Minimum power active state. |
| ACTIVE_MIN_POWER = 3; |
| |
| // Very low performance, very low power. |
| ACTIVE_VERY_LOW_POWER = 4; |
| |
| // Low performance, low power. |
| ACTIVE_LOW_POWER = 5; |
| |
| // The normal performance and power. This setting usually provides the |
| // optimal perf/power trade-off for the average use case. |
| ACTIVE = 6; |
| |
| // Maximum performance level. Potentially higher power and thermal impact. |
| // This setting may not be allowed in production depending on the system. |
| OVER_DRIVE = 7; |
| } |
| |
| message EdgeTpuInactivePowerConfig { |
| // Inactive power state between inferences. |
| optional EdgeTpuPowerState inactive_power_state = 1; |
| |
| // Inactive timeout between inferences, in microseconds. |
| optional int64 inactive_timeout_us = 2; |
| } |
| |
| // EdgeTPU delegate settings. |
| // |
| message EdgeTpuSettings { |
| // Target inference power state for running the model. |
| optional EdgeTpuPowerState inference_power_state = 1; |
| |
| // Inactive power configurations (state plus timeout) between inferences. |
| repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2; |
| |
| // Priority for the inference request. Defaults to -1. |
| optional int32 inference_priority = 3 [default = -1]; |
| |
| // Device spec for creating the EdgeTpu device. |
| optional EdgeTpuDeviceSpec edgetpu_device_spec = 4; |
| } |
| |
| // Coral Dev Board / USB accelerator delegate settings. |
| // |
| // See |
| // https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h |
| message CoralSettings { |
| enum Performance { |
| UNDEFINED = 0; |
| MAXIMUM = 1; |
| HIGH = 2; |
| MEDIUM = 3; |
| LOW = 4; |
| } |
| |
| // The Edge Tpu device to be used. See |
| // https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137 |
| optional string device = 1; |
| // The desired performance level. This setting adjusts the internal clock |
| // rate to achieve different performance / power balance. Higher performance |
| // values improve speed, but increase power usage. Defaults to MAXIMUM. |
| optional Performance performance = 2 [default = MAXIMUM]; |
| // If true, always perform device firmware update (DFU) after reset. DFU is |
| // usually only necessary after a power cycle. |
| optional bool usb_always_dfu = 3; |
| // The maximum bulk-in queue length. A larger queue length may improve USB |
| // performance in the direction from device to host. When not specified (or |
| // zero), `usb_max_bulk_in_queue_length` will default to 32 according to the |
| // current EdgeTpu Coral implementation. |
| optional int32 usb_max_bulk_in_queue_length = 4; |
| } |
| |
| // CPU execution settings; used by TFLiteSettings.cpu_settings. |
| message CPUSettings { |
| optional int32 num_threads = 1; |
| } |
| |
| // How to configure TFLite. |
| message TFLiteSettings { |
| // Which delegate to use. |
| optional Delegate delegate = 1; |
| |
| // How to configure the chosen delegate. |
| // (In principle we would like to use 'oneof', but flatc turns that into a |
| // nested anonymous table rather than a union. See |
| // https://github.com/google/flatbuffers/issues/4628). |
| optional NNAPISettings nnapi_settings = 2; |
| optional GPUSettings gpu_settings = 3; |
| optional HexagonSettings hexagon_settings = 4; |
| optional XNNPackSettings xnnpack_settings = 5; |
| |
| // How to configure CPU execution. |
| optional CPUSettings cpu_settings = 6; |
| |
| // Shared delegation settings. |
| optional int32 max_delegated_partitions = 7; |
| |
| // For configuring the EdgeTpuDelegate. |
| optional EdgeTpuSettings edgetpu_settings = 8; |
| |
| // For configuring the Coral EdgeTpu delegate. |
| optional CoralSettings coral_settings = 10; |
| |
| // Whether to automatically fall back to the TFLite CPU path. |
| optional FallbackSettings fallback_settings = 9; |
| } |
| |
| // Whether to automatically fall back to TFLite CPU path on delegation errors. |
| // |
| // Typically fallback is enabled in production use but disabled in tests and |
| // benchmarks to ensure they test the intended path. |
| message FallbackSettings { |
| // Whether to allow automatically falling back to TfLite CPU path on |
| // compilation failure. Default is not allowing automatic fallback. |
| // |
| // This is useful in naive production use cases where the caller would prefer |
| // for the model to run even if it's not accelerated. More advanced users will |
| // implement fallback themselves; e.g., by using a different model on CPU. |
| // |
| // Note that compilation errors may occur either at initial |
| // ModifyGraphWithDelegate() time, or when calling AllocateTensors() after |
| // resizing. |
| optional bool allow_automatic_fallback_on_compilation_error = 7; |
| // Whether to allow automatically falling back to TfLite CPU path on |
| // execution error. Default is not allowing automatic fallback. |
| // |
| // Experimental, use with care (only when you have complete control over the |
| // client code). |
| // |
| // The caveat above for compilation errors holds. Additionally, execution-time |
| // errors are harder to handle automatically as they require invalidating the |
| // TfLite interpreter, which most client code has not been designed to deal |
| // with. |
| optional bool allow_automatic_fallback_on_execution_error = 8; |
| } |
| |
| // On-device mini-benchmark result storage. The following definitions are used |
| // to keep an append-only log of benchmark results on-device. (Hence there is a |
| // single top-level event that is used for all data.) |
| // |
| // These definitions don't need a proto-to-flatbuffer conversion, since they are |
| // not used for specifying configuration in the Tasks library. |
| |
| // Which stage of benchmarking the event is for. |
| // There might be multiple events with the same type if a benchmark is run |
| // multiple times. |
| enum BenchmarkEventType { |
| UNDEFINED_BENCHMARK_EVENT_TYPE = 0; |
| // Benchmark start. A start without an end can be interpreted as a test that |
| // has crashed or hung. |
| START = 1; |
| // Benchmarking completion. A model was successfully loaded, acceleration was |
| // configured and inference ran without errors. There may still be an issue |
| // with correctness of results, or with performance. |
| END = 2; |
| // Benchmark was not completed due to an error. The error may be a handled |
| // error (e.g., failure in a delegate), or a crash. |
| ERROR = 3; |
| // Benchmark data has been sent for logging. |
| LOGGED = 4; |
| } |
| |
| // A correctness metric from a benchmark, for example the KL-divergence between |
| // known-good CPU output and on-device output. These are primarily used for |
| // telemetry and are monitored server-side. |
| message BenchmarkMetric { |
| optional string name = 1; |
| repeated float values = 2 [packed = true]; |
| } |
| |
| // Outcome of a successfully completed benchmark run. This information is |
| // intended both to be used on-device to select the best compute configuration |
| // and to be sent to the server for monitoring. |
| // |
| // Used with event type END. |
| message BenchmarkResult { |
| // Time to load model and apply acceleration. Initialization may get run |
| // multiple times to get information on variance. |
| repeated int64 initialization_time_us = 1 [packed = true]; |
| // Time to run inference (call Invoke()). Inference may get run multiple times |
| // to get information on variance. |
| repeated int64 inference_time_us = 2 [packed = true]; |
| // Maximum memory used. Measures size of application heap (does not |
| // necessarily take into account driver-side allocation). |
| optional int32 max_memory_kb = 3; |
| // Whether the inference produced correct results (validation graph output |
| // 'ok' for all test inputs). Used on-device to disallow configurations that |
| // produce incorrect results (e.g., due to OpenCL driver bugs). |
| optional bool ok = 4; |
| // Metrics that were used to determine the 'ok' status. |
| repeated BenchmarkMetric metrics = 5; |
| } |
| |
| // A handled error. |
| message ErrorCode { |
| // Which delegate the error comes from (or NONE, if it comes from the tflite |
| // framework). |
| optional Delegate source = 1; |
| // What the tflite-level error is. |
| optional int32 tflite_error = 2; |
| // What the underlying API error is (e.g., NNAPI or OpenGL error). |
| optional int64 underlying_api_error = 3; |
| } |
| |
| // The stage of benchmark execution at which an error occurred. |
| enum BenchmarkStage { |
| UNKNOWN = 0; |
| // During model loading or delegation. |
| INITIALIZATION = 1; |
| // During inference. |
| INFERENCE = 2; |
| } |
| |
| // An error that occurred during benchmarking. |
| // |
| // Used with event type ERROR. |
| message BenchmarkError { |
| // How far benchmarking got. |
| optional BenchmarkStage stage = 1; |
| // Process exit code. |
| optional int32 exit_code = 2; |
| // Signal the process received. |
| optional int32 signal = 3; |
| // Handled errors. |
| repeated ErrorCode error_code = 4; |
| } |
| |
| // Top-level benchmarking event stored on-device. All events for a model are |
| // parsed to detect the status. |
| message BenchmarkEvent { |
| // Which settings were used for benchmarking. |
| optional TFLiteSettings tflite_settings = 1; |
| // Type of the event. |
| optional BenchmarkEventType event_type = 2; |
| // Result of the benchmark, used when the type is END. |
| optional BenchmarkResult result = 3; |
| // Error during the benchmark, used when the type is ERROR. |
| optional BenchmarkError error = 4; |
| // Start timestamps. These are used for |
| // 1. Checking whether a test was started but not completed within a given |
| // deadline. |
| // 2. Optionally, telemetry timestamps. |
| optional int64 boottime_us = 5; |
| optional int64 wallclock_us = 6; |
| } |