// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.

syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are used
// for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Used in the compatibility list as a
  // wildcard (semantically, the value is the same as on input).
ANY = 0;
// Match low latency preference. Both compatibility list and input.
LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
LOW_POWER = 2;
  // Never accelerate. Can be used as input to the compatibility list or for
  // standalone acceleration configuration.
FORCE_CPU = 3;
}

// TFLite accelerator to use.
enum Delegate {
NONE = 0;
NNAPI = 1;
GPU = 2;
HEXAGON = 3;
XNNPACK = 4;
// The EdgeTpu in Pixel devices.
EDGETPU = 5;
// The Coral EdgeTpu Dev Board / USB accelerator.
EDGETPU_CORAL = 6;
}

enum NNAPIExecutionPreference {
// Undefined.
UNDEFINED = 0;
// Prefer executing in a way that minimizes battery drain.
NNAPI_LOW_POWER = 1;
// Prefer returning a single answer as fast as possible, even if this causes
// more power consumption.
NNAPI_FAST_SINGLE_ANSWER = 2;
// Prefer maximizing the throughput of successive frames, for example when
// processing successive frames coming from the camera.
NNAPI_SUSTAINED_SPEED = 3;
}

enum NNAPIExecutionPriority {
NNAPI_PRIORITY_UNDEFINED = 0;
NNAPI_PRIORITY_LOW = 1;
NNAPI_PRIORITY_MEDIUM = 2;
NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which execution preference this configuration applies to.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
optional TFLiteSettings tflite_settings = 2;
// Identifiers to use for instrumentation and telemetry.
optional string model_namespace_for_statistics = 3;
optional string model_identifier_for_statistics = 4;
}
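
// For illustration, a ComputeSettings message could look like this in
// textproto form (the statistics identifiers are hypothetical placeholders):
//
//   preference: LOW_LATENCY
//   tflite_settings { delegate: GPU }
//   model_namespace_for_statistics: "com.example.app"       # hypothetical
//   model_identifier_for_statistics: "image_classifier_v1"  # hypothetical
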
// NNAPI delegate settings.
message NNAPISettings {
// Which instance (NNAPI accelerator) to use. One driver may provide several
// accelerators (though a driver may also hide several back-ends behind one
// name, at the choice of the driver vendor).
// Note that driver introspection is only available in Android Q and later.
optional string accelerator_name = 1;
// NNAPI model compilation caching settings to be passed to
// tflite::StatefulNnApiDelegate
optional string cache_directory = 2;
optional string model_token = 3;
// NNAPI execution preference to pass. See
// https://developer.android.com/ndk/reference/group/neural-networks.html
optional NNAPIExecutionPreference execution_preference = 4;
// Number of instances to cache for the same model (for input size
// changes). This is mandatory for getting reasonable performance in that
// case.
optional int32 no_of_nnapi_instances_to_cache = 5;
// Deprecated; use the fallback_settings in TFLiteSettings.
//
// Whether to automatically fall back to TFLite CPU path.
optional FallbackSettings fallback_settings = 6 [deprecated = true];
  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
  // 10+ when an accelerator name is not specified. The NNAPI CPU typically
  // performs less well than the TfLite built-in kernels, but allowing it lets
  // a model be partially accelerated, which may be a win.
optional bool allow_nnapi_cpu_on_android_10_plus = 7;
optional NNAPIExecutionPriority execution_priority = 8;
  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with dynamic dimensions must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
optional bool allow_dynamic_dimensions = 9;
// Whether to allow the NNAPI accelerator to optionally use lower-precision
// float16 (16-bit floating point) arithmetic when doing calculations on
// float32 (32-bit floating point).
optional bool allow_fp16_precision_for_fp32 = 10;
}
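
// For illustration, an NNAPISettings textproto that enables compilation
// caching (the accelerator name, directory and token are hypothetical
// placeholders):
//
//   accelerator_name: "example-accelerator"  # hypothetical
//   cache_directory: "/data/local/tmp"       # hypothetical
//   model_token: "mobilenet_v1_token"        # hypothetical
//   execution_preference: NNAPI_SUSTAINED_SPEED
//   allow_fp16_precision_for_fp32: true
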
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and, if it is not available, fall back to OpenGL.
enum GPUBackend {
UNSET = 0;
OPENCL = 1;
OPENGL = 2;
// Not yet supported.
// VULKAN = 3;
// METAL = 4;
}

// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
optional bool is_precision_loss_allowed = 1;
optional bool enable_quantized_inference = 2 [default = true];
optional GPUBackend force_backend = 3;
// TODO(b/152019007): add remaining options.
}
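
// For illustration, a GPUSettings textproto that forces the OpenCL backend
// and allows reduced precision (values are illustrative only):
//
//   is_precision_loss_allowed: true
//   enable_quantized_inference: true
//   force_backend: OPENCL
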
// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
optional int32 debug_level = 1;
optional int32 powersave_level = 2;
optional bool print_graph_profile = 3;
optional bool print_graph_debug = 4;
}
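
// For illustration, a HexagonSettings textproto with graph profiling enabled
// (the level values are illustrative; see the header linked above for their
// meaning):
//
//   debug_level: 0
//   powersave_level: 0
//   print_graph_profile: true
//   print_graph_debug: false
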
// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
optional int32 num_threads = 1;
}

// EdgeTPU device spec.
//
message EdgeTpuDeviceSpec {
// EdgeTPU platform types.
enum PlatformType {
MMIO = 0;
REFERENCE = 1;
SIMULATOR = 2;
REMOTE_SIMULATOR = 3;
}
// Execution platform for the EdgeTPU device.
optional PlatformType platform_type = 1;
// Number of chips to use for the EdgeTPU device.
optional int32 num_chips = 2;
  // Paths to the EdgeTPU devices.
repeated string device_paths = 3;
// Chip family used by the EdgeTpu device.
optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
// Undefined power state.
UNDEFINED_POWERSTATE = 0;
// TPU core is off but control cluster is on.
TPU_CORE_OFF = 1;
// A non-active low-power state that has much smaller transition time to
// active compared to off.
READY = 2;
// Minimum power active state.
ACTIVE_MIN_POWER = 3;
// Very low performance, very low power.
ACTIVE_VERY_LOW_POWER = 4;
// Low performance, low power.
ACTIVE_LOW_POWER = 5;
// The normal performance and power. This setting usually provides the
// optimal perf/power trade-off for the average use-case.
ACTIVE = 6;
// Maximum performance level. Potentially higher power and thermal. This
// setting may not be allowed in production depending on the system.
OVER_DRIVE = 7;
}

message EdgeTpuInactivePowerConfig {
// Inactive power states between inferences.
optional EdgeTpuPowerState inactive_power_state = 1;
// Inactive timeout in microseconds between inferences.
optional int64 inactive_timeout_us = 2;
}

// EdgeTPU Delegate settings.
//
message EdgeTpuSettings {
// Target inference power state for running the model.
optional EdgeTpuPowerState inference_power_state = 1;
// Inactive power states between inferences.
repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;
// Priority for the inference request.
optional int32 inference_priority = 3 [default = -1];
// Device spec for creating the EdgeTpu device.
optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;
}
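
// For illustration, an EdgeTpuSettings textproto that runs inference in the
// ACTIVE state and drops to READY after 100 ms of inactivity (values are
// illustrative only):
//
//   inference_power_state: ACTIVE
//   inactive_power_configs {
//     inactive_power_state: READY
//     inactive_timeout_us: 100000
//   }
//   edgetpu_device_spec { platform_type: MMIO num_chips: 1 }
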
// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
enum Performance {
UNDEFINED = 0;
MAXIMUM = 1;
HIGH = 2;
MEDIUM = 3;
LOW = 4;
}
  // The EdgeTpu device to be used. See
// https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
optional string device = 1;
// The desired performance level. This setting adjusts the internal clock
// rate to achieve different performance / power balance. Higher performance
// values improve speed, but increase power usage.
optional Performance performance = 2 [default = MAXIMUM];
// If true, always perform device firmware update (DFU) after reset. DFU is
// usually only necessary after power cycle.
optional bool usb_always_dfu = 3;
// The maximum bulk in queue length. Larger queue length may improve USB
// performance on the direction from device to host. When not specified (or
// zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
// current EdgeTpu Coral implementation.
optional int32 usb_max_bulk_in_queue_length = 4;
}
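
// For illustration, a CoralSettings textproto that lowers the clock rate to
// save power (the device string is a hypothetical placeholder; its format is
// described in the tflite_utils.h link above):
//
//   device: "usb:0"  # hypothetical
//   performance: MEDIUM
//   usb_always_dfu: false
//   usb_max_bulk_in_queue_length: 32
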
message CPUSettings {
optional int32 num_threads = 1;
}

// How to configure TFLite.
message TFLiteSettings {
// Which delegate to use.
optional Delegate delegate = 1;
// How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
// https://github.com/google/flatbuffers/issues/4628).
optional NNAPISettings nnapi_settings = 2;
optional GPUSettings gpu_settings = 3;
optional HexagonSettings hexagon_settings = 4;
optional XNNPackSettings xnnpack_settings = 5;
// How to configure CPU execution.
optional CPUSettings cpu_settings = 6;
// Shared delegation settings.
optional int32 max_delegated_partitions = 7;
// For configuring the EdgeTpuDelegate.
optional EdgeTpuSettings edgetpu_settings = 8;
// For configuring the Coral EdgeTpu Delegate.
optional CoralSettings coral_settings = 10;
// Whether to automatically fall back to TFLite CPU path.
optional FallbackSettings fallback_settings = 9;
}
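
// For illustration, a complete TFLiteSettings textproto requesting NNAPI with
// automatic CPU fallback on compilation errors (values are illustrative only):
//
//   delegate: NNAPI
//   nnapi_settings {
//     execution_preference: NNAPI_FAST_SINGLE_ANSWER
//   }
//   max_delegated_partitions: 3
//   fallback_settings {
//     allow_automatic_fallback_on_compilation_error: true
//   }
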
// Whether to automatically fallback to TFLite CPU path on delegation errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
// Whether to allow automatically falling back to TfLite CPU path on
// compilation failure. Default is not allowing automatic fallback.
//
// This is useful in naive production usecases where the caller would prefer
// for the model to run even if it's not accelerated. More advanced users will
// implement fallback themselves; e.g., by using a different model on CPU.
//
// Note that compilation errors may occur either at initial
// ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
// resizing.
optional bool allow_automatic_fallback_on_compilation_error = 7;
// Whether to allow automatically falling back to TfLite CPU path on
// execution error. Default is not allowing automatic fallback.
//
// Experimental, use with care (only when you have complete control over the
// client code).
//
  // The caveat above for compilation error holds. Additionally, execution-time
  // errors are harder to handle automatically as they require invalidating the
  // TfLite interpreter, which most client code has not been designed to deal
  // with.
optional bool allow_automatic_fallback_on_execution_error = 8;
}
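
// For illustration, a FallbackSettings textproto suited to typical production
// use, where compilation failures fall back to CPU but execution errors do
// not:
//
//   allow_automatic_fallback_on_compilation_error: true
//   allow_automatic_fallback_on_execution_error: false
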
// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is a
// single top-level event that is used for all data.)
//
// These definitions don't need a proto-to-flatbuffer conversion, since they are
// not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
// Benchmark start. A start without an end can be interpreted as a test that
// has crashed or hung.
START = 1;
// Benchmarking completion. A model was successfully loaded, acceleration
// configured and inference run without errors. There may still be an issue
// with correctness of results, or with performance.
END = 2;
// Benchmark was not completed due to an error. The error may be a handled
// error (e.g., failure in a delegate), or a crash.
ERROR = 3;
// Benchmark data has been sent for logging.
LOGGED = 4;
}

// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
optional string name = 1;
repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended both to be used on-device to select the best compute configuration
// and to be sent to a server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
// Time to load model and apply acceleration. Initialization may get run
// multiple times to get information on variance.
repeated int64 initialization_time_us = 1 [packed = true];
// Time to run inference (call Invoke()). Inference may get run multiple times
// to get information on variance.
repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used. Measures size of application heap (does not
  // necessarily take into account driver-side allocation).
optional int32 max_memory_kb = 3;
// Whether the inference produced correct results (validation graph output
// 'ok' for all test inputs). Used on-device to disallow configurations that
// produce incorrect results (e.g., due to OpenCL driver bugs).
optional bool ok = 4;
// Metrics that were used to determine the 'ok' status.
repeated BenchmarkMetric metrics = 5;
}

// A handled error.
message ErrorCode {
// Which delegate the error comes from (or NONE, if it comes from the tflite
// framework).
optional Delegate source = 1;
// What the tflite level error is.
optional int32 tflite_error = 2;
// What the underlying error is (e.g., NNAPI or OpenGL error).
optional int64 underlying_api_error = 3;
}

// When during benchmark execution an error occurred.
enum BenchmarkStage {
UNKNOWN = 0;
// During model loading or delegation.
INITIALIZATION = 1;
// During inference.
INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
// How far benchmarking got.
optional BenchmarkStage stage = 1;
// Process exit code.
optional int32 exit_code = 2;
// Signal the process received.
optional int32 signal = 3;
// Handled error.
repeated ErrorCode error_code = 4;
}

// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
// Which settings were used for benchmarking.
optional TFLiteSettings tflite_settings = 1;
// Type of the event.
optional BenchmarkEventType event_type = 2;
// Result of benchmark, used when type is END.
optional BenchmarkResult result = 3;
// Error during benchmark, used when type is ERROR.
optional BenchmarkError error = 4;
  // Start timestamps. These are used for
  // 1. Checking whether a test was started but not completed within a given
  //    deadline.
  // 2. Optionally, as telemetry timestamps.
optional int64 boottime_us = 5;
optional int64 wallclock_us = 6;
}
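
// For illustration, a successful mini-benchmark run could be logged as a
// BenchmarkEvent textproto like the following (all numbers are illustrative,
// and the metric name is a hypothetical placeholder):
//
//   tflite_settings { delegate: GPU }
//   event_type: END
//   result {
//     initialization_time_us: 153000
//     inference_time_us: 34000
//     inference_time_us: 33500
//     max_memory_kb: 64000
//     ok: true
//     metrics { name: "output_kl_divergence" values: 0.01 }  # hypothetical
//   }
//   boottime_us: 1234567
//   wallclock_us: 1600000000000000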