// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This schema defines how to configure TFLite for delegation. These
// definitions can be used in multiple ways: as output of a compatibility list,
// in benchmarking tools and to decouple delegate instantiation from code.
//
// The schema is work-in-progress, covering the most broadly used delegates and
// options.

syntax = "proto2";

package tflite.proto;

// ExecutionPreference is used to match accelerators against the preferences of
// the current application or usecase. Some of the values here can appear both
// in the compatibility list and as input, some only as input.
//
// These are separate from NNAPIExecutionPreference - the compatibility list
// design doesn't assume a one-to-one mapping between which usecases
// compatibility list entries have been developed for and what settings are used
// for NNAPI.
enum ExecutionPreference {
  // Match any selected preference. Used in the compatibility list as a
  // wildcard (semantically, the value is the same as on input).
ANY = 0;
// Match low latency preference. Both compatibility list and input.
LOW_LATENCY = 1;
  // Match low power preference. Both compatibility list and input.
LOW_POWER = 2;
  // Never accelerate. Can be used as input to the compatibility list or for
  // standalone acceleration configuration.
FORCE_CPU = 3;
}

// TFLite accelerator to use.
enum Delegate {
NONE = 0;
NNAPI = 1;
GPU = 2;
HEXAGON = 3;
XNNPACK = 4;
// The EdgeTpu in Pixel devices.
EDGETPU = 5;
// The Coral EdgeTpu Dev Board / USB accelerator.
EDGETPU_CORAL = 6;
}

enum NNAPIExecutionPreference {
// Undefined.
UNDEFINED = 0;
// Prefer executing in a way that minimizes battery drain.
NNAPI_LOW_POWER = 1;
// Prefer returning a single answer as fast as possible, even if this causes
// more power consumption.
NNAPI_FAST_SINGLE_ANSWER = 2;
// Prefer maximizing the throughput of successive frames, for example when
// processing successive frames coming from the camera.
NNAPI_SUSTAINED_SPEED = 3;
}

enum NNAPIExecutionPriority {
NNAPI_PRIORITY_UNDEFINED = 0;
NNAPI_PRIORITY_LOW = 1;
NNAPI_PRIORITY_MEDIUM = 2;
NNAPI_PRIORITY_HIGH = 3;
}

// One possible acceleration configuration.
message ComputeSettings {
  // Which execution preference this configuration applies to.
  optional ExecutionPreference preference = 1;
  // How to configure TFLite.
optional TFLiteSettings tflite_settings = 2;
// Identifiers to use for instrumentation and telemetry.
optional string model_namespace_for_statistics = 3;
optional string model_identifier_for_statistics = 4;
}
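
// For illustration, a ComputeSettings message could look like this in
// textproto form (the statistics identifiers are hypothetical placeholders):
//
//   preference: LOW_LATENCY
//   tflite_settings { delegate: GPU }
//   model_namespace_for_statistics: "com.example.app"       # hypothetical
//   model_identifier_for_statistics: "image_classifier_v1"  # hypothetical
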
// NNAPI delegate settings.
message NNAPISettings {
// Which instance (NNAPI accelerator) to use. One driver may provide several
// accelerators (though a driver may also hide several back-ends behind one
// name, at the choice of the driver vendor).
// Note that driver introspection is only available in Android Q and later.
optional string accelerator_name = 1;
// NNAPI model compilation caching settings to be passed to
// tflite::StatefulNnApiDelegate
optional string cache_directory = 2;
optional string model_token = 3;
// NNAPI execution preference to pass. See
// https://developer.android.com/ndk/reference/group/neural-networks.html
optional NNAPIExecutionPreference execution_preference = 4;
// Number of instances to cache for the same model (for input size
// changes). This is mandatory for getting reasonable performance in that
// case.
optional int32 no_of_nnapi_instances_to_cache = 5;
// Deprecated; use the fallback_settings in TFLiteSettings.
//
// Whether to automatically fall back to TFLite CPU path.
optional FallbackSettings fallback_settings = 6 [deprecated = true];
  // Whether to allow use of NNAPI CPU (nnapi-reference accelerator) on Android
  // 10+ when an accelerator name is not specified. The NNAPI CPU typically
  // performs less well than the TfLite built-in kernels, but allowing it lets
  // a model be partially accelerated, which may be a win.
optional bool allow_nnapi_cpu_on_android_10_plus = 7;
optional NNAPIExecutionPriority execution_priority = 8;
  // Whether to allow dynamic dimension sizes without re-compilation.
  // A tensor with dynamic dimensions must have a valid dims_signature
  // defined.
  // Only supported in NNAPI 1.1 and newer versions.
  // WARNING: Setting this flag to true may result in the model being rejected
  // by the accelerator. This should only be enabled if the target device
  // supports dynamic dimensions of the model.
  // By default this is set to false.
optional bool allow_dynamic_dimensions = 9;
// Whether to allow the NNAPI accelerator to optionally use lower-precision
// float16 (16-bit floating point) arithmetic when doing calculations on
// float32 (32-bit floating point).
optional bool allow_fp16_precision_for_fp32 = 10;
}
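
// For illustration, an NNAPISettings textproto that enables compilation
// caching (the accelerator name, directory and token are hypothetical
// placeholders):
//
//   accelerator_name: "example-accelerator"  # hypothetical
//   cache_directory: "/data/local/tmp"       # hypothetical
//   model_token: "mobilenet_v1_token"        # hypothetical
//   execution_preference: NNAPI_SUSTAINED_SPEED
//   allow_fp16_precision_for_fp32: true
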
// Which GPU backend to select. Default behaviour on Android is to try OpenCL
// and, if it is not available, fall back to OpenGL.
enum GPUBackend {
UNSET = 0;
OPENCL = 1;
OPENGL = 2;
// Not yet supported.
// VULKAN = 3;
// METAL = 4;
}

// GPU Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/gpu/delegate.h
message GPUSettings {
optional bool is_precision_loss_allowed = 1;
optional bool enable_quantized_inference = 2 [default = true];
optional GPUBackend force_backend = 3;
// TODO(b/152019007): add remaining options.
}
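
// For illustration, a GPUSettings textproto that forces the OpenCL backend
// and allows reduced precision (values are illustrative only):
//
//   is_precision_loss_allowed: true
//   enable_quantized_inference: true
//   force_backend: OPENCL
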
// Hexagon Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/hexagon/hexagon_delegate.h
message HexagonSettings {
optional int32 debug_level = 1;
optional int32 powersave_level = 2;
optional bool print_graph_profile = 3;
optional bool print_graph_debug = 4;
}
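
// For illustration, a HexagonSettings textproto with graph profiling enabled
// (the level values are illustrative; see the header linked above for their
// meaning):
//
//   debug_level: 0
//   powersave_level: 0
//   print_graph_profile: true
//   print_graph_debug: false
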
// XNNPack Delegate settings.
//
// See
// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h
message XNNPackSettings {
optional int32 num_threads = 1;
}

// EdgeTPU device spec.
//
message EdgeTpuDeviceSpec {
// EdgeTPU platform types.
enum PlatformType {
MMIO = 0;
REFERENCE = 1;
SIMULATOR = 2;
REMOTE_SIMULATOR = 3;
}
// Execution platform for the EdgeTPU device.
optional PlatformType platform_type = 1;
// Number of chips to use for the EdgeTPU device.
optional int32 num_chips = 2;
  // Paths to the EdgeTPU devices.
repeated string device_paths = 3;
// Chip family used by the EdgeTpu device.
optional int32 chip_family = 4;
}

// Generic definitions of EdgeTPU power states.
enum EdgeTpuPowerState {
// Undefined power state.
UNDEFINED_POWERSTATE = 0;
// TPU core is off but control cluster is on.
TPU_CORE_OFF = 1;
// A non-active low-power state that has much smaller transition time to
// active compared to off.
READY = 2;
// Minimum power active state.
ACTIVE_MIN_POWER = 3;
// Very low performance, very low power.
ACTIVE_VERY_LOW_POWER = 4;
// Low performance, low power.
ACTIVE_LOW_POWER = 5;
// The normal performance and power. This setting usually provides the
// optimal perf/power trade-off for the average use-case.
ACTIVE = 6;
// Maximum performance level. Potentially higher power and thermal. This
// setting may not be allowed in production depending on the system.
OVER_DRIVE = 7;
}

message EdgeTpuInactivePowerConfig {
// Inactive power states between inferences.
optional EdgeTpuPowerState inactive_power_state = 1;
// Inactive timeout in microseconds between inferences.
optional int64 inactive_timeout_us = 2;
}

// EdgeTPU Delegate settings.
//
message EdgeTpuSettings {
// Target inference power state for running the model.
optional EdgeTpuPowerState inference_power_state = 1;
// Inactive power states between inferences.
repeated EdgeTpuInactivePowerConfig inactive_power_configs = 2;
// Priority for the inference request.
optional int32 inference_priority = 3 [default = -1];
// Device spec for creating the EdgeTpu device.
optional EdgeTpuDeviceSpec edgetpu_device_spec = 4;
}
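
// For illustration, an EdgeTpuSettings textproto that runs inference in the
// ACTIVE state and drops to READY after 100 ms of inactivity (values are
// illustrative only):
//
//   inference_power_state: ACTIVE
//   inactive_power_configs {
//     inactive_power_state: READY
//     inactive_timeout_us: 100000
//   }
//   edgetpu_device_spec { platform_type: MMIO num_chips: 1 }
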
// Coral Dev Board / USB accelerator delegate settings.
//
// See
// https://github.com/google-coral/edgetpu/blob/master/libedgetpu/edgetpu_c.h
message CoralSettings {
enum Performance {
UNDEFINED = 0;
MAXIMUM = 1;
HIGH = 2;
MEDIUM = 3;
LOW = 4;
}
  // The EdgeTpu device to be used. See
// https://github.com/google-coral/libcoral/blob/982426546dfa10128376d0c24fd8a8b161daac97/coral/tflite_utils.h#L131-L137
optional string device = 1;
// The desired performance level. This setting adjusts the internal clock
// rate to achieve different performance / power balance. Higher performance
// values improve speed, but increase power usage.
optional Performance performance = 2 [default = MAXIMUM];
// If true, always perform device firmware update (DFU) after reset. DFU is
// usually only necessary after power cycle.
optional bool usb_always_dfu = 3;
// The maximum bulk in queue length. Larger queue length may improve USB
// performance on the direction from device to host. When not specified (or
// zero), `usb_max_bulk_in_queue_length` will default to 32 according to the
// current EdgeTpu Coral implementation.
optional int32 usb_max_bulk_in_queue_length = 4;
}
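
// For illustration, a CoralSettings textproto that lowers the clock rate to
// save power (the device string is a hypothetical placeholder; its format is
// described in the tflite_utils.h link above):
//
//   device: "usb:0"  # hypothetical
//   performance: MEDIUM
//   usb_always_dfu: false
//   usb_max_bulk_in_queue_length: 32
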
message CPUSettings {
optional int32 num_threads = 1;
}

// How to configure TFLite.
message TFLiteSettings {
// Which delegate to use.
optional Delegate delegate = 1;
// How to configure the chosen delegate.
  // (In principle we would like to use 'oneof', but flatc turns that into a
  // nested anonymous table rather than a union. See
// https://github.com/google/flatbuffers/issues/4628).
optional NNAPISettings nnapi_settings = 2;
optional GPUSettings gpu_settings = 3;
optional HexagonSettings hexagon_settings = 4;
optional XNNPackSettings xnnpack_settings = 5;
// How to configure CPU execution.
optional CPUSettings cpu_settings = 6;
// Shared delegation settings.
optional int32 max_delegated_partitions = 7;
// For configuring the EdgeTpuDelegate.
optional EdgeTpuSettings edgetpu_settings = 8;
// For configuring the Coral EdgeTpu Delegate.
optional CoralSettings coral_settings = 10;
// Whether to automatically fall back to TFLite CPU path.
optional FallbackSettings fallback_settings = 9;
}
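
// For illustration, a complete TFLiteSettings textproto requesting NNAPI with
// automatic CPU fallback on compilation errors (values are illustrative only):
//
//   delegate: NNAPI
//   nnapi_settings {
//     execution_preference: NNAPI_FAST_SINGLE_ANSWER
//   }
//   max_delegated_partitions: 3
//   fallback_settings {
//     allow_automatic_fallback_on_compilation_error: true
//   }
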
// Whether to automatically fallback to TFLite CPU path on delegation errors.
//
// Typically fallback is enabled in production use but disabled in tests and
// benchmarks to ensure they test the intended path.
message FallbackSettings {
// Whether to allow automatically falling back to TfLite CPU path on
// compilation failure. Default is not allowing automatic fallback.
//
// This is useful in naive production usecases where the caller would prefer
// for the model to run even if it's not accelerated. More advanced users will
// implement fallback themselves; e.g., by using a different model on CPU.
//
// Note that compilation errors may occur either at initial
// ModifyGraphWithDelegate() time, or when calling AllocateTensors() after
// resizing.
optional bool allow_automatic_fallback_on_compilation_error = 7;
// Whether to allow automatically falling back to TfLite CPU path on
// execution error. Default is not allowing automatic fallback.
//
// Experimental, use with care (only when you have complete control over the
// client code).
//
  // The caveat above for compilation error holds. Additionally, execution-time
  // errors are harder to handle automatically as they require invalidating the
  // TfLite interpreter, which most client code has not been designed to deal
  // with.
optional bool allow_automatic_fallback_on_execution_error = 8;
}
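
// For illustration, a FallbackSettings textproto suited to typical production
// use, where compilation failures fall back to CPU but execution errors do
// not:
//
//   allow_automatic_fallback_on_compilation_error: true
//   allow_automatic_fallback_on_execution_error: false
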
// On-device mini-benchmark result storage. The following definitions are used
// to keep an append-only log of benchmark results on-device. (Hence there is a
// single top-level event that is used for all data.)
//
// These definitions don't need a proto-to-flatbuffer conversion, since they are
// not used for specifying configuration in the Tasks library.

// Which stage of benchmarking the event is for.
// There might be multiple events with the same type, if a benchmark is run
// multiple times.
enum BenchmarkEventType {
UNDEFINED_BENCHMARK_EVENT_TYPE = 0;
// Benchmark start. A start without an end can be interpreted as a test that
// has crashed or hung.
START = 1;
// Benchmarking completion. A model was successfully loaded, acceleration
// configured and inference run without errors. There may still be an issue
// with correctness of results, or with performance.
END = 2;
// Benchmark was not completed due to an error. The error may be a handled
// error (e.g., failure in a delegate), or a crash.
ERROR = 3;
// Benchmark data has been sent for logging.
LOGGED = 4;
}

// A correctness metric from a benchmark, for example KL-divergence between
// known-good CPU output and on-device output. These are primarily used for
// telemetry and monitored server-side.
message BenchmarkMetric {
optional string name = 1;
repeated float values = 2 [packed = true];
}

// Outcome of a successfully completed benchmark run. This information is
// intended both to be used on-device to select the best compute configuration
// and to be sent to a server for monitoring.
//
// Used with event type END.
message BenchmarkResult {
// Time to load model and apply acceleration. Initialization may get run
// multiple times to get information on variance.
repeated int64 initialization_time_us = 1 [packed = true];
// Time to run inference (call Invoke()). Inference may get run multiple times
// to get information on variance.
repeated int64 inference_time_us = 2 [packed = true];
  // Maximum memory used. Measures size of application heap (does not
  // necessarily take into account driver-side allocation).
optional int32 max_memory_kb = 3;
// Whether the inference produced correct results (validation graph output
// 'ok' for all test inputs). Used on-device to disallow configurations that
// produce incorrect results (e.g., due to OpenCL driver bugs).
optional bool ok = 4;
// Metrics that were used to determine the 'ok' status.
repeated BenchmarkMetric metrics = 5;
}

// A handled error.
message ErrorCode {
// Which delegate the error comes from (or NONE, if it comes from the tflite
// framework).
optional Delegate source = 1;
// What the tflite level error is.
optional int32 tflite_error = 2;
// What the underlying error is (e.g., NNAPI or OpenGL error).
optional int64 underlying_api_error = 3;
}

// When during benchmark execution an error occurred.
enum BenchmarkStage {
UNKNOWN = 0;
// During model loading or delegation.
INITIALIZATION = 1;
// During inference.
INFERENCE = 2;
}

// An error that occurred during benchmarking.
//
// Used with event type ERROR.
message BenchmarkError {
// How far benchmarking got.
optional BenchmarkStage stage = 1;
// Process exit code.
optional int32 exit_code = 2;
// Signal the process received.
optional int32 signal = 3;
// Handled error.
repeated ErrorCode error_code = 4;
}

// Top-level benchmarking event stored on-device. All events for a model are
// parsed to detect the status.
message BenchmarkEvent {
// Which settings were used for benchmarking.
optional TFLiteSettings tflite_settings = 1;
// Type of the event.
optional BenchmarkEventType event_type = 2;
// Result of benchmark, used when type is END.
optional BenchmarkResult result = 3;
// Error during benchmark, used when type is ERROR.
optional BenchmarkError error = 4;
  // Start timestamps. These are used for
  // 1. Checking whether a test was started but not completed within a given
  //    deadline.
  // 2. Optionally, as telemetry timestamps.
optional int64 boottime_us = 5;
optional int64 wallclock_us = 6;
}
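
// For illustration, a successful mini-benchmark run could be logged as a
// BenchmarkEvent textproto like the following (all numbers are illustrative,
// and the metric name is a hypothetical placeholder):
//
//   tflite_settings { delegate: GPU }
//   event_type: END
//   result {
//     initialization_time_us: 153000
//     inference_time_us: 34000
//     inference_time_us: 33500
//     max_memory_kb: 64000
//     ok: true
//     metrics { name: "output_kl_divergence" values: 0.01 }  # hypothetical
//   }
//   boottime_us: 1234567
//   wallclock_us: 1600000000000000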