Implement NNAPI QoS APIs in NNAPI delegate.
PiperOrigin-RevId: 311804298
Change-Id: Ia018050ca90fbc2cc12f363b5bc52727734e4abf
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index e6faea6..39ab19a 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -3256,6 +3256,22 @@
RETURN_TFLITE_ERROR_IF_NN_ERROR(context, set_caching_result,
"configuring NNAPI caching", nnapi_errno);
}
+  // Set compilation timeout and priority if applicable.
+ if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
+ if (delegate_options.max_compilation_timeout_duration_ns > 0) {
+ RETURN_TFLITE_ERROR_IF_NN_ERROR(
+ context,
+ nnapi_->ANeuralNetworksCompilation_setTimeout(
+ compilation,
+ delegate_options.max_compilation_timeout_duration_ns),
+ "setting compilation timeout", nnapi_errno);
+ }
+ RETURN_TFLITE_ERROR_IF_NN_ERROR(
+ context,
+ nnapi_->ANeuralNetworksCompilation_setPriority(
+ compilation, delegate_options.execution_priority),
+ "setting compilation priority", nnapi_errno);
+ }
const int finish_result =
nnapi_->ANeuralNetworksCompilation_finish(compilation);
if (finish_result != ANEURALNETWORKS_NO_ERROR) {
@@ -3322,6 +3338,27 @@
std::unique_ptr<ANeuralNetworksExecution, NNFreeExecution>
execution_unique_ptr(execution, NNFreeExecution(nnapi_));
+  // Set execution timeout and loop timeout if applicable.
+ const auto delegate_options =
+ StatefulNnApiDelegate::GetOptions(node->delegate);
+ if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
+ if (delegate_options.max_execution_timeout_duration_ns > 0) {
+ RETURN_TFLITE_ERROR_IF_NN_ERROR(
+ context,
+ nnapi_->ANeuralNetworksExecution_setTimeout(
+ execution, delegate_options.max_execution_timeout_duration_ns),
+ "setting execution timeout", nnapi_errno);
+ }
+ if (delegate_options.max_execution_loop_timeout_duration_ns > 0) {
+ RETURN_TFLITE_ERROR_IF_NN_ERROR(
+ context,
+ nnapi_->ANeuralNetworksExecution_setLoopTimeout(
+ execution,
+ delegate_options.max_execution_loop_timeout_duration_ns),
+ "setting execution loop timeout", nnapi_errno);
+ }
+ }
+
// Set the input tensor buffers. Note: we access tflite tensors using
// absolute indices but NN api indices inputs by relative indices.
int relative_input_index = 0;
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
index b94c6d6..68c55e1 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
@@ -22,6 +22,7 @@
#include "absl/types/optional.h"
#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
#include "tensorflow/lite/nnapi/nnapi_implementation.h"
typedef struct ANeuralNetworksMemory ANeuralNetworksMemory;
@@ -92,6 +93,30 @@
// allow fp32 compuation to be run in fp16.
bool allow_fp16 = false;
+
+ // Specifies the relative priority for executions of the model.
+ // Available values are {ANEURALNETWORKS_PRIORITY_LOW,
+ // ANEURALNETWORKS_PRIORITY_MEDIUM, ANEURALNETWORKS_PRIORITY_HIGH,
+ // ANEURALNETWORKS_PRIORITY_DEFAULT}.
+ int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
+
+  // Specifies the maximum expected duration in nanoseconds for compiling the
+ // model. If the device is not able to complete the compilation within the
+ // specified duration, the compilation may be aborted. If set to 0, the
+ // timeout duration is considered infinite.
+ uint64_t max_compilation_timeout_duration_ns = 0;
+
+  // Specifies the maximum expected duration in nanoseconds for executing the
+ // model. If the device is not able to complete the execution within the
+ // specified duration, the execution may be aborted. If set to 0, the
+ // timeout duration is considered infinite.
+ uint64_t max_execution_timeout_duration_ns = 0;
+
+  // Specifies the maximum expected duration in nanoseconds for WHILE loops in
+ // the execution. If a WHILE loop condition model does not output false
+ // within the specified duration, the execution will be aborted. If set to
+ // 0, the default timeout for loops will be used.
+ uint64_t max_execution_loop_timeout_duration_ns = 0;
};
// Uses default options.
@@ -189,6 +214,17 @@
int max_number_delegated_partitions;
// allow fp32 computation to be run in fp16.
bool allow_fp16;
+ // Specifies the relative priority for executions of the model.
+ int execution_priority = ANEURALNETWORKS_PRIORITY_DEFAULT;
+  // Specifies the maximum expected duration in nanoseconds for compiling the
+ // model.
+ uint64_t max_compilation_timeout_duration_ns = 0;
+  // Specifies the maximum expected duration in nanoseconds for executing the
+ // model.
+ uint64_t max_execution_timeout_duration_ns = 0;
+  // Specifies the maximum expected duration in nanoseconds for WHILE loops in
+  // the execution.
+ uint64_t max_execution_loop_timeout_duration_ns = 0;
~Data();
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
index ea9111c..acfa0c7 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -304,6 +304,23 @@
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
}
+// Sanity check for the state-ful NNAPI delegate with QoS hints.
+TEST(NNAPIDelegate, StatefulDelegateWithQoS) {
+ StatefulNnApiDelegate::Options options;
+ options.execution_priority = ANEURALNETWORKS_PRIORITY_HIGH;
+ options.max_compilation_timeout_duration_ns = UINT64_MAX;
+ options.max_execution_timeout_duration_ns = UINT64_MAX;
+ options.max_execution_loop_timeout_duration_ns = UINT64_MAX;
+
+ FloatAddOpModel m(options, {TensorType_FLOAT32, {1, 2, 2, 1}},
+ {TensorType_FLOAT32, {1, 2, 2, 1}},
+ {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+ m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
+ m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
+}
+
// Sanity check for the state-ful NNAPI delegate using TfLiteBufferHandle.
TEST(NNAPIDelegate, StatefulDelegateWithBufferHandles) {
// Skip the test if Android specific functions could not be found.
diff --git a/tensorflow/lite/nnapi/NeuralNetworksTypes.h b/tensorflow/lite/nnapi/NeuralNetworksTypes.h
index a3dfd37..6739838 100644
--- a/tensorflow/lite/nnapi/NeuralNetworksTypes.h
+++ b/tensorflow/lite/nnapi/NeuralNetworksTypes.h
@@ -216,6 +216,18 @@
};
/**
+ * Relative execution priority.
+ *
+ * Available since API level 30.
+ */
+enum {
+ ANEURALNETWORKS_PRIORITY_LOW = 90,
+ ANEURALNETWORKS_PRIORITY_MEDIUM = 100,
+ ANEURALNETWORKS_PRIORITY_HIGH = 110,
+ ANEURALNETWORKS_PRIORITY_DEFAULT = ANEURALNETWORKS_PRIORITY_MEDIUM,
+};
+
+/**
* ANeuralNetworksMemory is an opaque type that represents memory.
*
* This type is used to represent shared memory, memory mapped files,
@@ -528,9 +540,21 @@
ANeuralNetworksCompilation* compilation, const char* cacheDir,
const uint8_t* token);
+typedef int (*ANeuralNetworksCompilation_setTimeout_fn)(
+ ANeuralNetworksCompilation* compilation, uint64_t duration);
+
+typedef int (*ANeuralNetworksCompilation_setPriority_fn)(
+ ANeuralNetworksCompilation* compilation, int priority);
+
typedef int (*ANeuralNetworksExecution_compute_fn)(
ANeuralNetworksExecution* execution);
+typedef int (*ANeuralNetworksExecution_setTimeout_fn)(
+ ANeuralNetworksExecution* execution, uint64_t duration);
+
+typedef int (*ANeuralNetworksExecution_setLoopTimeout_fn)(
+ ANeuralNetworksExecution* execution, uint64_t duration);
+
typedef int (*ANeuralNetworksExecution_getOutputOperandRank_fn)(
ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank);
diff --git a/tensorflow/lite/nnapi/nnapi_implementation.cc b/tensorflow/lite/nnapi/nnapi_implementation.cc
index accdfb6..ad5869f 100644
--- a/tensorflow/lite/nnapi/nnapi_implementation.cc
+++ b/tensorflow/lite/nnapi/nnapi_implementation.cc
@@ -215,6 +215,17 @@
ANeuralNetworksModel_getExtensionOperationType);
LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
ANeuralNetworksModel_setOperandExtensionData);
+
+ // API 30 (NNAPI 1.3) methods.
+ LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+ ANeuralNetworksCompilation_setTimeout);
+ LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+ ANeuralNetworksCompilation_setPriority);
+ LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+ ANeuralNetworksExecution_setTimeout);
+ LOAD_FUNCTION_OPTIONAL(libneuralnetworks,
+ ANeuralNetworksExecution_setLoopTimeout);
+
return nnapi;
}
diff --git a/tensorflow/lite/nnapi/nnapi_implementation.h b/tensorflow/lite/nnapi/nnapi_implementation.h
index a27f5ba..abee0fb 100644
--- a/tensorflow/lite/nnapi/nnapi_implementation.h
+++ b/tensorflow/lite/nnapi/nnapi_implementation.h
@@ -790,6 +790,76 @@
const uint8_t* token);
/**
+ * Set the maximum expected duration for compiling the model.
+ *
+ * If the device is not able to complete the compilation within the specified
+ * duration, the compilation may be aborted. The timeout duration begins at
+ * the call to {@link ANeuralNetworksCompilation_finish}.
+ *
+ * This timeout duration acts as a hint to drivers, and can be used to both
+ * free up compute resources within the driver and return control back to the
+ * application quicker than is possible without the hint. It enables drivers
+ * that are able to estimate how long a compilation will take to abort the
+ * compilation before it has even started if the driver believes the
+ * compilation cannot be completed within the timeout duration. Similarly, it
+ * enables drivers to abort an ongoing compilation if it is taking too long.
+ * However, this call does not guarantee that the compilation will complete or
+ * abort within the timeout duration.
+ *
+ * By default (i.e., unless ANeuralNetworksCompilation_setTimeout is called),
+ * the timeout duration for compiling the model is considered infinite.
+ *
+ * The {@link ANeuralNetworksCompilation} must have been created with
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
+ * the timeout duration hint will be ignored.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * @param compilation The compilation to be modified.
+ * @param duration The maximum amount of time in nanoseconds that is expected
+ * to be spent finishing a compilation. If this duration is exceeded, the
+ * compilation may be aborted. If set to 0, the timeout duration is
+ * considered infinite.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+ int (*ANeuralNetworksCompilation_setTimeout)(
+ ANeuralNetworksCompilation* compilation, uint64_t duration);
+
+ /**
+ * Set the execution priority.
+ *
+ * Execution priorities are relative to other executions created by the same
+ * application (specifically same uid) for the same device. Specifically,
+ * priorities of executions from one application will not affect executions
+ * from another application. Similarly, priorities of executions on one device
+ * will not affect executions on another device.
+ *
+ * Higher priority executions may use more compute resources than lower
+ * priority executions, and may preempt or starve lower priority executions.
+ *
+ * See {@link ANeuralNetworksCompilation} for information on multithreaded
+ * usage.
+ *
+ * Available since API level 30.
+ *
+ * @param compilation The compilation to be modified.
+ * @param priority The relative priority of the execution compared to other
+ * executions created by the application. Must be one of
+ * ANEURALNETWORKS_PRIORITY_*.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ */
+ int (*ANeuralNetworksCompilation_setPriority)(
+ ANeuralNetworksCompilation* compilation, int priority);
+
+ /**
* Schedule synchronous evaluation of the execution.
*
* <p>Schedules synchronous evaluation of the execution. Returns once the
@@ -814,6 +884,84 @@
int (*ANeuralNetworksExecution_compute)(ANeuralNetworksExecution* execution);
/**
+ * Set the maximum expected duration of the specified execution.
+ *
+ * If the device is not able to complete the execution within the specified
+ * duration, the execution may be aborted. The timeout duration begins at a
+ * call to one of:
+ * - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
+ *
+ * This timeout duration acts as a hint to drivers, and can be used to both
+ * free up compute resources within the driver and return control back to the
+ * application quicker than is possible without the hint. It enables drivers
+ * that are able to estimate how long an execution will take to abort the
+ * execution before it has even started if the driver believes the execution
+ * cannot be completed within the timeout duration. Similarly, it enables
+ * drivers to abort an ongoing execution if it is taking too long. However,
+ * this call does not guarantee that the execution will complete or abort
+ * within the timeout duration.
+ *
+ * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
+ * the timeout duration for execution is considered infinite.
+ *
+ * The {@link ANeuralNetworksExecution} must have been created from an
+ * {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1,
+ * otherwise this function will fail with ANEURALNETWORKS_BAD_DATA. If the
+ * device has a feature level reported by
+ * {@link ANeuralNetworksDevice_getFeatureLevel} that is lower than 30, then
+ * the timeout duration hint will be ignored.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded
+ * usage.
+ *
+ * @param execution The execution to be modified.
+ * @param duration The maximum amount of time in nanoseconds that is expected
+ * to be spent executing a model. If this duration is exceeded, the execution
+ * may be aborted. If set to 0, the timeout duration is considered
+ * infinite.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ *
+ * Available since API level 30.
+ */
+ int (*ANeuralNetworksExecution_setTimeout)(
+ ANeuralNetworksExecution* execution, uint64_t duration);
+
+ /**
+ * Set the maximum duration of WHILE loops in the specified execution.
+ *
+ * This is a fuzzy per-loop timeout intended to prevent infinite loops.
+ *
+ * If a WHILE loop condition model does not output false within the specified
+ * duration, the execution will be aborted.
+ *
+ * See {@link ANeuralNetworks_getDefaultLoopTimeout} and
+ * {@link ANeuralNetworks_getMaximumLoopTimeout} for the default
+ * and maximum timeout values.
+ *
+ * See {@link ANeuralNetworksExecution} for information on multithreaded
+ * usage.
+ *
+ * @param execution The execution to be modified.
+ * @param duration The maximum amount of time in nanoseconds that can be spent
+ * executing a WHILE loop. If the specified duration value exceeds the
+ * value produced by {@link ANeuralNetworks_getMaximumLoopTimeout}, it will be
+ * overridden by that value.
+ *
+ * @return ANEURALNETWORKS_NO_ERROR if successful.
+ * ANEURALNETWORKS_BAD_STATE if execution has started.
+ * ANEURALNETWORKS_UNEXPECTED_NULL if execution is NULL.
+ *
+ * Available since API level 30.
+ */
+ int (*ANeuralNetworksExecution_setLoopTimeout)(
+ ANeuralNetworksExecution* execution, uint64_t duration);
+
+ /**
* Get the dimensional information of the specified output operand of the
* model of the
* {@link ANeuralNetworksExecution}.