Update sync fence related APIs
- Allow ANeuralNetworksExecution_startComputeWithDependencies
to measure execution duration after all dependencies are done.
- Allow ANeuralNetworksExecution_startComputeWithDependencies
to specify post-gate-release timeout duration.
Bug: 142778241
Bug: 136739795
Test: mm
Test: NNAPI CTS & VTS tests
Change-Id: Ie32e30b7d1cb98882f3084c1741e975d3e39d970
Merged-In: Ie32e30b7d1cb98882f3084c1741e975d3e39d970
(cherry picked from commit 6a0c2ed41976ee4b57a97e670f11d1c6c25a8df6)
diff --git a/common/include/HalInterfaces.h b/common/include/HalInterfaces.h
index b885675..2f20afc 100644
--- a/common/include/HalInterfaces.h
+++ b/common/include/HalInterfaces.h
@@ -95,6 +95,7 @@
using V1_3::Operation;
using V1_3::OperationType;
using V1_3::OperationTypeRange;
+using V1_3::OptionalTimeoutDuration;
using V1_3::OptionalTimePoint;
using V1_3::Priority;
using V1_3::Request;
diff --git a/driver/sample/SampleDriver.cpp b/driver/sample/SampleDriver.cpp
index 3d66903..8f68512 100644
--- a/driver/sample/SampleDriver.cpp
+++ b/driver/sample/SampleDriver.cpp
@@ -408,7 +408,9 @@
}
Return<void> SamplePreparedModel::executeFenced(const hal::Request&, const hidl_vec<hidl_handle>&,
- MeasureTiming, executeFenced_cb cb) {
+ MeasureTiming, const OptionalTimePoint&,
+ const OptionalTimeoutDuration&,
+ executeFenced_cb cb) {
// TODO(miaowang): implement me.
cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
return Void();
diff --git a/driver/sample/SampleDriver.h b/driver/sample/SampleDriver.h
index 4aabe9c..a3eff5e 100644
--- a/driver/sample/SampleDriver.h
+++ b/driver/sample/SampleDriver.h
@@ -139,7 +139,10 @@
configureExecutionBurst_cb cb) override;
hal::Return<void> executeFenced(const hal::Request& request,
const hal::hidl_vec<hal::hidl_handle>& wait_for,
- hal::MeasureTiming measure, executeFenced_cb callback) override;
+ hal::MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& duration,
+ executeFenced_cb callback) override;
private:
hal::Model mModel;
diff --git a/runtime/Event.h b/runtime/Event.h
index 90bc0b2..4981a1f 100644
--- a/runtime/Event.h
+++ b/runtime/Event.h
@@ -77,7 +77,9 @@
// If there is a callback available, use the callback to get the error code.
if (kFencedExecutionCallback != nullptr) {
const hal::Return<void> ret = kFencedExecutionCallback->getExecutionInfo(
- [&error](hal::ErrorStatus status, hal::Timing) { error = status; });
+ [&error](hal::ErrorStatus status, hal::Timing, hal::Timing) {
+ error = status;
+ });
if (!ret.isOk()) {
error = hal::ErrorStatus::GENERAL_FAILURE;
}
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index b51cbdb..caede71 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -264,13 +264,16 @@
// Timing might be reported through other compute method.
// Only query the fenced callback if it is available, and we are not
// updating mTiming to keep this method const.
- Timing timing = mTiming;
+ Timing timingLaunched = mTiming;
+ Timing timingFenced = kNoTiming;
if (mFencedExecutionCallback != nullptr) {
ErrorStatus status;
const Return<void> ret = mFencedExecutionCallback->getExecutionInfo(
- [&status, &timing](ErrorStatus error, Timing t) {
+ [&status, &timingLaunched, &timingFenced](ErrorStatus error, Timing tLaunched,
+ Timing tFenced) {
status = error;
- timing = t;
+ timingLaunched = tLaunched;
+ timingFenced = tFenced;
});
if (!ret.isOk()) {
*duration = UINT64_MAX;
@@ -284,10 +287,16 @@
uint64_t microDuration = UINT64_MAX;
switch (durationCode) {
case ANEURALNETWORKS_DURATION_ON_HARDWARE:
- microDuration = timing.timeOnDevice;
+ microDuration = timingLaunched.timeOnDevice;
break;
case ANEURALNETWORKS_DURATION_IN_DRIVER:
- microDuration = timing.timeInDriver;
+ microDuration = timingLaunched.timeInDriver;
+ break;
+ case ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE:
+ microDuration = timingFenced.timeOnDevice;
+ break;
+ case ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER:
+ microDuration = timingFenced.timeInDriver;
break;
default:
CHECK(!"unexpected");
@@ -552,17 +561,17 @@
// allowFallback is set to true.
static std::tuple<int, int, sp<hal::IFencedExecutionCallback>> startComputeFenced(
ExecutionBuilder* executionBuilder, const ExecutionPlan& plan,
- std::shared_ptr<ExecutionPlan::Controller> controller, const std::vector<int>& wait_for,
- bool allowFallback) {
+ std::shared_ptr<ExecutionPlan::Controller> controller, const std::vector<int>& waitFor,
+ uint64_t timeoutDurationAfterFence, bool allowFallback) {
CHECK(executionBuilder != nullptr);
VLOG(EXECUTION) << "ExecutionBuilder::computeFenced (from plan, iteratively)";
// Disallow fallback when the ExecutionPlan is simple on CPU.
allowFallback &= !plan.isSimpleCpu();
- // Initiate wait_for_fds, sync_fence for the first step.
- std::vector<int> wait_for_fds = wait_for;
- int sync_fence = -1;
- sp<hal::IFencedExecutionCallback> computeFenced_callback;
+ // Initiate waitForFds, syncFence for the first step.
+ std::vector<int> waitForFds = waitFor;
+ int syncFence = -1;
+ sp<hal::IFencedExecutionCallback> computeFencedCallback;
while (true) {
VLOG(EXECUTION) << "looking for next StepExecutor";
@@ -573,7 +582,7 @@
int n = plan.next(controller, &executor, &burstController);
if (n != ANEURALNETWORKS_NO_ERROR) {
if (allowFallback) break;
- // Return -1 for the sync_fence_fd, and nullptr for the callback.
+ // Return -1 for the sync fence fd, and nullptr for the callback.
return std::make_tuple(n, -1, nullptr);
}
@@ -582,23 +591,27 @@
if (executor == nullptr) {
// If the final step returns a -1 for sync fence, the execution is finished.
// Update the output shapes.
- if (sync_fence == -1) {
+ if (syncFence == -1) {
// TODO(miaowang): support dynamic output shape only with memory domain.
// For now just return the initial output shapes.
executionBuilder->finish(ErrorStatus::NONE,
executionBuilder->getInitialOutputShapes());
}
- return std::make_tuple(ANEURALNETWORKS_NO_ERROR, sync_fence, computeFenced_callback);
+ return std::make_tuple(ANEURALNETWORKS_NO_ERROR, syncFence, computeFencedCallback);
}
const bool executorIsCpu = executor->isCpu();
// Attempt to execute a single step of the execution.
- auto [stepN, sync_fd, d_callback] = executor->computeFenced(wait_for_fds);
+ auto [stepN, syncFd, callback] =
+ executor->computeFenced(waitForFds, timeoutDurationAfterFence);
- // Update wait_for_fds, sync_fence for the next step.
- sync_fence = sync_fd;
- computeFenced_callback = d_callback;
- wait_for_fds = {sync_fd};
+ // Update waitForFds, syncFence for the next step.
+ syncFence = syncFd;
+ computeFencedCallback = callback;
+ waitForFds.clear();
+ if (syncFd > 0) {
+ waitForFds = {syncFd};
+ }
// If execution was successful, continue to next step.
if (stepN == ANEURALNETWORKS_NO_ERROR) {
@@ -625,30 +638,49 @@
// occurred during the step executions. Instead, do a full execution
// fallback on the CPU.
VLOG(EXECUTION) << "Performing full fallback on the CPU.";
- for (int sync_fd : wait_for) {
- if (sync_fd > 0) {
- int r = sync_wait(sync_fd, -1);
+ for (int syncFd : waitFor) {
+ if (syncFd > 0) {
+ int r = sync_wait(syncFd, -1);
if (r < 0) {
- VLOG(EXECUTION) << "sync_wait failed, fd: " << sync_fd;
+ VLOG(EXECUTION) << "sync_wait failed, fd: " << syncFd;
return std::make_tuple(ANEURALNETWORKS_OP_FAILED, -1, nullptr);
}
}
}
auto [fullN, fullOutputShapes, fullTiming] = cpuFallbackFull(executionBuilder);
const ErrorStatus fullStatus = convertResultCodeToErrorStatus(fullN);
- sync_fence = -1;
+ syncFence = -1;
executionBuilder->finish(fullStatus, fullOutputShapes);
executionBuilder->reportTiming(fullTiming);
- return std::make_tuple(fullN, sync_fence, nullptr);
+ return std::make_tuple(fullN, syncFence, nullptr);
}
-int ExecutionBuilder::computeFenced(const std::vector<int>& wait_for, int* sync_fence) {
- CHECK(sync_fence != nullptr);
+int ExecutionBuilder::computeFenced(const std::vector<int>& waitFor,
+ uint64_t timeoutDurationAfterFence, int* syncFence) {
+ CHECK(syncFence != nullptr);
if (mStarted) {
LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
" called on an execution that has already started";
return ANEURALNETWORKS_BAD_STATE;
}
+ if (timeoutDurationAfterFence > 0) {
+ if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
+ LOG(ERROR)
+ << "ANeuralNetworksExecution_startComputeWithDependencies called with non-zero "
+ "duration on an ANeuralNetworksExecution "
+ "created from an ANeuralNetworksCompilation that was not created by "
+ "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const auto& device = mCompilation->mDevices.front();
+ const bool supportsExecutionDeadline = device->supportsDeadlines().second;
+ if (!supportsExecutionDeadline) {
+ LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies called with "
+ "non-zero duration on device that does not support "
+ "execution timeouts.";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
for (auto& p : mInputs) {
if (p.state == ModelArgumentInfo::UNSPECIFIED) {
LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
@@ -663,14 +695,23 @@
return ANEURALNETWORKS_BAD_DATA;
}
}
+ for (uint32_t i = 0; i < mOutputs.size(); i++) {
+ if (mOutputs[i].state != ModelArgumentInfo::HAS_NO_VALUE &&
+ !checkDimensionInfo(mModel->getOutputOperand(i), nullptr,
+ "ANeuralNetworksExecution_startComputeWithDependencies", false)) {
+ LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
+ " not all outputs have fully specified dimensions";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ }
mStarted = true;
const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this, nullptr);
VLOG(EXECUTION) << "ExecutionBuilder::computeFenced";
int result;
- std::tie(result, mSyncFenceFd, mFencedExecutionCallback) =
- startComputeFenced(this, *mPlan, controller, wait_for, allowFallback);
- *sync_fence = mSyncFenceFd;
+ std::tie(result, mSyncFenceFd, mFencedExecutionCallback) = startComputeFenced(
+ this, *mPlan, controller, waitFor, timeoutDurationAfterFence, allowFallback);
+ *syncFence = mSyncFenceFd;
return result;
}
@@ -954,7 +995,7 @@
}
std::tuple<int, int, sp<hal::IFencedExecutionCallback>> StepExecutor::computeFenced(
- const std::vector<int>& wait_for) {
+ const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence) {
CHECK(mPreparedModel != nullptr);
if (VLOG_IS_ON(EXECUTION)) {
@@ -963,12 +1004,20 @@
}
const MeasureTiming measure = measureTiming(mExecutionBuilder);
- const auto [n, sync_fence, computeFenced_callback, timing] =
- mPreparedModel->executeFenced(mInputs, mOutputs, mMemories, wait_for, measure);
- if (sync_fence < 0 && computeFenced_callback == nullptr) {
+ const auto [timePointN, deadline] = makeTimePoint(mExecutionBuilder->getTimeoutDuration());
+ if (timePointN != ANEURALNETWORKS_NO_ERROR) {
+ return {timePointN, -1, nullptr};
+ }
+ OptionalTimeoutDuration otd;
+ if (timeoutDurationAfterFence > 0) {
+ otd.nanoseconds(timeoutDurationAfterFence);
+ }
+ const auto [n, syncFence, computeFencedCallback, timing] = mPreparedModel->executeFenced(
+ mInputs, mOutputs, mMemories, waitFor, measure, deadline, otd);
+ if (syncFence < 0 && computeFencedCallback == nullptr) {
mExecutionBuilder->reportTiming(timing);
}
- return {n, sync_fence, computeFenced_callback};
+ return {n, syncFence, computeFencedCallback};
}
// For cpuFallback{Partial,Full}, recompile the model on CPU and then start compute.
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 9eee184..fdb4677 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -66,7 +66,8 @@
std::optional<uint64_t> getTimeoutDuration() const;
- int computeFenced(const std::vector<int>& wait_for, int* sync_fence);
+ int computeFenced(const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
+ int* sync_fence);
int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
CHECK(synchronizationCallback != nullptr);
@@ -231,7 +232,7 @@
// Perform fenced execution and return error_code, sync_fence_fd and a
// callback.
std::tuple<int, int, sp<hal::IFencedExecutionCallback>> computeFenced(
- const std::vector<int>& wait_for);
+ const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence);
private:
void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
diff --git a/runtime/Manager.cpp b/runtime/Manager.cpp
index a6c3b4e..6122c5b 100644
--- a/runtime/Manager.cpp
+++ b/runtime/Manager.cpp
@@ -132,7 +132,9 @@
std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
- const std::vector<int>& wait_for, MeasureTiming measure) const override;
+ const std::vector<int>& waitFor, MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const override;
std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
bool preferPowerOverLatency) const override {
@@ -412,15 +414,16 @@
}
std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing>
-DriverPreparedModel::executeFenced(const std::vector<ModelArgumentInfo>& inputs,
- const std::vector<ModelArgumentInfo>& outputs,
- const MemoryTracker& memories, const std::vector<int>& wait_for,
- hal::MeasureTiming measure) const {
+DriverPreparedModel::executeFenced(
+ const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
+ const MemoryTracker& memories, const std::vector<int>& waitFor, hal::MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const {
NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
-
+ CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd > 0; }));
// Make a copy of the memory tracker as we will append memory pools for pointer arguments.
MemoryTracker localMemories = memories;
- sp<hal::IFencedExecutionCallback> executeFenced_callback;
+ sp<hal::IFencedExecutionCallback> executeFencedCallback;
hal::Timing timing = kNoTiming;
// We separate the input & output pools so accelerators only need to copy
@@ -464,43 +467,38 @@
"DriverPreparedModel::executeFenced");
int n = ANEURALNETWORKS_OP_FAILED;
- hidl_vec<hidl_handle> wait_for_handles;
- wait_for_handles.resize(wait_for.size());
- for (uint32_t i = 0; i < wait_for.size(); i++) {
- // Return if FD is invalid.
- if (wait_for[i] <= 0) {
- LOG(ERROR) << "Invalid file descriptor";
- return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, timing};
- }
+ hidl_vec<hidl_handle> waitForHandles;
+ waitForHandles.resize(waitFor.size());
+ for (uint32_t i = 0; i < waitFor.size(); i++) {
native_handle_t* nativeHandle = native_handle_create(1, 0);
if (nativeHandle == nullptr) {
LOG(ERROR) << "Failed to create native_handle";
return {n, -1, nullptr, timing};
}
- int dup_fd = dup(wait_for[i]);
- if (dup_fd <= 0) {
+ int dupFd = dup(waitFor[i]);
+ if (dupFd <= 0) {
LOG(ERROR) << "Unable to dup the file descriptor";
return {n, -1, nullptr, timing};
}
- nativeHandle->data[0] = dup_fd;
+ nativeHandle->data[0] = dupFd;
hidl_handle hidlHandle;
hidlHandle.setTo(nativeHandle, /*shouldOwn=*/true);
- wait_for_handles[i] = std::move(hidlHandle);
+ waitForHandles[i] = std::move(hidlHandle);
}
- hidl_handle sync_fence;
- std::tie(n, sync_fence, executeFenced_callback, timing) =
- mPreparedModel->executeFenced(request, wait_for_handles, measure);
+ hidl_handle syncFence;
+ std::tie(n, syncFence, executeFencedCallback, timing) = mPreparedModel->executeFenced(
+ request, waitForHandles, measure, deadline, timeoutDurationAfterFence);
if (n != ANEURALNETWORKS_NO_ERROR) {
VLOG(EXECUTION) << "**executeFenced failed**";
return {n, -1, nullptr, timing};
}
- int sync_fence_fd = -1;
- if (sync_fence.getNativeHandle()) {
- sync_fence_fd = dup(sync_fence.getNativeHandle()->data[0]);
- if (sync_fence_fd < 0) {
+ int syncFenceFd = -1;
+ if (syncFence.getNativeHandle()) {
+ syncFenceFd = dup(syncFence.getNativeHandle()->data[0]);
+ if (syncFenceFd < 0) {
LOG(ERROR) << "Failed to dup the file descriptor";
return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
}
@@ -509,11 +507,11 @@
// Then copy the output data from shared memory to the output buffers.
if (outputPtrArgsMemory != nullptr) {
NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "DriverPreparedModel::executeFenced");
- if (sync_fence_fd > 0) {
- int r = sync_wait(sync_fence_fd, -1);
+ if (syncFenceFd > 0) {
+ int r = sync_wait(syncFenceFd, -1);
if (r < 0) {
- LOG(ERROR) << "sync wait failed, fd: " << sync_fence_fd;
- return {ANEURALNETWORKS_OP_FAILED, sync_fence_fd, nullptr, timing};
+ LOG(ERROR) << "sync wait failed, fd: " << syncFenceFd;
+ return {ANEURALNETWORKS_OP_FAILED, syncFenceFd, nullptr, timing};
}
}
uint32_t ptrOutputIndex = 0;
@@ -527,7 +525,7 @@
}
VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
- return {ANEURALNETWORKS_NO_ERROR, sync_fence_fd, executeFenced_callback, timing};
+ return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedCallback, timing};
}
// A special abstracted device for the CPU. Only one instance of this class will exist.
@@ -608,7 +606,9 @@
std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
- const std::vector<int>& wait_for, MeasureTiming measure) const override;
+ const std::vector<int>& wait_for, MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const override;
// Prefer to use CpuPreparedModel::create.
CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
@@ -678,15 +678,16 @@
std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing>
CpuPreparedModel::executeFenced(const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs,
- const MemoryTracker& memories, const std::vector<int>& wait_for,
- hal::MeasureTiming measure) const {
+ const MemoryTracker& memories, const std::vector<int>& waitFor,
+ hal::MeasureTiming measure, const hal::OptionalTimePoint&,
+ const hal::OptionalTimeoutDuration&) const {
VLOG(EXECUTION)
<< "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
- for (int sync_fd : wait_for) {
- if (sync_fd > 0) {
- int r = sync_wait(sync_fd, -1);
+ for (int syncFd : waitFor) {
+ if (syncFd > 0) {
+ int r = sync_wait(syncFd, -1);
if (r < 0) {
- LOG(ERROR) << "sync wait failed, fd: " << sync_fd;
+ LOG(ERROR) << "sync wait failed, fd: " << syncFd;
return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {UINT64_MAX, UINT64_MAX}};
}
}
diff --git a/runtime/Manager.h b/runtime/Manager.h
index 439d089..5b26dcf 100644
--- a/runtime/Manager.h
+++ b/runtime/Manager.h
@@ -66,7 +66,9 @@
virtual std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
- const std::vector<int>& wait_for, hal::MeasureTiming measure) const = 0;
+ const std::vector<int>& waitFor, hal::MeasureTiming measure,
+ const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const = 0;
virtual std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
bool preferPowerOverLatency) const = 0;
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 0b0c6cc..364319f 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -250,6 +250,10 @@
"ANEURALNETWORKS_DURATION_ON_HARDWARE has changed");
static_assert(ANEURALNETWORKS_DURATION_IN_DRIVER == 1,
"ANEURALNETWORKS_DURATION_IN_DRIVER has changed");
+static_assert(ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE == 2,
+ "ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE has changed");
+static_assert(ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER == 3,
+ "ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER has changed");
// Make sure that the constants are compatible with the values defined in
// hardware/interfaces/neuralnetworks/1.0/types.hal.
@@ -1460,38 +1464,38 @@
return m->setOperandExtensionData(index, data, length);
}
-int ANeuralNetworksEvent_createFromSyncFenceFd(int sync_fence_fd, ANeuralNetworksEvent** event) {
+int ANeuralNetworksEvent_createFromSyncFenceFd(int syncFenceFd, ANeuralNetworksEvent** event) {
if (event == nullptr) {
LOG(ERROR) << "ANeuralNetworksEvent_createFromSyncFenceFd passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if (sync_fence_fd <= 0) {
+ if (syncFenceFd <= 0) {
LOG(ERROR) << "ANeuralNetworksEvent_createFromSyncFenceFd passed an invalid fd: "
- << sync_fence_fd;
+ << syncFenceFd;
*event = nullptr;
return ANEURALNETWORKS_BAD_DATA;
}
- std::unique_ptr<SyncFenceEvent> e = std::make_unique<SyncFenceEvent>(sync_fence_fd, nullptr);
+ std::unique_ptr<SyncFenceEvent> e = std::make_unique<SyncFenceEvent>(syncFenceFd, nullptr);
*event = reinterpret_cast<ANeuralNetworksEvent*>(e.release());
return ANEURALNETWORKS_NO_ERROR;
}
-int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* sync_fence_fd) {
- if (sync_fence_fd == nullptr) {
+int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* syncFenceFd) {
+ if (syncFenceFd == nullptr) {
LOG(ERROR) << "ANeuralNetworksEvent_getSyncFenceFd passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- *sync_fence_fd = -1;
+ *syncFenceFd = -1;
if (event == nullptr) {
LOG(ERROR) << "ANeuralNetworksEvent_getSyncFenceFd passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
const IEvent* e = reinterpret_cast<const IEvent*>(event);
// The client owns the dupped fd, and is responsible for closing it.
- *sync_fence_fd = e->getSyncFenceFd(/*shouldDup*/ true);
- if (*sync_fence_fd <= 0) {
+ *syncFenceFd = e->getSyncFenceFd(/*shouldDup*/ true);
+ if (*syncFenceFd <= 0) {
LOG(ERROR) << "ANeuralNetworksEvent_getSyncFenceFd unable to get valid sync_fence fd";
- *sync_fence_fd = -1;
+ *syncFenceFd = -1;
return ANEURALNETWORKS_OP_FAILED;
}
return ANEURALNETWORKS_NO_ERROR;
@@ -1499,38 +1503,38 @@
int ANeuralNetworksExecution_startComputeWithDependencies(
ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
- uint32_t num_events, ANeuralNetworksEvent** event) {
+ uint32_t numOfDependencies, uint64_t duration, ANeuralNetworksEvent** event) {
NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_startComputeWithDependencies");
if (!event) {
LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies passed a nullptr";
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
- if ((!dependencies && num_events != 0) || !execution) {
+ if ((!dependencies && numOfDependencies != 0) || !execution) {
LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies passed a nullptr";
*event = nullptr;
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
- std::vector<int> wait_for_list;
- for (uint32_t i = 0; i < num_events; i++) {
+ std::vector<int> waitForList;
+ for (uint32_t i = 0; i < numOfDependencies; i++) {
if (!dependencies[i]) {
LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies passed a nullptr";
*event = nullptr;
return ANEURALNETWORKS_UNEXPECTED_NULL;
}
const IEvent* e = reinterpret_cast<const IEvent*>(dependencies[i]);
- int sync_fence_fd = e->getSyncFenceFd(/*should_dup*/ false);
- if (sync_fence_fd < 0) {
+ int syncFenceFd = e->getSyncFenceFd(/*should_dup*/ false);
+ if (syncFenceFd < 0) {
e->wait();
} else {
- wait_for_list.push_back(sync_fence_fd);
+ waitForList.push_back(syncFenceFd);
}
}
- int sync_fence_to_signal = -1;
- int n = r->computeFenced(wait_for_list, &sync_fence_to_signal);
+ int syncFenceToSignal = -1;
+ int n = r->computeFenced(waitForList, duration, &syncFenceToSignal);
std::unique_ptr<SyncFenceEvent> e =
- std::make_unique<SyncFenceEvent>(sync_fence_to_signal, r->getFencedExecutionCallback());
+ std::make_unique<SyncFenceEvent>(syncFenceToSignal, r->getFencedExecutionCallback());
if (n != ANEURALNETWORKS_NO_ERROR) {
*event = nullptr;
} else {
diff --git a/runtime/VersionedInterfaces.cpp b/runtime/VersionedInterfaces.cpp
index 8fad344..b9da649 100644
--- a/runtime/VersionedInterfaces.cpp
+++ b/runtime/VersionedInterfaces.cpp
@@ -403,62 +403,63 @@
}
std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing>
-VersionedIPreparedModel::executeFenced(const hal::Request& request,
- const hal::hidl_vec<hal::hidl_handle>& wait_for,
- MeasureTiming measure) {
+VersionedIPreparedModel::executeFenced(
+ const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
+ MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) {
// version 1.3+ HAL
// TODO(miaowang): figure out the right coding style for the sync_fence related API.
- hal::hidl_handle sync_fence;
- sp<hal::IFencedExecutionCallback> dispatch_callback;
+ hal::hidl_handle syncFence;
+ sp<hal::IFencedExecutionCallback> dispatchCallback;
hal::Timing timing = {UINT64_MAX, UINT64_MAX};
if (mPreparedModelV1_3 != nullptr) {
- ErrorStatus error_status;
+ ErrorStatus errorStatus;
Return<void> ret = mPreparedModelV1_3->executeFenced(
- request, wait_for, measure,
- [&sync_fence, &error_status, &dispatch_callback](
- ErrorStatus error, const hidl_handle& sync_handle,
+ request, waitFor, measure, deadline, timeoutDurationAfterFence,
+ [&syncFence, &errorStatus, &dispatchCallback](
+ ErrorStatus error, const hidl_handle& handle,
const sp<hal::IFencedExecutionCallback>& callback) {
- sync_fence = sync_handle;
- error_status = error;
- dispatch_callback = callback;
+ syncFence = handle;
+ errorStatus = error;
+ dispatchCallback = callback;
});
if (!ret.isOk()) {
LOG(ERROR) << "executeFenced failure: " << ret.description();
return std::make_tuple(ANEURALNETWORKS_OP_FAILED, hal::hidl_handle(nullptr), nullptr,
timing);
}
- if (error_status != ErrorStatus::NONE) {
+ if (errorStatus != ErrorStatus::NONE) {
LOG(ERROR) << "executeFenced returned "
- << toString(static_cast<ErrorStatus>(error_status));
- return std::make_tuple(convertErrorStatusToResultCode(error_status),
+ << toString(static_cast<ErrorStatus>(errorStatus));
+ return std::make_tuple(convertErrorStatusToResultCode(errorStatus),
hal::hidl_handle(nullptr), nullptr, timing);
}
- return std::make_tuple(ANEURALNETWORKS_NO_ERROR, sync_fence, dispatch_callback, timing);
+ return std::make_tuple(ANEURALNETWORKS_NO_ERROR, syncFence, dispatchCallback, timing);
}
// fallback to synchronous execution if sync_fence is not supported
// first wait for all sync fences to be ready.
LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
- for (const auto& fence_handle : wait_for) {
- if (!fence_handle.getNativeHandle()) {
+ for (const auto& fenceHandle : waitFor) {
+ if (!fenceHandle.getNativeHandle()) {
return std::make_tuple(ANEURALNETWORKS_BAD_DATA, hal::hidl_handle(nullptr), nullptr,
timing);
}
- int sync_fd = fence_handle.getNativeHandle()->data[0];
- if (sync_fd <= 0) {
+ int syncFd = fenceHandle.getNativeHandle()->data[0];
+ if (syncFd <= 0) {
return std::make_tuple(ANEURALNETWORKS_BAD_DATA, hal::hidl_handle(nullptr), nullptr,
timing);
}
- int r = sync_wait(sync_fd, -1);
+ int r = sync_wait(syncFd, -1);
if (r < 0) {
- LOG(ERROR) << "sync_wait failed, fd: " << sync_fd;
+ LOG(ERROR) << "sync_wait failed, fd: " << syncFd;
return std::make_tuple(ANEURALNETWORKS_OP_FAILED, hal::hidl_handle(nullptr), nullptr,
timing);
}
}
- int error_code;
- std::tie(error_code, std::ignore, timing) = executeSynchronously(request, measure, {});
- return std::make_tuple(error_code, hal::hidl_handle(nullptr), nullptr, timing);
+ int errorCode;
+ std::tie(errorCode, std::ignore, timing) = executeSynchronously(request, measure, deadline);
+ return std::make_tuple(errorCode, hal::hidl_handle(nullptr), nullptr, timing);
}
static std::pair<ErrorStatus, Capabilities> getCapabilitiesFunction(V1_2::IDevice* device) {
diff --git a/runtime/VersionedInterfaces.h b/runtime/VersionedInterfaces.h
index e1cae7d..251205a 100644
--- a/runtime/VersionedInterfaces.h
+++ b/runtime/VersionedInterfaces.h
@@ -697,25 +697,39 @@
* Launch a fenced asynchronous execution on a prepared model.
*
* The execution is performed asynchronously with respect to the caller.
- * executeFenced must fully validate the request, and only accept one that is
- * guaranteed to be completed, unless a hardware failure or kernel panic happens on the device.
- * If there is an error during validation, executeFenced must immediately return with
- * the corresponding ErrorStatus. If the request is valid and there is no error launching,
+ * executeFenced must fully validate the request. If there is an error during validation,
+ * executeFenced must immediately return with the corresponding ErrorStatus. If the inputs
+ * to the function are valid and there is no error launching,
* executeFenced must dispatch an asynchronous task to perform the execution in the
- * background, and immediately return with ErrorStatus::NONE, a sync_fence that will be
+ * background, and immediately return with ErrorStatus::NONE, a sync fence that will be
* signaled once the execution is completed, and a callback that can be used by the client
- * to query the duration and runtime error status. If the asynchronous task fails to launch,
- * executeFenced must immediately return with ErrorStatus::GENERAL_FAILURE.
- * The execution must wait for all the sync fences (if any) in wait_for to be signaled
- * before starting the actual execution.
+ * to query the duration and runtime error status. If the task has finished
+ * before the call returns, an empty handle may be returned for the syncFence. If the
+ * asynchronous task fails to launch, executeFenced must immediately return with
+ * ErrorStatus::GENERAL_FAILURE, an empty handle for the syncFence, and nullptr
+ * for callback. The execution must wait for all the sync fences (if any) in waitFor to be
+ * signaled before starting the actual execution.
*
- * If any of sync fences in wait_for changes to error status after the executeFenced
- * call succeeds, the driver must immediately set the returned sync_fence to error status.
+ * If any of sync fences in waitFor changes to error status after the executeFenced
+ * call succeeds, the driver must immediately set the returned syncFence to error status.
*
* When the asynchronous task has finished its execution, it must
- * immediately signal the sync_fence created when dispatching. And after
- * the sync_fence is signaled, the task must not modify the content of
- * any data object referenced by 'request'.
+ * immediately signal the syncFence returned from executeFenced call. After
+ * the syncFence is signaled, the task must not modify the content of
+ * any data object referenced by 'request' (described by the
+ * {@link @1.0::DataLocation} of a {@link @1.0::RequestArgument}).
+ *
+ * executeFenced can be called with an optional deadline and an optional duration.
+ * If the execution is not able to complete before the provided deadline or within
+ * the timeout duration, whichever comes earlier, the
+ * execution must be aborted, and either {@link
+ * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+ * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+ * to an abort must be sent the same way as other errors, described above.
+ * If the service reports that it does not support execution deadlines via
+ * IDevice::supportsDeadlines, and executeFenced is called with a
+ * deadline, then the argument is invalid, and
+ * {@link ErrorStatus::INVALID_ARGUMENT} must be returned.
*
* Any number of calls to the executeFenced, execute* and executeSynchronously*
* functions, in any combination, may be made concurrently, even on the same
@@ -723,10 +737,15 @@
*
* @param request The input and output information on which the prepared
* model is to be executed.
- * @param wait_for A vector of sync fence file descriptors. The execution must
- * wait for all sync fence to be signaled before starting the
- * task.
+ * @param waitFor A vector of sync fence file descriptors. The execution must
+     *                wait for all sync fences to be signaled before starting the
+ * task.
* @param measure Specifies whether or not to measure duration of the execution.
+ * @param deadline The time by which execution must complete. If the
+ * execution cannot be finished by the deadline, the
+ * execution must be aborted.
+ * @param timeoutDurationAfterFence The maximum timeout duration within which execution must
+ * complete after all sync fences in waitFor are signaled.
* @return A tuple consisting of:
* - Error code of the dispatch call.
* - A sync_fence that will be triggered when the task is completed.
@@ -739,8 +758,9 @@
* returned or optional timing information is returned
*/
std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
- const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& wait_for,
- hal::MeasureTiming measure);
+ const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
+ hal::MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+ const hal::OptionalTimeoutDuration& timeoutDurationAfterFence);
private:
friend class VersionedIDevice;
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index c6b6c2b..540e598 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -5949,7 +5949,16 @@
* EXCEPTION: If the input is optional and omitted
* (by passing nullptr for buffer to
* {@link ANeuralNetworksExecution_setInput}) then it need
- * not have a fully specified tensor operand type.</li></ul>
+ * not have a fully specified tensor operand type.</li>
+ * <li>The operand is a model output (see
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs})
+ * and is to be used with
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * A fully specified tensor operand type must either be provided
+ * to {@link ANeuralNetworksModel_addOperand}; or it must be
+ * provided to the corresponding
+ * {@link ANeuralNetworksExecution_setOutput}, or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li></ul>
*
* A tensor operand type of specified rank but some number of
* unspecified dimensions is represented by setting dimensionCount to
@@ -6781,6 +6790,20 @@
// such as that of the runtime itself and the IPC needed for the runtime to
// communicate with the driver.
ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+ // Execution time on hardware, after all dependencies have been signaled.
+ // If no dependencies specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+ // Execution time in driver, after all dependencies have been signaled. Excludes
+ // overhead such as that of the runtime itself and the IPC needed for the runtime
+ // to communicate with the driver.
+ // If no dependencies specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
} DurationCode;
/**
@@ -7633,9 +7656,10 @@
* If the device is not able to complete the execution within the specified
* duration, the execution must be aborted. The timeout duration begins at a
* call to one of:
- * - {@link ANeuralNetworksExecution_startCompute}
- * - {@link ANeuralNetworksExecution_compute}
* - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
*
* By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
* the timeout duration for execution is considered infinite.
@@ -7756,6 +7780,21 @@
* normally, the execution will fail, and {@link ANeuralNetworksEvent_wait} on the returned
* event will return an error.
*
+ * The function will return an error if any of the execution outputs has a tensor operand type
+ * that is not fully specified.
+ *
+ * The function can be passed a timeout duration in nanoseconds.
+ * The duration begins when all waitFor sync fences have been signaled, and can be used
+ * together with {@link ANeuralNetworksExecution_setTimeout} which specifies the
+ * maximum timeout duration beginning at the call to
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have been created
+ * from an {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1, and
+ * the device must support execution timeout as indicated by
+ * {@link ANeuralNetworksDevice_supportsExecutionTimeout}, otherwise this
+ * function will fail with ANEURALNETWORKS_BAD_DATA.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* See {@link ANeuralNetworksExecution_compute} for synchronous execution.
@@ -7766,6 +7805,9 @@
* @param dependencies A set of depending events. The actual evaluation will not start
* until all the events are signaled.
* @param num_dependencies The number of events in the dependencies set.
+ * @param duration The maximum length of time in nanoseconds within which execution must
+ * complete after all dependencies are signaled. If set to 0, the timeout
+ * duration is considered infinite.
* @param event The event that will be signaled on completion. event is set to
* NULL if there's an error.
*
@@ -7775,7 +7817,8 @@
*/
int ANeuralNetworksExecution_startComputeWithDependencies(
ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
- uint32_t num_dependencies, ANeuralNetworksEvent** event) __INTRODUCED_IN(30);
+ uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event)
+ __INTRODUCED_IN(30);
#endif // __ANDROID_API__ >= __ANDROID_API_R__
diff --git a/runtime/test/TestCompilationCaching.cpp b/runtime/test/TestCompilationCaching.cpp
index bea40ce..cc5a273 100644
--- a/runtime/test/TestCompilationCaching.cpp
+++ b/runtime/test/TestCompilationCaching.cpp
@@ -130,6 +130,7 @@
return Void();
}
Return<void> executeFenced(const hal::Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
+ const OptionalTimePoint&, const OptionalTimeoutDuration&,
executeFenced_cb cb) {
cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
return Void();
diff --git a/runtime/test/TestExecution.cpp b/runtime/test/TestExecution.cpp
index 61d4aeb..cd3b669 100644
--- a/runtime/test/TestExecution.cpp
+++ b/runtime/test/TestExecution.cpp
@@ -162,6 +162,7 @@
}
}
Return<void> executeFenced(const V1_3::Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
+ const OptionalTimePoint&, const OptionalTimeoutDuration&,
executeFenced_cb cb) override {
cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
return Void();
diff --git a/runtime/test/TestGenerated.cpp b/runtime/test/TestGenerated.cpp
index 5842f0f..85c0620 100644
--- a/runtime/test/TestGenerated.cpp
+++ b/runtime/test/TestGenerated.cpp
@@ -86,6 +86,9 @@
DynamicOutputShapeTest() { mTestDynamicOutputShape = true; }
};
+// Tag for the fenced execute tests
+class FencedComputeTest : public GeneratedTests {};
+
// Tag for the generated validation tests
class GeneratedValidationTests : public GeneratedTests {
protected:
@@ -488,12 +491,6 @@
execute(testModel);
Execution::setComputeMode(oldComputeMode);
}
-
-TEST_P(GeneratedTests, Fenced) {
- const auto oldComputeMode = Execution::setComputeMode(Execution::ComputeMode::FENCED);
- execute(testModel);
- Execution::setComputeMode(oldComputeMode);
-}
#else
TEST_P(GeneratedTests, Test) {
execute(testModel);
@@ -517,6 +514,12 @@
execute(testModel);
}
+TEST_P(FencedComputeTest, Test) {
+ const auto oldComputeMode = Execution::setComputeMode(Execution::ComputeMode::FENCED);
+ execute(testModel);
+ Execution::setComputeMode(oldComputeMode);
+}
+
INSTANTIATE_GENERATED_TEST(GeneratedTests,
[](const TestModel& testModel) { return !testModel.expectFailure; });
@@ -538,4 +541,11 @@
});
});
+INSTANTIATE_GENERATED_TEST(FencedComputeTest, [](const TestModel& testModel) {
+ return !testModel.expectFailure &&
+ std::all_of(testModel.outputIndexes.begin(), testModel.outputIndexes.end(),
+ [&testModel](uint32_t index) {
+ return testModel.operands[index].data.size() > 0;
+ });
+});
} // namespace android::nn::generated_tests
diff --git a/runtime/test/TestNeuralNetworksWrapper.h b/runtime/test/TestNeuralNetworksWrapper.h
index 3111d14..4a677ce 100644
--- a/runtime/test/TestNeuralNetworksWrapper.h
+++ b/runtime/test/TestNeuralNetworksWrapper.h
@@ -380,7 +380,7 @@
ANeuralNetworksEvent* event = nullptr;
Result result =
static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies(
- mExecution, nullptr, 0, &event));
+ mExecution, nullptr, 0, 0, &event));
if (result != Result::NO_ERROR) {
return result;
}
diff --git a/runtime/test/TestPartitioning.cpp b/runtime/test/TestPartitioning.cpp
index c9b34f3..cfdbd49 100644
--- a/runtime/test/TestPartitioning.cpp
+++ b/runtime/test/TestPartitioning.cpp
@@ -324,6 +324,7 @@
return Void();
}
Return<void> executeFenced(const Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
+ const OptionalTimePoint&, const OptionalTimeoutDuration&,
executeFenced_cb cb) {
cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
return Void();
diff --git a/runtime/test/TestTrivialModel.cpp b/runtime/test/TestTrivialModel.cpp
index 557b2aa..6bc2b26 100644
--- a/runtime/test/TestTrivialModel.cpp
+++ b/runtime/test/TestTrivialModel.cpp
@@ -168,7 +168,7 @@
ANeuralNetworksEvent* event1;
ANeuralNetworksExecution* execution1_handle = execution1.getHandle();
ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution1_handle, nullptr, 0,
- &event1),
+ 0, &event1),
ANEURALNETWORKS_NO_ERROR);
// Start the second execution which will wait for the first one.
@@ -179,7 +179,7 @@
ANeuralNetworksEvent* event2;
ANeuralNetworksExecution* execution2_handle = execution2.getHandle();
ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2_handle, &event1, 1,
- &event2),
+ 0, &event2),
ANEURALNETWORKS_NO_ERROR);
// Wait for the second event.
ASSERT_EQ(ANeuralNetworksEvent_wait(event2), ANEURALNETWORKS_NO_ERROR);
diff --git a/runtime/test/TestValidation.cpp b/runtime/test/TestValidation.cpp
index 8254794..b4204be 100644
--- a/runtime/test/TestValidation.cpp
+++ b/runtime/test/TestValidation.cpp
@@ -1126,7 +1126,7 @@
{
ANeuralNetworksEvent* event;
ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution, nullptr,
- 0, &event),
+ 0, 0, &event),
ANEURALNETWORKS_BAD_STATE);
}
};
@@ -1161,7 +1161,7 @@
case ExecutionType::FENCED: {
ANeuralNetworksEvent* event;
ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution, nullptr,
- 0, &event),
+ 0, 0, &event),
ANEURALNETWORKS_NO_ERROR);
testTooLate();
ASSERT_EQ(ANeuralNetworksEvent_wait(event), ANEURALNETWORKS_NO_ERROR);
@@ -1611,9 +1611,9 @@
EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution1, 0, nullptr, output0, sizeof(output0)),
ANEURALNETWORKS_NO_ERROR);
ANeuralNetworksEvent* event1 = nullptr;
- EXPECT_EQ(
- ANeuralNetworksExecution_startComputeWithDependencies(execution1, nullptr, 0, &event1),
- ANEURALNETWORKS_NO_ERROR);
+ EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution1, nullptr, 0, 0,
+ &event1),
+ ANEURALNETWORKS_NO_ERROR);
EXPECT_EQ(ANeuralNetworksEvent_getSyncFenceFd(event1, nullptr),
ANEURALNETWORKS_UNEXPECTED_NULL);
@@ -1622,16 +1622,17 @@
ANeuralNetworksExecution* execution2;
ANeuralNetworksEvent* event2 = nullptr;
EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution2), ANEURALNETWORKS_NO_ERROR);
- EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(nullptr, &event1, 1, &event2),
+ EXPECT_EQ(
+ ANeuralNetworksExecution_startComputeWithDependencies(nullptr, &event1, 1, 0, &event2),
+ ANEURALNETWORKS_UNEXPECTED_NULL);
+ EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, nullptr, 1, 0,
+ &event2),
ANEURALNETWORKS_UNEXPECTED_NULL);
- EXPECT_EQ(
- ANeuralNetworksExecution_startComputeWithDependencies(execution2, nullptr, 1, &event2),
- ANEURALNETWORKS_UNEXPECTED_NULL);
- EXPECT_EQ(
- ANeuralNetworksExecution_startComputeWithDependencies(execution2, &event1, 1, nullptr),
- ANEURALNETWORKS_UNEXPECTED_NULL);
+ EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, &event1, 1, 0,
+ nullptr),
+ ANEURALNETWORKS_UNEXPECTED_NULL);
ANeuralNetworksEvent* wait_for_list[] = {event1, nullptr};
- EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, wait_for_list, 2,
+ EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, wait_for_list, 2, 0,
&event2),
ANEURALNETWORKS_UNEXPECTED_NULL);
diff --git a/tools/api/NeuralNetworks.t b/tools/api/NeuralNetworks.t
index b6bfdde..8285372 100644
--- a/tools/api/NeuralNetworks.t
+++ b/tools/api/NeuralNetworks.t
@@ -588,7 +588,16 @@
* EXCEPTION: If the input is optional and omitted
* (by passing nullptr for buffer to
* {@link ANeuralNetworksExecution_setInput}) then it need
- * not have a fully specified tensor operand type.</li></ul>
+ * not have a fully specified tensor operand type.</li>
+ * <li>The operand is a model output (see
+ * {@link ANeuralNetworksModel_identifyInputsAndOutputs})
+ * and is to be used with
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * A fully specified tensor operand type must either be provided
+ * to {@link ANeuralNetworksModel_addOperand}; or it must be
+ * provided to the corresponding
+ * {@link ANeuralNetworksExecution_setOutput}, or
+ * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li></ul>
*
* A tensor operand type of specified rank but some number of
* unspecified dimensions is represented by setting dimensionCount to
@@ -1420,6 +1429,20 @@
// such as that of the runtime itself and the IPC needed for the runtime to
// communicate with the driver.
ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+ // Execution time on hardware, after all dependencies have been signaled.
+ // If no dependencies specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+ // Execution time in driver, after all dependencies have been signaled. Excludes
+ // overhead such as that of the runtime itself and the IPC needed for the runtime
+ // to communicate with the driver.
+ // If no dependencies specified (for example, if the execution was scheduled other
+ // than with {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+ // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+ // Available since API level 30.
+ ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
} DurationCode;
/**
@@ -2272,9 +2295,10 @@
* If the device is not able to complete the execution within the specified
* duration, the execution must be aborted. The timeout duration begins at a
* call to one of:
- * - {@link ANeuralNetworksExecution_startCompute}
- * - {@link ANeuralNetworksExecution_compute}
* - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
*
* By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
* the timeout duration for execution is considered infinite.
@@ -2395,6 +2419,21 @@
* normally, the execution will fail, and {@link ANeuralNetworksEvent_wait} on the returned
* event will return an error.
*
+ * The function will return an error if any of the execution outputs has a tensor operand type
+ * that is not fully specified.
+ *
+ * The function can be passed a timeout duration in nanoseconds.
+ * The duration begins when all waitFor sync fences have been signaled, and can be used
+ * together with {@link ANeuralNetworksExecution_setTimeout} which specifies the
+ * maximum timeout duration beginning at the call to
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have been created
+ * from an {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1, and
+ * the device must support execution timeout as indicated by
+ * {@link ANeuralNetworksDevice_supportsExecutionTimeout}, otherwise this
+ * function will fail with ANEURALNETWORKS_BAD_DATA.
+ *
* See {@link ANeuralNetworksExecution} for information on multithreaded usage.
*
* See {@link ANeuralNetworksExecution_compute} for synchronous execution.
@@ -2405,6 +2444,9 @@
* @param dependencies A set of depending events. The actual evaluation will not start
* until all the events are signaled.
* @param num_dependencies The number of events in the dependencies set.
+ * @param duration The maximum length of time in nanoseconds within which execution must
+ * complete after all dependencies are signaled. If set to 0, the timeout
+ * duration is considered infinite.
* @param event The event that will be signaled on completion. event is set to
* NULL if there's an error.
*
@@ -2414,7 +2456,8 @@
*/
int ANeuralNetworksExecution_startComputeWithDependencies(
ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
- uint32_t num_dependencies, ANeuralNetworksEvent** event) __INTRODUCED_IN(30);
+ uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event)
+ __INTRODUCED_IN(30);
#endif // __ANDROID_API__ >= __ANDROID_API_R__