Update sync fence related APIs

 - Allow ANeuralNetworksExecution_startComputeWithDependencies
   to measure execution duration after all dependencies are done.
 - Allow ANeuralNetworksExecution_startComputeWithDependencies
   to specify a timeout duration that begins once all dependencies
   (the gating sync fences) have been signaled.

Bug: 142778241
Bug: 136739795
Test: mm
Test: NNAPI CTS & VTS tests
Change-Id: Ie32e30b7d1cb98882f3084c1741e975d3e39d970
Merged-In: Ie32e30b7d1cb98882f3084c1741e975d3e39d970
(cherry picked from commit 6a0c2ed41976ee4b57a97e670f11d1c6c25a8df6)
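
For illustration only, a minimal client-side sketch of the updated call
(setup of the execution and dependency events is assumed to exist
elsewhere; error handling is elided):

    // "execution", "deps", and "numDeps" are hypothetical names for
    // objects created earlier. The new 4th argument is the post-fence
    // timeout in nanoseconds; 0 means no timeout.
    ANeuralNetworksEvent* event = nullptr;
    const uint64_t kTimeoutNs = 1000000000ull;  // 1 s after all deps signal
    int n = ANeuralNetworksExecution_startComputeWithDependencies(
            execution, deps, numDeps, kTimeoutNs, &event);
    if (n == ANEURALNETWORKS_NO_ERROR) {
        ANeuralNetworksEvent_wait(event);
        ANeuralNetworksEvent_free(event);
    }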
diff --git a/common/include/HalInterfaces.h b/common/include/HalInterfaces.h
index b885675..2f20afc 100644
--- a/common/include/HalInterfaces.h
+++ b/common/include/HalInterfaces.h
@@ -95,6 +95,7 @@
 using V1_3::Operation;
 using V1_3::OperationType;
 using V1_3::OperationTypeRange;
+using V1_3::OptionalTimeoutDuration;
 using V1_3::OptionalTimePoint;
 using V1_3::Priority;
 using V1_3::Request;
diff --git a/driver/sample/SampleDriver.cpp b/driver/sample/SampleDriver.cpp
index 3d66903..8f68512 100644
--- a/driver/sample/SampleDriver.cpp
+++ b/driver/sample/SampleDriver.cpp
@@ -408,7 +408,9 @@
 }
 
 Return<void> SamplePreparedModel::executeFenced(const hal::Request&, const hidl_vec<hidl_handle>&,
-                                                MeasureTiming, executeFenced_cb cb) {
+                                                MeasureTiming, const OptionalTimePoint&,
+                                                const OptionalTimeoutDuration&,
+                                                executeFenced_cb cb) {
     // TODO(miaowang): implement me.
     cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
     return Void();
diff --git a/driver/sample/SampleDriver.h b/driver/sample/SampleDriver.h
index 4aabe9c..a3eff5e 100644
--- a/driver/sample/SampleDriver.h
+++ b/driver/sample/SampleDriver.h
@@ -139,7 +139,10 @@
             configureExecutionBurst_cb cb) override;
     hal::Return<void> executeFenced(const hal::Request& request,
                                     const hal::hidl_vec<hal::hidl_handle>& wait_for,
-                                    hal::MeasureTiming measure, executeFenced_cb callback) override;
+                                    hal::MeasureTiming measure,
+                                    const hal::OptionalTimePoint& deadline,
+                                    const hal::OptionalTimeoutDuration& duration,
+                                    executeFenced_cb callback) override;
 
    private:
     hal::Model mModel;
diff --git a/runtime/Event.h b/runtime/Event.h
index 90bc0b2..4981a1f 100644
--- a/runtime/Event.h
+++ b/runtime/Event.h
@@ -77,7 +77,9 @@
             // If there is a callback available, use the callback to get the error code.
             if (kFencedExecutionCallback != nullptr) {
                 const hal::Return<void> ret = kFencedExecutionCallback->getExecutionInfo(
-                        [&error](hal::ErrorStatus status, hal::Timing) { error = status; });
+                        [&error](hal::ErrorStatus status, hal::Timing, hal::Timing) {
+                            error = status;
+                        });
                 if (!ret.isOk()) {
                     error = hal::ErrorStatus::GENERAL_FAILURE;
                 }
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index b51cbdb..caede71 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -264,13 +264,16 @@
     // Timing might be reported through other compute method.
     // Only query the fenced callback if it is available, and we are not
     // updating mTiming to keep this method const.
-    Timing timing = mTiming;
+    Timing timingLaunched = mTiming;
+    Timing timingFenced = kNoTiming;
     if (mFencedExecutionCallback != nullptr) {
         ErrorStatus status;
         const Return<void> ret = mFencedExecutionCallback->getExecutionInfo(
-                [&status, &timing](ErrorStatus error, Timing t) {
+                [&status, &timingLaunched, &timingFenced](ErrorStatus error, Timing tLaunched,
+                                                          Timing tFenced) {
                     status = error;
-                    timing = t;
+                    timingLaunched = tLaunched;
+                    timingFenced = tFenced;
                 });
         if (!ret.isOk()) {
             *duration = UINT64_MAX;
@@ -284,10 +287,16 @@
     uint64_t microDuration = UINT64_MAX;
     switch (durationCode) {
         case ANEURALNETWORKS_DURATION_ON_HARDWARE:
-            microDuration = timing.timeOnDevice;
+            microDuration = timingLaunched.timeOnDevice;
             break;
         case ANEURALNETWORKS_DURATION_IN_DRIVER:
-            microDuration = timing.timeInDriver;
+            microDuration = timingLaunched.timeInDriver;
+            break;
+        case ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE:
+            microDuration = timingFenced.timeOnDevice;
+            break;
+        case ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER:
+            microDuration = timingFenced.timeInDriver;
             break;
         default:
             CHECK(!"unexpected");
@@ -552,17 +561,17 @@
 // allowFallback is set to true.
 static std::tuple<int, int, sp<hal::IFencedExecutionCallback>> startComputeFenced(
         ExecutionBuilder* executionBuilder, const ExecutionPlan& plan,
-        std::shared_ptr<ExecutionPlan::Controller> controller, const std::vector<int>& wait_for,
-        bool allowFallback) {
+        std::shared_ptr<ExecutionPlan::Controller> controller, const std::vector<int>& waitFor,
+        uint64_t timeoutDurationAfterFence, bool allowFallback) {
     CHECK(executionBuilder != nullptr);
     VLOG(EXECUTION) << "ExecutionBuilder::computeFenced (from plan, iteratively)";
     // Disallow fallback when the ExecutionPlan is simple on CPU.
     allowFallback &= !plan.isSimpleCpu();
 
-    // Initiate wait_for_fds, sync_fence for the first step.
-    std::vector<int> wait_for_fds = wait_for;
-    int sync_fence = -1;
-    sp<hal::IFencedExecutionCallback> computeFenced_callback;
+    // Initialize waitForFds and syncFence for the first step.
+    std::vector<int> waitForFds = waitFor;
+    int syncFence = -1;
+    sp<hal::IFencedExecutionCallback> computeFencedCallback;
 
     while (true) {
         VLOG(EXECUTION) << "looking for next StepExecutor";
@@ -573,7 +582,7 @@
         int n = plan.next(controller, &executor, &burstController);
         if (n != ANEURALNETWORKS_NO_ERROR) {
             if (allowFallback) break;
-            // Return -1 for the sync_fence_fd, and nullptr for the callback.
+            // Return -1 for the sync fence fd, and nullptr for the callback.
             return std::make_tuple(n, -1, nullptr);
         }
 
@@ -582,23 +591,27 @@
         if (executor == nullptr) {
             // If the final step returns a -1 for sync fence, the execution is finished.
             // Update the output shapes.
-            if (sync_fence == -1) {
+            if (syncFence == -1) {
                 // TODO(miaowang): support dynamic output shape only with memory domain.
                 // For now just return the initial output shapes.
                 executionBuilder->finish(ErrorStatus::NONE,
                                          executionBuilder->getInitialOutputShapes());
             }
-            return std::make_tuple(ANEURALNETWORKS_NO_ERROR, sync_fence, computeFenced_callback);
+            return std::make_tuple(ANEURALNETWORKS_NO_ERROR, syncFence, computeFencedCallback);
         }
         const bool executorIsCpu = executor->isCpu();
 
         // Attempt to execute a single step of the execution.
-        auto [stepN, sync_fd, d_callback] = executor->computeFenced(wait_for_fds);
+        auto [stepN, syncFd, callback] =
+                executor->computeFenced(waitForFds, timeoutDurationAfterFence);
 
-        // Update wait_for_fds, sync_fence for the next step.
-        sync_fence = sync_fd;
-        computeFenced_callback = d_callback;
-        wait_for_fds = {sync_fd};
+        // Update waitForFds, syncFence for the next step.
+        syncFence = syncFd;
+        computeFencedCallback = callback;
+        waitForFds.clear();
+        if (syncFd > 0) {
+            waitForFds = {syncFd};
+        }
 
         // If execution was successful, continue to next step.
         if (stepN == ANEURALNETWORKS_NO_ERROR) {
@@ -625,30 +638,49 @@
     // occurred during the step executions. Instead, do a full execution
     // fallback on the CPU.
     VLOG(EXECUTION) << "Performing full fallback on the CPU.";
-    for (int sync_fd : wait_for) {
-        if (sync_fd > 0) {
-            int r = sync_wait(sync_fd, -1);
+    for (int syncFd : waitFor) {
+        if (syncFd > 0) {
+            int r = sync_wait(syncFd, -1);
             if (r < 0) {
-                VLOG(EXECUTION) << "sync_wait failed, fd: " << sync_fd;
+                VLOG(EXECUTION) << "sync_wait failed, fd: " << syncFd;
                 return std::make_tuple(ANEURALNETWORKS_OP_FAILED, -1, nullptr);
             }
         }
     }
     auto [fullN, fullOutputShapes, fullTiming] = cpuFallbackFull(executionBuilder);
     const ErrorStatus fullStatus = convertResultCodeToErrorStatus(fullN);
-    sync_fence = -1;
+    syncFence = -1;
     executionBuilder->finish(fullStatus, fullOutputShapes);
     executionBuilder->reportTiming(fullTiming);
-    return std::make_tuple(fullN, sync_fence, nullptr);
+    return std::make_tuple(fullN, syncFence, nullptr);
 }
 
-int ExecutionBuilder::computeFenced(const std::vector<int>& wait_for, int* sync_fence) {
-    CHECK(sync_fence != nullptr);
+int ExecutionBuilder::computeFenced(const std::vector<int>& waitFor,
+                                    uint64_t timeoutDurationAfterFence, int* syncFence) {
+    CHECK(syncFence != nullptr);
     if (mStarted) {
         LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
                       " called on an execution that has already started";
         return ANEURALNETWORKS_BAD_STATE;
     }
+    if (timeoutDurationAfterFence > 0) {
+        if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
+            LOG(ERROR)
+                    << "ANeuralNetworksExecution_startComputeWithDependencies called with non-zero "
+                       "duration on an ANeuralNetworksExecution "
+                       "created from an ANeuralNetworksCompilation that was not created by "
+                       "ANeuralNetworksCompilation_createForDevices with numDevices = 1";
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+        const auto& device = mCompilation->mDevices.front();
+        const bool supportsExecutionDeadline = device->supportsDeadlines().second;
+        if (!supportsExecutionDeadline) {
+            LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies called with "
+                          "non-zero duration on device that does not support "
+                          "execution timeouts.";
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+    }
     for (auto& p : mInputs) {
         if (p.state == ModelArgumentInfo::UNSPECIFIED) {
             LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
@@ -663,14 +695,23 @@
             return ANEURALNETWORKS_BAD_DATA;
         }
     }
+    for (uint32_t i = 0; i < mOutputs.size(); i++) {
+        if (mOutputs[i].state != ModelArgumentInfo::HAS_NO_VALUE &&
+            !checkDimensionInfo(mModel->getOutputOperand(i), nullptr,
+                                "ANeuralNetworksExecution_startComputeWithDependencies", false)) {
+            LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
+                          " not all outputs have fully specified dimensions";
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+    }
     mStarted = true;
     const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
     std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this, nullptr);
     VLOG(EXECUTION) << "ExecutionBuilder::computeFenced";
     int result;
-    std::tie(result, mSyncFenceFd, mFencedExecutionCallback) =
-            startComputeFenced(this, *mPlan, controller, wait_for, allowFallback);
-    *sync_fence = mSyncFenceFd;
+    std::tie(result, mSyncFenceFd, mFencedExecutionCallback) = startComputeFenced(
+            this, *mPlan, controller, waitFor, timeoutDurationAfterFence, allowFallback);
+    *syncFence = mSyncFenceFd;
     return result;
 }
 
@@ -954,7 +995,7 @@
 }
 
 std::tuple<int, int, sp<hal::IFencedExecutionCallback>> StepExecutor::computeFenced(
-        const std::vector<int>& wait_for) {
+        const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence) {
     CHECK(mPreparedModel != nullptr);
 
     if (VLOG_IS_ON(EXECUTION)) {
@@ -963,12 +1004,20 @@
     }
 
     const MeasureTiming measure = measureTiming(mExecutionBuilder);
-    const auto [n, sync_fence, computeFenced_callback, timing] =
-            mPreparedModel->executeFenced(mInputs, mOutputs, mMemories, wait_for, measure);
-    if (sync_fence < 0 && computeFenced_callback == nullptr) {
+    const auto [timePointN, deadline] = makeTimePoint(mExecutionBuilder->getTimeoutDuration());
+    if (timePointN != ANEURALNETWORKS_NO_ERROR) {
+        return {timePointN, -1, nullptr};
+    }
+    OptionalTimeoutDuration otd;
+    if (timeoutDurationAfterFence > 0) {
+        otd.nanoseconds(timeoutDurationAfterFence);
+    }
+    const auto [n, syncFence, computeFencedCallback, timing] = mPreparedModel->executeFenced(
+            mInputs, mOutputs, mMemories, waitFor, measure, deadline, otd);
+    if (syncFence < 0 && computeFencedCallback == nullptr) {
         mExecutionBuilder->reportTiming(timing);
     }
-    return {n, sync_fence, computeFenced_callback};
+    return {n, syncFence, computeFencedCallback};
 }
 
 // For cpuFallback{Partial,Full}, recompile the model on CPU and then start compute.
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 9eee184..fdb4677 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -66,7 +66,8 @@
 
     std::optional<uint64_t> getTimeoutDuration() const;
 
-    int computeFenced(const std::vector<int>& wait_for, int* sync_fence);
+    int computeFenced(const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
+                      int* sync_fence);
 
     int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
         CHECK(synchronizationCallback != nullptr);
@@ -231,7 +232,7 @@
     // Perform fenced execution and return error_code, sync_fence_fd and a
     // callback.
     std::tuple<int, int, sp<hal::IFencedExecutionCallback>> computeFenced(
-            const std::vector<int>& wait_for);
+            const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence);
 
    private:
     void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
diff --git a/runtime/Manager.cpp b/runtime/Manager.cpp
index a6c3b4e..6122c5b 100644
--- a/runtime/Manager.cpp
+++ b/runtime/Manager.cpp
@@ -132,7 +132,9 @@
     std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
-            const std::vector<int>& wait_for, MeasureTiming measure) const override;
+            const std::vector<int>& waitFor, MeasureTiming measure,
+            const hal::OptionalTimePoint& deadline,
+            const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const override;
 
     std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
             bool preferPowerOverLatency) const override {
@@ -412,15 +414,16 @@
 }
 
 std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing>
-DriverPreparedModel::executeFenced(const std::vector<ModelArgumentInfo>& inputs,
-                                   const std::vector<ModelArgumentInfo>& outputs,
-                                   const MemoryTracker& memories, const std::vector<int>& wait_for,
-                                   hal::MeasureTiming measure) const {
+DriverPreparedModel::executeFenced(
+        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
+        const MemoryTracker& memories, const std::vector<int>& waitFor, hal::MeasureTiming measure,
+        const hal::OptionalTimePoint& deadline,
+        const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const {
     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
-
+    CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd > 0; }));
     // Make a copy of the memory tracker as we will append memory pools for pointer arguments.
     MemoryTracker localMemories = memories;
-    sp<hal::IFencedExecutionCallback> executeFenced_callback;
+    sp<hal::IFencedExecutionCallback> executeFencedCallback;
     hal::Timing timing = kNoTiming;
 
     // We separate the input & output pools so accelerators only need to copy
@@ -464,43 +467,38 @@
                         "DriverPreparedModel::executeFenced");
 
     int n = ANEURALNETWORKS_OP_FAILED;
-    hidl_vec<hidl_handle> wait_for_handles;
-    wait_for_handles.resize(wait_for.size());
-    for (uint32_t i = 0; i < wait_for.size(); i++) {
-        // Return if FD is invalid.
-        if (wait_for[i] <= 0) {
-            LOG(ERROR) << "Invalid file descriptor";
-            return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, timing};
-        }
+    hidl_vec<hidl_handle> waitForHandles;
+    waitForHandles.resize(waitFor.size());
+    for (uint32_t i = 0; i < waitFor.size(); i++) {
         native_handle_t* nativeHandle = native_handle_create(1, 0);
         if (nativeHandle == nullptr) {
             LOG(ERROR) << "Failed to create native_handle";
             return {n, -1, nullptr, timing};
         }
-        int dup_fd = dup(wait_for[i]);
-        if (dup_fd <= 0) {
+        int dupFd = dup(waitFor[i]);
+        if (dupFd <= 0) {
             LOG(ERROR) << "Unable to dup the file descriptor";
             return {n, -1, nullptr, timing};
         }
-        nativeHandle->data[0] = dup_fd;
+        nativeHandle->data[0] = dupFd;
         hidl_handle hidlHandle;
         hidlHandle.setTo(nativeHandle, /*shouldOwn=*/true);
-        wait_for_handles[i] = std::move(hidlHandle);
+        waitForHandles[i] = std::move(hidlHandle);
     }
 
-    hidl_handle sync_fence;
-    std::tie(n, sync_fence, executeFenced_callback, timing) =
-            mPreparedModel->executeFenced(request, wait_for_handles, measure);
+    hidl_handle syncFence;
+    std::tie(n, syncFence, executeFencedCallback, timing) = mPreparedModel->executeFenced(
+            request, waitForHandles, measure, deadline, timeoutDurationAfterFence);
 
     if (n != ANEURALNETWORKS_NO_ERROR) {
         VLOG(EXECUTION) << "**executeFenced failed**";
         return {n, -1, nullptr, timing};
     }
 
-    int sync_fence_fd = -1;
-    if (sync_fence.getNativeHandle()) {
-        sync_fence_fd = dup(sync_fence.getNativeHandle()->data[0]);
-        if (sync_fence_fd < 0) {
+    int syncFenceFd = -1;
+    if (syncFence.getNativeHandle()) {
+        syncFenceFd = dup(syncFence.getNativeHandle()->data[0]);
+        if (syncFenceFd < 0) {
             LOG(ERROR) << "Failed to dup the file descriptor";
             return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
         }
@@ -509,11 +507,11 @@
     // Then copy the output data from shared memory to the output buffers.
     if (outputPtrArgsMemory != nullptr) {
         NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "DriverPreparedModel::executeFenced");
-        if (sync_fence_fd > 0) {
-            int r = sync_wait(sync_fence_fd, -1);
+        if (syncFenceFd > 0) {
+            int r = sync_wait(syncFenceFd, -1);
             if (r < 0) {
-                LOG(ERROR) << "sync wait failed, fd: " << sync_fence_fd;
-                return {ANEURALNETWORKS_OP_FAILED, sync_fence_fd, nullptr, timing};
+                LOG(ERROR) << "sync wait failed, fd: " << syncFenceFd;
+                return {ANEURALNETWORKS_OP_FAILED, syncFenceFd, nullptr, timing};
             }
         }
         uint32_t ptrOutputIndex = 0;
@@ -527,7 +525,7 @@
     }
 
     VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
-    return {ANEURALNETWORKS_NO_ERROR, sync_fence_fd, executeFenced_callback, timing};
+    return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedCallback, timing};
 }
 
 // A special abstracted device for the CPU. Only one instance of this class will exist.
@@ -608,7 +606,9 @@
     std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
-            const std::vector<int>& wait_for, MeasureTiming measure) const override;
+            const std::vector<int>& wait_for, MeasureTiming measure,
+            const hal::OptionalTimePoint& deadline,
+            const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const override;
 
     // Prefer to use CpuPreparedModel::create.
     CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
@@ -678,15 +678,16 @@
 std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing>
 CpuPreparedModel::executeFenced(const std::vector<ModelArgumentInfo>& inputs,
                                 const std::vector<ModelArgumentInfo>& outputs,
-                                const MemoryTracker& memories, const std::vector<int>& wait_for,
-                                hal::MeasureTiming measure) const {
+                                const MemoryTracker& memories, const std::vector<int>& waitFor,
+                                hal::MeasureTiming measure, const hal::OptionalTimePoint&,
+                                const hal::OptionalTimeoutDuration&) const {
     VLOG(EXECUTION)
             << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
-    for (int sync_fd : wait_for) {
-        if (sync_fd > 0) {
-            int r = sync_wait(sync_fd, -1);
+    for (int syncFd : waitFor) {
+        if (syncFd > 0) {
+            int r = sync_wait(syncFd, -1);
             if (r < 0) {
-                LOG(ERROR) << "sync wait failed, fd: " << sync_fd;
+                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {UINT64_MAX, UINT64_MAX}};
             }
         }
diff --git a/runtime/Manager.h b/runtime/Manager.h
index 439d089..5b26dcf 100644
--- a/runtime/Manager.h
+++ b/runtime/Manager.h
@@ -66,7 +66,9 @@
     virtual std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
-            const std::vector<int>& wait_for, hal::MeasureTiming measure) const = 0;
+            const std::vector<int>& waitFor, hal::MeasureTiming measure,
+            const hal::OptionalTimePoint& deadline,
+            const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const = 0;
 
     virtual std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
             bool preferPowerOverLatency) const = 0;
diff --git a/runtime/NeuralNetworks.cpp b/runtime/NeuralNetworks.cpp
index 0b0c6cc..364319f 100644
--- a/runtime/NeuralNetworks.cpp
+++ b/runtime/NeuralNetworks.cpp
@@ -250,6 +250,10 @@
               "ANEURALNETWORKS_DURATION_ON_HARDWARE has changed");
 static_assert(ANEURALNETWORKS_DURATION_IN_DRIVER == 1,
               "ANEURALNETWORKS_DURATION_IN_DRIVER has changed");
+static_assert(ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE == 2,
+              "ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE has changed");
+static_assert(ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER == 3,
+              "ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER has changed");
 
 // Make sure that the constants are compatible with the values defined in
 // hardware/interfaces/neuralnetworks/1.0/types.hal.
@@ -1460,38 +1464,38 @@
     return m->setOperandExtensionData(index, data, length);
 }
 
-int ANeuralNetworksEvent_createFromSyncFenceFd(int sync_fence_fd, ANeuralNetworksEvent** event) {
+int ANeuralNetworksEvent_createFromSyncFenceFd(int syncFenceFd, ANeuralNetworksEvent** event) {
     if (event == nullptr) {
         LOG(ERROR) << "ANeuralNetworksEvent_createFromSyncFenceFd passed a nullptr";
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    if (sync_fence_fd <= 0) {
+    if (syncFenceFd <= 0) {
         LOG(ERROR) << "ANeuralNetworksEvent_createFromSyncFenceFd passed an invalid fd: "
-                   << sync_fence_fd;
+                   << syncFenceFd;
         *event = nullptr;
         return ANEURALNETWORKS_BAD_DATA;
     }
-    std::unique_ptr<SyncFenceEvent> e = std::make_unique<SyncFenceEvent>(sync_fence_fd, nullptr);
+    std::unique_ptr<SyncFenceEvent> e = std::make_unique<SyncFenceEvent>(syncFenceFd, nullptr);
     *event = reinterpret_cast<ANeuralNetworksEvent*>(e.release());
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* sync_fence_fd) {
-    if (sync_fence_fd == nullptr) {
+int ANeuralNetworksEvent_getSyncFenceFd(const ANeuralNetworksEvent* event, int* syncFenceFd) {
+    if (syncFenceFd == nullptr) {
         LOG(ERROR) << "ANeuralNetworksEvent_getSyncFenceFd passed a nullptr";
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    *sync_fence_fd = -1;
+    *syncFenceFd = -1;
     if (event == nullptr) {
         LOG(ERROR) << "ANeuralNetworksEvent_getSyncFenceFd passed a nullptr";
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
     const IEvent* e = reinterpret_cast<const IEvent*>(event);
     // The client owns the dupped fd, and is responsible for closing it.
-    *sync_fence_fd = e->getSyncFenceFd(/*shouldDup*/ true);
-    if (*sync_fence_fd <= 0) {
+    *syncFenceFd = e->getSyncFenceFd(/*shouldDup*/ true);
+    if (*syncFenceFd <= 0) {
         LOG(ERROR) << "ANeuralNetworksEvent_getSyncFenceFd unable to get valid sync_fence fd";
-        *sync_fence_fd = -1;
+        *syncFenceFd = -1;
         return ANEURALNETWORKS_OP_FAILED;
     }
     return ANEURALNETWORKS_NO_ERROR;
@@ -1499,38 +1503,38 @@
 
 int ANeuralNetworksExecution_startComputeWithDependencies(
         ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
-        uint32_t num_events, ANeuralNetworksEvent** event) {
+        uint32_t numOfDependencies, uint64_t duration, ANeuralNetworksEvent** event) {
     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "ANeuralNetworksExecution_startComputeWithDependencies");
     if (!event) {
         LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies passed a nullptr";
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
-    if ((!dependencies && num_events != 0) || !execution) {
+    if ((!dependencies && numOfDependencies != 0) || !execution) {
         LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies passed a nullptr";
         *event = nullptr;
         return ANEURALNETWORKS_UNEXPECTED_NULL;
     }
     ExecutionBuilder* r = reinterpret_cast<ExecutionBuilder*>(execution);
 
-    std::vector<int> wait_for_list;
-    for (uint32_t i = 0; i < num_events; i++) {
+    std::vector<int> waitForList;
+    for (uint32_t i = 0; i < numOfDependencies; i++) {
         if (!dependencies[i]) {
             LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies passed a nullptr";
             *event = nullptr;
             return ANEURALNETWORKS_UNEXPECTED_NULL;
         }
         const IEvent* e = reinterpret_cast<const IEvent*>(dependencies[i]);
-        int sync_fence_fd = e->getSyncFenceFd(/*should_dup*/ false);
-        if (sync_fence_fd < 0) {
+        int syncFenceFd = e->getSyncFenceFd(/*should_dup*/ false);
+        if (syncFenceFd < 0) {
             e->wait();
         } else {
-            wait_for_list.push_back(sync_fence_fd);
+            waitForList.push_back(syncFenceFd);
         }
     }
-    int sync_fence_to_signal = -1;
-    int n = r->computeFenced(wait_for_list, &sync_fence_to_signal);
+    int syncFenceToSignal = -1;
+    int n = r->computeFenced(waitForList, duration, &syncFenceToSignal);
     std::unique_ptr<SyncFenceEvent> e =
-            std::make_unique<SyncFenceEvent>(sync_fence_to_signal, r->getFencedExecutionCallback());
+            std::make_unique<SyncFenceEvent>(syncFenceToSignal, r->getFencedExecutionCallback());
     if (n != ANEURALNETWORKS_NO_ERROR) {
         *event = nullptr;
     } else {
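
As an aside, the fd-based event APIs exercised above allow interop with
external fences. A hedged sketch, where "acquireFenceFd" is a hypothetical
fence fd obtained from another subsystem:

    // SyncFenceEvent dups the fd internally, so the caller keeps ownership
    // of acquireFenceFd.
    ANeuralNetworksEvent* dep = nullptr;
    int n = ANeuralNetworksEvent_createFromSyncFenceFd(acquireFenceFd, &dep);
    // ... pass "dep" as a dependency to startComputeWithDependencies ...
    // Exporting a fence from an event yields a dupped fd that the caller
    // owns and must close:
    int fd = -1;
    if (ANeuralNetworksEvent_getSyncFenceFd(dep, &fd) == ANEURALNETWORKS_NO_ERROR) {
        close(fd);
    }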
diff --git a/runtime/VersionedInterfaces.cpp b/runtime/VersionedInterfaces.cpp
index 8fad344..b9da649 100644
--- a/runtime/VersionedInterfaces.cpp
+++ b/runtime/VersionedInterfaces.cpp
@@ -403,62 +403,63 @@
 }
 
 std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing>
-VersionedIPreparedModel::executeFenced(const hal::Request& request,
-                                       const hal::hidl_vec<hal::hidl_handle>& wait_for,
-                                       MeasureTiming measure) {
+VersionedIPreparedModel::executeFenced(
+        const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
+        MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+        const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) {
     // version 1.3+ HAL
     // TODO(miaowang): figure out the right coding style for the sync_fence related API.
-    hal::hidl_handle sync_fence;
-    sp<hal::IFencedExecutionCallback> dispatch_callback;
+    hal::hidl_handle syncFence;
+    sp<hal::IFencedExecutionCallback> dispatchCallback;
     hal::Timing timing = {UINT64_MAX, UINT64_MAX};
     if (mPreparedModelV1_3 != nullptr) {
-        ErrorStatus error_status;
+        ErrorStatus errorStatus;
         Return<void> ret = mPreparedModelV1_3->executeFenced(
-                request, wait_for, measure,
-                [&sync_fence, &error_status, &dispatch_callback](
-                        ErrorStatus error, const hidl_handle& sync_handle,
+                request, waitFor, measure, deadline, timeoutDurationAfterFence,
+                [&syncFence, &errorStatus, &dispatchCallback](
+                        ErrorStatus error, const hidl_handle& handle,
                         const sp<hal::IFencedExecutionCallback>& callback) {
-                    sync_fence = sync_handle;
-                    error_status = error;
-                    dispatch_callback = callback;
+                    syncFence = handle;
+                    errorStatus = error;
+                    dispatchCallback = callback;
                 });
         if (!ret.isOk()) {
             LOG(ERROR) << "executeFenced failure: " << ret.description();
             return std::make_tuple(ANEURALNETWORKS_OP_FAILED, hal::hidl_handle(nullptr), nullptr,
                                    timing);
         }
-        if (error_status != ErrorStatus::NONE) {
+        if (errorStatus != ErrorStatus::NONE) {
             LOG(ERROR) << "executeFenced returned "
-                       << toString(static_cast<ErrorStatus>(error_status));
-            return std::make_tuple(convertErrorStatusToResultCode(error_status),
+                       << toString(static_cast<ErrorStatus>(errorStatus));
+            return std::make_tuple(convertErrorStatusToResultCode(errorStatus),
                                    hal::hidl_handle(nullptr), nullptr, timing);
         }
-        return std::make_tuple(ANEURALNETWORKS_NO_ERROR, sync_fence, dispatch_callback, timing);
+        return std::make_tuple(ANEURALNETWORKS_NO_ERROR, syncFence, dispatchCallback, timing);
     }
 
     // fallback to synchronous execution if sync_fence is not supported
     // first wait for all sync fences to be ready.
     LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
-    for (const auto& fence_handle : wait_for) {
-        if (!fence_handle.getNativeHandle()) {
+    for (const auto& fenceHandle : waitFor) {
+        if (!fenceHandle.getNativeHandle()) {
             return std::make_tuple(ANEURALNETWORKS_BAD_DATA, hal::hidl_handle(nullptr), nullptr,
                                    timing);
         }
-        int sync_fd = fence_handle.getNativeHandle()->data[0];
-        if (sync_fd <= 0) {
+        int syncFd = fenceHandle.getNativeHandle()->data[0];
+        if (syncFd <= 0) {
             return std::make_tuple(ANEURALNETWORKS_BAD_DATA, hal::hidl_handle(nullptr), nullptr,
                                    timing);
         }
-        int r = sync_wait(sync_fd, -1);
+        int r = sync_wait(syncFd, -1);
         if (r < 0) {
-            LOG(ERROR) << "sync_wait failed, fd: " << sync_fd;
+            LOG(ERROR) << "sync_wait failed, fd: " << syncFd;
             return std::make_tuple(ANEURALNETWORKS_OP_FAILED, hal::hidl_handle(nullptr), nullptr,
                                    timing);
         }
     }
-    int error_code;
-    std::tie(error_code, std::ignore, timing) = executeSynchronously(request, measure, {});
-    return std::make_tuple(error_code, hal::hidl_handle(nullptr), nullptr, timing);
+    int errorCode;
+    std::tie(errorCode, std::ignore, timing) = executeSynchronously(request, measure, deadline);
+    return std::make_tuple(errorCode, hal::hidl_handle(nullptr), nullptr, timing);
 }
 
 static std::pair<ErrorStatus, Capabilities> getCapabilitiesFunction(V1_2::IDevice* device) {
diff --git a/runtime/VersionedInterfaces.h b/runtime/VersionedInterfaces.h
index e1cae7d..251205a 100644
--- a/runtime/VersionedInterfaces.h
+++ b/runtime/VersionedInterfaces.h
@@ -697,25 +697,39 @@
      * Launch a fenced asynchronous execution on a prepared model.
      *
      * The execution is performed asynchronously with respect to the caller.
-     * executeFenced must fully validate the request, and only accept one that is
-     * guaranteed to be completed, unless a hardware failure or kernel panic happens on the device.
-     * If there is an error during validation, executeFenced must immediately return with
-     * the corresponding ErrorStatus. If the request is valid and there is no error launching,
+     * executeFenced must fully validate the request. If there is an error during validation,
+     * executeFenced must immediately return with the corresponding ErrorStatus. If the inputs
+     * to the function are valid and there is no error,
      * executeFenced must dispatch an asynchronous task to perform the execution in the
-     * background, and immediately return with ErrorStatus::NONE, a sync_fence that will be
+     * background, and immediately return with ErrorStatus::NONE, a sync fence that will be
      * signaled once the execution is completed, and a callback that can be used by the client
-     * to query the duration and runtime error status. If the asynchronous task fails to launch,
-     * executeFenced must immediately return with ErrorStatus::GENERAL_FAILURE.
-     * The execution must wait for all the sync fences (if any) in wait_for to be signaled
-     * before starting the actual execution.
+     * to query the duration and runtime error status. If the task has finished
+     * before the call returns, an empty handle may be returned for the syncFence. If the
+     * asynchronous task fails to launch, executeFenced must immediately return with
+     * ErrorStatus::GENERAL_FAILURE, an empty handle for the syncFence, and nullptr
+     * for callback. The execution must wait for all the sync fences (if any) in waitFor to be
+     * signaled before starting the actual execution.
      *
-     * If any of sync fences in wait_for changes to error status after the executeFenced
-     * call succeeds, the driver must immediately set the returned sync_fence to error status.
+     * If any of sync fences in waitFor changes to error status after the executeFenced
+     * call succeeds, the driver must immediately set the returned syncFence to error status.
      *
      * When the asynchronous task has finished its execution, it must
-     * immediately signal the sync_fence created when dispatching. And after
-     * the sync_fence is signaled, the task must not modify the content of
-     * any data object referenced by 'request'.
+     * immediately signal the syncFence returned from executeFenced call. After
+     * the syncFence is signaled, the task must not modify the content of
+     * any data object referenced by 'request' (described by the
+     * {@link @1.0::DataLocation} of a {@link @1.0::RequestArgument}).
+     *
+     * executeFenced can be called with an optional deadline and an optional duration.
+     * If the execution is not able to complete before the provided deadline or within
+     * the timeout duration, whichever comes earlier, the
+     * execution must be aborted, and either {@link
+     * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+     * to an abort must be sent the same way as other errors, described above.
+     * If the service reports that it does not support execution deadlines via
+     * IDevice::supportsDeadlines, and executeFenced is called with a
+     * deadline, then the argument is invalid, and
+     * {@link ErrorStatus::INVALID_ARGUMENT} must be returned.
      *
      * Any number of calls to the executeFenced, execute* and executeSynchronously*
      * functions, in any combination, may be made concurrently, even on the same
@@ -723,10 +737,15 @@
      *
      * @param request The input and output information on which the prepared
      *                model is to be executed.
-     * @param wait_for A vector of sync fence file descriptors. The execution must
-     *                 wait for all sync fence to be signaled before starting the
-     *                 task.
+     * @param waitFor A vector of sync fence file descriptors. The execution must
+     *                wait for all sync fences to be signaled before starting the
+     *                task.
      * @param measure Specifies whether or not to measure duration of the execution.
+     * @param deadline The time by which execution must complete. If the
+     *                 execution cannot be finished by the deadline, the
+     *                 execution must be aborted.
+     * @param timeoutDurationAfterFence The maximum timeout duration within which execution must
+     *                                  complete after all sync fences in waitFor are signaled.
      * @return A tuple consisting of:
      *         - Error code of the dispatch call.
      *         - A sync_fence that will be triggered when the task is completed.
@@ -739,8 +758,9 @@
      *           returned or optional timing information is returned
      */
     std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
-            const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& wait_for,
-            hal::MeasureTiming measure);
+            const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
+            hal::MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+            const hal::OptionalTimeoutDuration& timeoutDurationAfterFence);
 
    private:
     friend class VersionedIDevice;
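
For context on the new parameters: OptionalTimeoutDuration is a HIDL
safe_union, so a consumer must check the discriminator before reading a
value. A hedged sketch (not part of this change) of how a driver might
read it:

    using android::hardware::neuralnetworks::V1_3::OptionalTimeoutDuration;

    // 0 below is used to mean "no post-fence timeout was requested".
    uint64_t timeoutNs = 0;
    if (timeoutDurationAfterFence.getDiscriminator() ==
        OptionalTimeoutDuration::hidl_discriminator::nanoseconds) {
        timeoutNs = timeoutDurationAfterFence.nanoseconds();
    }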
diff --git a/runtime/include/NeuralNetworks.h b/runtime/include/NeuralNetworks.h
index c6b6c2b..540e598 100644
--- a/runtime/include/NeuralNetworks.h
+++ b/runtime/include/NeuralNetworks.h
@@ -5949,7 +5949,16 @@
  *         EXCEPTION: If the input is optional and omitted
  *         (by passing nullptr for buffer to
  *         {@link ANeuralNetworksExecution_setInput}) then it need
- *         not have a fully specified tensor operand type.</li></ul>
+ *         not have a fully specified tensor operand type.</li>
+ *     <li>The operand is a model output (see
+ *         {@link ANeuralNetworksModel_identifyInputsAndOutputs})
+ *         and is to be used with
+ *         {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ *         A fully specified tensor operand type must either be provided
+ *         to {@link ANeuralNetworksModel_addOperand}; or it must be
+ *         provided to the corresponding
+ *         {@link ANeuralNetworksExecution_setOutput}, or
+ *         {@link ANeuralNetworksExecution_setOutputFromMemory}.</li></ul>
  *
  * A tensor operand type of specified rank but some number of
  * unspecified dimensions is represented by setting dimensionCount to
@@ -6781,6 +6790,20 @@
     // such as that of the runtime itself and the IPC needed for the runtime to
     // communicate with the driver.
     ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+    // Execution time on hardware, after all dependencies have been signaled.
+    // If no dependencies are specified (for example, if the execution was scheduled by
+    // means other than {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+    // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+    // Available since API level 30.
+    ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+    // Execution time in driver, after all dependencies have been signaled. Excludes
+    // overhead such as that of the runtime itself and the IPC needed for the runtime
+    // to communicate with the driver.
+    // If no dependencies are specified (for example, if the execution was scheduled by
+    // means other than {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+    // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+    // Available since API level 30.
+    ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
 } DurationCode;
 
 /**
@@ -7633,9 +7656,10 @@
  * If the device is not able to complete the execution within the specified
  * duration, the execution must be aborted. The timeout duration begins at a
  * call to one of:
- * - {@link ANeuralNetworksExecution_startCompute}
- * - {@link ANeuralNetworksExecution_compute}
  * - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
  *
  * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
  * the timeout duration for execution is considered infinite.
@@ -7756,6 +7780,21 @@
  * normally, the execution will fail, and {@link ANeuralNetworksEvent_wait} on the returned
  * event will return an error.
  *
+ * The function will return an error if any of the execution outputs has a tensor operand type
+ * that is not fully specified.
+ *
+ * The function can be passed a timeout duration in nanoseconds.
+ * The duration begins when all sync fences in dependencies have been signaled, and can be used
+ * together with {@link ANeuralNetworksExecution_setTimeout} which specifies the
+ * maximum timeout duration beginning at the call to
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have been created
+ * from an {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1, and
+ * the device must support execution timeout as indicated by
+ * {@link ANeuralNetworksDevice_supportsExecutionTimeout}, otherwise this
+ * function will fail with ANEURALNETWORKS_BAD_DATA.
+ *
  * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
  *
  * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
@@ -7766,6 +7805,9 @@
  * @param dependencies A set of depending events. The actual evaluation will not start
  *                     until all the events are signaled.
  * @param num_dependencies The number of events in the dependencies set.
+ * @param duration The maximum length of time in nanoseconds within which execution must
+ *                 complete after all dependencies are signaled. If set to 0, the timeout
+ *                 duration is considered infinite.
  * @param event The event that will be signaled on completion. event is set to
  *              NULL if there's an error.
  *
@@ -7775,7 +7817,8 @@
  */
 int ANeuralNetworksExecution_startComputeWithDependencies(
         ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
-        uint32_t num_dependencies, ANeuralNetworksEvent** event) __INTRODUCED_IN(30);
+        uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event)
+        __INTRODUCED_IN(30);
 
 #endif  // __ANDROID_API__ >= __ANDROID_API_R__
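
The two new duration codes pair with the existing
ANeuralNetworksExecution_getDuration query. A hedged sketch of reading
launched vs. fenced timing after a fenced execution completes (assumes
measurement was enabled with ANeuralNetworksExecution_setMeasureTiming):

    uint64_t onHardwareNs = 0, fencedOnHardwareNs = 0;
    ANeuralNetworksExecution_getDuration(
            execution, ANEURALNETWORKS_DURATION_ON_HARDWARE, &onHardwareNs);
    ANeuralNetworksExecution_getDuration(
            execution, ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE, &fencedOnHardwareNs);
    // UINT64_MAX indicates the measurement is unavailable
    // (see the ExecutionBuilder change above).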
 
diff --git a/runtime/test/TestCompilationCaching.cpp b/runtime/test/TestCompilationCaching.cpp
index bea40ce..cc5a273 100644
--- a/runtime/test/TestCompilationCaching.cpp
+++ b/runtime/test/TestCompilationCaching.cpp
@@ -130,6 +130,7 @@
             return Void();
         }
         Return<void> executeFenced(const hal::Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
+                                   const OptionalTimePoint&, const OptionalTimeoutDuration&,
                                    executeFenced_cb cb) {
             cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
             return Void();
diff --git a/runtime/test/TestExecution.cpp b/runtime/test/TestExecution.cpp
index 61d4aeb..cd3b669 100644
--- a/runtime/test/TestExecution.cpp
+++ b/runtime/test/TestExecution.cpp
@@ -162,6 +162,7 @@
         }
     }
     Return<void> executeFenced(const V1_3::Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
+                               const OptionalTimePoint&, const OptionalTimeoutDuration&,
                                executeFenced_cb cb) override {
         cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
         return Void();
diff --git a/runtime/test/TestGenerated.cpp b/runtime/test/TestGenerated.cpp
index 5842f0f..85c0620 100644
--- a/runtime/test/TestGenerated.cpp
+++ b/runtime/test/TestGenerated.cpp
@@ -86,6 +86,9 @@
     DynamicOutputShapeTest() { mTestDynamicOutputShape = true; }
 };
 
+// Tag for the fenced compute tests
+class FencedComputeTest : public GeneratedTests {};
+
 // Tag for the generated validation tests
 class GeneratedValidationTests : public GeneratedTests {
    protected:
@@ -488,12 +491,6 @@
     execute(testModel);
     Execution::setComputeMode(oldComputeMode);
 }
-
-TEST_P(GeneratedTests, Fenced) {
-    const auto oldComputeMode = Execution::setComputeMode(Execution::ComputeMode::FENCED);
-    execute(testModel);
-    Execution::setComputeMode(oldComputeMode);
-}
 #else
 TEST_P(GeneratedTests, Test) {
     execute(testModel);
@@ -517,6 +514,12 @@
     execute(testModel);
 }
 
+TEST_P(FencedComputeTest, Test) {
+    const auto oldComputeMode = Execution::setComputeMode(Execution::ComputeMode::FENCED);
+    execute(testModel);
+    Execution::setComputeMode(oldComputeMode);
+}
+
 INSTANTIATE_GENERATED_TEST(GeneratedTests,
                            [](const TestModel& testModel) { return !testModel.expectFailure; });
 
@@ -538,4 +541,11 @@
                        });
 });
 
+INSTANTIATE_GENERATED_TEST(FencedComputeTest, [](const TestModel& testModel) {
+    return !testModel.expectFailure &&
+           std::all_of(testModel.outputIndexes.begin(), testModel.outputIndexes.end(),
+                       [&testModel](uint32_t index) {
+                           return testModel.operands[index].data.size() > 0;
+                       });
+});
 }  // namespace android::nn::generated_tests
diff --git a/runtime/test/TestNeuralNetworksWrapper.h b/runtime/test/TestNeuralNetworksWrapper.h
index 3111d14..4a677ce 100644
--- a/runtime/test/TestNeuralNetworksWrapper.h
+++ b/runtime/test/TestNeuralNetworksWrapper.h
@@ -380,7 +380,7 @@
                 ANeuralNetworksEvent* event = nullptr;
                 Result result =
                         static_cast<Result>(ANeuralNetworksExecution_startComputeWithDependencies(
-                                mExecution, nullptr, 0, &event));
+                                mExecution, nullptr, 0, 0, &event));
                 if (result != Result::NO_ERROR) {
                     return result;
                 }
diff --git a/runtime/test/TestPartitioning.cpp b/runtime/test/TestPartitioning.cpp
index c9b34f3..cfdbd49 100644
--- a/runtime/test/TestPartitioning.cpp
+++ b/runtime/test/TestPartitioning.cpp
@@ -324,6 +324,7 @@
             return Void();
         }
         Return<void> executeFenced(const Request&, const hidl_vec<hidl_handle>&, MeasureTiming,
+                                   const OptionalTimePoint&, const OptionalTimeoutDuration&,
                                    executeFenced_cb cb) {
             cb(ErrorStatus::DEVICE_UNAVAILABLE, hidl_handle(nullptr), nullptr);
             return Void();
diff --git a/runtime/test/TestTrivialModel.cpp b/runtime/test/TestTrivialModel.cpp
index 557b2aa..6bc2b26 100644
--- a/runtime/test/TestTrivialModel.cpp
+++ b/runtime/test/TestTrivialModel.cpp
@@ -168,7 +168,7 @@
     ANeuralNetworksEvent* event1;
     ANeuralNetworksExecution* execution1_handle = execution1.getHandle();
     ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution1_handle, nullptr, 0,
-                                                                    &event1),
+                                                                    0, &event1),
               ANEURALNETWORKS_NO_ERROR);
 
     // Start the second execution which will wait for the first one.
@@ -179,7 +179,7 @@
     ANeuralNetworksEvent* event2;
     ANeuralNetworksExecution* execution2_handle = execution2.getHandle();
     ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2_handle, &event1, 1,
-                                                                    &event2),
+                                                                    0, &event2),
               ANEURALNETWORKS_NO_ERROR);
     // Wait for the second event.
     ASSERT_EQ(ANeuralNetworksEvent_wait(event2), ANEURALNETWORKS_NO_ERROR);
diff --git a/runtime/test/TestValidation.cpp b/runtime/test/TestValidation.cpp
index 8254794..b4204be 100644
--- a/runtime/test/TestValidation.cpp
+++ b/runtime/test/TestValidation.cpp
@@ -1126,7 +1126,7 @@
             {
                 ANeuralNetworksEvent* event;
                 ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution, nullptr,
-                                                                                0, &event),
+                                                                                0, 0, &event),
                           ANEURALNETWORKS_BAD_STATE);
             }
         };
@@ -1161,7 +1161,7 @@
             case ExecutionType::FENCED: {
                 ANeuralNetworksEvent* event;
                 ASSERT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution, nullptr,
-                                                                                0, &event),
+                                                                                0, 0, &event),
                           ANEURALNETWORKS_NO_ERROR);
                 testTooLate();
                 ASSERT_EQ(ANeuralNetworksEvent_wait(event), ANEURALNETWORKS_NO_ERROR);
@@ -1611,9 +1611,9 @@
     EXPECT_EQ(ANeuralNetworksExecution_setOutput(execution1, 0, nullptr, output0, sizeof(output0)),
               ANEURALNETWORKS_NO_ERROR);
     ANeuralNetworksEvent* event1 = nullptr;
-    EXPECT_EQ(
-            ANeuralNetworksExecution_startComputeWithDependencies(execution1, nullptr, 0, &event1),
-            ANEURALNETWORKS_NO_ERROR);
+    EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution1, nullptr, 0, 0,
+                                                                    &event1),
+              ANEURALNETWORKS_NO_ERROR);
 
     EXPECT_EQ(ANeuralNetworksEvent_getSyncFenceFd(event1, nullptr),
               ANEURALNETWORKS_UNEXPECTED_NULL);
@@ -1622,16 +1622,17 @@
     ANeuralNetworksExecution* execution2;
     ANeuralNetworksEvent* event2 = nullptr;
     EXPECT_EQ(ANeuralNetworksExecution_create(mCompilation, &execution2), ANEURALNETWORKS_NO_ERROR);
-    EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(nullptr, &event1, 1, &event2),
+    EXPECT_EQ(
+            ANeuralNetworksExecution_startComputeWithDependencies(nullptr, &event1, 1, 0, &event2),
+            ANEURALNETWORKS_UNEXPECTED_NULL);
+    EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, nullptr, 1, 0,
+                                                                    &event2),
               ANEURALNETWORKS_UNEXPECTED_NULL);
-    EXPECT_EQ(
-            ANeuralNetworksExecution_startComputeWithDependencies(execution2, nullptr, 1, &event2),
-            ANEURALNETWORKS_UNEXPECTED_NULL);
-    EXPECT_EQ(
-            ANeuralNetworksExecution_startComputeWithDependencies(execution2, &event1, 1, nullptr),
-            ANEURALNETWORKS_UNEXPECTED_NULL);
+    EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, &event1, 1, 0,
+                                                                    nullptr),
+              ANEURALNETWORKS_UNEXPECTED_NULL);
     ANeuralNetworksEvent* wait_for_list[] = {event1, nullptr};
-    EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, wait_for_list, 2,
+    EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution2, wait_for_list, 2, 0,
                                                                     &event2),
               ANEURALNETWORKS_UNEXPECTED_NULL);
 
diff --git a/tools/api/NeuralNetworks.t b/tools/api/NeuralNetworks.t
index b6bfdde..8285372 100644
--- a/tools/api/NeuralNetworks.t
+++ b/tools/api/NeuralNetworks.t
@@ -588,7 +588,16 @@
  *         EXCEPTION: If the input is optional and omitted
  *         (by passing nullptr for buffer to
  *         {@link ANeuralNetworksExecution_setInput}) then it need
- *         not have a fully specified tensor operand type.</li></ul>
+ *         not have a fully specified tensor operand type.</li>
+ *     <li>The operand is a model output (see
+ *         {@link ANeuralNetworksModel_identifyInputsAndOutputs})
+ *         and is to be used with
+ *         {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ *         A fully specified tensor operand type must either be provided
+ *         to {@link ANeuralNetworksModel_addOperand}; or it must be
+ *         provided to the corresponding
+ *         {@link ANeuralNetworksExecution_setOutput}, or
+ *         {@link ANeuralNetworksExecution_setOutputFromMemory}.</li></ul>
  *
  * A tensor operand type of specified rank but some number of
  * unspecified dimensions is represented by setting dimensionCount to
@@ -1420,6 +1429,20 @@
     // such as that of the runtime itself and the IPC needed for the runtime to
     // communicate with the driver.
     ANEURALNETWORKS_DURATION_IN_DRIVER = 1,
+    // Execution time on hardware, after all dependencies have been signaled.
+    // If no dependencies are specified (for example, if the execution was scheduled by
+    // means other than {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+    // reported time will be the same as ANEURALNETWORKS_DURATION_ON_HARDWARE.
+    // Available since API level 30.
+    ANEURALNETWORKS_FENCED_DURATION_ON_HARDWARE = 2,
+    // Execution time in driver, after all dependencies have been signaled. Excludes
+    // overhead such as that of the runtime itself and the IPC needed for the runtime
+    // to communicate with the driver.
+    // If no dependencies are specified (for example, if the execution was scheduled by
+    // means other than {@link ANeuralNetworksExecution_startComputeWithDependencies}), the
+    // reported time will be the same as ANEURALNETWORKS_DURATION_IN_DRIVER.
+    // Available since API level 30.
+    ANEURALNETWORKS_FENCED_DURATION_IN_DRIVER = 3,
 } DurationCode;
 
 /**
@@ -2272,9 +2295,10 @@
  * If the device is not able to complete the execution within the specified
  * duration, the execution must be aborted. The timeout duration begins at a
  * call to one of:
- * - {@link ANeuralNetworksExecution_startCompute}
- * - {@link ANeuralNetworksExecution_compute}
  * - {@link ANeuralNetworksExecution_burstCompute}
+ * - {@link ANeuralNetworksExecution_compute}
+ * - {@link ANeuralNetworksExecution_startCompute}
+ * - {@link ANeuralNetworksExecution_startComputeWithDependencies}
  *
  * By default (i.e., unless ANeuralNetworksExecution_setTimeout is called),
  * the timeout duration for execution is considered infinite.
@@ -2395,6 +2419,21 @@
  * normally, the execution will fail, and {@link ANeuralNetworksEvent_wait} on the returned
  * event will return an error.
  *
+ * The function will return an error if any of the execution outputs has a tensor operand type
+ * that is not fully specified.
+ *
+ * The function can be passed a timeout duration in nanoseconds.
+ * The duration begins when all sync fences in dependencies have been signaled, and can be used
+ * together with {@link ANeuralNetworksExecution_setTimeout} which specifies the
+ * maximum timeout duration beginning at the call to
+ * {@link ANeuralNetworksExecution_startComputeWithDependencies}.
+ * If the duration is non-zero, the {@link ANeuralNetworksExecution} must have been created
+ * from an {@link ANeuralNetworksCompilation} which in turn was created from
+ * {@link ANeuralNetworksCompilation_createForDevices} with numDevices = 1, and
+ * the device must support execution timeout as indicated by
+ * {@link ANeuralNetworksDevice_supportsExecutionTimeout}, otherwise this
+ * function will fail with ANEURALNETWORKS_BAD_DATA.
+ *
  * See {@link ANeuralNetworksExecution} for information on multithreaded usage.
  *
  * See {@link ANeuralNetworksExecution_compute} for synchronous execution.
@@ -2405,6 +2444,9 @@
  * @param dependencies A set of depending events. The actual evaluation will not start
  *                     until all the events are signaled.
  * @param num_dependencies The number of events in the dependencies set.
+ * @param duration The maximum length of time in nanoseconds within which execution must
+ *                 complete after all dependencies are signaled. If set to 0, the timeout
+ *                 duration is considered infinite.
  * @param event The event that will be signaled on completion. event is set to
  *              NULL if there's an error.
  *
@@ -2414,7 +2456,8 @@
  */
 int ANeuralNetworksExecution_startComputeWithDependencies(
         ANeuralNetworksExecution* execution, const ANeuralNetworksEvent* const* dependencies,
-        uint32_t num_dependencies, ANeuralNetworksEvent** event) __INTRODUCED_IN(30);
+        uint32_t num_dependencies, uint64_t duration, ANeuralNetworksEvent** event)
+        __INTRODUCED_IN(30);
 
 #endif  // __ANDROID_API__ >= __ANDROID_API_R__