Clean up NNAPI QoS Deadline

This CL makes the following changes:
* In the case where a timeout duration would overflow the
  OptionalTimePoint, this CL now clamps the deadline to the maximum
  time point instead of returning an error. Accordingly, the Overflow
  validation tests are renamed to Maximum, and their expected result
  changes from BAD_DATA to NO_ERROR.
* Previously, hal::OptionalTimePoint was used throughout the runtime
  code. However, an OptionalTimePoint cannot be compared directly
  against the current time. This CL therefore changes the runtime
  deadline type to std::optional<Deadline>, where Deadline is an alias
  for std::chrono::steady_clock::time_point.
* Implements a simple deadline abort within the sample driver and the
  runtime's CPU (i.e., non-driver) path. A sketch of the new helpers
  appears after this list.
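
For reference, here is a minimal standalone sketch of the clamping and
abort-check semantics described above, mirroring the new makeDeadline
and hasDeadlinePassed helpers that this CL adds to common/Utils.cpp
(the Deadline alias itself is defined in common/include/Utils.h):

    #include <chrono>
    #include <cstdint>
    #include <optional>

    using Deadline = std::chrono::steady_clock::time_point;

    // Clamp to Deadline::max() rather than overflowing when the
    // requested duration exceeds the representable remaining time.
    Deadline makeDeadline(uint64_t duration) {
        const auto maxTime = Deadline::max();
        const auto currentTime = std::chrono::steady_clock::now();
        const uint64_t remainingNanoseconds =
                std::chrono::duration_cast<std::chrono::nanoseconds>(
                        maxTime - currentTime)
                        .count();
        if (duration > remainingNanoseconds) {
            return maxTime;
        }
        return currentTime + std::chrono::nanoseconds{duration};
    }

    // An absent deadline never aborts; a present deadline aborts once
    // the current time reaches it.
    bool hasDeadlinePassed(const std::optional<Deadline>& deadline) {
        return deadline.has_value() &&
               std::chrono::steady_clock::now() >= *deadline;
    }

With these helpers, CpuExecutor checks hasDeadlinePassed(mDeadline) at
the top of each executeOperation call and returns
ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT once the deadline has been
exceeded.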

Bug: 147925145
Test: mma
Test: CtsNNAPITestCases
Test: NeuralNetworksTest_static
Change-Id: I54ca961416313334eb3fca3f439db0974044bd6d
diff --git a/common/CpuExecutor.cpp b/common/CpuExecutor.cpp
index b28fbff..8362131 100644
--- a/common/CpuExecutor.cpp
+++ b/common/CpuExecutor.cpp
@@ -762,6 +762,9 @@
 }
 
 int CpuExecutor::executeOperation(const Operation& operation, RunTimeOperandInfo* operands) {
+    if (hasDeadlinePassed(mDeadline)) {
+        return ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT;
+    }
     if (operation.type == OperationType::IF) {
         int result = executeIfOperation(operation, operands);
         if (result != ANEURALNETWORKS_NO_ERROR) {
diff --git a/common/Utils.cpp b/common/Utils.cpp
index e9fe46e..5c5ce4f 100644
--- a/common/Utils.cpp
+++ b/common/Utils.cpp
@@ -93,36 +93,64 @@
     }
 }
 
-static uint64_t getNanosecondsSinceEpoch(const std::chrono::steady_clock::time_point& time) {
-    const auto timeSinceEpoch = time.time_since_epoch();
-    return std::chrono::duration_cast<std::chrono::nanoseconds>(timeSinceEpoch).count();
+Deadline makeDeadline(uint64_t duration) {
+    const auto maxTime = Deadline::max();
+    const auto currentTime = std::chrono::steady_clock::now();
+
+    // Create Deadline. If there would be an overflow, use the max value.
+    const uint64_t remainingNanoseconds =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime - currentTime).count();
+    if (duration > remainingNanoseconds) {
+        return maxTime;
+    }
+    return currentTime + std::chrono::nanoseconds{duration};
 }
 
-uint64_t getCurrentNanosecondsSinceEpoch() {
-    return getNanosecondsSinceEpoch(std::chrono::steady_clock::now());
+std::optional<Deadline> makeDeadline(std::optional<uint64_t> duration) {
+    return duration.has_value() ? makeDeadline(*duration) : std::optional<Deadline>{};
 }
 
-static std::pair<int, OptionalTimePoint> makeTimePoint(uint64_t duration) {
-    // Relevant time points.
-    const uint64_t maxNanosecondsSinceEpoch =
-            getNanosecondsSinceEpoch(std::chrono::steady_clock::time_point::max());
-    const uint64_t currentNanosecondsSinceEpoch = getCurrentNanosecondsSinceEpoch();
+static uint64_t getMaxNanosecondsSinceEpoch() {
+    const auto maxTime =
+            std::chrono::time_point<std::chrono::steady_clock, std::chrono::nanoseconds>::max();
+    return maxTime.time_since_epoch().count();
+}
 
-    // Check for overflow.
-    if (duration > maxNanosecondsSinceEpoch - currentNanosecondsSinceEpoch) {
-        LOG(ERROR) << "Launching execution failed due to time point overflow";
-        return {ANEURALNETWORKS_BAD_DATA, {}};
+std::optional<Deadline> makeDeadline(const OptionalTimePoint& timePoint) {
+    using Discriminator = hal::OptionalTimePoint::hidl_discriminator;
+    if (timePoint.getDiscriminator() == Discriminator::none) {
+        return std::nullopt;
+    }
+    const uint64_t nanosecondsSinceEpoch = timePoint.nanosecondsSinceEpoch();
+    const uint64_t maxNanosecondsSinceEpoch = getMaxNanosecondsSinceEpoch();
+
+    // Clamp time point to max.
+    if (nanosecondsSinceEpoch >= maxNanosecondsSinceEpoch) {
+        return Deadline::max();
     }
 
-    // Load and return OptionalTimePoint.
-    OptionalTimePoint otp;
-    otp.nanosecondsSinceEpoch(currentNanosecondsSinceEpoch + duration);
-    return {ANEURALNETWORKS_NO_ERROR, otp};
+    // Return provided time point.
+    return Deadline{std::chrono::nanoseconds{nanosecondsSinceEpoch}};
 }
 
-std::pair<int, OptionalTimePoint> makeTimePoint(std::optional<uint64_t> duration) {
-    const std::pair<int, OptionalTimePoint> empty = {ANEURALNETWORKS_NO_ERROR, {}};
-    return duration.has_value() ? makeTimePoint(*duration) : empty;
+bool hasDeadlinePassed(const std::optional<Deadline>& deadline) {
+    if (!deadline.has_value()) {
+        return false;
+    }
+    return std::chrono::steady_clock::now() >= *deadline;
+}
+
+static OptionalTimePoint makeTimePoint(const Deadline& deadline) {
+    const auto timeSinceEpoch = deadline.time_since_epoch();
+    const uint64_t nanosecondsSinceEpoch =
+            std::chrono::duration_cast<std::chrono::nanoseconds>(timeSinceEpoch).count();
+    OptionalTimePoint ret;
+    ret.nanosecondsSinceEpoch(nanosecondsSinceEpoch);
+    return ret;
+}
+
+OptionalTimePoint makeTimePoint(const std::optional<Deadline>& deadline) {
+    return deadline.has_value() ? makeTimePoint(*deadline) : OptionalTimePoint{};
 }
 
 static bool isExtensionOperandType(int32_t type) {
diff --git a/common/include/CpuExecutor.h b/common/include/CpuExecutor.h
index bf01b11..a6bf74c 100644
--- a/common/include/CpuExecutor.h
+++ b/common/include/CpuExecutor.h
@@ -155,6 +155,7 @@
         return mOutputShapes;
     }
 
+    void setDeadline(const Deadline& deadline) { mDeadline = deadline; }
     void setLoopTimeout(uint64_t duration) { mLoopTimeoutDuration = duration; }
 
    private:
@@ -188,6 +189,11 @@
     // Whether execution is finished and mOutputShapes is ready
     bool mFinished = false;
 
+    // The deadline hint for the maximum amount of time the client expects the
+    // execution will take. If this deadline is exceeded, the CpuExecutor will
+    // abort the execution if there are remaining ops to execute.
+    std::optional<Deadline> mDeadline;
+
     // The maximum amount of time in nanoseconds that can be spent executing a
     // WHILE loop.
     uint64_t mLoopTimeoutDuration = operation_while::kTimeoutNsDefault;
diff --git a/common/include/Utils.h b/common/include/Utils.h
index 4c2f358..de4f811 100644
--- a/common/include/Utils.h
+++ b/common/include/Utils.h
@@ -131,13 +131,32 @@
 #define NN_RET_CHECK_GE(x, y) NN_RET_CHECK_OP(x, y, >=)
 #define NN_RET_CHECK_GT(x, y) NN_RET_CHECK_OP(x, y, >)
 
-// Make an optional time point from an optional duration. If the operation
-// succeeds, a pair of {ANEURALNETWORKS_NO_ERROR, timepoint} is returned. If an
-// overflow occurs in this function, {ANEURALNETWORKS_BAD_DATA, empty} is
-// returned.
-std::pair<int, hal::OptionalTimePoint> makeTimePoint(std::optional<uint64_t> duration);
+// Type to represent a deadline time point across processes.
+using Deadline = std::chrono::steady_clock::time_point;
 
-uint64_t getCurrentNanosecondsSinceEpoch();
+// Make a Deadline from a duration. If the sum of the current time and the
+// duration exceeds the max time, return a time point holding the maximum
+// expressible time.
+Deadline makeDeadline(uint64_t duration);
+
+// Convenience function. If the duration is provided, this function creates a
+// Deadline using makeDeadline. If the duration is not provided, this function
+// returns std::nullopt.
+std::optional<Deadline> makeDeadline(std::optional<uint64_t> duration);
+
+// Make an optional Deadline from an OptionalTimePoint. If
+// timePoint.nanosecondsSinceEpoch cannot be represented in Deadline, return a
+// time point holding the maximum Deadline. If the OptionalTimePoint is none,
+// this function returns std::nullopt.
+std::optional<Deadline> makeDeadline(const hal::OptionalTimePoint& timePoint);
+
+// Returns true if the deadline has passed. Returns false if either the deadline
+// has not been exceeded or if the deadline is not present.
+bool hasDeadlinePassed(const std::optional<Deadline>& deadline);
+
+// Make an OptionalTimePoint from an optional Deadline. If the Deadline is not
+// provided, this function returns none for OptionalTimePoint.
+hal::OptionalTimePoint makeTimePoint(const std::optional<Deadline>& deadline);
 
 // Ensure that every user of FalseyErrorStream is linked to the
 // correct instance, using the correct LOG_TAG
diff --git a/driver/sample/SampleDriver.cpp b/driver/sample/SampleDriver.cpp
index 5eabe23..37009ae 100644
--- a/driver/sample/SampleDriver.cpp
+++ b/driver/sample/SampleDriver.cpp
@@ -253,6 +253,7 @@
 void asyncExecute(const Request& request, MeasureTiming measure, time_point driverStart,
                   const Model& model, const SampleDriver& driver,
                   const std::vector<RunTimePoolInfo>& poolInfos,
+                  const std::optional<Deadline>& deadline,
                   const OptionalTimeoutDuration& loopTimeoutDuration,
                   const sp<T_IExecutionCallback>& callback) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_INPUTS_AND_OUTPUTS,
@@ -270,6 +271,9 @@
         OptionalTimeoutDuration::hidl_discriminator::none) {
         executor.setLoopTimeout(loopTimeoutDuration.nanoseconds());
     }
+    if (deadline.has_value()) {
+        executor.setDeadline(*deadline);
+    }
     time_point driverEnd, deviceStart, deviceEnd;
     if (measure == MeasureTiming::YES) deviceStart = now();
     int n = executor.run(model, request, poolInfos, requestPoolInfos);
@@ -291,7 +295,7 @@
 template <typename T_IExecutionCallback>
 ErrorStatus executeBase(const Request& request, MeasureTiming measure, const Model& model,
                         const SampleDriver& driver, const std::vector<RunTimePoolInfo>& poolInfos,
-                        const OptionalTimePoint& deadline,
+                        const OptionalTimePoint& halDeadline,
                         const OptionalTimeoutDuration& loopTimeoutDuration,
                         const sp<T_IExecutionCallback>& callback) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION, "SampleDriver::executeBase");
@@ -308,18 +312,18 @@
         notify(callback, ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
         return ErrorStatus::INVALID_ARGUMENT;
     }
-    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none &&
-        getCurrentNanosecondsSinceEpoch() > deadline.nanosecondsSinceEpoch()) {
+    const auto deadline = makeDeadline(halDeadline);
+    if (hasDeadlinePassed(deadline)) {
         notify(callback, ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming);
         return ErrorStatus::NONE;
     }
 
     // This thread is intentionally detached because the sample driver service
     // is expected to live forever.
-    std::thread([&model, &driver, &poolInfos, request, measure, driverStart, loopTimeoutDuration,
-                 callback] {
-        asyncExecute(request, measure, driverStart, model, driver, poolInfos, loopTimeoutDuration,
-                     callback);
+    std::thread([&model, &driver, &poolInfos, request, measure, driverStart, deadline,
+                 loopTimeoutDuration, callback] {
+        asyncExecute(request, measure, driverStart, model, driver, poolInfos, deadline,
+                     loopTimeoutDuration, callback);
     }).detach();
 
     return ErrorStatus::NONE;
@@ -351,7 +355,7 @@
 static std::tuple<ErrorStatus, hidl_vec<OutputShape>, Timing> executeSynchronouslyBase(
         const Request& request, MeasureTiming measure, const Model& model,
         const SampleDriver& driver, const std::vector<RunTimePoolInfo>& poolInfos,
-        const OptionalTimePoint& deadline, const OptionalTimeoutDuration& loopTimeoutDuration) {
+        const OptionalTimePoint& halDeadline, const OptionalTimeoutDuration& loopTimeoutDuration) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
                  "SampleDriver::executeSynchronouslyBase");
     VLOG(DRIVER) << "executeSynchronouslyBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
@@ -362,8 +366,8 @@
     if (!validateRequest(request, model)) {
         return {ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
     }
-    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none &&
-        getCurrentNanosecondsSinceEpoch() > deadline.nanosecondsSinceEpoch()) {
+    const auto deadline = makeDeadline(halDeadline);
+    if (hasDeadlinePassed(deadline)) {
         return {ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming};
     }
 
@@ -381,6 +385,9 @@
         OptionalTimeoutDuration::hidl_discriminator::none) {
         executor.setLoopTimeout(loopTimeoutDuration.nanoseconds());
     }
+    if (deadline.has_value()) {
+        executor.setDeadline(*deadline);
+    }
     if (measure == MeasureTiming::YES) deviceStart = now();
     int n = executor.run(model, request, poolInfos, requestPoolInfos);
     if (measure == MeasureTiming::YES) deviceEnd = now();
@@ -418,7 +425,7 @@
 // The sample driver will finish the execution and then return.
 Return<void> SamplePreparedModel::executeFenced(
         const hal::Request& request, const hidl_vec<hidl_handle>& waitFor, MeasureTiming measure,
-        const OptionalTimePoint& deadline, const OptionalTimeoutDuration& loopTimeoutDuration,
+        const OptionalTimePoint& halDeadline, const OptionalTimeoutDuration& loopTimeoutDuration,
         const OptionalTimeoutDuration& duration, executeFenced_cb cb) {
     NNTRACE_FULL(NNTRACE_LAYER_DRIVER, NNTRACE_PHASE_EXECUTION,
                  "SamplePreparedModel::executeFenced");
@@ -431,10 +438,8 @@
         cb(ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
         return Void();
     }
-    if ((duration.getDiscriminator() != OptionalTimeoutDuration::hidl_discriminator::none &&
-         duration.nanoseconds() == 0) ||
-        (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none &&
-         getCurrentNanosecondsSinceEpoch() > deadline.nanosecondsSinceEpoch())) {
+    const auto deadline = makeDeadline(halDeadline);
+    if (hasDeadlinePassed(deadline)) {
         cb(ErrorStatus::MISSED_DEADLINE_PERSISTENT, hidl_handle(nullptr), nullptr);
         return Void();
     }
@@ -453,6 +458,15 @@
         }
     }
 
+    // Update deadline if the timeout duration is closer than the deadline.
+    auto closestDeadline = deadline;
+    if (duration.getDiscriminator() != OptionalTimeoutDuration::hidl_discriminator::none) {
+        const auto timeoutDurationDeadline = makeDeadline(duration.nanoseconds());
+        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
+            closestDeadline = timeoutDurationDeadline;
+        }
+    }
+
     time_point driverStartAfterFence;
     if (measure == MeasureTiming::YES) driverStartAfterFence = now();
 
@@ -471,6 +485,9 @@
         OptionalTimeoutDuration::hidl_discriminator::none) {
         executor.setLoopTimeout(loopTimeoutDuration.nanoseconds());
     }
+    if (closestDeadline.has_value()) {
+        executor.setDeadline(*closestDeadline);
+    }
     if (measure == MeasureTiming::YES) deviceStart = now();
     int n = executor.run(mModel, request, mPoolInfos, requestPoolInfos);
     if (measure == MeasureTiming::YES) deviceEnd = now();
diff --git a/driver/sample/SampleDriverUtils.h b/driver/sample/SampleDriverUtils.h
index 4a441d2..d5a87a1 100644
--- a/driver/sample/SampleDriverUtils.h
+++ b/driver/sample/SampleDriverUtils.h
@@ -46,7 +46,7 @@
 template <typename T_Model, typename T_IPreparedModelCallback>
 hal::ErrorStatus prepareModelBase(const T_Model& model, const SampleDriver* driver,
                                   hal::ExecutionPreference preference, hal::Priority priority,
-                                  const hal::OptionalTimePoint& deadline,
+                                  const hal::OptionalTimePoint& halDeadline,
                                   const sp<T_IPreparedModelCallback>& callback,
                                   bool isFullModelSupported = true) {
     const uid_t userId = hardware::IPCThreadState::self()->getCallingUid();
@@ -67,11 +67,12 @@
         notify(callback, hal::ErrorStatus::INVALID_ARGUMENT, nullptr);
         return hal::ErrorStatus::NONE;
     }
-    if (deadline.getDiscriminator() != hal::OptionalTimePoint::hidl_discriminator::none &&
-        getCurrentNanosecondsSinceEpoch() > deadline.nanosecondsSinceEpoch()) {
+    const auto deadline = makeDeadline(halDeadline);
+    if (hasDeadlinePassed(deadline)) {
         notify(callback, hal::ErrorStatus::MISSED_DEADLINE_PERSISTENT, nullptr);
         return hal::ErrorStatus::NONE;
     }
+
     // asynchronously prepare the model from a new, detached thread
     std::thread([model, driver, preference, userId, priority, callback] {
         sp<SamplePreparedModel> preparedModel =
diff --git a/runtime/CompilationBuilder.cpp b/runtime/CompilationBuilder.cpp
index 8b7b1d8..8b2a269 100644
--- a/runtime/CompilationBuilder.cpp
+++ b/runtime/CompilationBuilder.cpp
@@ -56,15 +56,14 @@
     }
     // TODO validate the rest
 
-    const auto [n, timeout] = makeTimePoint(mTimeoutDuration);
-    NN_RETURN_IF_ERROR(n);
+    const auto deadline = makeDeadline(mTimeoutDuration);
 
     mFinished = true;
     if (mIsCacheInfoProvided) {
         mPlan.setCaching(&mCacheDir, mToken);
     }
     if (mPartitioning) {
-        int n = mModel->partitionTheWork(mDevices, mPreference, mPriority, timeout, &mPlan);
+        int n = mModel->partitionTheWork(mDevices, mPreference, mPriority, deadline, &mPlan);
         switch (n) {
             case ANEURALNETWORKS_NO_ERROR:
                 return n;
@@ -97,7 +96,7 @@
     VLOG(COMPILATION) << "CompilationBuilder::finish with CPU fallback";
     mPlan.reset();
     mPlan.becomeSingleStep(DeviceManager::getCpuDevice(), mModel);
-    return mPlan.finish(mPreference, mPriority, timeout);
+    return mPlan.finish(mPreference, mPriority, deadline);
 }
 
 int CompilationBuilder::setPreference(int32_t preference) {
diff --git a/runtime/ExecutionBuilder.cpp b/runtime/ExecutionBuilder.cpp
index 0e97c1a..2790839 100644
--- a/runtime/ExecutionBuilder.cpp
+++ b/runtime/ExecutionBuilder.cpp
@@ -450,7 +450,8 @@
 static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
                                          const ExecutionPlan& plan,
                                          std::shared_ptr<ExecutionPlan::Controller> controller,
-                                         bool allowFallback, const OptionalTimePoint& deadline,
+                                         bool allowFallback,
+                                         const std::optional<Deadline>& deadline,
                                          const sp<ExecutionCallback>& executionCallback) {
     CHECK(executionBuilder != nullptr);
     VLOG(EXECUTION) << "ExecutionBuilder::compute (from plan, iteratively)";
@@ -579,7 +580,8 @@
 static std::tuple<int, int, sp<hal::IFencedExecutionCallback>> startComputeFenced(
         ExecutionBuilder* executionBuilder, const ExecutionPlan& plan,
         std::shared_ptr<ExecutionPlan::Controller> controller, const std::vector<int>& waitFor,
-        uint64_t timeoutDurationAfterFence, bool allowFallback) {
+        uint64_t timeoutDurationAfterFence, const std::optional<Deadline>& deadline,
+        bool allowFallback) {
     CHECK(executionBuilder != nullptr);
     VLOG(EXECUTION) << "ExecutionBuilder::computeFenced (from plan, iteratively)";
     // Disallow fallback when the ExecutionPlan is simple on CPU.
@@ -620,7 +622,7 @@
 
         // Attempt to execute a single step of the execution.
         auto [stepN, syncFd, callback] =
-                executor->computeFenced(waitForFds, timeoutDurationAfterFence);
+                executor->computeFenced(waitForFds, timeoutDurationAfterFence, deadline);
 
         // Update waitForFds, syncFence for the next step.
         syncFence = syncFd;
@@ -690,6 +692,7 @@
             return ANEURALNETWORKS_BAD_DATA;
         }
     }
+    const auto deadline = makeDeadline(mTimeoutDuration);
     for (auto& p : mInputs) {
         if (p.state == ModelArgumentInfo::UNSPECIFIED) {
             LOG(ERROR) << "ANeuralNetworksExecution_startComputeWithDependencies"
@@ -719,7 +722,7 @@
     VLOG(EXECUTION) << "ExecutionBuilder::computeFenced";
     int result;
     std::tie(result, mSyncFenceFd, mFencedExecutionCallback) = startComputeFenced(
-            this, *mPlan, controller, waitFor, timeoutDurationAfterFence, allowFallback);
+            this, *mPlan, controller, waitFor, timeoutDurationAfterFence, deadline, allowFallback);
     *syncFence = mSyncFenceFd;
     return result;
 }
@@ -730,13 +733,11 @@
             << "synchronizationCallback and burstBuilder cannot simultaneously be used";
 
     const bool synchronous = (synchronizationCallback == nullptr);
-
     if (!synchronous) {
         *synchronizationCallback = nullptr;
     }
 
-    const auto [timePointN, deadline] = makeTimePoint(mTimeoutDuration);
-    NN_RETURN_IF_ERROR(timePointN);
+    const auto deadline = makeDeadline(mTimeoutDuration);
 
     // TODO validate that we have full types for all inputs and outputs,
     // that the graph is not cyclic,
@@ -808,9 +809,9 @@
         } else {
             VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API)";
             std::thread asyncExecution(
-                    [this, controller, allowFallback, d = deadline, executionCallback] {
-                        asyncStartComputePartitioned(this, *mPlan, controller, allowFallback, d,
-                                                     executionCallback);
+                    [this, controller, allowFallback, deadline, executionCallback] {
+                        asyncStartComputePartitioned(this, *mPlan, controller, allowFallback,
+                                                     deadline, executionCallback);
                     });
             executionCallback->bindThread(std::move(asyncExecution));
         }
@@ -996,7 +997,7 @@
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> StepExecutor::compute(
-        const OptionalTimePoint& deadline,
+        const std::optional<Deadline>& deadline,
         const std::shared_ptr<ExecutionBurstController>& burstController) {
     CHECK(mPreparedModel != nullptr);
 
@@ -1016,7 +1017,8 @@
 }
 
 std::tuple<int, int, sp<hal::IFencedExecutionCallback>> StepExecutor::computeFenced(
-        const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence) {
+        const std::vector<int>& waitFor, uint64_t timeoutDurationAfterFence,
+        const std::optional<Deadline>& deadline) {
     CHECK(mPreparedModel != nullptr);
 
     if (VLOG_IS_ON(EXECUTION)) {
@@ -1025,10 +1027,6 @@
     }
 
     const MeasureTiming measure = measureTiming(mExecutionBuilder);
-    const auto [timePointN, deadline] = makeTimePoint(mExecutionBuilder->getTimeoutDuration());
-    if (timePointN != ANEURALNETWORKS_NO_ERROR) {
-        return {timePointN, -1, nullptr};
-    }
     const OptionalTimeoutDuration loopTimeoutDuration =
             makeTimeoutDuration(mExecutionBuilder->getLoopTimeoutDuration());
     OptionalTimeoutDuration optionalTimeoutDurationAfterFence;
diff --git a/runtime/ExecutionBuilder.h b/runtime/ExecutionBuilder.h
index 947b73f..180444b 100644
--- a/runtime/ExecutionBuilder.h
+++ b/runtime/ExecutionBuilder.h
@@ -238,7 +238,7 @@
 
     // Executes using the (driver, preparedModel) specified at construction time.
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> compute(
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);
 
     // Re-compiles and executes using the CPU, regardless of the (driver,
@@ -250,7 +250,8 @@
     // Perform fenced execution and return error_code, sync_fence_fd and a
     // callback.
     std::tuple<int, int, sp<hal::IFencedExecutionCallback>> computeFenced(
-            const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence);
+            const std::vector<int>& wait_for, uint64_t timeoutDurationAfterFence,
+            const std::optional<Deadline>& deadline);
 
    private:
     void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
diff --git a/runtime/ExecutionPlan.cpp b/runtime/ExecutionPlan.cpp
index 64efea1..bf9bcb7 100644
--- a/runtime/ExecutionPlan.cpp
+++ b/runtime/ExecutionPlan.cpp
@@ -69,8 +69,9 @@
 // operation indices to be executed (COMPOUND body). The token will be re-hashed further by the
 // device name, device version string, and the execution preference in this function.
 int compile(const Device& device, const ModelBuilder& model, int executionPreference,
-            int compilationPriority, const OptionalTimePoint& deadline, const std::string& cacheDir,
-            TokenHasher* token, std::shared_ptr<PreparedModel>* preparedModel) {
+            int compilationPriority, const std::optional<Deadline>& deadline,
+            const std::string& cacheDir, TokenHasher* token,
+            std::shared_ptr<PreparedModel>* preparedModel) {
     CHECK(token != nullptr);
     CHECK(preparedModel != nullptr);
     *preparedModel = nullptr;
@@ -603,8 +604,8 @@
 
 int ExecutionPlan::CompoundBody::finish(const SourceModels* sourceModels,
                                         int32_t executionPreference, int32_t priority,
-                                        const OptionalTimePoint& deadline) {
-    CHECK(deadline.getDiscriminator() == OptionalTimePoint::hidl_discriminator::none);
+                                        const std::optional<Deadline>& deadline) {
+    CHECK(!deadline.has_value());
     const ModelBuilder* mainModel = sourceModels->getModel(kMainModelInSourceModels);
 
     auto containsUnknownSize = [sourceModels](const std::vector<SourceOperandIndex>& operands) {
@@ -695,7 +696,7 @@
 }
 
 int ExecutionPlan::SimpleBody::finish(const SourceModels*, int32_t executionPreference,
-                                      int32_t priority, const OptionalTimePoint& deadline) {
+                                      int32_t priority, const std::optional<Deadline>& deadline) {
     CHECK(mDevice != nullptr);
     VLOG(COMPILATION) << "ExecutionPlan::SimpleBody::finish, compilation";
     const int n = compile(*mDevice, *mModel, executionPreference, priority, deadline, *mCacheDir,
@@ -705,7 +706,7 @@
 }
 
 int ExecutionPlan::finish(int32_t executionPreference, int32_t priority,
-                          const OptionalTimePoint& deadline) {
+                          const std::optional<Deadline>& deadline) {
     CHECK(mBody != nullptr);
     return mBody->finish(&getSourceModels(), executionPreference, priority, deadline);
 }
@@ -1470,7 +1471,8 @@
 
 int ModelBuilder::partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                                    uint32_t preference, uint32_t priority,
-                                   const OptionalTimePoint& deadline, ExecutionPlan* plan) const {
+                                   const std::optional<Deadline>& deadline,
+                                   ExecutionPlan* plan) const {
     uint32_t sourceModelIndex = plan->getSourceModels().addModel(this);
     NN_RETURN_IF_ERROR(partitionTheWorkInternal(sourceModelIndex, devices, preference, priority,
                                                 deadline, plan));
@@ -1486,7 +1488,7 @@
 int ModelBuilder::partitionTheWorkInternal(uint32_t sourceModelIndex,
                                            const std::vector<std::shared_ptr<Device>>& devices,
                                            uint32_t preference, uint32_t priority,
-                                           const OptionalTimePoint& deadline,
+                                           const std::optional<Deadline>& deadline,
                                            ExecutionPlan* plan) const {
     // This function uses a heuristic approach to partitioning the graph.
     // It should be good enough for the first release.
diff --git a/runtime/ExecutionPlan.h b/runtime/ExecutionPlan.h
index 0125f41..8156f99 100644
--- a/runtime/ExecutionPlan.h
+++ b/runtime/ExecutionPlan.h
@@ -521,7 +521,7 @@
     void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);
 
     int finish(int32_t executionPreference, int32_t priority,
-               const hal::OptionalTimePoint& deadline);
+               const std::optional<Deadline>& deadline);
 
     void recordTemporaryDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex);
 
@@ -600,7 +600,7 @@
         virtual ~Body() {}
         virtual void dump() const = 0;
         virtual int finish(const SourceModels* sourceModels, int32_t executionPreference,
-                           int32_t priority, const hal::OptionalTimePoint& deadline) = 0;
+                           int32_t priority, const std::optional<Deadline>& deadline) = 0;
         virtual bool hasStepModelOutputsOfUnknownSize() const = 0;
         virtual void forEachStepRoleOfInput(uint32_t index,
                                             const StepRoleCallback& callback) const = 0;
@@ -616,7 +616,7 @@
 
         void dump() const override;
         int finish(const SourceModels* sourceModels, int32_t executionPreference, int32_t priority,
-                   const hal::OptionalTimePoint& deadline) override;
+                   const std::optional<Deadline>& deadline) override;
         bool hasStepModelOutputsOfUnknownSize() const override { return false; }
         void forEachStepRoleOfInput(uint32_t index,
                                     const StepRoleCallback& callback) const override;
@@ -634,7 +634,7 @@
     struct CompoundBody : Body {
         void dump() const override;
         int finish(const SourceModels* sourceModels, int32_t executionPreference, int32_t priority,
-                   const hal::OptionalTimePoint& deadline) override;
+                   const std::optional<Deadline>& deadline) override;
         bool hasStepModelOutputsOfUnknownSize() const override {
             return mHasStepModelOutputOfUnknownSize;
         }
diff --git a/runtime/Manager.cpp b/runtime/Manager.cpp
index 094d8d9..42f9262 100644
--- a/runtime/Manager.cpp
+++ b/runtime/Manager.cpp
@@ -98,7 +98,7 @@
 
     std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
             const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
-            const OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<Deadline>& deadline, const std::string& cacheDir,
             const std::optional<CacheToken>& maybeToken) const override;
 
     std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor& desc) const override;
@@ -132,14 +132,14 @@
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
-            const OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const OptionalTimeoutDuration& loopTimeoutDuration) const override;
 
     std::tuple<int, int, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::vector<int>& waitFor, MeasureTiming measure,
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const OptionalTimeoutDuration& loopTimeoutDuration,
             const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const override;
 
@@ -237,7 +237,7 @@
 
 std::pair<int, std::shared_ptr<PreparedModel>> DriverDevice::prepareModel(
         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
-        const OptionalTimePoint& deadline, const std::string& cacheDir,
+        const std::optional<Deadline>& deadline, const std::string& cacheDir,
         const std::optional<CacheToken>& maybeToken) const {
     const auto [n, preparedModel] = kInterface->prepareModel(makeModel, preference, priority,
                                                              deadline, cacheDir, maybeToken);
@@ -318,7 +318,7 @@
         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
         const MemoryTracker& memories,
         const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
-        const OptionalTimePoint& deadline,
+        const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration) const {
     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
 
@@ -425,7 +425,7 @@
 DriverPreparedModel::executeFenced(
         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
         const MemoryTracker& memories, const std::vector<int>& waitFor, hal::MeasureTiming measure,
-        const hal::OptionalTimePoint& deadline, const OptionalTimeoutDuration& loopTimeoutDuration,
+        const std::optional<Deadline>& deadline, const OptionalTimeoutDuration& loopTimeoutDuration,
         const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const {
     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
     CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd > 0; }));
@@ -569,7 +569,7 @@
 
     std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
             const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
-            const OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<Deadline>& deadline, const std::string& cacheDir,
             const std::optional<CacheToken>& maybeToken) const override;
 
     std::pair<int, std::unique_ptr<Memory>> allocate(const MemoryDescriptor&) const override {
@@ -604,7 +604,7 @@
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::shared_ptr<ExecutionBurstController>& burstController, MeasureTiming measure,
-            const OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const OptionalTimeoutDuration& loopTimeoutDuration) const override;
 
     std::shared_ptr<ExecutionBurstController> configureExecutionBurst(
@@ -616,7 +616,7 @@
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::vector<int>& wait_for, MeasureTiming measure,
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const OptionalTimeoutDuration& loopTimeoutDuration,
             const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const override;
 
@@ -646,7 +646,7 @@
 
 std::pair<int, std::shared_ptr<PreparedModel>> CpuDevice::prepareModel(
         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
-        const OptionalTimePoint& deadline, const std::string& /*cacheDir*/,
+        const std::optional<Deadline>& deadline, const std::string& /*cacheDir*/,
         const std::optional<CacheToken>& maybeToken) const {
     CHECK(!maybeToken.has_value())
             << "Should never call prepareModel with cache information on CpuDevice";
@@ -656,8 +656,8 @@
         !validatePriority(priority)) {
         return {ANEURALNETWORKS_OP_FAILED, nullptr};
     }
-    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
-        return {ANEURALNETWORKS_BAD_DATA, nullptr};
+    if (hasDeadlinePassed(deadline)) {
+        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, nullptr};
     }
 
     return CpuPreparedModel::create(model);
@@ -678,6 +678,7 @@
         const Model& model, const Request& request,
         const std::vector<RunTimePoolInfo>& modelPoolInfos,
         const std::vector<RunTimePoolInfo>& requestPoolInfos,
+        const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration) {
     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
     CpuExecutor executor;
@@ -685,6 +686,9 @@
         OptionalTimeoutDuration::hidl_discriminator::none) {
         executor.setLoopTimeout(loopTimeoutDuration.nanoseconds());
     }
+    if (deadline.has_value()) {
+        executor.setDeadline(*deadline);
+    }
     int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
     const auto& outputShapes = executor.getOutputShapes();
     return {err, outputShapes, kNoTiming};
@@ -694,9 +698,9 @@
 CpuPreparedModel::executeFenced(const std::vector<ModelArgumentInfo>& inputs,
                                 const std::vector<ModelArgumentInfo>& outputs,
                                 const MemoryTracker& memories, const std::vector<int>& waitFor,
-                                hal::MeasureTiming measure, const hal::OptionalTimePoint&,
+                                hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
                                 const OptionalTimeoutDuration& loopTimeoutDuration,
-                                const hal::OptionalTimeoutDuration&) const {
+                                const hal::OptionalTimeoutDuration& duration) const {
     VLOG(EXECUTION)
             << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
     for (int syncFd : waitFor) {
@@ -708,8 +712,18 @@
             }
         }
     }
-    const auto [result, outputShapes, timing] =
-            execute(inputs, outputs, memories, nullptr, measure, {}, loopTimeoutDuration);
+
+    // Update deadline if the timeout duration is closer than the deadline.
+    auto closestDeadline = deadline;
+    if (duration.getDiscriminator() != OptionalTimeoutDuration::hidl_discriminator::none) {
+        const auto timeoutDurationDeadline = makeDeadline(duration.nanoseconds());
+        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
+            closestDeadline = timeoutDurationDeadline;
+        }
+    }
+
+    const auto [result, outputShapes, timing] = execute(inputs, outputs, memories, nullptr, measure,
+                                                        closestDeadline, loopTimeoutDuration);
     return {result, -1, nullptr, timing};
 }
 
@@ -724,10 +738,10 @@
         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
         const MemoryTracker& memories,
         const std::shared_ptr<ExecutionBurstController>& /*burstController*/,
-        MeasureTiming /*measure*/, const OptionalTimePoint& deadline,
+        MeasureTiming /*measure*/, const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration) const {
-    if (deadline.getDiscriminator() != OptionalTimePoint::hidl_discriminator::none) {
-        return {ANEURALNETWORKS_BAD_DATA, {}, kNoTiming};
+    if (hasDeadlinePassed(deadline)) {
+        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, kNoTiming};
     }
 
     std::vector<RunTimePoolInfo> requestPoolInfos;
@@ -768,14 +782,15 @@
         // TODO(mikie): this could have NNTRACE so we could measure the overhead
         //              of spinning up a new thread.
         std::tuple<int, std::vector<OutputShape>, Timing> result = {};
-        std::thread([this, &request, &requestPoolInfos, &loopTimeoutDuration, &result] {
-            result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos,
+        std::thread([this, &request, &requestPoolInfos, &deadline, &loopTimeoutDuration, &result] {
+            result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                                   loopTimeoutDuration);
         }).join();
         return result;
     }
 
-    return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, loopTimeoutDuration);
+    return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
+                        loopTimeoutDuration);
 }
 
 DeviceManager* DeviceManager::get() {
diff --git a/runtime/Manager.h b/runtime/Manager.h
index 197fe95..f634c2d 100644
--- a/runtime/Manager.h
+++ b/runtime/Manager.h
@@ -58,7 +58,7 @@
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::shared_ptr<ExecutionBurstController>& burstController,
-            hal::MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+            hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
             const hal::OptionalTimeoutDuration& loopTimeoutDuration) const = 0;
 
     // Perform fenced computation with given input/output argument info and memory pools.
@@ -68,7 +68,7 @@
             const std::vector<ModelArgumentInfo>& inputs,
             const std::vector<ModelArgumentInfo>& outputs, const MemoryTracker& memories,
             const std::vector<int>& waitFor, hal::MeasureTiming measure,
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const hal::OptionalTimeoutDuration& loopTimeoutDuration,
             const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) const = 0;
 
@@ -104,7 +104,7 @@
 
     virtual std::pair<int, std::shared_ptr<PreparedModel>> prepareModel(
             const hal::ModelFactory& makeModel, hal::ExecutionPreference preference,
-            hal::Priority priority, const hal::OptionalTimePoint& deadline,
+            hal::Priority priority, const std::optional<Deadline>& deadline,
             const std::string& cacheDir,
             const std::optional<hal::CacheToken>& maybeToken) const = 0;
 
diff --git a/runtime/ModelBuilder.h b/runtime/ModelBuilder.h
index 83dd1c8..78c4501 100644
--- a/runtime/ModelBuilder.h
+++ b/runtime/ModelBuilder.h
@@ -127,7 +127,7 @@
     }
 
     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference,
-                         uint32_t priority, const hal::OptionalTimePoint& deadline,
+                         uint32_t priority, const std::optional<Deadline>& deadline,
                          ExecutionPlan* plan) const;
 
    private:
@@ -153,7 +153,8 @@
     int partitionTheWorkInternal(uint32_t sourceModelIndex,
                                  const std::vector<std::shared_ptr<Device>>& devices,
                                  uint32_t preference, uint32_t priority,
-                                 const hal::OptionalTimePoint& deadline, ExecutionPlan* plan) const;
+                                 const std::optional<Deadline>& deadline,
+                                 ExecutionPlan* plan) const;
 
     // Return true if either mCompleteModel or mInvalidModel is true.
     bool badState(const char* name);
diff --git a/runtime/VersionedInterfaces.cpp b/runtime/VersionedInterfaces.cpp
index cb72281..794b647 100644
--- a/runtime/VersionedInterfaces.cpp
+++ b/runtime/VersionedInterfaces.cpp
@@ -201,7 +201,7 @@
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::executeAsynchronously(
-        const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline,
+        const Request& request, MeasureTiming measure, const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration) const {
     const auto failDeadObject = []() -> std::tuple<int, std::vector<OutputShape>, Timing> {
         return {ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
@@ -221,7 +221,8 @@
 
     // version 1.3+ HAL
     if (mPreparedModelV1_3 != nullptr) {
-        Return<ErrorStatus> ret = mPreparedModelV1_3->execute_1_3(request, measure, deadline,
+        const auto otp = makeTimePoint(deadline);
+        Return<ErrorStatus> ret = mPreparedModelV1_3->execute_1_3(request, measure, otp,
                                                                   loopTimeoutDuration, callback);
         if (ret.isDeadObject()) {
             LOG(ERROR) << "execute_1_3 failure: " << ret.description();
@@ -293,7 +294,7 @@
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::executeSynchronously(
-        const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline,
+        const Request& request, MeasureTiming measure, const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration) const {
     const std::tuple<int, std::vector<OutputShape>, Timing> kDeadObject = {
             ANEURALNETWORKS_DEAD_OBJECT, {}, kNoTiming};
@@ -302,8 +303,9 @@
     // version 1.3+ HAL
     if (mPreparedModelV1_3 != nullptr) {
         std::tuple<int, std::vector<OutputShape>, Timing> result;
+        const auto otp = makeTimePoint(deadline);
         Return<void> ret = mPreparedModelV1_3->executeSynchronously_1_3(
-                request, measure, deadline, loopTimeoutDuration,
+                request, measure, otp, loopTimeoutDuration,
                 [&result](ErrorStatus error, const hidl_vec<OutputShape>& outputShapes,
                           const Timing& timing) {
                     result = getExecutionResult(error, outputShapes, timing);
@@ -351,7 +353,7 @@
 }
 
 std::tuple<int, std::vector<OutputShape>, Timing> VersionedIPreparedModel::execute(
-        const Request& request, MeasureTiming measure, const OptionalTimePoint& deadline,
+        const Request& request, MeasureTiming measure, const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration, bool preferSynchronous) const {
     if (preferSynchronous) {
         VLOG(EXECUTION) << "Before executeSynchronously() " << SHOW_IF_DEBUG(toString(request));
@@ -407,7 +409,7 @@
 std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing>
 VersionedIPreparedModel::executeFenced(
         const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
-        MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+        MeasureTiming measure, const std::optional<Deadline>& deadline,
         const OptionalTimeoutDuration& loopTimeoutDuration,
         const hal::OptionalTimeoutDuration& timeoutDurationAfterFence) {
     // version 1.3+ HAL
@@ -417,8 +419,9 @@
     hal::Timing timing = {UINT64_MAX, UINT64_MAX};
     if (mPreparedModelV1_3 != nullptr) {
         ErrorStatus errorStatus;
+        const auto otp = makeTimePoint(deadline);
         Return<void> ret = mPreparedModelV1_3->executeFenced(
-                request, waitFor, measure, deadline, loopTimeoutDuration, timeoutDurationAfterFence,
+                request, waitFor, measure, otp, loopTimeoutDuration, timeoutDurationAfterFence,
                 [&syncFence, &errorStatus, &dispatchCallback](
                         ErrorStatus error, const hidl_handle& handle,
                         const sp<hal::IFencedExecutionCallback>& callback) {
@@ -1185,7 +1188,7 @@
 
 std::pair<int, std::shared_ptr<VersionedIPreparedModel>> VersionedIDevice::prepareModelInternal(
         const Model& model, ExecutionPreference preference, Priority priority,
-        const OptionalTimePoint& deadline, const std::string& cacheDir,
+        const std::optional<Deadline>& deadline, const std::string& cacheDir,
         const std::optional<CacheToken>& maybeToken) const {
     // Note that some work within VersionedIDevice will be subtracted from the IPC layer
     NNTRACE_FULL(NNTRACE_LAYER_IPC, NNTRACE_PHASE_COMPILATION, "prepareModel");
@@ -1209,12 +1212,13 @@
 
     // If 1.3 device, try preparing model
     if (getDevice<V1_3::IDevice>() != nullptr) {
+        const auto otp = makeTimePoint(deadline);
         const Return<ErrorStatus> ret = recoverable<ErrorStatus, V1_3::IDevice>(
                 __FUNCTION__,
-                [&model, preference, priority, &deadline, &modelCache, &dataCache, &token,
+                [&model, preference, priority, &otp, &modelCache, &dataCache, &token,
                  &callback](const sp<V1_3::IDevice>& device) {
-                    return device->prepareModel_1_3(model, preference, priority, deadline,
-                                                    modelCache, dataCache, token, callback);
+                    return device->prepareModel_1_3(model, preference, priority, otp, modelCache,
+                                                    dataCache, token, callback);
                 },
                 callback);
         if (ret.isDeadObject()) {
@@ -1366,7 +1370,7 @@
 }
 
 std::pair<int, std::shared_ptr<VersionedIPreparedModel>>
-VersionedIDevice::prepareModelFromCacheInternal(const OptionalTimePoint& deadline,
+VersionedIDevice::prepareModelFromCacheInternal(const std::optional<Deadline>& deadline,
                                                 const std::string& cacheDir,
                                                 const CacheToken& token) const {
     // Note that some work within VersionedIDevice will be subtracted from the IPC layer
@@ -1384,12 +1388,13 @@
 
     // version 1.3+ HAL
     if (getDevice<V1_3::IDevice>() != nullptr) {
+        const auto otp = makeTimePoint(deadline);
         const sp<PreparedModelCallback> callback = new PreparedModelCallback();
         const Return<ErrorStatus> ret = recoverable<ErrorStatus, V1_3::IDevice>(
                 __FUNCTION__,
-                [&deadline, &modelCache, &dataCache, &token,
+                [&otp, &modelCache, &dataCache, &token,
                  &callback](const sp<V1_3::IDevice>& device) {
-                    return device->prepareModelFromCache_1_3(deadline, modelCache, dataCache, token,
+                    return device->prepareModelFromCache_1_3(otp, modelCache, dataCache, token,
                                                              callback);
                 },
                 callback);
@@ -1447,7 +1452,7 @@
 
 std::pair<int, std::shared_ptr<VersionedIPreparedModel>> VersionedIDevice::prepareModel(
         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
-        const OptionalTimePoint& deadline, const std::string& cacheDir,
+        const std::optional<Deadline>& deadline, const std::string& cacheDir,
         const std::optional<CacheToken>& maybeToken) const {
     // Attempt to compile from cache if token is present.
     if (maybeToken.has_value()) {
diff --git a/runtime/VersionedInterfaces.h b/runtime/VersionedInterfaces.h
index 4c4b445..2d4a243 100644
--- a/runtime/VersionedInterfaces.h
+++ b/runtime/VersionedInterfaces.h
@@ -31,6 +31,7 @@
 
 #include "Callbacks.h"
 #include "HalInterfaces.h"
+#include "Utils.h"
 
 namespace android {
 namespace nn {
@@ -218,7 +219,7 @@
      */
     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModel(
             const hal::ModelFactory& makeModel, hal::ExecutionPreference preference, hal::Priority,
-            const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<Deadline>& deadline, const std::string& cacheDir,
             const std::optional<hal::CacheToken>& maybeToken) const;
 
     /**
@@ -399,10 +400,10 @@
     // internal methods to prepare a model
     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelInternal(
             const hal::Model& model, hal::ExecutionPreference preference, hal::Priority priority,
-            const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<Deadline>& deadline, const std::string& cacheDir,
             const std::optional<hal::CacheToken>& maybeToken) const;
     std::pair<int, std::shared_ptr<VersionedIPreparedModel>> prepareModelFromCacheInternal(
-            const hal::OptionalTimePoint& deadline, const std::string& cacheDir,
+            const std::optional<Deadline>& deadline, const std::string& cacheDir,
             const hal::CacheToken& token) const;
 
     /**
@@ -677,7 +678,7 @@
      */
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> execute(
             const hal::Request& request, hal::MeasureTiming measure,
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const hal::OptionalTimeoutDuration& loopTimeoutDuration, bool preferSynchronous) const;
 
     /**
@@ -720,12 +721,12 @@
      * any data object referenced by 'request' (described by the
      * {@link @1.0::DataLocation} of a {@link @1.0::RequestArgument}).
      *
-     * executeFenced can be called with an optional deadline and an optional duration.
+     * executeFenced may be called with an optional deadline and an optional duration.
     * If the execution is not able to complete before the provided deadline or within
      * the timeout duration, whichever comes earlier, the
      * execution may be aborted, and either {@link
      * ErrorStatus::MISSED_DEADLINE_TRANSIENT} or {@link
-     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} must be returned. The error due
+     * ErrorStatus::MISSED_DEADLINE_PERSISTENT} may be returned. The error due
      * to an abort must be sent the same way as other errors, described above.
      *
      * Any number of calls to the executeFenced, execute* and executeSynchronously*
@@ -761,7 +762,7 @@
      */
     std::tuple<int, hal::hidl_handle, sp<hal::IFencedExecutionCallback>, hal::Timing> executeFenced(
             const hal::Request& request, const hal::hidl_vec<hal::hidl_handle>& waitFor,
-            hal::MeasureTiming measure, const hal::OptionalTimePoint& deadline,
+            hal::MeasureTiming measure, const std::optional<Deadline>& deadline,
             const hal::OptionalTimeoutDuration& loopTimeoutDuration,
             const hal::OptionalTimeoutDuration& timeoutDurationAfterFence);
 
@@ -770,11 +771,11 @@
 
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeAsynchronously(
             const hal::Request& request, hal::MeasureTiming timing,
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const hal::OptionalTimeoutDuration& loopTimeoutDuration) const;
     std::tuple<int, std::vector<hal::OutputShape>, hal::Timing> executeSynchronously(
             const hal::Request& request, hal::MeasureTiming measure,
-            const hal::OptionalTimePoint& deadline,
+            const std::optional<Deadline>& deadline,
             const hal::OptionalTimeoutDuration& loopTimeoutDuration) const;
 
     /**
diff --git a/runtime/test/TestPartitioning.cpp b/runtime/test/TestPartitioning.cpp
index 98521d0..c6eadcd 100644
--- a/runtime/test/TestPartitioning.cpp
+++ b/runtime/test/TestPartitioning.cpp
@@ -137,6 +137,7 @@
 
 using namespace android::nn::hal;
 using CompilationBuilder = ::android::nn::CompilationBuilder;
+using Deadline = ::android::nn::Deadline;
 using Device = ::android::nn::Device;
 using DeviceManager = ::android::nn::DeviceManager;
 using ExecutePreference = ::android::nn::test_wrapper::ExecutePreference;
@@ -665,7 +666,7 @@
     // Run the partitioning algorithm to create an ExecutionPlan.
     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
                          ExecutePreference preference, ExecutePriority priority,
-                         const OptionalTimePoint& deadline, ExecutionPlan* plan) {
+                         const std::optional<Deadline>& deadline, ExecutionPlan* plan) {
         return reinterpret_cast<ModelBuilder*>(getHandle())
                 ->partitionTheWork(devices, static_cast<uint32_t>(preference),
                                    static_cast<int32_t>(priority), deadline, plan);
diff --git a/runtime/test/TestValidation.cpp b/runtime/test/TestValidation.cpp
index 12debbd..9e1f910 100644
--- a/runtime/test/TestValidation.cpp
+++ b/runtime/test/TestValidation.cpp
@@ -2099,7 +2099,7 @@
                 n == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT);
 }
 
-TEST_F(ValidationTestCompilationForDevices_1, SetTimeoutOverflow) {
+TEST_F(ValidationTestCompilationForDevices_1, SetTimeoutMaximum) {
     if (!mCompilation) {
         return;
     }
@@ -2107,7 +2107,7 @@
     constexpr uint64_t duration = std::numeric_limits<uint64_t>::max();
     EXPECT_EQ(ANeuralNetworksCompilation_setTimeout(mCompilation, duration),
               ANEURALNETWORKS_NO_ERROR);
-    EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_BAD_DATA);
+    EXPECT_EQ(ANeuralNetworksCompilation_finish(mCompilation), ANEURALNETWORKS_NO_ERROR);
 }
 
 class ValidationTestCompilationForDevices_2 : public ValidationTestModel {
@@ -2423,12 +2423,12 @@
     }
 }
 
-enum class TimeoutDurationType { SHORT, OVERFLOW };
+enum class TimeoutDurationType { SHORT, MAXIMUM };
 uint64_t createTimeoutDuration(TimeoutDurationType type) {
     switch (type) {
         case TimeoutDurationType::SHORT:
             return kShortWaitInNanoseconds;
-        case TimeoutDurationType::OVERFLOW:
+        case TimeoutDurationType::MAXIMUM:
             return std::numeric_limits<uint64_t>::max();
     }
     LOG(FATAL) << "Invalid TimeoutDurationType: " << static_cast<int>(type);
@@ -2468,16 +2468,15 @@
         EXPECT_EQ(ANeuralNetworksExecution_setTimeout(execution, timeoutDuration),
                   ANEURALNETWORKS_NO_ERROR);
 
-        const auto checkResult = [timeoutDurationType](int launchCode, int resultCode) {
+        const auto checkResult = [timeoutDurationType](int n) {
             switch (timeoutDurationType) {
                 case TimeoutDurationType::SHORT:
-                    EXPECT_EQ(launchCode, ANEURALNETWORKS_NO_ERROR);
-                    EXPECT_TRUE(resultCode == ANEURALNETWORKS_NO_ERROR ||
-                                resultCode == ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT ||
-                                resultCode == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT);
+                    EXPECT_TRUE(n == ANEURALNETWORKS_NO_ERROR ||
+                                n == ANEURALNETWORKS_MISSED_DEADLINE_TRANSIENT ||
+                                n == ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT);
                     return;
-                case TimeoutDurationType::OVERFLOW:
-                    EXPECT_EQ(launchCode, ANEURALNETWORKS_BAD_DATA);
+                case TimeoutDurationType::MAXIMUM:
+                    EXPECT_EQ(n, ANEURALNETWORKS_NO_ERROR);
                     return;
             }
             LOG(FATAL) << "Invalid TimeoutDurationType: " << static_cast<int>(timeoutDurationType);
@@ -2487,34 +2486,30 @@
         switch (executionType) {
             case ExecutionType::ASYNC: {
                 ANeuralNetworksEvent* event = nullptr;
-                const int launchCode = ANeuralNetworksExecution_startCompute(execution, &event);
-                const int resultCode =
-                        event ? ANeuralNetworksEvent_wait(event) : ANEURALNETWORKS_BAD_DATA;
-                checkResult(launchCode, resultCode);
+                EXPECT_EQ(ANeuralNetworksExecution_startCompute(execution, &event),
+                          ANEURALNETWORKS_NO_ERROR);
+                checkResult(ANeuralNetworksEvent_wait(event));
                 ANeuralNetworksEvent_free(event);
                 break;
             }
             case ExecutionType::SYNC: {
-                const int n = ANeuralNetworksExecution_compute(execution);
-                checkResult(n, n);
+                checkResult(ANeuralNetworksExecution_compute(execution));
                 break;
             }
             case ExecutionType::BURST: {
                 ANeuralNetworksBurst* burst;
                 ASSERT_EQ(ANeuralNetworksBurst_create(compilation, &burst),
                           ANEURALNETWORKS_NO_ERROR);
-                const int n = ANeuralNetworksExecution_burstCompute(execution, burst);
-                checkResult(n, n);
+                checkResult(ANeuralNetworksExecution_burstCompute(execution, burst));
                 ANeuralNetworksBurst_free(burst);
                 break;
             }
             case ExecutionType::FENCED: {
                 ANeuralNetworksEvent* event = nullptr;
-                const int launchCode = ANeuralNetworksExecution_startComputeWithDependencies(
-                        execution, nullptr, 0, 0, &event);
-                const int resultCode =
-                        event ? ANeuralNetworksEvent_wait(event) : ANEURALNETWORKS_BAD_DATA;
-                checkResult(launchCode, resultCode);
+                EXPECT_EQ(ANeuralNetworksExecution_startComputeWithDependencies(execution, nullptr,
+                                                                                0, 0, &event),
+                          ANEURALNETWORKS_NO_ERROR);
+                checkResult(ANeuralNetworksEvent_wait(event));
                 ANeuralNetworksEvent_free(event);
                 break;
             }
@@ -2530,8 +2525,8 @@
     runExecutionSetTimeoutTest(mCompilation, TimeoutDurationType::SHORT);
 }
 
-TEST_F(ValidationTestCompilationForDevices_1, ExecutionSetTimeoutOverflow) {
-    runExecutionSetTimeoutTest(mCompilation, TimeoutDurationType::OVERFLOW);
+TEST_F(ValidationTestCompilationForDevices_1, ExecutionSetTimeoutMaximum) {
+    runExecutionSetTimeoutTest(mCompilation, TimeoutDurationType::MAXIMUM);
 }
 
 TEST_F(ValidationTest, CreateMemoryDesc) {