Ping client Binders without autosuspend lock held

This fixes a deadlock situation:
- system_server is calling acquire/releaseWakeLock on the system suspend
  HAL, but is blocked because mAutosuspendLock is currently held
- The system suspend HAL's autosuspend thread is holding the
  mAutosuspendLock, but blocked in pingBinder(). If system_server's
  Binder thread pool is exhausted and all Binder threads are waiting on
  the acquire/releaseWakeLock call to return.

This change introduces a new lock to protect the list of client tokens.
When checking client liveness by pinging the Binder tokens, the
mAutosuspendLock must not be held, to avoid the circular dependency
described above.

The lock ordering is mAutosuspendClientTokensLock > mAutosuspendLock,
ie. mAutosuspendClientTokensLock must be locked before mAutosuspendLock.

Bug: 241476033
Bug: 237496877
Bug: 241283953
Test: atest SystemSuspendV1_0UnitTest SystemSuspendV1_0AidlTest
Merged-In: Ia480d1f77d632d7c839a1012f5cd7e7555130406
Change-Id: Ia480d1f77d632d7c839a1012f5cd7e7555130406
diff --git a/suspend/1.0/default/Android.bp b/suspend/1.0/default/Android.bp
index 9bcbc3c..eca48be 100644
--- a/suspend/1.0/default/Android.bp
+++ b/suspend/1.0/default/Android.bp
@@ -30,6 +30,7 @@
     cflags: [
         "-Wall",
         "-Werror",
+        "-Wthread-safety",
     ],
     cpp_std: "c++17",
 }
diff --git a/suspend/1.0/default/SystemSuspend.cpp b/suspend/1.0/default/SystemSuspend.cpp
index a0e08b4..5e5a745 100644
--- a/suspend/1.0/default/SystemSuspend.cpp
+++ b/suspend/1.0/default/SystemSuspend.cpp
@@ -158,13 +158,14 @@
 }
 
 bool SystemSuspend::enableAutosuspend(const sp<IBinder>& token) {
+    auto tokensLock = std::lock_guard(mAutosuspendClientTokensLock);
     auto autosuspendLock = std::lock_guard(mAutosuspendLock);
 
-    bool hasToken = std::find(mDisableAutosuspendTokens.begin(), mDisableAutosuspendTokens.end(),
-                              token) != mDisableAutosuspendTokens.end();
+    bool hasToken = std::find(mAutosuspendClientTokens.begin(), mAutosuspendClientTokens.end(),
+                              token) != mAutosuspendClientTokens.end();
 
     if (!hasToken) {
-        mDisableAutosuspendTokens.push_back(token);
+        mAutosuspendClientTokens.push_back(token);
     }
 
     if (mAutosuspendEnabled) {
@@ -178,7 +179,7 @@
 }
 
 void SystemSuspend::disableAutosuspendLocked() {
-    mDisableAutosuspendTokens.clear();
+    mAutosuspendClientTokens.clear();
     if (mAutosuspendEnabled) {
         mAutosuspendEnabled = false;
         mAutosuspendCondVar.notify_all();
@@ -187,28 +188,37 @@
 }
 
 void SystemSuspend::disableAutosuspend() {
+    auto tokensLock = std::lock_guard(mAutosuspendClientTokensLock);
     auto autosuspendLock = std::lock_guard(mAutosuspendLock);
     disableAutosuspendLocked();
 }
 
-bool SystemSuspend::hasAliveAutosuspendTokenLocked() {
-    mDisableAutosuspendTokens.erase(
-        std::remove_if(mDisableAutosuspendTokens.begin(), mDisableAutosuspendTokens.end(),
+void SystemSuspend::checkAutosuspendClientsLivenessLocked() {
+    // Ping autosuspend client tokens, remove any dead tokens from the list.
+    // mAutosuspendLock must not be held when calling this, as that could lead to a deadlock
+    // if pingBinder() can't be processed by system_server because it's Binder thread pool is
+    // exhausted and blocked on acquire/release wakelock calls.
+    mAutosuspendClientTokens.erase(
+        std::remove_if(mAutosuspendClientTokens.begin(), mAutosuspendClientTokens.end(),
                        [](const sp<IBinder>& token) { return token->pingBinder() != OK; }),
-        mDisableAutosuspendTokens.end());
+        mAutosuspendClientTokens.end());
+}
 
-    return !mDisableAutosuspendTokens.empty();
+bool SystemSuspend::hasAliveAutosuspendTokenLocked() {
+    return !mAutosuspendClientTokens.empty();
 }
 
 SystemSuspend::~SystemSuspend(void) {
+    auto tokensLock = std::lock_guard(mAutosuspendClientTokensLock);
     auto autosuspendLock = std::unique_lock(mAutosuspendLock);
 
     // signal autosuspend thread to shut down
     disableAutosuspendLocked();
 
     // wait for autosuspend thread to exit
-    mAutosuspendCondVar.wait_for(autosuspendLock, 100ms,
-                                 [this] { return !mAutosuspendThreadCreated; });
+    mAutosuspendCondVar.wait_for(autosuspendLock, 100ms, [this]() REQUIRES(mAutosuspendLock) {
+        return !mAutosuspendThreadCreated;
+    });
 }
 
 bool SystemSuspend::forceSuspend() {
@@ -273,80 +283,99 @@
         bool shouldSleep = true;
 
         while (true) {
-            if (!mAutosuspendEnabled) {
-                mAutosuspendThreadCreated = false;
-                return;
-            }
-            // If we got here by a failed write to /sys/power/wakeup_count; don't sleep
-            // since we didn't attempt to suspend on the last cycle of this loop.
-            if (shouldSleep) {
-                mAutosuspendCondVar.wait_for(autosuspendLock, mSleepTime,
-                                             [this] { return !mAutosuspendEnabled; });
-            }
-
-            if (!mAutosuspendEnabled) continue;
-
-            string wakeupCount;
             {
-                autosuspendLock.unlock();
+                base::ScopedLockAssertion autosuspendLocked(mAutosuspendLock);
 
-                lseek(mWakeupCountFd, 0, SEEK_SET);
-                wakeupCount = readFd(mWakeupCountFd);
-
-                autosuspendLock.lock();
-            }
-
-            if (wakeupCount.empty()) {
-                PLOG(ERROR) << "error reading from /sys/power/wakeup_count";
-                continue;
-            }
-
-            shouldSleep = false;
-
-            mAutosuspendCondVar.wait(
-                autosuspendLock, [this] { return mSuspendCounter == 0 || !mAutosuspendEnabled; });
-            // The mutex is locked and *MUST* remain locked until we write to /sys/power/state.
-            // Otherwise, a WakeLock might be acquired after we check mSuspendCounter and before we
-            // write to /sys/power/state.
-
-            if (!mAutosuspendEnabled) continue;
-
-            if (!hasAliveAutosuspendTokenLocked()) {
-                disableAutosuspendLocked();
-                continue;
-            }
-
-            if (!WriteStringToFd(wakeupCount, mWakeupCountFd)) {
-                PLOG(VERBOSE) << "error writing from /sys/power/wakeup_count";
-                continue;
-            }
-            bool success = WriteStringToFd(kSleepState, mStateFd);
-            shouldSleep = true;
-
-            {
-                autosuspendLock.unlock();
-
-                if (!success) {
-                    PLOG(VERBOSE) << "error writing to /sys/power/state";
+                if (!mAutosuspendEnabled) {
+                    mAutosuspendThreadCreated = false;
+                    return;
+                }
+                // If we got here by a failed write to /sys/power/wakeup_count; don't sleep
+                // since we didn't attempt to suspend on the last cycle of this loop.
+                if (shouldSleep) {
+                    mAutosuspendCondVar.wait_for(
+                        autosuspendLock, mSleepTime,
+                        [this]() REQUIRES(mAutosuspendLock) { return !mAutosuspendEnabled; });
                 }
 
-                struct SuspendTime suspendTime = readSuspendTime(mSuspendTimeFd);
-                updateSleepTime(success, suspendTime);
-
-                std::vector<std::string> wakeupReasons = readWakeupReasons(mWakeupReasonsFd);
-                if (wakeupReasons == std::vector<std::string>({kUnknownWakeup})) {
-                    LOG(INFO) << "Unknown/empty wakeup reason. Re-opening wakeup_reason file.";
-
-                    mWakeupReasonsFd =
-                        std::move(reopenFileUsingFd(mWakeupReasonsFd.get(), O_CLOEXEC | O_RDONLY));
-                }
-                mWakeupList.update(wakeupReasons);
-
-                mControlService->notifyWakeup(success, wakeupReasons);
-
-                // Take the lock before returning to the start of the loop
-                autosuspendLock.lock();
+                if (!mAutosuspendEnabled) continue;
+                autosuspendLock.unlock();
             }
+
+            lseek(mWakeupCountFd, 0, SEEK_SET);
+            string wakeupCount = readFd(mWakeupCountFd);
+
+            {
+                autosuspendLock.lock();
+                base::ScopedLockAssertion autosuspendLocked(mAutosuspendLock);
+
+                if (wakeupCount.empty()) {
+                    PLOG(ERROR) << "error reading from /sys/power/wakeup_count";
+                    continue;
+                }
+
+                shouldSleep = false;
+
+                mAutosuspendCondVar.wait(autosuspendLock, [this]() REQUIRES(mAutosuspendLock) {
+                    return mSuspendCounter == 0 || !mAutosuspendEnabled;
+                });
+
+                if (!mAutosuspendEnabled) continue;
+                autosuspendLock.unlock();
+            }
+
+            bool success;
+            {
+                auto tokensLock = std::lock_guard(mAutosuspendClientTokensLock);
+                checkAutosuspendClientsLivenessLocked();
+
+                autosuspendLock.lock();
+                base::ScopedLockAssertion autosuspendLocked(mAutosuspendLock);
+
+                if (!hasAliveAutosuspendTokenLocked()) {
+                    disableAutosuspendLocked();
+                    continue;
+                }
+
+                // Check suspend counter hasn't increased while checking client liveness
+                if (mSuspendCounter > 0) {
+                    continue;
+                }
+
+                // The mutex is locked and *MUST* remain locked until we write to /sys/power/state.
+                // Otherwise, a WakeLock might be acquired after we check mSuspendCounter and before
+                // we write to /sys/power/state.
+
+                if (!WriteStringToFd(wakeupCount, mWakeupCountFd)) {
+                    PLOG(VERBOSE) << "error writing to /sys/power/wakeup_count";
+                    continue;
+                }
+                success = WriteStringToFd(kSleepState, mStateFd);
+                shouldSleep = true;
+
+                autosuspendLock.unlock();
+            }
+
+            if (!success) {
+                PLOG(VERBOSE) << "error writing to /sys/power/state";
+            }
+
+            struct SuspendTime suspendTime = readSuspendTime(mSuspendTimeFd);
+            updateSleepTime(success, suspendTime);
+
+            std::vector<std::string> wakeupReasons = readWakeupReasons(mWakeupReasonsFd);
+            if (wakeupReasons == std::vector<std::string>({kUnknownWakeup})) {
+                LOG(INFO) << "Unknown/empty wakeup reason. Re-opening wakeup_reason file.";
+
+                mWakeupReasonsFd =
+                    std::move(reopenFileUsingFd(mWakeupReasonsFd.get(), O_CLOEXEC | O_RDONLY));
+            }
+            mWakeupList.update(wakeupReasons);
+
+            mControlService->notifyWakeup(success, wakeupReasons);
+
+            // Take the lock before returning to the start of the loop
+            autosuspendLock.lock();
         }
     });
     autosuspendThread.detach();
diff --git a/suspend/1.0/default/SystemSuspend.h b/suspend/1.0/default/SystemSuspend.h
index ddcee24..a3f9a00 100644
--- a/suspend/1.0/default/SystemSuspend.h
+++ b/suspend/1.0/default/SystemSuspend.h
@@ -18,6 +18,7 @@
 #define ANDROID_SYSTEM_SYSTEM_SUSPEND_V1_0_H
 
 #include <android-base/result.h>
+#include <android-base/thread_annotations.h>
 #include <android-base/unique_fd.h>
 #include <android/system/suspend/internal/SuspendInfo.h>
 #include <utils/RefBase.h>
@@ -99,27 +100,39 @@
 
    private:
     ~SystemSuspend(void) override;
-    void initAutosuspendLocked();
-    void disableAutosuspendLocked();
-    bool hasAliveAutosuspendTokenLocked();
 
-    std::mutex mAutosuspendLock;
-    std::condition_variable mAutosuspendCondVar;
-    uint32_t mSuspendCounter;
+    std::mutex mAutosuspendClientTokensLock;
+    std::mutex mAutosuspendLock ACQUIRED_AFTER(mAutosuspendClientTokensLock);
+    std::mutex mSuspendInfoLock;
+
+    void initAutosuspendLocked()
+        EXCLUSIVE_LOCKS_REQUIRED(mAutosuspendClientTokensLock, mAutosuspendLock);
+    void disableAutosuspendLocked()
+        EXCLUSIVE_LOCKS_REQUIRED(mAutosuspendClientTokensLock, mAutosuspendLock);
+    void checkAutosuspendClientsLivenessLocked()
+        EXCLUSIVE_LOCKS_REQUIRED(mAutosuspendClientTokensLock);
+    bool hasAliveAutosuspendTokenLocked() EXCLUSIVE_LOCKS_REQUIRED(mAutosuspendClientTokensLock);
+
+    std::condition_variable mAutosuspendCondVar GUARDED_BY(mAutosuspendLock);
+    uint32_t mSuspendCounter GUARDED_BY(mAutosuspendLock);
+
+    std::vector<sp<IBinder>> mAutosuspendClientTokens GUARDED_BY(mAutosuspendClientTokensLock);
+    std::atomic<bool> mAutosuspendEnabled GUARDED_BY(mAutosuspendLock){false};
+    std::atomic<bool> mAutosuspendThreadCreated GUARDED_BY(mAutosuspendLock){false};
+
     unique_fd mWakeupCountFd;
     unique_fd mStateFd;
 
     unique_fd mSuspendStatsFd;
     unique_fd mSuspendTimeFd;
 
-    std::mutex mSuspendInfoLock;
-    SuspendInfo mSuspendInfo;
+    SuspendInfo mSuspendInfo GUARDED_BY(mSuspendInfoLock);
 
     const SleepTimeConfig kSleepTimeConfig;
 
     // Amount of thread sleep time between consecutive iterations of the suspend loop
     std::chrono::milliseconds mSleepTime;
-    int32_t mNumConsecutiveBadSuspends;
+    int32_t mNumConsecutiveBadSuspends GUARDED_BY(mSuspendInfoLock);
 
     // Updates thread sleep time and suspend stats depending on the result of suspend attempt
     void updateSleepTime(bool success, const struct SuspendTime& suspendTime);
@@ -137,10 +150,6 @@
     unique_fd mWakeLockFd;
     unique_fd mWakeUnlockFd;
     unique_fd mWakeupReasonsFd;
-
-    std::atomic<bool> mAutosuspendEnabled{false};
-    std::atomic<bool> mAutosuspendThreadCreated{false};
-    std::vector<sp<IBinder>> mDisableAutosuspendTokens;
 };
 
 }  // namespace V1_0
diff --git a/suspend/1.0/default/SystemSuspendUnitTest.cpp b/suspend/1.0/default/SystemSuspendUnitTest.cpp
index c779623..f6e216b 100644
--- a/suspend/1.0/default/SystemSuspendUnitTest.cpp
+++ b/suspend/1.0/default/SystemSuspendUnitTest.cpp
@@ -335,6 +335,57 @@
     ASSERT_TRUE(isSystemSuspendBlocked(150));
 }
 
+TEST_F(SystemSuspendTest, UnresponsiveClientDoesNotBlockAcquireRelease) {
+    static std::mutex _lock;
+    static std::condition_variable inPingBinderCondVar;
+    static bool inPingBinder = false;
+
+    class UnresponsiveBinder : public BBinder {
+        android::status_t pingBinder() override {
+            auto lock = std::unique_lock(_lock);
+            inPingBinder = true;
+            inPingBinderCondVar.notify_all();
+
+            // Block pingBinder until test finishes and releases its lock
+            inPingBinderCondVar.wait(lock);
+            return android::UNKNOWN_ERROR;
+        }
+    };
+
+    systemSuspend->disableAutosuspend();
+    unblockSystemSuspendFromWakeupCount();
+    ASSERT_TRUE(isSystemSuspendBlocked());
+
+    auto token = sp<UnresponsiveBinder>::make();
+    bool enabled = false;
+    controlServiceInternal->enableAutosuspend(token, &enabled);
+    unblockSystemSuspendFromWakeupCount();
+
+    auto lock = std::unique_lock(_lock);
+    // wait until pingBinder has been called.
+    if (!inPingBinder) {
+        inPingBinderCondVar.wait(lock);
+    }
+    // let pingBinder finish once we release the test lock
+    inPingBinderCondVar.notify_all();
+
+    std::condition_variable wakeLockAcquired;
+    std::thread(
+        [this](std::condition_variable& wakeLockAcquired) {
+            std::shared_ptr<IWakeLock> testLock = acquireWakeLock("testLock");
+            testLock->release();
+            wakeLockAcquired.notify_all();
+        },
+        std::ref(wakeLockAcquired))
+        .detach();
+
+    std::mutex _acquireReleaseLock;
+    auto acquireReleaseLock = std::unique_lock(_acquireReleaseLock);
+    bool timedOut = wakeLockAcquired.wait_for(acquireReleaseLock, 200ms) == std::cv_status::timeout;
+
+    ASSERT_FALSE(timedOut);
+}
+
 TEST_F(SystemSuspendTest, AutosuspendLoop) {
     checkLoop(5);
 }