Reduce contention for renderengine's async ops.

RenderEngine was holding a lock when executing work items in a task
queue. The lock was intended for managing the task queue itself since
we're not using a concurrent data structure for the queue, but the lock
was not required to be held while executing work. This was causing
performance issues where if multiple ExternalTextures were destroyed
very closely in time, subsequent requests to unmap the underlying
GrBackendTexture needed to block on prior requests to finish. The
exemplary case is destroying a layer, where many buffers associated with
that layer needed to be freed on the main thread.

The fix in this patch is two-fold:
1. Don't lock anything else with the work queue lock
2. (1) breaks thread-safety for mRunning, so make it atomic which is
sufficient since it's only used for safe destruction.

Bug: 188632264
Bug: 190375955
Test: Perfetto trace of app lifecyle.

Change-Id: Ia238b43cb971fde42a92eab4f8247286d2e2d247
diff --git a/libs/renderengine/threaded/RenderEngineThreaded.cpp b/libs/renderengine/threaded/RenderEngineThreaded.cpp
index 9009ce4..5febd8c 100644
--- a/libs/renderengine/threaded/RenderEngineThreaded.cpp
+++ b/libs/renderengine/threaded/RenderEngineThreaded.cpp
@@ -48,11 +48,8 @@
 }
 
 RenderEngineThreaded::~RenderEngineThreaded() {
-    {
-        std::lock_guard lock(mThreadMutex);
-        mRunning = false;
-        mCondition.notify_one();
-    }
+    mRunning = false;
+    mCondition.notify_one();
 
     if (mThread.joinable()) {
         mThread.join();
@@ -71,21 +68,32 @@
 
     mRenderEngine = factory();
 
-    std::unique_lock<std::mutex> lock(mThreadMutex);
     pthread_setname_np(pthread_self(), mThreadName);
 
     {
-        std::unique_lock<std::mutex> lock(mInitializedMutex);
+        std::scoped_lock lock(mInitializedMutex);
         mIsInitialized = true;
     }
     mInitializedCondition.notify_all();
 
     while (mRunning) {
-        if (!mFunctionCalls.empty()) {
-            auto task = mFunctionCalls.front();
-            mFunctionCalls.pop();
-            task(*mRenderEngine);
+        const auto getNextTask = [this]() -> std::optional<Work> {
+            std::scoped_lock lock(mThreadMutex);
+            if (!mFunctionCalls.empty()) {
+                Work task = mFunctionCalls.front();
+                mFunctionCalls.pop();
+                return std::make_optional<Work>(task);
+            }
+            return std::nullopt;
+        };
+
+        const auto task = getNextTask();
+
+        if (task) {
+            (*task)(*mRenderEngine);
         }
+
+        std::unique_lock<std::mutex> lock(mThreadMutex);
         mCondition.wait(lock, [this]() REQUIRES(mThreadMutex) {
             return !mRunning || !mFunctionCalls.empty();
         });
diff --git a/libs/renderengine/threaded/RenderEngineThreaded.h b/libs/renderengine/threaded/RenderEngineThreaded.h
index eb6098e..b78bf21 100644
--- a/libs/renderengine/threaded/RenderEngineThreaded.h
+++ b/libs/renderengine/threaded/RenderEngineThreaded.h
@@ -82,9 +82,10 @@
     // Protects the creation and destruction of mThread.
     mutable std::mutex mThreadMutex;
     std::thread mThread GUARDED_BY(mThreadMutex);
-    bool mRunning GUARDED_BY(mThreadMutex) = true;
-    mutable std::queue<std::function<void(renderengine::RenderEngine& instance)>> mFunctionCalls
-            GUARDED_BY(mThreadMutex);
+    std::atomic<bool> mRunning = true;
+
+    using Work = std::function<void(renderengine::RenderEngine&)>;
+    mutable std::queue<Work> mFunctionCalls GUARDED_BY(mThreadMutex);
     mutable std::condition_variable mCondition;
 
     // Used to allow select thread safe methods to be accessed without requiring the