vk: Make SyncThread wait for vkQueueSubmit before waiting on fences

The Vulkan spec requires the fence passed to vkQueueSubmit() to be
*externally synchronized*, i.e. we cannot submit to a queue with a
fence while another thread is waiting on that fence. Thus a SyncThread
executing a SYNC_THREAD_WAIT_VK command must first wait until a
vkQueueSubmit() using this fence has been called before it calls
vkWaitForFences().

For each VkFence, we use a condition variable and a mutex for
thread synchronization.

Also see:
https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#fundamentals-threadingbehavior
https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/issues/519

Change-Id: I8b7bdbb3dfd9d703a6f962aee9d6ede8fc6dc016
diff --git a/protocols/vulkan/xml/cereal/decoder.py b/protocols/vulkan/xml/cereal/decoder.py
index 5286565..c21aea3 100644
--- a/protocols/vulkan/xml/cereal/decoder.py
+++ b/protocols/vulkan/xml/cereal/decoder.py
@@ -653,6 +653,7 @@
     "vkDestroySemaphore" : emit_global_state_wrapped_decoding,
 
     "vkCreateFence" : emit_global_state_wrapped_decoding,
+    "vkResetFences" : emit_global_state_wrapped_decoding,
     "vkDestroyFence" : emit_global_state_wrapped_decoding,
 
     # VK_GOOGLE_gfxstream
diff --git a/stream-servers/vulkan/VkDecoder.cpp b/stream-servers/vulkan/VkDecoder.cpp
index cf064ea..e26217c 100644
--- a/stream-servers/vulkan/VkDecoder.cpp
+++ b/stream-servers/vulkan/VkDecoder.cpp
@@ -2328,14 +2328,11 @@
                 VkDevice device;
                 uint32_t fenceCount;
                 const VkFence* pFences;
-                // Begin non wrapped dispatchable handle unboxing for device;
+                // Begin global wrapped dispatchable handle unboxing for device;
                 uint64_t cgen_var_0;
                 memcpy((uint64_t*)&cgen_var_0, *readStreamPtrPtr, 1 * 8);
                 *readStreamPtrPtr += 1 * 8;
                 *(VkDevice*)&device = (VkDevice)(VkDevice)((VkDevice)(*&cgen_var_0));
-                auto unboxed_device = unbox_VkDevice(device);
-                auto vk = dispatch_VkDevice(device);
-                // End manual dispatchable handle unboxing for device;
                 memcpy((uint32_t*)&fenceCount, *readStreamPtrPtr, sizeof(uint32_t));
                 *readStreamPtrPtr += sizeof(uint32_t);
                 vkReadStream->alloc((void**)&pFences, ((fenceCount)) * sizeof(const VkFence));
@@ -2354,7 +2351,7 @@
                     fprintf(stderr, "stream %p: call vkResetFences 0x%llx 0x%llx 0x%llx \n", ioStream, (unsigned long long)device, (unsigned long long)fenceCount, (unsigned long long)pFences);
                 }
                 VkResult vkResetFences_VkResult_return = (VkResult)0;
-                vkResetFences_VkResult_return = vk->vkResetFences(unboxed_device, fenceCount, pFences);
+                vkResetFences_VkResult_return = m_state->on_vkResetFences(&m_pool, device, fenceCount, pFences);
                 vkStream->unsetHandleMapping();
                 vkStream->write(&vkResetFences_VkResult_return, sizeof(VkResult));
                 vkStream->commitWrite();
diff --git a/stream-servers/vulkan/VkDecoderGlobalState.cpp b/stream-servers/vulkan/VkDecoderGlobalState.cpp
index 8a31899..ece1564 100644
--- a/stream-servers/vulkan/VkDecoderGlobalState.cpp
+++ b/stream-servers/vulkan/VkDecoderGlobalState.cpp
@@ -1901,7 +1901,7 @@
             AutoLock lock(mLock);
 
             DCHECK(mFenceInfo.find(*pFence) == mFenceInfo.end());
-            mFenceInfo[*pFence] = {};
+            // Create FenceInfo for *pFence.
             auto& fenceInfo = mFenceInfo[*pFence];
             fenceInfo.device = device;
             fenceInfo.vk = vk;
@@ -1913,6 +1913,29 @@
         return res;
     }
 
+    VkResult on_vkResetFences(android::base::BumpPool* pool,
+                              VkDevice boxed_device,
+                              uint32_t fenceCount,
+                              const VkFence* pFences) {
+        auto device = unbox_VkDevice(boxed_device);
+        auto vk = dispatch_VkDevice(boxed_device);
+
+        VkResult res = vk->vkResetFences(device, fenceCount, pFences);
+        if (res != VK_SUCCESS) {
+            return res;
+        }
+
+        // Reset all fences' states to kNotWaitable.
+        {
+            AutoLock lock(mLock);
+            for (uint32_t i = 0; i < fenceCount; i++) {
+                DCHECK(mFenceInfo.find(pFences[i]) != mFenceInfo.end());
+                mFenceInfo[pFences[i]].state = FenceInfo::State::kNotWaitable;
+            }
+        }
+        return VK_SUCCESS;
+    }
+
     VkResult on_vkImportSemaphoreFdKHR(
             android::base::BumpPool* pool,
             VkDevice boxed_device,
@@ -3738,7 +3761,21 @@
 
         AutoLock qlock(*ql);
 
-        return vk->vkQueueSubmit(queue, submitCount, pSubmits, fence);
+        auto result = vk->vkQueueSubmit(queue, submitCount, pSubmits, fence);
+
+        // After vkQueueSubmit is called, we can signal the condition variable
+        // in FenceInfo, so that other threads (e.g. SyncThread) can call
+        // waitForFence() on this fence.
+        lock.lock();
+        auto fenceInfo = mFenceInfo.find(fence);
+        if (fenceInfo != mFenceInfo.end()) {
+            fenceInfo->second.state = FenceInfo::State::kWaitable;
+            fenceInfo->second.lock.lock();
+            fenceInfo->second.cv.signalAndUnlock(&fenceInfo->second.lock);
+        }
+        lock.unlock();
+
+        return result;
     }
 
     VkResult on_vkQueueWaitIdle(
@@ -4626,10 +4663,34 @@
             return VK_SUCCESS;
         }
 
+        // The Vulkan spec requires the fence passed to vkQueueSubmit() to be
+        // *externally synchronized*, i.e. we cannot submit to a queue with a
+        // fence while another thread is waiting on that fence. Threads that
+        // call this function therefore have to wait until a vkQueueSubmit()
+        // using this fence has been called before calling vkWaitForFences().
+        // We use a condition variable and a mutex for that synchronization.
+        //
+        // See:
+        // https://www.khronos.org/registry/vulkan/specs/1.2/html/vkspec.html#fundamentals-threadingbehavior
+        // https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/issues/519
+
         const VkDevice device = mFenceInfo[fence].device;
         const VulkanDispatch* vk = mFenceInfo[fence].vk;
+        auto& fenceLock = mFenceInfo[fence].lock;
+        auto& cv = mFenceInfo[fence].cv;
         lock.unlock();
 
+        fenceLock.lock();
+        cv.wait(&fenceLock, [this, fence] {
+            AutoLock lock(mLock);
+            if (mFenceInfo[fence].state == FenceInfo::State::kWaitable) {
+                mFenceInfo[fence].state = FenceInfo::State::kWaiting;
+                return true;
+            }
+            return false;
+        });
+        fenceLock.unlock();
+
         return vk->vkWaitForFences(device, /* fenceCount */ 1u, &fence,
                                    /* waitAll */ false, timeout);
     }
@@ -6387,6 +6448,16 @@
         VkDevice device = VK_NULL_HANDLE;
         VkFence boxed = VK_NULL_HANDLE;
         VulkanDispatch* vk = nullptr;
+
+        android::base::StaticLock lock;
+        android::base::ConditionVariable cv;
+
+        enum class State {
+            kWaitable,
+            kNotWaitable,
+            kWaiting,
+        };
+        State state = State::kNotWaitable;
     };
 
     struct SemaphoreInfo {
@@ -6971,6 +7042,13 @@
                                    pFence);
 }
 
+VkResult VkDecoderGlobalState::on_vkResetFences(android::base::BumpPool* pool,
+                                                VkDevice device,
+                                                uint32_t fenceCount,
+                                                const VkFence* pFences) {
+    return mImpl->on_vkResetFences(pool, device, fenceCount, pFences);
+}
+
 void VkDecoderGlobalState::on_vkDestroyFence(
         android::base::BumpPool* pool,
         VkDevice device,
diff --git a/stream-servers/vulkan/VkDecoderGlobalState.h b/stream-servers/vulkan/VkDecoderGlobalState.h
index cd44393..7e0d622 100644
--- a/stream-servers/vulkan/VkDecoderGlobalState.h
+++ b/stream-servers/vulkan/VkDecoderGlobalState.h
@@ -600,6 +600,10 @@
                               const VkFenceCreateInfo* pCreateInfo,
                               const VkAllocationCallbacks* pAllocator,
                               VkFence* pFence);
+    VkResult on_vkResetFences(android::base::BumpPool* pool,
+                              VkDevice device,
+                              uint32_t fenceCount,
+                              const VkFence* pFences);
     void on_vkDestroyFence(android::base::BumpPool* pool,
                            VkDevice device,
                            VkFence fence,