Vulkan: Submit queue more often for texture data

Outside command buffers should be flushed more often so that
texture data does not accumulate until just before the first
render pass that references it.

* Added a tracker (in ContextVk), updated next to
copyBufferToImage(), for the total size of buffer-to-image
copies. When its value reaches kMaxBufferToImageCopySize, the
outside command buffer is submitted and the tracker is reset.
Currently, the threshold is set to 1 << 28 bytes (256 MiB).

* Added a variation of submitFrame() to be used in outside
command buffer submission. The main difference is that it
copies mResourceUseList into the share group
(getShareGroupVk()) rather than moving it.
  * Refactored the two functions into submitFrameImpl().

* Added a helper function to submit the outside command
buffer.
* Added explicit copy functions for ResourceUseList and
SharedResourceUse. Copying a SharedResourceUse increments the
shared use counter by 1.
* Added a test to make sure submitting the outside command
buffer does not break the render pass.

Bug: angleproject:6354
Change-Id: Ia1d4f857fcbd06934609c94622ccbf675b3b1c72
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/3379231
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Charlie Lao <cclao@google.com>
Commit-Queue: Amirali Abdolrashidi <abdolrashidi@google.com>
diff --git a/src/common/angleutils.h b/src/common/angleutils.h
index d31684c..99161ac 100644
--- a/src/common/angleutils.h
+++ b/src/common/angleutils.h
@@ -133,6 +133,7 @@
 #define ANGLE_VK_PERF_COUNTERS_X(FN)              \
     FN(primaryBuffers)                            \
     FN(renderPasses)                              \
+    FN(submittedFrames)                           \
     FN(writeDescriptorSets)                       \
     FN(flushedOutsideRenderPassCommandBuffers)    \
     FN(resolveImageCommands)                      \
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index 764d2af..4758062 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -2470,6 +2470,19 @@
 
 angle::Result ContextVk::submitFrame(const vk::Semaphore *signalSemaphore, Serial *submitSerialOut)
 {
+    return submitFrameImpl(signalSemaphore, submitSerialOut, SubmitFrameType::OutsideAndRPCommands);
+}
+
+angle::Result ContextVk::submitFrameOutsideCommandBufferOnly(Serial *submitSerialOut)
+{
+    ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::submitFrameOutsideCommandBufferOnly");
+    return submitFrameImpl(nullptr, submitSerialOut, SubmitFrameType::OutsideRPCommandsOnly);
+}
+
+angle::Result ContextVk::submitFrameImpl(const vk::Semaphore *signalSemaphore,
+                                         Serial *submitSerialOut,
+                                         SubmitFrameType submitFrameType)
+{
     if (mCurrentWindowSurface)
     {
         const vk::Semaphore *waitSemaphore =
@@ -2486,18 +2499,29 @@
         dumpCommandStreamDiagnostics();
     }
 
-    getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
+    if (submitFrameType == SubmitFrameType::OutsideAndRPCommands)
+    {
+        getShareGroupVk()->acquireResourceUseList(std::move(mResourceUseList));
+    }
+    else
+    {
+        getShareGroupVk()->copyResourceUseList(mResourceUseList);
+    }
+
     ANGLE_TRY(mRenderer->submitFrame(this, hasProtectedContent(), mContextPriority,
                                      std::move(mWaitSemaphores),
                                      std::move(mWaitSemaphoreStageMasks), signalSemaphore,
                                      std::move(mCurrentGarbage), &mCommandPools, submitSerialOut));
 
-    getShareGroupVk()->releaseResourceUseLists(*submitSerialOut);
-    // Now that we have processed resourceUseList, some of pending garbage may no longer pending
-    // and should be moved to garbage list.
-    mRenderer->cleanupPendingSubmissionGarbage();
+    if (submitFrameType == SubmitFrameType::OutsideAndRPCommands)
+    {
+        getShareGroupVk()->releaseResourceUseLists(*submitSerialOut);
+        // Now that we have processed resourceUseList, some of pending garbage may no longer pending
+        // and should be moved to garbage list.
+        mRenderer->cleanupPendingSubmissionGarbage();
+        onRenderPassFinished(RenderPassClosureReason::AlreadySpecifiedElsewhere);
+    }
 
-    onRenderPassFinished(RenderPassClosureReason::AlreadySpecifiedElsewhere);
     mComputeDirtyBits |= mNewComputeCommandBufferDirtyBits;
 
     if (mGpuEventsEnabled)
@@ -2505,6 +2529,22 @@
         ANGLE_TRY(checkCompletedGpuEvents());
     }
 
+    mPerfCounters.submittedFrames++;
+    resetTotalBufferToImageCopySize();
+
+    return angle::Result::Continue;
+}
+
+angle::Result ContextVk::onCopyUpdate(VkDeviceSize size)
+{
+    mTotalBufferToImageCopySize += size;
+    ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::onCopyUpdate");
+    // If the copy size exceeds the specified threshold, submit the outside command buffer.
+    VkDeviceSize copySize = getTotalBufferToImageCopySize();
+    if (copySize >= kMaxBufferToImageCopySize)
+    {
+        ANGLE_TRY(submitOutsideRenderPassCommandsImpl());
+    }
     return angle::Result::Continue;
 }
 
@@ -6363,6 +6403,15 @@
             !mRenderer->getFeatures().supportsIndexTypeUint8.enabled);
 }
 
+angle::Result ContextVk::submitOutsideRenderPassCommandsImpl()
+{
+    ANGLE_TRACE_EVENT0("gpu.angle", "ContextVk::submitOutsideRenderPassCommandsImpl");
+    ANGLE_TRY(flushOutsideRenderPassCommands());
+    Serial unusedSerial;
+    ANGLE_TRY(submitFrameOutsideCommandBufferOnly(&unusedSerial));
+    return angle::Result::Continue;
+}
+
 angle::Result ContextVk::flushOutsideRenderPassCommands()
 {
     if (mOutsideRenderPassCommands->empty())
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h
index 6621c58..5e170ed 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -37,6 +37,10 @@
 static constexpr uint32_t kMaxGpuEventNameLen = 32;
 using EventName                               = std::array<char, kMaxGpuEventNameLen>;
 
+// If the total size of copyBufferToImage commands in the outside command buffer reaches the
+// threshold below, the latter is flushed.
+static constexpr VkDeviceSize kMaxBufferToImageCopySize = 1 << 28;
+
 using ContextVkDescriptorSetList = angle::PackedEnumMap<PipelineType, uint32_t>;
 
 struct ContextVkPerfCounters
@@ -88,6 +92,12 @@
     std::vector<VkWriteDescriptorSet> mWriteDescriptorSets;
 };
 
+enum class SubmitFrameType
+{
+    OutsideAndRPCommands,
+    OutsideRPCommandsOnly,
+};
+
 class ContextVk : public ContextImpl, public vk::Context, public MultisampleTextureInitializer
 {
   public:
@@ -634,6 +644,7 @@
     angle::Result startNextSubpass();
     angle::Result flushCommandsAndEndRenderPass(RenderPassClosureReason reason);
     angle::Result flushCommandsAndEndRenderPassWithoutQueueSubmit(RenderPassClosureReason reason);
+    angle::Result submitOutsideRenderPassCommandsImpl();
 
     angle::Result syncExternalMemory();
 
@@ -674,6 +685,12 @@
 
     vk::BufferHelper &getEmptyBuffer() { return mEmptyBuffer; }
 
+    // Keeping track of the buffer copy size. Used to determine when to submit the outside command
+    // buffer.
+    angle::Result onCopyUpdate(VkDeviceSize size);
+    void resetTotalBufferToImageCopySize() { mTotalBufferToImageCopySize = 0; }
+    VkDeviceSize getTotalBufferToImageCopySize() const { return mTotalBufferToImageCopySize; }
+
     // Implementation of MultisampleTextureInitializer
     angle::Result initializeMultisampleTextureToBlack(const gl::Context *context,
                                                       gl::Texture *glTexture) override;
@@ -977,6 +994,10 @@
     void writeAtomicCounterBufferDriverUniformOffsets(uint32_t *offsetsOut, size_t offsetsSize);
 
     angle::Result submitFrame(const vk::Semaphore *signalSemaphore, Serial *submitSerialOut);
+    angle::Result submitFrameOutsideCommandBufferOnly(Serial *submitSerialOut);
+    angle::Result submitFrameImpl(const vk::Semaphore *signalSemaphore,
+                                  Serial *submitSerialOut,
+                                  SubmitFrameType submitFrameType);
 
     angle::Result synchronizeCpuGpuTime();
     angle::Result traceGpuEventImpl(vk::OutsideRenderPassCommandBuffer *commandBuffer,
@@ -1216,6 +1237,10 @@
     // GL_EXT_shader_framebuffer_fetch_non_coherent
     bool mLastProgramUsesFramebufferFetch;
 
+    // The size of copy commands issued between buffers and images. Used to submit the command
+    // buffer for the outside render pass.
+    VkDeviceSize mTotalBufferToImageCopySize = 0;
+
     // Semaphores that must be waited on in the next submission.
     std::vector<VkSemaphore> mWaitSemaphores;
     std::vector<VkPipelineStageFlags> mWaitSemaphoreStageMasks;
diff --git a/src/libANGLE/renderer/vulkan/DisplayVk.h b/src/libANGLE/renderer/vulkan/DisplayVk.h
index 03e7fde..3dfd148 100644
--- a/src/libANGLE/renderer/vulkan/DisplayVk.h
+++ b/src/libANGLE/renderer/vulkan/DisplayVk.h
@@ -41,6 +41,12 @@
     {
         mResourceUseLists.emplace_back(std::move(resourceUseList));
     }
+    void copyResourceUseList(vk::ResourceUseList &resourceUseList)
+    {
+        vk::ResourceUseList copyResourceUseList;
+        copyResourceUseList.copy(resourceUseList);
+        mResourceUseLists.emplace_back(std::move(copyResourceUseList));
+    }
 
     vk::BufferPool *getDefaultBufferPool(RendererVk *renderer,
                                          VkDeviceSize size,
diff --git a/src/libANGLE/renderer/vulkan/ResourceVk.cpp b/src/libANGLE/renderer/vulkan/ResourceVk.cpp
index 0c578761..d61668b 100644
--- a/src/libANGLE/renderer/vulkan/ResourceVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ResourceVk.cpp
@@ -199,6 +199,16 @@
     ASSERT(mResourceUses.empty());
 }
 
+void ResourceUseList::copy(ResourceUseList &srcResourceUse)
+{
+    size_t size = srcResourceUse.mResourceUses.size();
+    mResourceUses.resize(size);
+    for (size_t i = 0; i < size; i++)
+    {
+        mResourceUses[i].copy(srcResourceUse.mResourceUses[i]);
+    }
+}
+
 ResourceUseList &ResourceUseList::operator=(ResourceUseList &&rhs)
 {
     std::swap(mResourceUses, rhs.mResourceUses);
diff --git a/src/libANGLE/renderer/vulkan/ResourceVk.h b/src/libANGLE/renderer/vulkan/ResourceVk.h
index 248a3d5..541a465 100644
--- a/src/libANGLE/renderer/vulkan/ResourceVk.h
+++ b/src/libANGLE/renderer/vulkan/ResourceVk.h
@@ -45,6 +45,12 @@
         return *this;
     }
 
+    void copy(SharedResourceUse &src)
+    {
+        mUse = src.mUse;
+        mUse->counter++;
+    }
+
     ANGLE_INLINE bool valid() const { return mUse != nullptr; }
 
     void init()
@@ -165,6 +171,7 @@
     ResourceUseList &operator=(ResourceUseList &&rhs);
 
     void add(const SharedResourceUse &resourceUse);
+    void copy(ResourceUseList &srcResourceUse);
 
     void releaseResourceUses();
     void releaseResourceUsesAndUpdateSerials(Serial serial);
diff --git a/src/libANGLE/renderer/vulkan/vk_helpers.cpp b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
index 1280c9e..f9f07f8 100644
--- a/src/libANGLE/renderer/vulkan/vk_helpers.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_helpers.cpp
@@ -7319,11 +7319,12 @@
                 ANGLE_TRY(
                     contextVk->getOutsideRenderPassCommandBuffer(bufferAccess, &commandBuffer));
 
+                VkBufferImageCopy *copyRegion = &update.data.buffer.copyRegion;
                 commandBuffer->copyBufferToImage(currentBuffer->getBuffer().getHandle(), mImage,
-                                                 getCurrentLayout(), 1,
-                                                 &update.data.buffer.copyRegion);
+                                                 getCurrentLayout(), 1, copyRegion);
+                ANGLE_TRY(contextVk->onCopyUpdate(currentBuffer->getSize()));
                 onWrite(updateMipLevelGL, 1, updateBaseLayer, updateLayerCount,
-                        update.data.buffer.copyRegion.imageSubresource.aspectMask);
+                        copyRegion->imageSubresource.aspectMask);
             }
             else
             {
@@ -7333,11 +7334,12 @@
                 ANGLE_TRY(
                     contextVk->getOutsideRenderPassCommandBuffer(imageAccess, &commandBuffer));
 
+                VkImageCopy *copyRegion = &update.data.image.copyRegion;
                 commandBuffer->copyImage(update.refCounted.image->get().getImage(),
                                          update.refCounted.image->get().getCurrentLayout(), mImage,
-                                         getCurrentLayout(), 1, &update.data.image.copyRegion);
+                                         getCurrentLayout(), 1, copyRegion);
                 onWrite(updateMipLevelGL, 1, updateBaseLayer, updateLayerCount,
-                        update.data.image.copyRegion.dstSubresource.aspectMask);
+                        copyRegion->dstSubresource.aspectMask);
             }
 
             update.release(contextVk->getRenderer());
diff --git a/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp b/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp
index c9d0735..76c54e1 100644
--- a/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp
+++ b/src/tests/gl_tests/VulkanPerformanceCounterTest.cpp
@@ -237,6 +237,72 @@
     EXPECT_EQ(expectedRenderPassCount, actualRenderPassCount);
 }
 
+// Tests that submitting the outside command buffer due to texture upload size does not break the
+// current render pass.
+TEST_P(VulkanPerformanceCounterTest, SubmittingOutsideCommandBufferDoesNotBreakRenderPass)
+{
+    // http://anglebug.com/6354
+
+    size_t kMaxBufferToImageCopySize  = 1 << 28;
+    uint32_t kNumSubmits              = 2;
+    uint32_t expectedRenderPassCount  = getPerfCounters().renderPasses + 1;
+    uint32_t expectedSubmitFrameCount = getPerfCounters().submittedFrames + kNumSubmits;
+
+    // Step 1: Set up a simple 2D texture.
+    GLTexture texture;
+    GLsizei texDim         = 256;
+    uint32_t pixelSizeRGBA = 4;
+    uint32_t textureSize   = texDim * texDim * pixelSizeRGBA;
+    std::vector<GLColor> kInitialData(texDim * texDim, GLColor::green);
+
+    glBindTexture(GL_TEXTURE_2D, texture);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texDim, texDim, 0, GL_RGBA, GL_UNSIGNED_BYTE,
+                 kInitialData.data());
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+    auto quadVerts = GetQuadVertices();
+
+    GLBuffer vertexBuffer;
+    glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
+    glBufferData(GL_ARRAY_BUFFER, quadVerts.size() * sizeof(quadVerts[0]), quadVerts.data(),
+                 GL_STATIC_DRAW);
+
+    ANGLE_GL_PROGRAM(program, essl1_shaders::vs::Texture2D(), essl1_shaders::fs::Texture2D());
+    glUseProgram(program);
+
+    GLint posLoc = glGetAttribLocation(program, essl1_shaders::PositionAttrib());
+    ASSERT_NE(-1, posLoc);
+
+    glVertexAttribPointer(posLoc, 3, GL_FLOAT, GL_FALSE, 0, nullptr);
+    glEnableVertexAttribArray(posLoc);
+    ASSERT_GL_NO_ERROR();
+
+    glDrawArrays(GL_TRIANGLES, 0, 6);
+    ASSERT_GL_NO_ERROR();
+
+    // Step 2: Load a new 2D Texture multiple times with the same Program and Framebuffer. The total
+    // size of the loaded textures must exceed the threshold to submit the outside command buffer.
+    auto maxLoadCount =
+        static_cast<size_t>((kMaxBufferToImageCopySize / textureSize) * kNumSubmits + 1);
+    for (size_t loadCount = 0; loadCount < maxLoadCount; loadCount++)
+    {
+        GLTexture newTexture;
+        glBindTexture(GL_TEXTURE_2D, newTexture);
+        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, texDim, texDim, 0, GL_RGBA, GL_UNSIGNED_BYTE,
+                     kInitialData.data());
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+        glDrawArrays(GL_TRIANGLES, 0, 6);
+        ASSERT_GL_NO_ERROR();
+    }
+
+    // Verify render pass and submitted frame counts.
+    EXPECT_EQ(getPerfCounters().renderPasses, expectedRenderPassCount);
+    EXPECT_EQ(getPerfCounters().submittedFrames, expectedSubmitFrameCount);
+}
+
 // Tests that RGB texture should not break renderpass.
 TEST_P(VulkanPerformanceCounterTest, SampleFromRGBTextureDoesNotBreakRenderPass)
 {