//
// Copyright 2020 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// CommandProcessor.cpp:
// Implements the class methods for CommandProcessor.
//
#include "libANGLE/renderer/vulkan/CommandProcessor.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
namespace rx
{
namespace vk
{
namespace
{
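// Maximum number of in-flight command batches before submitCommands() throttles the CPU by
// finishing older batches.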
constexpr size_t kInFlightCommandsLimit = 50u;
constexpr bool kOutputVmaStatsString = false;
// When suballocation garbage exceeds this size, we may wait for the GPU to finish and free up
// some memory for allocation.
constexpr VkDeviceSize kMaxBufferSuballocationGarbageSize = 64 * 1024 * 1024;
void InitializeSubmitInfo(VkSubmitInfo *submitInfo,
const PrimaryCommandBuffer &commandBuffer,
const std::vector<VkSemaphore> &waitSemaphores,
const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
const VkSemaphore &signalSemaphore)
{
// Verify that the submitInfo has been zero'd out.
ASSERT(submitInfo->signalSemaphoreCount == 0);
ASSERT(waitSemaphores.size() == waitSemaphoreStageMasks.size());
submitInfo->sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submitInfo->commandBufferCount = commandBuffer.valid() ? 1 : 0;
submitInfo->pCommandBuffers = commandBuffer.ptr();
submitInfo->waitSemaphoreCount = static_cast<uint32_t>(waitSemaphores.size());
submitInfo->pWaitSemaphores = waitSemaphores.empty() ? nullptr : waitSemaphores.data();
submitInfo->pWaitDstStageMask = waitSemaphoreStageMasks.data();
if (signalSemaphore != VK_NULL_HANDLE)
{
submitInfo->signalSemaphoreCount = 1;
submitInfo->pSignalSemaphores = &signalSemaphore;
}
}
template <typename SecondaryCommandBufferListT>
void ResetSecondaryCommandBuffers(VkDevice device,
CommandPool *commandPool,
SecondaryCommandBufferListT *commandBuffers)
{
// Nothing to do when using ANGLE secondary command buffers.
}
template <>
[[maybe_unused]] void ResetSecondaryCommandBuffers<std::vector<VulkanSecondaryCommandBuffer>>(
VkDevice device,
CommandPool *commandPool,
std::vector<VulkanSecondaryCommandBuffer> *commandBuffers)
{
// Note: we currently free the command buffers individually, but we could potentially reset the
// entire command pool. https://issuetracker.google.com/issues/166793850
for (VulkanSecondaryCommandBuffer &secondary : *commandBuffers)
{
commandPool->freeCommandBuffers(device, 1, secondary.ptr());
secondary.releaseHandle();
}
commandBuffers->clear();
}
// Count the number of batches with serial <= the given serials. Note that because empty
// submissions may be optimized out, there may not be a fence associated with every batch; the
// fence to wait on is retrieved separately (see CommandQueue::getSharedFenceToWait).
template <typename BitSetArrayT>
size_t GetBatchCountUpToSerials(std::vector<CommandBatch> &inFlightCommands,
const AtomicQueueSerialFixedArray &lastSubmittedSerials,
const AtomicQueueSerialFixedArray &lastCompletedSerials,
const Serials &serials)
{
// First, calculate the bitmask of the serial indices we need to wait on.
BitSetArrayT serialBitMaskToFinish;
for (SerialIndex i = 0; i < serials.size(); i++)
{
ASSERT(serials[i] <= lastSubmittedSerials[i]);
if (serials[i] > lastCompletedSerials[i])
{
serialBitMaskToFinish.set(i);
}
}
if (serialBitMaskToFinish.none())
{
return 0;
}
Serials serialsWillBeFinished(serials.size());
size_t batchCountToFinish = 0;
for (size_t i = 0; i < inFlightCommands.size(); i++)
{
CommandBatch &batch = inFlightCommands[i];
if (serialBitMaskToFinish[batch.queueSerial.getIndex()])
{
// Update what the serial will look like if this batch is completed.
serialsWillBeFinished[batch.queueSerial.getIndex()] = batch.queueSerial.getSerial();
// If finishing this batch will bring the completed serial up to the requested serial, we
// should wait up to this batch, and we are done with this index.
if (serialsWillBeFinished[batch.queueSerial.getIndex()] >=
serials[batch.queueSerial.getIndex()])
{
batchCountToFinish = i + 1;
serialBitMaskToFinish.reset(batch.queueSerial.getIndex());
if (serialBitMaskToFinish.none())
{
// If there is nothing left to wait on, we are done.
break;
}
}
}
}
ASSERT(batchCountToFinish > 0);
return batchCountToFinish;
}
} // namespace
// SharedFence implementation
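// SharedFence wraps a ref-counted Fence together with the FenceRecycler it came from. Copies
// share the same underlying VkFence; when the last reference goes away, the fence is returned
// to the recycler (release) or destroyed (destroy).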
SharedFence::SharedFence() : mRefCountedFence(nullptr), mRecycler(nullptr) {}
SharedFence::SharedFence(const SharedFence &other)
: mRefCountedFence(other.mRefCountedFence), mRecycler(other.mRecycler)
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->addRef();
}
}
SharedFence::SharedFence(SharedFence &&other)
: mRefCountedFence(other.mRefCountedFence), mRecycler(other.mRecycler)
{
other.mRecycler = nullptr;
other.mRefCountedFence = nullptr;
}
SharedFence::~SharedFence()
{
release();
}
VkResult SharedFence::init(VkDevice device, FenceRecycler *recycler)
{
ASSERT(mRecycler == nullptr && mRefCountedFence == nullptr);
Fence fence;
// First try to fetch from the recycler. If that fails, create a new VkFence.
recycler->fetch(device, &fence);
if (!fence.valid())
{
VkFenceCreateInfo fenceCreateInfo = {};
fenceCreateInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fenceCreateInfo.flags = 0;
VkResult result = fence.init(device, fenceCreateInfo);
if (result != VK_SUCCESS)
{
return result;
}
}
// Create a new refcounted object to hold onto VkFence
mRefCountedFence = new RefCounted<Fence>(std::move(fence));
mRefCountedFence->addRef();
mRecycler = recycler;
return VK_SUCCESS;
}
SharedFence &SharedFence::operator=(const SharedFence &other)
{
release();
mRecycler = other.mRecycler;
if (other.mRefCountedFence != nullptr)
{
mRefCountedFence = other.mRefCountedFence;
mRefCountedFence->addRef();
}
return *this;
}
SharedFence &SharedFence::operator=(SharedFence &&other)
{
release();
mRecycler = other.mRecycler;
mRefCountedFence = other.mRefCountedFence;
other.mRecycler = nullptr;
other.mRefCountedFence = nullptr;
return *this;
}
void SharedFence::destroy(VkDevice device)
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->releaseRef();
if (!mRefCountedFence->isReferenced())
{
mRefCountedFence->get().destroy(device);
SafeDelete(mRefCountedFence);
}
else
{
mRefCountedFence = nullptr;
}
}
}
void SharedFence::release()
{
if (mRefCountedFence != nullptr)
{
mRefCountedFence->releaseRef();
if (!mRefCountedFence->isReferenced())
{
mRecycler->recycle(std::move(mRefCountedFence->get()));
ASSERT(!mRefCountedFence->get().valid());
SafeDelete(mRefCountedFence);
}
else
{
mRefCountedFence = nullptr;
}
}
}
SharedFence::operator bool() const
{
ASSERT(mRefCountedFence == nullptr || mRefCountedFence->isReferenced());
return mRefCountedFence != nullptr;
}
VkResult SharedFence::getStatus(VkDevice device) const
{
if (mRefCountedFence != nullptr)
{
return mRefCountedFence->get().getStatus(device);
}
return VK_SUCCESS;
}
VkResult SharedFence::wait(VkDevice device, uint64_t timeout) const
{
if (mRefCountedFence != nullptr)
{
return mRefCountedFence->get().wait(device, timeout);
}
return VK_SUCCESS;
}
// FenceRecycler implementation
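// FenceRecycler keeps a mutex-protected pool of VkFences so SharedFence::init can reuse a fence
// (reset on fetch) instead of creating a new one for every submission.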
void FenceRecycler::destroy(Context *context)
{
std::lock_guard<std::mutex> lock(mMutex);
mRecyler.destroy(context->getDevice());
}
void FenceRecycler::fetch(VkDevice device, Fence *fenceOut)
{
ASSERT(fenceOut != nullptr && !fenceOut->valid());
std::lock_guard<std::mutex> lock(mMutex);
if (!mRecyler.empty())
{
mRecyler.fetch(fenceOut);
fenceOut->reset(device);
}
}
void FenceRecycler::recycle(Fence &&fence)
{
std::lock_guard<std::mutex> lock(mMutex);
mRecyler.recycle(std::move(fence));
}
// CommandProcessorTask implementation
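// A CommandProcessorTask is one unit of work handed from a context thread to the
// CommandProcessor's worker thread. It owns copies of everything the task needs (semaphores,
// present info, command buffer helpers, queue serial) so it can be executed asynchronously.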
void CommandProcessorTask::initTask()
{
mTask = CustomTask::Invalid;
mOutsideRenderPassCommandBuffer = nullptr;
mRenderPassCommandBuffer = nullptr;
mRenderPass = nullptr;
mSemaphore = VK_NULL_HANDLE;
mCommandPools = nullptr;
mOneOffWaitSemaphore = nullptr;
mOneOffWaitSemaphoreStageMask = 0;
mOneOffFence = nullptr;
mPresentInfo = {};
mPresentInfo.pResults = nullptr;
mPresentInfo.pSwapchains = nullptr;
mPresentInfo.pImageIndices = nullptr;
mPresentInfo.pNext = nullptr;
mPresentInfo.pWaitSemaphores = nullptr;
mPresentFence = VK_NULL_HANDLE;
mOneOffCommandBufferVk = VK_NULL_HANDLE;
mPriority = egl::ContextPriority::Medium;
mHasProtectedContent = false;
}
void CommandProcessorTask::initOutsideRenderPassProcessCommands(
bool hasProtectedContent,
OutsideRenderPassCommandBufferHelper *commandBuffer)
{
mTask = CustomTask::ProcessOutsideRenderPassCommands;
mOutsideRenderPassCommandBuffer = commandBuffer;
mHasProtectedContent = hasProtectedContent;
}
void CommandProcessorTask::initRenderPassProcessCommands(
bool hasProtectedContent,
RenderPassCommandBufferHelper *commandBuffer,
const RenderPass *renderPass)
{
mTask = CustomTask::ProcessRenderPassCommands;
mRenderPassCommandBuffer = commandBuffer;
mRenderPass = renderPass;
mHasProtectedContent = hasProtectedContent;
}
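// Deep-copy the caller's VkPresentInfoKHR (including the supported pNext structures) into
// member storage, since the original lives on the calling thread's stack and must outlive the
// asynchronous present on the worker thread.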
void CommandProcessorTask::copyPresentInfo(const VkPresentInfoKHR &other)
{
if (other.sType == 0)
{
return;
}
mPresentInfo.sType = other.sType;
mPresentInfo.pNext = nullptr;
if (other.swapchainCount > 0)
{
ASSERT(other.swapchainCount == 1);
mPresentInfo.swapchainCount = 1;
mSwapchain = other.pSwapchains[0];
mPresentInfo.pSwapchains = &mSwapchain;
mImageIndex = other.pImageIndices[0];
mPresentInfo.pImageIndices = &mImageIndex;
}
if (other.waitSemaphoreCount > 0)
{
ASSERT(other.waitSemaphoreCount == 1);
mPresentInfo.waitSemaphoreCount = 1;
mWaitSemaphore = other.pWaitSemaphores[0];
mPresentInfo.pWaitSemaphores = &mWaitSemaphore;
}
mPresentInfo.pResults = other.pResults;
void *pNext = const_cast<void *>(other.pNext);
while (pNext != nullptr)
{
VkStructureType sType = *reinterpret_cast<VkStructureType *>(pNext);
switch (sType)
{
case VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR:
{
const VkPresentRegionsKHR *presentRegions =
reinterpret_cast<VkPresentRegionsKHR *>(pNext);
mPresentRegion = *presentRegions->pRegions;
mRects.resize(mPresentRegion.rectangleCount);
for (uint32_t i = 0; i < mPresentRegion.rectangleCount; i++)
{
mRects[i] = presentRegions->pRegions->pRectangles[i];
}
mPresentRegion.pRectangles = mRects.data();
mPresentRegions.sType = VK_STRUCTURE_TYPE_PRESENT_REGIONS_KHR;
mPresentRegions.pNext = presentRegions->pNext;
mPresentRegions.swapchainCount = 1;
mPresentRegions.pRegions = &mPresentRegion;
AddToPNextChain(&mPresentInfo, &mPresentRegions);
pNext = const_cast<void *>(presentRegions->pNext);
break;
}
case VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT:
{
const VkSwapchainPresentFenceInfoEXT *presentFenceInfo =
reinterpret_cast<VkSwapchainPresentFenceInfoEXT *>(pNext);
ASSERT(presentFenceInfo->swapchainCount == 1);
mPresentFence = presentFenceInfo->pFences[0];
mPresentFenceInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT;
mPresentFenceInfo.pNext = nullptr;
mPresentFenceInfo.swapchainCount = 1;
mPresentFenceInfo.pFences = &mPresentFence;
AddToPNextChain(&mPresentInfo, &mPresentFenceInfo);
pNext = const_cast<void *>(presentFenceInfo->pNext);
break;
}
default:
ERR() << "Unknown sType: " << sType << " in VkPresentInfoKHR.pNext chain";
UNREACHABLE();
break;
}
}
}
void CommandProcessorTask::initPresent(egl::ContextPriority priority,
const VkPresentInfoKHR &presentInfo)
{
mTask = CustomTask::Present;
mPriority = priority;
copyPresentInfo(presentInfo);
}
void CommandProcessorTask::initFlushAndQueueSubmit(
const std::vector<VkSemaphore> &waitSemaphores,
const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
const VkSemaphore semaphore,
bool hasProtectedContent,
egl::ContextPriority priority,
SecondaryCommandPools *commandPools,
SecondaryCommandBufferList &&commandBuffersToReset,
const QueueSerial &submitQueueSerial)
{
mTask = CustomTask::FlushAndQueueSubmit;
mWaitSemaphores = waitSemaphores;
mWaitSemaphoreStageMasks = waitSemaphoreStageMasks;
mSemaphore = semaphore;
mCommandPools = commandPools;
mCommandBuffersToReset = std::move(commandBuffersToReset);
mPriority = priority;
mHasProtectedContent = hasProtectedContent;
mSubmitQueueSerial = submitQueueSerial;
}
void CommandProcessorTask::initOneOffQueueSubmit(VkCommandBuffer commandBufferHandle,
bool hasProtectedContent,
egl::ContextPriority priority,
const Semaphore *waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
const Fence *fence,
const QueueSerial &submitQueueSerial)
{
mTask = CustomTask::OneOffQueueSubmit;
mOneOffCommandBufferVk = commandBufferHandle;
mOneOffWaitSemaphore = waitSemaphore;
mOneOffWaitSemaphoreStageMask = waitSemaphoreStageMask;
mOneOffFence = fence;
mPriority = priority;
mHasProtectedContent = hasProtectedContent;
mSubmitQueueSerial = submitQueueSerial;
}
CommandProcessorTask &CommandProcessorTask::operator=(CommandProcessorTask &&rhs)
{
if (this == &rhs)
{
return *this;
}
std::swap(mRenderPass, rhs.mRenderPass);
std::swap(mOutsideRenderPassCommandBuffer, rhs.mOutsideRenderPassCommandBuffer);
std::swap(mRenderPassCommandBuffer, rhs.mRenderPassCommandBuffer);
std::swap(mTask, rhs.mTask);
std::swap(mWaitSemaphores, rhs.mWaitSemaphores);
std::swap(mWaitSemaphoreStageMasks, rhs.mWaitSemaphoreStageMasks);
std::swap(mSemaphore, rhs.mSemaphore);
std::swap(mOneOffWaitSemaphore, rhs.mOneOffWaitSemaphore);
std::swap(mOneOffWaitSemaphoreStageMask, rhs.mOneOffWaitSemaphoreStageMask);
std::swap(mOneOffFence, rhs.mOneOffFence);
std::swap(mCommandPools, rhs.mCommandPools);
std::swap(mCommandBuffersToReset, rhs.mCommandBuffersToReset);
std::swap(mSubmitQueueSerial, rhs.mSubmitQueueSerial);
std::swap(mPriority, rhs.mPriority);
std::swap(mHasProtectedContent, rhs.mHasProtectedContent);
std::swap(mOneOffCommandBufferVk, rhs.mOneOffCommandBufferVk);
copyPresentInfo(rhs.mPresentInfo);
// clear rhs now that everything has moved.
rhs.initTask();
return *this;
}
// CommandBatch implementation.
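// A CommandBatch holds everything associated with one queue submission: the primary command
// buffer, the secondary command buffers/pools to reset once the GPU is done, the fence to wait
// on, and the queue serial the submission completes.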
CommandBatch::CommandBatch() : commandPools(nullptr), hasProtectedContent(false) {}
CommandBatch::~CommandBatch() = default;
CommandBatch::CommandBatch(CommandBatch &&other) : CommandBatch()
{
*this = std::move(other);
}
CommandBatch &CommandBatch::operator=(CommandBatch &&other)
{
std::swap(primaryCommands, other.primaryCommands);
std::swap(commandPools, other.commandPools);
std::swap(commandBuffersToReset, other.commandBuffersToReset);
std::swap(fence, other.fence);
std::swap(queueSerial, other.queueSerial);
std::swap(hasProtectedContent, other.hasProtectedContent);
return *this;
}
void CommandBatch::destroy(VkDevice device)
{
primaryCommands.destroy(device);
fence.destroy(device);
hasProtectedContent = false;
}
void CommandBatch::resetSecondaryCommandBuffers(VkDevice device)
{
ResetSecondaryCommandBuffers(device, &commandPools->outsideRenderPassPool,
&commandBuffersToReset.outsideRenderPassCommandBuffers);
ResetSecondaryCommandBuffers(device, &commandPools->renderPassPool,
&commandBuffersToReset.renderPassCommandBuffers);
}
// CommandProcessor implementation.
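// CommandProcessor runs a dedicated worker thread (processTasks) that drains mTasks and
// forwards each task to mCommandQueue. Errors encountered on the worker thread are queued in
// mErrors and re-reported on a context thread through checkAndPopPendingError.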
void CommandProcessor::handleError(VkResult errorCode,
const char *file,
const char *function,
unsigned int line)
{
ASSERT(errorCode != VK_SUCCESS);
std::stringstream errorStream;
errorStream << "Internal Vulkan error (" << errorCode << "): " << VulkanResultString(errorCode)
<< ".";
if (errorCode == VK_ERROR_DEVICE_LOST)
{
WARN() << errorStream.str();
handleDeviceLost(mRenderer);
}
std::lock_guard<std::mutex> queueLock(mErrorMutex);
Error error = {errorCode, file, function, line};
mErrors.emplace(error);
}
CommandProcessor::CommandProcessor(RendererVk *renderer)
: Context(renderer), mWorkerThreadIdle(false)
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
while (!mErrors.empty())
{
mErrors.pop();
}
}
CommandProcessor::~CommandProcessor() = default;
angle::Result CommandProcessor::checkAndPopPendingError(Context *errorHandlingContext)
{
std::lock_guard<std::mutex> queueLock(mErrorMutex);
if (mErrors.empty())
{
return angle::Result::Continue;
}
else
{
Error err = mErrors.front();
mErrors.pop();
errorHandlingContext->handleError(err.errorCode, err.file, err.function, err.line);
return angle::Result::Stop;
}
}
void CommandProcessor::queueCommand(CommandProcessorTask &&task)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queueCommand");
// Grab the worker mutex so that we put things on the queue in the same order as we give out
// serials.
std::lock_guard<std::mutex> queueLock(mWorkerMutex);
mTasks.emplace(std::move(task));
mWorkAvailableCondition.notify_one();
}
void CommandProcessor::processTasks()
{
while (true)
{
bool exitThread = false;
angle::Result result = processTasksImpl(&exitThread);
if (exitThread)
{
// We are doing a controlled exit of the thread, break out of the while loop.
break;
}
if (result != angle::Result::Continue)
{
// TODO: https://issuetracker.google.com/issues/170311829 - follow-up on error handling.
// ContextVk::commandProcessorSyncErrorsAndQueueCommand and WindowSurfaceVk::destroy
// do error processing; is anything required here? Probably not; we mostly need to keep
// the worker thread running until it has been told to exit.
UNREACHABLE();
}
}
}
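// Worker thread loop: block until a task is queued, process it, and signal idleness to threads
// blocked in waitForWorkComplete. The loop exits only after an Exit task has been processed.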
angle::Result CommandProcessor::processTasksImpl(bool *exitThread)
{
while (true)
{
std::unique_lock<std::mutex> lock(mWorkerMutex);
if (mTasks.empty())
{
mWorkerThreadIdle = true;
mWorkerIdleCondition.notify_all();
// Only wake if notified and command queue is not empty
mWorkAvailableCondition.wait(lock, [this] { return !mTasks.empty(); });
}
mWorkerThreadIdle = false;
CommandProcessorTask task(std::move(mTasks.front()));
mTasks.pop();
lock.unlock();
ANGLE_TRY(processTask(&task));
if (task.getTaskCommand() == CustomTask::Exit)
{
*exitThread = true;
lock.lock();
mWorkerThreadIdle = true;
mWorkerIdleCondition.notify_one();
return angle::Result::Continue;
}
}
UNREACHABLE();
return angle::Result::Stop;
}
angle::Result CommandProcessor::processTask(CommandProcessorTask *task)
{
switch (task->getTaskCommand())
{
case CustomTask::Exit:
{
ANGLE_TRY(mCommandQueue.waitIdle(this, mRenderer->getMaxFenceWaitTimeNs()));
// Shutting down so cleanup
mCommandQueue.destroy(this);
break;
}
case CustomTask::FlushAndQueueSubmit:
{
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::FlushAndQueueSubmit");
// End command buffer
// Call submitCommands()
ANGLE_TRY(mCommandQueue.submitCommands(
this, task->hasProtectedContent(), task->getPriority(), task->getWaitSemaphores(),
task->getWaitSemaphoreStageMasks(), task->getSemaphore(),
std::move(task->getCommandBuffersToReset()), task->getCommandPools(),
task->getSubmitQueueSerial()));
break;
}
case CustomTask::OneOffQueueSubmit:
{
ANGLE_TRACE_EVENT0("gpu.angle", "processTask::OneOffQueueSubmit");
ANGLE_TRY(mCommandQueue.queueSubmitOneOff(
this, task->hasProtectedContent(), task->getPriority(),
task->getOneOffCommandBufferVk(), task->getOneOffWaitSemaphore(),
task->getOneOffWaitSemaphoreStageMask(), task->getOneOffFence(),
SubmitPolicy::EnsureSubmitted, task->getSubmitQueueSerial()));
ANGLE_TRY(mCommandQueue.checkCompletedCommands(this));
break;
}
case CustomTask::Present:
{
VkResult result = present(task->getPriority(), task->getPresentInfo());
if (ANGLE_UNLIKELY(result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR))
{
// We get to ignore these as they are not fatal
}
else if (ANGLE_UNLIKELY(result != VK_SUCCESS))
{
// Save the error so that we can handle it.
// Don't leave processing loop, don't consider errors from present to be fatal.
// TODO: https://issuetracker.google.com/issues/170329600 - This needs to improve to
// properly parallelize present
handleError(result, __FILE__, __FUNCTION__, __LINE__);
}
break;
}
case CustomTask::ProcessOutsideRenderPassCommands:
{
OutsideRenderPassCommandBufferHelper *commandBuffer =
task->getOutsideRenderPassCommandBuffer();
ANGLE_TRY(mCommandQueue.flushOutsideRPCommands(this, task->hasProtectedContent(),
&commandBuffer));
OutsideRenderPassCommandBufferHelper *originalCommandBuffer =
task->getOutsideRenderPassCommandBuffer();
mRenderer->recycleOutsideRenderPassCommandBufferHelper(mRenderer->getDevice(),
&originalCommandBuffer);
break;
}
case CustomTask::ProcessRenderPassCommands:
{
RenderPassCommandBufferHelper *commandBuffer = task->getRenderPassCommandBuffer();
ANGLE_TRY(mCommandQueue.flushRenderPassCommands(
this, task->hasProtectedContent(), *task->getRenderPass(), &commandBuffer));
RenderPassCommandBufferHelper *originalCommandBuffer =
task->getRenderPassCommandBuffer();
mRenderer->recycleRenderPassCommandBufferHelper(mRenderer->getDevice(),
&originalCommandBuffer);
break;
}
case CustomTask::CheckCompletedCommands:
{
ANGLE_TRY(mCommandQueue.checkCompletedCommands(this));
break;
}
default:
UNREACHABLE();
break;
}
return angle::Result::Continue;
}
angle::Result CommandProcessor::checkCompletedCommands(Context *context)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask checkCompletedTask;
checkCompletedTask.initTask(CustomTask::CheckCompletedCommands);
queueCommand(std::move(checkCompletedTask));
return angle::Result::Continue;
}
angle::Result CommandProcessor::waitForWorkComplete(Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitForWorkComplete");
std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
// Worker thread is idle and the task queue is empty, so it is safe to continue.
// Sync any errors to the context.
bool shouldStop = hasPendingError();
while (hasPendingError())
{
(void)checkAndPopPendingError(context);
}
return shouldStop ? angle::Result::Stop : angle::Result::Continue;
}
angle::Result CommandProcessor::init(Context *context, const DeviceQueueMap &queueMap)
{
ANGLE_TRY(mCommandQueue.init(context, queueMap));
mTaskThread = std::thread(&CommandProcessor::processTasks, this);
return angle::Result::Continue;
}
void CommandProcessor::destroy(Context *context)
{
CommandProcessorTask endTask;
endTask.initTask(CustomTask::Exit);
queueCommand(std::move(endTask));
(void)waitForWorkComplete(context);
if (mTaskThread.joinable())
{
mTaskThread.join();
}
}
// Wait until all queued tasks have been processed, then hand device loss to the command queue.
void CommandProcessor::handleDeviceLost(RendererVk *renderer)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::handleDeviceLost");
std::unique_lock<std::mutex> lock(mWorkerMutex);
mWorkerIdleCondition.wait(lock, [this] { return (mTasks.empty() && mWorkerThreadIdle); });
// Worker thread is idle and command queue is empty so good to continue
mCommandQueue.handleDeviceLost(renderer);
}
VkResult CommandProcessor::getLastAndClearPresentResult(VkSwapchainKHR swapchain)
{
std::unique_lock<std::mutex> lock(mSwapchainStatusMutex);
if (mSwapchainStatus.find(swapchain) == mSwapchainStatus.end())
{
// Wake when required swapchain status becomes available
mSwapchainStatusCondition.wait(lock, [this, swapchain] {
return mSwapchainStatus.find(swapchain) != mSwapchainStatus.end();
});
}
VkResult result = mSwapchainStatus[swapchain];
mSwapchainStatus.erase(swapchain);
return result;
}
VkResult CommandProcessor::present(egl::ContextPriority priority,
const VkPresentInfoKHR &presentInfo)
{
std::lock_guard<std::mutex> lock(mSwapchainStatusMutex);
ANGLE_TRACE_EVENT0("gpu.angle", "vkQueuePresentKHR");
VkResult result = mCommandQueue.queuePresent(priority, presentInfo);
// Verify that we are presenting one and only one swapchain
ASSERT(presentInfo.swapchainCount == 1);
ASSERT(presentInfo.pResults == nullptr);
mSwapchainStatus[presentInfo.pSwapchains[0]] = result;
mSwapchainStatusCondition.notify_all();
return result;
}
angle::Result CommandProcessor::submitCommands(
Context *context,
bool hasProtectedContent,
egl::ContextPriority priority,
const std::vector<VkSemaphore> &waitSemaphores,
const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
const VkSemaphore signalSemaphore,
SecondaryCommandBufferList &&commandBuffersToReset,
SecondaryCommandPools *commandPools,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask task;
task.initFlushAndQueueSubmit(waitSemaphores, waitSemaphoreStageMasks, signalSemaphore,
hasProtectedContent, priority, commandPools,
std::move(commandBuffersToReset), submitQueueSerial);
queueCommand(std::move(task));
mLastSubmittedSerials.setQueueSerial(submitQueueSerial);
return angle::Result::Continue;
}
angle::Result CommandProcessor::queueSubmitOneOff(Context *context,
bool hasProtectedContent,
egl::ContextPriority contextPriority,
VkCommandBuffer commandBufferHandle,
const Semaphore *waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
const Fence *fence,
SubmitPolicy submitPolicy,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRY(checkAndPopPendingError(context));
CommandProcessorTask task;
task.initOneOffQueueSubmit(commandBufferHandle, hasProtectedContent, contextPriority,
waitSemaphore, waitSemaphoreStageMask, fence, submitQueueSerial);
queueCommand(std::move(task));
mLastSubmittedSerials.setQueueSerial(submitQueueSerial);
if (submitPolicy == SubmitPolicy::EnsureSubmitted)
{
// Caller has synchronization requirement to have work in GPU pipe when returning from this
// function.
ANGLE_TRY(waitForWorkComplete(context));
}
return angle::Result::Continue;
}
VkResult CommandProcessor::queuePresent(egl::ContextPriority contextPriority,
const VkPresentInfoKHR &presentInfo)
{
CommandProcessorTask task;
task.initPresent(contextPriority, presentInfo);
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::queuePresent");
queueCommand(std::move(task));
// Always return success, when we call acquireNextImage we'll check the return code. This
// allows the app to continue working until we really need to know the return code from
// present.
return VK_SUCCESS;
}
angle::Result CommandProcessor::flushOutsideRPCommands(
Context *context,
bool hasProtectedContent,
OutsideRenderPassCommandBufferHelper **outsideRPCommands)
{
ANGLE_TRY(checkAndPopPendingError(context));
(*outsideRPCommands)->markClosed();
// Detach functions are only used for ring buffer allocators.
SecondaryCommandMemoryAllocator *allocator = (*outsideRPCommands)->detachAllocator();
CommandProcessorTask task;
task.initOutsideRenderPassProcessCommands(hasProtectedContent, *outsideRPCommands);
queueCommand(std::move(task));
ANGLE_TRY(mRenderer->getOutsideRenderPassCommandBufferHelper(
context, (*outsideRPCommands)->getCommandPool(), allocator, outsideRPCommands));
return angle::Result::Continue;
}
angle::Result CommandProcessor::flushRenderPassCommands(
Context *context,
bool hasProtectedContent,
const RenderPass &renderPass,
RenderPassCommandBufferHelper **renderPassCommands)
{
ANGLE_TRY(checkAndPopPendingError(context));
(*renderPassCommands)->markClosed();
// Detach functions are only used for ring buffer allocators.
SecondaryCommandMemoryAllocator *allocator = (*renderPassCommands)->detachAllocator();
CommandProcessorTask task;
task.initRenderPassProcessCommands(hasProtectedContent, *renderPassCommands, &renderPass);
queueCommand(std::move(task));
ANGLE_TRY(mRenderer->getRenderPassCommandBufferHelper(
context, (*renderPassCommands)->getCommandPool(), allocator, renderPassCommands));
return angle::Result::Continue;
}
bool CommandProcessor::hasUnsubmittedUse(const vk::ResourceUse &use) const
{
const Serials &serials = use.getSerials();
for (SerialIndex i = 0; i < serials.size(); ++i)
{
if (serials[i] > mLastSubmittedSerials[i])
{
return true;
}
}
return false;
}
// ThreadSafeCommandProcessor implementation.
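// ThreadSafeCommandProcessor adds a mutex around the wait/finish entry points of the
// asynchronous CommandProcessor so that multiple threads can safely wait for work to be
// submitted or finished.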
angle::Result ThreadSafeCommandProcessor::waitForQueueSerialToBeSubmitted(
vk::Context *context,
const QueueSerial &queueSerial)
{
const ResourceUse use(queueSerial);
return waitForResourceUseToBeSubmitted(context, use);
}
angle::Result ThreadSafeCommandProcessor::waitForResourceUseToBeSubmitted(vk::Context *context,
const ResourceUse &use)
{
if (mCommandQueue.hasUnsubmittedUse(use))
{
std::unique_lock<std::mutex> lock(mMutex);
// TODO: https://issuetracker.google.com/261098465 stop wait when use has been submitted.
ANGLE_TRY(waitForWorkComplete(context));
}
return angle::Result::Continue;
}
angle::Result ThreadSafeCommandProcessor::finishResourceUse(Context *context,
const ResourceUse &use,
uint64_t timeout)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::finishResourceUse");
ANGLE_TRY(waitForResourceUseToBeSubmitted(context, use));
return mCommandQueue.finishResourceUse(context, use, timeout);
}
angle::Result ThreadSafeCommandProcessor::finishQueueSerial(Context *context,
const QueueSerial &queueSerial,
uint64_t timeout)
{
const ResourceUse use(queueSerial);
return finishResourceUse(context, use, timeout);
}
angle::Result ThreadSafeCommandProcessor::waitIdle(Context *context, uint64_t timeout)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandProcessor::waitIdle");
{
std::unique_lock<std::mutex> lock(mMutex);
ANGLE_TRY(waitForWorkComplete(context));
}
return mCommandQueue.waitIdle(this, mRenderer->getMaxFenceWaitTimeNs());
}
angle::Result ThreadSafeCommandProcessor::waitForResourceUseToFinishWithUserTimeout(
Context *context,
const ResourceUse &use,
uint64_t timeout,
VkResult *result)
{
// If finishResourceUse times out, we generate an error. Therefore we use a large timeout.
// TODO: https://issuetracker.google.com/170312581 - Wait with timeout.
return finishResourceUse(context, use, mRenderer->getMaxFenceWaitTimeNs());
}
bool ThreadSafeCommandProcessor::isBusy(RendererVk *renderer) const
{
std::lock_guard<std::mutex> workerLock(mWorkerMutex);
return !mTasks.empty() || mCommandQueue.isBusy(renderer);
}
// CommandQueue implementation.
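// CommandQueue owns the primary command buffer pools, the list of in-flight batches, and the
// actual vkQueueSubmit/vkQueuePresentKHR calls. Progress is tracked through
// mLastSubmittedSerials and mLastCompletedSerials.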
CommandQueue::CommandQueue() : mPerfCounters{} {}
CommandQueue::~CommandQueue() = default;
void CommandQueue::destroy(Context *context)
{
// Force all commands to finish by flushing all queues.
for (VkQueue queue : mQueueMap)
{
if (queue != VK_NULL_HANDLE)
{
vkQueueWaitIdle(queue);
}
}
RendererVk *renderer = context->getRenderer();
// Assign an infinite "last completed" serial to force all pending garbage to be deleted.
mLastCompletedSerials.fill(Serial::Infinite());
mPrimaryCommands.destroy(renderer->getDevice());
mPrimaryCommandPool.destroy(renderer->getDevice());
if (mProtectedPrimaryCommandPool.valid())
{
mProtectedPrimaryCommands.destroy(renderer->getDevice());
mProtectedPrimaryCommandPool.destroy(renderer->getDevice());
}
mFenceRecycler.destroy(context);
ASSERT(mInFlightCommands.empty());
}
angle::Result CommandQueue::init(Context *context, const DeviceQueueMap &queueMap)
{
// In case RendererVk gets re-initialized, we can't rely on the constructor to do the
// initialization for us.
mLastSubmittedSerials.fill(kZeroSerial);
mLastCompletedSerials.fill(kZeroSerial);
// Initialize the command pool now that we know the queue family index.
ANGLE_TRY(mPrimaryCommandPool.init(context, false, queueMap.getIndex()));
mQueueMap = queueMap;
if (queueMap.isProtected())
{
ANGLE_TRY(mProtectedPrimaryCommandPool.init(context, true, queueMap.getIndex()));
}
return angle::Result::Continue;
}
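// Walk the in-flight batches in submission order, counting batches whose fence has signaled (or
// that have no fence), stopping at the first batch that is still pending; then retire the
// finished ones and clean up garbage.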
angle::Result CommandQueue::checkCompletedCommands(Context *context)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::checkCompletedCommandsNoLock");
RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice();
int finishedCount = 0;
for (CommandBatch &batch : mInFlightCommands)
{
// For empty submissions, fence is not set but there may be garbage to be collected. In
// such a case, the empty submission is "completed" at the same time as the last submission
// that actually happened.
if (batch.fence)
{
VkResult result = batch.fence.getStatus(device);
if (result == VK_NOT_READY)
{
break;
}
ANGLE_VK_TRY(context, result);
}
++finishedCount;
}
if (finishedCount == 0)
{
return angle::Result::Continue;
}
return retireFinishedCommandsAndCleanupGarbage(context, finishedCount);
}
angle::Result CommandQueue::retireFinishedCommands(Context *context, size_t finishedCount)
{
RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice();
// First store the last completed queue serials in a local variable, then update
// mLastCompletedSerials once at the end.
angle::FastMap<Serial, kMaxFastQueueSerials> lastCompletedQueueSerials;
for (size_t commandIndex = 0; commandIndex < finishedCount; ++commandIndex)
{
CommandBatch &batch = mInFlightCommands[commandIndex];
lastCompletedQueueSerials[batch.queueSerial.getIndex()] = batch.queueSerial.getSerial();
if (batch.fence)
{
batch.fence.release();
}
if (batch.primaryCommands.valid())
{
ANGLE_TRACE_EVENT0("gpu.angle", "Primary command buffer recycling");
PersistentCommandPool &commandPool = getCommandPool(batch.hasProtectedContent);
ANGLE_TRY(commandPool.collect(context, std::move(batch.primaryCommands)));
}
if (batch.commandPools)
{
ANGLE_TRACE_EVENT0("gpu.angle", "Secondary command buffer recycling");
batch.resetSecondaryCommandBuffers(device);
}
}
for (SerialIndex index = 0; index < lastCompletedQueueSerials.size(); index++)
{
// Set mLastCompletedSerials only if a completed serial was recorded for this index.
if (lastCompletedQueueSerials[index] != kZeroSerial)
{
mLastCompletedSerials.setQueueSerial(index, lastCompletedQueueSerials[index]);
}
}
auto beginIter = mInFlightCommands.begin();
mInFlightCommands.erase(beginIter, beginIter + finishedCount);
return angle::Result::Continue;
}
angle::Result CommandQueue::retireFinishedCommandsAndCleanupGarbage(Context *context,
size_t finishedCount)
{
ASSERT(finishedCount > 0);
RendererVk *renderer = context->getRenderer();
ANGLE_TRY(retireFinishedCommands(context, finishedCount));
// Now clean up RendererVk garbage
renderer->cleanupGarbage();
return angle::Result::Continue;
}
void CommandQueue::releaseToCommandBatch(bool hasProtectedContent,
PrimaryCommandBuffer &&commandBuffer,
SecondaryCommandPools *commandPools,
CommandBatch *batch)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::releaseToCommandBatch");
batch->primaryCommands = std::move(commandBuffer);
batch->commandPools = commandPools;
batch->hasProtectedContent = hasProtectedContent;
}
void CommandQueue::handleDeviceLost(RendererVk *renderer)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::handleDeviceLost");
VkDevice device = renderer->getDevice();
for (CommandBatch &batch : mInFlightCommands)
{
// On device loss we need to wait for the fence to be signaled before destroying it.
if (batch.fence)
{
VkResult status = batch.fence.wait(device, renderer->getMaxFenceWaitTimeNs());
// If the wait times out, it is probably not possible to recover from lost device
ASSERT(status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST);
batch.fence.destroy(device);
}
// On device loss, simply destroy the CommandBuffer here; it will be fully cleared later
// by CommandPool::destroy.
if (batch.primaryCommands.valid())
{
batch.primaryCommands.destroy(device);
}
batch.resetSecondaryCommandBuffers(device);
mLastCompletedSerials.setQueueSerial(batch.queueSerial);
}
mInFlightCommands.clear();
}
bool CommandQueue::allInFlightCommandsAreAfterSerials(const Serials &serials)
{
for (const CommandBatch &batch : mInFlightCommands)
{
if (batch.queueSerial.getIndex() < serials.size() &&
serials[batch.queueSerial.getIndex()] != kZeroSerial &&
batch.queueSerial.getSerial() <= serials[batch.queueSerial.getIndex()])
{
return false;
}
}
return true;
}
angle::Result CommandQueue::finishQueueSerial(Context *context,
const QueueSerial &queueSerial,
uint64_t timeout)
{
vk::ResourceUse use(queueSerial);
return finishResourceUse(context, use, timeout);
}
angle::Result CommandQueue::finishResourceUse(Context *context,
const ResourceUse &use,
uint64_t timeout)
{
size_t finishCount = getBatchCountUpToSerials(context->getRenderer(), use.getSerials());
if (finishCount == 0)
{
return angle::Result::Continue;
}
const SharedFence &sharedFence = getSharedFenceToWait(finishCount);
// Wait for it to finish. If there is no fence, the serial has already finished; there might
// just be garbage to clean up.
if (!sharedFence)
{
return angle::Result::Continue;
}
else
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::finishResourceUse");
VkResult status = sharedFence.wait(context->getDevice(), timeout);
ANGLE_VK_TRY(context, status);
}
// Clean up finished batches.
ANGLE_TRY(retireFinishedCommandsAndCleanupGarbage(context, finishCount));
ASSERT(allInFlightCommandsAreAfterSerials(use.getSerials()));
return angle::Result::Continue;
}
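// End and submit the accumulated primary command buffer (if any) with the given wait/signal
// semaphores, record the batch as in-flight with a fresh fence, then throttle the CPU if too
// many batches are in flight or too much suballocation garbage is pending.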
angle::Result CommandQueue::submitCommands(
Context *context,
bool hasProtectedContent,
egl::ContextPriority priority,
const std::vector<VkSemaphore> &waitSemaphores,
const std::vector<VkPipelineStageFlags> &waitSemaphoreStageMasks,
const VkSemaphore signalSemaphore,
SecondaryCommandBufferList &&commandBuffersToReset,
SecondaryCommandPools *commandPools,
const QueueSerial &submitQueueSerial)
{
RendererVk *renderer = context->getRenderer();
VkDevice device = renderer->getDevice();
++mPerfCounters.commandQueueSubmitCallsTotal;
++mPerfCounters.commandQueueSubmitCallsPerFrame;
DeviceScoped<CommandBatch> scopedBatch(device);
CommandBatch &batch = scopedBatch.get();
batch.queueSerial = submitQueueSerial;
batch.hasProtectedContent = hasProtectedContent;
batch.commandBuffersToReset = std::move(commandBuffersToReset);
// Don't make a submission if there is nothing to submit.
PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
const bool hasAnyPendingCommands = commandBuffer.valid();
if (hasAnyPendingCommands || signalSemaphore != VK_NULL_HANDLE || !waitSemaphores.empty())
{
if (commandBuffer.valid())
{
ANGLE_VK_TRY(context, commandBuffer.end());
}
VkSubmitInfo submitInfo = {};
InitializeSubmitInfo(&submitInfo, commandBuffer, waitSemaphores, waitSemaphoreStageMasks,
signalSemaphore);
VkProtectedSubmitInfo protectedSubmitInfo = {};
if (hasProtectedContent)
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::submitCommands");
ANGLE_VK_TRY(context, batch.fence.init(context->getDevice(), &mFenceRecycler));
ANGLE_TRY(
queueSubmit(context, priority, submitInfo, &batch.fence.get(), batch.queueSerial));
}
else
{
mLastSubmittedSerials.setQueueSerial(submitQueueSerial);
}
// Store the primary CommandBuffer and command pool used for secondary CommandBuffers
// in the in-flight list.
if (hasProtectedContent)
{
releaseToCommandBatch(hasProtectedContent, std::move(mProtectedPrimaryCommands),
commandPools, &batch);
}
else
{
releaseToCommandBatch(hasProtectedContent, std::move(mPrimaryCommands), commandPools,
&batch);
}
mInFlightCommands.emplace_back(scopedBatch.release());
ANGLE_TRY(checkCompletedCommands(context));
// The CPU should be throttled to prevent mInFlightCommands from growing too fast. This is
// important for off-screen scenarios.
if (mInFlightCommands.size() > kInFlightCommandsLimit)
{
size_t numCommandsToFinish = mInFlightCommands.size() - kInFlightCommandsLimit;
const QueueSerial &finishSerial = mInFlightCommands[numCommandsToFinish].queueSerial;
ANGLE_TRY(finishQueueSerial(context, finishSerial, renderer->getMaxFenceWaitTimeNs()));
}
// The CPU should also be throttled to avoid accumulating too much memory garbage waiting to
// be destroyed. This is important to keep peak memory usage in check when a game launches and
// a lot of staging buffers are used for texture uploads and then released. But if there is
// only one command buffer in flight, we do not wait here, so that we keep the GPU busy.
VkDeviceSize suballocationGarbageSize = renderer->getSuballocationGarbageSize();
while (suballocationGarbageSize > kMaxBufferSuballocationGarbageSize &&
mInFlightCommands.size() > 1)
{
const QueueSerial &finishSerial = mInFlightCommands.back().queueSerial;
ANGLE_TRY(finishQueueSerial(context, finishSerial, renderer->getMaxFenceWaitTimeNs()));
suballocationGarbageSize = renderer->getSuballocationGarbageSize();
}
return angle::Result::Continue;
}
angle::Result CommandQueue::ensurePrimaryCommandBufferValid(Context *context,
bool hasProtectedContent)
{
PersistentCommandPool &commandPool = getCommandPool(hasProtectedContent);
PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
if (commandBuffer.valid())
{
return angle::Result::Continue;
}
ANGLE_TRY(commandPool.allocate(context, &commandBuffer));
VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
beginInfo.pInheritanceInfo = nullptr;
ANGLE_VK_TRY(context, commandBuffer.begin(beginInfo));
return angle::Result::Continue;
}
angle::Result CommandQueue::flushOutsideRPCommands(
Context *context,
bool hasProtectedContent,
OutsideRenderPassCommandBufferHelper **outsideRPCommands)
{
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, hasProtectedContent));
PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
return (*outsideRPCommands)->flushToPrimary(context, &commandBuffer);
}
angle::Result CommandQueue::flushRenderPassCommands(
Context *context,
bool hasProtectedContent,
const RenderPass &renderPass,
RenderPassCommandBufferHelper **renderPassCommands)
{
ANGLE_TRY(ensurePrimaryCommandBufferValid(context, hasProtectedContent));
PrimaryCommandBuffer &commandBuffer = getCommandBuffer(hasProtectedContent);
return (*renderPassCommands)->flushToPrimary(context, &commandBuffer, &renderPass);
}
angle::Result CommandQueue::queueSubmitOneOff(Context *context,
bool hasProtectedContent,
egl::ContextPriority contextPriority,
VkCommandBuffer commandBufferHandle,
const Semaphore *waitSemaphore,
VkPipelineStageFlags waitSemaphoreStageMask,
const Fence *fence,
SubmitPolicy submitPolicy,
const QueueSerial &submitQueueSerial)
{
DeviceScoped<CommandBatch> scopedBatch(context->getDevice());
CommandBatch &batch = scopedBatch.get();
batch.queueSerial = submitQueueSerial;
batch.hasProtectedContent = hasProtectedContent;
// If the caller passed in a fence, use it. Otherwise create an internal fence. That way we can
// go through the normal waitForQueueSerial code path to wait for it to finish.
if (fence == nullptr)
{
ANGLE_VK_TRY(context, batch.fence.init(context->getDevice(), &mFenceRecycler));
fence = &batch.fence.get();
}
VkSubmitInfo submitInfo = {};
submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
VkProtectedSubmitInfo protectedSubmitInfo = {};
if (hasProtectedContent)
{
protectedSubmitInfo.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO;
protectedSubmitInfo.pNext = nullptr;
protectedSubmitInfo.protectedSubmit = true;
submitInfo.pNext = &protectedSubmitInfo;
}
if (commandBufferHandle != VK_NULL_HANDLE)
{
submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &commandBufferHandle;
}
if (waitSemaphore != nullptr)
{
submitInfo.waitSemaphoreCount = 1;
submitInfo.pWaitSemaphores = waitSemaphore->ptr();
submitInfo.pWaitDstStageMask = &waitSemaphoreStageMask;
}
ANGLE_TRY(queueSubmit(context, contextPriority, submitInfo, fence, submitQueueSerial));
mInFlightCommands.emplace_back(scopedBatch.release());
return angle::Result::Continue;
}
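// The single place where vkQueueSubmit is actually called; also updates the last-submitted
// serials and the submit perf counters.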
angle::Result CommandQueue::queueSubmit(Context *context,
egl::ContextPriority contextPriority,
const VkSubmitInfo &submitInfo,
const Fence *fence,
const QueueSerial &submitQueueSerial)
{
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::queueSubmit");
RendererVk *renderer = context->getRenderer();
if (kOutputVmaStatsString)
{
renderer->outputVmaStatString();
}
VkFence fenceHandle = fence ? fence->getHandle() : VK_NULL_HANDLE;
VkQueue queue = getQueue(contextPriority);
ANGLE_VK_TRY(context, vkQueueSubmit(queue, 1, &submitInfo, fenceHandle));
mLastSubmittedSerials.setQueueSerial(submitQueueSerial);
++mPerfCounters.vkQueueSubmitCallsTotal;
++mPerfCounters.vkQueueSubmitCallsPerFrame;
return angle::Result::Continue;
}
void CommandQueue::resetPerFramePerfCounters()
{
mPerfCounters.commandQueueSubmitCallsPerFrame = 0;
mPerfCounters.vkQueueSubmitCallsPerFrame = 0;
}
VkResult CommandQueue::queuePresent(egl::ContextPriority contextPriority,
const VkPresentInfoKHR &presentInfo)
{
VkQueue queue = getQueue(contextPriority);
return vkQueuePresentKHR(queue, &presentInfo);
}
bool CommandQueue::hasUnfinishedUse(const vk::ResourceUse &use) const
{
return use > mLastCompletedSerials;
}
bool CommandQueue::hasUnsubmittedUse(const vk::ResourceUse &use) const
{
return use > mLastSubmittedSerials;
}
size_t CommandQueue::getBatchCountUpToSerials(RendererVk *renderer, const Serials &serials)
{
if (mInFlightCommands.empty())
{
return 0;
}
if (renderer->getLargestQueueSerialIndexEverAllocated() < 64)
{
return GetBatchCountUpToSerials<angle::BitSet64<64>>(
mInFlightCommands, mLastSubmittedSerials, mLastCompletedSerials, serials);
}
else
{
return GetBatchCountUpToSerials<angle::BitSetArray<kMaxQueueSerialIndexCount>>(
mInFlightCommands, mLastSubmittedSerials, mLastCompletedSerials, serials);
}
}
const SharedFence &CommandQueue::getSharedFenceToWait(size_t finishCount)
{
ASSERT(finishCount > 0);
// Because some submissions may be empty (and thus have no fence), we have to search for the
// closest non-empty submission to find a fence to wait on.
for (int i = static_cast<int>(finishCount) - 1; i >= 0; i--)
{
if (mInFlightCommands[i].fence)
{
return mInFlightCommands[i].fence;
}
}
// If everything is finished, we just return the first one (which is also finished).
return mInFlightCommands[0].fence;
}
// ThreadSafeCommandQueue implementation
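// ThreadSafeCommandQueue guards CommandQueue with a mutex. While waiting on a fence it releases
// the lock and waits on a local SharedFence copy, so other threads can keep submitting work.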
angle::Result ThreadSafeCommandQueue::finishResourceUse(Context *context,
const ResourceUse &use,
uint64_t timeout)
{
std::unique_lock<std::mutex> lock(mMutex);
size_t finishCount = getBatchCountUpToSerials(context->getRenderer(), use.getSerials());
if (finishCount == 0)
{
return angle::Result::Continue;
}
const SharedFence &sharedFence = getSharedFenceToWait(finishCount);
// Wait for it to finish.
if (!sharedFence)
{
return angle::Result::Continue;
}
else
{
const SharedFence localSharedFenceToWaitOn = sharedFence;
lock.unlock();
ANGLE_TRACE_EVENT0("gpu.angle", "CommandQueue::finishResourceUse");
// Only the local copy of the sharedFence may be used without holding the lock.
VkResult status = localSharedFenceToWaitOn.wait(context->getDevice(), timeout);
ANGLE_VK_TRY(context, status);
lock.lock();
}
// Clean up finished batches. While the lock was released, finishCount may have changed, so
// recheck mInFlightCommands for all finished commands.
ANGLE_TRY(CommandQueue::checkCompletedCommands(context));
ASSERT(allInFlightCommandsAreAfterSerials(use.getSerials()));
return angle::Result::Continue;
}
angle::Result ThreadSafeCommandQueue::finishQueueSerial(Context *context,
const QueueSerial &queueSerial,
uint64_t timeout)
{
vk::ResourceUse use(queueSerial);
return finishResourceUse(context, use, timeout);
}
angle::Result ThreadSafeCommandQueue::waitIdle(Context *context, uint64_t timeout)
{
// Fill the local variable while holding the lock.
vk::ResourceUse use;
{
std::unique_lock<std::mutex> lock(mMutex);
if (mInFlightCommands.empty())
{
return angle::Result::Continue;
}
use.setQueueSerial(mInFlightCommands.back().queueSerial);
}
return finishResourceUse(context, use, timeout);
}
angle::Result ThreadSafeCommandQueue::waitForResourceUseToFinishWithUserTimeout(
Context *context,
const ResourceUse &use,
uint64_t timeout,
VkResult *result)
{
std::unique_lock<std::mutex> lock(mMutex);
size_t finishCount = getBatchCountUpToSerials(context->getRenderer(), use.getSerials());
// The serial is already complete if there is no in-flight work (i.e. mInFlightCommands is
// empty), or if the given serial is smaller than the smallest in-flight serial.
if (finishCount == 0)
{
*result = VK_SUCCESS;
return angle::Result::Continue;
}
// The serial is already complete if every batch up to this serial is a garbage-clean-up-only
// batch (i.e. empty submission that's optimized out)
const SharedFence &sharedFence = getSharedFenceToWait(finishCount);
if (!sharedFence)
{
*result = VK_SUCCESS;
return angle::Result::Continue;
}
// Serial is not yet submitted. This is undefined behaviour, so we can do anything.
if (hasUnsubmittedUse(use))
{
WARN() << "Waiting on an unsubmitted serial.";
*result = VK_TIMEOUT;
return angle::Result::Continue;
}
// Make a local copy of the SharedFence while the lock is held. Since SharedFence is
// ref-counted, this local copy ensures the underlying VkFence will not go away.
SharedFence localSharedFenceToWaitOn = sharedFence;
lock.unlock();
// Only the local copy of the sharedFence may be used without holding the lock.
*result = localSharedFenceToWaitOn.wait(context->getDevice(), timeout);
lock.lock();
// Don't trigger an error on timeout.
if (*result != VK_TIMEOUT)
{
ANGLE_VK_TRY(context, *result);
}
return angle::Result::Continue;
}
bool ThreadSafeCommandQueue::isBusy(RendererVk *renderer) const
{
size_t maxIndex = renderer->getLargestQueueSerialIndexEverAllocated();
for (SerialIndex i = 0; i <= maxIndex; ++i)
{
if (mLastSubmittedSerials[i] > mLastCompletedSerials[i])
{
return true;
}
}
return false;
}
// QueuePriorities:
constexpr float kVulkanQueuePriorityLow = 0.0;
constexpr float kVulkanQueuePriorityMedium = 0.4;
constexpr float kVulkanQueuePriorityHigh = 1.0;
const float QueueFamily::kQueuePriorities[static_cast<uint32_t>(egl::ContextPriority::EnumCount)] =
{kVulkanQueuePriorityMedium, kVulkanQueuePriorityHigh, kVulkanQueuePriorityLow};
egl::ContextPriority DeviceQueueMap::getDevicePriority(egl::ContextPriority priority) const
{
return mPriorities[priority];
}
DeviceQueueMap::~DeviceQueueMap() {}
DeviceQueueMap &DeviceQueueMap::operator=(const DeviceQueueMap &other)
{
ASSERT(this != &other);
if ((this != &other) && other.valid())
{
mIndex = other.mIndex;
mIsProtected = other.mIsProtected;
mPriorities[egl::ContextPriority::Low] = other.mPriorities[egl::ContextPriority::Low];
mPriorities[egl::ContextPriority::Medium] = other.mPriorities[egl::ContextPriority::Medium];
mPriorities[egl::ContextPriority::High] = other.mPriorities[egl::ContextPriority::High];
*static_cast<angle::PackedEnumMap<egl::ContextPriority, VkQueue> *>(this) = other;
}
return *this;
}
void QueueFamily::getDeviceQueue(VkDevice device,
bool makeProtected,
uint32_t queueIndex,
VkQueue *queue)
{
if (makeProtected)
{
VkDeviceQueueInfo2 queueInfo2 = {};
queueInfo2.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2;
queueInfo2.flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT;
queueInfo2.queueFamilyIndex = mIndex;
queueInfo2.queueIndex = queueIndex;
vkGetDeviceQueue2(device, &queueInfo2, queue);
}
else
{
vkGetDeviceQueue(device, mIndex, queueIndex, queue);
}
}
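// Map each egl::ContextPriority to a device queue. Medium always uses the first queue; High and
// Low get dedicated queues only when the family exposes enough queues, otherwise they alias the
// Medium queue and are demoted to Medium priority.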
DeviceQueueMap QueueFamily::initializeQueueMap(VkDevice device,
bool makeProtected,
uint32_t queueIndex,
uint32_t queueCount)
{
// QueueIndexing:
constexpr uint32_t kQueueIndexMedium = 0;
constexpr uint32_t kQueueIndexHigh = 1;
constexpr uint32_t kQueueIndexLow = 2;
ASSERT(queueCount);
ASSERT((queueIndex + queueCount) <= mProperties.queueCount);
DeviceQueueMap queueMap(mIndex, makeProtected);
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexMedium,
&queueMap[egl::ContextPriority::Medium]);
queueMap.mPriorities[egl::ContextPriority::Medium] = egl::ContextPriority::Medium;
// If at least 2 queues, High has its own queue
if (queueCount > 1)
{
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexHigh,
&queueMap[egl::ContextPriority::High]);
queueMap.mPriorities[egl::ContextPriority::High] = egl::ContextPriority::High;
}
else
{
queueMap[egl::ContextPriority::High] = queueMap[egl::ContextPriority::Medium];
queueMap.mPriorities[egl::ContextPriority::High] = egl::ContextPriority::Medium;
}
// If at least 3 queues, Low has its own queue. Adjust Low priority.
if (queueCount > 2)
{
getDeviceQueue(device, makeProtected, queueIndex + kQueueIndexLow,
&queueMap[egl::ContextPriority::Low]);
queueMap.mPriorities[egl::ContextPriority::Low] = egl::ContextPriority::Low;
}
else
{
queueMap[egl::ContextPriority::Low] = queueMap[egl::ContextPriority::Medium];
queueMap.mPriorities[egl::ContextPriority::Low] = egl::ContextPriority::Medium;
}
return queueMap;
}
void QueueFamily::initialize(const VkQueueFamilyProperties &queueFamilyProperties, uint32_t index)
{
mProperties = queueFamilyProperties;
mIndex = index;
}
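// Find the matchNumber-th (zero-based) queue family whose queueFlags contain all of the
// requested flags. Optionally returns the total number of matching families via matchCount.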
uint32_t QueueFamily::FindIndex(const std::vector<VkQueueFamilyProperties> &queueFamilyProperties,
VkQueueFlags flags,
int32_t matchNumber,
uint32_t *matchCount)
{
uint32_t index = QueueFamily::kInvalidIndex;
uint32_t count = 0;
for (uint32_t familyIndex = 0; familyIndex < queueFamilyProperties.size(); ++familyIndex)
{
const auto &queueInfo = queueFamilyProperties[familyIndex];
if ((queueInfo.queueFlags & flags) == flags)
{
ASSERT(queueInfo.queueCount > 0);
count++;
if ((index == QueueFamily::kInvalidIndex) && (matchNumber-- == 0))
{
index = familyIndex;
}
}
}
if (matchCount)
{
*matchCount = count;
}
return index;
}
} // namespace vk
} // namespace rx