Vulkan: Emulate instanced attrib divisor
This sets instancedArrays[ANGLE|EXT] extensions as always
supported regardless of underlying Vulkan HW's max vertex attrib
divisor.
Then detect cases where the app sets a divisor that isn't supported
by hardware and emulate those cases. Emulation is accomplished by
copying the instanced attribs to a new buffer where each attrib is
present once per instance, using the attrib divisor value as a
factor to replicate the attribs, and then setting the actual divisor
value for the draw to "1".
Also, we only store 8 bits for the divisor used in the PSO, so this
code also handles emulation of the case where divisor is > 255.
This is passing all of the drawInstanced/Elements dEQP tests
where divisor has to be emulated.
Also enabled end2end InstancingTestES3 for Vulkan backend.
Bug: angleproject:2672
Change-Id: I9932f9eab49b16a19e8bbd35dacaf3b5a27a213f
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1758689
Reviewed-by: Courtney Goeltzenleuchter <courtneygo@google.com>
Commit-Queue: Tobin Ehlis <tobine@google.com>
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.cpp b/src/libANGLE/renderer/vulkan/ContextVk.cpp
index ae5a5b2..9e90529 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.cpp
+++ b/src/libANGLE/renderer/vulkan/ContextVk.cpp
@@ -477,12 +477,14 @@
}
// Must be called before the command buffer is started. Can call finish.
- if (context->getStateCache().hasAnyActiveClientAttrib())
+ if (mVertexArray->getStreamingVertexAttribsMask().any())
{
ASSERT(firstVertexOrInvalid != -1);
- ANGLE_TRY(mVertexArray->updateClientAttribs(context, firstVertexOrInvalid,
- vertexOrIndexCount, instanceCount,
- indexTypeOrInvalid, indices));
+ // All client attribs & any emulated buffered attribs will be updated
+ ANGLE_TRY(mVertexArray->updateStreamedAttribs(context, firstVertexOrInvalid,
+ vertexOrIndexCount, instanceCount,
+ indexTypeOrInvalid, indices));
+
mGraphicsDirtyBits.set(DIRTY_BIT_VERTEX_BUFFERS);
}
diff --git a/src/libANGLE/renderer/vulkan/ContextVk.h b/src/libANGLE/renderer/vulkan/ContextVk.h
index e88fece..af50f4d 100644
--- a/src/libANGLE/renderer/vulkan/ContextVk.h
+++ b/src/libANGLE/renderer/vulkan/ContextVk.h
@@ -213,9 +213,10 @@
GLuint relativeOffset)
{
invalidateVertexAndIndexBuffers();
- mGraphicsPipelineDesc->updateVertexInput(&mGraphicsPipelineTransition,
- static_cast<uint32_t>(attribIndex), stride,
- divisor, format, relativeOffset);
+ // Set divisor to 1 for attribs with emulated divisor
+ mGraphicsPipelineDesc->updateVertexInput(
+ &mGraphicsPipelineTransition, static_cast<uint32_t>(attribIndex), stride,
+ divisor > mRenderer->getMaxVertexAttribDivisor() ? 1 : divisor, format, relativeOffset);
}
void invalidateDefaultAttribute(size_t attribIndex);
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.cpp b/src/libANGLE/renderer/vulkan/RendererVk.cpp
index d309953..3a7b7d0 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.cpp
+++ b/src/libANGLE/renderer/vulkan/RendererVk.cpp
@@ -1056,7 +1056,11 @@
deviceProperties.pNext = &divisorProperties;
vkGetPhysicalDeviceProperties2KHR(mPhysicalDevice, &deviceProperties);
- mMaxVertexAttribDivisor = divisorProperties.maxVertexAttribDivisor;
+ // We only store 8 bit divisor in GraphicsPipelineDesc so capping value & we emulate if
+ // exceeded
+ mMaxVertexAttribDivisor =
+ std::min(divisorProperties.maxVertexAttribDivisor,
+ static_cast<uint32_t>(std::numeric_limits<uint8_t>::max()));
createInfo.pNext = &enabledFeatures;
}
diff --git a/src/libANGLE/renderer/vulkan/RendererVk.h b/src/libANGLE/renderer/vulkan/RendererVk.h
index 07e11b2..9340d5b 100644
--- a/src/libANGLE/renderer/vulkan/RendererVk.h
+++ b/src/libANGLE/renderer/vulkan/RendererVk.h
@@ -125,6 +125,7 @@
ASSERT(mFeaturesInitialized);
return mFeatures;
}
+ uint32_t getMaxVertexAttribDivisor() const { return mMaxVertexAttribDivisor; }
bool isMockICDEnabled() const { return mEnabledICD == vk::ICD::Mock; }
diff --git a/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp b/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
index 8cf7e19..77684da 100644
--- a/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
+++ b/src/libANGLE/renderer/vulkan/VertexArrayVk.cpp
@@ -54,17 +54,37 @@
size_t bytesToAllocate,
size_t destOffset,
size_t vertexCount,
- size_t stride,
+ size_t sourceStride,
+ size_t destStride,
VertexCopyFunction vertexLoadFunction,
vk::BufferHelper **bufferOut,
- VkDeviceSize *bufferOffsetOut)
+ VkDeviceSize *bufferOffsetOut,
+ uint32_t replicateCount)
{
uint8_t *dst = nullptr;
ANGLE_TRY(dynamicBuffer->allocate(contextVk, bytesToAllocate, &dst, nullptr, bufferOffsetOut,
nullptr));
*bufferOut = dynamicBuffer->getCurrentBuffer();
dst += destOffset;
- vertexLoadFunction(sourceData, stride, vertexCount, dst);
+ if (replicateCount == 1)
+ {
+ vertexLoadFunction(sourceData, sourceStride, vertexCount, dst);
+ }
+ else
+ {
+ ASSERT(replicateCount > 1);
+ uint32_t sourceRemainingCount = replicateCount - 1;
+ for (size_t dataCopied = 0; dataCopied < bytesToAllocate;
+ dataCopied += destStride, dst += destStride, sourceRemainingCount--)
+ {
+ vertexLoadFunction(sourceData, sourceStride, 1, dst);
+ if (sourceRemainingCount == 0)
+ {
+ sourceData += sourceStride;
+ sourceRemainingCount = replicateCount;
+ }
+ }
+ }
ANGLE_TRY(dynamicBuffer->flush(contextVk));
return angle::Result::Continue;
@@ -334,9 +354,9 @@
srcBytes += binding.getOffset() + relativeOffset;
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
ANGLE_TRY(StreamVertexData(contextVk, &conversion->data, srcBytes, numVertices * dstFormatSize,
- 0, numVertices, binding.getStride(), vertexFormat.vertexLoadFunction,
- &mCurrentArrayBuffers[attribIndex],
- &conversion->lastAllocationOffset));
+ 0, numVertices, binding.getStride(), srcFormatSize,
+ vertexFormat.vertexLoadFunction, &mCurrentArrayBuffers[attribIndex],
+ &conversion->lastAllocationOffset, 1));
srcBuffer->unmapImpl(contextVk);
ASSERT(conversion->dirty);
@@ -460,6 +480,11 @@
GLuint stride;
bool anyVertexBufferConvertedOnGpu = false;
gl::Buffer *bufferGL = binding.getBuffer().get();
+ // Emulated and/or client-side attribs will be streamed
+ bool isStreamingVertexAttrib =
+ (binding.getDivisor() > renderer->getMaxVertexAttribDivisor()) || (bufferGL == nullptr);
+ mStreamingVertexAttribsMask.set(attribIndex, isStreamingVertexAttrib);
+
if (bufferGL)
{
BufferVk *bufferVk = vk::GetImpl(bufferGL);
@@ -563,17 +588,24 @@
return angle::Result::Continue;
}
-angle::Result VertexArrayVk::updateClientAttribs(const gl::Context *context,
- GLint firstVertex,
- GLsizei vertexOrIndexCount,
- GLsizei instanceCount,
- gl::DrawElementsType indexTypeOrInvalid,
- const void *indices)
+// Handle copying client attribs and/or expanding attrib buffer in case where attribute
+// divisor value has to be emulated.
+angle::Result VertexArrayVk::updateStreamedAttribs(const gl::Context *context,
+ GLint firstVertex,
+ GLsizei vertexOrIndexCount,
+ GLsizei instanceCount,
+ gl::DrawElementsType indexTypeOrInvalid,
+ const void *indices)
{
ContextVk *contextVk = vk::GetImpl(context);
- const gl::AttributesMask &clientAttribs = context->getStateCache().getActiveClientAttribsMask();
+ const gl::AttributesMask activeAttribs =
+ context->getStateCache().getActiveClientAttribsMask() |
+ context->getStateCache().getActiveBufferedAttribsMask();
+ const gl::AttributesMask activeStreamedAttribs = mStreamingVertexAttribsMask & activeAttribs;
- ASSERT(clientAttribs.any());
+ // Early return for corner case where emulated buffered attribs are not active
+ if (!activeStreamedAttribs.any())
+ return angle::Result::Continue;
GLint startVertex;
size_t vertexCount;
@@ -586,13 +618,13 @@
const auto &attribs = mState.getVertexAttributes();
const auto &bindings = mState.getVertexBindings();
- // TODO(fjhenigman): When we have a bunch of interleaved attributes, they end up
+ // TODO: When we have a bunch of interleaved attributes, they end up
// un-interleaved, wasting space and copying time. Consider improving on that.
- for (size_t attribIndex : clientAttribs)
+ for (size_t attribIndex : activeStreamedAttribs)
{
const gl::VertexAttribute &attrib = attribs[attribIndex];
- const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
- ASSERT(attrib.enabled && binding.getBuffer().get() == nullptr);
+ ASSERT(attrib.enabled);
+ const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
GLuint stride = vertexFormat.bufferFormat().pixelBytes;
@@ -600,19 +632,51 @@
ASSERT(GetVertexInputAlignment(vertexFormat) <= vk::kVertexBufferAlignment);
const uint8_t *src = static_cast<const uint8_t *>(attrib.pointer);
- if (binding.getDivisor() > 0)
+ const uint32_t divisor = binding.getDivisor();
+ if (divisor > 0)
{
- // instanced attrib
- size_t count = UnsignedCeilDivide(instanceCount, binding.getDivisor());
- size_t bytesToAllocate = count * stride;
+ // Instanced attrib
+ if (divisor > renderer->getMaxVertexAttribDivisor())
+ {
+ // Emulated attrib
+ BufferVk *bufferVk = nullptr;
+ if (binding.getBuffer().get() != nullptr)
+ {
+ // Map buffer to expand attribs for divisor emulation
+ bufferVk = vk::GetImpl(binding.getBuffer().get());
+ void *buffSrc = nullptr;
+ ANGLE_TRY(bufferVk->mapImpl(contextVk, &buffSrc));
+ src = reinterpret_cast<const uint8_t *>(buffSrc);
+ }
+ // Divisor will be set to 1 & so update buffer to have 1 attrib per instance
+ size_t bytesToAllocate = instanceCount * stride;
- ANGLE_TRY(StreamVertexData(contextVk, &mDynamicVertexData, src, bytesToAllocate, 0,
- count, binding.getStride(), vertexFormat.vertexLoadFunction,
- &mCurrentArrayBuffers[attribIndex],
- &mCurrentArrayBufferOffsets[attribIndex]));
+ ANGLE_TRY(StreamVertexData(contextVk, &mDynamicVertexData, src, bytesToAllocate, 0,
+ instanceCount, binding.getStride(), stride,
+ vertexFormat.vertexLoadFunction,
+ &mCurrentArrayBuffers[attribIndex],
+ &mCurrentArrayBufferOffsets[attribIndex], divisor));
+ if (bufferVk)
+ {
+ bufferVk->unmapImpl(contextVk);
+ }
+ }
+ else
+ {
+ ASSERT(binding.getBuffer().get() == nullptr);
+ size_t count = UnsignedCeilDivide(instanceCount, divisor);
+ size_t bytesToAllocate = count * stride;
+
+ ANGLE_TRY(StreamVertexData(contextVk, &mDynamicVertexData, src, bytesToAllocate, 0,
+ count, binding.getStride(), stride,
+ vertexFormat.vertexLoadFunction,
+ &mCurrentArrayBuffers[attribIndex],
+ &mCurrentArrayBufferOffsets[attribIndex], 1));
+ }
}
else
{
+ ASSERT(binding.getBuffer().get() == nullptr);
// Allocate space for startVertex + vertexCount so indexing will work. If we don't
// start at zero all the indices will be off.
// Only vertexCount vertices will be used by the upcoming draw so that is all we copy.
@@ -622,8 +686,8 @@
ANGLE_TRY(StreamVertexData(
contextVk, &mDynamicVertexData, src, bytesToAllocate, destOffset, vertexCount,
- binding.getStride(), vertexFormat.vertexLoadFunction,
- &mCurrentArrayBuffers[attribIndex], &mCurrentArrayBufferOffsets[attribIndex]));
+ binding.getStride(), stride, vertexFormat.vertexLoadFunction,
+ &mCurrentArrayBuffers[attribIndex], &mCurrentArrayBufferOffsets[attribIndex], 1));
}
mCurrentArrayBufferHandles[attribIndex] =
diff --git a/src/libANGLE/renderer/vulkan/VertexArrayVk.h b/src/libANGLE/renderer/vulkan/VertexArrayVk.h
index 6c26184..a6338ac 100644
--- a/src/libANGLE/renderer/vulkan/VertexArrayVk.h
+++ b/src/libANGLE/renderer/vulkan/VertexArrayVk.h
@@ -37,12 +37,12 @@
VkBuffer bufferHandle,
uint32_t offset);
- angle::Result updateClientAttribs(const gl::Context *context,
- GLint firstVertex,
- GLsizei vertexOrIndexCount,
- GLsizei instanceCount,
- gl::DrawElementsType indexTypeOrInvalid,
- const void *indices);
+ angle::Result updateStreamedAttribs(const gl::Context *context,
+ GLint firstVertex,
+ GLsizei vertexOrIndexCount,
+ GLsizei instanceCount,
+ gl::DrawElementsType indexTypeOrInvalid,
+ const void *indices);
angle::Result handleLineLoop(ContextVk *contextVk,
GLint firstVertex,
@@ -92,6 +92,11 @@
size_t indexCount,
const void *sourcePointer);
+ const gl::AttributesMask &getStreamingVertexAttribsMask() const
+ {
+ return mStreamingVertexAttribsMask;
+ }
+
private:
void setDefaultPackedInput(ContextVk *contextVk, size_t attribIndex);
@@ -133,6 +138,9 @@
// Vulkan does not allow binding a null vertex buffer. We use a dummy as a placeholder.
vk::BufferHelper mTheNullBuffer;
+
+ // Track client and/or emulated attribs whose buffer contents we have to stream
+ gl::AttributesMask mStreamingVertexAttribsMask;
};
} // namespace rx
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
index e52cb36..ca60902 100644
--- a/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.cpp
@@ -16,6 +16,7 @@
#include "libANGLE/renderer/vulkan/FramebufferVk.h"
#include "libANGLE/renderer/vulkan/ProgramVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
+#include "libANGLE/renderer/vulkan/VertexArrayVk.h"
#include "libANGLE/renderer/vulkan/vk_format_utils.h"
#include "libANGLE/renderer/vulkan/vk_helpers.h"
@@ -673,7 +674,6 @@
VkPipelineVertexInputDivisorStateCreateInfoEXT divisorState = {};
divisorState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
divisorState.pVertexBindingDivisors = divisorDesc.data();
-
for (size_t attribIndexSizeT : activeAttribLocationsMask)
{
const uint32_t attribIndex = static_cast<uint32_t>(attribIndexSizeT);
@@ -885,10 +885,6 @@
{
vk::PackedAttribDesc &packedAttrib = mVertexInputAttribs.attribs[attribIndex];
- // TODO: Handle the case where the divisor overflows the field that holds it.
- // http://anglebug.com/2672
- ASSERT(divisor <= std::numeric_limits<decltype(packedAttrib.divisor)>::max());
-
SetBitField(packedAttrib.stride, stride);
SetBitField(packedAttrib.divisor, divisor);
diff --git a/src/libANGLE/renderer/vulkan/vk_cache_utils.h b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
index b759e0d..1d20ed7 100644
--- a/src/libANGLE/renderer/vulkan/vk_cache_utils.h
+++ b/src/libANGLE/renderer/vulkan/vk_cache_utils.h
@@ -38,7 +38,7 @@
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
-// something likely we will want to investigate as the Vulkan implementation progresses.
+// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
@@ -164,8 +164,6 @@
struct PackedAttribDesc final
{
uint8_t format;
-
- // TODO(http://anglebug.com/2672): Emulate divisors greater than UBYTE_MAX.
uint8_t divisor;
// Can only take 11 bits on NV.
@@ -315,9 +313,9 @@
static_assert(kPackedInputAssemblyAndColorBlendStateSize == 56, "Size check failed");
constexpr size_t kGraphicsPipelineDescSumOfSizes =
- kVertexInputAttributesSize + kPackedInputAssemblyAndColorBlendStateSize +
- kPackedRasterizationAndMultisampleStateSize + kPackedDepthStencilStateSize +
- kRenderPassDescSize + sizeof(VkViewport) + sizeof(VkRect2D);
+ kVertexInputAttributesSize + kRenderPassDescSize + kPackedRasterizationAndMultisampleStateSize +
+ kPackedDepthStencilStateSize + kPackedInputAssemblyAndColorBlendStateSize + sizeof(VkViewport) +
+ sizeof(VkRect2D);
// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
diff --git a/src/libANGLE/renderer/vulkan/vk_caps_utils.cpp b/src/libANGLE/renderer/vulkan/vk_caps_utils.cpp
index 76815a5..0470ffe 100644
--- a/src/libANGLE/renderer/vulkan/vk_caps_utils.cpp
+++ b/src/libANGLE/renderer/vulkan/vk_caps_utils.cpp
@@ -82,10 +82,9 @@
mNativeExtensions.vertexHalfFloat = true;
- // TODO: Enable this always and emulate instanced draws if any divisor exceeds the maximum
- // supported. http://anglebug.com/2672
- mNativeExtensions.instancedArraysANGLE = mMaxVertexAttribDivisor > 1;
- mNativeExtensions.instancedArraysEXT = mMaxVertexAttribDivisor > 1;
+ // Enabled in HW if VK_EXT_vertex_attribute_divisor available, otherwise emulated
+ mNativeExtensions.instancedArraysANGLE = true;
+ mNativeExtensions.instancedArraysEXT = true;
// Only expose robust buffer access if the physical device supports it.
mNativeExtensions.robustBufferAccessBehavior =
diff --git a/src/tests/deqp_support/deqp_gles3_test_expectations.txt b/src/tests/deqp_support/deqp_gles3_test_expectations.txt
index 5832ece..b66e9ec 100644
--- a/src/tests/deqp_support/deqp_gles3_test_expectations.txt
+++ b/src/tests/deqp_support/deqp_gles3_test_expectations.txt
@@ -555,16 +555,6 @@
3219 VULKAN : dEQP-GLES3.functional.negative_api.shader.link_program = FAIL
3219 VULKAN : dEQP-GLES3.functional.negative_api.shader.use_program = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.attribute_divisor.2*_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.attribute_divisor.4_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.mixed.2*_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_elements_instanced.mixed.4_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.attribute_divisor.2*_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.attribute_divisor.4_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.mixed.2*_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.draw_arrays_instanced.mixed.4_instances = FAIL
-2672 VULKAN : dEQP-GLES3.functional.instanced.types* = FAIL
-
// Polygon offset:
3678 VULKAN : dEQP-GLES3.functional.polygon_offset.float32_result_depth_clamp = FAIL
3678 VULKAN : dEQP-GLES3.functional.polygon_offset.float32_factor_1_slope = FAIL
diff --git a/src/tests/gl_tests/InstancingTest.cpp b/src/tests/gl_tests/InstancingTest.cpp
index e65a6f4..8b2d439 100644
--- a/src/tests/gl_tests/InstancingTest.cpp
+++ b/src/tests/gl_tests/InstancingTest.cpp
@@ -588,7 +588,7 @@
<< "Vertex attrib divisor read was not the same that was passed in.";
}
-ANGLE_INSTANTIATE_TEST(InstancingTestES3, ES3_OPENGL(), ES3_OPENGLES(), ES3_D3D11());
+ANGLE_INSTANTIATE_TEST(InstancingTestES3, ES3_OPENGL(), ES3_OPENGLES(), ES3_D3D11(), ES3_VULKAN());
ANGLE_INSTANTIATE_TEST(InstancingTestES31, ES31_OPENGL(), ES31_OPENGLES(), ES31_D3D11());