Vulkan: Support atomic counter array of arrays
Previously, it was assumed that a function argument is either AC or
AC[i], and it was converted to AC or AC+i respectively. The code is
changed to support any number of dimensions and subscripts, using
array size information from AC's type. If AC is an array of array
(atomic_uint AC[N][M][R]), the following index calculations are done.
AC -> AC.arrayIndex
AC[i] -> AC.arrayIndex + i*M*R
AC[i][j] -> AC.arrayIndex + i*M*R + j*R
AC[i][j][k] -> AC.arrayIndex + i*M*R + j*R + k
A test is added to exercise these various forms of indexing:
AtomicCounterBufferTest31.AtomicCounterArrayOfArray
Bug: angleproject:3566
Change-Id: I1e181a7363463d1d0ee4916f35006ed7c58e0f7c
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/1739488
Commit-Queue: Shahbaz Youssefi <syoussefi@chromium.org>
Reviewed-by: Tim Van Patten <timvp@google.com>
diff --git a/src/compiler/translator/tree_ops/RewriteAtomicCounters.cpp b/src/compiler/translator/tree_ops/RewriteAtomicCounters.cpp
index 5067d27..25d2e25 100644
--- a/src/compiler/translator/tree_ops/RewriteAtomicCounters.cpp
+++ b/src/compiler/translator/tree_ops/RewriteAtomicCounters.cpp
@@ -314,51 +314,29 @@
new TIntermSymbol(mRetyper.getVariableReplacement(symbolVariable));
ASSERT(bindingOffset != nullptr);
- TIntermNode *argument = symbol;
+ TIntermNode *argument = convertFunctionArgument(symbol, &bindingOffset);
- TIntermNode *parent = getParentNode();
- ASSERT(parent);
-
- TIntermBinary *arrayExpression = parent->getAsBinaryNode();
- if (arrayExpression)
+ if (mRetyper.isInAggregate())
{
- ASSERT(arrayExpression->getOp() == EOpIndexDirect ||
- arrayExpression->getOp() == EOpIndexIndirect);
-
- argument = arrayExpression;
-
- TIntermTyped *subscript = arrayExpression->getRight();
- TIntermConstantUnion *subscriptAsConstant = subscript->getAsConstantUnion();
- const bool subscriptIsZero = subscriptAsConstant && subscriptAsConstant->isZero(0);
-
- if (!subscriptIsZero)
- {
- // Copy the atomic counter binding/offset constant and modify it by adding the array
- // subscript to its offset field.
- TVariable *modified = CreateTempVariable(mSymbolTable, mAtomicCounterType);
- TIntermDeclaration *modifiedDecl =
- CreateTempInitDeclarationNode(modified, bindingOffset);
-
- TIntermSymbol *modifiedSymbol = new TIntermSymbol(modified);
- TConstantUnion *offsetFieldIndex = new TConstantUnion;
- offsetFieldIndex->setIConst(1);
- TIntermConstantUnion *offsetFieldRef =
- new TIntermConstantUnion(offsetFieldIndex, *StaticType::GetBasic<EbtUInt>());
- TIntermBinary *offsetField =
- new TIntermBinary(EOpIndexDirectStruct, modifiedSymbol, offsetFieldRef);
-
- TIntermBinary *modifiedOffset = new TIntermBinary(
- EOpAddAssign, offsetField, arrayExpression->getRight()->deepCopy());
-
- TIntermSequence *modifySequence =
- new TIntermSequence({modifiedDecl, modifiedOffset});
- insertStatementsInParentBlock(*modifySequence);
-
- bindingOffset = modifiedSymbol->deepCopy();
- }
+ mRetyper.replaceFunctionCallArg(argument, bindingOffset);
}
+ else
+ {
+ // If there's a stray ac[i] lying around, just delete it. This can happen if the shader
+ // uses ac[i].length(), which in RemoveArrayLengthMethod() will result in an ineffective
+ // statement that's just ac[i]; (similarly for a stray ac;, it doesn't have to be
+ // subscripted). Note that the subscript could have side effects, but the
+ // convertFunctionArgument above has already generated code that includes the subscript
+ // (and therefore its side-effect).
+ TIntermBlock *block = nullptr;
+ for (size_t ancestorIndex = 0; block == nullptr; ++ancestorIndex)
+ {
+ block = getAncestorNode(ancestorIndex)->getAsBlock();
+ }
- mRetyper.replaceFunctionCallArg(argument, bindingOffset);
+ TIntermSequence emptySequence;
+ mMultiReplacements.emplace_back(block, argument, emptySequence);
+ }
}
TIntermDeclaration *getAtomicCounterTypeDeclaration() { return mAtomicCounterTypeDeclaration; }
@@ -447,6 +425,152 @@
return replacementVar;
}
+ TIntermTyped *convertFunctionArgumentHelper(
+ const TVector<unsigned int> &runningArraySizeProducts,
+ TIntermTyped *flattenedSubscript,
+ uint32_t depth,
+ uint32_t *subscriptCountOut)
+ {
+ std::string prefix(depth, ' ');
+ TIntermNode *parent = getAncestorNode(depth);
+ ASSERT(parent);
+
+ TIntermBinary *arrayExpression = parent->getAsBinaryNode();
+ if (!arrayExpression)
+ {
+ // If the parent is not an array subscript operation, we have reached the end of the
+ // subscript chain. Note the depth that's traversed so the corresponding node can be
+ // taken as the function argument.
+ *subscriptCountOut = depth;
+ return flattenedSubscript;
+ }
+
+ ASSERT(arrayExpression->getOp() == EOpIndexDirect ||
+ arrayExpression->getOp() == EOpIndexIndirect);
+
+ // Assume i = n - depth. Get Pi. See comment in convertFunctionArgument.
+ ASSERT(depth < runningArraySizeProducts.size());
+ uint32_t thisDimensionSize =
+ runningArraySizeProducts[runningArraySizeProducts.size() - 1 - depth];
+
+ // Get Ii.
+ TIntermTyped *thisDimensionOffset = arrayExpression->getRight();
+
+ TIntermConstantUnion *subscriptAsConstant = thisDimensionOffset->getAsConstantUnion();
+ const bool subscriptIsZero = subscriptAsConstant && subscriptAsConstant->isZero(0);
+
+ // If Ii is zero, don't need to add Ii*Pi; that's zero.
+ if (!subscriptIsZero)
+ {
+ thisDimensionOffset = thisDimensionOffset->deepCopy();
+
+ // If Pi is 1, don't multiply. Just accumulate Ii.
+ if (thisDimensionSize != 1)
+ {
+ thisDimensionOffset = new TIntermBinary(EOpMul, thisDimensionOffset,
+ CreateUIntConstant(thisDimensionSize));
+ }
+
+ // Accumulate with the previous running offset, if any.
+ if (flattenedSubscript)
+ {
+ flattenedSubscript =
+ new TIntermBinary(EOpAdd, flattenedSubscript, thisDimensionOffset);
+ }
+ else
+ {
+ flattenedSubscript = thisDimensionOffset;
+ }
+ }
+
+ // Note: GLSL only allows 2 nested levels of arrays, so this recursion is bounded.
+ return convertFunctionArgumentHelper(runningArraySizeProducts, flattenedSubscript,
+ depth + 1, subscriptCountOut);
+ }
+
+ TIntermNode *convertFunctionArgument(TIntermNode *symbol, TIntermTyped **bindingOffset)
+ {
+ // Assume a general case of array declaration with N dimensions:
+ //
+ // atomic_uint ac[Dn]..[D2][D1];
+ //
+ // Let's define
+ //
+ // Pn = D(n-1)*...*D2*D1
+ //
+ // In that case, we have:
+ //
+ // ac[In] = ac + In*Pn
+ // ac[In][I(n-1)] = ac + In*Pn + I(n-1)*P(n-1)
+ // ac[In]...[Ii] = ac + In*Pn + ... + Ii*Pi
+ //
+ // We have just visited a symbol; ac. Walking the parent chain, we will visit the
+ // expressions in the above order (ac, ac[In], ac[In][I(n-1)], ...). We therefore can
+ // simply walk the parent chain and accumulate Ii*Pi to obtain the offset from the base of
+ // ac.
+
+ TIntermSymbol *argumentAsSymbol = symbol->getAsSymbolNode();
+ ASSERT(argumentAsSymbol);
+
+ const TVector<unsigned int> *arraySizes = argumentAsSymbol->getType().getArraySizes();
+
+ // Calculate Pi
+ TVector<unsigned int> runningArraySizeProducts;
+ if (arraySizes && arraySizes->size() > 0)
+ {
+ runningArraySizeProducts.resize(arraySizes->size());
+ uint32_t runningProduct = 1;
+ for (size_t dimension = 0; dimension < arraySizes->size(); ++dimension)
+ {
+ runningArraySizeProducts[dimension] = runningProduct;
+ runningProduct *= (*arraySizes)[dimension];
+ }
+ }
+
+ // Walk the parent chain and accumulate Ii*Pi
+ uint32_t subscriptCount = 0;
+ TIntermTyped *flattenedSubscript =
+ convertFunctionArgumentHelper(runningArraySizeProducts, nullptr, 0, &subscriptCount);
+
+ // Find the function argument, which is either in the form of ac (i.e. there are no
+ // subscripts, in which case that's the function argument), or ac[In]...[Ii] (in which case
+ // the function argument is the (n-i)th ancestor of ac.
+ //
+ // Note that this is the case because no other operation is allowed on ac other than
+ // subscript.
+ TIntermNode *argument = subscriptCount == 0 ? symbol : getAncestorNode(subscriptCount - 1);
+ ASSERT(argument != nullptr);
+
+ // If not subscripted, keep the argument as-is.
+ if (flattenedSubscript == nullptr)
+ {
+ return argument;
+ }
+
+ // Copy the atomic counter binding/offset constant and modify it by adding the array
+ // subscript to its offset field.
+ TVariable *modified = CreateTempVariable(mSymbolTable, mAtomicCounterType);
+ TIntermDeclaration *modifiedDecl = CreateTempInitDeclarationNode(modified, *bindingOffset);
+
+ TIntermSymbol *modifiedSymbol = new TIntermSymbol(modified);
+ TConstantUnion *offsetFieldIndex = new TConstantUnion;
+ offsetFieldIndex->setIConst(1);
+ TIntermConstantUnion *offsetFieldRef =
+ new TIntermConstantUnion(offsetFieldIndex, *StaticType::GetBasic<EbtUInt>());
+ TIntermBinary *offsetField =
+ new TIntermBinary(EOpIndexDirectStruct, modifiedSymbol, offsetFieldRef);
+
+ TIntermBinary *modifiedOffset =
+ new TIntermBinary(EOpAddAssign, offsetField, flattenedSubscript);
+
+ TIntermSequence *modifySequence = new TIntermSequence({modifiedDecl, modifiedOffset});
+ insertStatementsInParentBlock(*modifySequence);
+
+ *bindingOffset = modifiedSymbol->deepCopy();
+
+ return argument;
+ }
+
void convertBuiltinFunction(TIntermAggregate *node)
{
// If the function is |memoryBarrierAtomicCounter|, simply replace it with
diff --git a/src/compiler/translator/tree_util/ReplaceVariable.h b/src/compiler/translator/tree_util/ReplaceVariable.h
index d2b1c8d..a27547f 100644
--- a/src/compiler/translator/tree_util/ReplaceVariable.h
+++ b/src/compiler/translator/tree_util/ReplaceVariable.h
@@ -101,6 +101,7 @@
// Function call arguments handling:
void preVisitAggregate() { mReplacedFunctionCallArgs.emplace(); }
+ bool isInAggregate() const { return !mReplacedFunctionCallArgs.empty(); }
void postVisitAggregate() { mReplacedFunctionCallArgs.pop(); }
void replaceFunctionCallArg(const TIntermNode *oldArg, TIntermTyped *newArg)
{
diff --git a/src/tests/gl_tests/AtomicCounterBufferTest.cpp b/src/tests/gl_tests/AtomicCounterBufferTest.cpp
index 833ed17..67b9f72 100644
--- a/src/tests/gl_tests/AtomicCounterBufferTest.cpp
+++ b/src/tests/gl_tests/AtomicCounterBufferTest.cpp
@@ -310,6 +310,156 @@
}
}
+// Test atomic counter array of array.
+TEST_P(AtomicCounterBufferTest31, AtomicCounterArrayOfArray)
+{
+ // Crashes in older drivers. http://anglebug.com/3738
+ ANGLE_SKIP_TEST_IF(IsOpenGL() && IsWindows() && IsAMD());
+
+ // Fails on D3D. Some counters are double-incremented while some are untouched, hinting at a
+ // bug in index translation. http://anglebug.com/3783
+ ANGLE_SKIP_TEST_IF(IsD3D11());
+
+ // Nvidia's OpenGL driver fails to compile the shader. http://anglebug.com/3791
+ ANGLE_SKIP_TEST_IF(IsOpenGL() && IsNVIDIA());
+
+ // Intel's Windows OpenGL driver crashes in this test. http://anglebug.com/3791
+ ANGLE_SKIP_TEST_IF(IsOpenGL() && IsIntel() && IsWindows());
+
+ // Skipping due to a bug on the Qualcomm Vulkan Android driver.
+ // http://anglebug.com/3726
+ ANGLE_SKIP_TEST_IF(IsAndroid() && IsVulkan());
+
+ constexpr char kCS[] = R"(#version 310 es
+layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
+layout(binding = 0) uniform atomic_uint ac[7][5][3];
+
+void f0(in atomic_uint ac)
+{
+ atomicCounterIncrement(ac);
+}
+
+void f1(in atomic_uint ac[3])
+{
+ atomicCounterIncrement(ac[0]);
+ f0(ac[1]);
+ int index = 2;
+ f0(ac[index]);
+}
+
+void f2(in atomic_uint ac[5][3])
+{
+ // Increment all in ac[0], ac[1] and ac[2]
+ for (int i = 0; i < 3; ++i)
+ {
+ for (int j = 0; j < 2; ++j)
+ {
+ f0(ac[i][j]);
+ }
+ f0(ac[i][2]);
+ }
+
+ // Increment all in ac[3]
+ f1(ac[3]);
+
+ // Increment all in ac[4]
+ for (int i = 0; i < 2; ++i)
+ {
+ atomicCounterIncrement(ac[4][i]);
+ }
+ f0(ac[4][2]);
+}
+
+void f3(in atomic_uint ac[7][5][3])
+{
+ // Increment all in ac[0], ac[1], ac[2] and ac[3]
+ f2(ac[0]);
+ for (int i = 1; i < 4; ++i)
+ {
+ f2(ac[i]);
+ }
+
+ // Increment all in ac[5][0], ac[5][1], ac[5][2] and ac[5][3]
+ for (int i = 0; i < 4; ++i)
+ {
+ f1(ac[5][i]);
+ }
+
+ // Increment all in ac[5][4][0], ac[5][4][1] and ac[5][4][2]
+ f0(ac[5][4][0]);
+ for (int i = 1; i < 3; ++i)
+ {
+ f0(ac[5][4][i]);
+ }
+
+ // Increment all in ac[6]
+ for (int i = 0; i < 5; ++i)
+ {
+ for (int j = 0; j < 2; ++j)
+ {
+ atomicCounterIncrement(ac[6][i][j]);
+ }
+ atomicCounterIncrement(ac[6][i][2]);
+ }
+}
+
+void main()
+{
+ // Increment all in ac except ac[4]
+ f3(ac);
+
+ // Increment all in ac[4]
+ f2(ac[4]);
+})";
+
+ constexpr uint32_t kAtomicCounterRows = 7;
+ constexpr uint32_t kAtomicCounterCols = 5;
+ constexpr uint32_t kAtomicCounterDepth = 3;
+ constexpr uint32_t kAtomicCounterCount =
+ kAtomicCounterRows * kAtomicCounterCols * kAtomicCounterDepth;
+
+ GLint maxAtomicCounters = 0;
+ glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, &maxAtomicCounters);
+ EXPECT_GL_NO_ERROR();
+
+ // Required minimum is 8 by the spec
+ EXPECT_GE(maxAtomicCounters, 8);
+ ANGLE_SKIP_TEST_IF(static_cast<uint32_t>(maxAtomicCounters) < kAtomicCounterCount);
+
+ ANGLE_GL_COMPUTE_PROGRAM(program, kCS);
+ glUseProgram(program.get());
+
+ // The initial value of atomic counters is 0, 1, 2, ...
+ unsigned int bufferData[kAtomicCounterCount] = {};
+ for (uint32_t index = 0; index < kAtomicCounterCount; ++index)
+ {
+ bufferData[index] = index;
+ }
+
+ GLBuffer atomicCounterBuffer;
+ glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
+ glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(bufferData), bufferData, GL_STATIC_DRAW);
+
+ glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicCounterBuffer);
+
+ glDispatchCompute(1, 1, 1);
+ EXPECT_GL_NO_ERROR();
+
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+
+ unsigned int result[kAtomicCounterCount] = {};
+ glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
+ void *mappedBuffer =
+ glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(bufferData), GL_MAP_READ_BIT);
+ memcpy(result, mappedBuffer, sizeof(bufferData));
+ glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
+
+ for (uint32_t index = 0; index < kAtomicCounterCount; ++index)
+ {
+ EXPECT_EQ(result[index], bufferData[index] + 1) << "index " << index;
+ }
+}
+
ANGLE_INSTANTIATE_TEST(AtomicCounterBufferTest,
ES3_OPENGL(),
ES3_OPENGLES(),
diff --git a/src/tests/gl_tests/GLSLTest.cpp b/src/tests/gl_tests/GLSLTest.cpp
index d7d14ce..e6e3d8d 100644
--- a/src/tests/gl_tests/GLSLTest.cpp
+++ b/src/tests/gl_tests/GLSLTest.cpp
@@ -703,9 +703,6 @@
// Draw an array of points with the first vertex offset at 0 using gl_VertexID
TEST_P(GLSLTest_ES3, GLVertexIDOffsetZeroDrawArray)
{
- // TODO(syoussefi): missing ES3 shader feature support. http://anglebug.com/3221
- ANGLE_SKIP_TEST_IF(IsVulkan());
-
constexpr int kStartIndex = 0;
constexpr int kArrayLength = 5;
constexpr char kVS[] = R"(#version 300 es
@@ -768,9 +765,6 @@
// https://github.com/KhronosGroup/WebGL/blob/master/sdk/tests/conformance2/rendering/vertex-id.html
TEST_P(GLSLTest_ES3, GLVertexIDIntegerTextureDrawArrays)
{
- // TODO(syoussefi): missing ES3 shader feature support. http://anglebug.com/3221
- ANGLE_SKIP_TEST_IF(IsVulkan());
-
// Have to set a large point size because the window size is much larger than the texture
constexpr char kVS[] = R"(#version 300 es
flat out highp int vVertexID;
@@ -829,9 +823,6 @@
// Bug in Nexus drivers, offset does not work. (anglebug.com/3264)
ANGLE_SKIP_TEST_IF((IsNexus5X() || IsNexus6P()) && IsOpenGLES());
- // TODO(syoussefi): missing ES3 shader feature support. http://anglebug.com/3221
- ANGLE_SKIP_TEST_IF(IsVulkan());
-
constexpr int kStartIndex = 5;
constexpr int kArrayLength = 5;
constexpr char kVS[] = R"(#version 300 es
@@ -2891,6 +2882,104 @@
EXPECT_PIXEL_COLOR_EQ(0, 0, GLColor::green);
}
+// Test that the length() method is correctly translated in Vulkan atomic counter buffer emulation.
+TEST_P(GLSLTest_ES31, AtomicCounterArrayLength)
+{
+ // Crashes on an assertion. The driver reports no atomic counter buffers when queried from the
+ // program, but ANGLE believes there to be one.
+ //
+ // This is likely due to the fact that ANGLE generates the following code, as a side effect of
+ // the code on which .length() is being called:
+ //
+ // _uac1[(_uvalue = _utestSideEffectValue)];
+ //
+ // The driver is optimizing the subscription out, and calling the atomic counter inactive. This
+ // was observed on nvidia, mesa and amd/windows.
+ //
+ // The fix would be for ANGLE to skip uniforms it believes should exist, but when queried, the
+ // driver says don't.
+ //
+ // http://anglebug.com/3782
+ ANGLE_SKIP_TEST_IF(IsOpenGL());
+
+ // Skipping due to a bug on the Qualcomm Vulkan Android driver.
+ // http://anglebug.com/3726
+ ANGLE_SKIP_TEST_IF(IsAndroid() && IsVulkan());
+
+ constexpr char kCS[] = R"(#version 310 es
+precision mediump float;
+layout(local_size_x=1) in;
+
+layout(binding = 0) uniform atomic_uint ac1[2][3];
+uniform uint testSideEffectValue;
+
+layout(binding = 1, std140) buffer Result
+{
+ uint value;
+} result;
+
+void main() {
+ bool passed = true;
+ if (ac1.length() != 2)
+ {
+ passed = false;
+ }
+ uint value = 0u;
+ if (ac1[(value = testSideEffectValue)].length() != 3)
+ {
+ passed = false;
+ }
+ if (value != testSideEffectValue)
+ {
+ passed = false;
+ }
+ result.value = passed ? 255u : 127u;
+})";
+
+ constexpr unsigned int kUniformTestValue = 17;
+ constexpr unsigned int kExpectedSuccessValue = 255;
+ constexpr unsigned int kAtomicCounterRows = 2;
+ constexpr unsigned int kAtomicCounterCols = 3;
+
+ GLint maxAtomicCounters = 0;
+ glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, &maxAtomicCounters);
+ EXPECT_GL_NO_ERROR();
+
+ // Required minimum is 8 by the spec
+ EXPECT_GE(maxAtomicCounters, 8);
+ ANGLE_SKIP_TEST_IF(static_cast<uint32_t>(maxAtomicCounters) <
+ kAtomicCounterRows * kAtomicCounterCols);
+
+ ANGLE_GL_COMPUTE_PROGRAM(program, kCS);
+ glUseProgram(program.get());
+
+ constexpr unsigned int kBufferData[kAtomicCounterRows * kAtomicCounterCols] = {};
+ GLBuffer atomicCounterBuffer;
+ glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomicCounterBuffer);
+ glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(kBufferData), kBufferData, GL_STATIC_DRAW);
+ glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomicCounterBuffer);
+
+ constexpr unsigned int kOutputInitValue = 0;
+ GLBuffer shaderStorageBuffer;
+ glBindBuffer(GL_SHADER_STORAGE_BUFFER, shaderStorageBuffer);
+ glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(kOutputInitValue), &kOutputInitValue,
+ GL_STATIC_DRAW);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, shaderStorageBuffer);
+
+ GLint uniformLocation = glGetUniformLocation(program.get(), "testSideEffectValue");
+ EXPECT_NE(uniformLocation, -1);
+ glUniform1i(uniformLocation, kUniformTestValue);
+
+ glDispatchCompute(1, 1, 1);
+
+ glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+
+ const GLuint *ptr = reinterpret_cast<const GLuint *>(
+ glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint), GL_MAP_READ_BIT));
+ EXPECT_EQ(*ptr, kExpectedSuccessValue);
+ glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
+}
+
// Test that array indices for arrays of arrays work as expected.
TEST_P(GLSLTest_ES31, ArraysOfArrays)
{