ES31: Add atomic memory functions for SSBO

Due to SSBO is translated to RWByteAddressBuffer in HLSL, the corresponding
atomic memory functions atomic* will be translated to
RWByteAddressBuffer.Interlocked*. The translation is like below:

atomicAdd(instanceName.data[0], 5u);
// becomes
uint _ssbo_atomicAdd_uint(RWByteAddressBuffer buffer, uint loc, uint value)
{
    uint original_value;
    buffer.InterlockedAdd(loc, value, original_value);
    return original_value;
}

_ssbo_atomicAdd_uint(_instanceName, 0 + 16 * 0, 5);

Bug: angleproject:1951

Change-Id: If2af8bedb67a4135b443d2512d43c6058a78888d
Reviewed-on: https://chromium-review.googlesource.com/c/1370676
Reviewed-by: Corentin Wallez <cwallez@chromium.org>
Commit-Queue: Jiajia Qin <jiajia.qin@intel.com>
diff --git a/src/compiler/translator/OutputHLSL.cpp b/src/compiler/translator/OutputHLSL.cpp
index aa81c11..d660b9a 100644
--- a/src/compiler/translator/OutputHLSL.cpp
+++ b/src/compiler/translator/OutputHLSL.cpp
@@ -110,10 +110,20 @@
     }
 }
 
-bool IsAtomicFunctionDirectAssign(const TIntermBinary &node)
+bool IsAtomicFunctionForSharedVariableDirectAssign(const TIntermBinary &node)
 {
-    return node.getOp() == EOpAssign && node.getRight()->getAsAggregate() &&
-           IsAtomicFunction(node.getRight()->getAsAggregate()->getOp());
+    TIntermAggregate *aggregateNode = node.getRight()->getAsAggregate();
+    if (aggregateNode == nullptr)
+    {
+        return false;
+    }
+
+    if (node.getOp() == EOpAssign && IsAtomicFunction(aggregateNode->getOp()))
+    {
+        return !IsInShaderStorageBlock((*aggregateNode->getSequence())[0]->getAsTyped());
+    }
+
+    return false;
 }
 
 const char *kZeros       = "_ANGLE_ZEROS_";
@@ -1271,7 +1281,7 @@
             // function calls in HLSL.
             // e.g. original_value = atomicAdd(dest, value) should be translated into
             //      InterlockedAdd(dest, value, original_value);
-            else if (IsAtomicFunctionDirectAssign(*node))
+            else if (IsAtomicFunctionForSharedVariableDirectAssign(*node))
             {
                 TIntermAggregate *atomicFunctionNode = node->getRight()->getAsAggregate();
                 TOperator atomicFunctionOp           = atomicFunctionNode->getOp();
@@ -2400,20 +2410,60 @@
         case EOpAtomicAnd:
         case EOpAtomicOr:
         case EOpAtomicXor:
-            outputTriplet(out, visit, GetHLSLAtomicFunctionStringAndLeftParenthesis(node->getOp()),
-                          ",", ")");
-            break;
-
-        // The parameter 'original_value' of InterlockedExchange(dest, value, original_value) and
-        // InterlockedCompareExchange(dest, compare_value, value, original_value) is not optional.
+        // The parameter 'original_value' of InterlockedExchange(dest, value, original_value)
+        // and InterlockedCompareExchange(dest, compare_value, value, original_value) is not
+        // optional.
         // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/interlockedexchange
         // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/interlockedcompareexchange
-        // So all the call of atomicExchange(dest, value) and atomicCompSwap(dest, compare_value,
-        // value) should all be modified into the form of "int temp; temp = atomicExchange(dest,
-        // value);" and "int temp; temp = atomicCompSwap(dest, compare_value, value);" in the
-        // intermediate tree before traversing outputHLSL.
+        // So all the call of atomicExchange(dest, value) and atomicCompSwap(dest,
+        // compare_value, value) should all be modified into the form of "int temp; temp =
+        // atomicExchange(dest, value);" and "int temp; temp = atomicCompSwap(dest,
+        // compare_value, value);" in the intermediate tree before traversing outputHLSL.
         case EOpAtomicExchange:
         case EOpAtomicCompSwap:
+        {
+            ASSERT(node->getChildCount() > 1);
+            TIntermTyped *memNode = (*node->getSequence())[0]->getAsTyped();
+            if (IsInShaderStorageBlock(memNode))
+            {
+                // Atomic memory functions for SSBO.
+                // "_ssbo_atomicXXX_TYPE(RWByteAddressBuffer buffer, uint loc" is written to |out|.
+                mSSBOOutputHLSL->outputAtomicMemoryFunctionCallPrefix(memNode, node->getOp());
+                // Write the rest argument list to |out|.
+                for (size_t i = 1; i < node->getChildCount(); i++)
+                {
+                    out << ", ";
+                    TIntermTyped *argument = (*node->getSequence())[i]->getAsTyped();
+                    if (IsInShaderStorageBlock(argument))
+                    {
+                        mSSBOOutputHLSL->outputLoadFunctionCall(argument);
+                    }
+                    else
+                    {
+                        argument->traverse(this);
+                    }
+                }
+
+                out << ")";
+                return false;
+            }
+            else
+            {
+                // Atomic memory functions for shared variable.
+                if (node->getOp() != EOpAtomicExchange && node->getOp() != EOpAtomicCompSwap)
+                {
+                    outputTriplet(out, visit,
+                                  GetHLSLAtomicFunctionStringAndLeftParenthesis(node->getOp()), ",",
+                                  ")");
+                }
+                else
+                {
+                    UNREACHABLE();
+                }
+            }
+
+            break;
+        }
         default:
             UNREACHABLE();
     }
diff --git a/src/compiler/translator/ShaderStorageBlockFunctionHLSL.cpp b/src/compiler/translator/ShaderStorageBlockFunctionHLSL.cpp
index 090292e..a91b1d9 100644
--- a/src/compiler/translator/ShaderStorageBlockFunctionHLSL.cpp
+++ b/src/compiler/translator/ShaderStorageBlockFunctionHLSL.cpp
@@ -198,6 +198,45 @@
     out << "    return int((dim - loc)/uint(" << unsizedArrayStride << "));\n";
 }
 
+// static
+void ShaderStorageBlockFunctionHLSL::OutputSSBOAtomicMemoryFunctionBody(
+    TInfoSinkBase &out,
+    const ShaderStorageBlockFunction &ssboFunction)
+{
+    out << "    " << ssboFunction.typeString << " original_value;\n";
+    switch (ssboFunction.method)
+    {
+        case SSBOMethod::ATOMIC_ADD:
+            out << "    buffer.InterlockedAdd(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_MIN:
+            out << "    buffer.InterlockedMin(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_MAX:
+            out << "    buffer.InterlockedMax(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_AND:
+            out << "    buffer.InterlockedAnd(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_OR:
+            out << "    buffer.InterlockedOr(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_XOR:
+            out << "    buffer.InterlockedXor(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_EXCHANGE:
+            out << "    buffer.InterlockedExchange(loc, value, original_value);\n";
+            break;
+        case SSBOMethod::ATOMIC_COMPSWAP:
+            out << "    buffer.InterlockedCompareExchange(loc, compare_value, value, "
+                   "original_value);\n";
+            break;
+        default:
+            UNREACHABLE();
+    }
+    out << "    return original_value;\n";
+}
+
 bool ShaderStorageBlockFunctionHLSL::ShaderStorageBlockFunction::operator<(
     const ShaderStorageBlockFunction &rhs) const
 {
@@ -214,6 +253,7 @@
     TIntermSwizzle *swizzleNode)
 {
     ShaderStorageBlockFunction ssboFunction;
+    ssboFunction.typeString = TypeString(type);
     ssboFunction.method     = method;
     switch (method)
     {
@@ -228,11 +268,42 @@
             ssboFunction.functionName       = "_Length_" + str(unsizedArrayStride);
             mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
             return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_ADD:
+            ssboFunction.functionName = "_ssbo_atomicAdd_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_MIN:
+            ssboFunction.functionName = "_ssbo_atomicMin_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_MAX:
+            ssboFunction.functionName = "_ssbo_atomicMax_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_AND:
+            ssboFunction.functionName = "_ssbo_atomicAnd_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_OR:
+            ssboFunction.functionName = "_ssbo_atomicOr_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_XOR:
+            ssboFunction.functionName = "_ssbo_atomicXor_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_EXCHANGE:
+            ssboFunction.functionName = "_ssbo_atomicExchange_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
+        case SSBOMethod::ATOMIC_COMPSWAP:
+            ssboFunction.functionName = "_ssbo_atomicCompSwap_" + ssboFunction.typeString;
+            mRegisteredShaderStorageBlockFunctions.insert(ssboFunction);
+            return ssboFunction.functionName;
         default:
             UNREACHABLE();
     }
 
-    ssboFunction.typeString = TypeString(type);
     ssboFunction.functionName += ssboFunction.typeString;
     ssboFunction.type       = type;
     if (swizzleNode != nullptr)
@@ -298,29 +369,64 @@
 {
     for (const ShaderStorageBlockFunction &ssboFunction : mRegisteredShaderStorageBlockFunctions)
     {
-        if (ssboFunction.method == SSBOMethod::LOAD)
+        switch (ssboFunction.method)
         {
-            // Function header
-            out << ssboFunction.typeString << " " << ssboFunction.functionName
-                << "(RWByteAddressBuffer buffer, uint loc)\n";
-            out << "{\n";
-            OutputSSBOLoadFunctionBody(out, ssboFunction);
-        }
-        else if (ssboFunction.method == SSBOMethod::STORE)
-        {
-            // Function header
-            out << "void " << ssboFunction.functionName << "(RWByteAddressBuffer buffer, uint loc, "
-                << ssboFunction.typeString << " value)\n";
-            out << "{\n";
-            OutputSSBOStoreFunctionBody(out, ssboFunction);
-        }
-        else
-        {
-            ASSERT(ssboFunction.method == SSBOMethod::LENGTH);
-            out << "int " << ssboFunction.functionName
-                << "(RWByteAddressBuffer buffer, uint loc)\n";
-            out << "{\n";
-            OutputSSBOLengthFunctionBody(out, ssboFunction.unsizedArrayStride);
+            case SSBOMethod::LOAD:
+            {
+                // Function header
+                out << ssboFunction.typeString << " " << ssboFunction.functionName
+                    << "(RWByteAddressBuffer buffer, uint loc)\n";
+                out << "{\n";
+                OutputSSBOLoadFunctionBody(out, ssboFunction);
+                break;
+            }
+            case SSBOMethod::STORE:
+            {
+                // Function header
+                out << "void " << ssboFunction.functionName
+                    << "(RWByteAddressBuffer buffer, uint loc, " << ssboFunction.typeString
+                    << " value)\n";
+                out << "{\n";
+                OutputSSBOStoreFunctionBody(out, ssboFunction);
+                break;
+            }
+            case SSBOMethod::LENGTH:
+            {
+                out << "int " << ssboFunction.functionName
+                    << "(RWByteAddressBuffer buffer, uint loc)\n";
+                out << "{\n";
+                OutputSSBOLengthFunctionBody(out, ssboFunction.unsizedArrayStride);
+                break;
+            }
+            case SSBOMethod::ATOMIC_ADD:
+            case SSBOMethod::ATOMIC_MIN:
+            case SSBOMethod::ATOMIC_MAX:
+            case SSBOMethod::ATOMIC_AND:
+            case SSBOMethod::ATOMIC_OR:
+            case SSBOMethod::ATOMIC_XOR:
+            case SSBOMethod::ATOMIC_EXCHANGE:
+            {
+                out << ssboFunction.typeString << " " << ssboFunction.functionName
+                    << "(RWByteAddressBuffer buffer, uint loc, " << ssboFunction.typeString
+                    << " value)\n";
+                ;
+                out << "{\n";
+
+                OutputSSBOAtomicMemoryFunctionBody(out, ssboFunction);
+                break;
+            }
+            case SSBOMethod::ATOMIC_COMPSWAP:
+            {
+                out << ssboFunction.typeString << " " << ssboFunction.functionName
+                    << "(RWByteAddressBuffer buffer, uint loc, " << ssboFunction.typeString
+                    << " compare_value, " << ssboFunction.typeString << " value)\n";
+                ;
+                out << "{\n";
+                OutputSSBOAtomicMemoryFunctionBody(out, ssboFunction);
+                break;
+            }
+            default:
+                UNREACHABLE();
         }
 
         out << "}\n"
diff --git a/src/compiler/translator/ShaderStorageBlockFunctionHLSL.h b/src/compiler/translator/ShaderStorageBlockFunctionHLSL.h
index 7dba673..0d3cd02 100644
--- a/src/compiler/translator/ShaderStorageBlockFunctionHLSL.h
+++ b/src/compiler/translator/ShaderStorageBlockFunctionHLSL.h
@@ -39,7 +39,15 @@
 {
     LOAD,
     STORE,
-    LENGTH
+    LENGTH,
+    ATOMIC_ADD,
+    ATOMIC_MIN,
+    ATOMIC_MAX,
+    ATOMIC_AND,
+    ATOMIC_OR,
+    ATOMIC_XOR,
+    ATOMIC_EXCHANGE,
+    ATOMIC_COMPSWAP
 };
 
 class ShaderStorageBlockFunctionHLSL final : angle::NonCopyable
@@ -75,6 +83,8 @@
     static void OutputSSBOStoreFunctionBody(TInfoSinkBase &out,
                                             const ShaderStorageBlockFunction &ssboFunction);
     static void OutputSSBOLengthFunctionBody(TInfoSinkBase &out, int unsizedArrayStride);
+    static void OutputSSBOAtomicMemoryFunctionBody(TInfoSinkBase &out,
+                                                   const ShaderStorageBlockFunction &ssboFunction);
     using ShaderStorageBlockFunctionSet = std::set<ShaderStorageBlockFunction>;
     ShaderStorageBlockFunctionSet mRegisteredShaderStorageBlockFunctions;
 };
diff --git a/src/compiler/translator/ShaderStorageBlockOutputHLSL.cpp b/src/compiler/translator/ShaderStorageBlockOutputHLSL.cpp
index 9a9bd62..f5a945e 100644
--- a/src/compiler/translator/ShaderStorageBlockOutputHLSL.cpp
+++ b/src/compiler/translator/ShaderStorageBlockOutputHLSL.cpp
@@ -329,6 +329,43 @@
     traverseSSBOAccess(node, SSBOMethod::LENGTH);
 }
 
+void ShaderStorageBlockOutputHLSL::outputAtomicMemoryFunctionCallPrefix(TIntermTyped *node,
+                                                                        TOperator op)
+{
+    mLocationAsTheLastArgument = false;
+
+    switch (op)
+    {
+        case EOpAtomicAdd:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_ADD);
+            break;
+        case EOpAtomicMin:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_MIN);
+            break;
+        case EOpAtomicMax:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_MAX);
+            break;
+        case EOpAtomicAnd:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_AND);
+            break;
+        case EOpAtomicOr:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_OR);
+            break;
+        case EOpAtomicXor:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_XOR);
+            break;
+        case EOpAtomicExchange:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_EXCHANGE);
+            break;
+        case EOpAtomicCompSwap:
+            traverseSSBOAccess(node, SSBOMethod::ATOMIC_COMPSWAP);
+            break;
+        default:
+            UNREACHABLE();
+            break;
+    }
+}
+
 // Note that we must calculate the matrix stride here instead of ShaderStorageBlockFunctionHLSL.
 // It's because that if the current node's type is a vector which comes from a matrix, we will
 // lose the matrix type info once we enter ShaderStorageBlockFunctionHLSL.
diff --git a/src/compiler/translator/ShaderStorageBlockOutputHLSL.h b/src/compiler/translator/ShaderStorageBlockOutputHLSL.h
index 527698f..340e709 100644
--- a/src/compiler/translator/ShaderStorageBlockOutputHLSL.h
+++ b/src/compiler/translator/ShaderStorageBlockOutputHLSL.h
@@ -51,6 +51,8 @@
     void outputLoadFunctionCall(TIntermTyped *node);
     // This writes the function call to get the lengh of unsized array member of SSBO.
     void outputLengthFunctionCall(TIntermTyped *node);
+    // Writes the atomic memory function calls for SSBO.
+    void outputAtomicMemoryFunctionCallPrefix(TIntermTyped *node, TOperator op);
 
     void writeShaderStorageBlocksHeader(TInfoSinkBase &out) const;
 
diff --git a/src/compiler/translator/TranslatorHLSL.cpp b/src/compiler/translator/TranslatorHLSL.cpp
index 0bc6cd8..d72348b 100644
--- a/src/compiler/translator/TranslatorHLSL.cpp
+++ b/src/compiler/translator/TranslatorHLSL.cpp
@@ -129,8 +129,10 @@
 
     if (getShaderVersion() >= 310)
     {
-        sh::RewriteAtomicFunctionExpressions(root, &getSymbolTable(), getShaderVersion());
+        // Due to ssbo also can be used as the argument of atomic memory functions, we should put
+        // RewriteExpressionsWithShaderStorageBlock before RewriteAtomicFunctionExpressions.
         sh::RewriteExpressionsWithShaderStorageBlock(root, &getSymbolTable());
+        sh::RewriteAtomicFunctionExpressions(root, &getSymbolTable(), getShaderVersion());
     }
 
     sh::OutputHLSL outputHLSL(getShaderType(), getShaderVersion(), getExtensionBehavior(),
diff --git a/src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.cpp b/src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.cpp
index 2e232dd..f47e790 100644
--- a/src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.cpp
+++ b/src/compiler/translator/tree_ops/RewriteAtomicFunctionExpressions.cpp
@@ -11,6 +11,7 @@
 #include "compiler/translator/tree_util/IntermNodePatternMatcher.h"
 #include "compiler/translator/tree_util/IntermNode_util.h"
 #include "compiler/translator/tree_util/IntermTraverse.h"
+#include "compiler/translator/util.h"
 
 namespace sh
 {
@@ -138,6 +139,12 @@
 bool RewriteAtomicFunctionExpressionsTraverser::visitAggregate(Visit visit, TIntermAggregate *node)
 {
     ASSERT(visit == PostVisit);
+    // Skip atomic memory functions for SSBO. They will be processed in the OutputHLSL traverser.
+    if (IsAtomicFunction(node->getOp()) &&
+        IsInShaderStorageBlock((*node->getSequence())[0]->getAsTyped()))
+    {
+        return false;
+    }
 
     TIntermNode *parentNode = getParentNode();
     if (IsAtomicExchangeOrCompSwapNoReturnValue(node, parentNode) ||
diff --git a/src/compiler/translator/tree_ops/RewriteExpressionsWithShaderStorageBlock.cpp b/src/compiler/translator/tree_ops/RewriteExpressionsWithShaderStorageBlock.cpp
index ee3e7bc..d1ec8bb 100644
--- a/src/compiler/translator/tree_ops/RewriteExpressionsWithShaderStorageBlock.cpp
+++ b/src/compiler/translator/tree_ops/RewriteExpressionsWithShaderStorageBlock.cpp
@@ -269,7 +269,9 @@
         return false;
     }
 
-    if (IsAtomicFunction(node->getOp()))
+    // We still need to process the ssbo as the non-first argument of atomic memory functions.
+    if (IsAtomicFunction(node->getOp()) &&
+        IsInShaderStorageBlock((*node->getSequence())[0]->getAsTyped()))
     {
         return true;
     }
diff --git a/src/tests/gl_tests/ShaderStorageBufferTest.cpp b/src/tests/gl_tests/ShaderStorageBufferTest.cpp
index 87c982f..b7d7a76 100644
--- a/src/tests/gl_tests/ShaderStorageBufferTest.cpp
+++ b/src/tests/gl_tests/ShaderStorageBufferTest.cpp
@@ -1173,10 +1173,6 @@
 // Test atomic memory functions.
 TEST_P(ShaderStorageBufferTest31, AtomicMemoryFunctions)
 {
-    // TODO(jiajia.qin@intel.com): Don't skip this test once atomic memory functions for SSBO is
-    // supported on d3d backend. http://anglebug.com/1951
-
-    ANGLE_SKIP_TEST_IF(IsD3D11());
     constexpr char kCS[] = R"(#version 310 es
 layout(local_size_x=1, local_size_y=1, local_size_z=1) in;
 layout(std140, binding = 1) buffer blockName {