Enable MRT pixel shader rewriting.

Writing to all 8 pixel outputs was causing performance problems on
Intel and AMD. Enabling Geoff's work to rewrite our pixel shaders
solves the regression.

This patch also includes a workaround for the NVIDIA driver bug in
which NULL RT values passed to OMSetRenderTargets are ignored: the
RT list is now compacted to skip NULL values.

BUG=angle:705
BUG=365078

Change-Id: Ia68af6f0ccd5f10c484d6f76297a0bec694948f0
Reviewed-on: https://chromium-review.googlesource.com/214852
Tested-by: Jamie Madill <jmadill@chromium.org>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libGLESv2/Framebuffer.cpp b/src/libGLESv2/Framebuffer.cpp
index b9c4a71..6247b5a 100644
--- a/src/libGLESv2/Framebuffer.cpp
+++ b/src/libGLESv2/Framebuffer.cpp
@@ -649,10 +649,12 @@
             ASSERT(drawBufferState == GL_BACK || drawBufferState == (GL_COLOR_ATTACHMENT0_EXT + colorAttachment));
             colorbuffersForRender.push_back(colorbuffer);
         }
+#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_DISABLED)
         else
         {
             colorbuffersForRender.push_back(NULL);
         }
+#endif
     }
 
     return colorbuffersForRender;
diff --git a/src/libGLESv2/ProgramBinary.cpp b/src/libGLESv2/ProgramBinary.cpp
index 652b7d6..97fc4ae 100644
--- a/src/libGLESv2/ProgramBinary.cpp
+++ b/src/libGLESv2/ProgramBinary.cpp
@@ -82,7 +82,7 @@
     return subscript;
 }
 
-void GetInputLayoutFromShader(const std::vector<sh::Attribute> &shaderAttributes, VertexFormat inputLayout[MAX_VERTEX_ATTRIBS])
+void GetDefaultInputLayoutFromShader(const std::vector<sh::Attribute> &shaderAttributes, VertexFormat inputLayout[MAX_VERTEX_ATTRIBS])
 {
     size_t layoutIndex = 0;
     for (size_t attributeIndex = 0; attributeIndex < shaderAttributes.size(); attributeIndex++)
@@ -108,6 +108,24 @@
     }
 }
 
+// Builds the output layout used to request the default pixel executable: every slot starts
+// as GL_NONE and slot 0 is then bound to the attachment of the shader's first output variable.
+std::vector<GLenum> GetDefaultOutputLayoutFromShader(const std::vector<rx::PixelShaderOuputVariable> &shaderOutputVars)
+{
+    // With the MRT workaround enabled the default layout has a single slot.
+#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED)
+    const size_t defaultOutputCount = 1;
+#else
+    const size_t defaultOutputCount = IMPLEMENTATION_MAX_DRAW_BUFFERS;
+#endif
+    std::vector<GLenum> defaultPixelOutput(defaultOutputCount, GL_NONE);
+
+    ASSERT(!shaderOutputVars.empty());
+    defaultPixelOutput[0] = GL_COLOR_ATTACHMENT0 + shaderOutputVars[0].outputIndex;
+
+    return defaultPixelOutput;
+}
+
 bool IsRowMajorLayout(const sh::InterfaceBlockField &var)
 {
     return var.isRowMajorLayout;
@@ -261,7 +279,9 @@
 {
     for (size_t executableIndex = 0; executableIndex < mPixelExecutables.size(); executableIndex++)
     {
+#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED)
         if (mPixelExecutables[executableIndex]->matchesSignature(outputSignature))
+#endif
         {
             return mPixelExecutables[executableIndex]->shaderExecutable();
         }
@@ -1701,14 +1721,10 @@
     if (success)
     {
         VertexFormat defaultInputLayout[MAX_VERTEX_ATTRIBS];
-        GetInputLayoutFromShader(vertexShader->getActiveAttributes(), defaultInputLayout);
+        GetDefaultInputLayoutFromShader(vertexShader->getActiveAttributes(), defaultInputLayout);
         rx::ShaderExecutable *defaultVertexExecutable = getVertexExecutableForInputLayout(defaultInputLayout);
 
-        std::vector<GLenum> defaultPixelOutput(IMPLEMENTATION_MAX_DRAW_BUFFERS);
-        for (size_t i = 0; i < defaultPixelOutput.size(); i++)
-        {
-            defaultPixelOutput[i] = (i == 0) ? GL_COLOR_ATTACHMENT0 : GL_NONE;
-        }
+        std::vector<GLenum> defaultPixelOutput = GetDefaultOutputLayoutFromShader(mPixelShaderKey);
         rx::ShaderExecutable *defaultPixelExecutable = getPixelExecutableForOutputLayout(defaultPixelOutput);
 
         if (usesGeometryShader())
diff --git a/src/libGLESv2/ProgramBinary.h b/src/libGLESv2/ProgramBinary.h
index ee6a645..76baaec 100644
--- a/src/libGLESv2/ProgramBinary.h
+++ b/src/libGLESv2/ProgramBinary.h
@@ -24,6 +24,11 @@
 #include <string>
 #include <vector>
 
+// TODO(jmadill): place this in workarounds library
+#define ANGLE_WORKAROUND_ENABLED 1
+#define ANGLE_WORKAROUND_DISABLED 2
+#define ANGLE_MRT_PERF_WORKAROUND ANGLE_WORKAROUND_ENABLED  // NOTE(review): include this header wherever this is tested; undefined names are 0 in #if, so both comparisons would read 0 == 0
+
 namespace sh
 {
 class HLSLBlockEncoder;
@@ -271,8 +276,7 @@
         PixelExecutable(const std::vector<GLenum> &outputSignature, rx::ShaderExecutable *shaderExecutable);
         ~PixelExecutable();
 
-        // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
-        bool matchesSignature(const std::vector<GLenum> &signature) const { return true; /* mOutputSignature == signature; */ }
+        bool matchesSignature(const std::vector<GLenum> &signature) const { return mOutputSignature == signature; }  // true only for an identical layout (same size, same entries)
 
         const std::vector<GLenum> &outputSignature() const { return mOutputSignature; }
         rx::ShaderExecutable *shaderExecutable() const { return mShaderExecutable; }
diff --git a/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp b/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp
index 6aa0d13..cd2b1a8 100644
--- a/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp
+++ b/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp
@@ -22,7 +22,7 @@
 
 using namespace gl;
 
-namespace gl_d3d
+namespace
 {
 
 std::string HLSLComponentTypeString(GLenum componentType)
@@ -70,6 +70,21 @@
     return HLSLComponentTypeString(gl::VariableComponentType(type), gl::VariableComponentCount(type));
 }
 
+// Returns the pixel shader output variable whose outputIndex equals |location|. A miss is
+// treated as a programming error: UNREACHABLE() fires and the first variable is returned.
+const rx::PixelShaderOuputVariable &GetOutputAtLocation(const std::vector<rx::PixelShaderOuputVariable> &outputVariables,
+                                                        unsigned int location)
+{
+    typedef std::vector<rx::PixelShaderOuputVariable>::const_iterator OutputIter;
+    for (OutputIter candidate = outputVariables.begin(); candidate != outputVariables.end(); ++candidate)
+    {
+        if (candidate->outputIndex == location) { return *candidate; }
+    }
+
+    UNREACHABLE();
+    return outputVariables[0];
+}
+
 }
 
 namespace rx
@@ -328,7 +343,7 @@
                     {
                         GLenum componentType = VariableComponentType(transposedType);
                         int columnCount = VariableColumnCount(transposedType);
-                        typeString = gl_d3d::HLSLComponentTypeString(componentType, columnCount);
+                        typeString = HLSLComponentTypeString(componentType, columnCount);
                     }
                     varyingHLSL += typeString + " v" + n + " : " + varyingSemantic + n + ";\n";
                 }
@@ -361,12 +376,12 @@
             if (IsMatrixType(shaderAttribute.type))
             {
                 // Matrix types are always transposed
-                structHLSL += "    " + gl_d3d::HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type));
+                structHLSL += "    " + HLSLMatrixTypeString(TransposeMatrixType(shaderAttribute.type));
             }
             else
             {
                 GLenum componentType = mRenderer->getVertexComponentType(vertexFormat);
-                structHLSL += "    " + gl_d3d::HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type));
+                structHLSL += "    " + HLSLComponentTypeString(componentType, VariableComponentCount(shaderAttribute.type));
             }
 
             structHLSL += " " + decorateVariable(shaderAttribute.name) + " : TEXCOORD" + Str(semanticIndex) + ";\n";
@@ -421,17 +436,19 @@
 
     std::string declarationHLSL;
     std::string copyHLSL;
-    for (size_t i = 0; i < outputVariables.size(); i++)
-    {
-        const PixelShaderOuputVariable& outputVariable = outputVariables[i];
-        ASSERT(outputLayout.size() > outputVariable.outputIndex);
 
-        // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
-        bool outputIndexEnabled = true; // outputLayout[outputVariable.outputIndex] != GL_NONE
-        if (outputIndexEnabled)
+    for (size_t layoutIndex = 0; layoutIndex < outputLayout.size(); ++layoutIndex)
+    {
+        GLenum binding = outputLayout[layoutIndex];
+
+        if (binding != GL_NONE)
         {
-            declarationHLSL += "    " + gl_d3d::HLSLTypeString(outputVariable.type) + " " + outputVariable.name +
-                               " : " + targetSemantic + Str(outputVariable.outputIndex) + ";\n";
+            unsigned int location = (binding - GL_COLOR_ATTACHMENT0);
+
+            const PixelShaderOuputVariable &outputVariable = GetOutputAtLocation(outputVariables, location);
+
+            declarationHLSL += "    " + HLSLTypeString(outputVariable.type) + " " + outputVariable.name +
+                               " : " + targetSemantic + Str(layoutIndex) + ";\n";
 
             copyHLSL += "    output." + outputVariable.name + " = " + outputVariable.source + ";\n";
         }
diff --git a/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp b/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp
index c021e2a..bb2e536 100644
--- a/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp
+++ b/src/libGLESv2/renderer/d3d/d3d11/renderer11_utils.cpp
@@ -9,6 +9,7 @@
 
 #include "libGLESv2/renderer/d3d/d3d11/renderer11_utils.h"
 #include "libGLESv2/renderer/d3d/d3d11/formatutils11.h"
+#include "libGLESv2/ProgramBinary.h"
 
 #include "common/debug.h"
 
@@ -392,9 +393,13 @@
       case D3D_FEATURE_LEVEL_11_1:
       case D3D_FEATURE_LEVEL_11_0: return D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT;
 
-        // FIXME(geofflang): Work around NVIDIA driver bug by repacking buffers
       case D3D_FEATURE_LEVEL_10_1:
-      case D3D_FEATURE_LEVEL_10_0: return 1; /* D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT; */
+      case D3D_FEATURE_LEVEL_10_0:
+#if (ANGLE_MRT_PERF_WORKAROUND == ANGLE_WORKAROUND_ENABLED)
+          return D3D10_SIMULTANEOUS_RENDER_TARGET_COUNT;
+#else
+          return 1;
+#endif
 
       case D3D_FEATURE_LEVEL_9_3:  return D3D_FL9_3_SIMULTANEOUS_RENDER_TARGET_COUNT;
       case D3D_FEATURE_LEVEL_9_2:
diff --git a/tests/angle_tests/DrawBuffersTest.cpp b/tests/angle_tests/DrawBuffersTest.cpp
new file mode 100644
index 0000000..fdf2bdb
--- /dev/null
+++ b/tests/angle_tests/DrawBuffersTest.cpp
@@ -0,0 +1,329 @@
+#include "ANGLETest.h"
+
+// Fixture for draw-buffer (MRT) tests: owns one framebuffer, four RGBA8 textures that the
+// scenarios attach as color buffers, and a vertex buffer holding a single triangle.
+class DrawBuffersTest : public ANGLETest
+{
+  protected:
+    // Requests a 128x128 window with 8-bit RGBA, 24 depth bits and the given client version.
+    DrawBuffersTest(int clientVersion)
+    {
+        setWindowWidth(128);
+        setWindowHeight(128);
+        setConfigRedBits(8);
+        setConfigGreenBits(8);
+        setConfigBlueBits(8);
+        setConfigAlphaBits(8);
+        setConfigDepthBits(24);
+        setClientVersion(clientVersion);
+    }
+
+    // Creates and binds the FBO, allocates window-sized storage per texture, uploads the triangle.
+    virtual void SetUp()
+    {
+        ANGLETest::SetUp();
+
+        glGenFramebuffers(1, &mFBO);
+        glBindFramebuffer(GL_FRAMEBUFFER, mFBO);
+
+        glGenTextures(static_cast<GLsizei>(ArraySize(mTextures)), mTextures);
+
+        for (size_t texIndex = 0; texIndex < ArraySize(mTextures); texIndex++)
+        {
+            glBindTexture(GL_TEXTURE_2D, mTextures[texIndex]);
+            glTexStorage2DEXT(GL_TEXTURE_2D, 1, GL_RGBA8, getWindowWidth(), getWindowHeight());
+        }
+
+        // Three 2D vertices of one triangle; it contains the origin, i.e. the sampled window center.
+        GLfloat data[] = { -1.0f, 1.0f, -1.0f, -2.0f, 2.0f, 1.0f };
+
+        glGenBuffers(1, &mBuffer);
+        glBindBuffer(GL_ARRAY_BUFFER, mBuffer);
+        glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
+
+        GLint maxDrawBuffers;
+        glGetIntegerv(GL_MAX_DRAW_BUFFERS, &maxDrawBuffers);
+        ASSERT_EQ(maxDrawBuffers, 8);
+
+        ASSERT_GL_NO_ERROR();
+    }
+
+    // Deletes the GL objects created in SetUp().
+    virtual void TearDown()
+    {
+        glDeleteFramebuffers(1, &mFBO);
+        glDeleteTextures(static_cast<GLsizei>(ArraySize(mTextures)), mTextures);
+        glDeleteBuffers(1, &mBuffer);
+
+        ANGLETest::TearDown();
+    }
+
+    // Builds and binds an ESSL3 program declaring one vec4 output per enabled location. For output
+    // N, each of r/g/b is nonzero exactly when the matching low bit of (N + 1) is set; alpha is 1.
+    void setupMRTProgramESSL3(bool bufferEnabled[8], GLuint *programOut)
+    {
+        const std::string vertexShaderSource =
+            "#version 300 es\n"
+            "in vec4 position;\n"
+            "void main() {\n"
+            "    gl_Position = position;\n"
+            "}\n";
+
+        std::stringstream strstr;
+        strstr << "#version 300 es\n"
+                  "precision highp float;\n";
+
+        for (unsigned int index = 0; index < 8; index++)
+        {
+            if (bufferEnabled[index])
+            {
+                strstr << "layout(location = " << index << ") "
+                          "out vec4 value" << index << ";\n";
+            }
+        }
+
+        strstr << "void main()\n"
+                  "{\n";
+
+        for (unsigned int index = 0; index < 8; index++)
+        {
+            if (bufferEnabled[index])
+            {
+                unsigned int r = (index + 1) & 1;
+                unsigned int g = (index + 1) & 2;
+                unsigned int b = (index + 1) & 4;
+
+                strstr << "    value" << index << " = vec4("
+                       << r << ".0, " << g << ".0, "
+                       << b << ".0, 1.0);\n";
+            }
+        }
+
+        strstr << "}\n";
+
+        *programOut = CompileProgram(vertexShaderSource, strstr.str());
+        if (*programOut == 0)
+        {
+            FAIL() << "shader compilation failed.";
+        }
+
+        glUseProgram(*programOut);
+
+        GLint location = glGetAttribLocation(*programOut, "position");
+        ASSERT_NE(location, -1);
+        glBindBuffer(GL_ARRAY_BUFFER, mBuffer);
+        glVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 8, NULL);
+        glEnableVertexAttribArray(location);
+    }
+
+    // ESSL1 counterpart of setupMRTProgramESSL3(): writes the same colors through gl_FragData[].
+    void setupMRTProgramESSL1(bool bufferEnabled[8], GLuint *programOut)
+    {
+        const std::string vertexShaderSource =
+            "attribute vec4 position;\n"
+            "void main() {\n"
+            "    gl_Position = position;\n"
+            "}\n";
+
+        std::stringstream strstr;
+        strstr << "#extension GL_EXT_draw_buffers : enable\n"
+                  "precision highp float;\n"
+                  "void main()\n"
+                  "{\n";
+
+        for (unsigned int index = 0; index < 8; index++)
+        {
+            if (bufferEnabled[index])
+            {
+                unsigned int r = (index + 1) & 1;
+                unsigned int g = (index + 1) & 2;
+                unsigned int b = (index + 1) & 4;
+
+                strstr << "    gl_FragData[" << index << "] = vec4("
+                    << r << ".0, " << g << ".0, "
+                    << b << ".0, 1.0);\n";
+            }
+        }
+
+        strstr << "}\n";
+
+        *programOut = CompileProgram(vertexShaderSource, strstr.str());
+        if (*programOut == 0)
+        {
+            FAIL() << "shader compilation failed.";
+        }
+
+        glUseProgram(*programOut);
+
+        GLint location = glGetAttribLocation(*programOut, "position");
+        ASSERT_NE(location, -1);
+        glBindBuffer(GL_ARRAY_BUFFER, mBuffer);
+        glVertexAttribPointer(location, 2, GL_FLOAT, GL_FALSE, 8, NULL);
+        glEnableVertexAttribArray(location);
+    }
+
+    // Picks the shader flavour from the client version: 3 -> ESSL3, otherwise ESSL1 (asserts 2).
+    void setupMRTProgram(bool bufferEnabled[8], GLuint *programOut)
+    {
+        if (getClientVersion() == 3)
+        {
+            setupMRTProgramESSL3(bufferEnabled, programOut);
+        }
+        else
+        {
+            ASSERT_EQ(getClientVersion(), 2);
+            setupMRTProgramESSL1(bufferEnabled, programOut);
+        }
+    }
+
+    // Detaches all eight color attachments, attaches |textureName| as COLOR_ATTACHMENT0 and expects
+    // the window-center pixel to hold the color the generated shaders write to output |index|.
+    void verifyAttachment(unsigned int index, GLuint textureName)
+    {
+        for (unsigned int colorAttachment = 0; colorAttachment < 8; colorAttachment++)
+        {
+            glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + colorAttachment, GL_TEXTURE_2D, 0, 0);
+        }
+
+        glBindTexture(GL_TEXTURE_2D, textureName);
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, textureName, 0);
+
+        unsigned int r = (((index + 1) & 1) > 0) ? 255 : 0;
+        unsigned int g = (((index + 1) & 2) > 0) ? 255 : 0;
+        unsigned int b = (((index + 1) & 4) > 0) ? 255 : 0;
+
+        EXPECT_PIXEL_EQ(getWindowWidth() / 2, getWindowHeight() / 2, r, g, b, 255);
+    }
+
+    // Draws with only COLOR_ATTACHMENT1 populated and draw buffer 0 set to GL_NONE.
+    void gapsTest()
+    {
+        glBindTexture(GL_TEXTURE_2D, mTextures[0]);
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, GL_TEXTURE_2D, mTextures[0], 0);
+
+        bool flags[8] = { false, true };
+
+        GLuint program;
+        setupMRTProgram(flags, &program);
+
+        const GLenum bufs[] = { GL_NONE, GL_COLOR_ATTACHMENT1 };
+        glUseProgram(program);
+        glDrawBuffersEXT(2, bufs);
+        glDrawArrays(GL_TRIANGLES, 0, 3);
+
+        verifyAttachment(1, mTextures[0]);
+
+        EXPECT_GL_NO_ERROR();
+
+        glDeleteProgram(program);
+    }
+
+    // Draws to attachments 0 and 3 with GL_NONE in the two draw-buffer slots between them.
+    void firstAndLastTest()
+    {
+        glBindTexture(GL_TEXTURE_2D, mTextures[0]);
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, mTextures[0], 0);
+
+        glBindTexture(GL_TEXTURE_2D, mTextures[1]);
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT3, GL_TEXTURE_2D, mTextures[1], 0);
+
+        bool flags[8] = { true, false, false, true };
+
+        GLuint program;
+        setupMRTProgram(flags, &program);
+
+        const GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_NONE, GL_NONE, GL_COLOR_ATTACHMENT3 };
+
+        glUseProgram(program);
+        glDrawBuffersEXT(4, bufs);
+        glDrawArrays(GL_TRIANGLES, 0, 3);
+
+        verifyAttachment(0, mTextures[0]);
+        verifyAttachment(3, mTextures[1]);
+
+        EXPECT_GL_NO_ERROR();
+
+        glDeleteProgram(program);
+    }
+
+    // Draws to attachments 4-7 while the first four draw buffers stay GL_NONE.
+    void firstHalfNULLTest()
+    {
+        bool flags[8] = { false };
+        GLenum bufs[8] = { GL_NONE };
+
+        for (unsigned int texIndex = 0; texIndex < 4; texIndex++)
+        {
+            glBindTexture(GL_TEXTURE_2D, mTextures[texIndex]);
+            glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT4 + texIndex, GL_TEXTURE_2D, mTextures[texIndex], 0);
+            flags[texIndex + 4] = true;
+            bufs[texIndex + 4] = GL_COLOR_ATTACHMENT4 + texIndex;
+        }
+
+        GLuint program;
+        setupMRTProgram(flags, &program);
+
+        glUseProgram(program);
+        glDrawBuffersEXT(8, bufs);
+        glDrawArrays(GL_TRIANGLES, 0, 3);
+
+        for (unsigned int texIndex = 0; texIndex < 4; texIndex++)
+        {
+            verifyAttachment(texIndex + 4, mTextures[texIndex]);
+        }
+
+        EXPECT_GL_NO_ERROR();
+
+        glDeleteProgram(program);
+    }
+
+    GLuint mFBO;
+    GLuint mTextures[4];
+    GLuint mBuffer;
+};
+
+// Fixture variant that requests client version 3; setupMRTProgram() takes the ESSL3 shader
+// path when getClientVersion() reports 3.
+class DrawBuffersTestESSL3 : public DrawBuffersTest
+{
+  protected:
+    DrawBuffersTestESSL3() : DrawBuffersTest(3) {}
+};
+
+// Fixture variant that requests client version 2; setupMRTProgram() takes the ESSL1 shader
+// path for any client version other than 3 (and asserts that it is 2).
+class DrawBuffersTestESSL1 : public DrawBuffersTest
+{
+  protected:
+    DrawBuffersTestESSL1() : DrawBuffersTest(2) {}
+};
+
+TEST_F(DrawBuffersTestESSL3, Gaps)  // client version 3: GL_NONE in draw buffer 0, output in slot 1
+{
+    gapsTest();
+}
+
+TEST_F(DrawBuffersTestESSL1, Gaps)  // client version 2: same scenario through gl_FragData[]
+{
+    gapsTest();
+}
+
+TEST_F(DrawBuffersTestESSL3, FirstAndLast)  // client version 3: attachments 0 and 3, GL_NONE between
+{
+    firstAndLastTest();
+}
+
+TEST_F(DrawBuffersTestESSL1, FirstAndLast)  // client version 2: attachments 0 and 3, GL_NONE between
+{
+    firstAndLastTest();
+}
+
+TEST_F(DrawBuffersTestESSL3, FirstHalfNULL)  // client version 3: draw buffers 0-3 GL_NONE, 4-7 bound
+{
+    firstHalfNULLTest();
+}
+
+TEST_F(DrawBuffersTestESSL1, FirstHalfNULL)  // client version 2: draw buffers 0-3 GL_NONE, 4-7 bound
+{
+    firstHalfNULLTest();
+}