Generate pixel shader output to match the bound framebuffer.

Only generate pixel shader output variables for render targets that are
currently bound.  Fixes some performance issues with D3D10 cards that were
slow to discard unused outputs.

Fixed memory leaks in ProgramBinary by refactoring the freeing of the
current state into a reset function.

BUG=angle:670

Change-Id: I40f83e15724fb9a1a9ae61363a056999f1fa26d2
Reviewed-on: https://chromium-review.googlesource.com/202977
Reviewed-by: Jamie Madill <jmadill@chromium.org>
Tested-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/common/angleutils.h b/src/common/angleutils.h
index 8361d7d..44a907b 100644
--- a/src/common/angleutils.h
+++ b/src/common/angleutils.h
@@ -54,6 +54,16 @@
 }
 
 template <typename T>
+void SafeDeleteContainer(T& resource)  // Calls SafeDelete on every element of 'resource', then empties the container.
+{
+    for (typename T::iterator i = resource.begin(); i != resource.end(); ++i)  // 'typename' is required: T::iterator is a dependent type name
+    {
+        SafeDelete(*i);
+    }
+    resource.clear();
+}
+
+template <typename T>
 void SafeDeleteArray(T*& resource)
 {
     delete[] resource;
diff --git a/src/libGLESv2/Context.cpp b/src/libGLESv2/Context.cpp
index c004b49..1454538 100644
--- a/src/libGLESv2/Context.cpp
+++ b/src/libGLESv2/Context.cpp
@@ -2404,7 +2404,9 @@
     VertexFormat inputLayout[gl::MAX_VERTEX_ATTRIBS];
     VertexFormat::GetInputLayout(inputLayout, programBinary, vertexAttributes, mState.vertexAttribCurrentValues);
 
-    mRenderer->applyShaders(programBinary, mState.rasterizer.rasterizerDiscard, transformFeedbackActive, inputLayout);
+    const Framebuffer *fbo = getDrawFramebuffer();
+
+    mRenderer->applyShaders(programBinary, inputLayout, fbo, mState.rasterizer.rasterizerDiscard, transformFeedbackActive);
 
     programBinary->applyUniforms();
 }
diff --git a/src/libGLESv2/DynamicHLSL.cpp b/src/libGLESv2/DynamicHLSL.cpp
index 20e4a20..b75a549 100644
--- a/src/libGLESv2/DynamicHLSL.cpp
+++ b/src/libGLESv2/DynamicHLSL.cpp
@@ -82,7 +82,8 @@
     return (i == GL_INVALID_INDEX ? "" : "[" + Str(i) + "]");
 }
 
-const std::string DynamicHLSL::VERTEX_ATTRIBUTE_STUB_STRING = "@@ VERTEX ATTRIBUTES @@";
+const std::string VERTEX_ATTRIBUTE_STUB_STRING = "@@ VERTEX ATTRIBUTES @@";
+const std::string PIXEL_OUTPUT_STUB_STRING = "@@ PIXEL OUTPUT @@";
 
 DynamicHLSL::DynamicHLSL(rx::Renderer *const renderer)
     : mRenderer(renderer)
@@ -339,7 +340,7 @@
     return varyingHLSL;
 }
 
-std::string DynamicHLSL::generateInputLayoutHLSL(const VertexFormat inputLayout[], const Attribute shaderAttributes[]) const
+std::string DynamicHLSL::generateVertexShaderForInputLayout(const std::string &sourceShader, const VertexFormat inputLayout[], const Attribute shaderAttributes[]) const
 {
     std::string structHLSL, initHLSL;
 
@@ -392,15 +393,70 @@
         }
     }
 
-    return "struct VS_INPUT\n"
-           "{\n" +
-           structHLSL +
-           "};\n"
-           "\n"
-           "void initAttributes(VS_INPUT input)\n"
-           "{\n" +
-           initHLSL +
-           "}\n";
+    std::string replacementHLSL = "struct VS_INPUT\n"
+                                  "{\n" +
+                                  structHLSL +
+                                  "};\n"
+                                  "\n"
+                                  "void initAttributes(VS_INPUT input)\n"
+                                  "{\n" +
+                                  initHLSL +
+                                  "}\n";
+
+    std::string vertexHLSL(sourceShader);
+
+    size_t copyInsertionPos = vertexHLSL.find(VERTEX_ATTRIBUTE_STUB_STRING);
+    vertexHLSL.replace(copyInsertionPos, VERTEX_ATTRIBUTE_STUB_STRING.length(), replacementHLSL);
+
+    return vertexHLSL;
+}
+
+std::string DynamicHLSL::generatePixelShaderForOutputSignature(const std::string &sourceShader, const std::vector<PixelShaderOuputVariable> &outputVariables,
+                                                               bool usesFragDepth, const std::vector<GLenum> &outputLayout) const  // Returns a copy of sourceShader with PIXEL_OUTPUT_STUB_STRING replaced by a PS_OUTPUT struct and a generateOutput() function.
+{
+    const int shaderModel = mRenderer->getMajorShaderModel();
+    std::string targetSemantic = (shaderModel >= 4) ? "SV_TARGET" : "COLOR";
+    std::string depthSemantic = (shaderModel >= 4) ? "SV_Depth" : "DEPTH";
+    // declarationHLSL collects the PS_OUTPUT members; copyHLSL collects the matching assignments inside generateOutput().
+    std::string declarationHLSL;
+    std::string copyHLSL;
+    for (size_t i = 0; i < outputVariables.size(); i++)
+    {
+        const PixelShaderOuputVariable& outputVariable = outputVariables[i];
+        ASSERT(outputLayout.size() > outputVariable.outputIndex);
+        if (outputLayout[outputVariable.outputIndex] != GL_NONE)  // a GL_NONE slot means this output gets neither a member nor an assignment
+        {
+            declarationHLSL += "    " + gl_d3d::HLSLTypeString(outputVariable.type) + " " + outputVariable.name +
+                               " : " + targetSemantic + Str(outputVariable.outputIndex) + ";\n";
+
+            copyHLSL += "    output." + outputVariable.name + " = " + outputVariable.source + ";\n";
+        }
+    }
+    // The depth output depends only on usesFragDepth; outputLayout is not consulted for it.
+    if (usesFragDepth)
+    {
+        declarationHLSL += "    float gl_Depth : " + depthSemantic + ";\n";
+        copyHLSL += "    output.gl_Depth = gl_Depth; \n";
+    }
+    // HLSL text that takes the place of the stub marker.
+    std::string replacementHLSL = "struct PS_OUTPUT\n"
+                                  "{\n" +
+                                  declarationHLSL +
+                                  "};\n"
+                                  "\n"
+                                  "PS_OUTPUT generateOutput()\n"
+                                  "{\n"
+                                  "    PS_OUTPUT output;\n" +
+                                  copyHLSL +
+                                  "    return output;\n"
+                                  "}\n";
+
+    std::string pixelHLSL(sourceShader);
+    // NOTE(review): the find() result is not checked; if the stub is missing it is npos and std::string::replace throws std::out_of_range.
+    size_t outputInsertionPos = pixelHLSL.find(PIXEL_OUTPUT_STUB_STRING);
+    pixelHLSL.replace(outputInsertionPos, PIXEL_OUTPUT_STUB_STRING.length(), replacementHLSL);
+
+    return pixelHLSL;
 }
 
 bool DynamicHLSL::generateShaderLinkHLSL(InfoLog &infoLog, int registers, const VaryingPacking packing,
@@ -408,7 +464,9 @@
                                          FragmentShader *fragmentShader, VertexShader *vertexShader,
                                          const std::vector<std::string>& transformFeedbackVaryings,
                                          std::vector<LinkedVarying> *linkedVaryings,
-                                         std::map<int, VariableLocation> *programOutputVars) const
+                                         std::map<int, VariableLocation> *programOutputVars,
+                                         std::vector<PixelShaderOuputVariable> *outPixelShaderKey,
+                                         bool *outUsesFragDepth) const
 {
     if (pixelHLSL.empty() || vertexHLSL.empty())
     {
@@ -448,7 +506,6 @@
     std::string varyingSemantic = (vertexShader->mUsesPointSize && shaderModel == 3) ? "COLOR" : "TEXCOORD";
     std::string targetSemantic = (shaderModel >= 4) ? "SV_Target" : "COLOR";
     std::string dxPositionSemantic = (shaderModel >= 4) ? "SV_Position" : "POSITION";
-    std::string depthSemantic = (shaderModel >= 4) ? "SV_Depth" : "DEPTH";
 
     std::string varyingHLSL = generateVaryingHLSL(vertexShader, varyingSemantic, linkedVaryings);
 
@@ -660,22 +717,22 @@
         }
     }
 
-    pixelHLSL += "};\n"
-                 "\n"
-                 "struct PS_OUTPUT\n"
-                 "{\n";
+    pixelHLSL += "};\n";
 
     if (shaderVersion < 300)
     {
         for (unsigned int renderTargetIndex = 0; renderTargetIndex < numRenderTargets; renderTargetIndex++)
         {
-            pixelHLSL += "    float4 gl_Color" + Str(renderTargetIndex) + " : " + targetSemantic + Str(renderTargetIndex) + ";\n";
+            PixelShaderOuputVariable outputKeyVariable;
+            outputKeyVariable.type = GL_FLOAT_VEC4;
+            outputKeyVariable.name = "gl_Color" + Str(renderTargetIndex);
+            outputKeyVariable.source = broadcast ? "gl_Color[0]" : "gl_Color[" + Str(renderTargetIndex) + "]";
+            outputKeyVariable.outputIndex = renderTargetIndex;
+
+            outPixelShaderKey->push_back(outputKeyVariable);
         }
 
-        if (fragmentShader->mUsesFragDepth)
-        {
-            pixelHLSL += "    float gl_Depth : " + depthSemantic + ";\n";
-        }
+        *outUsesFragDepth = fragmentShader->mUsesFragDepth;
     }
     else
     {
@@ -686,16 +743,22 @@
         {
             const VariableLocation &outputLocation = locationIt->second;
             const ShaderVariable &outputVariable = shaderOutputVars[outputLocation.index];
+            const std::string &variableName = "out_" + outputLocation.name;
             const std::string &elementString = (outputLocation.element == GL_INVALID_INDEX ? "" : Str(outputLocation.element));
 
-            pixelHLSL += "    " + gl_d3d::HLSLTypeString(outputVariable.type) +
-                         " out_" + outputLocation.name + elementString +
-                         " : " + targetSemantic + Str(locationIt->first) + ";\n";
+            PixelShaderOuputVariable outputKeyVariable;
+            outputKeyVariable.type = outputVariable.type;
+            outputKeyVariable.name = variableName + elementString;
+            outputKeyVariable.source = variableName + ArrayString(outputLocation.element);
+            outputKeyVariable.outputIndex = locationIt->first;
+
+            outPixelShaderKey->push_back(outputKeyVariable);
         }
+
+        *outUsesFragDepth = false;
     }
 
-    pixelHLSL += "};\n"
-                 "\n";
+    pixelHLSL += PIXEL_OUTPUT_STUB_STRING + "\n";
 
     if (fragmentShader->mUsesFrontFacing)
     {
@@ -807,37 +870,7 @@
     pixelHLSL += "\n"
                  "    gl_main();\n"
                  "\n"
-                 "    PS_OUTPUT output;\n";
-
-    if (shaderVersion < 300)
-    {
-        for (unsigned int renderTargetIndex = 0; renderTargetIndex < numRenderTargets; renderTargetIndex++)
-        {
-            unsigned int sourceColorIndex = broadcast ? 0 : renderTargetIndex;
-
-            pixelHLSL += "    output.gl_Color" + Str(renderTargetIndex) + " = gl_Color[" + Str(sourceColorIndex) + "];\n";
-        }
-
-        if (fragmentShader->mUsesFragDepth)
-        {
-            pixelHLSL += "    output.gl_Depth = gl_Depth;\n";
-        }
-    }
-    else
-    {
-        for (auto locationIt = programOutputVars->begin(); locationIt != programOutputVars->end(); locationIt++)
-        {
-            const VariableLocation &outputLocation = locationIt->second;
-            const std::string &variableName = "out_" + outputLocation.name;
-            const std::string &outVariableName = variableName + (outputLocation.element == GL_INVALID_INDEX ? "" : Str(outputLocation.element));
-            const std::string &staticVariableName = variableName + ArrayString(outputLocation.element);
-
-            pixelHLSL += "    output." + outVariableName + " = " + staticVariableName + ";\n";
-        }
-    }
-
-    pixelHLSL += "\n"
-                 "    return output;\n"
+                 "    return generateOutput();\n"
                  "}\n";
 
     return true;
diff --git a/src/libGLESv2/DynamicHLSL.h b/src/libGLESv2/DynamicHLSL.h
index 07c2a2f..dbbdbc0 100644
--- a/src/libGLESv2/DynamicHLSL.h
+++ b/src/libGLESv2/DynamicHLSL.h
@@ -34,6 +34,14 @@
 
 typedef const PackedVarying *VaryingPacking[IMPLEMENTATION_MAX_VARYING_VECTORS][4];
 
+struct PixelShaderOuputVariable  // Describes one pixel shader output so the PS_OUTPUT struct can be generated later per output layout.
+{
+    GLenum type;         // GL type enum; turned into an HLSL type name via gl_d3d::HLSLTypeString
+    std::string name;    // member name used inside the generated PS_OUTPUT struct
+    std::string source;  // HLSL expression assigned to that member in generateOutput()
+    size_t outputIndex;  // render target index: appended to the target semantic and used to index the output layout
+};
+
 class DynamicHLSL
 {
   public:
@@ -41,19 +49,21 @@
 
     int packVaryings(InfoLog &infoLog, VaryingPacking packing, FragmentShader *fragmentShader,
                      VertexShader *vertexShader, const std::vector<std::string>& transformFeedbackVaryings);
-    std::string generateInputLayoutHLSL(const VertexFormat inputLayout[], const Attribute shaderAttributes[]) const;
+    std::string generateVertexShaderForInputLayout(const std::string &sourceShader, const VertexFormat inputLayout[], const Attribute shaderAttributes[]) const;
+    std::string generatePixelShaderForOutputSignature(const std::string &sourceShader, const std::vector<PixelShaderOuputVariable> &outputVariables,
+                                                      bool usesFragDepth, const std::vector<GLenum> &outputLayout) const;
     bool generateShaderLinkHLSL(InfoLog &infoLog, int registers, const VaryingPacking packing,
                                 std::string& pixelHLSL, std::string& vertexHLSL,
                                 FragmentShader *fragmentShader, VertexShader *vertexShader,
                                 const std::vector<std::string>& transformFeedbackVaryings,
                                 std::vector<LinkedVarying> *linkedVaryings,
-                                std::map<int, VariableLocation> *programOutputVars) const;
+                                std::map<int, VariableLocation> *programOutputVars,
+                                std::vector<PixelShaderOuputVariable> *outPixelShaderKey,
+                                bool *outUsesFragDepth) const;
 
     std::string generateGeometryShaderHLSL(int registers, FragmentShader *fragmentShader, VertexShader *vertexShader) const;
     void getInputLayoutSignature(const VertexFormat inputLayout[], GLenum signature[]) const;
 
-    static const std::string VERTEX_ATTRIBUTE_STUB_STRING;
-
   private:
     DISALLOW_COPY_AND_ASSIGN(DynamicHLSL);
 
diff --git a/src/libGLESv2/ProgramBinary.cpp b/src/libGLESv2/ProgramBinary.cpp
index e3ffa47..63ca3f6 100644
--- a/src/libGLESv2/ProgramBinary.cpp
+++ b/src/libGLESv2/ProgramBinary.cpp
@@ -10,6 +10,8 @@
 
 #include "libGLESv2/BinaryStream.h"
 #include "libGLESv2/ProgramBinary.h"
+#include "libGLESv2/Framebuffer.h"
+#include "libGLESv2/Renderbuffer.h"
 #include "libGLESv2/renderer/ShaderExecutable.h"
 
 #include "common/debug.h"
@@ -84,8 +86,7 @@
 {
 }
 
-ProgramBinary::VertexExecutable::VertexExecutable(rx::Renderer *const renderer,
-                                                  const VertexFormat inputLayout[],
+ProgramBinary::VertexExecutable::VertexExecutable(const VertexFormat inputLayout[],
                                                   const GLenum signature[],
                                                   rx::ShaderExecutable *shaderExecutable)
     : mShaderExecutable(shaderExecutable)
@@ -99,7 +100,7 @@
 
 ProgramBinary::VertexExecutable::~VertexExecutable()
 {
-    delete mShaderExecutable;
+    SafeDelete(mShaderExecutable);
 }
 
 bool ProgramBinary::VertexExecutable::matchesSignature(const GLenum signature[]) const
@@ -115,6 +116,17 @@
     return true;
 }
 
+ProgramBinary::PixelExecutable::PixelExecutable(const std::vector<GLenum> &outputSignature, rx::ShaderExecutable *shaderExecutable)
+    : mOutputSignature(outputSignature),   // stored as a copy of the caller's vector
+      mShaderExecutable(shaderExecutable)  // raw pointer; passed to SafeDelete in ~PixelExecutable
+{
+}
+
+ProgramBinary::PixelExecutable::~PixelExecutable()
+{
+    SafeDelete(mShaderExecutable);  // releases the executable that was handed to the constructor
+}
+
 LinkedVarying::LinkedVarying()
 {
 }
@@ -132,7 +144,7 @@
       mRenderer(renderer),
       mDynamicHLSL(NULL),
       mVertexWorkarounds(rx::ANGLE_D3D_WORKAROUND_NONE),
-      mPixelExecutable(NULL),
+      mPixelWorkarounds(rx::ANGLE_D3D_WORKAROUND_NONE),
       mGeometryExecutable(NULL),
       mUsedVertexSamplerRange(0),
       mUsedPixelSamplerRange(0),
@@ -163,29 +175,7 @@
 
 ProgramBinary::~ProgramBinary()
 {
-    while (!mVertexExecutables.empty())
-    {
-        delete mVertexExecutables.back();
-        mVertexExecutables.pop_back();
-    }
-
-    SafeDelete(mGeometryExecutable);
-    SafeDelete(mPixelExecutable);
-
-    while (!mUniforms.empty())
-    {
-        delete mUniforms.back();
-        mUniforms.pop_back();
-    }
-
-    while (!mUniformBlocks.empty())
-    {
-        delete mUniformBlocks.back();
-        mUniformBlocks.pop_back();
-    }
-
-    SafeDelete(mVertexUniformStorage);
-    SafeDelete(mFragmentUniformStorage);
+    reset();
     SafeDelete(mDynamicHLSL);
 }
 
@@ -204,9 +194,58 @@
     return mCurrentSerial++;
 }
 
-rx::ShaderExecutable *ProgramBinary::getPixelExecutable() const
+rx::ShaderExecutable *ProgramBinary::getPixelExecutableForFramebuffer(const Framebuffer *fbo)  // Builds an output signature from fbo's color attachments and returns the pixel executable for it.
 {
-    return mPixelExecutable;
+    std::vector<GLenum> outputs(IMPLEMENTATION_MAX_DRAW_BUFFERS);  // one signature slot per possible draw buffer
+    for (size_t i = 0; i < IMPLEMENTATION_MAX_DRAW_BUFFERS; i++)
+    {
+        FramebufferAttachment *attachment = fbo->getColorbuffer(i);
+        if (attachment)
+        {
+            // Always output floats for now
+            outputs[i] = GL_FLOAT;
+        }
+        else
+        {
+            outputs[i] = GL_NONE;  // no attachment in this slot: the generated shader omits the output
+        }
+    }
+
+    return getPixelExecutableForOutputLayout(outputs);
+}
+
+rx::ShaderExecutable *ProgramBinary::getPixelExecutableForOutputLayout(const std::vector<GLenum> &outputSignature)  // Returns the cached pixel executable for outputSignature, compiling and caching one if none exists.
+{
+    for (size_t executableIndex = 0; executableIndex < mPixelExecutables.size(); executableIndex++)  // linear search of the cache
+    {
+        if (mPixelExecutables[executableIndex]->matchesSignature(outputSignature))
+        {
+            return mPixelExecutables[executableIndex]->shaderExecutable();
+        }
+    }
+    // No cached match: specialise the stored pixel HLSL for the requested output signature.
+    std::string finalPixelHLSL = mDynamicHLSL->generatePixelShaderForOutputSignature(mPixelHLSL, mPixelShaderKey, mUsesFragDepth,
+                                                                                     outputSignature);
+
+    // Generate new pixel executable
+    InfoLog tempInfoLog;
+    rx::ShaderExecutable *pixelExecutable = mRenderer->compileToExecutable(tempInfoLog, finalPixelHLSL.c_str(), rx::SHADER_PIXEL,
+                                                                           mTransformFeedbackLinkedVaryings,
+                                                                           (mTransformFeedbackBufferMode == GL_SEPARATE_ATTRIBS),
+                                                                           mPixelWorkarounds);
+    // A failed compile is only logged (nothing is cached for it); a successful one is added to mPixelExecutables.
+    if (!pixelExecutable)
+    {
+        std::vector<char> tempCharBuffer(tempInfoLog.getLength() + 3);
+        tempInfoLog.getLog(tempInfoLog.getLength(), NULL, &tempCharBuffer[0]);
+        ERR("Error compiling dynamic pixel executable:\n%s\n", &tempCharBuffer[0]);
+    }
+    else
+    {
+        mPixelExecutables.push_back(new PixelExecutable(outputSignature, pixelExecutable));
+    }
+    // The returned pointer is null when the compile above failed.
+    return pixelExecutable;
 }
 
 rx::ShaderExecutable *ProgramBinary::getVertexExecutableForInputLayout(const VertexFormat inputLayout[MAX_VERTEX_ATTRIBS])
@@ -223,16 +262,11 @@
     }
 
     // Generate new dynamic layout with attribute conversions
-    const std::string &layoutHLSL = mDynamicHLSL->generateInputLayoutHLSL(inputLayout, mShaderAttributes);
-
-    // Generate new shader source by replacing the attributes stub with the defined input layout
-    std::string vertexHLSL = mVertexHLSL;
-    size_t insertPos = vertexHLSL.find(DynamicHLSL::VERTEX_ATTRIBUTE_STUB_STRING);
-    vertexHLSL.replace(insertPos, DynamicHLSL::VERTEX_ATTRIBUTE_STUB_STRING.length(), layoutHLSL);
+    std::string finalVertexHLSL = mDynamicHLSL->generateVertexShaderForInputLayout(mVertexHLSL, inputLayout, mShaderAttributes);
 
     // Generate new vertex executable
     InfoLog tempInfoLog;
-    rx::ShaderExecutable *vertexExecutable = mRenderer->compileToExecutable(tempInfoLog, vertexHLSL.c_str(),
+    rx::ShaderExecutable *vertexExecutable = mRenderer->compileToExecutable(tempInfoLog, finalVertexHLSL.c_str(),
                                                                             rx::SHADER_VERTEX,
                                                                             mTransformFeedbackLinkedVaryings,
                                                                             (mTransformFeedbackBufferMode == GL_SEPARATE_ATTRIBS),
@@ -246,7 +280,7 @@
     }
     else
     {
-        mVertexExecutables.push_back(new VertexExecutable(mRenderer, inputLayout, signature, vertexExecutable));
+        mVertexExecutables.push_back(new VertexExecutable(inputLayout, signature, vertexExecutable));
     }
 
     return vertexExecutable;
@@ -1021,6 +1055,8 @@
 #ifdef ANGLE_DISABLE_PROGRAM_BINARY_LOAD
     return false;
 #else
+    reset();
+
     BinaryInputStream stream(binary, length);
 
     int format = stream.readInt<int>();
@@ -1180,7 +1216,6 @@
     stream.readInt(&mVertexWorkarounds);
 
     const unsigned int vertexShaderCount = stream.readInt<unsigned int>();
-
     for (unsigned int vertexShaderIndex = 0; vertexShaderIndex < vertexShaderCount; vertexShaderIndex++)
     {
         VertexFormat inputLayout[MAX_VERTEX_ATTRIBS];
@@ -1195,9 +1230,7 @@
         }
 
         unsigned int vertexShaderSize = stream.readInt<unsigned int>();
-
-        const char *vertexShaderFunction = (const char*) binary + stream.offset();
-
+        const unsigned char *vertexShaderFunction = reinterpret_cast<const unsigned char*>(binary) + stream.offset();
         rx::ShaderExecutable *shaderExecutable = mRenderer->loadExecutable(reinterpret_cast<const DWORD*>(vertexShaderFunction),
                                                                            vertexShaderSize, rx::SHADER_VERTEX,
                                                                            mTransformFeedbackLinkedVaryings,
@@ -1213,23 +1246,52 @@
         mDynamicHLSL->getInputLayoutSignature(inputLayout, signature);
 
         // add new binary
-        mVertexExecutables.push_back(new VertexExecutable(mRenderer, inputLayout, signature, shaderExecutable));
+        mVertexExecutables.push_back(new VertexExecutable(inputLayout, signature, shaderExecutable));
 
         stream.skip(vertexShaderSize);
     }
 
-    unsigned int pixelShaderSize = stream.readInt<unsigned int>();
+    stream.readString(&mPixelHLSL);
+    stream.readInt(&mPixelWorkarounds);
+    stream.readBool(&mUsesFragDepth);
 
-    const char *pixelShaderFunction = (const char*) binary + stream.offset();
-    mPixelExecutable = mRenderer->loadExecutable(reinterpret_cast<const DWORD*>(pixelShaderFunction),
-                                                 pixelShaderSize, rx::SHADER_PIXEL, mTransformFeedbackLinkedVaryings,
-                                                 (mTransformFeedbackBufferMode == GL_SEPARATE_ATTRIBS));
-    if (!mPixelExecutable)
+    const size_t pixelShaderKeySize = stream.readInt<unsigned int>();
+    mPixelShaderKey.resize(pixelShaderKeySize);
+    for (size_t pixelShaderKeyIndex = 0; pixelShaderKeyIndex < pixelShaderKeySize; pixelShaderKeyIndex++)
     {
-        infoLog.append("Could not create pixel shader.");
-        return false;
+        stream.readInt(&mPixelShaderKey[pixelShaderKeyIndex].type);
+        stream.readString(&mPixelShaderKey[pixelShaderKeyIndex].name);
+        stream.readString(&mPixelShaderKey[pixelShaderKeyIndex].source);
+        stream.readInt(&mPixelShaderKey[pixelShaderKeyIndex].outputIndex);
     }
-    stream.skip(pixelShaderSize);
+
+    const size_t pixelShaderCount = stream.readInt<unsigned int>();
+    for (size_t pixelShaderIndex = 0; pixelShaderIndex < pixelShaderCount; pixelShaderIndex++)
+    {
+        const size_t outputCount = stream.readInt<unsigned int>();
+        std::vector<GLenum> outputs(outputCount);
+        for (size_t outputIndex = 0; outputIndex < outputCount; outputIndex++)
+        {
+            stream.readInt(&outputs[outputIndex]);
+        }
+
+        const size_t pixelShaderSize = stream.readInt<unsigned int>();
+        const unsigned char *pixelShaderFunction = reinterpret_cast<const unsigned char*>(binary) + stream.offset();
+        rx::ShaderExecutable *shaderExecutable = mRenderer->loadExecutable(pixelShaderFunction, pixelShaderSize,
+                                                                           rx::SHADER_PIXEL,
+                                                                           mTransformFeedbackLinkedVaryings,
+                                                                           (mTransformFeedbackBufferMode == GL_SEPARATE_ATTRIBS));
+        if (!shaderExecutable)
+        {
+            infoLog.append("Could not create pixel shader.");
+            return false;
+        }
+
+        // add new binary
+        mPixelExecutables.push_back(new PixelExecutable(outputs, shaderExecutable));
+
+        stream.skip(pixelShaderSize);
+    }
 
     unsigned int geometryShaderSize = stream.readInt<unsigned int>();
 
@@ -1242,7 +1304,6 @@
         if (!mGeometryExecutable)
         {
             infoLog.append("Could not create geometry shader.");
-            SafeDelete(mPixelExecutable);
             return false;
         }
         stream.skip(geometryShaderSize);
@@ -1391,11 +1452,38 @@
         stream.writeBytes(vertexBlob, vertexShaderSize);
     }
 
-    size_t pixelShaderSize = mPixelExecutable->getLength();
-    stream.writeInt(pixelShaderSize);
+    stream.writeString(mPixelHLSL);
+    stream.writeInt(mPixelWorkarounds);
+    stream.writeInt(mUsesFragDepth);
 
-    unsigned char *pixelBlob = static_cast<unsigned char *>(mPixelExecutable->getFunction());
-    stream.writeBytes(pixelBlob, pixelShaderSize);
+    stream.writeInt(mPixelShaderKey.size());
+    for (size_t pixelShaderKeyIndex = 0; pixelShaderKeyIndex < mPixelShaderKey.size(); pixelShaderKeyIndex++)
+    {
+        const PixelShaderOuputVariable &variable = mPixelShaderKey[pixelShaderKeyIndex];
+        stream.writeInt(variable.type);
+        stream.writeString(variable.name);
+        stream.writeString(variable.source);
+        stream.writeInt(variable.outputIndex);
+    }
+
+    stream.writeInt(mPixelExecutables.size());
+    for (size_t pixelExecutableIndex = 0; pixelExecutableIndex < mPixelExecutables.size(); pixelExecutableIndex++)
+    {
+        PixelExecutable *pixelExecutable = mPixelExecutables[pixelExecutableIndex];
+
+        const std::vector<GLenum> outputs = pixelExecutable->outputSignature();
+        stream.writeInt(outputs.size());
+        for (size_t outputIndex = 0; outputIndex < outputs.size(); outputIndex++)
+        {
+            stream.writeInt(outputs[outputIndex]);
+        }
+
+        size_t pixelShaderSize = pixelExecutable->shaderExecutable()->getLength();
+        stream.writeInt(pixelShaderSize);
+
+        unsigned char *pixelBlob = static_cast<unsigned char *>(pixelExecutable->shaderExecutable()->getFunction());
+        stream.writeBytes(pixelBlob, pixelShaderSize);
+    }
 
     size_t geometryShaderSize = (mGeometryExecutable != NULL) ? mGeometryExecutable->getLength() : 0;
     stream.writeInt(geometryShaderSize);
@@ -1469,12 +1557,15 @@
         return false;
     }
 
-    mTransformFeedbackLinkedVaryings.clear();
+    reset();
+
     mTransformFeedbackBufferMode = transformFeedbackBufferMode;
 
     mShaderVersion = vertexShader->getShaderVersion();
 
-    std::string pixelHLSL = fragmentShader->getHLSL();
+    mPixelHLSL = fragmentShader->getHLSL();
+    mPixelWorkarounds = fragmentShader->getD3DWorkarounds();
+
     mVertexHLSL = vertexShader->getHLSL();
     mVertexWorkarounds = vertexShader->getD3DWorkarounds();
 
@@ -1494,9 +1585,9 @@
 
     mUsesPointSize = vertexShader->usesPointSize();
     std::vector<LinkedVarying> linkedVaryings;
-    if (!mDynamicHLSL->generateShaderLinkHLSL(infoLog, registers, packing, pixelHLSL, mVertexHLSL,
+    if (!mDynamicHLSL->generateShaderLinkHLSL(infoLog, registers, packing, mPixelHLSL, mVertexHLSL,
                                               fragmentShader, vertexShader, transformFeedbackVaryings,
-                                              &linkedVaryings, &mOutputVariables))
+                                              &linkedVaryings, &mOutputVariables, &mPixelShaderKey, &mUsesFragDepth))
     {
         return false;
     }
@@ -1536,12 +1627,14 @@
     {
         VertexFormat defaultInputLayout[MAX_VERTEX_ATTRIBS];
         GetInputLayoutFromShader(vertexShader->activeAttributes(), defaultInputLayout);
-
         rx::ShaderExecutable *defaultVertexExecutable = getVertexExecutableForInputLayout(defaultInputLayout);
-        mPixelExecutable = mRenderer->compileToExecutable(infoLog, pixelHLSL.c_str(), rx::SHADER_PIXEL,
-                                                          mTransformFeedbackLinkedVaryings,
-                                                          (mTransformFeedbackBufferMode == GL_SEPARATE_ATTRIBS),
-                                                          fragmentShader->getD3DWorkarounds());
+
+        std::vector<GLenum> defaultPixelOutput(IMPLEMENTATION_MAX_DRAW_BUFFERS);
+        for (size_t i = 0; i < defaultPixelOutput.size(); i++)
+        {
+            defaultPixelOutput[i] = (i == 0) ? GL_FLOAT : GL_NONE;
+        }
+        rx::ShaderExecutable *defaultPixelExecutable = getPixelExecutableForOutputLayout(defaultPixelOutput);
 
         if (usesGeometryShader())
         {
@@ -1552,21 +1645,11 @@
                                                                  rx::ANGLE_D3D_WORKAROUND_NONE);
         }
 
-        if (!defaultVertexExecutable || !mPixelExecutable || (usesGeometryShader() && !mGeometryExecutable))
+        if (!defaultVertexExecutable || !defaultPixelExecutable || (usesGeometryShader() && !mGeometryExecutable))
         {
             infoLog.append("Failed to create D3D shaders.");
             success = false;
-
-            while (!mVertexExecutables.empty())
-            {
-                delete mVertexExecutables.back();
-                mVertexExecutables.pop_back();
-            }
-
-            SafeDelete(mGeometryExecutable);
-            SafeDelete(mPixelExecutable);
-
-            mTransformFeedbackLinkedVaryings.clear();
+            reset();
         }
     }
 
@@ -2688,4 +2771,44 @@
     mFragmentUniformStorage = mRenderer->createUniformStorage(fragmentRegisters * 16u);
 }
 
+void ProgramBinary::reset()  // Deletes the owned executables, uniforms and uniform storage, and restores the other members below to default values.
+{
+    mVertexHLSL.clear();
+    mVertexWorkarounds = rx::ANGLE_D3D_WORKAROUND_NONE;
+    SafeDeleteContainer(mVertexExecutables);
+    // Pixel shader source, workarounds, output key and the per-signature executable cache.
+    mPixelHLSL.clear();
+    mPixelWorkarounds = rx::ANGLE_D3D_WORKAROUND_NONE;
+    mUsesFragDepth = false;
+    mPixelShaderKey.clear();
+    SafeDeleteContainer(mPixelExecutables);
+
+    SafeDelete(mGeometryExecutable);
+
+    mTransformFeedbackBufferMode = GL_NONE;
+    mTransformFeedbackLinkedVaryings.clear();
+    // Overwrite every sampler slot with a default-constructed Sampler.
+    for (size_t i = 0; i < ArraySize(mSamplersPS); i++)
+    {
+        mSamplersPS[i] = Sampler();
+    }
+    for (size_t i = 0; i < ArraySize(mSamplersVS); i++)
+    {
+        mSamplersVS[i] = Sampler();
+    }
+    mUsedVertexSamplerRange = 0;
+    mUsedPixelSamplerRange = 0;
+    mUsesPointSize = false;
+    mShaderVersion = 0;
+    // Uniform containers hold pointers: SafeDeleteContainer deletes each element before clearing.
+    SafeDeleteContainer(mUniforms);
+    SafeDeleteContainer(mUniformBlocks);
+    mUniformIndex.clear();
+    mOutputVariables.clear();
+    SafeDelete(mVertexUniformStorage);
+    SafeDelete(mFragmentUniformStorage);
+
+    mValidated = false;
+}
+
 }
diff --git a/src/libGLESv2/ProgramBinary.h b/src/libGLESv2/ProgramBinary.h
index 839923b..88e5637 100644
--- a/src/libGLESv2/ProgramBinary.h
+++ b/src/libGLESv2/ProgramBinary.h
@@ -25,6 +25,7 @@
 #include "libGLESv2/Shader.h"
 #include "libGLESv2/Constants.h"
 #include "libGLESv2/renderer/VertexDataManager.h"
+#include "libGLESv2/DynamicHLSL.h"
 
 namespace rx
 {
@@ -32,7 +33,6 @@
 class Renderer;
 struct TranslatedAttribute;
 class UniformStorage;
-class DynamicHLSL;
 }
 
 namespace gl
@@ -42,6 +42,7 @@
 class InfoLog;
 class AttributeBindings;
 class Buffer;
+class Framebuffer;
 
 // Struct used for correlating uniforms/elements of uniform arrays to handles
 struct VariableLocation
@@ -82,7 +83,8 @@
     explicit ProgramBinary(rx::Renderer *renderer);
     ~ProgramBinary();
 
-    rx::ShaderExecutable *getPixelExecutable() const;
+    rx::ShaderExecutable *getPixelExecutableForFramebuffer(const Framebuffer *fbo);
+    rx::ShaderExecutable *getPixelExecutableForOutputLayout(const std::vector<GLenum> &outputLayout);
     rx::ShaderExecutable *getVertexExecutableForInputLayout(const VertexFormat inputLayout[MAX_VERTEX_ATTRIBS]);
     rx::ShaderExecutable *getGeometryExecutable() const;
 
@@ -177,6 +179,8 @@
   private:
     DISALLOW_COPY_AND_ASSIGN(ProgramBinary);
 
+    void reset();
+
     bool linkVaryings(InfoLog &infoLog, FragmentShader *fragmentShader, VertexShader *vertexShader);
     bool linkAttributes(InfoLog &infoLog, const AttributeBindings &attributeBindings, FragmentShader *fragmentShader, VertexShader *vertexShader);
 
@@ -217,8 +221,7 @@
     class VertexExecutable
     {
       public:
-        VertexExecutable(rx::Renderer *const renderer,
-                         const VertexFormat inputLayout[MAX_VERTEX_ATTRIBS],
+        VertexExecutable(const VertexFormat inputLayout[MAX_VERTEX_ATTRIBS],
                          const GLenum signature[MAX_VERTEX_ATTRIBS],
                          rx::ShaderExecutable *shaderExecutable);
         ~VertexExecutable();
@@ -235,14 +238,35 @@
         rx::ShaderExecutable *mShaderExecutable;
     };
 
+    class PixelExecutable  // Pairs an output signature (vector of GLenum) with a pixel rx::ShaderExecutable.
+    {
+      public:
+        PixelExecutable(const std::vector<GLenum> &outputSignature, rx::ShaderExecutable *shaderExecutable);
+        ~PixelExecutable();  // NOTE(review): defined outside this hunk; presumably releases mShaderExecutable -- confirm.
+
+        bool matchesSignature(const std::vector<GLenum> &signature) const { return mOutputSignature == signature; }  // True only on exact vector equality (same size, same elements in order).
+        const std::vector<GLenum> &outputSignature() const { return mOutputSignature; }  // Returns a reference to the stored signature, not a copy.
+        rx::ShaderExecutable *shaderExecutable() const { return mShaderExecutable; }  // Returns the stored pointer as-is.
+
+      private:
+        std::vector<GLenum> mOutputSignature;  // Key compared by matchesSignature().
+        rx::ShaderExecutable *mShaderExecutable;  // NOTE(review): ownership is not visible in this header -- confirm in ProgramBinary.cpp.
+    };
+
     rx::Renderer *const mRenderer;
     DynamicHLSL *mDynamicHLSL;
 
     std::string mVertexHLSL;
     rx::D3DWorkaroundType mVertexWorkarounds;
     std::vector<VertexExecutable *> mVertexExecutables;
+
+    std::string mPixelHLSL;
+    rx::D3DWorkaroundType mPixelWorkarounds;
+    bool mUsesFragDepth;
+    std::vector<PixelShaderOuputVariable> mPixelShaderKey;
+    std::vector<PixelExecutable *> mPixelExecutables;
+
     rx::ShaderExecutable *mGeometryExecutable;
-    rx::ShaderExecutable *mPixelExecutable;
 
     Attribute mLinkedAttribute[MAX_VERTEX_ATTRIBS];
     Attribute mShaderAttributes[MAX_VERTEX_ATTRIBS];
diff --git a/src/libGLESv2/renderer/Renderer.h b/src/libGLESv2/renderer/Renderer.h
index 91982bf..3d1e294 100644
--- a/src/libGLESv2/renderer/Renderer.h
+++ b/src/libGLESv2/renderer/Renderer.h
@@ -118,7 +118,8 @@
                              bool ignoreViewport) = 0;
 
     virtual bool applyRenderTarget(gl::Framebuffer *frameBuffer) = 0;
-    virtual void applyShaders(gl::ProgramBinary *programBinary, bool rasterizerDiscard, bool transformFeedbackActive, const gl::VertexFormat inputLayout[]) = 0;
+    virtual void applyShaders(gl::ProgramBinary *programBinary, const gl::VertexFormat inputLayout[], const gl::Framebuffer *framebuffer,
+                              bool rasterizerDiscard, bool transformFeedbackActive) = 0;
     virtual void applyUniforms(const gl::ProgramBinary &programBinary) = 0;
     virtual bool applyPrimitiveType(GLenum primitiveType, GLsizei elementCount) = 0;
     virtual GLenum applyVertexBuffer(gl::ProgramBinary *programBinary, const gl::VertexAttribute vertexAttributes[], gl::VertexAttribCurrentValueData currentValues[],
diff --git a/src/libGLESv2/renderer/d3d11/Renderer11.cpp b/src/libGLESv2/renderer/d3d11/Renderer11.cpp
index 89885cf..c018bda 100644
--- a/src/libGLESv2/renderer/d3d11/Renderer11.cpp
+++ b/src/libGLESv2/renderer/d3d11/Renderer11.cpp
@@ -1349,10 +1349,11 @@
     }
 }
 
-void Renderer11::applyShaders(gl::ProgramBinary *programBinary, bool rasterizerDiscard, bool transformFeedbackActive, const gl::VertexFormat inputLayout[])
+void Renderer11::applyShaders(gl::ProgramBinary *programBinary, const gl::VertexFormat inputLayout[], const gl::Framebuffer *framebuffer,
+                              bool rasterizerDiscard, bool transformFeedbackActive)
 {
     ShaderExecutable *vertexExe = programBinary->getVertexExecutableForInputLayout(inputLayout);
-    ShaderExecutable *pixelExe = programBinary->getPixelExecutable();
+    ShaderExecutable *pixelExe = programBinary->getPixelExecutableForFramebuffer(framebuffer);
     ShaderExecutable *geometryExe = programBinary->getGeometryExecutable();
 
     ID3D11VertexShader *vertexShader = (vertexExe ? ShaderExecutable11::makeShaderExecutable11(vertexExe)->getVertexShader() : NULL);
diff --git a/src/libGLESv2/renderer/d3d11/Renderer11.h b/src/libGLESv2/renderer/d3d11/Renderer11.h
index 341cb4d..8fe39ab 100644
--- a/src/libGLESv2/renderer/d3d11/Renderer11.h
+++ b/src/libGLESv2/renderer/d3d11/Renderer11.h
@@ -77,7 +77,8 @@
 
     virtual bool applyPrimitiveType(GLenum mode, GLsizei count);
     virtual bool applyRenderTarget(gl::Framebuffer *frameBuffer);
-    virtual void applyShaders(gl::ProgramBinary *programBinary, bool rasterizerDiscard, bool transformFeedbackActive, const gl::VertexFormat inputLayout[]);
+    virtual void applyShaders(gl::ProgramBinary *programBinary, const gl::VertexFormat inputLayout[], const gl::Framebuffer *framebuffer,
+                              bool rasterizerDiscard, bool transformFeedbackActive);
     virtual void applyUniforms(const gl::ProgramBinary &programBinary);
     virtual GLenum applyVertexBuffer(gl::ProgramBinary *programBinary, const gl::VertexAttribute vertexAttributes[], gl::VertexAttribCurrentValueData currentValues[],
                                      GLint first, GLsizei count, GLsizei instances);
diff --git a/src/libGLESv2/renderer/d3d9/Renderer9.cpp b/src/libGLESv2/renderer/d3d9/Renderer9.cpp
index 4d79def..e8e4053 100644
--- a/src/libGLESv2/renderer/d3d9/Renderer9.cpp
+++ b/src/libGLESv2/renderer/d3d9/Renderer9.cpp
@@ -1601,13 +1601,14 @@
     }
 }
 
-void Renderer9::applyShaders(gl::ProgramBinary *programBinary, bool rasterizerDiscard, bool transformFeedbackActive, const gl::VertexFormat inputLayout[])
+void Renderer9::applyShaders(gl::ProgramBinary *programBinary, const gl::VertexFormat inputLayout[], const gl::Framebuffer *framebuffer,
+                             bool rasterizerDiscard, bool transformFeedbackActive)
 {
     ASSERT(!transformFeedbackActive);
     ASSERT(!rasterizerDiscard);
 
     ShaderExecutable *vertexExe = programBinary->getVertexExecutableForInputLayout(inputLayout);
-    ShaderExecutable *pixelExe = programBinary->getPixelExecutable();
+    ShaderExecutable *pixelExe = programBinary->getPixelExecutableForFramebuffer(framebuffer);
 
     IDirect3DVertexShader9 *vertexShader = (vertexExe ? ShaderExecutable9::makeShaderExecutable9(vertexExe)->getVertexShader() : NULL);
     IDirect3DPixelShader9 *pixelShader = (pixelExe ? ShaderExecutable9::makeShaderExecutable9(pixelExe)->getPixelShader() : NULL);
diff --git a/src/libGLESv2/renderer/d3d9/Renderer9.h b/src/libGLESv2/renderer/d3d9/Renderer9.h
index d4d3f66..ecf310d 100644
--- a/src/libGLESv2/renderer/d3d9/Renderer9.h
+++ b/src/libGLESv2/renderer/d3d9/Renderer9.h
@@ -76,7 +76,8 @@
                              bool ignoreViewport);
 
     virtual bool applyRenderTarget(gl::Framebuffer *frameBuffer);
-    virtual void applyShaders(gl::ProgramBinary *programBinary, bool rasterizerDiscard, bool transformFeedbackActive, const gl::VertexFormat inputLayout[]);
+    virtual void applyShaders(gl::ProgramBinary *programBinary, const gl::VertexFormat inputLayout[], const gl::Framebuffer *framebuffer,
+                              bool rasterizerDiscard, bool transformFeedbackActive);
     virtual void applyUniforms(const gl::ProgramBinary &programBinary);
     virtual bool applyPrimitiveType(GLenum primitiveType, GLsizei elementCount);
     virtual GLenum applyVertexBuffer(gl::ProgramBinary *programBinary, const gl::VertexAttribute vertexAttributes[], gl::VertexAttribCurrentValueData currentValues[],