Disable optimizations for shaders with conditional discard in D3D9, and only use expanded
short-circuiting conditionals for expressions with potential side-effects.

Conservatively assume aggreate and selection operators have side effects for now.

BUG=
ANGLEBUG=486
R=geofflang@chromium.org, kbr@chromium.org, nicolas@transgaming.com, shannonwoods@chromium.org

Review URL: https://codereview.appspot.com/14441075
diff --git a/src/common/version.h b/src/common/version.h
index d917aa2..9f221f3 100644
--- a/src/common/version.h
+++ b/src/common/version.h
@@ -1,7 +1,7 @@
 #define MAJOR_VERSION 1
 #define MINOR_VERSION 2
 #define BUILD_VERSION 0
-#define BUILD_REVISION 2449
+#define BUILD_REVISION 2450
 
 #define STRINGIFY(x) #x
 #define MACRO_STRINGIFY(x) STRINGIFY(x)
diff --git a/src/compiler/Intermediate.cpp b/src/compiler/Intermediate.cpp
index 5bab216..b3cb3dd 100644
--- a/src/compiler/Intermediate.cpp
+++ b/src/compiler/Intermediate.cpp
@@ -806,7 +806,7 @@
 //
 // Returns true if state is modified.
 //
-bool TIntermOperator::modifiesState() const
+bool TIntermOperator::hasSideEffects() const
 {
     switch (op) {
         case EOpPostIncrement:
diff --git a/src/compiler/NodeSearch.h b/src/compiler/NodeSearch.h
new file mode 100644
index 0000000..27e471d
--- /dev/null
+++ b/src/compiler/NodeSearch.h
@@ -0,0 +1,78 @@
+//
+// Copyright (c) 2002-2013 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// NodeSearch.h: Utilities for searching translator node graphs
+//
+
+#ifndef TRANSLATOR_NODESEARCH_H_
+#define TRANSLATOR_NODESEARCH_H_
+
+namespace sh
+{
+
+template <class Parent>
+class NodeSearchTraverser : public TIntermTraverser
+{
+  public:
+    NodeSearchTraverser()
+        : mFound(false)
+    {}
+
+    bool found() const { return mFound; }
+
+    static bool search(TIntermNode *node)
+    {
+        Parent searchTraverser;
+        node->traverse(&searchTraverser);
+        return searchTraverser.found();
+    }
+
+  protected:
+    bool mFound;
+};
+
+class FindDiscard : public NodeSearchTraverser<FindDiscard>
+{
+  public:
+    virtual bool visitBranch(Visit visit, TIntermBranch *node)
+    {
+        switch (node->getFlowOp())
+        {
+          case EOpKill:
+            mFound = true;
+            break;
+
+          default: break;
+        }
+
+        return !mFound;
+    }
+};
+
+class FindSideEffectRewriting : public NodeSearchTraverser<FindSideEffectRewriting>
+{
+  public:
+    virtual bool visitBinary(Visit visit, TIntermBinary *node)
+    {
+        switch (node->getOp())
+        {
+          case EOpLogicalOr:
+          case EOpLogicalAnd:
+            if (node->getRight()->hasSideEffects())
+            {
+                mFound = true;
+            }
+            break;
+
+          default: break;
+        }
+
+        return !mFound;
+    }
+};
+
+}
+
+#endif // TRANSLATOR_NODESEARCH_H_
diff --git a/src/compiler/OutputHLSL.cpp b/src/compiler/OutputHLSL.cpp
index 79a373e..53799df 100644
--- a/src/compiler/OutputHLSL.cpp
+++ b/src/compiler/OutputHLSL.cpp
@@ -12,6 +12,7 @@
 #include "compiler/InfoSink.h"
 #include "compiler/SearchSymbol.h"
 #include "compiler/UnfoldShortCircuit.h"
+#include "compiler/NodeSearch.h"
 
 #include <algorithm>
 #include <cfloat>
@@ -72,6 +73,7 @@
     mUsesAtan2_2 = false;
     mUsesAtan2_3 = false;
     mUsesAtan2_4 = false;
+    mUsesDiscardRewriting = false;
 
     mNumRenderTargets = resources.EXT_draw_buffers ? resources.MaxDrawBuffers : 1;
 
@@ -196,6 +198,11 @@
         attributes += "static " + typeString(type) + " " + decorate(name) + arrayString(type) + " = " + initializer(type) + ";\n";
     }
 
+    if (mUsesDiscardRewriting)
+    {
+        out << "#define ANGLE_USES_DISCARD_REWRITING" << "\n";
+    }
+
     if (shaderType == SH_FRAGMENT_SHADER)
     {
         TExtensionBehavior::const_iterator iter = mContext.extensionBehavior().find("GL_EXT_draw_buffers");
@@ -1299,15 +1306,31 @@
       case EOpMatrixTimesVector: outputTriplet(visit, "mul(transpose(", "), ", ")"); break;
       case EOpMatrixTimesMatrix: outputTriplet(visit, "transpose(mul(transpose(", "), transpose(", ")))"); break;
       case EOpLogicalOr:
-        out << "s" << mUnfoldShortCircuit->getNextTemporaryIndex();
-        return false;
+        if (node->getRight()->hasSideEffects())
+        {
+            out << "s" << mUnfoldShortCircuit->getNextTemporaryIndex();
+            return false;
+        }
+        else
+        {
+           outputTriplet(visit, "(", " || ", ")");
+           return true;
+        }
       case EOpLogicalXor:
         mUsesXor = true;
         outputTriplet(visit, "xor(", ", ", ")");
         break;
       case EOpLogicalAnd:
-        out << "s" << mUnfoldShortCircuit->getNextTemporaryIndex();
-        return false;
+        if (node->getRight()->hasSideEffects())
+        {
+            out << "s" << mUnfoldShortCircuit->getNextTemporaryIndex();
+            return false;
+        }
+        else
+        {
+           outputTriplet(visit, "(", " && ", ")");
+           return true;
+        }
       default: UNREACHABLE();
     }
 
@@ -1944,7 +1967,7 @@
     {
         mUnfoldShortCircuit->traverse(node->getCondition());
 
-        out << "if(";
+        out << "if (";
 
         node->getCondition()->traverse(this);
 
@@ -1953,9 +1976,14 @@
         outputLineDirective(node->getLine().first_line);
         out << "{\n";
 
+        bool discard = false;
+
         if (node->getTrueBlock())
         {
             traverseStatements(node->getTrueBlock());
+
+            // Detect true discard
+            discard = (discard || FindDiscard::search(node->getTrueBlock()));
         }
 
         outputLineDirective(node->getLine().first_line);
@@ -1973,6 +2001,15 @@
 
             outputLineDirective(node->getFalseBlock()->getLine().first_line);
             out << ";\n}\n";
+
+            // Detect false discard
+            discard = (discard || FindDiscard::search(node->getFalseBlock()));
+        }
+
+        // ANGLE issue 486: Detect problematic conditional discard
+        if (discard && FindSideEffectRewriting::search(node))
+        {
+            mUsesDiscardRewriting = true;
         }
     }
 
@@ -2070,7 +2107,9 @@
 
     switch (node->getFlowOp())
     {
-      case EOpKill:     outputTriplet(visit, "discard;\n", "", "");  break;
+      case EOpKill:
+        outputTriplet(visit, "discard;\n", "", "");
+        break;
       case EOpBreak:
         if (visit == PreVisit)
         {
@@ -2293,7 +2332,7 @@
 
                 if (!firstLoopFragment)
                 {
-                    out << "if(!Break";
+                    out << "if (!Break";
                     index->traverse(this);
                     out << ") {\n";
                 }
diff --git a/src/compiler/OutputHLSL.h b/src/compiler/OutputHLSL.h
index 49bc137..586a76f 100644
--- a/src/compiler/OutputHLSL.h
+++ b/src/compiler/OutputHLSL.h
@@ -125,6 +125,7 @@
     bool mUsesAtan2_2;
     bool mUsesAtan2_3;
     bool mUsesAtan2_4;
+    bool mUsesDiscardRewriting;
 
     int mNumRenderTargets;
 
diff --git a/src/compiler/UnfoldShortCircuit.cpp b/src/compiler/UnfoldShortCircuit.cpp
index 47f0afc..5cfdd32 100644
--- a/src/compiler/UnfoldShortCircuit.cpp
+++ b/src/compiler/UnfoldShortCircuit.cpp
@@ -31,6 +31,14 @@
 {
     TInfoSinkBase &out = mOutputHLSL->getBodyStream();
 
+    // If our right node doesn't have side effects, we know we don't need to unfold this
+    // expression: there will be no short-circuiting side effects to avoid
+    // (note: unfolding doesn't depend on the left node -- it will always be evaluated)
+    if (!node->getRight()->hasSideEffects())
+    {
+        return true;
+    }
+
     switch (node->getOp())
     {
       case EOpLogicalOr:
@@ -49,7 +57,7 @@
             mTemporaryIndex = i + 1;
             node->getLeft()->traverse(mOutputHLSL);
             out << ";\n";
-            out << "if(!s" << i << ")\n"
+            out << "if (!s" << i << ")\n"
                    "{\n";
             mTemporaryIndex = i + 1;
             node->getRight()->traverse(this);
@@ -80,7 +88,7 @@
             mTemporaryIndex = i + 1;
             node->getLeft()->traverse(mOutputHLSL);
             out << ";\n";
-            out << "if(s" << i << ")\n"
+            out << "if (s" << i << ")\n"
                    "{\n";
             mTemporaryIndex = i + 1;
             node->getRight()->traverse(this);
@@ -115,7 +123,7 @@
 
         mTemporaryIndex = i + 1;
         node->getCondition()->traverse(this);
-        out << "if(";
+        out << "if (";
         mTemporaryIndex = i + 1;
         node->getCondition()->traverse(mOutputHLSL);
         out << ")\n"
diff --git a/src/compiler/ValidateLimitations.cpp b/src/compiler/ValidateLimitations.cpp
index 7b8b1c8..3f3260b 100644
--- a/src/compiler/ValidateLimitations.cpp
+++ b/src/compiler/ValidateLimitations.cpp
@@ -457,7 +457,7 @@
 bool ValidateLimitations::validateOperation(TIntermOperator* node,
                                             TIntermNode* operand) {
     // Check if loop index is modified in the loop body.
-    if (!withinLoopBody() || !node->modifiesState())
+    if (!withinLoopBody() || !node->hasSideEffects())
         return true;
 
     const TIntermSymbol* symbol = operand->getAsSymbolNode();
diff --git a/src/compiler/depgraph/DependencyGraphBuilder.cpp b/src/compiler/depgraph/DependencyGraphBuilder.cpp
index d586cfd..069e963 100644
--- a/src/compiler/depgraph/DependencyGraphBuilder.cpp
+++ b/src/compiler/depgraph/DependencyGraphBuilder.cpp
@@ -94,7 +94,7 @@
 bool TDependencyGraphBuilder::visitBinary(Visit visit, TIntermBinary* intermBinary)
 {
     TOperator op = intermBinary->getOp();
-    if (op == EOpInitialize || intermBinary->modifiesState())
+    if (op == EOpInitialize || intermBinary->hasSideEffects())
         visitAssignment(intermBinary);
     else if (op == EOpLogicalAnd || op == EOpLogicalOr)
         visitLogicalOp(intermBinary);
diff --git a/src/compiler/intermediate.h b/src/compiler/intermediate.h
index 69e3f07..4ddfdab 100644
--- a/src/compiler/intermediate.h
+++ b/src/compiler/intermediate.h
@@ -252,6 +252,8 @@
     TIntermTyped(const TType& t) : type(t)  { }
     virtual TIntermTyped* getAsTyped() { return this; }
 
+    virtual bool hasSideEffects() const = 0;
+
     void setType(const TType& t) { type = t; }
     const TType& getType() const { return type; }
     TType* getTypePointer() { return &type; }
@@ -354,6 +356,8 @@
     TIntermSymbol(int i, const TString& sym, const TType& t) : 
             TIntermTyped(t), id(i)  { symbol = sym; originalSymbol = sym; } 
 
+    virtual bool hasSideEffects() const { return false; }
+
     int getId() const { return id; }
     const TString& getSymbol() const { return symbol; }
 
@@ -376,6 +380,8 @@
 public:
     TIntermConstantUnion(ConstantUnion *unionPointer, const TType& t) : TIntermTyped(t), unionArrayPointer(unionPointer) { }
 
+    virtual bool hasSideEffects() const { return false; }
+
     ConstantUnion* getUnionArrayPointer() const { return unionArrayPointer; }
     
     int getIConst(int index) const { return unionArrayPointer ? unionArrayPointer[index].getIConst() : 0; }
@@ -400,7 +406,7 @@
     TOperator getOp() const { return op; }
     void setOp(TOperator o) { op = o; }
 
-    bool modifiesState() const;
+    virtual bool hasSideEffects() const;
     bool isConstructor() const;
 
 protected:
@@ -421,6 +427,8 @@
     virtual bool replaceChildNode(
         TIntermNode *original, TIntermNode *replacement);
 
+    virtual bool hasSideEffects() const { return (TIntermOperator::hasSideEffects() || left->hasSideEffects() || right->hasSideEffects()); }
+
     void setLeft(TIntermTyped* n) { left = n; }
     void setRight(TIntermTyped* n) { right = n; }
     TIntermTyped* getLeft() const { return left; }
@@ -451,6 +459,8 @@
     virtual bool replaceChildNode(
         TIntermNode *original, TIntermNode *replacement);
 
+    virtual bool hasSideEffects() const { return (TIntermOperator::hasSideEffects() || operand->hasSideEffects()); }
+
     void setOperand(TIntermTyped* o) { operand = o; }
     TIntermTyped* getOperand() { return operand; }    
     bool promote(TInfoSink&);
@@ -483,6 +493,9 @@
     virtual bool replaceChildNode(
         TIntermNode *original, TIntermNode *replacement);
 
+    // Conservatively assume function calls and other aggregate operators have side-effects
+    virtual bool hasSideEffects() const { return true; }
+
     TIntermSequence& getSequence() { return sequence; }
 
     void setName(const TString& n) { name = n; }
@@ -528,6 +541,9 @@
     virtual bool replaceChildNode(
         TIntermNode *original, TIntermNode *replacement);
 
+    // Conservatively assume selections have side-effects
+    virtual bool hasSideEffects() const { return true; }
+
     bool usesTernaryOperator() const { return getBasicType() != EbtVoid; }
     TIntermNode* getCondition() const { return condition; }
     TIntermNode* getTrueBlock() const { return trueBlock; }
diff --git a/src/compiler/translator.vcxproj b/src/compiler/translator.vcxproj
index 7325172..54d71a1 100644
--- a/src/compiler/translator.vcxproj
+++ b/src/compiler/translator.vcxproj
@@ -263,6 +263,7 @@
     <ClInclude Include="localintermediate.h" />
     <ClInclude Include="MapLongVariableNames.h" />
     <ClInclude Include="MMap.h" />
+    <ClInclude Include="NodeSearch.h" />
     <ClInclude Include="osinclude.h" />
     <ClInclude Include="OutputESSL.h" />
     <ClInclude Include="OutputGLSL.h" />
diff --git a/src/compiler/translator.vcxproj.filters b/src/compiler/translator.vcxproj.filters
index b8cd730..1df6c7c 100644
--- a/src/compiler/translator.vcxproj.filters
+++ b/src/compiler/translator.vcxproj.filters
@@ -346,6 +346,9 @@
     <ClInclude Include="ParseContext.h">
       <Filter>Header Files</Filter>
     </ClInclude>
+    <ClInclude Include="NodeSearch.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <CustomBuild Include="glslang.l">
diff --git a/src/libGLESv2/ProgramBinary.cpp b/src/libGLESv2/ProgramBinary.cpp
index 922cd3d..ee0ec8e 100644
--- a/src/libGLESv2/ProgramBinary.cpp
+++ b/src/libGLESv2/ProgramBinary.cpp
@@ -34,6 +34,11 @@
     return buffer;
 }
 
+static rx::D3DWorkaroundType DiscardWorkaround(bool usesDiscard)
+{
+    return (usesDiscard ? rx::ANGLE_D3D_WORKAROUND_SM3_OPTIMIZER : rx::ANGLE_D3D_WORKAROUND_NONE);
+}
+
 UniformLocation::UniformLocation(const std::string &name, unsigned int element, unsigned int index) 
     : name(name), element(element), index(index)
 {
@@ -1962,13 +1967,13 @@
 
     if (success)
     {
-        mVertexExecutable = mRenderer->compileToExecutable(infoLog, vertexHLSL.c_str(), rx::SHADER_VERTEX);
-        mPixelExecutable = mRenderer->compileToExecutable(infoLog, pixelHLSL.c_str(), rx::SHADER_PIXEL);
+        mVertexExecutable = mRenderer->compileToExecutable(infoLog, vertexHLSL.c_str(), rx::SHADER_VERTEX, DiscardWorkaround(vertexShader->mUsesDiscardRewriting));
+        mPixelExecutable = mRenderer->compileToExecutable(infoLog, pixelHLSL.c_str(), rx::SHADER_PIXEL, DiscardWorkaround(fragmentShader->mUsesDiscardRewriting));
 
         if (usesGeometryShader())
         {
             std::string geometryHLSL = generateGeometryShaderHLSL(registers, packing, fragmentShader, vertexShader);
-            mGeometryExecutable = mRenderer->compileToExecutable(infoLog, geometryHLSL.c_str(), rx::SHADER_GEOMETRY);
+            mGeometryExecutable = mRenderer->compileToExecutable(infoLog, geometryHLSL.c_str(), rx::SHADER_GEOMETRY, rx::ANGLE_D3D_WORKAROUND_NONE);
         }
 
         if (!mVertexExecutable || !mPixelExecutable || (usesGeometryShader() && !mGeometryExecutable))
diff --git a/src/libGLESv2/Shader.cpp b/src/libGLESv2/Shader.cpp
index 7dfdd0b..f6a2f03 100644
--- a/src/libGLESv2/Shader.cpp
+++ b/src/libGLESv2/Shader.cpp
@@ -307,6 +307,7 @@
         mUsesPointCoord = strstr(mHlsl, "GL_USES_POINT_COORD") != NULL;
         mUsesDepthRange = strstr(mHlsl, "GL_USES_DEPTH_RANGE") != NULL;
         mUsesFragDepth = strstr(mHlsl, "GL_USES_FRAG_DEPTH") != NULL;
+        mUsesDiscardRewriting = strstr(mHlsl, "ANGLE_USES_DISCARD_REWRITING") != NULL;
     }
 }
 
@@ -340,6 +341,7 @@
     mUsesPointCoord = false;
     mUsesDepthRange = false;
     mUsesFragDepth = false;
+    mUsesDiscardRewriting = false;
 
     mActiveUniforms.clear();
 }
diff --git a/src/libGLESv2/Shader.h b/src/libGLESv2/Shader.h
index 2afe297..848d4c9 100644
--- a/src/libGLESv2/Shader.h
+++ b/src/libGLESv2/Shader.h
@@ -107,6 +107,7 @@
     bool mUsesPointCoord;
     bool mUsesDepthRange;
     bool mUsesFragDepth;
+    bool mUsesDiscardRewriting;
 
     static void *mFragmentCompiler;
     static void *mVertexCompiler;
diff --git a/src/libGLESv2/renderer/Renderer.h b/src/libGLESv2/renderer/Renderer.h
index 04e877b..e1861f0 100644
--- a/src/libGLESv2/renderer/Renderer.h
+++ b/src/libGLESv2/renderer/Renderer.h
@@ -93,6 +93,12 @@
     SHADER_GEOMETRY
 };
 
+enum D3DWorkaroundType
+{
+    ANGLE_D3D_WORKAROUND_NONE,
+    ANGLE_D3D_WORKAROUND_SM3_OPTIMIZER
+};
+
 class Renderer
 {
   public:
@@ -207,7 +213,7 @@
 
     // Shader operations
     virtual ShaderExecutable *loadExecutable(const void *function, size_t length, rx::ShaderType type) = 0;
-    virtual ShaderExecutable *compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type) = 0;
+    virtual ShaderExecutable *compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type, D3DWorkaroundType workaround) = 0;
 
     // Image operations
     virtual Image *createImage() = 0;
diff --git a/src/libGLESv2/renderer/Renderer11.cpp b/src/libGLESv2/renderer/Renderer11.cpp
index 150e14c..13ba355 100644
--- a/src/libGLESv2/renderer/Renderer11.cpp
+++ b/src/libGLESv2/renderer/Renderer11.cpp
@@ -2830,7 +2830,7 @@
     return executable;
 }
 
-ShaderExecutable *Renderer11::compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type)
+ShaderExecutable *Renderer11::compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type, D3DWorkaroundType workaround)
 {
     const char *profile = NULL;
 
diff --git a/src/libGLESv2/renderer/Renderer11.h b/src/libGLESv2/renderer/Renderer11.h
index 90ef04d..b86f485 100644
--- a/src/libGLESv2/renderer/Renderer11.h
+++ b/src/libGLESv2/renderer/Renderer11.h
@@ -155,7 +155,7 @@
 
     // Shader operations
     virtual ShaderExecutable *loadExecutable(const void *function, size_t length, rx::ShaderType type);
-    virtual ShaderExecutable *compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type);
+    virtual ShaderExecutable *compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type, D3DWorkaroundType workaround);
 
     // Image operations
     virtual Image *createImage();
diff --git a/src/libGLESv2/renderer/Renderer9.cpp b/src/libGLESv2/renderer/Renderer9.cpp
index d3f3814..e89f8bd 100644
--- a/src/libGLESv2/renderer/Renderer9.cpp
+++ b/src/libGLESv2/renderer/Renderer9.cpp
@@ -3129,7 +3129,7 @@
     return executable;
 }
 
-ShaderExecutable *Renderer9::compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type)
+ShaderExecutable *Renderer9::compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type, D3DWorkaroundType workaround)
 {
     const char *profile = NULL;
 
@@ -3146,7 +3146,11 @@
         return NULL;
     }
 
-    ID3DBlob *binary = (ID3DBlob*)compileToBinary(infoLog, shaderHLSL, profile, ANGLE_COMPILE_OPTIMIZATION_LEVEL, true);
+    // ANGLE issue 486:
+    // Work-around a D3D9 compiler bug that presents itself when using conditional discard, by disabling optimization
+    UINT optimizationFlags = (workaround == ANGLE_D3D_WORKAROUND_SM3_OPTIMIZER ? D3DCOMPILE_SKIP_OPTIMIZATION : ANGLE_COMPILE_OPTIMIZATION_LEVEL);
+
+    ID3DBlob *binary = (ID3DBlob*)compileToBinary(infoLog, shaderHLSL, profile, optimizationFlags, true);
     if (!binary)
         return NULL;
 
diff --git a/src/libGLESv2/renderer/Renderer9.h b/src/libGLESv2/renderer/Renderer9.h
index f8932c4..afc2dc1 100644
--- a/src/libGLESv2/renderer/Renderer9.h
+++ b/src/libGLESv2/renderer/Renderer9.h
@@ -170,7 +170,7 @@
 
     // Shader operations
     virtual ShaderExecutable *loadExecutable(const void *function, size_t length, rx::ShaderType type);
-    virtual ShaderExecutable *compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type);
+    virtual ShaderExecutable *compileToExecutable(gl::InfoLog &infoLog, const char *shaderHLSL, rx::ShaderType type, D3DWorkaroundType workaround);
 
     // Image operations
     virtual Image *createImage();