Add a fast path for single render target masked clears in Renderer11.

TRAC #22898

Signed-off-by: Geoff Lang
Signed-off-by: Shannon Woods
Author: Jamie Madill

Conflicts:
	src/libGLESv2/libGLESv2.vcxproj.filters

git-svn-id: https://angleproject.googlecode.com/svn/branches/dx11proto@2219 736b8ea6-26fd-11df-bfd4-992fa37f6226
diff --git a/src/libGLESv2/libGLESv2.vcxproj b/src/libGLESv2/libGLESv2.vcxproj
index 55e321b..ca0d6b5 100644
--- a/src/libGLESv2/libGLESv2.vcxproj
+++ b/src/libGLESv2/libGLESv2.vcxproj
@@ -357,8 +357,9 @@
     <ClInclude Include="renderer\ShaderExecutable.h" />

     <ClInclude Include="renderer\ShaderExecutable11.h" />

     <ClInclude Include="renderer\ShaderExecutable9.h" />

-    <ClInclude Include="renderer\shaders\compiled\clear11ps.h" />

     <ClInclude Include="renderer\shaders\compiled\clear11vs.h" />

+    <ClInclude Include="renderer\shaders\compiled\clearmultiple11ps.h" />

+    <ClInclude Include="renderer\shaders\compiled\clearsingle11ps.h" />

     <ClInclude Include="renderer\shaders\compiled\componentmaskps.h" />

     <ClInclude Include="renderer\shaders\compiled\flipyvs.h" />

     <ClInclude Include="renderer\shaders\compiled\luminanceps.h" />

diff --git a/src/libGLESv2/libGLESv2.vcxproj.filters b/src/libGLESv2/libGLESv2.vcxproj.filters
index ebbd6c5..34f9704 100644
--- a/src/libGLESv2/libGLESv2.vcxproj.filters
+++ b/src/libGLESv2/libGLESv2.vcxproj.filters
@@ -325,10 +325,13 @@
     <ClInclude Include="renderer\shaders\compiled\passthroughlumalpha11ps.h">

       <Filter>Shaders\Compiled</Filter>

     </ClInclude>

-    <ClInclude Include="renderer\shaders\compiled\clear11ps.h">

+    <ClInclude Include="renderer\shaders\compiled\clear11vs.h">

       <Filter>Shaders\Compiled</Filter>

     </ClInclude>

-    <ClInclude Include="renderer\shaders\compiled\clear11vs.h">

+    <ClInclude Include="renderer\shaders\compiled\clearmultiple11ps.h">

+      <Filter>Shaders\Compiled</Filter>

+    </ClInclude>

+    <ClInclude Include="renderer\shaders\compiled\clearsingle11ps.h">

       <Filter>Shaders\Compiled</Filter>

     </ClInclude>

     <ClInclude Include="..\common\system.h">

diff --git a/src/libGLESv2/renderer/Renderer11.cpp b/src/libGLESv2/renderer/Renderer11.cpp
index 2a868cc..9abbc7d 100644
--- a/src/libGLESv2/renderer/Renderer11.cpp
+++ b/src/libGLESv2/renderer/Renderer11.cpp
@@ -35,7 +35,8 @@
 #include "libGLESv2/renderer/shaders/compiled/passthroughlumalpha11ps.h"
 
 #include "libGLESv2/renderer/shaders/compiled/clear11vs.h"
-#include "libGLESv2/renderer/shaders/compiled/clear11ps.h"
+#include "libGLESv2/renderer/shaders/compiled/clearsingle11ps.h"
+#include "libGLESv2/renderer/shaders/compiled/clearmultiple11ps.h"
 
 #include "libEGL/Display.h"
 
@@ -87,7 +88,8 @@
     mClearVB = NULL;
     mClearIL = NULL;
     mClearVS = NULL;
-    mClearPS = NULL;
+    mClearSinglePS = NULL;
+    mClearMultiplePS = NULL;
     mClearScissorRS = NULL;
     mClearNoScissorRS = NULL;
 
@@ -1467,7 +1469,7 @@
 
      if (needMaskedColorClear || needMaskedStencilClear || needScissoredClear)
      {
-         maskedClear(clearParams);
+         maskedClear(clearParams, frameBuffer->usingExtendedDrawBuffers());
      }
      else
      {
@@ -1545,13 +1547,13 @@
     }
 }
 
-void Renderer11::maskedClear(const gl::ClearParameters &clearParams)
+void Renderer11::maskedClear(const gl::ClearParameters &clearParams, bool usingExtendedDrawBuffers)
 {
     HRESULT result;
 
     if (!mClearResourcesInitialized)
     {
-        ASSERT(!mClearVB && !mClearVS && !mClearPS && !mClearScissorRS && !mClearNoScissorRS);
+        ASSERT(!mClearVB && !mClearVS && !mClearSinglePS && !mClearMultiplePS && !mClearScissorRS && !mClearNoScissorRS);
 
         D3D11_BUFFER_DESC vbDesc;
         vbDesc.ByteWidth = sizeof(d3d11::PositionDepthColorVertex) * 4;
@@ -1579,9 +1581,13 @@
         ASSERT(SUCCEEDED(result));
         d3d11::SetDebugName(mClearVS, "Renderer11 masked clear vertex shader");
 
-        result = mDevice->CreatePixelShader(g_PS_Clear, sizeof(g_PS_Clear), NULL, &mClearPS);
+        result = mDevice->CreatePixelShader(g_PS_ClearSingle, sizeof(g_PS_ClearSingle), NULL, &mClearSinglePS);
         ASSERT(SUCCEEDED(result));
-        d3d11::SetDebugName(mClearPS, "Renderer11 masked clear pixel shader");
+        d3d11::SetDebugName(mClearSinglePS, "Renderer11 masked clear pixel shader (1 RT)");
+
+        result = mDevice->CreatePixelShader(g_PS_ClearMultiple, sizeof(g_PS_ClearMultiple), NULL, &mClearMultiplePS);
+        ASSERT(SUCCEEDED(result));
+        d3d11::SetDebugName(mClearMultiplePS, "Renderer11 masked clear pixel shader (MRT)");
 
         D3D11_RASTERIZER_DESC rsScissorDesc;
         rsScissorDesc.FillMode = D3D11_FILL_SOLID;
@@ -1688,9 +1694,11 @@
     mDeviceContext->RSSetState(mScissorEnabled ? mClearScissorRS : mClearNoScissorRS);
 
     // Apply shaders
+    ID3D11PixelShader *pixelShader = usingExtendedDrawBuffers ? mClearMultiplePS : mClearSinglePS;
+
     mDeviceContext->IASetInputLayout(mClearIL);
     mDeviceContext->VSSetShader(mClearVS, NULL, 0);
-    mDeviceContext->PSSetShader(mClearPS, NULL, 0);
+    mDeviceContext->PSSetShader(pixelShader, NULL, 0);
     mDeviceContext->GSSetShader(NULL, NULL, 0);
 
     // Apply vertex buffer
@@ -1775,7 +1783,8 @@
     SafeRelease(mClearVB);
     SafeRelease(mClearIL);
     SafeRelease(mClearVS);
-    SafeRelease(mClearPS);
+    SafeRelease(mClearSinglePS);
+    SafeRelease(mClearMultiplePS);
     SafeRelease(mClearScissorRS);
     SafeRelease(mClearNoScissorRS);
 
diff --git a/src/libGLESv2/renderer/Renderer11.h b/src/libGLESv2/renderer/Renderer11.h
index a3e42e9..719eb1f 100644
--- a/src/libGLESv2/renderer/Renderer11.h
+++ b/src/libGLESv2/renderer/Renderer11.h
@@ -192,7 +192,7 @@
                          GLenum format, GLenum type, GLsizei outputPitch, bool packReverseRowOrder,
                          GLint packAlignment, void *pixels);
 
-    void maskedClear(const gl::ClearParameters &clearParams);
+    void maskedClear(const gl::ClearParameters &clearParams, bool usingExtendedDrawBuffers);
     rx::Range getViewportBounds() const;
 
     bool blitRenderbufferRect(const gl::Rectangle &readRect, const gl::Rectangle &drawRect, RenderTarget *readRenderTarget, 
@@ -325,7 +325,8 @@
     ID3D11Buffer *mClearVB;
     ID3D11InputLayout *mClearIL;
     ID3D11VertexShader *mClearVS;
-    ID3D11PixelShader *mClearPS;
+    ID3D11PixelShader *mClearSinglePS;
+    ID3D11PixelShader *mClearMultiplePS;
     ID3D11RasterizerState *mClearScissorRS;
     ID3D11RasterizerState *mClearNoScissorRS;
 
diff --git a/src/libGLESv2/renderer/shaders/Clear11.hlsl b/src/libGLESv2/renderer/shaders/Clear11.hlsl
index d275260..042ac69 100644
--- a/src/libGLESv2/renderer/shaders/Clear11.hlsl
+++ b/src/libGLESv2/renderer/shaders/Clear11.hlsl
@@ -6,7 +6,7 @@
 }
 
 // Assume we are in SM4+, which has 8 color outputs
-struct PS_Output
+struct PS_OutputMultiple
 {
 	float4 color0 : SV_TARGET0;
 	float4 color1 : SV_TARGET1;
@@ -18,9 +18,9 @@
 	float4 color7 : SV_TARGET7;
 };
 
-PS_Output PS_Clear(in float4 inPosition : SV_POSITION, in float4 inColor : COLOR)
+PS_OutputMultiple PS_ClearMultiple(in float4 inPosition : SV_POSITION, in float4 inColor : COLOR)
 {
-	PS_Output outColor;
+	PS_OutputMultiple outColor;
 	outColor.color0 = inColor;
 	outColor.color1 = inColor;
 	outColor.color2 = inColor;
@@ -31,3 +31,8 @@
 	outColor.color7 = inColor;
 	return outColor;
 }
+
+float4 PS_ClearSingle(in float4 inPosition : SV_Position, in float4 inColor : COLOR) : SV_Target0
+{
+	return inColor; // Single render target variant: forwards the input color unchanged to SV_Target0 only; inPosition is unused.
+}
diff --git a/src/libGLESv2/renderer/shaders/compiled/clear11ps.h b/src/libGLESv2/renderer/shaders/compiled/clearmultiple11ps.h
similarity index 97%
rename from src/libGLESv2/renderer/shaders/compiled/clear11ps.h
rename to src/libGLESv2/renderer/shaders/compiled/clearmultiple11ps.h
index c036a69..c70eebb 100644
--- a/src/libGLESv2/renderer/shaders/compiled/clear11ps.h
+++ b/src/libGLESv2/renderer/shaders/compiled/clearmultiple11ps.h
@@ -3,7 +3,8 @@
 // Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111

 //

 //

-//   fxc /E PS_Clear /T ps_4_0 /Fh compiled/clear11ps.h Clear11.hlsl

+//   fxc /E PS_ClearMultiple /T ps_4_0 /Fh compiled/clearmultiple11ps.h

+//    Clear11.hlsl

 //

 //

 //

@@ -50,7 +51,7 @@
 // Approximately 9 instruction slots used

 #endif

 

-const BYTE g_PS_Clear[] =

+const BYTE g_PS_ClearMultiple[] =

 {

      68,  88,  66,  67, 146, 246, 

     236, 240,  50,  40,  87,  55, 

diff --git a/src/libGLESv2/renderer/shaders/compiled/clearsingle11ps.h b/src/libGLESv2/renderer/shaders/compiled/clearsingle11ps.h
new file mode 100644
index 0000000..20395e2
--- /dev/null
+++ b/src/libGLESv2/renderer/shaders/compiled/clearsingle11ps.h
@@ -0,0 +1,113 @@
+#if 0

+//

+// Generated by Microsoft (R) HLSL Shader Compiler 9.29.952.3111

+//

+//

+//   fxc /E PS_ClearSingle /T ps_4_0 /Fh compiled/clearsingle11ps.h

+//    Clear11.hlsl

+//

+//

+//

+// Input signature:

+//

+// Name                 Index   Mask Register SysValue Format   Used

+// -------------------- ----- ------ -------- -------- ------ ------

+// SV_Position              0   xyzw        0      POS  float       

+// COLOR                    0   xyzw        1     NONE  float   xyzw

+//

+//

+// Output signature:

+//

+// Name                 Index   Mask Register SysValue Format   Used

+// -------------------- ----- ------ -------- -------- ------ ------

+// SV_Target                0   xyzw        0   TARGET  float   xyzw

+//

+ps_4_0

+dcl_input_ps linear v1.xyzw

+dcl_output o0.xyzw

+mov o0.xyzw, v1.xyzw

+ret 

+// Approximately 2 instruction slots used

+#endif

+

+const BYTE g_PS_ClearSingle[] =

+{

+     68,  88,  66,  67,  11,  49, 

+    220, 157,  35, 106, 175, 161, 

+    180, 178, 147, 150, 134, 162, 

+    222,  79,   1,   0,   0,   0, 

+    208,   1,   0,   0,   5,   0, 

+      0,   0,  52,   0,   0,   0, 

+    140,   0,   0,   0, 224,   0, 

+      0,   0,  20,   1,   0,   0, 

+     84,   1,   0,   0,  82,  68, 

+     69,  70,  80,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+     28,   0,   0,   0,   0,   4, 

+    255, 255,   0,   1,   0,   0, 

+     28,   0,   0,   0,  77, 105, 

+     99, 114, 111, 115, 111, 102, 

+    116,  32,  40,  82,  41,  32, 

+     72,  76,  83,  76,  32,  83, 

+    104,  97, 100, 101, 114,  32, 

+     67, 111, 109, 112, 105, 108, 

+    101, 114,  32,  57,  46,  50, 

+     57,  46,  57,  53,  50,  46, 

+     51,  49,  49,  49,   0, 171, 

+    171, 171,  73,  83,  71,  78, 

+     76,   0,   0,   0,   2,   0, 

+      0,   0,   8,   0,   0,   0, 

+     56,   0,   0,   0,   0,   0, 

+      0,   0,   1,   0,   0,   0, 

+      3,   0,   0,   0,   0,   0, 

+      0,   0,  15,   0,   0,   0, 

+     68,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      3,   0,   0,   0,   1,   0, 

+      0,   0,  15,  15,   0,   0, 

+     83,  86,  95,  80, 111, 115, 

+    105, 116, 105, 111, 110,   0, 

+     67,  79,  76,  79,  82,   0, 

+    171, 171,  79,  83,  71,  78, 

+     44,   0,   0,   0,   1,   0, 

+      0,   0,   8,   0,   0,   0, 

+     32,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      3,   0,   0,   0,   0,   0, 

+      0,   0,  15,   0,   0,   0, 

+     83,  86,  95,  84,  97, 114, 

+    103, 101, 116,   0, 171, 171, 

+     83,  72,  68,  82,  56,   0, 

+      0,   0,  64,   0,   0,   0, 

+     14,   0,   0,   0,  98,  16, 

+      0,   3, 242,  16,  16,   0, 

+      1,   0,   0,   0, 101,   0, 

+      0,   3, 242,  32,  16,   0, 

+      0,   0,   0,   0,  54,   0, 

+      0,   5, 242,  32,  16,   0, 

+      0,   0,   0,   0,  70,  30, 

+     16,   0,   1,   0,   0,   0, 

+     62,   0,   0,   1,  83,  84, 

+     65,  84, 116,   0,   0,   0, 

+      2,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      2,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   1,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   1,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0,   0,   0,   0,   0, 

+      0,   0

+};

diff --git a/src/libGLESv2/renderer/shaders/generate_shaders.bat b/src/libGLESv2/renderer/shaders/generate_shaders.bat
index a90c3bc..04ef136 100644
--- a/src/libGLESv2/renderer/shaders/generate_shaders.bat
+++ b/src/libGLESv2/renderer/shaders/generate_shaders.bat
@@ -20,4 +20,5 @@
 fxc /E PS_PassthroughLumAlpha /T ps_4_0 /Fh compiled/passthroughlumalpha11ps.h Passthrough11.hlsl

 

 fxc /E VS_Clear /T vs_4_0 /Fh compiled/clear11vs.h Clear11.hlsl

-fxc /E PS_Clear /T ps_4_0 /Fh compiled/clear11ps.h Clear11.hlsl

+fxc /E PS_ClearSingle /T ps_4_0 /Fh compiled/clearsingle11ps.h Clear11.hlsl

+fxc /E PS_ClearMultiple /T ps_4_0 /Fh compiled/clearmultiple11ps.h Clear11.hlsl