Improve D3D11 varying packing when there are more varyings than registers.

BUG=angle:738

Change-Id: I0599840fc79d571230acf26105d512322bcffdcd
Reviewed-on: https://chromium-review.googlesource.com/214108
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Tested-by: Geoff Lang <geofflang@chromium.org>
diff --git a/src/libGLESv2/ProgramBinary.cpp b/src/libGLESv2/ProgramBinary.cpp
index 2e755df..1cc55d2 100644
--- a/src/libGLESv2/ProgramBinary.cpp
+++ b/src/libGLESv2/ProgramBinary.cpp
@@ -1072,6 +1072,7 @@
                 }
 
                 output->registerIndex = input->registerIndex;
+                output->columnIndex = input->columnIndex;
 
                 matched = true;
                 break;
diff --git a/src/libGLESv2/Shader.h b/src/libGLESv2/Shader.h
index f28b805..7ba3bd1 100644
--- a/src/libGLESv2/Shader.h
+++ b/src/libGLESv2/Shader.h
@@ -35,10 +35,12 @@
 struct PackedVarying : public sh::Varying
 {
     unsigned int registerIndex; // Assigned during link
+    unsigned int columnIndex; // Assigned during link, defaults to 0
 
     PackedVarying(const sh::Varying &varying)
       : sh::Varying(varying),
-        registerIndex(GL_INVALID_INDEX)
+        registerIndex(GL_INVALID_INDEX),
+        columnIndex(0)
     {}
 
     bool registerAssigned() const { return registerIndex != GL_INVALID_INDEX; }
diff --git a/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp b/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp
index c91008a..13411eb 100644
--- a/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp
+++ b/src/libGLESv2/renderer/d3d/DynamicHLSL.cpp
@@ -126,6 +126,7 @@
             if (available)
             {
                 varying->registerIndex = r;
+                varying->columnIndex = 0;
 
                 for (int y = 0; y < registers; y++)
                 {
@@ -159,6 +160,7 @@
                 if (available)
                 {
                     varying->registerIndex = r;
+                    varying->columnIndex = 2;
 
                     for (int y = 0; y < registers; y++)
                     {
@@ -189,7 +191,7 @@
 
         for (int x = 0; x < 4; x++)
         {
-            if (space[x] >= registers && space[x] < space[column])
+            if (space[x] >= registers && (space[column] < registers || space[x] < space[column]))
             {
                 column = x;
             }
@@ -202,6 +204,7 @@
                 if (!packing[r][column])
                 {
                     varying->registerIndex = r;
+                    varying->columnIndex = column;
 
                     for (int y = r; y < r + registers; y++)
                     {
@@ -320,6 +323,10 @@
             {
                 for (int row = 0; row < variableRows; row++)
                 {
+                    // TODO: Add checks to ensure D3D interpolation modifiers don't result in too many registers being used.
+                    // For example, if there are N registers, and we have N vec3 varyings and 1 float varying, then D3D will pack them into N registers.
+                    // If the float varying has the 'nointerpolation' modifier on it then we would need N + 1 registers, and D3D compilation will fail.
+
                     switch (varying.interpolation)
                     {
                       case sh::INTERPOLATION_SMOOTH:   varyingHLSL += "    ";                 break;
@@ -328,7 +335,7 @@
                       default:  UNREACHABLE();
                     }
 
-                    unsigned int semanticIndex = elementIndex * variableRows + varying.registerIndex + row;
+                    unsigned int semanticIndex = elementIndex * variableRows + varying.columnIndex * mRenderer->getRendererCaps().maxVaryingVectors + varying.registerIndex + row;
                     std::string n = Str(semanticIndex);
 
                     std::string typeString;
@@ -765,39 +772,9 @@
 
                 for (int row = 0; row < variableRows; row++)
                 {
-                    int r = varying.registerIndex + elementIndex * variableRows + row;
+                    int r = varying.registerIndex + varying.columnIndex * mRenderer->getRendererCaps().maxVaryingVectors + elementIndex * variableRows + row;
                     vertexHLSL += "    output.v" + Str(r);
 
-                    bool sharedRegister = false;   // Register used by multiple varyings
-
-                    for (int x = 0; x < 4; x++)
-                    {
-                        if (packing[r][x] && packing[r][x] != packing[r][0])
-                        {
-                            sharedRegister = true;
-                            break;
-                        }
-                    }
-
-                    if(sharedRegister)
-                    {
-                        vertexHLSL += ".";
-
-                        for (int x = 0; x < 4; x++)
-                        {
-                            if (packing[r][x] == &varying)
-                            {
-                                switch(x)
-                                {
-                                  case 0: vertexHLSL += "x"; break;
-                                  case 1: vertexHLSL += "y"; break;
-                                  case 2: vertexHLSL += "z"; break;
-                                  case 3: vertexHLSL += "w"; break;
-                                }
-                            }
-                        }
-                    }
-
                     vertexHLSL += " = _" + varying.name;
 
                     if (varying.isArray())
@@ -943,7 +920,7 @@
                 int variableRows = (varying.isStruct() ? 1 : VariableRowCount(transposedType));
                 for (int row = 0; row < variableRows; row++)
                 {
-                    std::string n = Str(varying.registerIndex + elementIndex * variableRows + row);
+                    std::string n = Str(varying.registerIndex + varying.columnIndex * mRenderer->getRendererCaps().maxVaryingVectors + elementIndex * variableRows + row);
                     pixelHLSL += "    _" + varying.name;
 
                     if (varying.isArray())
diff --git a/tests/angle_tests/GLSLTest.cpp b/tests/angle_tests/GLSLTest.cpp
index b47b3f7..dca8ac2 100644
--- a/tests/angle_tests/GLSLTest.cpp
+++ b/tests/angle_tests/GLSLTest.cpp
@@ -27,6 +27,230 @@
         );
     }
 
+    std::string GenerateVaryingType(GLint vectorSize)
+    {
+        // Returns the GLSL type name for a varying with vectorSize components: "float" for 1, "vec<N>" otherwise.
+        // The buffer holds the worst case, "vec" + an 11-character formatted int + NUL (assumes GLint is a 32-bit int).
+        char varyingType[16];
+        if (vectorSize == 1)
+        {
+            sprintf(varyingType, "float");
+        }
+        else
+        {
+            sprintf(varyingType, "vec%d", vectorSize);
+        }
+        return std::string(varyingType);
+    }
+
+    std::string GenerateVectorVaryingDeclaration(GLint vectorSize, GLint arraySize, GLint id)
+    {
+        // Emits one GLSL declaration line: "varying <type> v<id>;" or "varying <type> v<id>[<arraySize>];".
+        const std::string typeName = GenerateVaryingType(vectorSize);
+        char line[100];
+
+        // An arraySize of 1 denotes a plain (non-array) varying.
+        if (arraySize != 1)
+        {
+            sprintf(line, "varying %s v%d[%d];\n", typeName.c_str(), id, arraySize);
+            return std::string(line);
+        }
+        sprintf(line, "varying %s v%d;\n", typeName.c_str(), id);
+        return std::string(line);
+    }
+
+    std::string GenerateVectorVaryingSettingCode(GLint vectorSize, GLint arraySize, GLint id)
+    {
+        // Emits vertex-shader statements assigning <type>(1.0) to varying v<id>, one statement per array element.
+        const std::string typeName = GenerateVaryingType(vectorSize);
+        char statement[100];
+
+        // An arraySize of 1 denotes a plain varying, which is assigned without a subscript.
+        if (arraySize == 1)
+        {
+            sprintf(statement, "\t v%d = %s(1.0);\n", id, typeName.c_str());
+            return std::string(statement);
+        }
+
+        std::string assignments;
+        for (int element = 0; element < arraySize; ++element)
+        {
+            sprintf(statement, "\t v%d[%d] = %s(1.0);\n", id, element, typeName.c_str());
+            assignments += statement;
+        }
+        return assignments;
+    }
+
+    std::string GenerateVectorVaryingUseCode(GLint arraySize, GLint id)
+    {
+        // Emits the fragment-shader text reading varying v<id>; each operand ends in " + " so the caller can append a final term.
+        char operand[100];
+
+        // An arraySize of 1 denotes a plain varying, which is read without a subscript.
+        if (arraySize == 1)
+        {
+            sprintf(operand, "v%d + ", id);
+            return std::string(operand);
+        }
+
+        std::string operands;
+        for (int element = 0; element < arraySize; ++element)
+        {
+            sprintf(operand, "v%d[%d] + ", id, element);
+            operands += operand;
+        }
+        return operands;
+    }
+
+    void GenerateGLSLWithVaryings(GLint floatCount, GLint floatArrayCount, GLint vec2Count, GLint vec2ArrayCount, GLint vec3Count, GLint vec3ArrayCount, std::string* fragmentShader, std::string* vertexShader)
+    {
+        // Fills *vertexShader and *fragmentShader (both are dereferenced unchecked; prior contents are cleared) with a GLSL pair that declares and uses the requested number of varyings of each kind. Every "Array" count refers to 2-element arrays.
+        std::string varyingDeclaration; // Varying declarations, shared verbatim by the vertex and fragment shaders.
+
+        unsigned int varyingCount = 0; // Running id: the i-th declared varying is named v<i>.
+
+        for (GLint i = 0; i < floatCount; i++)
+        {
+            varyingDeclaration += GenerateVectorVaryingDeclaration(1, 1, varyingCount); // float
+            varyingCount += 1;
+        }
+
+        for (GLint i = 0; i < floatArrayCount; i++)
+        {
+            varyingDeclaration += GenerateVectorVaryingDeclaration(1, 2, varyingCount); // float[2]
+            varyingCount += 1;
+        }
+
+        for (GLint i = 0; i < vec2Count; i++)
+        {
+            varyingDeclaration += GenerateVectorVaryingDeclaration(2, 1, varyingCount); // vec2
+            varyingCount += 1;
+        }
+
+        for (GLint i = 0; i < vec2ArrayCount; i++)
+        {
+            varyingDeclaration += GenerateVectorVaryingDeclaration(2, 2, varyingCount); // vec2[2]
+            varyingCount += 1;
+        }
+
+        for (GLint i = 0; i < vec3Count; i++)
+        {
+            varyingDeclaration += GenerateVectorVaryingDeclaration(3, 1, varyingCount); // vec3
+            varyingCount += 1;
+        }
+
+        for (GLint i = 0; i < vec3ArrayCount; i++)
+        {
+            varyingDeclaration += GenerateVectorVaryingDeclaration(3, 2, varyingCount); // vec3[2]
+            varyingCount += 1;
+        }
+
+        // Generate the vertex shader: main() assigns 1.0 to every component of every varying and writes nothing else (gl_Position is not set).
+        vertexShader->clear();
+        vertexShader->append(varyingDeclaration);
+        vertexShader->append("\nvoid main()\n{\n");
+
+        unsigned int currentVSVarying = 0; // Walks the kinds in the same order as the declarations above so the ids line up.
+
+        for (GLint i = 0; i < floatCount; i++)
+        {
+            vertexShader->append(GenerateVectorVaryingSettingCode(1, 1, currentVSVarying));
+            currentVSVarying += 1;
+        }
+
+        for (GLint i = 0; i < floatArrayCount; i++)
+        {
+            vertexShader->append(GenerateVectorVaryingSettingCode(1, 2, currentVSVarying));
+            currentVSVarying += 1;
+        }
+
+        for (GLint i = 0; i < vec2Count; i++)
+        {
+            vertexShader->append(GenerateVectorVaryingSettingCode(2, 1, currentVSVarying));
+            currentVSVarying += 1;
+        }
+
+        for (GLint i = 0; i < vec2ArrayCount; i++)
+        {
+            vertexShader->append(GenerateVectorVaryingSettingCode(2, 2, currentVSVarying));
+            currentVSVarying += 1;
+        }
+
+        for (GLint i = 0; i < vec3Count; i++)
+        {
+            vertexShader->append(GenerateVectorVaryingSettingCode(3, 1, currentVSVarying));
+            currentVSVarying += 1;
+        }
+
+        for (GLint i = 0; i < vec3ArrayCount; i++)
+        {
+            vertexShader->append(GenerateVectorVaryingSettingCode(3, 2, currentVSVarying));
+            currentVSVarying += 1;
+        }
+
+        vertexShader->append("}\n");
+
+        // Generate the fragment shader: main() accumulates every varying into retColor and writes it to gl_FragColor.
+        fragmentShader->clear();
+        fragmentShader->append("precision highp float;\n");
+        fragmentShader->append(varyingDeclaration);
+        fragmentShader->append("\nvoid main() \n{ \n\tvec4 retColor = vec4(0,0,0,0);\n");
+
+        unsigned int currentFSVarying = 0; // Walks the kinds in declaration order again, so each group below picks up where the previous one stopped.
+
+        // Make use of the float varyings: their sum becomes the x component of a vec4.
+        fragmentShader->append("\tretColor += vec4(");
+
+        for (GLint i = 0; i < floatCount; i++)
+        {
+            fragmentShader->append(GenerateVectorVaryingUseCode(1, currentFSVarying)); // arraySize 1: plain varying
+            currentFSVarying += 1;
+        }
+
+        for (GLint i = 0; i < floatArrayCount; i++)
+        {
+            fragmentShader->append(GenerateVectorVaryingUseCode(2, currentFSVarying)); // arraySize 2: both elements
+            currentFSVarying += 1;
+        }
+
+        fragmentShader->append("0.0, 0.0, 0.0, 0.0);\n");
+
+        // Make use of the vec2 varyings: their sum becomes the xy components of a vec4.
+        fragmentShader->append("\tretColor += vec4(");
+
+        for (GLint i = 0; i < vec2Count; i++)
+        {
+            fragmentShader->append(GenerateVectorVaryingUseCode(1, currentFSVarying));
+            currentFSVarying += 1;
+        }
+
+        for (GLint i = 0; i < vec2ArrayCount; i++)
+        {
+            fragmentShader->append(GenerateVectorVaryingUseCode(2, currentFSVarying));
+            currentFSVarying += 1;
+        }
+
+        fragmentShader->append("vec2(0.0, 0.0), 0.0, 0.0);\n");
+
+        // Make use of the vec3 varyings: their sum becomes the xyz components of a vec4.
+        fragmentShader->append("\tretColor += vec4(");
+
+        for (GLint i = 0; i < vec3Count; i++)
+        {
+            fragmentShader->append(GenerateVectorVaryingUseCode(1, currentFSVarying));
+            currentFSVarying += 1;
+        }
+
+        for (GLint i = 0; i < vec3ArrayCount; i++)
+        {
+            fragmentShader->append(GenerateVectorVaryingUseCode(2, currentFSVarying));
+            currentFSVarying += 1;
+        }
+
+        fragmentShader->append("vec3(0.0, 0.0, 0.0), 0.0);\n");
+        fragmentShader->append("\tgl_FragColor = retColor;\n}");
+    }
+
     std::string mSimpleVSSource;
 };
 
@@ -342,3 +566,157 @@
     GLuint program = CompileProgram(vertexShaderSource, fragmentShaderSource);
     EXPECT_NE(0u, program);
 }
+
+TEST_F(GLSLTest, MaxVaryingVec3)
+{
+    // GL_MAX_VARYING_VECTORS vec3 varyings: expects a non-zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 0, 0, maxVaryingVectors, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_NE(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxVaryingVec3Array)
+{
+    // GL_MAX_VARYING_VECTORS / 2 two-element vec3 arrays: expects a non-zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 0, 0, 0, maxVaryingVectors / 2, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_NE(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxVaryingVec3AndOneFloat)
+{
+    // GL_MAX_VARYING_VECTORS vec3 varyings plus a single float: expects a non-zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(1, 0, 0, 0, maxVaryingVectors, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_NE(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxVaryingVec3ArrayAndOneFloatArray)
+{
+    // GL_MAX_VARYING_VECTORS / 2 two-element vec3 arrays plus one two-element float array: expects a non-zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 1, 0, 0, 0, maxVaryingVectors / 2, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_NE(0u, programHandle);
+}
+
+TEST_F(GLSLTest, TwiceMaxVaryingVec2)
+{
+    // 2 * GL_MAX_VARYING_VECTORS vec2 varyings: expects a non-zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 2 * maxVaryingVectors, 0, 0, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_NE(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxVaryingVec2Arrays)
+{
+    // GL_MAX_VARYING_VECTORS two-element vec2 arrays: expects a non-zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 0, maxVaryingVectors, 0, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_NE(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxPlusOneVaryingVec3)
+{
+    // One vec3 varying more than GL_MAX_VARYING_VECTORS: expects a zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 0, 0, maxVaryingVectors + 1, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_EQ(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxPlusOneVaryingVec3Array)
+{
+    // GL_MAX_VARYING_VECTORS / 2 + 1 two-element vec3 arrays: expects a zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 0, 0, 0, maxVaryingVectors / 2 + 1, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_EQ(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxVaryingVec3AndOneVec2)
+{
+    // GL_MAX_VARYING_VECTORS vec3 varyings plus a single vec2: expects a zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 1, 0, maxVaryingVectors, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_EQ(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxPlusOneVaryingVec2)
+{
+    // 2 * GL_MAX_VARYING_VECTORS + 1 vec2 varyings: expects a zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, 0, 2 * maxVaryingVectors + 1, 0, 0, 0, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_EQ(0u, programHandle);
+}
+
+TEST_F(GLSLTest, MaxVaryingVec3ArrayAndMaxPlusOneFloatArray)
+{
+    // GL_MAX_VARYING_VECTORS / 2 two-element vec3 arrays plus GL_MAX_VARYING_VECTORS / 2 + 1 two-element float arrays: expects a zero program handle.
+    GLint maxVaryingVectors = 0;
+    glGetIntegerv(GL_MAX_VARYING_VECTORS, &maxVaryingVectors);
+
+    std::string vsSource;
+    std::string fsSource;
+    GenerateGLSLWithVaryings(0, maxVaryingVectors / 2 + 1, 0, 0, 0, maxVaryingVectors / 2, &fsSource, &vsSource);
+
+    const GLuint programHandle = CompileProgram(vsSource, fsSource);
+    EXPECT_EQ(0u, programHandle);
+}