Add struct index-lookup test.

Somewhat surprisingly, this test is ES2 compliant.

We didn't have any existing tests (as far as I can see) that exercised
nested lookups with more than one index expression.

Change-Id: I869edc048744fad3a2a5587c8c9d70fc477c86ae
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/648458
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
diff --git a/gn/sksl_tests.gni b/gn/sksl_tests.gni
index 48179e0..1924247 100644
--- a/gn/sksl_tests.gni
+++ b/gn/sksl_tests.gni
@@ -639,6 +639,7 @@
   "shared/StaticSwitchWithStaticConditionalBreakInsideBlock.sksl",
   "shared/StructArrayFollowedByScalar.sksl",
   "shared/StructComparison.sksl",
+  "shared/StructIndexLookup.sksl",
   "shared/StructMaxDepth.sksl",
   "shared/Structs.sksl",
   "shared/StructsInFunctions.sksl",
diff --git a/resources/sksl/BUILD.bazel b/resources/sksl/BUILD.bazel
index f127ced..670b143 100644
--- a/resources/sksl/BUILD.bazel
+++ b/resources/sksl/BUILD.bazel
@@ -939,6 +939,7 @@
         "shared/StaticSwitchWithStaticConditionalBreakInsideBlock.sksl",
         "shared/StructArrayFollowedByScalar.sksl",
         "shared/StructComparison.sksl",
+        "shared/StructIndexLookup.sksl",
         "shared/StructMaxDepth.sksl",
         "shared/Structs.sksl",
         "shared/StructsInFunctions.sksl",
diff --git a/resources/sksl/shared/StructIndexLookup.sksl b/resources/sksl/shared/StructIndexLookup.sksl
new file mode 100644
index 0000000..349b574
--- /dev/null
+++ b/resources/sksl/shared/StructIndexLookup.sksl
@@ -0,0 +1,45 @@
+uniform half4 colorGreen, colorRed;
+
+struct InnerLUT {  // Innermost level: wraps the float3 that main() writes and reads back.
+    float3 values;
+};
+struct OuterLUT {  // Middle level: a fixed-size array of three InnerLUT entries.
+    InnerLUT inner[3];
+};
+struct Root {  // Top level: three OuterLUTs, so outer[i].inner[j] addresses a 3x3 table.
+    OuterLUT outer[3];
+};
+
+half4 main(float2 coords) {  // colorGreen if every lookup matches, else colorRed; coords is unused.
+    Root data;  // Populated entry by entry below, using constant indices only.
+    data.outer[0].inner[0].values = float3(1, 10, 100);
+    data.outer[0].inner[1].values = float3(2, 20, 200);
+    data.outer[0].inner[2].values = float3(3, 30, 300);
+    data.outer[1].inner[0].values = float3(4, 40, 400);
+    data.outer[1].inner[1].values = float3(5, 50, 500);
+    data.outer[1].inner[2].values = float3(6, 60, 600);
+    data.outer[2].inner[0].values = float3(7, 70, 700);
+    data.outer[2].inner[1].values = float3(8, 80, 800);
+    data.outer[2].inner[2].values = float3(9, 90, 900);
+
+    float3 expected = float3(0);  // Running reference value, advanced once per (i, j) visit.
+    for (int i=0; i<3; ++i) {
+        for (int j=0; j<3; ++j) {
+            expected += float3(1, 10, 100);  // Entries step by (1, 10, 100) in i-major order.
+
+            // Whole-vector check: a nested lookup with two index expressions (i and j).
+            if (data.outer[i].inner[j].values != expected) {
+                return colorRed;
+            }
+
+            // Per-component check: the same lookup plus a third index expression (k).
+            for (int k=0; k<3; ++k) {
+                if (data.outer[i].inner[j].values[k] != expected[k]) {
+                    return colorRed;
+                }
+            }
+        }
+    }
+
+    return colorGreen;
+}
diff --git a/tests/SkSLTest.cpp b/tests/SkSLTest.cpp
index 766168a..f0678bc 100644
--- a/tests/SkSLTest.cpp
+++ b/tests/SkSLTest.cpp
@@ -680,6 +680,7 @@
 SKSL_TEST(RP + VM + GPU,     kApiLevel_T, StackingVectorCasts,             "shared/StackingVectorCasts.sksl")
 SKSL_TEST(RP + VM + GPU_ES3, kNever,      StaticSwitch,                    "shared/StaticSwitch.sksl")
 SKSL_TEST(RP + VM + GPU,     kApiLevel_T, StructArrayFollowedByScalar,     "shared/StructArrayFollowedByScalar.sksl")
+SKSL_TEST(RP + VM + GPU,     kNextRelease,StructIndexLookup,               "shared/StructIndexLookup.sksl")
 // TODO(skia:13920): StructComparison currently exposes a bug in SPIR-V codegen.
 // SKSL_TEST(RP /* +GPU_ES3 */, kNever,      StructComparison,                "shared/StructComparison.sksl")
 SKSL_TEST(RP + VM + GPU,     kApiLevel_T, StructsInFunctions,              "shared/StructsInFunctions.sksl")
diff --git a/tests/sksl/shared/StructIndexLookup.asm.frag b/tests/sksl/shared/StructIndexLookup.asm.frag
new file mode 100644
index 0000000..0308175
--- /dev/null
+++ b/tests/sksl/shared/StructIndexLookup.asm.frag
@@ -0,0 +1,237 @@
+OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %_entrypoint_v "_entrypoint" %sk_Clockwise %sk_FragColor
+OpExecutionMode %_entrypoint_v OriginUpperLeft
+OpName %sk_Clockwise "sk_Clockwise"
+OpName %sk_FragColor "sk_FragColor"
+OpName %_UniformBuffer "_UniformBuffer"
+OpMemberName %_UniformBuffer 0 "colorGreen"
+OpMemberName %_UniformBuffer 1 "colorRed"
+OpName %_entrypoint_v "_entrypoint_v"
+OpName %main "main"
+OpName %InnerLUT "InnerLUT"
+OpMemberName %InnerLUT 0 "values"
+OpName %OuterLUT "OuterLUT"
+OpMemberName %OuterLUT 0 "inner"
+OpName %Root "Root"
+OpMemberName %Root 0 "outer"
+OpName %data "data"
+OpName %expected "expected"
+OpName %i "i"
+OpName %j "j"
+OpName %k "k"
+OpDecorate %sk_Clockwise BuiltIn FrontFacing
+OpDecorate %sk_FragColor RelaxedPrecision
+OpDecorate %sk_FragColor Location 0
+OpDecorate %sk_FragColor Index 0
+OpMemberDecorate %_UniformBuffer 0 Offset 0
+OpMemberDecorate %_UniformBuffer 0 RelaxedPrecision
+OpMemberDecorate %_UniformBuffer 1 Offset 16
+OpMemberDecorate %_UniformBuffer 1 RelaxedPrecision
+OpDecorate %_UniformBuffer Block
+OpDecorate %10 Binding 0
+OpDecorate %10 DescriptorSet 0
+OpMemberDecorate %InnerLUT 0 Offset 0
+OpDecorate %_arr_InnerLUT_int_3 ArrayStride 16
+OpMemberDecorate %OuterLUT 0 Offset 0
+OpMemberDecorate %OuterLUT 0 RelaxedPrecision
+OpDecorate %_arr_OuterLUT_int_3 ArrayStride 48
+OpMemberDecorate %Root 0 Offset 0
+OpMemberDecorate %Root 0 RelaxedPrecision
+OpDecorate %117 RelaxedPrecision
+OpDecorate %139 RelaxedPrecision
+OpDecorate %147 RelaxedPrecision
+%bool = OpTypeBool
+%_ptr_Input_bool = OpTypePointer Input %bool
+%sk_Clockwise = OpVariable %_ptr_Input_bool Input
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%sk_FragColor = OpVariable %_ptr_Output_v4float Output
+%_UniformBuffer = OpTypeStruct %v4float %v4float
+%_ptr_Uniform__UniformBuffer = OpTypePointer Uniform %_UniformBuffer
+%10 = OpVariable %_ptr_Uniform__UniformBuffer Uniform
+%void = OpTypeVoid
+%15 = OpTypeFunction %void
+%float_0 = OpConstant %float 0
+%v2float = OpTypeVector %float 2
+%19 = OpConstantComposite %v2float %float_0 %float_0
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%23 = OpTypeFunction %v4float %_ptr_Function_v2float
+%v3float = OpTypeVector %float 3
+%InnerLUT = OpTypeStruct %v3float
+%int = OpTypeInt 32 1
+%int_3 = OpConstant %int 3
+%_arr_InnerLUT_int_3 = OpTypeArray %InnerLUT %int_3
+%OuterLUT = OpTypeStruct %_arr_InnerLUT_int_3
+%_arr_OuterLUT_int_3 = OpTypeArray %OuterLUT %int_3
+%Root = OpTypeStruct %_arr_OuterLUT_int_3
+%_ptr_Function_Root = OpTypePointer Function %Root
+%float_1 = OpConstant %float 1
+%float_10 = OpConstant %float 10
+%float_100 = OpConstant %float 100
+%39 = OpConstantComposite %v3float %float_1 %float_10 %float_100
+%int_0 = OpConstant %int 0
+%_ptr_Function_v3float = OpTypePointer Function %v3float
+%float_2 = OpConstant %float 2
+%float_20 = OpConstant %float 20
+%float_200 = OpConstant %float 200
+%46 = OpConstantComposite %v3float %float_2 %float_20 %float_200
+%int_1 = OpConstant %int 1
+%float_3 = OpConstant %float 3
+%float_30 = OpConstant %float 30
+%float_300 = OpConstant %float 300
+%52 = OpConstantComposite %v3float %float_3 %float_30 %float_300
+%int_2 = OpConstant %int 2
+%float_4 = OpConstant %float 4
+%float_40 = OpConstant %float 40
+%float_400 = OpConstant %float 400
+%58 = OpConstantComposite %v3float %float_4 %float_40 %float_400
+%float_5 = OpConstant %float 5
+%float_50 = OpConstant %float 50
+%float_500 = OpConstant %float 500
+%63 = OpConstantComposite %v3float %float_5 %float_50 %float_500
+%float_6 = OpConstant %float 6
+%float_60 = OpConstant %float 60
+%float_600 = OpConstant %float 600
+%68 = OpConstantComposite %v3float %float_6 %float_60 %float_600
+%float_7 = OpConstant %float 7
+%float_70 = OpConstant %float 70
+%float_700 = OpConstant %float 700
+%73 = OpConstantComposite %v3float %float_7 %float_70 %float_700
+%float_8 = OpConstant %float 8
+%float_80 = OpConstant %float 80
+%float_800 = OpConstant %float 800
+%78 = OpConstantComposite %v3float %float_8 %float_80 %float_800
+%float_9 = OpConstant %float 9
+%float_90 = OpConstant %float 90
+%float_900 = OpConstant %float 900
+%83 = OpConstantComposite %v3float %float_9 %float_90 %float_900
+%86 = OpConstantComposite %v3float %float_0 %float_0 %float_0
+%_ptr_Function_int = OpTypePointer Function %int
+%v3bool = OpTypeVector %bool 3
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%_entrypoint_v = OpFunction %void None %15
+%16 = OpLabel
+%20 = OpVariable %_ptr_Function_v2float Function
+OpStore %20 %19
+%22 = OpFunctionCall %v4float %main %20
+OpStore %sk_FragColor %22
+OpReturn
+OpFunctionEnd
+%main = OpFunction %v4float None %23
+%24 = OpFunctionParameter %_ptr_Function_v2float
+%25 = OpLabel
+%data = OpVariable %_ptr_Function_Root Function
+%expected = OpVariable %_ptr_Function_v3float Function
+%i = OpVariable %_ptr_Function_int Function
+%j = OpVariable %_ptr_Function_int Function
+%k = OpVariable %_ptr_Function_int Function
+%41 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_0 %int_0 %int_0 %int_0
+OpStore %41 %39
+%48 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_0 %int_0 %int_1 %int_0
+OpStore %48 %46
+%54 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_0 %int_0 %int_2 %int_0
+OpStore %54 %52
+%59 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_1 %int_0 %int_0 %int_0
+OpStore %59 %58
+%64 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_1 %int_0 %int_1 %int_0
+OpStore %64 %63
+%69 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_1 %int_0 %int_2 %int_0
+OpStore %69 %68
+%74 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_2 %int_0 %int_0 %int_0
+OpStore %74 %73
+%79 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_2 %int_0 %int_1 %int_0
+OpStore %79 %78
+%84 = OpAccessChain %_ptr_Function_v3float %data %int_0 %int_2 %int_0 %int_2 %int_0
+OpStore %84 %83
+OpStore %expected %86
+OpStore %i %int_0
+OpBranch %89
+%89 = OpLabel
+OpLoopMerge %93 %92 None
+OpBranch %90
+%90 = OpLabel
+%94 = OpLoad %int %i
+%95 = OpSLessThan %bool %94 %int_3
+OpBranchConditional %95 %91 %93
+%91 = OpLabel
+OpStore %j %int_0
+OpBranch %97
+%97 = OpLabel
+OpLoopMerge %101 %100 None
+OpBranch %98
+%98 = OpLabel
+%102 = OpLoad %int %j
+%103 = OpSLessThan %bool %102 %int_3
+OpBranchConditional %103 %99 %101
+%99 = OpLabel
+%104 = OpLoad %v3float %expected
+%105 = OpFAdd %v3float %104 %39
+OpStore %expected %105
+%106 = OpLoad %int %i
+%107 = OpLoad %int %j
+%108 = OpAccessChain %_ptr_Function_v3float %data %int_0 %106 %int_0 %107 %int_0
+%109 = OpLoad %v3float %108
+%110 = OpFUnordNotEqual %v3bool %109 %105
+%112 = OpAny %bool %110
+OpSelectionMerge %114 None
+OpBranchConditional %112 %113 %114
+%113 = OpLabel
+%115 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
+%117 = OpLoad %v4float %115
+OpReturnValue %117
+%114 = OpLabel
+OpStore %k %int_0
+OpBranch %119
+%119 = OpLabel
+OpLoopMerge %123 %122 None
+OpBranch %120
+%120 = OpLabel
+%124 = OpLoad %int %k
+%125 = OpSLessThan %bool %124 %int_3
+OpBranchConditional %125 %121 %123
+%121 = OpLabel
+%126 = OpLoad %int %i
+%127 = OpLoad %int %j
+%128 = OpAccessChain %_ptr_Function_v3float %data %int_0 %126 %int_0 %127 %int_0
+%129 = OpLoad %v3float %128
+%130 = OpLoad %int %k
+%131 = OpVectorExtractDynamic %float %129 %130
+%132 = OpLoad %v3float %expected
+%133 = OpLoad %int %k
+%134 = OpVectorExtractDynamic %float %132 %133
+%135 = OpFUnordNotEqual %bool %131 %134
+OpSelectionMerge %137 None
+OpBranchConditional %135 %136 %137
+%136 = OpLabel
+%138 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
+%139 = OpLoad %v4float %138
+OpReturnValue %139
+%137 = OpLabel
+OpBranch %122
+%122 = OpLabel
+%140 = OpLoad %int %k
+%141 = OpIAdd %int %140 %int_1
+OpStore %k %141
+OpBranch %119
+%123 = OpLabel
+OpBranch %100
+%100 = OpLabel
+%142 = OpLoad %int %j
+%143 = OpIAdd %int %142 %int_1
+OpStore %j %143
+OpBranch %97
+%101 = OpLabel
+OpBranch %92
+%92 = OpLabel
+%144 = OpLoad %int %i
+%145 = OpIAdd %int %144 %int_1
+OpStore %i %145
+OpBranch %89
+%93 = OpLabel
+%146 = OpAccessChain %_ptr_Uniform_v4float %10 %int_0
+%147 = OpLoad %v4float %146
+OpReturnValue %147
+OpFunctionEnd
diff --git a/tests/sksl/shared/StructIndexLookup.glsl b/tests/sksl/shared/StructIndexLookup.glsl
new file mode 100644
index 0000000..28a3225
--- /dev/null
+++ b/tests/sksl/shared/StructIndexLookup.glsl
@@ -0,0 +1,40 @@
+
+out vec4 sk_FragColor;
+uniform vec4 colorGreen;
+uniform vec4 colorRed;
+struct InnerLUT {
+    vec3 values;
+};
+struct OuterLUT {
+    InnerLUT inner[3];
+};
+struct Root {
+    OuterLUT outer[3];
+};
+vec4 main() {
+    Root data;
+    data.outer[0].inner[0].values = vec3(1.0, 10.0, 100.0);
+    data.outer[0].inner[1].values = vec3(2.0, 20.0, 200.0);
+    data.outer[0].inner[2].values = vec3(3.0, 30.0, 300.0);
+    data.outer[1].inner[0].values = vec3(4.0, 40.0, 400.0);
+    data.outer[1].inner[1].values = vec3(5.0, 50.0, 500.0);
+    data.outer[1].inner[2].values = vec3(6.0, 60.0, 600.0);
+    data.outer[2].inner[0].values = vec3(7.0, 70.0, 700.0);
+    data.outer[2].inner[1].values = vec3(8.0, 80.0, 800.0);
+    data.outer[2].inner[2].values = vec3(9.0, 90.0, 900.0);
+    vec3 expected = vec3(0.0);
+    for (int i = 0;i < 3; ++i) {
+        for (int j = 0;j < 3; ++j) {
+            expected += vec3(1.0, 10.0, 100.0);
+            if (data.outer[i].inner[j].values != expected) {
+                return colorRed;
+            }
+            for (int k = 0;k < 3; ++k) {
+                if (data.outer[i].inner[j].values[k] != expected[k]) {
+                    return colorRed;
+                }
+            }
+        }
+    }
+    return colorGreen;
+}
diff --git a/tests/sksl/shared/StructIndexLookup.hlsl b/tests/sksl/shared/StructIndexLookup.hlsl
new file mode 100644
index 0000000..81beb87
--- /dev/null
+++ b/tests/sksl/shared/StructIndexLookup.hlsl
@@ -0,0 +1,78 @@
+struct InnerLUT
+{
+    float3 values;
+};
+
+struct OuterLUT
+{
+    InnerLUT inner[3];
+};
+
+struct Root
+{
+    OuterLUT outer[3];
+};
+
+cbuffer _UniformBuffer : register(b0, space0)
+{
+    float4 _10_colorGreen : packoffset(c0);
+    float4 _10_colorRed : packoffset(c1);
+};
+
+
+static float4 sk_FragColor;
+
+struct SPIRV_Cross_Output
+{
+    float4 sk_FragColor : SV_Target0;
+};
+
+float4 main(float2 _24)
+{
+    Root data = { { { { { 0.0f.xxx }, { 0.0f.xxx }, { 0.0f.xxx } } }, { { { 0.0f.xxx }, { 0.0f.xxx }, { 0.0f.xxx } } }, { { { 0.0f.xxx }, { 0.0f.xxx }, { 0.0f.xxx } } } } };
+    data.outer[0].inner[0].values = float3(1.0f, 10.0f, 100.0f);
+    data.outer[0].inner[1].values = float3(2.0f, 20.0f, 200.0f);
+    data.outer[0].inner[2].values = float3(3.0f, 30.0f, 300.0f);
+    data.outer[1].inner[0].values = float3(4.0f, 40.0f, 400.0f);
+    data.outer[1].inner[1].values = float3(5.0f, 50.0f, 500.0f);
+    data.outer[1].inner[2].values = float3(6.0f, 60.0f, 600.0f);
+    data.outer[2].inner[0].values = float3(7.0f, 70.0f, 700.0f);
+    data.outer[2].inner[1].values = float3(8.0f, 80.0f, 800.0f);
+    data.outer[2].inner[2].values = float3(9.0f, 90.0f, 900.0f);
+    float3 expected = 0.0f.xxx;
+    for (int i = 0; i < 3; i++)
+    {
+        for (int j = 0; j < 3; j++)
+        {
+            float3 _104 = expected;
+            float3 _105 = _104 + float3(1.0f, 10.0f, 100.0f);
+            expected = _105;
+            if (any(bool3(data.outer[i].inner[j].values.x != _105.x, data.outer[i].inner[j].values.y != _105.y, data.outer[i].inner[j].values.z != _105.z)))
+            {
+                return _10_colorRed;
+            }
+            for (int k = 0; k < 3; k++)
+            {
+                if (data.outer[i].inner[j].values[k] != expected[k])
+                {
+                    return _10_colorRed;
+                }
+            }
+        }
+    }
+    return _10_colorGreen;
+}
+
+void frag_main()
+{
+    float2 _20 = 0.0f.xx;
+    sk_FragColor = main(_20);
+}
+
+SPIRV_Cross_Output main()
+{
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.sk_FragColor = sk_FragColor;
+    return stage_output;
+}
diff --git a/tests/sksl/shared/StructIndexLookup.metal b/tests/sksl/shared/StructIndexLookup.metal
new file mode 100644
index 0000000..f16c934
--- /dev/null
+++ b/tests/sksl/shared/StructIndexLookup.metal
@@ -0,0 +1,53 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+using namespace metal;
+struct InnerLUT {
+    float3 values;
+};
+struct OuterLUT {
+    array<InnerLUT, 3> inner;
+};
+struct Root {
+    array<OuterLUT, 3> outer;
+};
+struct Uniforms {
+    half4 colorGreen;
+    half4 colorRed;
+};
+struct Inputs {
+};
+struct Outputs {
+    half4 sk_FragColor [[color(0)]];
+};
+fragment Outputs fragmentMain(Inputs _in [[stage_in]], constant Uniforms& _uniforms [[buffer(0)]], bool _frontFacing [[front_facing]], float4 _fragCoord [[position]]) {
+    Outputs _out;
+    (void)_out;
+    Root data;
+    data.outer[0].inner[0].values = float3(1.0, 10.0, 100.0);
+    data.outer[0].inner[1].values = float3(2.0, 20.0, 200.0);
+    data.outer[0].inner[2].values = float3(3.0, 30.0, 300.0);
+    data.outer[1].inner[0].values = float3(4.0, 40.0, 400.0);
+    data.outer[1].inner[1].values = float3(5.0, 50.0, 500.0);
+    data.outer[1].inner[2].values = float3(6.0, 60.0, 600.0);
+    data.outer[2].inner[0].values = float3(7.0, 70.0, 700.0);
+    data.outer[2].inner[1].values = float3(8.0, 80.0, 800.0);
+    data.outer[2].inner[2].values = float3(9.0, 90.0, 900.0);
+    float3 expected = float3(0.0);
+    for (int i = 0;i < 3; ++i) {
+        for (int j = 0;j < 3; ++j) {
+            expected += float3(1.0, 10.0, 100.0);
+            if (any(data.outer[i].inner[j].values != expected)) {
+                _out.sk_FragColor = _uniforms.colorRed;
+                return _out;
+            }
+            for (int k = 0;k < 3; ++k) {
+                if (data.outer[i].inner[j].values[k] != expected[k]) {
+                    _out.sk_FragColor = _uniforms.colorRed;
+                    return _out;
+                }
+            }
+        }
+    }
+    _out.sk_FragColor = _uniforms.colorGreen;
+    return _out;
+}
diff --git a/tests/sksl/shared/StructIndexLookup.skrp b/tests/sksl/shared/StructIndexLookup.skrp
new file mode 100644
index 0000000..7b40cc8
--- /dev/null
+++ b/tests/sksl/shared/StructIndexLookup.skrp
@@ -0,0 +1,143 @@
+    1. store_src_rg                   coords = src.rg
+    2. init_lane_masks                CondMask = LoopMask = RetMask = true
+    3. zero_4_slots_unmasked          data.outer[0].inner[0].values, data.outer[0].inner[1].values(0) = 0
+    4. zero_4_slots_unmasked          data.outer[0].inner[1].values(1..2), data.outer[0].inner[2].values(0..1) = 0
+    5. zero_4_slots_unmasked          data.outer[0].inner[2].values(2), data.outer[1].inner[0].values = 0
+    6. zero_4_slots_unmasked          data.outer[1].inner[1].values, data.outer[1].inner[2].values(0) = 0
+    7. zero_4_slots_unmasked          data.outer[1].inner[2].values(1..2), data.outer[2].inner[0].values(0..1) = 0
+    8. zero_4_slots_unmasked          data.outer[2].inner[0].values(2), data.outer[2].inner[1].values = 0
+    9. zero_3_slots_unmasked          data.outer[2].inner[2].values = 0
+   10. copy_constant                  $0 = 0x3F800000 (1.0)
+   11. copy_constant                  $1 = 0x41200000 (10.0)
+   12. copy_constant                  $2 = 0x42C80000 (100.0)
+   13. copy_3_slots_masked            data.outer[0].inner[0].values = Mask($0..2)
+   14. copy_constant                  $0 = 0x40000000 (2.0)
+   15. copy_constant                  $1 = 0x41A00000 (20.0)
+   16. copy_constant                  $2 = 0x43480000 (200.0)
+   17. copy_3_slots_masked            data.outer[0].inner[1].values = Mask($0..2)
+   18. copy_constant                  $0 = 0x40400000 (3.0)
+   19. copy_constant                  $1 = 0x41F00000 (30.0)
+   20. copy_constant                  $2 = 0x43960000 (300.0)
+   21. copy_3_slots_masked            data.outer[0].inner[2].values = Mask($0..2)
+   22. copy_constant                  $0 = 0x40800000 (4.0)
+   23. copy_constant                  $1 = 0x42200000 (40.0)
+   24. copy_constant                  $2 = 0x43C80000 (400.0)
+   25. copy_3_slots_masked            data.outer[1].inner[0].values = Mask($0..2)
+   26. copy_constant                  $0 = 0x40A00000 (5.0)
+   27. copy_constant                  $1 = 0x42480000 (50.0)
+   28. copy_constant                  $2 = 0x43FA0000 (500.0)
+   29. copy_3_slots_masked            data.outer[1].inner[1].values = Mask($0..2)
+   30. copy_constant                  $0 = 0x40C00000 (6.0)
+   31. copy_constant                  $1 = 0x42700000 (60.0)
+   32. copy_constant                  $2 = 0x44160000 (600.0)
+   33. copy_3_slots_masked            data.outer[1].inner[2].values = Mask($0..2)
+   34. copy_constant                  $0 = 0x40E00000 (7.0)
+   35. copy_constant                  $1 = 0x428C0000 (70.0)
+   36. copy_constant                  $2 = 0x442F0000 (700.0)
+   37. copy_3_slots_masked            data.outer[2].inner[0].values = Mask($0..2)
+   38. copy_constant                  $0 = 0x41000000 (8.0)
+   39. copy_constant                  $1 = 0x42A00000 (80.0)
+   40. copy_constant                  $2 = 0x44480000 (800.0)
+   41. copy_3_slots_masked            data.outer[2].inner[1].values = Mask($0..2)
+   42. copy_constant                  $0 = 0x41100000 (9.0)
+   43. copy_constant                  $1 = 0x42B40000 (90.0)
+   44. copy_constant                  $2 = 0x44610000 (900.0)
+   45. copy_3_slots_masked            data.outer[2].inner[2].values = Mask($0..2)
+   46. zero_4_slots_unmasked          expected, i = 0
+   47. store_loop_mask                $0 = LoopMask
+   48. jump                           jump +84 (label 0 at #132)
+   49. label                          label 0x00000001
+   50. zero_slot_unmasked             j = 0
+   51. store_loop_mask                $1 = LoopMask
+   52. jump                           jump +68 (label 2 at #120)
+   53. label                          label 0x00000003
+   54. copy_3_slots_unmasked          $2..4 = expected
+   55. copy_constant                  $5 = 0x3F800000 (1.0)
+   56. copy_constant                  $6 = 0x41200000 (10.0)
+   57. copy_constant                  $7 = 0x42C80000 (100.0)
+   58. add_3_floats                   $2..4 += $5..7
+   59. copy_3_slots_masked            expected = Mask($2..4)
+   60. store_condition_mask           $2 = CondMask
+   61. copy_slot_unmasked             $13 = i
+   62. copy_constant                  $14 = 0x00000009 (1.261169e-44)
+   63. mul_int                        $13 *= $14
+   64. copy_slot_unmasked             $11 = j
+   65. copy_constant                  $12 = 0x00000003 (4.203895e-45)
+   66. mul_int                        $11 *= $12
+   67. copy_slot_unmasked             $12 = $13
+   68. add_int                        $11 += $12
+   69. copy_from_indirect_unmasked    $3..5 = Indirect(data.outer[0].inner[0].values + $11)
+   70. copy_3_slots_unmasked          $6..8 = expected
+   71. cmpne_3_floats                 $3..5 = notEqual($3..5, $6..8)
+   72. bitwise_or_int                 $4 |= $5
+   73. bitwise_or_int                 $3 |= $4
+   74. merge_condition_mask           CondMask = $2 & $3
+   75. copy_4_constants               $4..7 = colorRed
+   76. copy_4_slots_masked            [main].result = Mask($4..7)
+   77. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+   78. load_condition_mask            CondMask = $2
+   79. zero_slot_unmasked             k = 0
+   80. store_loop_mask                $2 = LoopMask
+   81. jump                           jump +27 (label 4 at #108)
+   82. label                          label 0x00000005
+   83. store_condition_mask           $3 = CondMask
+   84. copy_slot_unmasked             $13 = i
+   85. copy_constant                  $14 = 0x00000009 (1.261169e-44)
+   86. mul_int                        $13 *= $14
+   87. copy_slot_unmasked             $11 = j
+   88. copy_constant                  $12 = 0x00000003 (4.203895e-45)
+   89. mul_int                        $11 *= $12
+   90. copy_slot_unmasked             $12 = $13
+   91. add_int                        $11 += $12
+   92. copy_slot_unmasked             $9 = k
+   93. copy_slot_unmasked             $10 = $11
+   94. add_int                        $9 += $10
+   95. copy_from_indirect_unmasked    $4 = Indirect(data.outer[0].inner[0].values(0) + $9)
+   96. copy_slot_unmasked             $13 = k
+   97. copy_from_indirect_unmasked    $5 = Indirect(expected(0) + $13)
+   98. cmpne_float                    $4 = notEqual($4, $5)
+   99. merge_condition_mask           CondMask = $3 & $4
+  100. copy_4_constants               $5..8 = colorRed
+  101. copy_4_slots_masked            [main].result = Mask($5..8)
+  102. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+  103. load_condition_mask            CondMask = $3
+  104. copy_slot_unmasked             $3 = k
+  105. copy_constant                  $4 = 0x00000001 (1.401298e-45)
+  106. add_int                        $3 += $4
+  107. copy_slot_masked               k = Mask($3)
+  108. label                          label 0x00000004
+  109. copy_slot_unmasked             $3 = k
+  110. copy_constant                  $4 = 0x00000003 (4.203895e-45)
+  111. cmplt_int                      $3 = lessThan($3, $4)
+  112. merge_loop_mask                LoopMask &= $3
+  113. stack_rewind
+  114. branch_if_any_active_lanes     branch_if_any_active_lanes -32 (label 5 at #82)
+  115. load_loop_mask                 LoopMask = $2
+  116. copy_slot_unmasked             $2 = j
+  117. copy_constant                  $3 = 0x00000001 (1.401298e-45)
+  118. add_int                        $2 += $3
+  119. copy_slot_masked               j = Mask($2)
+  120. label                          label 0x00000002
+  121. copy_slot_unmasked             $2 = j
+  122. copy_constant                  $3 = 0x00000003 (4.203895e-45)
+  123. cmplt_int                      $2 = lessThan($2, $3)
+  124. merge_loop_mask                LoopMask &= $2
+  125. stack_rewind
+  126. branch_if_any_active_lanes     branch_if_any_active_lanes -73 (label 3 at #53)
+  127. load_loop_mask                 LoopMask = $1
+  128. copy_slot_unmasked             $1 = i
+  129. copy_constant                  $2 = 0x00000001 (1.401298e-45)
+  130. add_int                        $1 += $2
+  131. copy_slot_masked               i = Mask($1)
+  132. label                          label 0x00000000
+  133. copy_slot_unmasked             $1 = i
+  134. copy_constant                  $2 = 0x00000003 (4.203895e-45)
+  135. cmplt_int                      $1 = lessThan($1, $2)
+  136. merge_loop_mask                LoopMask &= $1
+  137. stack_rewind
+  138. branch_if_any_active_lanes     branch_if_any_active_lanes -89 (label 1 at #49)
+  139. load_loop_mask                 LoopMask = $0
+  140. copy_4_constants               $0..3 = colorGreen
+  141. copy_4_slots_masked            [main].result = Mask($0..3)
+  142. mask_off_return_mask           RetMask &= ~(CondMask & LoopMask & RetMask)
+  143. load_src                       src.rgba = [main].result