Add support for sqrt() to RP codegen.

The sqrt() intrinsic test was previously non-functional, although
it was mostly usable. It's now been updated to run as part of
our standard dm test suite.

Change-Id: I0dd5f9db6898375f778096d2eb32b62743fdabc9
Bug: skia:13676
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/632037
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: John Stiles <johnstiles@google.com>
diff --git a/resources/sksl/intrinsics/Sqrt.sksl b/resources/sksl/intrinsics/Sqrt.sksl
index 0bdaddf..3238750 100644
--- a/resources/sksl/intrinsics/Sqrt.sksl
+++ b/resources/sksl/intrinsics/Sqrt.sksl
@@ -1,9 +1,16 @@
-uniform half4 inputVal, expected;
+uniform float2x2 testMatrix2x2;  // = {1, 2, 3, 4}
 uniform half4 colorGreen, colorRed;
 
 half4 main(float2 coords) {
-    const half4 constVal = half4(1, 4, 16, 64);
-    const half4 negativeVal = half4(-1, -4, -16, -64);  // should not optimize away
+    // We should not attempt to constant-fold `sqrt(negative values)`. This sqrt call should remain
+    // in the generated code as-is.
+    const float4 negativeVal = half4(-1, -4, -16, -64);
+    coords.xy = sqrt(negativeVal).xy;
+
+    float4       inputVal = half4(testMatrix2x2) + half4(0, 2, 6, 12); // = {1, 4, 9, 16}
+    const float4 constVal = half4(1, 4, 9, 16);
+    const float4 expected = half4(1, 2, 3, 4);
+
     return (sqrt(inputVal.x)       == expected.x     &&
             sqrt(inputVal.xy)      == expected.xy    &&
             sqrt(inputVal.xyz)     == expected.xyz   &&
@@ -11,9 +18,5 @@
             sqrt(constVal.x)       == expected.x     &&
             sqrt(constVal.xy)      == expected.xy    &&
             sqrt(constVal.xyz)     == expected.xyz   &&
-            sqrt(constVal.xyzw)    == expected.xyzw  &&
-            sqrt(negativeVal.x)    == expected.x     &&
-            sqrt(negativeVal.xy)   == expected.xy    &&
-            sqrt(negativeVal.xyz)  == expected.xyz   &&
-            sqrt(negativeVal.xyzw) == expected.xyzw) ? colorGreen : colorRed;
+            sqrt(constVal.xyzw)    == expected.xyzw) ? colorGreen : colorRed;
 }
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index af3f0bf..f3d2685 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -1597,6 +1597,13 @@
             fBuilder.unary_op(BuilderOp::sin_float, arg0.type().slotCount());
             return true;
 
+        case IntrinsicKind::k_sqrt_IntrinsicKind:
+            if (!this->pushExpression(arg0)) {
+                return unsupported();
+            }
+            fBuilder.unary_op(BuilderOp::sqrt_float, arg0.type().slotCount());
+            return true;
+
         case IntrinsicKind::k_tan_IntrinsicKind:
             if (!this->pushExpression(arg0)) {
                 return unsupported();
diff --git a/tests/SkSLTest.cpp b/tests/SkSLTest.cpp
index d18c38c..6d9270e 100644
--- a/tests/SkSLTest.cpp
+++ b/tests/SkSLTest.cpp
@@ -582,6 +582,7 @@
 SKSL_TEST(RP + VM + GPU, kNever,      IntrinsicSaturate,               "intrinsics/Saturate.sksl")
 SKSL_TEST(RP + VM + GPU, kApiLevel_T, IntrinsicSignFloat,              "intrinsics/SignFloat.sksl")
 SKSL_TEST(RP + GPU_ES3,  kNever,      IntrinsicSignInt,                "intrinsics/SignInt.sksl")
+SKSL_TEST(RP + VM + GPU, kNever,      IntrinsicSqrt,                   "intrinsics/Sqrt.sksl")
 SKSL_TEST(RP + VM + GPU, kApiLevel_T, IntrinsicStep,                   "intrinsics/Step.sksl")
 SKSL_TEST(GPU_ES3,       kNever,      IntrinsicTrunc,                  "intrinsics/Trunc.sksl")
 SKSL_TEST(RP + GPU_ES3,  kNever,      IntrinsicTranspose,              "intrinsics/Transpose.sksl")
diff --git a/tests/sksl/intrinsics/Sqrt.asm.frag b/tests/sksl/intrinsics/Sqrt.asm.frag
index b1119fb..1da06ad 100644
--- a/tests/sksl/intrinsics/Sqrt.asm.frag
+++ b/tests/sksl/intrinsics/Sqrt.asm.frag
@@ -6,66 +6,30 @@
 OpName %sk_Clockwise "sk_Clockwise"
 OpName %sk_FragColor "sk_FragColor"
 OpName %_UniformBuffer "_UniformBuffer"
-OpMemberName %_UniformBuffer 0 "inputVal"
-OpMemberName %_UniformBuffer 1 "expected"
-OpMemberName %_UniformBuffer 2 "colorGreen"
-OpMemberName %_UniformBuffer 3 "colorRed"
+OpMemberName %_UniformBuffer 0 "testMatrix2x2"
+OpMemberName %_UniformBuffer 1 "colorGreen"
+OpMemberName %_UniformBuffer 2 "colorRed"
 OpName %_entrypoint_v "_entrypoint_v"
 OpName %main "main"
+OpName %inputVal "inputVal"
 OpDecorate %sk_Clockwise BuiltIn FrontFacing
 OpDecorate %sk_FragColor RelaxedPrecision
 OpDecorate %sk_FragColor Location 0
 OpDecorate %sk_FragColor Index 0
 OpMemberDecorate %_UniformBuffer 0 Offset 0
-OpMemberDecorate %_UniformBuffer 0 RelaxedPrecision
-OpMemberDecorate %_UniformBuffer 1 Offset 16
+OpMemberDecorate %_UniformBuffer 0 ColMajor
+OpMemberDecorate %_UniformBuffer 0 MatrixStride 16
+OpMemberDecorate %_UniformBuffer 1 Offset 32
 OpMemberDecorate %_UniformBuffer 1 RelaxedPrecision
-OpMemberDecorate %_UniformBuffer 2 Offset 32
+OpMemberDecorate %_UniformBuffer 2 Offset 48
 OpMemberDecorate %_UniformBuffer 2 RelaxedPrecision
-OpMemberDecorate %_UniformBuffer 3 Offset 48
-OpMemberDecorate %_UniformBuffer 3 RelaxedPrecision
 OpDecorate %_UniformBuffer Block
 OpDecorate %10 Binding 0
 OpDecorate %10 DescriptorSet 0
-OpDecorate %27 RelaxedPrecision
-OpDecorate %32 RelaxedPrecision
-OpDecorate %33 RelaxedPrecision
-OpDecorate %36 RelaxedPrecision
-OpDecorate %37 RelaxedPrecision
-OpDecorate %41 RelaxedPrecision
-OpDecorate %43 RelaxedPrecision
-OpDecorate %44 RelaxedPrecision
-OpDecorate %46 RelaxedPrecision
-OpDecorate %47 RelaxedPrecision
-OpDecorate %54 RelaxedPrecision
-OpDecorate %56 RelaxedPrecision
-OpDecorate %57 RelaxedPrecision
-OpDecorate %60 RelaxedPrecision
-OpDecorate %61 RelaxedPrecision
-OpDecorate %68 RelaxedPrecision
-OpDecorate %70 RelaxedPrecision
-OpDecorate %72 RelaxedPrecision
-OpDecorate %81 RelaxedPrecision
-OpDecorate %82 RelaxedPrecision
-OpDecorate %90 RelaxedPrecision
-OpDecorate %91 RelaxedPrecision
-OpDecorate %100 RelaxedPrecision
-OpDecorate %101 RelaxedPrecision
-OpDecorate %110 RelaxedPrecision
-OpDecorate %116 RelaxedPrecision
-OpDecorate %119 RelaxedPrecision
-OpDecorate %120 RelaxedPrecision
-OpDecorate %125 RelaxedPrecision
-OpDecorate %129 RelaxedPrecision
-OpDecorate %130 RelaxedPrecision
-OpDecorate %136 RelaxedPrecision
-OpDecorate %140 RelaxedPrecision
-OpDecorate %141 RelaxedPrecision
-OpDecorate %147 RelaxedPrecision
-OpDecorate %151 RelaxedPrecision
-OpDecorate %162 RelaxedPrecision
-OpDecorate %165 RelaxedPrecision
-OpDecorate %166 RelaxedPrecision
+OpDecorate %50 RelaxedPrecision
+OpDecorate %92 RelaxedPrecision
+OpDecorate %95 RelaxedPrecision
+OpDecorate %96 RelaxedPrecision
 %bool = OpTypeBool
 %_ptr_Input_bool = OpTypePointer Input %bool
 %sk_Clockwise = OpVariable %_ptr_Input_bool Input
@@ -73,206 +37,114 @@
 %v4float = OpTypeVector %float 4
 %_ptr_Output_v4float = OpTypePointer Output %v4float
 %sk_FragColor = OpVariable %_ptr_Output_v4float Output
-%_UniformBuffer = OpTypeStruct %v4float %v4float %v4float %v4float
+%v2float = OpTypeVector %float 2
+%mat2v2float = OpTypeMatrix %v2float 2
+%_UniformBuffer = OpTypeStruct %mat2v2float %v4float %v4float
 %_ptr_Uniform__UniformBuffer = OpTypePointer Uniform %_UniformBuffer
 %10 = OpVariable %_ptr_Uniform__UniformBuffer Uniform
 %void = OpTypeVoid
-%15 = OpTypeFunction %void
+%17 = OpTypeFunction %void
 %float_0 = OpConstant %float 0
-%v2float = OpTypeVector %float 2
-%19 = OpConstantComposite %v2float %float_0 %float_0
+%20 = OpConstantComposite %v2float %float_0 %float_0
 %_ptr_Function_v2float = OpTypePointer Function %v2float
-%23 = OpTypeFunction %v4float %_ptr_Function_v2float
-%false = OpConstantFalse %bool
-%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
-%int = OpTypeInt 32 1
-%int_0 = OpConstant %int 0
-%int_1 = OpConstant %int 1
-%v2bool = OpTypeVector %bool 2
-%v3float = OpTypeVector %float 3
-%v3bool = OpTypeVector %bool 3
-%v4bool = OpTypeVector %bool 4
-%float_1 = OpConstant %float 1
-%float_2 = OpConstant %float 2
-%88 = OpConstantComposite %v2float %float_1 %float_2
-%float_4 = OpConstant %float 4
-%98 = OpConstantComposite %v3float %float_1 %float_2 %float_4
-%float_8 = OpConstant %float 8
-%108 = OpConstantComposite %v4float %float_1 %float_2 %float_4 %float_8
+%24 = OpTypeFunction %v4float %_ptr_Function_v2float
 %float_n1 = OpConstant %float -1
 %float_n4 = OpConstant %float -4
-%127 = OpConstantComposite %v2float %float_n1 %float_n4
 %float_n16 = OpConstant %float -16
-%138 = OpConstantComposite %v3float %float_n1 %float_n4 %float_n16
 %float_n64 = OpConstant %float -64
-%149 = OpConstantComposite %v4float %float_n1 %float_n4 %float_n16 %float_n64
+%32 = OpConstantComposite %v4float %float_n1 %float_n4 %float_n16 %float_n64
 %_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Uniform_mat2v2float = OpTypePointer Uniform %mat2v2float
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%float_2 = OpConstant %float 2
+%float_6 = OpConstant %float 6
+%float_12 = OpConstant %float 12
+%49 = OpConstantComposite %v4float %float_0 %float_2 %float_6 %float_12
+%false = OpConstantFalse %bool
+%float_1 = OpConstant %float 1
+%60 = OpConstantComposite %v2float %float_1 %float_2
+%v2bool = OpTypeVector %bool 2
+%v3float = OpTypeVector %float 3
+%float_3 = OpConstant %float 3
+%71 = OpConstantComposite %v3float %float_1 %float_2 %float_3
+%v3bool = OpTypeVector %bool 3
+%float_4 = OpConstant %float 4
+%80 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4
+%v4bool = OpTypeVector %bool 4
+%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float
+%int_1 = OpConstant %int 1
 %int_2 = OpConstant %int 2
-%int_3 = OpConstant %int 3
-%_entrypoint_v = OpFunction %void None %15
-%16 = OpLabel
-%20 = OpVariable %_ptr_Function_v2float Function
-OpStore %20 %19
-%22 = OpFunctionCall %v4float %main %20
-OpStore %sk_FragColor %22
+%_entrypoint_v = OpFunction %void None %17
+%18 = OpLabel
+%21 = OpVariable %_ptr_Function_v2float Function
+OpStore %21 %20
+%23 = OpFunctionCall %v4float %main %21
+OpStore %sk_FragColor %23
 OpReturn
 OpFunctionEnd
-%main = OpFunction %v4float None %23
-%24 = OpFunctionParameter %_ptr_Function_v2float
-%25 = OpLabel
-%155 = OpVariable %_ptr_Function_v4float Function
-%28 = OpAccessChain %_ptr_Uniform_v4float %10 %int_0
-%32 = OpLoad %v4float %28
-%33 = OpCompositeExtract %float %32 0
-%27 = OpExtInst %float %1 Sqrt %33
-%34 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%36 = OpLoad %v4float %34
-%37 = OpCompositeExtract %float %36 0
-%38 = OpFOrdEqual %bool %27 %37
-OpSelectionMerge %40 None
-OpBranchConditional %38 %39 %40
-%39 = OpLabel
-%42 = OpAccessChain %_ptr_Uniform_v4float %10 %int_0
-%43 = OpLoad %v4float %42
-%44 = OpVectorShuffle %v2float %43 %43 0 1
-%41 = OpExtInst %v2float %1 Sqrt %44
-%45 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%46 = OpLoad %v4float %45
-%47 = OpVectorShuffle %v2float %46 %46 0 1
-%48 = OpFOrdEqual %v2bool %41 %47
-%50 = OpAll %bool %48
-OpBranch %40
-%40 = OpLabel
-%51 = OpPhi %bool %false %25 %50 %39
-OpSelectionMerge %53 None
-OpBranchConditional %51 %52 %53
-%52 = OpLabel
-%55 = OpAccessChain %_ptr_Uniform_v4float %10 %int_0
-%56 = OpLoad %v4float %55
-%57 = OpVectorShuffle %v3float %56 %56 0 1 2
-%54 = OpExtInst %v3float %1 Sqrt %57
-%59 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%60 = OpLoad %v4float %59
-%61 = OpVectorShuffle %v3float %60 %60 0 1 2
-%62 = OpFOrdEqual %v3bool %54 %61
-%64 = OpAll %bool %62
-OpBranch %53
-%53 = OpLabel
-%65 = OpPhi %bool %false %40 %64 %52
-OpSelectionMerge %67 None
-OpBranchConditional %65 %66 %67
+%main = OpFunction %v4float None %24
+%25 = OpFunctionParameter %_ptr_Function_v2float
+%26 = OpLabel
+%inputVal = OpVariable %_ptr_Function_v4float Function
+%85 = OpVariable %_ptr_Function_v4float Function
+%27 = OpExtInst %v4float %1 Sqrt %32
+%33 = OpVectorShuffle %v2float %27 %27 0 1
+OpStore %25 %33
+%36 = OpAccessChain %_ptr_Uniform_mat2v2float %10 %int_0
+%40 = OpLoad %mat2v2float %36
+%41 = OpCompositeExtract %float %40 0 0
+%42 = OpCompositeExtract %float %40 0 1
+%43 = OpCompositeExtract %float %40 1 0
+%44 = OpCompositeExtract %float %40 1 1
+%45 = OpCompositeConstruct %v4float %41 %42 %43 %44
+%50 = OpFAdd %v4float %45 %49
+OpStore %inputVal %50
+%53 = OpCompositeExtract %float %50 0
+%52 = OpExtInst %float %1 Sqrt %53
+%55 = OpFOrdEqual %bool %52 %float_1
+OpSelectionMerge %57 None
+OpBranchConditional %55 %56 %57
+%56 = OpLabel
+%59 = OpVectorShuffle %v2float %50 %50 0 1
+%58 = OpExtInst %v2float %1 Sqrt %59
+%61 = OpFOrdEqual %v2bool %58 %60
+%63 = OpAll %bool %61
+OpBranch %57
+%57 = OpLabel
+%64 = OpPhi %bool %false %26 %63 %56
+OpSelectionMerge %66 None
+OpBranchConditional %64 %65 %66
+%65 = OpLabel
+%68 = OpVectorShuffle %v3float %50 %50 0 1 2
+%67 = OpExtInst %v3float %1 Sqrt %68
+%72 = OpFOrdEqual %v3bool %67 %71
+%74 = OpAll %bool %72
+OpBranch %66
 %66 = OpLabel
-%69 = OpAccessChain %_ptr_Uniform_v4float %10 %int_0
-%70 = OpLoad %v4float %69
-%68 = OpExtInst %v4float %1 Sqrt %70
-%71 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%72 = OpLoad %v4float %71
-%73 = OpFOrdEqual %v4bool %68 %72
-%75 = OpAll %bool %73
-OpBranch %67
-%67 = OpLabel
-%76 = OpPhi %bool %false %53 %75 %66
-OpSelectionMerge %78 None
-OpBranchConditional %76 %77 %78
+%75 = OpPhi %bool %false %57 %74 %65
+OpSelectionMerge %77 None
+OpBranchConditional %75 %76 %77
+%76 = OpLabel
+%78 = OpExtInst %v4float %1 Sqrt %50
+%81 = OpFOrdEqual %v4bool %78 %80
+%83 = OpAll %bool %81
+OpBranch %77
 %77 = OpLabel
-%80 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%81 = OpLoad %v4float %80
-%82 = OpCompositeExtract %float %81 0
-%83 = OpFOrdEqual %bool %float_1 %82
-OpBranch %78
-%78 = OpLabel
-%84 = OpPhi %bool %false %67 %83 %77
-OpSelectionMerge %86 None
-OpBranchConditional %84 %85 %86
-%85 = OpLabel
-%89 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%90 = OpLoad %v4float %89
-%91 = OpVectorShuffle %v2float %90 %90 0 1
-%92 = OpFOrdEqual %v2bool %88 %91
-%93 = OpAll %bool %92
-OpBranch %86
+%84 = OpPhi %bool %false %66 %83 %76
+OpSelectionMerge %88 None
+OpBranchConditional %84 %86 %87
 %86 = OpLabel
-%94 = OpPhi %bool %false %78 %93 %85
-OpSelectionMerge %96 None
-OpBranchConditional %94 %95 %96
-%95 = OpLabel
-%99 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%100 = OpLoad %v4float %99
-%101 = OpVectorShuffle %v3float %100 %100 0 1 2
-%102 = OpFOrdEqual %v3bool %98 %101
-%103 = OpAll %bool %102
-OpBranch %96
-%96 = OpLabel
-%104 = OpPhi %bool %false %86 %103 %95
-OpSelectionMerge %106 None
-OpBranchConditional %104 %105 %106
-%105 = OpLabel
-%109 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%110 = OpLoad %v4float %109
-%111 = OpFOrdEqual %v4bool %108 %110
-%112 = OpAll %bool %111
-OpBranch %106
-%106 = OpLabel
-%113 = OpPhi %bool %false %96 %112 %105
-OpSelectionMerge %115 None
-OpBranchConditional %113 %114 %115
-%114 = OpLabel
-%116 = OpExtInst %float %1 Sqrt %float_n1
-%118 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%119 = OpLoad %v4float %118
-%120 = OpCompositeExtract %float %119 0
-%121 = OpFOrdEqual %bool %116 %120
-OpBranch %115
-%115 = OpLabel
-%122 = OpPhi %bool %false %106 %121 %114
-OpSelectionMerge %124 None
-OpBranchConditional %122 %123 %124
-%123 = OpLabel
-%125 = OpExtInst %v2float %1 Sqrt %127
-%128 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%129 = OpLoad %v4float %128
-%130 = OpVectorShuffle %v2float %129 %129 0 1
-%131 = OpFOrdEqual %v2bool %125 %130
-%132 = OpAll %bool %131
-OpBranch %124
-%124 = OpLabel
-%133 = OpPhi %bool %false %115 %132 %123
-OpSelectionMerge %135 None
-OpBranchConditional %133 %134 %135
-%134 = OpLabel
-%136 = OpExtInst %v3float %1 Sqrt %138
-%139 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%140 = OpLoad %v4float %139
-%141 = OpVectorShuffle %v3float %140 %140 0 1 2
-%142 = OpFOrdEqual %v3bool %136 %141
-%143 = OpAll %bool %142
-OpBranch %135
-%135 = OpLabel
-%144 = OpPhi %bool %false %124 %143 %134
-OpSelectionMerge %146 None
-OpBranchConditional %144 %145 %146
-%145 = OpLabel
-%147 = OpExtInst %v4float %1 Sqrt %149
-%150 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
-%151 = OpLoad %v4float %150
-%152 = OpFOrdEqual %v4bool %147 %151
-%153 = OpAll %bool %152
-OpBranch %146
-%146 = OpLabel
-%154 = OpPhi %bool %false %135 %153 %145
-OpSelectionMerge %159 None
-OpBranchConditional %154 %157 %158
-%157 = OpLabel
-%160 = OpAccessChain %_ptr_Uniform_v4float %10 %int_2
-%162 = OpLoad %v4float %160
-OpStore %155 %162
-OpBranch %159
-%158 = OpLabel
-%163 = OpAccessChain %_ptr_Uniform_v4float %10 %int_3
-%165 = OpLoad %v4float %163
-OpStore %155 %165
-OpBranch %159
-%159 = OpLabel
-%166 = OpLoad %v4float %155
-OpReturnValue %166
+%89 = OpAccessChain %_ptr_Uniform_v4float %10 %int_1
+%92 = OpLoad %v4float %89
+OpStore %85 %92
+OpBranch %88
+%87 = OpLabel
+%93 = OpAccessChain %_ptr_Uniform_v4float %10 %int_2
+%95 = OpLoad %v4float %93
+OpStore %85 %95
+OpBranch %88
+%88 = OpLabel
+%96 = OpLoad %v4float %85
+OpReturnValue %96
 OpFunctionEnd
diff --git a/tests/sksl/intrinsics/Sqrt.glsl b/tests/sksl/intrinsics/Sqrt.glsl
index bee6671..1e76a99 100644
--- a/tests/sksl/intrinsics/Sqrt.glsl
+++ b/tests/sksl/intrinsics/Sqrt.glsl
@@ -1,10 +1,12 @@
 
 out vec4 sk_FragColor;
-uniform vec4 inputVal;
-uniform vec4 expected;
+uniform mat2 testMatrix2x2;
 uniform vec4 colorGreen;
 uniform vec4 colorRed;
 vec4 main() {
     const vec4 negativeVal = vec4(-1.0, -4.0, -16.0, -64.0);
-    return ((((((((((sqrt(inputVal.x) == expected.x && sqrt(inputVal.xy) == expected.xy) && sqrt(inputVal.xyz) == expected.xyz) && sqrt(inputVal) == expected) && 1.0 == expected.x) && vec2(1.0, 2.0) == expected.xy) && vec3(1.0, 2.0, 4.0) == expected.xyz) && vec4(1.0, 2.0, 4.0, 8.0) == expected) && sqrt(-1.0) == expected.x) && sqrt(vec2(-1.0, -4.0)) == expected.xy) && sqrt(vec3(-1.0, -4.0, -16.0)) == expected.xyz) && sqrt(negativeVal) == expected ? colorGreen : colorRed;
+    coords = sqrt(negativeVal).xy;
+    vec4 inputVal = vec4(testMatrix2x2) + vec4(0.0, 2.0, 6.0, 12.0);
+    const vec4 expected = vec4(1.0, 2.0, 3.0, 4.0);
+    return ((sqrt(inputVal.x) == 1.0 && sqrt(inputVal.xy) == vec2(1.0, 2.0)) && sqrt(inputVal.xyz) == vec3(1.0, 2.0, 3.0)) && sqrt(inputVal) == expected ? colorGreen : colorRed;
 }
diff --git a/tests/sksl/intrinsics/Sqrt.hlsl b/tests/sksl/intrinsics/Sqrt.hlsl
index dfc6d7a..47d2db9 100644
--- a/tests/sksl/intrinsics/Sqrt.hlsl
+++ b/tests/sksl/intrinsics/Sqrt.hlsl
@@ -1,7 +1,6 @@
 cbuffer _UniformBuffer : register(b0, space0)
 {
-    float4 _10_inputVal : packoffset(c0);
-    float4 _10_expected : packoffset(c1);
+    row_major float2x2 _10_testMatrix2x2 : packoffset(c0);
     float4 _10_colorGreen : packoffset(c2);
     float4 _10_colorRed : packoffset(c3);
 };
@@ -14,129 +13,58 @@
     float4 sk_FragColor : SV_Target0;
 };
 
-float4 main(float2 _24)
+float4 main(out float2 _25)
 {
-    bool _51 = false;
-    if (sqrt(_10_inputVal.x) == _10_expected.x)
+    _25 = sqrt(float4(-1.0f, -4.0f, -16.0f, -64.0f)).xy;
+    float4 _50 = float4(_10_testMatrix2x2[0].x, _10_testMatrix2x2[0].y, _10_testMatrix2x2[1].x, _10_testMatrix2x2[1].y) + float4(0.0f, 2.0f, 6.0f, 12.0f);
+    float4 inputVal = _50;
+    bool _64 = false;
+    if (sqrt(_50.x) == 1.0f)
     {
-        float2 _41 = sqrt(_10_inputVal.xy);
-        _51 = all(bool2(_41.x == _10_expected.xy.x, _41.y == _10_expected.xy.y));
+        float2 _58 = sqrt(_50.xy);
+        _64 = all(bool2(_58.x == float2(1.0f, 2.0f).x, _58.y == float2(1.0f, 2.0f).y));
     }
     else
     {
-        _51 = false;
+        _64 = false;
     }
-    bool _65 = false;
-    if (_51)
+    bool _75 = false;
+    if (_64)
     {
-        float3 _54 = sqrt(_10_inputVal.xyz);
-        _65 = all(bool3(_54.x == _10_expected.xyz.x, _54.y == _10_expected.xyz.y, _54.z == _10_expected.xyz.z));
+        float3 _67 = sqrt(_50.xyz);
+        _75 = all(bool3(_67.x == float3(1.0f, 2.0f, 3.0f).x, _67.y == float3(1.0f, 2.0f, 3.0f).y, _67.z == float3(1.0f, 2.0f, 3.0f).z));
     }
     else
     {
-        _65 = false;
-    }
-    bool _76 = false;
-    if (_65)
-    {
-        float4 _68 = sqrt(_10_inputVal);
-        _76 = all(bool4(_68.x == _10_expected.x, _68.y == _10_expected.y, _68.z == _10_expected.z, _68.w == _10_expected.w));
-    }
-    else
-    {
-        _76 = false;
+        _75 = false;
     }
     bool _84 = false;
-    if (_76)
+    if (_75)
     {
-        _84 = 1.0f == _10_expected.x;
+        float4 _78 = sqrt(_50);
+        _84 = all(bool4(_78.x == float4(1.0f, 2.0f, 3.0f, 4.0f).x, _78.y == float4(1.0f, 2.0f, 3.0f, 4.0f).y, _78.z == float4(1.0f, 2.0f, 3.0f, 4.0f).z, _78.w == float4(1.0f, 2.0f, 3.0f, 4.0f).w));
     }
     else
     {
         _84 = false;
     }
-    bool _94 = false;
+    float4 _85 = 0.0f.xxxx;
     if (_84)
     {
-        _94 = all(bool2(float2(1.0f, 2.0f).x == _10_expected.xy.x, float2(1.0f, 2.0f).y == _10_expected.xy.y));
+        _85 = _10_colorGreen;
     }
     else
     {
-        _94 = false;
+        _85 = _10_colorRed;
     }
-    bool _104 = false;
-    if (_94)
-    {
-        _104 = all(bool3(float3(1.0f, 2.0f, 4.0f).x == _10_expected.xyz.x, float3(1.0f, 2.0f, 4.0f).y == _10_expected.xyz.y, float3(1.0f, 2.0f, 4.0f).z == _10_expected.xyz.z));
-    }
-    else
-    {
-        _104 = false;
-    }
-    bool _113 = false;
-    if (_104)
-    {
-        _113 = all(bool4(float4(1.0f, 2.0f, 4.0f, 8.0f).x == _10_expected.x, float4(1.0f, 2.0f, 4.0f, 8.0f).y == _10_expected.y, float4(1.0f, 2.0f, 4.0f, 8.0f).z == _10_expected.z, float4(1.0f, 2.0f, 4.0f, 8.0f).w == _10_expected.w));
-    }
-    else
-    {
-        _113 = false;
-    }
-    bool _122 = false;
-    if (_113)
-    {
-        _122 = sqrt(-1.0f) == _10_expected.x;
-    }
-    else
-    {
-        _122 = false;
-    }
-    bool _133 = false;
-    if (_122)
-    {
-        float2 _125 = sqrt(float2(-1.0f, -4.0f));
-        _133 = all(bool2(_125.x == _10_expected.xy.x, _125.y == _10_expected.xy.y));
-    }
-    else
-    {
-        _133 = false;
-    }
-    bool _144 = false;
-    if (_133)
-    {
-        float3 _136 = sqrt(float3(-1.0f, -4.0f, -16.0f));
-        _144 = all(bool3(_136.x == _10_expected.xyz.x, _136.y == _10_expected.xyz.y, _136.z == _10_expected.xyz.z));
-    }
-    else
-    {
-        _144 = false;
-    }
-    bool _154 = false;
-    if (_144)
-    {
-        float4 _147 = sqrt(float4(-1.0f, -4.0f, -16.0f, -64.0f));
-        _154 = all(bool4(_147.x == _10_expected.x, _147.y == _10_expected.y, _147.z == _10_expected.z, _147.w == _10_expected.w));
-    }
-    else
-    {
-        _154 = false;
-    }
-    float4 _155 = 0.0f.xxxx;
-    if (_154)
-    {
-        _155 = _10_colorGreen;
-    }
-    else
-    {
-        _155 = _10_colorRed;
-    }
-    return _155;
+    return _85;
 }
 
 void frag_main()
 {
-    float2 _20 = 0.0f.xx;
-    sk_FragColor = main(_20);
+    float2 _21 = 0.0f.xx;
+    float4 _23 = main(_21);
+    sk_FragColor = _23;
 }
 
 SPIRV_Cross_Output main()
diff --git a/tests/sksl/intrinsics/Sqrt.metal b/tests/sksl/intrinsics/Sqrt.metal
index c43899e..08ba451 100644
--- a/tests/sksl/intrinsics/Sqrt.metal
+++ b/tests/sksl/intrinsics/Sqrt.metal
@@ -2,8 +2,7 @@
 #include <simd/simd.h>
 using namespace metal;
 struct Uniforms {
-    half4 inputVal;
-    half4 expected;
+    float2x2 testMatrix2x2;
     half4 colorGreen;
     half4 colorRed;
 };
@@ -12,10 +11,17 @@
 struct Outputs {
     half4 sk_FragColor [[color(0)]];
 };
+
+float4 float4_from_float2x2(float2x2 x) {
+    return float4(x[0].xy, x[1].xy);
+}
 fragment Outputs fragmentMain(Inputs _in [[stage_in]], constant Uniforms& _uniforms [[buffer(0)]], bool _frontFacing [[front_facing]], float4 _fragCoord [[position]]) {
     Outputs _out;
     (void)_out;
-    const half4 negativeVal = half4(-1.0h, -4.0h, -16.0h, -64.0h);
-    _out.sk_FragColor = ((((((((((sqrt(_uniforms.inputVal.x) == _uniforms.expected.x && all(sqrt(_uniforms.inputVal.xy) == _uniforms.expected.xy)) && all(sqrt(_uniforms.inputVal.xyz) == _uniforms.expected.xyz)) && all(sqrt(_uniforms.inputVal) == _uniforms.expected)) && 1.0h == _uniforms.expected.x) && all(half2(1.0h, 2.0h) == _uniforms.expected.xy)) && all(half3(1.0h, 2.0h, 4.0h) == _uniforms.expected.xyz)) && all(half4(1.0h, 2.0h, 4.0h, 8.0h) == _uniforms.expected)) && sqrt(-1.0h) == _uniforms.expected.x) && all(sqrt(half2(-1.0h, -4.0h)) == _uniforms.expected.xy)) && all(sqrt(half3(-1.0h, -4.0h, -16.0h)) == _uniforms.expected.xyz)) && all(sqrt(negativeVal) == _uniforms.expected) ? _uniforms.colorGreen : _uniforms.colorRed;
+    const float4 negativeVal = float4(-1.0, -4.0, -16.0, -64.0);
+    coords = sqrt(negativeVal).xy;
+    float4 inputVal = float4(half4(float4_from_float2x2(_uniforms.testMatrix2x2)) + half4(0.0h, 2.0h, 6.0h, 12.0h));
+    const float4 expected = float4(1.0, 2.0, 3.0, 4.0);
+    _out.sk_FragColor = ((sqrt(inputVal.x) == 1.0 && all(sqrt(inputVal.xy) == float2(1.0, 2.0))) && all(sqrt(inputVal.xyz) == float3(1.0, 2.0, 3.0))) && all(sqrt(inputVal) == expected) ? _uniforms.colorGreen : _uniforms.colorRed;
     return _out;
 }
diff --git a/tests/sksl/intrinsics/Sqrt.skrp b/tests/sksl/intrinsics/Sqrt.skrp
index 3ef3d71..5f35518 100644
--- a/tests/sksl/intrinsics/Sqrt.skrp
+++ b/tests/sksl/intrinsics/Sqrt.skrp
@@ -1,4 +1,65 @@
-### Compilation failed:
-
-error: code is not supported
-1 error
+    1. store_src_rg                   coords = src.rg
+    2. init_lane_masks                CondMask = LoopMask = RetMask = true
+    3. copy_constant                  $0 = 0xBF800000 (-1.0)
+    4. copy_constant                  $1 = 0xC0800000 (-4.0)
+    5. copy_constant                  $2 = 0xC1800000 (-16.0)
+    6. copy_constant                  $3 = 0xC2800000 (-64.0)
+    7. copy_4_slots_unmasked          negativeVal = $0..3
+    8. sqrt_float                     $0 = sqrt($0)
+    9. sqrt_float                     $1 = sqrt($1)
+   10. sqrt_float                     $2 = sqrt($2)
+   11. sqrt_float                     $3 = sqrt($3)
+   12. copy_2_slots_masked            coords = Mask($0..1)
+   13. copy_4_constants               $0..3 = testMatrix2x2
+   14. zero_slot_unmasked             $4 = 0
+   15. copy_constant                  $5 = 0x40000000 (2.0)
+   16. copy_constant                  $6 = 0x40C00000 (6.0)
+   17. copy_constant                  $7 = 0x41400000 (12.0)
+   18. add_4_floats                   $0..3 += $4..7
+   19. copy_4_slots_unmasked          inputVal = $0..3
+   20. copy_constant                  $0 = 0x3F800000 (1.0)
+   21. copy_constant                  $1 = 0x40000000 (2.0)
+   22. copy_constant                  $2 = 0x40400000 (3.0)
+   23. copy_constant                  $3 = 0x40800000 (4.0)
+   24. copy_4_slots_unmasked          expected = $0..3
+   25. store_condition_mask           $8 = CondMask
+   26. copy_slot_unmasked             $9 = inputVal(0)
+   27. sqrt_float                     $9 = sqrt($9)
+   28. copy_constant                  $10 = 0x3F800000 (1.0)
+   29. cmpeq_float                    $9 = equal($9, $10)
+   30. copy_2_slots_unmasked          $10..11 = inputVal(0..1)
+   31. sqrt_float                     $10 = sqrt($10)
+   32. sqrt_float                     $11 = sqrt($11)
+   33. copy_constant                  $12 = 0x3F800000 (1.0)
+   34. copy_constant                  $13 = 0x40000000 (2.0)
+   35. cmpeq_2_floats                 $10..11 = equal($10..11, $12..13)
+   36. bitwise_and_int                $10 &= $11
+   37. bitwise_and_int                $9 &= $10
+   38. copy_3_slots_unmasked          $10..12 = inputVal(0..2)
+   39. sqrt_float                     $10 = sqrt($10)
+   40. sqrt_float                     $11 = sqrt($11)
+   41. sqrt_float                     $12 = sqrt($12)
+   42. copy_constant                  $13 = 0x3F800000 (1.0)
+   43. copy_constant                  $14 = 0x40000000 (2.0)
+   44. copy_constant                  $15 = 0x40400000 (3.0)
+   45. cmpeq_3_floats                 $10..12 = equal($10..12, $13..15)
+   46. bitwise_and_int                $11 &= $12
+   47. bitwise_and_int                $10 &= $11
+   48. bitwise_and_int                $9 &= $10
+   49. copy_4_slots_unmasked          $10..13 = inputVal
+   50. sqrt_float                     $10 = sqrt($10)
+   51. sqrt_float                     $11 = sqrt($11)
+   52. sqrt_float                     $12 = sqrt($12)
+   53. sqrt_float                     $13 = sqrt($13)
+   54. copy_4_slots_unmasked          $14..17 = expected
+   55. cmpeq_4_floats                 $10..13 = equal($10..13, $14..17)
+   56. bitwise_and_2_ints             $10..11 &= $12..13
+   57. bitwise_and_int                $10 &= $11
+   58. bitwise_and_int                $9 &= $10
+   59. copy_4_constants               $0..3 = colorRed
+   60. merge_condition_mask           CondMask = $8 & $9
+   61. copy_4_constants               $4..7 = colorGreen
+   62. copy_4_slots_masked            $0..3 = Mask($4..7)
+   63. load_condition_mask            CondMask = $8
+   64. copy_4_slots_masked            [main].result = Mask($0..3)
+   65. load_src                       src.rgba = [main].result