Use the fixed slot-range when pushing and storing lvalues.

Rather than breaking up lvalue reads and writes into multiple single-slot
pushes and stores, we now push or store the entire slot range in one go.
Previously, we would rely on the peephole optimizer to fuse these
single-slot reads and writes, but this didn't work when unsliceable
swizzles were involved.

Change-Id: I2313d5dc226e7d83452f41af3dfa874b2166e9c2
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/647042
Auto-Submit: John Stiles <johnstiles@google.com>
Reviewed-by: Arman Uguray <armansito@google.com>
Commit-Queue: Arman Uguray <armansito@google.com>
diff --git a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
index 5ffbb1b..38e27ad 100644
--- a/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
+++ b/src/sksl/codegen/SkSLRasterPipelineCodeGenerator.cpp
@@ -479,24 +479,19 @@
     /** Returns true if this lvalue is actually writable--temporaries and uniforms are not. */
     virtual bool isWritable() const = 0;
 
-    /** Returns the number of slots in this LValue. */
-    virtual int numSlots() const = 0;
-
-     /**
-      * Returns the slot range of the lvalue, after it is winnowed down to the selected field/index.
-      * The range is calculated assuming every dynamic index will evaluate to zero.
-      */
-     virtual SlotRange fixedSlotRange(Generator* gen) = 0;
-
-    /** Pushes a single slot directly onto the stack. */
-    [[nodiscard]] virtual bool push(Generator* gen, Slot slot) = 0;
-
     /**
-     * Stores a single stack value directly into the lvalue. The value on the stack covers
-     * `numSlots` in total, and we are taking the value from the `index`th stack position in range
-     * [0, numSlots).
+     * Returns the slot range of the lvalue, after it is winnowed down to the selected field/index.
+     * The range is calculated assuming every dynamic index will evaluate to zero.
      */
-    virtual void store(Generator* gen, Slot slot, int index, int numSlots) = 0;
+    virtual SlotRange fixedSlotRange(Generator* gen) = 0;
+
+    /** Pushes values directly onto the stack. */
+    [[nodiscard]] virtual bool push(Generator* gen,
+                                    SlotRange fixedOffset,
+                                    SkSpan<const int8_t> swizzle) = 0;
+
+    /** Stores topmost values from the stack directly into the lvalue. */
+    virtual void store(Generator* gen, SlotRange fixedOffset, SkSpan<const int8_t> swizzle) = 0;
 };
 
 class ScratchLValue final : public LValue {
@@ -518,15 +513,13 @@
         return false;
     }
 
-    int numSlots() const override {
-        return fNumSlots;
-    }
-
     SlotRange fixedSlotRange(Generator* gen) override {
         return SlotRange{0, fNumSlots};
     }
 
-    [[nodiscard]] bool push(Generator* gen, Slot slot) override {
+    [[nodiscard]] bool push(Generator* gen,
+                            SlotRange fixedOffset,
+                            SkSpan<const int8_t> swizzle) override {
         if (!fDedicatedStack.has_value()) {
             // Push the scratch expression onto a dedicated stack.
             fGenerator = gen;
@@ -538,12 +531,15 @@
             fDedicatedStack->exit();
         }
 
-        SkASSERT(slot >= 0 && slot < fNumSlots);
-        fDedicatedStack->pushClone(1, fNumSlots - slot - 1);
+        fDedicatedStack->pushClone(fixedOffset.count,
+                                   fNumSlots - fixedOffset.count - fixedOffset.index);
+        if (!swizzle.empty()) {
+            gen->builder()->swizzle(fixedOffset.count, swizzle);
+        }
         return true;
     }
 
-    void store(Generator*, Slot, int, int) override {
+    void store(Generator*, SlotRange, SkSpan<const int8_t>) override {
         SkDEBUGFAIL("scratch lvalues cannot be stored into");
     }
 
@@ -567,27 +563,28 @@
                                                 : gen->getVariableSlots(*fVariable);
     }
 
-    int numSlots() const override {
-        return fVariable->type().slotCount();
-    }
-
-    [[nodiscard]] bool push(Generator* gen, Slot slot) override {
+    [[nodiscard]] bool push(Generator* gen,
+                            SlotRange fixedOffset,
+                            SkSpan<const int8_t> swizzle) override {
         if (Generator::IsUniform(*fVariable)) {
-            SlotRange range = gen->getUniformSlots(*fVariable);
-            gen->builder()->push_uniform(SlotRange{range.index + slot, 1});
+            gen->builder()->push_uniform(fixedOffset);
         } else {
-            SlotRange range = gen->getVariableSlots(*fVariable);
-            gen->builder()->push_slots(SlotRange{range.index + slot, 1});
+            gen->builder()->push_slots(fixedOffset);
+        }
+        if (!swizzle.empty()) {
+            gen->builder()->swizzle(fixedOffset.count, swizzle);
         }
         return true;
     }
 
-    void store(Generator* gen, Slot slot, int index, int numSlots) override {
+    void store(Generator* gen, SlotRange fixedOffset, SkSpan<const int8_t> swizzle) override {
         SkASSERT(!Generator::IsUniform(*fVariable));
 
-        SlotRange range = gen->getVariableSlots(*fVariable);
-        int offsetFromStackTop = numSlots - index;
-        gen->builder()->copy_stack_to_slots(SlotRange{range.index + slot, 1}, offsetFromStackTop);
+        if (swizzle.empty()) {
+            gen->builder()->copy_stack_to_slots(fixedOffset, fixedOffset.count);
+        } else {
+            gen->builder()->swizzle_copy_stack_to_slots(fixedOffset, swizzle, swizzle.size());
+        }
     }
 
 private:
@@ -610,16 +607,21 @@
         return fParent->fixedSlotRange(gen);
     }
 
-    int numSlots() const override {
-        return fComponents.size();
+    [[nodiscard]] bool push(Generator* gen,
+                            SlotRange fixedOffset,
+                            SkSpan<const int8_t> swizzle) override {
+        if (!swizzle.empty()) {
+            SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
+            return unsupported();
+        }
+        return fParent->push(gen, fixedOffset, fComponents);
     }
 
-    [[nodiscard]] bool push(Generator* gen, Slot slot) override {
-        return fParent->push(gen, fComponents[slot]);
-    }
-
-    void store(Generator* gen, Slot slot, int index, int numSlots) override {
-        fParent->store(gen, fComponents[slot], index, numSlots);
+    void store(Generator* gen, SlotRange fixedOffset, SkSpan<const int8_t> swizzle) override {
+        if (!swizzle.empty()) {
+            SkDEBUGFAIL("swizzle-of-a-swizzle should have been folded out in front end");
+        }
+        fParent->store(gen, fixedOffset, fComponents);
     }
 
 private:
@@ -650,16 +652,14 @@
         return adjusted;
     }
 
-    int numSlots() const override {
-        return fNumSlots;
+    [[nodiscard]] bool push(Generator* gen,
+                            SlotRange fixedOffset,
+                            SkSpan<const int8_t> swizzle) override {
+        return fParent->push(gen, fixedOffset, swizzle);
     }
 
-    [[nodiscard]] bool push(Generator* gen, Slot slot) override {
-        return fParent->push(gen, slot + fInitialSlot);
-    }
-
-    void store(Generator* gen, Slot slot, int index, int numSlots) override {
-        fParent->store(gen, slot + fInitialSlot, index, numSlots);
+    void store(Generator* gen, SlotRange fixedOffset, SkSpan<const int8_t> swizzle) override {
+        fParent->store(gen, fixedOffset, swizzle);
     }
 
 protected:
@@ -891,25 +891,11 @@
 
 void Generator::store(LValue& lvalue) {
     SkASSERT(lvalue.isWritable());
-
-    // Copy our slots from the stack into their slots. The Builder will coalesce single-slot pushes
-    // into contiguous ranges where possible.
-    int numSlots = lvalue.numSlots();
-    for (Slot slot = 0; slot < numSlots; ++slot) {
-        lvalue.store(this, slot, /*index=*/(int)slot, numSlots);
-    }
+    return lvalue.store(this, lvalue.fixedSlotRange(this), /*swizzle=*/{});
 }
 
 bool Generator::push(LValue& lvalue) {
-    // Push our slots onto the stack one-by-one. The Builder will coalesce single-slot pushes into
-    // contiguous ranges where possible.
-    int numSlots = lvalue.numSlots();
-    for (Slot slot = 0; slot < numSlots; ++slot) {
-        if (!lvalue.push(this, slot)) {
-            return unsupported();
-        }
-    }
-    return true;
+    return lvalue.push(this, lvalue.fixedSlotRange(this), /*swizzle=*/{});
 }
 
 int Generator::getFunctionDebugInfo(const FunctionDeclaration& decl) {
diff --git a/tests/sksl/folding/VectorScalarFolding.skrp b/tests/sksl/folding/VectorScalarFolding.skrp
index 4258bed..2137d66 100644
--- a/tests/sksl/folding/VectorScalarFolding.skrp
+++ b/tests/sksl/folding/VectorScalarFolding.skrp
@@ -373,468 +373,464 @@
   373. copy_slot_unmasked             $0 = _2_unknown
   374. swizzle_4                      $0..3 = ($0..3).xxxx
   375. copy_4_slots_unmasked          _1_x = $0..3
-  376. copy_4_slots_unmasked          $0..3 = _1_x
-  377. copy_constant                  $4 = 0x3F800000 (1.0)
-  378. swizzle_4                      $4..7 = ($4..7).xxxx
-  379. add_4_floats                   $0..3 += $4..7
-  380. copy_4_slots_unmasked          _1_x = $0..3
-  381. copy_4_slots_unmasked          $0..3 = _1_x
-  382. copy_constant                  $4 = 0x3F800000 (1.0)
-  383. swizzle_4                      $4..7 = ($4..7).xxxx
-  384. sub_4_floats                   $0..3 -= $4..7
-  385. copy_4_slots_unmasked          _1_x = $0..3
-  386. copy_4_slots_unmasked          $0..3 = _0_ok, _1_x(0..2)
-  387. copy_2_slots_unmasked          $4..5 = _1_x(3), _2_unknown
-  388. swizzle_4                      $5..8 = ($5..8).xxxx
-  389. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  390. bitwise_and_2_ints             $1..2 &= $3..4
-  391. bitwise_and_int                $1 &= $2
-  392. bitwise_and_int                $0 &= $1
-  393. copy_slot_unmasked             _0_ok = $0
-  394. copy_slot_unmasked             $0 = _2_unknown
-  395. swizzle_4                      $0..3 = ($0..3).xxxx
-  396. copy_4_slots_unmasked          _1_x = $0..3
-  397. copy_constant                  $4 = 0x3F800000 (1.0)
-  398. swizzle_4                      $4..7 = ($4..7).xxxx
-  399. add_4_floats                   $0..3 += $4..7
-  400. copy_4_slots_unmasked          _1_x = $0..3
-  401. copy_constant                  $4 = 0x3F800000 (1.0)
-  402. swizzle_4                      $4..7 = ($4..7).xxxx
-  403. sub_4_floats                   $0..3 -= $4..7
-  404. copy_4_slots_unmasked          _1_x = $0..3
-  405. copy_4_slots_unmasked          $0..3 = _0_ok, _1_x(0..2)
-  406. copy_2_slots_unmasked          $4..5 = _1_x(3), _2_unknown
-  407. swizzle_4                      $5..8 = ($5..8).xxxx
-  408. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  409. bitwise_and_2_ints             $1..2 &= $3..4
-  410. bitwise_and_int                $1 &= $2
-  411. bitwise_and_int                $0 &= $1
-  412. copy_slot_unmasked             _0_ok = $0
-  413. store_condition_mask           $12 = CondMask
-  414. copy_slot_unmasked             $13 = _0_ok
-  415. zero_slot_unmasked             $0 = 0
-  416. merge_condition_mask           CondMask = $12 & $13
-  417. branch_if_no_active_lanes      branch_if_no_active_lanes +416 (label 1 at #833)
-  418. copy_constant                  ok = 0xFFFFFFFF
-  419. copy_constant                  x(0) = 0x00000006 (8.407791e-45)
-  420. copy_constant                  x(1) = 0x00000006 (8.407791e-45)
-  421. copy_constant                  x(2) = 0x00000007 (9.809089e-45)
-  422. copy_constant                  x(3) = 0x00000008 (1.121039e-44)
-  423. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  424. copy_slot_unmasked             $5 = x(3)
-  425. copy_constant                  $6 = 0x00000006 (8.407791e-45)
-  426. copy_constant                  $7 = 0x00000006 (8.407791e-45)
-  427. copy_constant                  $8 = 0x00000007 (9.809089e-45)
-  428. copy_constant                  $9 = 0x00000008 (1.121039e-44)
-  429. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  430. bitwise_and_2_ints             $2..3 &= $4..5
-  431. bitwise_and_int                $2 &= $3
-  432. bitwise_and_int                $1 &= $2
-  433. copy_slot_masked               ok = Mask($1)
-  434. copy_constant                  $1 = 0x00000007 (9.809089e-45)
-  435. copy_constant                  $2 = 0x00000009 (1.261169e-44)
-  436. copy_constant                  $3 = 0x00000009 (1.261169e-44)
-  437. copy_constant                  $4 = 0x00000009 (1.261169e-44)
-  438. copy_4_slots_masked            x = Mask($1..4)
-  439. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  440. copy_slot_unmasked             $5 = x(3)
-  441. copy_constant                  $6 = 0x00000007 (9.809089e-45)
-  442. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  443. copy_constant                  $8 = 0x00000009 (1.261169e-44)
-  444. copy_constant                  $9 = 0x00000009 (1.261169e-44)
-  445. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  446. bitwise_and_2_ints             $2..3 &= $4..5
-  447. bitwise_and_int                $2 &= $3
-  448. bitwise_and_int                $1 &= $2
-  449. copy_slot_masked               ok = Mask($1)
-  450. copy_constant                  $1 = 0x00000009 (1.261169e-44)
-  451. copy_constant                  $2 = 0x00000009 (1.261169e-44)
-  452. copy_constant                  $3 = 0x0000000A (1.401298e-44)
-  453. copy_constant                  $4 = 0x0000000A (1.401298e-44)
-  454. copy_4_slots_masked            x = Mask($1..4)
-  455. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  456. copy_slot_unmasked             $5 = x(3)
-  457. copy_constant                  $6 = 0x00000009 (1.261169e-44)
-  458. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  459. copy_constant                  $8 = 0x0000000A (1.401298e-44)
-  460. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  461. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  462. bitwise_and_2_ints             $2..3 &= $4..5
-  463. bitwise_and_int                $2 &= $3
-  464. bitwise_and_int                $1 &= $2
-  465. copy_slot_masked               ok = Mask($1)
-  466. copy_constant                  $1 = 0x00000006 (8.407791e-45)
-  467. swizzle_3                      $1..3 = ($1..3).xxx
-  468. copy_3_slots_masked            x(0..2) = Mask($1..3)
-  469. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  470. copy_slot_unmasked             $5 = x(3)
-  471. copy_constant                  $6 = 0x00000006 (8.407791e-45)
-  472. copy_constant                  $7 = 0x00000006 (8.407791e-45)
-  473. copy_constant                  $8 = 0x00000006 (8.407791e-45)
-  474. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  475. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  476. bitwise_and_2_ints             $2..3 &= $4..5
-  477. bitwise_and_int                $2 &= $3
-  478. bitwise_and_int                $1 &= $2
-  479. copy_slot_masked               ok = Mask($1)
-  480. copy_constant                  $1 = 0x00000003 (4.203895e-45)
-  481. copy_slot_unmasked             $2 = $1
-  482. copy_2_slots_masked            x(0..1) = Mask($1..2)
-  483. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  484. copy_slot_unmasked             $5 = x(3)
-  485. copy_constant                  $6 = 0x00000003 (4.203895e-45)
-  486. copy_constant                  $7 = 0x00000003 (4.203895e-45)
-  487. copy_constant                  $8 = 0x00000006 (8.407791e-45)
-  488. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  489. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  490. bitwise_and_2_ints             $2..3 &= $4..5
-  491. bitwise_and_int                $2 &= $3
-  492. bitwise_and_int                $1 &= $2
-  493. copy_slot_masked               ok = Mask($1)
-  494. copy_constant                  $1 = 0x00000006 (8.407791e-45)
-  495. swizzle_4                      $1..4 = ($1..4).xxxx
-  496. copy_4_slots_masked            x = Mask($1..4)
-  497. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  498. copy_slot_unmasked             $5 = x(3)
-  499. copy_constant                  $6 = 0x00000006 (8.407791e-45)
-  500. swizzle_4                      $6..9 = ($6..9).xxxx
-  501. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  376. copy_constant                  $4 = 0x3F800000 (1.0)
+  377. swizzle_4                      $4..7 = ($4..7).xxxx
+  378. add_4_floats                   $0..3 += $4..7
+  379. copy_4_slots_unmasked          _1_x = $0..3
+  380. copy_constant                  $4 = 0x3F800000 (1.0)
+  381. swizzle_4                      $4..7 = ($4..7).xxxx
+  382. sub_4_floats                   $0..3 -= $4..7
+  383. copy_4_slots_unmasked          _1_x = $0..3
+  384. copy_4_slots_unmasked          $0..3 = _0_ok, _1_x(0..2)
+  385. copy_2_slots_unmasked          $4..5 = _1_x(3), _2_unknown
+  386. swizzle_4                      $5..8 = ($5..8).xxxx
+  387. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  388. bitwise_and_2_ints             $1..2 &= $3..4
+  389. bitwise_and_int                $1 &= $2
+  390. bitwise_and_int                $0 &= $1
+  391. copy_slot_unmasked             _0_ok = $0
+  392. copy_slot_unmasked             $0 = _2_unknown
+  393. swizzle_4                      $0..3 = ($0..3).xxxx
+  394. copy_4_slots_unmasked          _1_x = $0..3
+  395. copy_constant                  $4 = 0x3F800000 (1.0)
+  396. swizzle_4                      $4..7 = ($4..7).xxxx
+  397. add_4_floats                   $0..3 += $4..7
+  398. copy_4_slots_unmasked          _1_x = $0..3
+  399. copy_constant                  $4 = 0x3F800000 (1.0)
+  400. swizzle_4                      $4..7 = ($4..7).xxxx
+  401. sub_4_floats                   $0..3 -= $4..7
+  402. copy_4_slots_unmasked          _1_x = $0..3
+  403. copy_4_slots_unmasked          $0..3 = _0_ok, _1_x(0..2)
+  404. copy_2_slots_unmasked          $4..5 = _1_x(3), _2_unknown
+  405. swizzle_4                      $5..8 = ($5..8).xxxx
+  406. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  407. bitwise_and_2_ints             $1..2 &= $3..4
+  408. bitwise_and_int                $1 &= $2
+  409. bitwise_and_int                $0 &= $1
+  410. copy_slot_unmasked             _0_ok = $0
+  411. store_condition_mask           $12 = CondMask
+  412. copy_slot_unmasked             $13 = _0_ok
+  413. zero_slot_unmasked             $0 = 0
+  414. merge_condition_mask           CondMask = $12 & $13
+  415. branch_if_no_active_lanes      branch_if_no_active_lanes +414 (label 1 at #829)
+  416. copy_constant                  ok = 0xFFFFFFFF
+  417. copy_constant                  x(0) = 0x00000006 (8.407791e-45)
+  418. copy_constant                  x(1) = 0x00000006 (8.407791e-45)
+  419. copy_constant                  x(2) = 0x00000007 (9.809089e-45)
+  420. copy_constant                  x(3) = 0x00000008 (1.121039e-44)
+  421. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  422. copy_slot_unmasked             $5 = x(3)
+  423. copy_constant                  $6 = 0x00000006 (8.407791e-45)
+  424. copy_constant                  $7 = 0x00000006 (8.407791e-45)
+  425. copy_constant                  $8 = 0x00000007 (9.809089e-45)
+  426. copy_constant                  $9 = 0x00000008 (1.121039e-44)
+  427. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  428. bitwise_and_2_ints             $2..3 &= $4..5
+  429. bitwise_and_int                $2 &= $3
+  430. bitwise_and_int                $1 &= $2
+  431. copy_slot_masked               ok = Mask($1)
+  432. copy_constant                  $1 = 0x00000007 (9.809089e-45)
+  433. copy_constant                  $2 = 0x00000009 (1.261169e-44)
+  434. copy_constant                  $3 = 0x00000009 (1.261169e-44)
+  435. copy_constant                  $4 = 0x00000009 (1.261169e-44)
+  436. copy_4_slots_masked            x = Mask($1..4)
+  437. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  438. copy_slot_unmasked             $5 = x(3)
+  439. copy_constant                  $6 = 0x00000007 (9.809089e-45)
+  440. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  441. copy_constant                  $8 = 0x00000009 (1.261169e-44)
+  442. copy_constant                  $9 = 0x00000009 (1.261169e-44)
+  443. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  444. bitwise_and_2_ints             $2..3 &= $4..5
+  445. bitwise_and_int                $2 &= $3
+  446. bitwise_and_int                $1 &= $2
+  447. copy_slot_masked               ok = Mask($1)
+  448. copy_constant                  $1 = 0x00000009 (1.261169e-44)
+  449. copy_constant                  $2 = 0x00000009 (1.261169e-44)
+  450. copy_constant                  $3 = 0x0000000A (1.401298e-44)
+  451. copy_constant                  $4 = 0x0000000A (1.401298e-44)
+  452. copy_4_slots_masked            x = Mask($1..4)
+  453. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  454. copy_slot_unmasked             $5 = x(3)
+  455. copy_constant                  $6 = 0x00000009 (1.261169e-44)
+  456. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  457. copy_constant                  $8 = 0x0000000A (1.401298e-44)
+  458. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  459. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  460. bitwise_and_2_ints             $2..3 &= $4..5
+  461. bitwise_and_int                $2 &= $3
+  462. bitwise_and_int                $1 &= $2
+  463. copy_slot_masked               ok = Mask($1)
+  464. copy_constant                  $1 = 0x00000006 (8.407791e-45)
+  465. swizzle_3                      $1..3 = ($1..3).xxx
+  466. copy_3_slots_masked            x(0..2) = Mask($1..3)
+  467. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  468. copy_slot_unmasked             $5 = x(3)
+  469. copy_constant                  $6 = 0x00000006 (8.407791e-45)
+  470. copy_constant                  $7 = 0x00000006 (8.407791e-45)
+  471. copy_constant                  $8 = 0x00000006 (8.407791e-45)
+  472. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  473. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  474. bitwise_and_2_ints             $2..3 &= $4..5
+  475. bitwise_and_int                $2 &= $3
+  476. bitwise_and_int                $1 &= $2
+  477. copy_slot_masked               ok = Mask($1)
+  478. copy_constant                  $1 = 0x00000003 (4.203895e-45)
+  479. copy_slot_unmasked             $2 = $1
+  480. copy_2_slots_masked            x(0..1) = Mask($1..2)
+  481. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  482. copy_slot_unmasked             $5 = x(3)
+  483. copy_constant                  $6 = 0x00000003 (4.203895e-45)
+  484. copy_constant                  $7 = 0x00000003 (4.203895e-45)
+  485. copy_constant                  $8 = 0x00000006 (8.407791e-45)
+  486. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  487. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  488. bitwise_and_2_ints             $2..3 &= $4..5
+  489. bitwise_and_int                $2 &= $3
+  490. bitwise_and_int                $1 &= $2
+  491. copy_slot_masked               ok = Mask($1)
+  492. copy_constant                  $1 = 0x00000006 (8.407791e-45)
+  493. swizzle_4                      $1..4 = ($1..4).xxxx
+  494. copy_4_slots_masked            x = Mask($1..4)
+  495. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  496. copy_slot_unmasked             $5 = x(3)
+  497. copy_constant                  $6 = 0x00000006 (8.407791e-45)
+  498. swizzle_4                      $6..9 = ($6..9).xxxx
+  499. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  500. bitwise_and_2_ints             $2..3 &= $4..5
+  501. bitwise_and_int                $2 &= $3
   502. stack_rewind
-  503. bitwise_and_2_ints             $2..3 &= $4..5
-  504. bitwise_and_int                $2 &= $3
-  505. bitwise_and_int                $1 &= $2
-  506. copy_slot_masked               ok = Mask($1)
-  507. copy_constant                  $1 = 0x00000006 (8.407791e-45)
-  508. copy_constant                  $2 = 0x00000006 (8.407791e-45)
-  509. copy_constant                  $3 = 0x00000007 (9.809089e-45)
-  510. copy_constant                  $4 = 0x00000008 (1.121039e-44)
-  511. copy_4_slots_masked            x = Mask($1..4)
-  512. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  513. copy_slot_unmasked             $5 = x(3)
-  514. copy_constant                  $6 = 0x00000006 (8.407791e-45)
-  515. copy_constant                  $7 = 0x00000006 (8.407791e-45)
-  516. copy_constant                  $8 = 0x00000007 (9.809089e-45)
-  517. copy_constant                  $9 = 0x00000008 (1.121039e-44)
-  518. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  519. bitwise_and_2_ints             $2..3 &= $4..5
-  520. bitwise_and_int                $2 &= $3
-  521. bitwise_and_int                $1 &= $2
-  522. copy_slot_masked               ok = Mask($1)
-  523. copy_constant                  $1 = 0xFFFFFFF9
-  524. copy_constant                  $2 = 0xFFFFFFF7
-  525. copy_constant                  $3 = 0xFFFFFFF7
-  526. copy_constant                  $4 = 0xFFFFFFF7
-  527. copy_4_slots_masked            x = Mask($1..4)
-  528. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  529. copy_slot_unmasked             $5 = x(3)
-  530. copy_constant                  $6 = 0xFFFFFFF9
-  531. copy_constant                  $7 = 0xFFFFFFF7
-  532. copy_constant                  $8 = 0xFFFFFFF7
-  533. copy_constant                  $9 = 0xFFFFFFF7
-  534. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  535. bitwise_and_2_ints             $2..3 &= $4..5
-  536. bitwise_and_int                $2 &= $3
-  537. bitwise_and_int                $1 &= $2
-  538. copy_slot_masked               ok = Mask($1)
-  539. copy_constant                  $1 = 0x00000009 (1.261169e-44)
-  540. copy_constant                  $2 = 0x00000009 (1.261169e-44)
-  541. copy_constant                  $3 = 0x0000000A (1.401298e-44)
-  542. copy_constant                  $4 = 0x0000000A (1.401298e-44)
-  543. copy_4_slots_masked            x = Mask($1..4)
-  544. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  545. copy_slot_unmasked             $5 = x(3)
-  546. copy_constant                  $6 = 0x00000009 (1.261169e-44)
-  547. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  548. copy_constant                  $8 = 0x0000000A (1.401298e-44)
-  549. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  550. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  551. bitwise_and_2_ints             $2..3 &= $4..5
-  552. bitwise_and_int                $2 &= $3
-  553. bitwise_and_int                $1 &= $2
-  554. copy_slot_masked               ok = Mask($1)
-  555. copy_constant                  $1 = 0x00000006 (8.407791e-45)
-  556. swizzle_3                      $1..3 = ($1..3).xxx
-  557. copy_3_slots_masked            x(0..2) = Mask($1..3)
-  558. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  559. copy_slot_unmasked             $5 = x(3)
-  560. copy_constant                  $6 = 0x00000006 (8.407791e-45)
-  561. copy_constant                  $7 = 0x00000006 (8.407791e-45)
-  562. copy_constant                  $8 = 0x00000006 (8.407791e-45)
-  563. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  564. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  565. bitwise_and_2_ints             $2..3 &= $4..5
-  566. bitwise_and_int                $2 &= $3
-  567. bitwise_and_int                $1 &= $2
-  568. copy_slot_masked               ok = Mask($1)
-  569. copy_constant                  $1 = 0x00000008 (1.121039e-44)
-  570. copy_slot_unmasked             $2 = $1
-  571. copy_2_slots_masked            x(0..1) = Mask($1..2)
-  572. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  573. copy_slot_unmasked             $5 = x(3)
-  574. copy_constant                  $6 = 0x00000008 (1.121039e-44)
-  575. copy_constant                  $7 = 0x00000008 (1.121039e-44)
-  576. copy_constant                  $8 = 0x00000006 (8.407791e-45)
-  577. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  578. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  579. bitwise_and_2_ints             $2..3 &= $4..5
-  580. bitwise_and_int                $2 &= $3
-  581. bitwise_and_int                $1 &= $2
-  582. copy_slot_masked               ok = Mask($1)
-  583. copy_constant                  $1 = 0x000000C8 (2.802597e-43)
-  584. copy_constant                  $2 = 0x00000064 (1.401298e-43)
-  585. copy_constant                  $3 = 0x00000032 (7.006492e-44)
-  586. copy_constant                  $4 = 0x00000019 (3.503246e-44)
-  587. copy_4_slots_masked            x = Mask($1..4)
-  588. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  589. copy_slot_unmasked             $5 = x(3)
-  590. copy_constant                  $6 = 0x000000C8 (2.802597e-43)
-  591. copy_constant                  $7 = 0x00000064 (1.401298e-43)
-  592. copy_constant                  $8 = 0x00000032 (7.006492e-44)
-  593. copy_constant                  $9 = 0x00000019 (3.503246e-44)
-  594. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  595. bitwise_and_2_ints             $2..3 &= $4..5
-  596. bitwise_and_int                $2 &= $3
-  597. bitwise_and_int                $1 &= $2
-  598. copy_slot_masked               ok = Mask($1)
-  599. copy_constant                  $1 = 0x00000006 (8.407791e-45)
-  600. swizzle_4                      $1..4 = ($1..4).xxxx
-  601. copy_4_slots_masked            x = Mask($1..4)
-  602. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  603. copy_slot_unmasked             $5 = x(3)
-  604. copy_constant                  $6 = 0x00000006 (8.407791e-45)
-  605. swizzle_4                      $6..9 = ($6..9).xxxx
-  606. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  607. bitwise_and_2_ints             $2..3 &= $4..5
-  608. bitwise_and_int                $2 &= $3
-  609. bitwise_and_int                $1 &= $2
-  610. copy_slot_masked               ok = Mask($1)
-  611. copy_constant                  $1 = unknownInput
-  612. cast_to_int_from_float         $1 = FloatToInt($1)
-  613. copy_slot_unmasked             unknown = $1
-  614. swizzle_4                      $1..4 = ($1..4).xxxx
-  615. copy_4_slots_masked            x = Mask($1..4)
-  616. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  617. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  618. swizzle_4                      $6..9 = ($6..9).xxxx
-  619. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  620. bitwise_and_2_ints             $2..3 &= $4..5
-  621. bitwise_and_int                $2 &= $3
-  622. bitwise_and_int                $1 &= $2
-  623. copy_slot_masked               ok = Mask($1)
-  624. zero_4_slots_unmasked          $1..4 = 0
-  625. copy_4_slots_masked            x = Mask($1..4)
-  626. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  627. copy_slot_unmasked             $5 = x(3)
-  628. zero_4_slots_unmasked          $6..9 = 0
-  629. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  630. bitwise_and_2_ints             $2..3 &= $4..5
-  631. bitwise_and_int                $2 &= $3
-  632. bitwise_and_int                $1 &= $2
-  633. copy_slot_masked               ok = Mask($1)
-  634. zero_4_slots_unmasked          $1..4 = 0
-  635. copy_slot_unmasked             $5 = unknown
-  636. swizzle_4                      $5..8 = ($5..8).xxxx
-  637. div_4_ints                     $1..4 /= $5..8
-  638. copy_4_slots_masked            x = Mask($1..4)
-  639. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  640. copy_slot_unmasked             $5 = x(3)
-  641. zero_4_slots_unmasked          $6..9 = 0
-  642. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  643. bitwise_and_2_ints             $2..3 &= $4..5
-  644. bitwise_and_int                $2 &= $3
-  645. bitwise_and_int                $1 &= $2
-  646. copy_slot_masked               ok = Mask($1)
-  647. copy_slot_unmasked             $1 = unknown
-  648. swizzle_4                      $1..4 = ($1..4).xxxx
-  649. copy_4_slots_masked            x = Mask($1..4)
-  650. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  651. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  652. swizzle_4                      $6..9 = ($6..9).xxxx
-  653. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  654. bitwise_and_2_ints             $2..3 &= $4..5
-  655. bitwise_and_int                $2 &= $3
-  656. bitwise_and_int                $1 &= $2
-  657. copy_slot_masked               ok = Mask($1)
-  658. copy_slot_unmasked             $1 = unknown
-  659. swizzle_4                      $1..4 = ($1..4).xxxx
-  660. copy_4_slots_masked            x = Mask($1..4)
-  661. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  662. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  663. swizzle_4                      $6..9 = ($6..9).xxxx
-  664. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  665. bitwise_and_2_ints             $2..3 &= $4..5
-  666. bitwise_and_int                $2 &= $3
-  667. bitwise_and_int                $1 &= $2
-  668. copy_slot_masked               ok = Mask($1)
-  669. copy_slot_unmasked             $1 = unknown
-  670. swizzle_4                      $1..4 = ($1..4).xxxx
-  671. copy_4_slots_masked            x = Mask($1..4)
-  672. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  673. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  674. swizzle_4                      $6..9 = ($6..9).xxxx
-  675. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  676. bitwise_and_2_ints             $2..3 &= $4..5
-  677. bitwise_and_int                $2 &= $3
-  678. bitwise_and_int                $1 &= $2
-  679. copy_slot_masked               ok = Mask($1)
-  680. copy_slot_unmasked             $1 = unknown
-  681. swizzle_4                      $1..4 = ($1..4).xxxx
-  682. copy_4_slots_masked            x = Mask($1..4)
-  683. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  684. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  685. swizzle_4                      $6..9 = ($6..9).xxxx
-  686. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  687. bitwise_and_2_ints             $2..3 &= $4..5
-  688. bitwise_and_int                $2 &= $3
-  689. bitwise_and_int                $1 &= $2
-  690. copy_slot_masked               ok = Mask($1)
-  691. copy_slot_unmasked             $1 = unknown
-  692. swizzle_4                      $1..4 = ($1..4).xxxx
-  693. copy_4_slots_masked            x = Mask($1..4)
-  694. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  695. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  696. swizzle_4                      $6..9 = ($6..9).xxxx
-  697. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  698. bitwise_and_2_ints             $2..3 &= $4..5
-  699. bitwise_and_int                $2 &= $3
-  700. bitwise_and_int                $1 &= $2
-  701. copy_slot_masked               ok = Mask($1)
-  702. copy_slot_unmasked             $1 = unknown
-  703. swizzle_4                      $1..4 = ($1..4).xxxx
-  704. copy_4_slots_masked            x = Mask($1..4)
-  705. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  706. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  707. swizzle_4                      $6..9 = ($6..9).xxxx
-  708. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  709. bitwise_and_2_ints             $2..3 &= $4..5
-  710. bitwise_and_int                $2 &= $3
-  711. bitwise_and_int                $1 &= $2
-  712. copy_slot_masked               ok = Mask($1)
-  713. zero_4_slots_unmasked          $1..4 = 0
-  714. copy_4_slots_masked            x = Mask($1..4)
-  715. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  716. copy_slot_unmasked             $5 = x(3)
-  717. zero_4_slots_unmasked          $6..9 = 0
-  718. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  719. bitwise_and_2_ints             $2..3 &= $4..5
-  720. bitwise_and_int                $2 &= $3
-  721. bitwise_and_int                $1 &= $2
-  722. copy_slot_masked               ok = Mask($1)
-  723. zero_4_slots_unmasked          $1..4 = 0
-  724. copy_slot_unmasked             $5 = unknown
-  725. swizzle_4                      $5..8 = ($5..8).xxxx
-  726. div_4_ints                     $1..4 /= $5..8
-  727. copy_4_slots_masked            x = Mask($1..4)
-  728. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  729. copy_slot_unmasked             $5 = x(3)
-  730. zero_4_slots_unmasked          $6..9 = 0
-  731. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  732. bitwise_and_2_ints             $2..3 &= $4..5
-  733. bitwise_and_int                $2 &= $3
-  734. bitwise_and_int                $1 &= $2
-  735. copy_slot_masked               ok = Mask($1)
-  736. copy_slot_unmasked             $1 = unknown
-  737. swizzle_4                      $1..4 = ($1..4).xxxx
-  738. copy_4_slots_masked            x = Mask($1..4)
-  739. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  740. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  741. swizzle_4                      $6..9 = ($6..9).xxxx
-  742. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  743. bitwise_and_2_ints             $2..3 &= $4..5
-  744. bitwise_and_int                $2 &= $3
-  745. bitwise_and_int                $1 &= $2
-  746. copy_slot_masked               ok = Mask($1)
-  747. copy_slot_unmasked             $1 = unknown
-  748. swizzle_4                      $1..4 = ($1..4).xxxx
-  749. copy_4_slots_masked            x = Mask($1..4)
-  750. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  751. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  752. swizzle_4                      $6..9 = ($6..9).xxxx
-  753. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  754. bitwise_and_2_ints             $2..3 &= $4..5
-  755. bitwise_and_int                $2 &= $3
-  756. bitwise_and_int                $1 &= $2
-  757. copy_slot_masked               ok = Mask($1)
-  758. zero_4_slots_unmasked          $1..4 = 0
-  759. copy_4_slots_masked            x = Mask($1..4)
-  760. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  761. copy_slot_unmasked             $5 = x(3)
-  762. zero_4_slots_unmasked          $6..9 = 0
-  763. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  764. bitwise_and_2_ints             $2..3 &= $4..5
-  765. bitwise_and_int                $2 &= $3
-  766. bitwise_and_int                $1 &= $2
-  767. copy_slot_masked               ok = Mask($1)
-  768. copy_slot_unmasked             $1 = unknown
-  769. swizzle_4                      $1..4 = ($1..4).xxxx
-  770. copy_4_slots_masked            x = Mask($1..4)
-  771. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  772. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  773. swizzle_4                      $6..9 = ($6..9).xxxx
-  774. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  775. bitwise_and_2_ints             $2..3 &= $4..5
-  776. bitwise_and_int                $2 &= $3
-  777. bitwise_and_int                $1 &= $2
-  778. copy_slot_masked               ok = Mask($1)
-  779. copy_slot_unmasked             $1 = unknown
-  780. swizzle_4                      $1..4 = ($1..4).xxxx
-  781. copy_4_slots_masked            x = Mask($1..4)
-  782. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  783. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  784. swizzle_4                      $6..9 = ($6..9).xxxx
-  785. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  786. bitwise_and_2_ints             $2..3 &= $4..5
-  787. bitwise_and_int                $2 &= $3
-  788. bitwise_and_int                $1 &= $2
-  789. copy_slot_masked               ok = Mask($1)
-  790. copy_slot_unmasked             $1 = unknown
-  791. swizzle_4                      $1..4 = ($1..4).xxxx
-  792. copy_4_slots_masked            x = Mask($1..4)
-  793. copy_4_slots_unmasked          $1..4 = x
-  794. copy_constant                  $5 = 0x00000001 (1.401298e-45)
-  795. swizzle_4                      $5..8 = ($5..8).xxxx
-  796. add_4_ints                     $1..4 += $5..8
-  797. copy_4_slots_masked            x = Mask($1..4)
-  798. copy_4_slots_unmasked          $1..4 = x
-  799. copy_constant                  $5 = 0x00000001 (1.401298e-45)
-  800. swizzle_4                      $5..8 = ($5..8).xxxx
-  801. sub_4_ints                     $1..4 -= $5..8
-  802. copy_4_slots_masked            x = Mask($1..4)
-  803. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  804. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  805. swizzle_4                      $6..9 = ($6..9).xxxx
-  806. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  807. bitwise_and_2_ints             $2..3 &= $4..5
-  808. bitwise_and_int                $2 &= $3
-  809. bitwise_and_int                $1 &= $2
-  810. copy_slot_masked               ok = Mask($1)
-  811. copy_slot_unmasked             $1 = unknown
-  812. swizzle_4                      $1..4 = ($1..4).xxxx
+  503. bitwise_and_int                $1 &= $2
+  504. copy_slot_masked               ok = Mask($1)
+  505. copy_constant                  $1 = 0x00000006 (8.407791e-45)
+  506. copy_constant                  $2 = 0x00000006 (8.407791e-45)
+  507. copy_constant                  $3 = 0x00000007 (9.809089e-45)
+  508. copy_constant                  $4 = 0x00000008 (1.121039e-44)
+  509. copy_4_slots_masked            x = Mask($1..4)
+  510. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  511. copy_slot_unmasked             $5 = x(3)
+  512. copy_constant                  $6 = 0x00000006 (8.407791e-45)
+  513. copy_constant                  $7 = 0x00000006 (8.407791e-45)
+  514. copy_constant                  $8 = 0x00000007 (9.809089e-45)
+  515. copy_constant                  $9 = 0x00000008 (1.121039e-44)
+  516. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  517. bitwise_and_2_ints             $2..3 &= $4..5
+  518. bitwise_and_int                $2 &= $3
+  519. bitwise_and_int                $1 &= $2
+  520. copy_slot_masked               ok = Mask($1)
+  521. copy_constant                  $1 = 0xFFFFFFF9
+  522. copy_constant                  $2 = 0xFFFFFFF7
+  523. copy_constant                  $3 = 0xFFFFFFF7
+  524. copy_constant                  $4 = 0xFFFFFFF7
+  525. copy_4_slots_masked            x = Mask($1..4)
+  526. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  527. copy_slot_unmasked             $5 = x(3)
+  528. copy_constant                  $6 = 0xFFFFFFF9
+  529. copy_constant                  $7 = 0xFFFFFFF7
+  530. copy_constant                  $8 = 0xFFFFFFF7
+  531. copy_constant                  $9 = 0xFFFFFFF7
+  532. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  533. bitwise_and_2_ints             $2..3 &= $4..5
+  534. bitwise_and_int                $2 &= $3
+  535. bitwise_and_int                $1 &= $2
+  536. copy_slot_masked               ok = Mask($1)
+  537. copy_constant                  $1 = 0x00000009 (1.261169e-44)
+  538. copy_constant                  $2 = 0x00000009 (1.261169e-44)
+  539. copy_constant                  $3 = 0x0000000A (1.401298e-44)
+  540. copy_constant                  $4 = 0x0000000A (1.401298e-44)
+  541. copy_4_slots_masked            x = Mask($1..4)
+  542. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  543. copy_slot_unmasked             $5 = x(3)
+  544. copy_constant                  $6 = 0x00000009 (1.261169e-44)
+  545. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  546. copy_constant                  $8 = 0x0000000A (1.401298e-44)
+  547. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  548. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  549. bitwise_and_2_ints             $2..3 &= $4..5
+  550. bitwise_and_int                $2 &= $3
+  551. bitwise_and_int                $1 &= $2
+  552. copy_slot_masked               ok = Mask($1)
+  553. copy_constant                  $1 = 0x00000006 (8.407791e-45)
+  554. swizzle_3                      $1..3 = ($1..3).xxx
+  555. copy_3_slots_masked            x(0..2) = Mask($1..3)
+  556. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  557. copy_slot_unmasked             $5 = x(3)
+  558. copy_constant                  $6 = 0x00000006 (8.407791e-45)
+  559. copy_constant                  $7 = 0x00000006 (8.407791e-45)
+  560. copy_constant                  $8 = 0x00000006 (8.407791e-45)
+  561. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  562. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  563. bitwise_and_2_ints             $2..3 &= $4..5
+  564. bitwise_and_int                $2 &= $3
+  565. bitwise_and_int                $1 &= $2
+  566. copy_slot_masked               ok = Mask($1)
+  567. copy_constant                  $1 = 0x00000008 (1.121039e-44)
+  568. copy_slot_unmasked             $2 = $1
+  569. copy_2_slots_masked            x(0..1) = Mask($1..2)
+  570. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  571. copy_slot_unmasked             $5 = x(3)
+  572. copy_constant                  $6 = 0x00000008 (1.121039e-44)
+  573. copy_constant                  $7 = 0x00000008 (1.121039e-44)
+  574. copy_constant                  $8 = 0x00000006 (8.407791e-45)
+  575. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  576. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  577. bitwise_and_2_ints             $2..3 &= $4..5
+  578. bitwise_and_int                $2 &= $3
+  579. bitwise_and_int                $1 &= $2
+  580. copy_slot_masked               ok = Mask($1)
+  581. copy_constant                  $1 = 0x000000C8 (2.802597e-43)
+  582. copy_constant                  $2 = 0x00000064 (1.401298e-43)
+  583. copy_constant                  $3 = 0x00000032 (7.006492e-44)
+  584. copy_constant                  $4 = 0x00000019 (3.503246e-44)
+  585. copy_4_slots_masked            x = Mask($1..4)
+  586. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  587. copy_slot_unmasked             $5 = x(3)
+  588. copy_constant                  $6 = 0x000000C8 (2.802597e-43)
+  589. copy_constant                  $7 = 0x00000064 (1.401298e-43)
+  590. copy_constant                  $8 = 0x00000032 (7.006492e-44)
+  591. copy_constant                  $9 = 0x00000019 (3.503246e-44)
+  592. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  593. bitwise_and_2_ints             $2..3 &= $4..5
+  594. bitwise_and_int                $2 &= $3
+  595. bitwise_and_int                $1 &= $2
+  596. copy_slot_masked               ok = Mask($1)
+  597. copy_constant                  $1 = 0x00000006 (8.407791e-45)
+  598. swizzle_4                      $1..4 = ($1..4).xxxx
+  599. copy_4_slots_masked            x = Mask($1..4)
+  600. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  601. copy_slot_unmasked             $5 = x(3)
+  602. copy_constant                  $6 = 0x00000006 (8.407791e-45)
+  603. swizzle_4                      $6..9 = ($6..9).xxxx
+  604. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  605. bitwise_and_2_ints             $2..3 &= $4..5
+  606. bitwise_and_int                $2 &= $3
+  607. bitwise_and_int                $1 &= $2
+  608. copy_slot_masked               ok = Mask($1)
+  609. copy_constant                  $1 = unknownInput
+  610. cast_to_int_from_float         $1 = FloatToInt($1)
+  611. copy_slot_unmasked             unknown = $1
+  612. swizzle_4                      $1..4 = ($1..4).xxxx
+  613. copy_4_slots_masked            x = Mask($1..4)
+  614. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  615. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  616. swizzle_4                      $6..9 = ($6..9).xxxx
+  617. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  618. bitwise_and_2_ints             $2..3 &= $4..5
+  619. bitwise_and_int                $2 &= $3
+  620. bitwise_and_int                $1 &= $2
+  621. copy_slot_masked               ok = Mask($1)
+  622. zero_4_slots_unmasked          $1..4 = 0
+  623. copy_4_slots_masked            x = Mask($1..4)
+  624. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  625. copy_slot_unmasked             $5 = x(3)
+  626. zero_4_slots_unmasked          $6..9 = 0
+  627. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  628. bitwise_and_2_ints             $2..3 &= $4..5
+  629. bitwise_and_int                $2 &= $3
+  630. bitwise_and_int                $1 &= $2
+  631. copy_slot_masked               ok = Mask($1)
+  632. zero_4_slots_unmasked          $1..4 = 0
+  633. copy_slot_unmasked             $5 = unknown
+  634. swizzle_4                      $5..8 = ($5..8).xxxx
+  635. div_4_ints                     $1..4 /= $5..8
+  636. copy_4_slots_masked            x = Mask($1..4)
+  637. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  638. copy_slot_unmasked             $5 = x(3)
+  639. zero_4_slots_unmasked          $6..9 = 0
+  640. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  641. bitwise_and_2_ints             $2..3 &= $4..5
+  642. bitwise_and_int                $2 &= $3
+  643. bitwise_and_int                $1 &= $2
+  644. copy_slot_masked               ok = Mask($1)
+  645. copy_slot_unmasked             $1 = unknown
+  646. swizzle_4                      $1..4 = ($1..4).xxxx
+  647. copy_4_slots_masked            x = Mask($1..4)
+  648. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  649. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  650. swizzle_4                      $6..9 = ($6..9).xxxx
+  651. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  652. bitwise_and_2_ints             $2..3 &= $4..5
+  653. bitwise_and_int                $2 &= $3
+  654. bitwise_and_int                $1 &= $2
+  655. copy_slot_masked               ok = Mask($1)
+  656. copy_slot_unmasked             $1 = unknown
+  657. swizzle_4                      $1..4 = ($1..4).xxxx
+  658. copy_4_slots_masked            x = Mask($1..4)
+  659. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  660. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  661. swizzle_4                      $6..9 = ($6..9).xxxx
+  662. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  663. bitwise_and_2_ints             $2..3 &= $4..5
+  664. bitwise_and_int                $2 &= $3
+  665. bitwise_and_int                $1 &= $2
+  666. copy_slot_masked               ok = Mask($1)
+  667. copy_slot_unmasked             $1 = unknown
+  668. swizzle_4                      $1..4 = ($1..4).xxxx
+  669. copy_4_slots_masked            x = Mask($1..4)
+  670. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  671. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  672. swizzle_4                      $6..9 = ($6..9).xxxx
+  673. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  674. bitwise_and_2_ints             $2..3 &= $4..5
+  675. bitwise_and_int                $2 &= $3
+  676. bitwise_and_int                $1 &= $2
+  677. copy_slot_masked               ok = Mask($1)
+  678. copy_slot_unmasked             $1 = unknown
+  679. swizzle_4                      $1..4 = ($1..4).xxxx
+  680. copy_4_slots_masked            x = Mask($1..4)
+  681. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  682. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  683. swizzle_4                      $6..9 = ($6..9).xxxx
+  684. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  685. bitwise_and_2_ints             $2..3 &= $4..5
+  686. bitwise_and_int                $2 &= $3
+  687. bitwise_and_int                $1 &= $2
+  688. copy_slot_masked               ok = Mask($1)
+  689. copy_slot_unmasked             $1 = unknown
+  690. swizzle_4                      $1..4 = ($1..4).xxxx
+  691. copy_4_slots_masked            x = Mask($1..4)
+  692. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  693. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  694. swizzle_4                      $6..9 = ($6..9).xxxx
+  695. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  696. bitwise_and_2_ints             $2..3 &= $4..5
+  697. bitwise_and_int                $2 &= $3
+  698. bitwise_and_int                $1 &= $2
+  699. copy_slot_masked               ok = Mask($1)
+  700. copy_slot_unmasked             $1 = unknown
+  701. swizzle_4                      $1..4 = ($1..4).xxxx
+  702. copy_4_slots_masked            x = Mask($1..4)
+  703. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  704. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  705. swizzle_4                      $6..9 = ($6..9).xxxx
+  706. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  707. bitwise_and_2_ints             $2..3 &= $4..5
+  708. bitwise_and_int                $2 &= $3
+  709. bitwise_and_int                $1 &= $2
+  710. copy_slot_masked               ok = Mask($1)
+  711. zero_4_slots_unmasked          $1..4 = 0
+  712. copy_4_slots_masked            x = Mask($1..4)
+  713. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  714. copy_slot_unmasked             $5 = x(3)
+  715. zero_4_slots_unmasked          $6..9 = 0
+  716. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  717. bitwise_and_2_ints             $2..3 &= $4..5
+  718. bitwise_and_int                $2 &= $3
+  719. bitwise_and_int                $1 &= $2
+  720. copy_slot_masked               ok = Mask($1)
+  721. zero_4_slots_unmasked          $1..4 = 0
+  722. copy_slot_unmasked             $5 = unknown
+  723. swizzle_4                      $5..8 = ($5..8).xxxx
+  724. div_4_ints                     $1..4 /= $5..8
+  725. copy_4_slots_masked            x = Mask($1..4)
+  726. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  727. copy_slot_unmasked             $5 = x(3)
+  728. zero_4_slots_unmasked          $6..9 = 0
+  729. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  730. bitwise_and_2_ints             $2..3 &= $4..5
+  731. bitwise_and_int                $2 &= $3
+  732. bitwise_and_int                $1 &= $2
+  733. copy_slot_masked               ok = Mask($1)
+  734. copy_slot_unmasked             $1 = unknown
+  735. swizzle_4                      $1..4 = ($1..4).xxxx
+  736. copy_4_slots_masked            x = Mask($1..4)
+  737. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  738. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  739. swizzle_4                      $6..9 = ($6..9).xxxx
+  740. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  741. bitwise_and_2_ints             $2..3 &= $4..5
+  742. bitwise_and_int                $2 &= $3
+  743. bitwise_and_int                $1 &= $2
+  744. copy_slot_masked               ok = Mask($1)
+  745. copy_slot_unmasked             $1 = unknown
+  746. swizzle_4                      $1..4 = ($1..4).xxxx
+  747. copy_4_slots_masked            x = Mask($1..4)
+  748. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  749. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  750. swizzle_4                      $6..9 = ($6..9).xxxx
+  751. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  752. bitwise_and_2_ints             $2..3 &= $4..5
+  753. bitwise_and_int                $2 &= $3
+  754. bitwise_and_int                $1 &= $2
+  755. copy_slot_masked               ok = Mask($1)
+  756. zero_4_slots_unmasked          $1..4 = 0
+  757. copy_4_slots_masked            x = Mask($1..4)
+  758. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  759. copy_slot_unmasked             $5 = x(3)
+  760. zero_4_slots_unmasked          $6..9 = 0
+  761. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  762. bitwise_and_2_ints             $2..3 &= $4..5
+  763. bitwise_and_int                $2 &= $3
+  764. bitwise_and_int                $1 &= $2
+  765. copy_slot_masked               ok = Mask($1)
+  766. copy_slot_unmasked             $1 = unknown
+  767. swizzle_4                      $1..4 = ($1..4).xxxx
+  768. copy_4_slots_masked            x = Mask($1..4)
+  769. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  770. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  771. swizzle_4                      $6..9 = ($6..9).xxxx
+  772. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  773. bitwise_and_2_ints             $2..3 &= $4..5
+  774. bitwise_and_int                $2 &= $3
+  775. bitwise_and_int                $1 &= $2
+  776. copy_slot_masked               ok = Mask($1)
+  777. copy_slot_unmasked             $1 = unknown
+  778. swizzle_4                      $1..4 = ($1..4).xxxx
+  779. copy_4_slots_masked            x = Mask($1..4)
+  780. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  781. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  782. swizzle_4                      $6..9 = ($6..9).xxxx
+  783. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  784. bitwise_and_2_ints             $2..3 &= $4..5
+  785. bitwise_and_int                $2 &= $3
+  786. bitwise_and_int                $1 &= $2
+  787. copy_slot_masked               ok = Mask($1)
+  788. copy_slot_unmasked             $1 = unknown
+  789. swizzle_4                      $1..4 = ($1..4).xxxx
+  790. copy_4_slots_masked            x = Mask($1..4)
+  791. copy_constant                  $5 = 0x00000001 (1.401298e-45)
+  792. swizzle_4                      $5..8 = ($5..8).xxxx
+  793. add_4_ints                     $1..4 += $5..8
+  794. copy_4_slots_masked            x = Mask($1..4)
+  795. copy_constant                  $5 = 0x00000001 (1.401298e-45)
+  796. swizzle_4                      $5..8 = ($5..8).xxxx
+  797. sub_4_ints                     $1..4 -= $5..8
+  798. copy_4_slots_masked            x = Mask($1..4)
+  799. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  800. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  801. swizzle_4                      $6..9 = ($6..9).xxxx
+  802. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  803. bitwise_and_2_ints             $2..3 &= $4..5
+  804. bitwise_and_int                $2 &= $3
+  805. bitwise_and_int                $1 &= $2
+  806. copy_slot_masked               ok = Mask($1)
+  807. copy_slot_unmasked             $1 = unknown
+  808. swizzle_4                      $1..4 = ($1..4).xxxx
+  809. copy_4_slots_masked            x = Mask($1..4)
+  810. copy_constant                  $5 = 0x00000001 (1.401298e-45)
+  811. swizzle_4                      $5..8 = ($5..8).xxxx
+  812. add_4_ints                     $1..4 += $5..8
   813. copy_4_slots_masked            x = Mask($1..4)
   814. copy_constant                  $5 = 0x00000001 (1.401298e-45)
   815. swizzle_4                      $5..8 = ($5..8).xxxx
-  816. add_4_ints                     $1..4 += $5..8
+  816. sub_4_ints                     $1..4 -= $5..8
   817. copy_4_slots_masked            x = Mask($1..4)
-  818. copy_constant                  $5 = 0x00000001 (1.401298e-45)
-  819. swizzle_4                      $5..8 = ($5..8).xxxx
-  820. sub_4_ints                     $1..4 -= $5..8
-  821. copy_4_slots_masked            x = Mask($1..4)
-  822. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
-  823. copy_2_slots_unmasked          $5..6 = x(3), unknown
-  824. swizzle_4                      $6..9 = ($6..9).xxxx
-  825. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  826. bitwise_and_2_ints             $2..3 &= $4..5
-  827. bitwise_and_int                $2 &= $3
-  828. bitwise_and_int                $1 &= $2
-  829. copy_slot_masked               ok = Mask($1)
-  830. copy_slot_masked               [test_int].result = Mask($1)
-  831. label                          label 0x00000002
-  832. copy_slot_masked               $0 = Mask($1)
-  833. label                          label 0x00000001
-  834. load_condition_mask            CondMask = $12
-  835. swizzle_4                      $0..3 = ($0..3).xxxx
-  836. copy_4_constants               $4..7 = colorRed
-  837. copy_4_constants               $8..11 = colorGreen
-  838. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  839. copy_4_slots_unmasked          [main].result = $0..3
-  840. load_src                       src.rgba = [main].result
+  818. copy_4_slots_unmasked          $1..4 = ok, x(0..2)
+  819. copy_2_slots_unmasked          $5..6 = x(3), unknown
+  820. swizzle_4                      $6..9 = ($6..9).xxxx
+  821. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  822. bitwise_and_2_ints             $2..3 &= $4..5
+  823. bitwise_and_int                $2 &= $3
+  824. bitwise_and_int                $1 &= $2
+  825. copy_slot_masked               ok = Mask($1)
+  826. copy_slot_masked               [test_int].result = Mask($1)
+  827. label                          label 0x00000002
+  828. copy_slot_masked               $0 = Mask($1)
+  829. label                          label 0x00000001
+  830. load_condition_mask            CondMask = $12
+  831. swizzle_4                      $0..3 = ($0..3).xxxx
+  832. copy_4_constants               $4..7 = colorRed
+  833. copy_4_constants               $8..11 = colorGreen
+  834. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  835. copy_4_slots_unmasked          [main].result = $0..3
+  836. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/folding/VectorVectorFolding.skrp b/tests/sksl/folding/VectorVectorFolding.skrp
index 5f9d206..a32d888 100644
--- a/tests/sksl/folding/VectorVectorFolding.skrp
+++ b/tests/sksl/folding/VectorVectorFolding.skrp
@@ -17,145 +17,139 @@
    17. copy_slot_unmasked             $0 = _0_unknown
    18. swizzle_4                      $0..3 = ($0..3).xxxx
    19. copy_4_slots_unmasked          _2_val = $0..3
-   20. copy_4_slots_unmasked          $0..3 = _2_val
-   21. copy_constant                  $4 = 0x3F800000 (1.0)
-   22. swizzle_4                      $4..7 = ($4..7).xxxx
-   23. add_4_floats                   $0..3 += $4..7
-   24. copy_4_slots_unmasked          _2_val = $0..3
-   25. copy_4_slots_unmasked          $0..3 = _2_val
-   26. copy_constant                  $4 = 0x3F800000 (1.0)
-   27. swizzle_4                      $4..7 = ($4..7).xxxx
-   28. sub_4_floats                   $0..3 -= $4..7
-   29. copy_4_slots_unmasked          _2_val = $0..3
-   30. copy_constant                  $4 = 0x3F800000 (1.0)
-   31. swizzle_4                      $4..7 = ($4..7).xxxx
-   32. add_4_floats                   $0..3 += $4..7
-   33. copy_4_slots_unmasked          _2_val = $0..3
-   34. copy_constant                  $4 = 0x3F800000 (1.0)
-   35. swizzle_4                      $4..7 = ($4..7).xxxx
-   36. sub_4_floats                   $0..3 -= $4..7
-   37. copy_4_slots_unmasked          _2_val = $0..3
-   38. copy_4_slots_unmasked          $0..3 = _1_ok, _2_val(0..2)
-   39. copy_slot_unmasked             $4 = _2_val(3)
-   40. copy_slot_unmasked             $5 = _0_unknown
-   41. swizzle_4                      $5..8 = ($5..8).xxxx
-   42. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-   43. bitwise_and_2_ints             $1..2 &= $3..4
-   44. bitwise_and_int                $1 &= $2
-   45. bitwise_and_int                $0 &= $1
-   46. copy_slot_unmasked             _1_ok = $0
-   47. copy_4_slots_unmasked          $0..3 = _2_val
-   48. copy_constant                  $4 = 0x40000000 (2.0)
-   49. swizzle_4                      $4..7 = ($4..7).xxxx
-   50. mul_4_floats                   $0..3 *= $4..7
-   51. copy_4_slots_unmasked          _2_val = $0..3
-   52. copy_4_slots_unmasked          $0..3 = _2_val
-   53. copy_constant                  $4 = 0x3F000000 (0.5)
-   54. swizzle_4                      $4..7 = ($4..7).xxxx
-   55. mul_4_floats                   $0..3 *= $4..7
-   56. copy_4_slots_unmasked          _2_val = $0..3
-   57. copy_constant                  $4 = 0x40000000 (2.0)
-   58. swizzle_4                      $4..7 = ($4..7).xxxx
-   59. mul_4_floats                   $0..3 *= $4..7
-   60. copy_4_slots_unmasked          _2_val = $0..3
-   61. copy_constant                  $4 = 0x3F000000 (0.5)
-   62. swizzle_4                      $4..7 = ($4..7).xxxx
-   63. mul_4_floats                   $0..3 *= $4..7
-   64. copy_4_slots_unmasked          _2_val = $0..3
-   65. copy_4_slots_unmasked          $0..3 = _1_ok, _2_val(0..2)
-   66. copy_slot_unmasked             $4 = _2_val(3)
-   67. copy_slot_unmasked             $5 = _0_unknown
-   68. swizzle_4                      $5..8 = ($5..8).xxxx
-   69. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-   70. bitwise_and_2_ints             $1..2 &= $3..4
-   71. bitwise_and_int                $1 &= $2
-   72. bitwise_and_int                $0 &= $1
-   73. copy_slot_unmasked             _1_ok = $0
-   74. store_condition_mask           $12 = CondMask
-   75. copy_slot_unmasked             $13 = _1_ok
-   76. zero_slot_unmasked             $0 = 0
-   77. merge_condition_mask           CondMask = $12 & $13
-   78. branch_if_no_active_lanes      branch_if_no_active_lanes +76 (label 1 at #154)
-   79. copy_constant                  $1 = unknownInput
-   80. cast_to_int_from_float         $1 = FloatToInt($1)
-   81. copy_slot_unmasked             unknown = $1
-   82. copy_constant                  ok = 0xFFFFFFFF
-   83. copy_slot_unmasked             $1 = ok
-   84. zero_4_slots_unmasked          $2..5 = 0
-   85. copy_slot_unmasked             $6 = unknown
-   86. swizzle_4                      $6..9 = ($6..9).xxxx
-   87. div_4_ints                     $2..5 /= $6..9
-   88. zero_4_slots_unmasked          $6..9 = 0
-   89. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-   90. bitwise_and_2_ints             $2..3 &= $4..5
-   91. bitwise_and_int                $2 &= $3
-   92. bitwise_and_int                $1 &= $2
-   93. copy_slot_masked               ok = Mask($1)
-   94. copy_slot_unmasked             $1 = unknown
-   95. swizzle_4                      $1..4 = ($1..4).xxxx
-   96. copy_4_slots_unmasked          val = $1..4
-   97. copy_4_slots_unmasked          $1..4 = val
+   20. copy_constant                  $4 = 0x3F800000 (1.0)
+   21. swizzle_4                      $4..7 = ($4..7).xxxx
+   22. add_4_floats                   $0..3 += $4..7
+   23. copy_4_slots_unmasked          _2_val = $0..3
+   24. copy_constant                  $4 = 0x3F800000 (1.0)
+   25. swizzle_4                      $4..7 = ($4..7).xxxx
+   26. sub_4_floats                   $0..3 -= $4..7
+   27. copy_4_slots_unmasked          _2_val = $0..3
+   28. copy_constant                  $4 = 0x3F800000 (1.0)
+   29. swizzle_4                      $4..7 = ($4..7).xxxx
+   30. add_4_floats                   $0..3 += $4..7
+   31. copy_4_slots_unmasked          _2_val = $0..3
+   32. copy_constant                  $4 = 0x3F800000 (1.0)
+   33. swizzle_4                      $4..7 = ($4..7).xxxx
+   34. sub_4_floats                   $0..3 -= $4..7
+   35. copy_4_slots_unmasked          _2_val = $0..3
+   36. copy_4_slots_unmasked          $0..3 = _1_ok, _2_val(0..2)
+   37. copy_slot_unmasked             $4 = _2_val(3)
+   38. copy_slot_unmasked             $5 = _0_unknown
+   39. swizzle_4                      $5..8 = ($5..8).xxxx
+   40. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   41. bitwise_and_2_ints             $1..2 &= $3..4
+   42. bitwise_and_int                $1 &= $2
+   43. bitwise_and_int                $0 &= $1
+   44. copy_slot_unmasked             _1_ok = $0
+   45. copy_4_slots_unmasked          $0..3 = _2_val
+   46. copy_constant                  $4 = 0x40000000 (2.0)
+   47. swizzle_4                      $4..7 = ($4..7).xxxx
+   48. mul_4_floats                   $0..3 *= $4..7
+   49. copy_4_slots_unmasked          _2_val = $0..3
+   50. copy_constant                  $4 = 0x3F000000 (0.5)
+   51. swizzle_4                      $4..7 = ($4..7).xxxx
+   52. mul_4_floats                   $0..3 *= $4..7
+   53. copy_4_slots_unmasked          _2_val = $0..3
+   54. copy_constant                  $4 = 0x40000000 (2.0)
+   55. swizzle_4                      $4..7 = ($4..7).xxxx
+   56. mul_4_floats                   $0..3 *= $4..7
+   57. copy_4_slots_unmasked          _2_val = $0..3
+   58. copy_constant                  $4 = 0x3F000000 (0.5)
+   59. swizzle_4                      $4..7 = ($4..7).xxxx
+   60. mul_4_floats                   $0..3 *= $4..7
+   61. copy_4_slots_unmasked          _2_val = $0..3
+   62. copy_4_slots_unmasked          $0..3 = _1_ok, _2_val(0..2)
+   63. copy_slot_unmasked             $4 = _2_val(3)
+   64. copy_slot_unmasked             $5 = _0_unknown
+   65. swizzle_4                      $5..8 = ($5..8).xxxx
+   66. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   67. bitwise_and_2_ints             $1..2 &= $3..4
+   68. bitwise_and_int                $1 &= $2
+   69. bitwise_and_int                $0 &= $1
+   70. copy_slot_unmasked             _1_ok = $0
+   71. store_condition_mask           $12 = CondMask
+   72. copy_slot_unmasked             $13 = _1_ok
+   73. zero_slot_unmasked             $0 = 0
+   74. merge_condition_mask           CondMask = $12 & $13
+   75. branch_if_no_active_lanes      branch_if_no_active_lanes +73 (label 1 at #148)
+   76. copy_constant                  $1 = unknownInput
+   77. cast_to_int_from_float         $1 = FloatToInt($1)
+   78. copy_slot_unmasked             unknown = $1
+   79. copy_constant                  ok = 0xFFFFFFFF
+   80. copy_slot_unmasked             $1 = ok
+   81. zero_4_slots_unmasked          $2..5 = 0
+   82. copy_slot_unmasked             $6 = unknown
+   83. swizzle_4                      $6..9 = ($6..9).xxxx
+   84. div_4_ints                     $2..5 /= $6..9
+   85. zero_4_slots_unmasked          $6..9 = 0
+   86. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+   87. bitwise_and_2_ints             $2..3 &= $4..5
+   88. bitwise_and_int                $2 &= $3
+   89. bitwise_and_int                $1 &= $2
+   90. copy_slot_masked               ok = Mask($1)
+   91. copy_slot_unmasked             $1 = unknown
+   92. swizzle_4                      $1..4 = ($1..4).xxxx
+   93. copy_4_slots_unmasked          val = $1..4
+   94. copy_constant                  $5 = 0x00000001 (1.401298e-45)
+   95. swizzle_4                      $5..8 = ($5..8).xxxx
+   96. add_4_ints                     $1..4 += $5..8
+   97. copy_4_slots_masked            val = Mask($1..4)
    98. copy_constant                  $5 = 0x00000001 (1.401298e-45)
    99. swizzle_4                      $5..8 = ($5..8).xxxx
-  100. add_4_ints                     $1..4 += $5..8
+  100. sub_4_ints                     $1..4 -= $5..8
   101. copy_4_slots_masked            val = Mask($1..4)
-  102. copy_4_slots_unmasked          $1..4 = val
-  103. copy_constant                  $5 = 0x00000001 (1.401298e-45)
-  104. swizzle_4                      $5..8 = ($5..8).xxxx
-  105. sub_4_ints                     $1..4 -= $5..8
-  106. copy_4_slots_masked            val = Mask($1..4)
-  107. copy_constant                  $5 = 0x00000001 (1.401298e-45)
-  108. swizzle_4                      $5..8 = ($5..8).xxxx
-  109. add_4_ints                     $1..4 += $5..8
-  110. copy_4_slots_masked            val = Mask($1..4)
-  111. copy_constant                  $5 = 0x00000001 (1.401298e-45)
-  112. swizzle_4                      $5..8 = ($5..8).xxxx
-  113. sub_4_ints                     $1..4 -= $5..8
-  114. copy_4_slots_masked            val = Mask($1..4)
-  115. copy_4_slots_unmasked          $1..4 = ok, val(0..2)
-  116. copy_slot_unmasked             $5 = val(3)
-  117. copy_slot_unmasked             $6 = unknown
-  118. swizzle_4                      $6..9 = ($6..9).xxxx
-  119. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  120. bitwise_and_2_ints             $2..3 &= $4..5
-  121. bitwise_and_int                $2 &= $3
-  122. bitwise_and_int                $1 &= $2
-  123. copy_slot_masked               ok = Mask($1)
-  124. copy_4_slots_unmasked          $1..4 = val
-  125. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  126. swizzle_4                      $5..8 = ($5..8).xxxx
-  127. mul_4_ints                     $1..4 *= $5..8
-  128. copy_4_slots_masked            val = Mask($1..4)
-  129. copy_4_slots_unmasked          $1..4 = val
-  130. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  131. swizzle_4                      $5..8 = ($5..8).xxxx
-  132. div_4_ints                     $1..4 /= $5..8
-  133. copy_4_slots_masked            val = Mask($1..4)
-  134. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  135. swizzle_4                      $5..8 = ($5..8).xxxx
-  136. mul_4_ints                     $1..4 *= $5..8
-  137. copy_4_slots_masked            val = Mask($1..4)
-  138. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  139. swizzle_4                      $5..8 = ($5..8).xxxx
-  140. div_4_ints                     $1..4 /= $5..8
-  141. copy_4_slots_masked            val = Mask($1..4)
-  142. copy_4_slots_unmasked          $1..4 = ok, val(0..2)
-  143. copy_slot_unmasked             $5 = val(3)
-  144. copy_slot_unmasked             $6 = unknown
-  145. swizzle_4                      $6..9 = ($6..9).xxxx
-  146. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  147. bitwise_and_2_ints             $2..3 &= $4..5
-  148. bitwise_and_int                $2 &= $3
-  149. bitwise_and_int                $1 &= $2
-  150. copy_slot_masked               ok = Mask($1)
-  151. copy_slot_masked               [test_int].result = Mask($1)
-  152. label                          label 0x00000002
-  153. copy_slot_masked               $0 = Mask($1)
-  154. label                          label 0x00000001
-  155. load_condition_mask            CondMask = $12
-  156. swizzle_4                      $0..3 = ($0..3).xxxx
-  157. copy_4_constants               $4..7 = colorRed
-  158. copy_4_constants               $8..11 = colorGreen
-  159. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  160. copy_4_slots_unmasked          [main].result = $0..3
-  161. load_src                       src.rgba = [main].result
+  102. copy_constant                  $5 = 0x00000001 (1.401298e-45)
+  103. swizzle_4                      $5..8 = ($5..8).xxxx
+  104. add_4_ints                     $1..4 += $5..8
+  105. copy_4_slots_masked            val = Mask($1..4)
+  106. copy_constant                  $5 = 0x00000001 (1.401298e-45)
+  107. swizzle_4                      $5..8 = ($5..8).xxxx
+  108. sub_4_ints                     $1..4 -= $5..8
+  109. copy_4_slots_masked            val = Mask($1..4)
+  110. copy_4_slots_unmasked          $1..4 = ok, val(0..2)
+  111. copy_slot_unmasked             $5 = val(3)
+  112. copy_slot_unmasked             $6 = unknown
+  113. swizzle_4                      $6..9 = ($6..9).xxxx
+  114. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  115. bitwise_and_2_ints             $2..3 &= $4..5
+  116. bitwise_and_int                $2 &= $3
+  117. bitwise_and_int                $1 &= $2
+  118. copy_slot_masked               ok = Mask($1)
+  119. copy_4_slots_unmasked          $1..4 = val
+  120. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  121. swizzle_4                      $5..8 = ($5..8).xxxx
+  122. mul_4_ints                     $1..4 *= $5..8
+  123. copy_4_slots_masked            val = Mask($1..4)
+  124. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  125. swizzle_4                      $5..8 = ($5..8).xxxx
+  126. div_4_ints                     $1..4 /= $5..8
+  127. copy_4_slots_masked            val = Mask($1..4)
+  128. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  129. swizzle_4                      $5..8 = ($5..8).xxxx
+  130. mul_4_ints                     $1..4 *= $5..8
+  131. copy_4_slots_masked            val = Mask($1..4)
+  132. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  133. swizzle_4                      $5..8 = ($5..8).xxxx
+  134. div_4_ints                     $1..4 /= $5..8
+  135. copy_4_slots_masked            val = Mask($1..4)
+  136. copy_4_slots_unmasked          $1..4 = ok, val(0..2)
+  137. copy_slot_unmasked             $5 = val(3)
+  138. copy_slot_unmasked             $6 = unknown
+  139. swizzle_4                      $6..9 = ($6..9).xxxx
+  140. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  141. bitwise_and_2_ints             $2..3 &= $4..5
+  142. bitwise_and_int                $2 &= $3
+  143. bitwise_and_int                $1 &= $2
+  144. copy_slot_masked               ok = Mask($1)
+  145. copy_slot_masked               [test_int].result = Mask($1)
+  146. label                          label 0x00000002
+  147. copy_slot_masked               $0 = Mask($1)
+  148. label                          label 0x00000001
+  149. load_condition_mask            CondMask = $12
+  150. swizzle_4                      $0..3 = ($0..3).xxxx
+  151. copy_4_constants               $4..7 = colorRed
+  152. copy_4_constants               $8..11 = colorGreen
+  153. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  154. copy_4_slots_unmasked          [main].result = $0..3
+  155. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/Assignment.skrp b/tests/sksl/shared/Assignment.skrp
index feb42d8..88bd2d8 100644
--- a/tests/sksl/shared/Assignment.skrp
+++ b/tests/sksl/shared/Assignment.skrp
@@ -37,121 +37,115 @@
    37. zero_slot_unmasked             $0 = 0
    38. copy_slot_unmasked             x(3) = $0
    39. zero_2_slots_unmasked          $0..1 = 0
-   40. copy_slot_unmasked             x(1) = $0
-   41. copy_slot_unmasked             x(0) = $1
-   42. zero_slot_unmasked             ai[0] = 0
-   43. zero_slot_unmasked             $0 = 0
-   44. copy_slot_unmasked             ai[0] = $0
-   45. zero_4_slots_unmasked          ai4[0] = 0
-   46. copy_constant                  $0 = 0x00000001 (1.401298e-45)
-   47. copy_constant                  $1 = 0x00000002 (2.802597e-45)
-   48. copy_constant                  $2 = 0x00000003 (4.203895e-45)
-   49. copy_constant                  $3 = 0x00000004 (5.605194e-45)
-   50. copy_4_slots_unmasked          ai4[0] = $0..3
-   51. zero_4_slots_unmasked          ah3x3[0](0..3) = 0
-   52. zero_4_slots_unmasked          ah3x3[0](4..7) = 0
-   53. zero_slot_unmasked             ah3x3[0](8) = 0
-   54. copy_constant                  $0 = 0x3F800000 (1.0)
-   55. copy_constant                  $1 = 0x40000000 (2.0)
-   56. copy_constant                  $2 = 0x40400000 (3.0)
-   57. copy_constant                  $3 = 0x40800000 (4.0)
-   58. copy_constant                  $4 = 0x40A00000 (5.0)
-   59. copy_constant                  $5 = 0x40C00000 (6.0)
-   60. copy_constant                  $6 = 0x40E00000 (7.0)
-   61. copy_constant                  $7 = 0x41000000 (8.0)
-   62. copy_constant                  $8 = 0x41100000 (9.0)
-   63. copy_4_slots_unmasked          ah3x3[0](0..3) = $0..3
-   64. copy_4_slots_unmasked          ah3x3[0](4..7) = $4..7
-   65. copy_slot_unmasked             ah3x3[0](8) = $8
-   66. zero_4_slots_unmasked          af4[0] = 0
-   67. zero_slot_unmasked             $0 = 0
-   68. copy_slot_unmasked             af4[0](0) = $0
-   69. copy_constant                  $0 = 0x3F800000 (1.0)
-   70. swizzle_4                      $0..3 = ($0..3).xxxx
-   71. copy_slot_unmasked             af4[0](1) = $0
-   72. copy_slot_unmasked             af4[0](3) = $1
-   73. copy_slot_unmasked             af4[0](0) = $2
-   74. copy_slot_unmasked             af4[0](2) = $3
-   75. zero_4_slots_unmasked          s.f, s.af[0], s.af[1], s.af[2] = 0
-   76. zero_4_slots_unmasked          s.af[3], s.af[4], s.h4(0..1) = 0
-   77. zero_4_slots_unmasked          s.h4(2..3), s.ah4[0](0..1) = 0
-   78. zero_4_slots_unmasked          s.ah4[0](2..3), s.ah4[1](0..1) = 0
-   79. zero_4_slots_unmasked          s.ah4[1](2..3), s.ah4[2](0..1) = 0
-   80. zero_4_slots_unmasked          s.ah4[2](2..3), s.ah4[3](0..1) = 0
-   81. zero_4_slots_unmasked          s.ah4[3](2..3), s.ah4[4](0..1) = 0
-   82. zero_2_slots_unmasked          s.ah4[4](2..3) = 0
-   83. zero_slot_unmasked             $0 = 0
-   84. copy_slot_unmasked             s.f = $0
-   85. zero_slot_unmasked             $0 = 0
-   86. copy_slot_unmasked             s.af[1] = $0
-   87. copy_constant                  $0 = 0x41100000 (9.0)
-   88. swizzle_3                      $0..2 = ($0..2).xxx
-   89. copy_slot_unmasked             s.h4(2) = $0
-   90. copy_2_slots_unmasked          s.h4(0..1) = $1..2
-   91. copy_constant                  $0 = 0x40A00000 (5.0)
-   92. copy_slot_unmasked             $1 = $0
-   93. copy_slot_unmasked             s.ah4[2](1) = $0
-   94. copy_slot_unmasked             s.ah4[2](3) = $1
-   95. zero_4_slots_unmasked          $0..3 = 0
-   96. copy_4_slots_unmasked          globalVar = $0..3
-   97. zero_slot_unmasked             $0 = 0
-   98. copy_slot_unmasked             globalStruct.f = $0
-   99. zero_slot_unmasked             l = 0
-  100. zero_slot_unmasked             $0 = 0
-  101. copy_slot_unmasked             l = $0
-  102. copy_2_slots_unmasked          $0..1 = ai[0], ai4[0](0)
-  103. add_int                        $0 += $1
-  104. copy_slot_unmasked             ai[0] = $0
-  105. copy_constant                  $0 = 0x3F800000 (1.0)
-  106. copy_slot_unmasked             s.f = $0
-  107. copy_constant                  $0 = 0x40000000 (2.0)
-  108. copy_slot_unmasked             s.af[0] = $0
-  109. copy_constant                  $0 = 0x3F800000 (1.0)
-  110. swizzle_4                      $0..3 = ($0..3).xxxx
-  111. copy_4_slots_unmasked          s.h4 = $0..3
-  112. copy_constant                  $0 = 0x40000000 (2.0)
-  113. swizzle_4                      $0..3 = ($0..3).xxxx
-  114. copy_4_slots_unmasked          s.ah4[0] = $0..3
-  115. copy_slot_unmasked             f = af4[0](0)
-  116. copy_slot_unmasked             $0 = f
-  117. copy_slot_unmasked             af4[0](0) = $0
-  118. label                          label 0x00000000
-  119. copy_slot_unmasked             h = ah3x3[0](0)
-  120. copy_slot_unmasked             $0 = h
-  121. copy_slot_unmasked             ah3x3[0](0) = $0
-  122. label                          label 0x00000001
-  123. copy_slot_unmasked             i₁ = i
-  124. copy_slot_unmasked             $0 = i₁
-  125. copy_slot_unmasked             i = $0
-  126. label                          label 0x00000002
-  127. copy_slot_unmasked             i₁ = i4(1)
-  128. copy_slot_unmasked             $0 = i₁
-  129. copy_slot_unmasked             i4(1) = $0
-  130. label                          label 0x00000003
-  131. copy_slot_unmasked             i₁ = ai[0]
-  132. copy_slot_unmasked             $0 = i₁
-  133. copy_slot_unmasked             ai[0] = $0
-  134. label                          label 0x00000004
-  135. copy_slot_unmasked             i₁ = ai4[0](0)
-  136. copy_slot_unmasked             $0 = i₁
-  137. copy_slot_unmasked             ai4[0](0) = $0
-  138. label                          label 0x00000005
-  139. copy_slot_unmasked             h = x(1)
-  140. copy_slot_unmasked             $0 = h
-  141. copy_slot_unmasked             x(1) = $0
-  142. label                          label 0x00000006
-  143. copy_slot_unmasked             f = s.f
-  144. copy_slot_unmasked             $0 = f
-  145. copy_slot_unmasked             s.f = $0
-  146. label                          label 0x00000007
-  147. copy_slot_unmasked             h = l
-  148. copy_slot_unmasked             $0 = h
-  149. copy_slot_unmasked             l = $0
-  150. label                          label 0x00000008
-  151. copy_slot_unmasked             f = f3x3(0)
-  152. copy_slot_unmasked             $0 = f
-  153. copy_slot_unmasked             f3x3(0) = $0
-  154. label                          label 0x00000009
-  155. copy_4_constants               $0..3 = colorGreen
-  156. copy_4_slots_unmasked          [main].result = $0..3
-  157. load_src                       src.rgba = [main].result
+   40. swizzle_copy_2_slots_masked    (x(0..1)).yx = Mask($0..1)
+   41. zero_slot_unmasked             ai[0] = 0
+   42. zero_slot_unmasked             $0 = 0
+   43. copy_slot_unmasked             ai[0] = $0
+   44. zero_4_slots_unmasked          ai4[0] = 0
+   45. copy_constant                  $0 = 0x00000001 (1.401298e-45)
+   46. copy_constant                  $1 = 0x00000002 (2.802597e-45)
+   47. copy_constant                  $2 = 0x00000003 (4.203895e-45)
+   48. copy_constant                  $3 = 0x00000004 (5.605194e-45)
+   49. copy_4_slots_unmasked          ai4[0] = $0..3
+   50. zero_4_slots_unmasked          ah3x3[0](0..3) = 0
+   51. zero_4_slots_unmasked          ah3x3[0](4..7) = 0
+   52. zero_slot_unmasked             ah3x3[0](8) = 0
+   53. copy_constant                  $0 = 0x3F800000 (1.0)
+   54. copy_constant                  $1 = 0x40000000 (2.0)
+   55. copy_constant                  $2 = 0x40400000 (3.0)
+   56. copy_constant                  $3 = 0x40800000 (4.0)
+   57. copy_constant                  $4 = 0x40A00000 (5.0)
+   58. copy_constant                  $5 = 0x40C00000 (6.0)
+   59. copy_constant                  $6 = 0x40E00000 (7.0)
+   60. copy_constant                  $7 = 0x41000000 (8.0)
+   61. copy_constant                  $8 = 0x41100000 (9.0)
+   62. copy_4_slots_unmasked          ah3x3[0](0..3) = $0..3
+   63. copy_4_slots_unmasked          ah3x3[0](4..7) = $4..7
+   64. copy_slot_unmasked             ah3x3[0](8) = $8
+   65. zero_4_slots_unmasked          af4[0] = 0
+   66. zero_slot_unmasked             $0 = 0
+   67. copy_slot_unmasked             af4[0](0) = $0
+   68. copy_constant                  $0 = 0x3F800000 (1.0)
+   69. swizzle_4                      $0..3 = ($0..3).xxxx
+   70. swizzle_copy_4_slots_masked    (af4[0]).ywxz = Mask($0..3)
+   71. zero_4_slots_unmasked          s.f, s.af[0], s.af[1], s.af[2] = 0
+   72. zero_4_slots_unmasked          s.af[3], s.af[4], s.h4(0..1) = 0
+   73. zero_4_slots_unmasked          s.h4(2..3), s.ah4[0](0..1) = 0
+   74. zero_4_slots_unmasked          s.ah4[0](2..3), s.ah4[1](0..1) = 0
+   75. zero_4_slots_unmasked          s.ah4[1](2..3), s.ah4[2](0..1) = 0
+   76. zero_4_slots_unmasked          s.ah4[2](2..3), s.ah4[3](0..1) = 0
+   77. zero_4_slots_unmasked          s.ah4[3](2..3), s.ah4[4](0..1) = 0
+   78. zero_2_slots_unmasked          s.ah4[4](2..3) = 0
+   79. zero_slot_unmasked             $0 = 0
+   80. copy_slot_unmasked             s.f = $0
+   81. zero_slot_unmasked             $0 = 0
+   82. copy_slot_unmasked             s.af[1] = $0
+   83. copy_constant                  $0 = 0x41100000 (9.0)
+   84. swizzle_3                      $0..2 = ($0..2).xxx
+   85. swizzle_copy_3_slots_masked    (s.h4(0..2)).zxy = Mask($0..2)
+   86. copy_constant                  $0 = 0x40A00000 (5.0)
+   87. copy_slot_unmasked             $1 = $0
+   88. swizzle_copy_2_slots_masked    (s.ah4[2]).yw = Mask($0..1)
+   89. zero_4_slots_unmasked          $0..3 = 0
+   90. copy_4_slots_unmasked          globalVar = $0..3
+   91. zero_slot_unmasked             $0 = 0
+   92. copy_slot_unmasked             globalStruct.f = $0
+   93. zero_slot_unmasked             l = 0
+   94. zero_slot_unmasked             $0 = 0
+   95. copy_slot_unmasked             l = $0
+   96. copy_2_slots_unmasked          $0..1 = ai[0], ai4[0](0)
+   97. add_int                        $0 += $1
+   98. copy_slot_unmasked             ai[0] = $0
+   99. copy_constant                  $0 = 0x3F800000 (1.0)
+  100. copy_slot_unmasked             s.f = $0
+  101. copy_constant                  $0 = 0x40000000 (2.0)
+  102. copy_slot_unmasked             s.af[0] = $0
+  103. copy_constant                  $0 = 0x3F800000 (1.0)
+  104. swizzle_4                      $0..3 = ($0..3).xxxx
+  105. copy_4_slots_unmasked          s.h4 = $0..3
+  106. copy_constant                  $0 = 0x40000000 (2.0)
+  107. swizzle_4                      $0..3 = ($0..3).xxxx
+  108. copy_4_slots_unmasked          s.ah4[0] = $0..3
+  109. copy_slot_unmasked             f = af4[0](0)
+  110. copy_slot_unmasked             $0 = f
+  111. copy_slot_unmasked             af4[0](0) = $0
+  112. label                          label 0x00000000
+  113. copy_slot_unmasked             h = ah3x3[0](0)
+  114. copy_slot_unmasked             $0 = h
+  115. copy_slot_unmasked             ah3x3[0](0) = $0
+  116. label                          label 0x00000001
+  117. copy_slot_unmasked             i₁ = i
+  118. copy_slot_unmasked             $0 = i₁
+  119. copy_slot_unmasked             i = $0
+  120. label                          label 0x00000002
+  121. copy_slot_unmasked             i₁ = i4(1)
+  122. copy_slot_unmasked             $0 = i₁
+  123. copy_slot_unmasked             i4(1) = $0
+  124. label                          label 0x00000003
+  125. copy_slot_unmasked             i₁ = ai[0]
+  126. copy_slot_unmasked             $0 = i₁
+  127. copy_slot_unmasked             ai[0] = $0
+  128. label                          label 0x00000004
+  129. copy_slot_unmasked             i₁ = ai4[0](0)
+  130. copy_slot_unmasked             $0 = i₁
+  131. copy_slot_unmasked             ai4[0](0) = $0
+  132. label                          label 0x00000005
+  133. copy_slot_unmasked             h = x(1)
+  134. copy_slot_unmasked             $0 = h
+  135. copy_slot_unmasked             x(1) = $0
+  136. label                          label 0x00000006
+  137. copy_slot_unmasked             f = s.f
+  138. copy_slot_unmasked             $0 = f
+  139. copy_slot_unmasked             s.f = $0
+  140. label                          label 0x00000007
+  141. copy_slot_unmasked             h = l
+  142. copy_slot_unmasked             $0 = h
+  143. copy_slot_unmasked             l = $0
+  144. label                          label 0x00000008
+  145. copy_slot_unmasked             f = f3x3(0)
+  146. copy_slot_unmasked             $0 = f
+  147. copy_slot_unmasked             f3x3(0) = $0
+  148. label                          label 0x00000009
+  149. copy_4_constants               $0..3 = colorGreen
+  150. copy_4_slots_unmasked          [main].result = $0..3
+  151. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/CommaSideEffects.skrp b/tests/sksl/shared/CommaSideEffects.skrp
index 5def377..06416f1 100644
--- a/tests/sksl/shared/CommaSideEffects.skrp
+++ b/tests/sksl/shared/CommaSideEffects.skrp
@@ -14,48 +14,47 @@
    14. label                          label 0x00000000
    15. copy_4_constants               $0..3 = colorWhite
    16. copy_4_slots_unmasked          a = $0..3
-   17. copy_4_slots_unmasked          $0..3 = a
-   18. copy_4_slots_unmasked          $4..7 = a
-   19. mul_4_floats                   $0..3 *= $4..7
-   20. copy_4_slots_unmasked          a = $0..3
-   21. copy_4_slots_unmasked          $0..3 = b
-   22. copy_4_slots_unmasked          $4..7 = b
-   23. mul_4_floats                   $0..3 *= $4..7
-   24. copy_4_slots_unmasked          b = $0..3
-   25. copy_4_slots_unmasked          $0..3 = c
-   26. copy_4_slots_unmasked          $4..7 = c
-   27. mul_4_floats                   $0..3 *= $4..7
-   28. copy_4_slots_unmasked          c = $0..3
-   29. copy_4_slots_unmasked          $0..3 = d
-   30. copy_4_slots_unmasked          $4..7 = d
-   31. mul_4_floats                   $0..3 *= $4..7
-   32. copy_4_slots_unmasked          d = $0..3
-   33. copy_4_slots_unmasked          $0..3 = a
-   34. copy_4_constants               $4..7 = colorWhite
-   35. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
-   36. bitwise_and_2_ints             $0..1 &= $2..3
-   37. bitwise_and_int                $0 &= $1
-   38. copy_4_slots_unmasked          $1..4 = b
-   39. copy_4_constants               $5..8 = colorRed
-   40. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-   41. bitwise_and_2_ints             $1..2 &= $3..4
-   42. bitwise_and_int                $1 &= $2
-   43. bitwise_and_int                $0 &= $1
-   44. copy_4_slots_unmasked          $1..4 = c
-   45. copy_4_constants               $5..8 = colorGreen
-   46. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-   47. bitwise_and_2_ints             $1..2 &= $3..4
-   48. bitwise_and_int                $1 &= $2
-   49. bitwise_and_int                $0 &= $1
-   50. copy_4_slots_unmasked          $1..4 = d
-   51. copy_4_constants               $5..8 = colorBlack
-   52. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-   53. bitwise_and_2_ints             $1..2 &= $3..4
-   54. bitwise_and_int                $1 &= $2
-   55. bitwise_and_int                $0 &= $1
-   56. swizzle_4                      $0..3 = ($0..3).xxxx
-   57. copy_4_constants               $4..7 = colorRed
-   58. copy_4_constants               $8..11 = colorGreen
-   59. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-   60. copy_4_slots_unmasked          [main].result = $0..3
-   61. load_src                       src.rgba = [main].result
+   17. copy_4_slots_unmasked          $4..7 = a
+   18. mul_4_floats                   $0..3 *= $4..7
+   19. copy_4_slots_unmasked          a = $0..3
+   20. copy_4_slots_unmasked          $0..3 = b
+   21. copy_4_slots_unmasked          $4..7 = b
+   22. mul_4_floats                   $0..3 *= $4..7
+   23. copy_4_slots_unmasked          b = $0..3
+   24. copy_4_slots_unmasked          $0..3 = c
+   25. copy_4_slots_unmasked          $4..7 = c
+   26. mul_4_floats                   $0..3 *= $4..7
+   27. copy_4_slots_unmasked          c = $0..3
+   28. copy_4_slots_unmasked          $0..3 = d
+   29. copy_4_slots_unmasked          $4..7 = d
+   30. mul_4_floats                   $0..3 *= $4..7
+   31. copy_4_slots_unmasked          d = $0..3
+   32. copy_4_slots_unmasked          $0..3 = a
+   33. copy_4_constants               $4..7 = colorWhite
+   34. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
+   35. bitwise_and_2_ints             $0..1 &= $2..3
+   36. bitwise_and_int                $0 &= $1
+   37. copy_4_slots_unmasked          $1..4 = b
+   38. copy_4_constants               $5..8 = colorRed
+   39. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   40. bitwise_and_2_ints             $1..2 &= $3..4
+   41. bitwise_and_int                $1 &= $2
+   42. bitwise_and_int                $0 &= $1
+   43. copy_4_slots_unmasked          $1..4 = c
+   44. copy_4_constants               $5..8 = colorGreen
+   45. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   46. bitwise_and_2_ints             $1..2 &= $3..4
+   47. bitwise_and_int                $1 &= $2
+   48. bitwise_and_int                $0 &= $1
+   49. copy_4_slots_unmasked          $1..4 = d
+   50. copy_4_constants               $5..8 = colorBlack
+   51. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+   52. bitwise_and_2_ints             $1..2 &= $3..4
+   53. bitwise_and_int                $1 &= $2
+   54. bitwise_and_int                $0 &= $1
+   55. swizzle_4                      $0..3 = ($0..3).xxxx
+   56. copy_4_constants               $4..7 = colorRed
+   57. copy_4_constants               $8..11 = colorGreen
+   58. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+   59. copy_4_slots_unmasked          [main].result = $0..3
+   60. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/MatrixOpEqualsES2.skrp b/tests/sksl/shared/MatrixOpEqualsES2.skrp
index f09dc15..e059f56 100644
--- a/tests/sksl/shared/MatrixOpEqualsES2.skrp
+++ b/tests/sksl/shared/MatrixOpEqualsES2.skrp
@@ -28,542 +28,542 @@
    28. copy_4_slots_unmasked          _3_m(0..3) = $0..3
    29. copy_4_slots_unmasked          _3_m(4..7) = $4..7
    30. copy_slot_unmasked             _3_m(8) = $8
-   31. copy_4_slots_unmasked          $0..3 = _3_m(0..3)
-   32. copy_4_slots_unmasked          $4..7 = _3_m(4..7)
-   33. copy_slot_unmasked             $8 = _3_m(8)
-   34. copy_4_slots_unmasked          $9..12 = _1_splat_4(0..3)
-   35. copy_4_slots_unmasked          $13..16 = _1_splat_4(4..7)
-   36. copy_slot_unmasked             $17 = _1_splat_4(8)
-   37. add_n_floats                   $0..8 += $9..17
-   38. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-   39. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-   40. copy_slot_unmasked             _3_m(8) = $8
-   41. copy_slot_unmasked             $0 = _0_ok
-   42. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
-   43. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
-   44. copy_slot_unmasked             $9 = _3_m(8)
-   45. copy_constant                  $10 = 0x40C00000 (6.0)
-   46. copy_constant                  $11 = 0x40800000 (4.0)
-   47. copy_constant                  $12 = 0x40800000 (4.0)
-   48. copy_constant                  $13 = 0x40800000 (4.0)
-   49. copy_constant                  $14 = 0x40C00000 (6.0)
-   50. copy_constant                  $15 = 0x40800000 (4.0)
-   51. copy_constant                  $16 = 0x40800000 (4.0)
-   52. copy_constant                  $17 = 0x40800000 (4.0)
-   53. copy_constant                  $18 = 0x40C00000 (6.0)
-   54. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-   55. bitwise_and_4_ints             $2..5 &= $6..9
-   56. bitwise_and_2_ints             $2..3 &= $4..5
-   57. bitwise_and_int                $2 &= $3
-   58. bitwise_and_int                $1 &= $2
-   59. bitwise_and_int                $0 &= $1
-   60. copy_slot_unmasked             _0_ok = $0
-   61. zero_slot_unmasked             $0 = 0
-   62. copy_constant                  $1 = 0x40000000 (2.0)
-   63. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
-   64. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-   65. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-   66. copy_slot_unmasked             _3_m(8) = $8
-   67. copy_4_slots_unmasked          $0..3 = _3_m(0..3)
-   68. copy_4_slots_unmasked          $4..7 = _3_m(4..7)
-   69. copy_slot_unmasked             $8 = _3_m(8)
-   70. copy_4_slots_unmasked          $9..12 = _1_splat_4(0..3)
-   71. copy_4_slots_unmasked          $13..16 = _1_splat_4(4..7)
-   72. copy_slot_unmasked             $17 = _1_splat_4(8)
-   73. sub_n_floats                   $0..8 -= $9..17
-   74. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-   75. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-   76. copy_slot_unmasked             _3_m(8) = $8
-   77. copy_slot_unmasked             $0 = _0_ok
-   78. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
-   79. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
-   80. copy_slot_unmasked             $9 = _3_m(8)
-   81. copy_constant                  $10 = 0xC0000000 (-2.0)
-   82. copy_constant                  $11 = 0xC0800000 (-4.0)
-   83. copy_constant                  $12 = 0xC0800000 (-4.0)
-   84. copy_constant                  $13 = 0xC0800000 (-4.0)
-   85. copy_constant                  $14 = 0xC0000000 (-2.0)
-   86. copy_constant                  $15 = 0xC0800000 (-4.0)
-   87. copy_constant                  $16 = 0xC0800000 (-4.0)
-   88. copy_constant                  $17 = 0xC0800000 (-4.0)
-   89. copy_constant                  $18 = 0xC0000000 (-2.0)
-   90. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-   91. bitwise_and_4_ints             $2..5 &= $6..9
-   92. bitwise_and_2_ints             $2..3 &= $4..5
-   93. bitwise_and_int                $2 &= $3
-   94. bitwise_and_int                $1 &= $2
-   95. bitwise_and_int                $0 &= $1
-   96. copy_slot_unmasked             _0_ok = $0
-   97. zero_slot_unmasked             $0 = 0
-   98. copy_constant                  $1 = 0x40000000 (2.0)
-   99. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
-  100. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  101. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  102. copy_slot_unmasked             _3_m(8) = $8
-  103. copy_4_slots_unmasked          $0..3 = _3_m(0..3)
-  104. copy_4_slots_unmasked          $4..7 = _3_m(4..7)
-  105. copy_slot_unmasked             $8 = _3_m(8)
-  106. copy_4_slots_unmasked          $9..12 = _1_splat_4(0..3)
-  107. copy_4_slots_unmasked          $13..16 = _1_splat_4(4..7)
-  108. copy_slot_unmasked             $17 = _1_splat_4(8)
-  109. div_n_floats                   $0..8 /= $9..17
-  110. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  111. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  112. copy_slot_unmasked             _3_m(8) = $8
-  113. copy_slot_unmasked             $0 = _0_ok
-  114. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
-  115. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
-  116. copy_slot_unmasked             $9 = _3_m(8)
-  117. zero_slot_unmasked             $10 = 0
-  118. copy_constant                  $11 = 0x3F000000 (0.5)
-  119. shuffle                        $10..18 = ($10..18)[1 0 0 0 1 0 0 0 1]
-  120. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-  121. bitwise_and_4_ints             $2..5 &= $6..9
-  122. bitwise_and_2_ints             $2..3 &= $4..5
-  123. bitwise_and_int                $2 &= $3
-  124. bitwise_and_int                $1 &= $2
-  125. bitwise_and_int                $0 &= $1
-  126. copy_slot_unmasked             _0_ok = $0
-  127. copy_4_slots_unmasked          $0..3 = _1_splat_4(0..3)
-  128. copy_4_slots_unmasked          $4..7 = _1_splat_4(4..7)
-  129. copy_slot_unmasked             $8 = _1_splat_4(8)
-  130. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  131. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  132. copy_slot_unmasked             _3_m(8) = $8
-  133. copy_4_slots_unmasked          $0..3 = _3_m(0..3)
-  134. copy_4_slots_unmasked          $4..7 = _3_m(4..7)
-  135. copy_slot_unmasked             $8 = _3_m(8)
-  136. zero_slot_unmasked             $9 = 0
-  137. copy_constant                  $10 = 0x40000000 (2.0)
-  138. shuffle                        $9..17 = ($9..17)[1 0 0 0 1 0 0 0 1]
-  139. add_n_floats                   $0..8 += $9..17
-  140. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  141. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  142. copy_slot_unmasked             _3_m(8) = $8
-  143. copy_slot_unmasked             $0 = _0_ok
-  144. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
-  145. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
-  146. copy_slot_unmasked             $9 = _3_m(8)
-  147. copy_constant                  $10 = 0x40C00000 (6.0)
-  148. copy_constant                  $11 = 0x40800000 (4.0)
-  149. copy_constant                  $12 = 0x40800000 (4.0)
-  150. copy_constant                  $13 = 0x40800000 (4.0)
-  151. copy_constant                  $14 = 0x40C00000 (6.0)
-  152. copy_constant                  $15 = 0x40800000 (4.0)
-  153. copy_constant                  $16 = 0x40800000 (4.0)
-  154. copy_constant                  $17 = 0x40800000 (4.0)
-  155. copy_constant                  $18 = 0x40C00000 (6.0)
-  156. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-  157. bitwise_and_4_ints             $2..5 &= $6..9
-  158. bitwise_and_2_ints             $2..3 &= $4..5
-  159. bitwise_and_int                $2 &= $3
-  160. bitwise_and_int                $1 &= $2
-  161. bitwise_and_int                $0 &= $1
-  162. copy_slot_unmasked             _0_ok = $0
-  163. copy_4_slots_unmasked          $0..3 = _1_splat_4(0..3)
-  164. copy_4_slots_unmasked          $4..7 = _1_splat_4(4..7)
-  165. copy_slot_unmasked             $8 = _1_splat_4(8)
-  166. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  167. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  168. copy_slot_unmasked             _3_m(8) = $8
-  169. copy_4_slots_unmasked          $0..3 = _3_m(0..3)
-  170. copy_4_slots_unmasked          $4..7 = _3_m(4..7)
-  171. copy_slot_unmasked             $8 = _3_m(8)
-  172. zero_slot_unmasked             $9 = 0
-  173. copy_constant                  $10 = 0x40000000 (2.0)
-  174. shuffle                        $9..17 = ($9..17)[1 0 0 0 1 0 0 0 1]
-  175. sub_n_floats                   $0..8 -= $9..17
-  176. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  177. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  178. copy_slot_unmasked             _3_m(8) = $8
-  179. copy_slot_unmasked             $0 = _0_ok
-  180. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
-  181. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
-  182. copy_slot_unmasked             $9 = _3_m(8)
-  183. copy_constant                  $10 = 0x40000000 (2.0)
-  184. copy_constant                  $11 = 0x40800000 (4.0)
-  185. copy_constant                  $12 = 0x40800000 (4.0)
-  186. copy_constant                  $13 = 0x40800000 (4.0)
-  187. copy_constant                  $14 = 0x40000000 (2.0)
-  188. copy_constant                  $15 = 0x40800000 (4.0)
-  189. copy_constant                  $16 = 0x40800000 (4.0)
-  190. copy_constant                  $17 = 0x40800000 (4.0)
-  191. copy_constant                  $18 = 0x40000000 (2.0)
-  192. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-  193. bitwise_and_4_ints             $2..5 &= $6..9
-  194. bitwise_and_2_ints             $2..3 &= $4..5
-  195. bitwise_and_int                $2 &= $3
-  196. bitwise_and_int                $1 &= $2
-  197. bitwise_and_int                $0 &= $1
-  198. copy_slot_unmasked             _0_ok = $0
-  199. copy_4_slots_unmasked          $0..3 = _1_splat_4(0..3)
-  200. copy_4_slots_unmasked          $4..7 = _1_splat_4(4..7)
-  201. copy_slot_unmasked             $8 = _1_splat_4(8)
-  202. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  203. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  204. copy_slot_unmasked             _3_m(8) = $8
-  205. copy_4_slots_unmasked          $0..3 = _3_m(0..3)
-  206. copy_4_slots_unmasked          $4..7 = _3_m(4..7)
-  207. copy_slot_unmasked             $8 = _3_m(8)
-  208. copy_4_slots_unmasked          $9..12 = _2_splat_2(0..3)
-  209. copy_4_slots_unmasked          $13..16 = _2_splat_2(4..7)
-  210. copy_slot_unmasked             $17 = _2_splat_2(8)
-  211. div_n_floats                   $0..8 /= $9..17
-  212. copy_4_slots_unmasked          _3_m(0..3) = $0..3
-  213. copy_4_slots_unmasked          _3_m(4..7) = $4..7
-  214. copy_slot_unmasked             _3_m(8) = $8
-  215. copy_slot_unmasked             $0 = _0_ok
-  216. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
-  217. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
-  218. copy_slot_unmasked             $9 = _3_m(8)
-  219. copy_constant                  $10 = 0x40000000 (2.0)
-  220. copy_constant                  $11 = 0x40000000 (2.0)
-  221. copy_constant                  $12 = 0x40000000 (2.0)
-  222. copy_constant                  $13 = 0x40000000 (2.0)
-  223. copy_constant                  $14 = 0x40000000 (2.0)
-  224. copy_constant                  $15 = 0x40000000 (2.0)
-  225. copy_constant                  $16 = 0x40000000 (2.0)
-  226. copy_constant                  $17 = 0x40000000 (2.0)
-  227. copy_constant                  $18 = 0x40000000 (2.0)
-  228. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-  229. bitwise_and_4_ints             $2..5 &= $6..9
-  230. bitwise_and_2_ints             $2..3 &= $4..5
-  231. bitwise_and_int                $2 &= $3
-  232. bitwise_and_int                $1 &= $2
-  233. bitwise_and_int                $0 &= $1
-  234. copy_slot_unmasked             _0_ok = $0
-  235. copy_constant                  _4_m(0) = 0x3F800000 (1.0)
-  236. copy_constant                  _4_m(1) = 0x40000000 (2.0)
-  237. copy_constant                  _4_m(2) = 0x40400000 (3.0)
-  238. copy_constant                  _4_m(3) = 0x40800000 (4.0)
-  239. copy_constant                  _4_m(4) = 0x40A00000 (5.0)
-  240. copy_constant                  _4_m(5) = 0x40C00000 (6.0)
-  241. copy_constant                  _4_m(6) = 0x40E00000 (7.0)
-  242. copy_constant                  _4_m(7) = 0x41000000 (8.0)
-  243. copy_constant                  _4_m(8) = 0x41100000 (9.0)
-  244. copy_constant                  _4_m(9) = 0x41200000 (10.0)
-  245. copy_constant                  _4_m(10) = 0x41300000 (11.0)
-  246. copy_constant                  _4_m(11) = 0x41400000 (12.0)
-  247. copy_constant                  _4_m(12) = 0x41500000 (13.0)
-  248. copy_constant                  _4_m(13) = 0x41600000 (14.0)
-  249. copy_constant                  _4_m(14) = 0x41700000 (15.0)
-  250. copy_constant                  _4_m(15) = 0x41800000 (16.0)
-  251. copy_4_slots_unmasked          $0..3 = _4_m(0..3)
-  252. copy_4_slots_unmasked          $4..7 = _4_m(4..7)
-  253. copy_4_slots_unmasked          $8..11 = _4_m(8..11)
-  254. copy_4_slots_unmasked          $12..15 = _4_m(12..15)
-  255. copy_constant                  $16 = 0x41800000 (16.0)
-  256. copy_constant                  $17 = 0x41700000 (15.0)
-  257. copy_constant                  $18 = 0x41600000 (14.0)
-  258. copy_constant                  $19 = 0x41500000 (13.0)
-  259. copy_constant                  $20 = 0x41400000 (12.0)
-  260. copy_constant                  $21 = 0x41300000 (11.0)
-  261. copy_constant                  $22 = 0x41200000 (10.0)
-  262. copy_constant                  $23 = 0x41100000 (9.0)
-  263. copy_constant                  $24 = 0x41000000 (8.0)
-  264. copy_constant                  $25 = 0x40E00000 (7.0)
-  265. copy_constant                  $26 = 0x40C00000 (6.0)
-  266. copy_constant                  $27 = 0x40A00000 (5.0)
-  267. copy_constant                  $28 = 0x40800000 (4.0)
-  268. copy_constant                  $29 = 0x40400000 (3.0)
-  269. copy_constant                  $30 = 0x40000000 (2.0)
-  270. copy_constant                  $31 = 0x3F800000 (1.0)
-  271. add_n_floats                   $0..15 += $16..31
-  272. copy_4_slots_unmasked          _4_m(0..3) = $0..3
-  273. copy_4_slots_unmasked          _4_m(4..7) = $4..7
-  274. copy_4_slots_unmasked          _4_m(8..11) = $8..11
-  275. copy_4_slots_unmasked          _4_m(12..15) = $12..15
-  276. copy_slot_unmasked             $0 = _0_ok
-  277. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
-  278. copy_4_slots_unmasked          $5..8 = _4_m(4..7)
-  279. copy_4_slots_unmasked          $9..12 = _4_m(8..11)
-  280. copy_4_slots_unmasked          $13..16 = _4_m(12..15)
-  281. copy_constant                  $17 = 0x41880000 (17.0)
-  282. copy_constant                  $18 = 0x41880000 (17.0)
-  283. copy_constant                  $19 = 0x41880000 (17.0)
-  284. copy_constant                  $20 = 0x41880000 (17.0)
-  285. copy_constant                  $21 = 0x41880000 (17.0)
-  286. copy_constant                  $22 = 0x41880000 (17.0)
-  287. copy_constant                  $23 = 0x41880000 (17.0)
-  288. copy_constant                  $24 = 0x41880000 (17.0)
-  289. copy_constant                  $25 = 0x41880000 (17.0)
-  290. copy_constant                  $26 = 0x41880000 (17.0)
-  291. copy_constant                  $27 = 0x41880000 (17.0)
-  292. copy_constant                  $28 = 0x41880000 (17.0)
-  293. copy_constant                  $29 = 0x41880000 (17.0)
-  294. copy_constant                  $30 = 0x41880000 (17.0)
-  295. copy_constant                  $31 = 0x41880000 (17.0)
-  296. copy_constant                  $32 = 0x41880000 (17.0)
-  297. cmpeq_n_floats                 $1..16 = equal($1..16, $17..32)
-  298. bitwise_and_4_ints             $9..12 &= $13..16
-  299. bitwise_and_4_ints             $5..8 &= $9..12
-  300. bitwise_and_4_ints             $1..4 &= $5..8
-  301. bitwise_and_2_ints             $1..2 &= $3..4
-  302. bitwise_and_int                $1 &= $2
-  303. bitwise_and_int                $0 &= $1
-  304. copy_slot_unmasked             _0_ok = $0
-  305. copy_constant                  _5_m(0) = 0x41200000 (10.0)
-  306. copy_constant                  _5_m(1) = 0x41A00000 (20.0)
-  307. copy_constant                  _5_m(2) = 0x41F00000 (30.0)
-  308. copy_constant                  _5_m(3) = 0x42200000 (40.0)
-  309. copy_4_slots_unmasked          $0..3 = _5_m
-  310. copy_constant                  $4 = 0x3F800000 (1.0)
-  311. copy_constant                  $5 = 0x40000000 (2.0)
-  312. copy_constant                  $6 = 0x40400000 (3.0)
-  313. copy_constant                  $7 = 0x40800000 (4.0)
-  314. sub_4_floats                   $0..3 -= $4..7
-  315. copy_4_slots_unmasked          _5_m = $0..3
-  316. copy_slot_unmasked             $0 = _0_ok
-  317. copy_4_slots_unmasked          $1..4 = _5_m
-  318. copy_constant                  $5 = 0x41100000 (9.0)
-  319. copy_constant                  $6 = 0x41900000 (18.0)
-  320. copy_constant                  $7 = 0x41D80000 (27.0)
-  321. copy_constant                  $8 = 0x42100000 (36.0)
-  322. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  323. bitwise_and_2_ints             $1..2 &= $3..4
-  324. bitwise_and_int                $1 &= $2
-  325. bitwise_and_int                $0 &= $1
-  326. copy_slot_unmasked             _0_ok = $0
-  327. copy_constant                  _6_m(0) = 0x40000000 (2.0)
-  328. copy_constant                  _6_m(1) = 0x40800000 (4.0)
-  329. copy_constant                  _6_m(2) = 0x40C00000 (6.0)
-  330. copy_constant                  _6_m(3) = 0x41000000 (8.0)
-  331. copy_4_slots_unmasked          $0..3 = _6_m
-  332. copy_constant                  $4 = 0x40000000 (2.0)
-  333. copy_constant                  $5 = 0x40000000 (2.0)
-  334. copy_constant                  $6 = 0x40000000 (2.0)
-  335. copy_constant                  $7 = 0x40800000 (4.0)
-  336. div_4_floats                   $0..3 /= $4..7
-  337. copy_4_slots_unmasked          _6_m = $0..3
-  338. copy_slot_unmasked             $0 = _0_ok
-  339. copy_4_slots_unmasked          $1..4 = _6_m
-  340. copy_constant                  $5 = 0x3F800000 (1.0)
-  341. copy_constant                  $6 = 0x40000000 (2.0)
-  342. copy_constant                  $7 = 0x40400000 (3.0)
-  343. copy_constant                  $8 = 0x40000000 (2.0)
-  344. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  345. bitwise_and_2_ints             $1..2 &= $3..4
-  346. bitwise_and_int                $1 &= $2
-  347. bitwise_and_int                $0 &= $1
-  348. copy_slot_unmasked             _0_ok = $0
-  349. copy_constant                  _7_m(0) = 0x3F800000 (1.0)
-  350. copy_constant                  _7_m(1) = 0x40000000 (2.0)
-  351. copy_constant                  _7_m(2) = 0x40E00000 (7.0)
-  352. copy_constant                  _7_m(3) = 0x40800000 (4.0)
-  353. copy_4_slots_unmasked          $52..55 = _7_m
-  354. swizzle_3                      $53..55 = ($53..55).yxz
-  355. copy_constant                  $56 = 0x40400000 (3.0)
-  356. copy_constant                  $57 = 0x40A00000 (5.0)
-  357. copy_constant                  $58 = 0x40400000 (3.0)
-  358. copy_constant                  $59 = 0x40000000 (2.0)
-  359. copy_2_slots_unmasked          $0..1 = $52..53
-  360. copy_2_slots_unmasked          $2..3 = $56..57
-  361. dot_2_floats                   $0 = dot($0..1, $2..3)
-  362. copy_4_slots_unmasked          $1..4 = $54..57
-  363. dot_2_floats                   $1 = dot($1..2, $3..4)
-  364. copy_2_slots_unmasked          $2..3 = $52..53
-  365. copy_2_slots_unmasked          $4..5 = $58..59
-  366. dot_2_floats                   $2 = dot($2..3, $4..5)
-  367. copy_2_slots_unmasked          $3..4 = $54..55
-  368. copy_2_slots_unmasked          $5..6 = $58..59
-  369. dot_2_floats                   $3 = dot($3..4, $5..6)
-  370. copy_4_slots_unmasked          _7_m = $0..3
-  371. copy_slot_unmasked             $0 = _0_ok
-  372. copy_4_slots_unmasked          $1..4 = _7_m
-  373. copy_constant                  $5 = 0x42180000 (38.0)
-  374. copy_constant                  $6 = 0x41D00000 (26.0)
-  375. copy_constant                  $7 = 0x41880000 (17.0)
-  376. copy_constant                  $8 = 0x41600000 (14.0)
-  377. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  378. bitwise_and_2_ints             $1..2 &= $3..4
-  379. bitwise_and_int                $1 &= $2
-  380. bitwise_and_int                $0 &= $1
-  381. copy_slot_unmasked             _0_ok = $0
-  382. copy_constant                  _8_m(0) = 0x41200000 (10.0)
-  383. copy_constant                  _8_m(1) = 0x40800000 (4.0)
-  384. copy_constant                  _8_m(2) = 0x40000000 (2.0)
-  385. copy_constant                  _8_m(3) = 0x41A00000 (20.0)
-  386. copy_constant                  _8_m(4) = 0x40A00000 (5.0)
-  387. copy_constant                  _8_m(5) = 0x40400000 (3.0)
-  388. copy_constant                  _8_m(6) = 0x41200000 (10.0)
-  389. copy_constant                  _8_m(7) = 0x40C00000 (6.0)
-  390. copy_constant                  _8_m(8) = 0x40A00000 (5.0)
-  391. copy_4_slots_unmasked          $52..55 = _8_m(0..3)
-  392. copy_4_slots_unmasked          $56..59 = _8_m(4..7)
-  393. copy_slot_unmasked             $60 = _8_m(8)
-  394. shuffle                        $53..60 = ($53..60)[2 5 0 3 6 1 4 7]
-  395. copy_constant                  $61 = 0x40400000 (3.0)
-  396. copy_constant                  $62 = 0x40400000 (3.0)
-  397. copy_constant                  $63 = 0x40800000 (4.0)
-  398. copy_constant                  $64 = 0x40000000 (2.0)
-  399. copy_constant                  $65 = 0x40400000 (3.0)
-  400. copy_constant                  $66 = 0x40800000 (4.0)
-  401. copy_constant                  $67 = 0x40800000 (4.0)
-  402. copy_constant                  $68 = 0x41100000 (9.0)
-  403. copy_constant                  $69 = 0x40000000 (2.0)
-  404. copy_3_slots_unmasked          $0..2 = $52..54
-  405. copy_3_slots_unmasked          $3..5 = $61..63
-  406. dot_3_floats                   $0 = dot($0..2, $3..5)
-  407. copy_3_slots_unmasked          $1..3 = $55..57
-  408. copy_3_slots_unmasked          $4..6 = $61..63
-  409. dot_3_floats                   $1 = dot($1..3, $4..6)
-  410. copy_4_slots_unmasked          $2..5 = $58..61
-  411. copy_2_slots_unmasked          $6..7 = $62..63
-  412. dot_3_floats                   $2 = dot($2..4, $5..7)
-  413. copy_3_slots_unmasked          $3..5 = $52..54
-  414. copy_3_slots_unmasked          $6..8 = $64..66
-  415. dot_3_floats                   $3 = dot($3..5, $6..8)
-  416. copy_3_slots_unmasked          $4..6 = $55..57
-  417. copy_3_slots_unmasked          $7..9 = $64..66
-  418. dot_3_floats                   $4 = dot($4..6, $7..9)
-  419. copy_3_slots_unmasked          $5..7 = $58..60
-  420. copy_3_slots_unmasked          $8..10 = $64..66
-  421. dot_3_floats                   $5 = dot($5..7, $8..10)
-  422. copy_3_slots_unmasked          $6..8 = $52..54
-  423. copy_3_slots_unmasked          $9..11 = $67..69
-  424. dot_3_floats                   $6 = dot($6..8, $9..11)
-  425. copy_3_slots_unmasked          $7..9 = $55..57
-  426. copy_3_slots_unmasked          $10..12 = $67..69
-  427. dot_3_floats                   $7 = dot($7..9, $10..12)
-  428. copy_3_slots_unmasked          $8..10 = $58..60
-  429. copy_3_slots_unmasked          $11..13 = $67..69
-  430. dot_3_floats                   $8 = dot($8..10, $11..13)
-  431. copy_4_slots_unmasked          _8_m(0..3) = $0..3
-  432. copy_4_slots_unmasked          _8_m(4..7) = $4..7
-  433. copy_slot_unmasked             _8_m(8) = $8
-  434. copy_slot_unmasked             $0 = _0_ok
-  435. copy_4_slots_unmasked          $1..4 = _8_m(0..3)
-  436. copy_4_slots_unmasked          $5..8 = _8_m(4..7)
-  437. copy_slot_unmasked             $9 = _8_m(8)
-  438. copy_constant                  $10 = 0x43020000 (130.0)
-  439. copy_constant                  $11 = 0x424C0000 (51.0)
-  440. copy_constant                  $12 = 0x420C0000 (35.0)
-  441. copy_constant                  $13 = 0x42F00000 (120.0)
-  442. copy_constant                  $14 = 0x423C0000 (47.0)
-  443. copy_constant                  $15 = 0x42040000 (33.0)
-  444. copy_constant                  $16 = 0x43700000 (240.0)
-  445. copy_constant                  $17 = 0x42920000 (73.0)
-  446. copy_constant                  $18 = 0x42340000 (45.0)
-  447. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
-  448. bitwise_and_4_ints             $2..5 &= $6..9
-  449. bitwise_and_2_ints             $2..3 &= $4..5
-  450. bitwise_and_int                $2 &= $3
-  451. bitwise_and_int                $1 &= $2
-  452. bitwise_and_int                $0 &= $1
-  453. copy_slot_unmasked             _0_ok = $0
-  454. store_condition_mask           $52 = CondMask
-  455. copy_slot_unmasked             $53 = _0_ok
-  456. zero_slot_unmasked             $0 = 0
-  457. merge_condition_mask           CondMask = $52 & $53
-  458. branch_if_no_active_lanes      branch_if_no_active_lanes +456 (label 1 at #914)
-  459. copy_constant                  ok = 0xFFFFFFFF
-  460. copy_constant                  splat_4(0) = 0x40800000 (4.0)
-  461. copy_constant                  splat_4(1) = 0x40800000 (4.0)
-  462. copy_constant                  splat_4(2) = 0x40800000 (4.0)
-  463. copy_constant                  splat_4(3) = 0x40800000 (4.0)
-  464. copy_constant                  splat_4(4) = 0x40800000 (4.0)
-  465. copy_constant                  splat_4(5) = 0x40800000 (4.0)
-  466. copy_constant                  splat_4(6) = 0x40800000 (4.0)
-  467. copy_constant                  splat_4(7) = 0x40800000 (4.0)
-  468. copy_constant                  splat_4(8) = 0x40800000 (4.0)
-  469. copy_constant                  splat_2(0) = 0x40000000 (2.0)
-  470. copy_constant                  splat_2(1) = 0x40000000 (2.0)
-  471. copy_constant                  splat_2(2) = 0x40000000 (2.0)
-  472. copy_constant                  splat_2(3) = 0x40000000 (2.0)
-  473. copy_constant                  splat_2(4) = 0x40000000 (2.0)
-  474. copy_constant                  splat_2(5) = 0x40000000 (2.0)
-  475. copy_constant                  splat_2(6) = 0x40000000 (2.0)
-  476. copy_constant                  splat_2(7) = 0x40000000 (2.0)
-  477. copy_constant                  splat_2(8) = 0x40000000 (2.0)
-  478. zero_4_slots_unmasked          m(0..3) = 0
-  479. zero_4_slots_unmasked          m(4..7) = 0
-  480. zero_slot_unmasked             m(8) = 0
-  481. zero_slot_unmasked             $1 = 0
-  482. copy_constant                  $2 = 0x40000000 (2.0)
-  483. shuffle                        $1..9 = ($1..9)[1 0 0 0 1 0 0 0 1]
-  484. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  485. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  486. copy_slot_masked               m(8) = Mask($9)
-  487. copy_4_slots_unmasked          $1..4 = m(0..3)
-  488. copy_4_slots_unmasked          $5..8 = m(4..7)
-  489. copy_slot_unmasked             $9 = m(8)
-  490. copy_4_slots_unmasked          $10..13 = splat_4(0..3)
-  491. copy_4_slots_unmasked          $14..17 = splat_4(4..7)
-  492. copy_slot_unmasked             $18 = splat_4(8)
-  493. add_n_floats                   $1..9 += $10..18
-  494. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  495. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  496. copy_slot_masked               m(8) = Mask($9)
-  497. copy_slot_unmasked             $1 = ok
-  498. copy_4_slots_unmasked          $2..5 = m(0..3)
-  499. copy_4_slots_unmasked          $6..9 = m(4..7)
-  500. copy_slot_unmasked             $10 = m(8)
-  501. copy_constant                  $11 = 0x40C00000 (6.0)
+   31. copy_4_slots_unmasked          $9..12 = _1_splat_4(0..3)
+   32. copy_4_slots_unmasked          $13..16 = _1_splat_4(4..7)
+   33. copy_slot_unmasked             $17 = _1_splat_4(8)
+   34. add_n_floats                   $0..8 += $9..17
+   35. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+   36. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+   37. copy_slot_unmasked             _3_m(8) = $8
+   38. copy_slot_unmasked             $0 = _0_ok
+   39. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
+   40. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
+   41. copy_slot_unmasked             $9 = _3_m(8)
+   42. copy_constant                  $10 = 0x40C00000 (6.0)
+   43. copy_constant                  $11 = 0x40800000 (4.0)
+   44. copy_constant                  $12 = 0x40800000 (4.0)
+   45. copy_constant                  $13 = 0x40800000 (4.0)
+   46. copy_constant                  $14 = 0x40C00000 (6.0)
+   47. copy_constant                  $15 = 0x40800000 (4.0)
+   48. copy_constant                  $16 = 0x40800000 (4.0)
+   49. copy_constant                  $17 = 0x40800000 (4.0)
+   50. copy_constant                  $18 = 0x40C00000 (6.0)
+   51. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+   52. bitwise_and_4_ints             $2..5 &= $6..9
+   53. bitwise_and_2_ints             $2..3 &= $4..5
+   54. bitwise_and_int                $2 &= $3
+   55. bitwise_and_int                $1 &= $2
+   56. bitwise_and_int                $0 &= $1
+   57. copy_slot_unmasked             _0_ok = $0
+   58. zero_slot_unmasked             $0 = 0
+   59. copy_constant                  $1 = 0x40000000 (2.0)
+   60. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
+   61. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+   62. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+   63. copy_slot_unmasked             _3_m(8) = $8
+   64. copy_4_slots_unmasked          $9..12 = _1_splat_4(0..3)
+   65. copy_4_slots_unmasked          $13..16 = _1_splat_4(4..7)
+   66. copy_slot_unmasked             $17 = _1_splat_4(8)
+   67. sub_n_floats                   $0..8 -= $9..17
+   68. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+   69. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+   70. copy_slot_unmasked             _3_m(8) = $8
+   71. copy_slot_unmasked             $0 = _0_ok
+   72. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
+   73. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
+   74. copy_slot_unmasked             $9 = _3_m(8)
+   75. copy_constant                  $10 = 0xC0000000 (-2.0)
+   76. copy_constant                  $11 = 0xC0800000 (-4.0)
+   77. copy_constant                  $12 = 0xC0800000 (-4.0)
+   78. copy_constant                  $13 = 0xC0800000 (-4.0)
+   79. copy_constant                  $14 = 0xC0000000 (-2.0)
+   80. copy_constant                  $15 = 0xC0800000 (-4.0)
+   81. copy_constant                  $16 = 0xC0800000 (-4.0)
+   82. copy_constant                  $17 = 0xC0800000 (-4.0)
+   83. copy_constant                  $18 = 0xC0000000 (-2.0)
+   84. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+   85. bitwise_and_4_ints             $2..5 &= $6..9
+   86. bitwise_and_2_ints             $2..3 &= $4..5
+   87. bitwise_and_int                $2 &= $3
+   88. bitwise_and_int                $1 &= $2
+   89. bitwise_and_int                $0 &= $1
+   90. copy_slot_unmasked             _0_ok = $0
+   91. zero_slot_unmasked             $0 = 0
+   92. copy_constant                  $1 = 0x40000000 (2.0)
+   93. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
+   94. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+   95. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+   96. copy_slot_unmasked             _3_m(8) = $8
+   97. copy_4_slots_unmasked          $9..12 = _1_splat_4(0..3)
+   98. copy_4_slots_unmasked          $13..16 = _1_splat_4(4..7)
+   99. copy_slot_unmasked             $17 = _1_splat_4(8)
+  100. div_n_floats                   $0..8 /= $9..17
+  101. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  102. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  103. copy_slot_unmasked             _3_m(8) = $8
+  104. copy_slot_unmasked             $0 = _0_ok
+  105. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
+  106. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
+  107. copy_slot_unmasked             $9 = _3_m(8)
+  108. zero_slot_unmasked             $10 = 0
+  109. copy_constant                  $11 = 0x3F000000 (0.5)
+  110. shuffle                        $10..18 = ($10..18)[1 0 0 0 1 0 0 0 1]
+  111. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+  112. bitwise_and_4_ints             $2..5 &= $6..9
+  113. bitwise_and_2_ints             $2..3 &= $4..5
+  114. bitwise_and_int                $2 &= $3
+  115. bitwise_and_int                $1 &= $2
+  116. bitwise_and_int                $0 &= $1
+  117. copy_slot_unmasked             _0_ok = $0
+  118. copy_4_slots_unmasked          $0..3 = _1_splat_4(0..3)
+  119. copy_4_slots_unmasked          $4..7 = _1_splat_4(4..7)
+  120. copy_slot_unmasked             $8 = _1_splat_4(8)
+  121. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  122. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  123. copy_slot_unmasked             _3_m(8) = $8
+  124. zero_slot_unmasked             $9 = 0
+  125. copy_constant                  $10 = 0x40000000 (2.0)
+  126. shuffle                        $9..17 = ($9..17)[1 0 0 0 1 0 0 0 1]
+  127. add_n_floats                   $0..8 += $9..17
+  128. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  129. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  130. copy_slot_unmasked             _3_m(8) = $8
+  131. copy_slot_unmasked             $0 = _0_ok
+  132. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
+  133. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
+  134. copy_slot_unmasked             $9 = _3_m(8)
+  135. copy_constant                  $10 = 0x40C00000 (6.0)
+  136. copy_constant                  $11 = 0x40800000 (4.0)
+  137. copy_constant                  $12 = 0x40800000 (4.0)
+  138. copy_constant                  $13 = 0x40800000 (4.0)
+  139. copy_constant                  $14 = 0x40C00000 (6.0)
+  140. copy_constant                  $15 = 0x40800000 (4.0)
+  141. copy_constant                  $16 = 0x40800000 (4.0)
+  142. copy_constant                  $17 = 0x40800000 (4.0)
+  143. copy_constant                  $18 = 0x40C00000 (6.0)
+  144. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+  145. bitwise_and_4_ints             $2..5 &= $6..9
+  146. bitwise_and_2_ints             $2..3 &= $4..5
+  147. bitwise_and_int                $2 &= $3
+  148. bitwise_and_int                $1 &= $2
+  149. bitwise_and_int                $0 &= $1
+  150. copy_slot_unmasked             _0_ok = $0
+  151. copy_4_slots_unmasked          $0..3 = _1_splat_4(0..3)
+  152. copy_4_slots_unmasked          $4..7 = _1_splat_4(4..7)
+  153. copy_slot_unmasked             $8 = _1_splat_4(8)
+  154. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  155. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  156. copy_slot_unmasked             _3_m(8) = $8
+  157. zero_slot_unmasked             $9 = 0
+  158. copy_constant                  $10 = 0x40000000 (2.0)
+  159. shuffle                        $9..17 = ($9..17)[1 0 0 0 1 0 0 0 1]
+  160. sub_n_floats                   $0..8 -= $9..17
+  161. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  162. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  163. copy_slot_unmasked             _3_m(8) = $8
+  164. copy_slot_unmasked             $0 = _0_ok
+  165. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
+  166. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
+  167. copy_slot_unmasked             $9 = _3_m(8)
+  168. copy_constant                  $10 = 0x40000000 (2.0)
+  169. copy_constant                  $11 = 0x40800000 (4.0)
+  170. copy_constant                  $12 = 0x40800000 (4.0)
+  171. copy_constant                  $13 = 0x40800000 (4.0)
+  172. copy_constant                  $14 = 0x40000000 (2.0)
+  173. copy_constant                  $15 = 0x40800000 (4.0)
+  174. copy_constant                  $16 = 0x40800000 (4.0)
+  175. copy_constant                  $17 = 0x40800000 (4.0)
+  176. copy_constant                  $18 = 0x40000000 (2.0)
+  177. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+  178. bitwise_and_4_ints             $2..5 &= $6..9
+  179. bitwise_and_2_ints             $2..3 &= $4..5
+  180. bitwise_and_int                $2 &= $3
+  181. bitwise_and_int                $1 &= $2
+  182. bitwise_and_int                $0 &= $1
+  183. copy_slot_unmasked             _0_ok = $0
+  184. copy_4_slots_unmasked          $0..3 = _1_splat_4(0..3)
+  185. copy_4_slots_unmasked          $4..7 = _1_splat_4(4..7)
+  186. copy_slot_unmasked             $8 = _1_splat_4(8)
+  187. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  188. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  189. copy_slot_unmasked             _3_m(8) = $8
+  190. copy_4_slots_unmasked          $9..12 = _2_splat_2(0..3)
+  191. copy_4_slots_unmasked          $13..16 = _2_splat_2(4..7)
+  192. copy_slot_unmasked             $17 = _2_splat_2(8)
+  193. div_n_floats                   $0..8 /= $9..17
+  194. copy_4_slots_unmasked          _3_m(0..3) = $0..3
+  195. copy_4_slots_unmasked          _3_m(4..7) = $4..7
+  196. copy_slot_unmasked             _3_m(8) = $8
+  197. copy_slot_unmasked             $0 = _0_ok
+  198. copy_4_slots_unmasked          $1..4 = _3_m(0..3)
+  199. copy_4_slots_unmasked          $5..8 = _3_m(4..7)
+  200. copy_slot_unmasked             $9 = _3_m(8)
+  201. copy_constant                  $10 = 0x40000000 (2.0)
+  202. copy_constant                  $11 = 0x40000000 (2.0)
+  203. copy_constant                  $12 = 0x40000000 (2.0)
+  204. copy_constant                  $13 = 0x40000000 (2.0)
+  205. copy_constant                  $14 = 0x40000000 (2.0)
+  206. copy_constant                  $15 = 0x40000000 (2.0)
+  207. copy_constant                  $16 = 0x40000000 (2.0)
+  208. copy_constant                  $17 = 0x40000000 (2.0)
+  209. copy_constant                  $18 = 0x40000000 (2.0)
+  210. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+  211. bitwise_and_4_ints             $2..5 &= $6..9
+  212. bitwise_and_2_ints             $2..3 &= $4..5
+  213. bitwise_and_int                $2 &= $3
+  214. bitwise_and_int                $1 &= $2
+  215. bitwise_and_int                $0 &= $1
+  216. copy_slot_unmasked             _0_ok = $0
+  217. copy_constant                  _4_m(0) = 0x3F800000 (1.0)
+  218. copy_constant                  _4_m(1) = 0x40000000 (2.0)
+  219. copy_constant                  _4_m(2) = 0x40400000 (3.0)
+  220. copy_constant                  _4_m(3) = 0x40800000 (4.0)
+  221. copy_constant                  _4_m(4) = 0x40A00000 (5.0)
+  222. copy_constant                  _4_m(5) = 0x40C00000 (6.0)
+  223. copy_constant                  _4_m(6) = 0x40E00000 (7.0)
+  224. copy_constant                  _4_m(7) = 0x41000000 (8.0)
+  225. copy_constant                  _4_m(8) = 0x41100000 (9.0)
+  226. copy_constant                  _4_m(9) = 0x41200000 (10.0)
+  227. copy_constant                  _4_m(10) = 0x41300000 (11.0)
+  228. copy_constant                  _4_m(11) = 0x41400000 (12.0)
+  229. copy_constant                  _4_m(12) = 0x41500000 (13.0)
+  230. copy_constant                  _4_m(13) = 0x41600000 (14.0)
+  231. copy_constant                  _4_m(14) = 0x41700000 (15.0)
+  232. copy_constant                  _4_m(15) = 0x41800000 (16.0)
+  233. copy_4_slots_unmasked          $0..3 = _4_m(0..3)
+  234. copy_4_slots_unmasked          $4..7 = _4_m(4..7)
+  235. copy_4_slots_unmasked          $8..11 = _4_m(8..11)
+  236. copy_4_slots_unmasked          $12..15 = _4_m(12..15)
+  237. copy_constant                  $16 = 0x41800000 (16.0)
+  238. copy_constant                  $17 = 0x41700000 (15.0)
+  239. copy_constant                  $18 = 0x41600000 (14.0)
+  240. copy_constant                  $19 = 0x41500000 (13.0)
+  241. copy_constant                  $20 = 0x41400000 (12.0)
+  242. copy_constant                  $21 = 0x41300000 (11.0)
+  243. copy_constant                  $22 = 0x41200000 (10.0)
+  244. copy_constant                  $23 = 0x41100000 (9.0)
+  245. copy_constant                  $24 = 0x41000000 (8.0)
+  246. copy_constant                  $25 = 0x40E00000 (7.0)
+  247. copy_constant                  $26 = 0x40C00000 (6.0)
+  248. copy_constant                  $27 = 0x40A00000 (5.0)
+  249. copy_constant                  $28 = 0x40800000 (4.0)
+  250. copy_constant                  $29 = 0x40400000 (3.0)
+  251. copy_constant                  $30 = 0x40000000 (2.0)
+  252. copy_constant                  $31 = 0x3F800000 (1.0)
+  253. add_n_floats                   $0..15 += $16..31
+  254. copy_4_slots_unmasked          _4_m(0..3) = $0..3
+  255. copy_4_slots_unmasked          _4_m(4..7) = $4..7
+  256. copy_4_slots_unmasked          _4_m(8..11) = $8..11
+  257. copy_4_slots_unmasked          _4_m(12..15) = $12..15
+  258. copy_slot_unmasked             $0 = _0_ok
+  259. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
+  260. copy_4_slots_unmasked          $5..8 = _4_m(4..7)
+  261. copy_4_slots_unmasked          $9..12 = _4_m(8..11)
+  262. copy_4_slots_unmasked          $13..16 = _4_m(12..15)
+  263. copy_constant                  $17 = 0x41880000 (17.0)
+  264. copy_constant                  $18 = 0x41880000 (17.0)
+  265. copy_constant                  $19 = 0x41880000 (17.0)
+  266. copy_constant                  $20 = 0x41880000 (17.0)
+  267. copy_constant                  $21 = 0x41880000 (17.0)
+  268. copy_constant                  $22 = 0x41880000 (17.0)
+  269. copy_constant                  $23 = 0x41880000 (17.0)
+  270. copy_constant                  $24 = 0x41880000 (17.0)
+  271. copy_constant                  $25 = 0x41880000 (17.0)
+  272. copy_constant                  $26 = 0x41880000 (17.0)
+  273. copy_constant                  $27 = 0x41880000 (17.0)
+  274. copy_constant                  $28 = 0x41880000 (17.0)
+  275. copy_constant                  $29 = 0x41880000 (17.0)
+  276. copy_constant                  $30 = 0x41880000 (17.0)
+  277. copy_constant                  $31 = 0x41880000 (17.0)
+  278. copy_constant                  $32 = 0x41880000 (17.0)
+  279. cmpeq_n_floats                 $1..16 = equal($1..16, $17..32)
+  280. bitwise_and_4_ints             $9..12 &= $13..16
+  281. bitwise_and_4_ints             $5..8 &= $9..12
+  282. bitwise_and_4_ints             $1..4 &= $5..8
+  283. bitwise_and_2_ints             $1..2 &= $3..4
+  284. bitwise_and_int                $1 &= $2
+  285. bitwise_and_int                $0 &= $1
+  286. copy_slot_unmasked             _0_ok = $0
+  287. copy_constant                  _5_m(0) = 0x41200000 (10.0)
+  288. copy_constant                  _5_m(1) = 0x41A00000 (20.0)
+  289. copy_constant                  _5_m(2) = 0x41F00000 (30.0)
+  290. copy_constant                  _5_m(3) = 0x42200000 (40.0)
+  291. copy_4_slots_unmasked          $0..3 = _5_m
+  292. copy_constant                  $4 = 0x3F800000 (1.0)
+  293. copy_constant                  $5 = 0x40000000 (2.0)
+  294. copy_constant                  $6 = 0x40400000 (3.0)
+  295. copy_constant                  $7 = 0x40800000 (4.0)
+  296. sub_4_floats                   $0..3 -= $4..7
+  297. copy_4_slots_unmasked          _5_m = $0..3
+  298. copy_slot_unmasked             $0 = _0_ok
+  299. copy_4_slots_unmasked          $1..4 = _5_m
+  300. copy_constant                  $5 = 0x41100000 (9.0)
+  301. copy_constant                  $6 = 0x41900000 (18.0)
+  302. copy_constant                  $7 = 0x41D80000 (27.0)
+  303. copy_constant                  $8 = 0x42100000 (36.0)
+  304. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  305. bitwise_and_2_ints             $1..2 &= $3..4
+  306. bitwise_and_int                $1 &= $2
+  307. bitwise_and_int                $0 &= $1
+  308. copy_slot_unmasked             _0_ok = $0
+  309. copy_constant                  _6_m(0) = 0x40000000 (2.0)
+  310. copy_constant                  _6_m(1) = 0x40800000 (4.0)
+  311. copy_constant                  _6_m(2) = 0x40C00000 (6.0)
+  312. copy_constant                  _6_m(3) = 0x41000000 (8.0)
+  313. copy_4_slots_unmasked          $0..3 = _6_m
+  314. copy_constant                  $4 = 0x40000000 (2.0)
+  315. copy_constant                  $5 = 0x40000000 (2.0)
+  316. copy_constant                  $6 = 0x40000000 (2.0)
+  317. copy_constant                  $7 = 0x40800000 (4.0)
+  318. div_4_floats                   $0..3 /= $4..7
+  319. copy_4_slots_unmasked          _6_m = $0..3
+  320. copy_slot_unmasked             $0 = _0_ok
+  321. copy_4_slots_unmasked          $1..4 = _6_m
+  322. copy_constant                  $5 = 0x3F800000 (1.0)
+  323. copy_constant                  $6 = 0x40000000 (2.0)
+  324. copy_constant                  $7 = 0x40400000 (3.0)
+  325. copy_constant                  $8 = 0x40000000 (2.0)
+  326. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  327. bitwise_and_2_ints             $1..2 &= $3..4
+  328. bitwise_and_int                $1 &= $2
+  329. bitwise_and_int                $0 &= $1
+  330. copy_slot_unmasked             _0_ok = $0
+  331. copy_constant                  _7_m(0) = 0x3F800000 (1.0)
+  332. copy_constant                  _7_m(1) = 0x40000000 (2.0)
+  333. copy_constant                  _7_m(2) = 0x40E00000 (7.0)
+  334. copy_constant                  _7_m(3) = 0x40800000 (4.0)
+  335. copy_4_slots_unmasked          $52..55 = _7_m
+  336. swizzle_3                      $53..55 = ($53..55).yxz
+  337. copy_constant                  $56 = 0x40400000 (3.0)
+  338. copy_constant                  $57 = 0x40A00000 (5.0)
+  339. copy_constant                  $58 = 0x40400000 (3.0)
+  340. copy_constant                  $59 = 0x40000000 (2.0)
+  341. copy_2_slots_unmasked          $0..1 = $52..53
+  342. copy_2_slots_unmasked          $2..3 = $56..57
+  343. dot_2_floats                   $0 = dot($0..1, $2..3)
+  344. copy_4_slots_unmasked          $1..4 = $54..57
+  345. dot_2_floats                   $1 = dot($1..2, $3..4)
+  346. copy_2_slots_unmasked          $2..3 = $52..53
+  347. copy_2_slots_unmasked          $4..5 = $58..59
+  348. dot_2_floats                   $2 = dot($2..3, $4..5)
+  349. copy_2_slots_unmasked          $3..4 = $54..55
+  350. copy_2_slots_unmasked          $5..6 = $58..59
+  351. dot_2_floats                   $3 = dot($3..4, $5..6)
+  352. copy_4_slots_unmasked          _7_m = $0..3
+  353. copy_slot_unmasked             $0 = _0_ok
+  354. copy_4_slots_unmasked          $1..4 = _7_m
+  355. copy_constant                  $5 = 0x42180000 (38.0)
+  356. copy_constant                  $6 = 0x41D00000 (26.0)
+  357. copy_constant                  $7 = 0x41880000 (17.0)
+  358. copy_constant                  $8 = 0x41600000 (14.0)
+  359. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  360. bitwise_and_2_ints             $1..2 &= $3..4
+  361. bitwise_and_int                $1 &= $2
+  362. bitwise_and_int                $0 &= $1
+  363. copy_slot_unmasked             _0_ok = $0
+  364. copy_constant                  _8_m(0) = 0x41200000 (10.0)
+  365. copy_constant                  _8_m(1) = 0x40800000 (4.0)
+  366. copy_constant                  _8_m(2) = 0x40000000 (2.0)
+  367. copy_constant                  _8_m(3) = 0x41A00000 (20.0)
+  368. copy_constant                  _8_m(4) = 0x40A00000 (5.0)
+  369. copy_constant                  _8_m(5) = 0x40400000 (3.0)
+  370. copy_constant                  _8_m(6) = 0x41200000 (10.0)
+  371. copy_constant                  _8_m(7) = 0x40C00000 (6.0)
+  372. copy_constant                  _8_m(8) = 0x40A00000 (5.0)
+  373. copy_4_slots_unmasked          $52..55 = _8_m(0..3)
+  374. copy_4_slots_unmasked          $56..59 = _8_m(4..7)
+  375. copy_slot_unmasked             $60 = _8_m(8)
+  376. shuffle                        $53..60 = ($53..60)[2 5 0 3 6 1 4 7]
+  377. copy_constant                  $61 = 0x40400000 (3.0)
+  378. copy_constant                  $62 = 0x40400000 (3.0)
+  379. copy_constant                  $63 = 0x40800000 (4.0)
+  380. copy_constant                  $64 = 0x40000000 (2.0)
+  381. copy_constant                  $65 = 0x40400000 (3.0)
+  382. copy_constant                  $66 = 0x40800000 (4.0)
+  383. copy_constant                  $67 = 0x40800000 (4.0)
+  384. copy_constant                  $68 = 0x41100000 (9.0)
+  385. copy_constant                  $69 = 0x40000000 (2.0)
+  386. copy_3_slots_unmasked          $0..2 = $52..54
+  387. copy_3_slots_unmasked          $3..5 = $61..63
+  388. dot_3_floats                   $0 = dot($0..2, $3..5)
+  389. copy_3_slots_unmasked          $1..3 = $55..57
+  390. copy_3_slots_unmasked          $4..6 = $61..63
+  391. dot_3_floats                   $1 = dot($1..3, $4..6)
+  392. copy_4_slots_unmasked          $2..5 = $58..61
+  393. copy_2_slots_unmasked          $6..7 = $62..63
+  394. dot_3_floats                   $2 = dot($2..4, $5..7)
+  395. copy_3_slots_unmasked          $3..5 = $52..54
+  396. copy_3_slots_unmasked          $6..8 = $64..66
+  397. dot_3_floats                   $3 = dot($3..5, $6..8)
+  398. copy_3_slots_unmasked          $4..6 = $55..57
+  399. copy_3_slots_unmasked          $7..9 = $64..66
+  400. dot_3_floats                   $4 = dot($4..6, $7..9)
+  401. copy_3_slots_unmasked          $5..7 = $58..60
+  402. copy_3_slots_unmasked          $8..10 = $64..66
+  403. dot_3_floats                   $5 = dot($5..7, $8..10)
+  404. copy_3_slots_unmasked          $6..8 = $52..54
+  405. copy_3_slots_unmasked          $9..11 = $67..69
+  406. dot_3_floats                   $6 = dot($6..8, $9..11)
+  407. copy_3_slots_unmasked          $7..9 = $55..57
+  408. copy_3_slots_unmasked          $10..12 = $67..69
+  409. dot_3_floats                   $7 = dot($7..9, $10..12)
+  410. copy_3_slots_unmasked          $8..10 = $58..60
+  411. copy_3_slots_unmasked          $11..13 = $67..69
+  412. dot_3_floats                   $8 = dot($8..10, $11..13)
+  413. copy_4_slots_unmasked          _8_m(0..3) = $0..3
+  414. copy_4_slots_unmasked          _8_m(4..7) = $4..7
+  415. copy_slot_unmasked             _8_m(8) = $8
+  416. copy_slot_unmasked             $0 = _0_ok
+  417. copy_4_slots_unmasked          $1..4 = _8_m(0..3)
+  418. copy_4_slots_unmasked          $5..8 = _8_m(4..7)
+  419. copy_slot_unmasked             $9 = _8_m(8)
+  420. copy_constant                  $10 = 0x43020000 (130.0)
+  421. copy_constant                  $11 = 0x424C0000 (51.0)
+  422. copy_constant                  $12 = 0x420C0000 (35.0)
+  423. copy_constant                  $13 = 0x42F00000 (120.0)
+  424. copy_constant                  $14 = 0x423C0000 (47.0)
+  425. copy_constant                  $15 = 0x42040000 (33.0)
+  426. copy_constant                  $16 = 0x43700000 (240.0)
+  427. copy_constant                  $17 = 0x42920000 (73.0)
+  428. copy_constant                  $18 = 0x42340000 (45.0)
+  429. cmpeq_n_floats                 $1..9 = equal($1..9, $10..18)
+  430. bitwise_and_4_ints             $2..5 &= $6..9
+  431. bitwise_and_2_ints             $2..3 &= $4..5
+  432. bitwise_and_int                $2 &= $3
+  433. bitwise_and_int                $1 &= $2
+  434. bitwise_and_int                $0 &= $1
+  435. copy_slot_unmasked             _0_ok = $0
+  436. store_condition_mask           $52 = CondMask
+  437. copy_slot_unmasked             $53 = _0_ok
+  438. zero_slot_unmasked             $0 = 0
+  439. merge_condition_mask           CondMask = $52 & $53
+  440. branch_if_no_active_lanes      branch_if_no_active_lanes +438 (label 1 at #878)
+  441. copy_constant                  ok = 0xFFFFFFFF
+  442. copy_constant                  splat_4(0) = 0x40800000 (4.0)
+  443. copy_constant                  splat_4(1) = 0x40800000 (4.0)
+  444. copy_constant                  splat_4(2) = 0x40800000 (4.0)
+  445. copy_constant                  splat_4(3) = 0x40800000 (4.0)
+  446. copy_constant                  splat_4(4) = 0x40800000 (4.0)
+  447. copy_constant                  splat_4(5) = 0x40800000 (4.0)
+  448. copy_constant                  splat_4(6) = 0x40800000 (4.0)
+  449. copy_constant                  splat_4(7) = 0x40800000 (4.0)
+  450. copy_constant                  splat_4(8) = 0x40800000 (4.0)
+  451. copy_constant                  splat_2(0) = 0x40000000 (2.0)
+  452. copy_constant                  splat_2(1) = 0x40000000 (2.0)
+  453. copy_constant                  splat_2(2) = 0x40000000 (2.0)
+  454. copy_constant                  splat_2(3) = 0x40000000 (2.0)
+  455. copy_constant                  splat_2(4) = 0x40000000 (2.0)
+  456. copy_constant                  splat_2(5) = 0x40000000 (2.0)
+  457. copy_constant                  splat_2(6) = 0x40000000 (2.0)
+  458. copy_constant                  splat_2(7) = 0x40000000 (2.0)
+  459. copy_constant                  splat_2(8) = 0x40000000 (2.0)
+  460. zero_4_slots_unmasked          m(0..3) = 0
+  461. zero_4_slots_unmasked          m(4..7) = 0
+  462. zero_slot_unmasked             m(8) = 0
+  463. zero_slot_unmasked             $1 = 0
+  464. copy_constant                  $2 = 0x40000000 (2.0)
+  465. shuffle                        $1..9 = ($1..9)[1 0 0 0 1 0 0 0 1]
+  466. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  467. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  468. copy_slot_masked               m(8) = Mask($9)
+  469. copy_4_slots_unmasked          $10..13 = splat_4(0..3)
+  470. copy_4_slots_unmasked          $14..17 = splat_4(4..7)
+  471. copy_slot_unmasked             $18 = splat_4(8)
+  472. add_n_floats                   $1..9 += $10..18
+  473. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  474. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  475. copy_slot_masked               m(8) = Mask($9)
+  476. copy_slot_unmasked             $1 = ok
+  477. copy_4_slots_unmasked          $2..5 = m(0..3)
+  478. copy_4_slots_unmasked          $6..9 = m(4..7)
+  479. copy_slot_unmasked             $10 = m(8)
+  480. copy_constant                  $11 = 0x40C00000 (6.0)
+  481. copy_constant                  $12 = 0x40800000 (4.0)
+  482. copy_constant                  $13 = 0x40800000 (4.0)
+  483. copy_constant                  $14 = 0x40800000 (4.0)
+  484. copy_constant                  $15 = 0x40C00000 (6.0)
+  485. copy_constant                  $16 = 0x40800000 (4.0)
+  486. copy_constant                  $17 = 0x40800000 (4.0)
+  487. copy_constant                  $18 = 0x40800000 (4.0)
+  488. copy_constant                  $19 = 0x40C00000 (6.0)
+  489. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
+  490. bitwise_and_4_ints             $3..6 &= $7..10
+  491. bitwise_and_2_ints             $3..4 &= $5..6
+  492. bitwise_and_int                $3 &= $4
+  493. bitwise_and_int                $2 &= $3
+  494. bitwise_and_int                $1 &= $2
+  495. copy_slot_masked               ok = Mask($1)
+  496. zero_slot_unmasked             $1 = 0
+  497. copy_constant                  $2 = 0x40000000 (2.0)
+  498. shuffle                        $1..9 = ($1..9)[1 0 0 0 1 0 0 0 1]
+  499. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  500. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  501. copy_slot_masked               m(8) = Mask($9)
   502. stack_rewind
-  503. copy_constant                  $12 = 0x40800000 (4.0)
-  504. copy_constant                  $13 = 0x40800000 (4.0)
-  505. copy_constant                  $14 = 0x40800000 (4.0)
-  506. copy_constant                  $15 = 0x40C00000 (6.0)
-  507. copy_constant                  $16 = 0x40800000 (4.0)
-  508. copy_constant                  $17 = 0x40800000 (4.0)
-  509. copy_constant                  $18 = 0x40800000 (4.0)
-  510. copy_constant                  $19 = 0x40C00000 (6.0)
-  511. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
-  512. bitwise_and_4_ints             $3..6 &= $7..10
-  513. bitwise_and_2_ints             $3..4 &= $5..6
-  514. bitwise_and_int                $3 &= $4
-  515. bitwise_and_int                $2 &= $3
-  516. bitwise_and_int                $1 &= $2
-  517. copy_slot_masked               ok = Mask($1)
-  518. zero_slot_unmasked             $1 = 0
-  519. copy_constant                  $2 = 0x40000000 (2.0)
-  520. shuffle                        $1..9 = ($1..9)[1 0 0 0 1 0 0 0 1]
-  521. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  522. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  523. copy_slot_masked               m(8) = Mask($9)
-  524. copy_4_slots_unmasked          $1..4 = m(0..3)
-  525. copy_4_slots_unmasked          $5..8 = m(4..7)
-  526. copy_slot_unmasked             $9 = m(8)
-  527. copy_4_slots_unmasked          $10..13 = splat_4(0..3)
-  528. copy_4_slots_unmasked          $14..17 = splat_4(4..7)
-  529. copy_slot_unmasked             $18 = splat_4(8)
-  530. sub_n_floats                   $1..9 -= $10..18
-  531. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  532. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  533. copy_slot_masked               m(8) = Mask($9)
-  534. copy_slot_unmasked             $1 = ok
-  535. copy_4_slots_unmasked          $2..5 = m(0..3)
-  536. copy_4_slots_unmasked          $6..9 = m(4..7)
-  537. copy_slot_unmasked             $10 = m(8)
-  538. copy_constant                  $11 = 0xC0000000 (-2.0)
-  539. copy_constant                  $12 = 0xC0800000 (-4.0)
-  540. copy_constant                  $13 = 0xC0800000 (-4.0)
-  541. copy_constant                  $14 = 0xC0800000 (-4.0)
-  542. copy_constant                  $15 = 0xC0000000 (-2.0)
-  543. copy_constant                  $16 = 0xC0800000 (-4.0)
-  544. copy_constant                  $17 = 0xC0800000 (-4.0)
-  545. copy_constant                  $18 = 0xC0800000 (-4.0)
-  546. copy_constant                  $19 = 0xC0000000 (-2.0)
-  547. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
-  548. bitwise_and_4_ints             $3..6 &= $7..10
-  549. bitwise_and_2_ints             $3..4 &= $5..6
-  550. bitwise_and_int                $3 &= $4
-  551. bitwise_and_int                $2 &= $3
-  552. bitwise_and_int                $1 &= $2
-  553. copy_slot_masked               ok = Mask($1)
-  554. zero_slot_unmasked             $1 = 0
-  555. copy_constant                  $2 = 0x40000000 (2.0)
-  556. shuffle                        $1..9 = ($1..9)[1 0 0 0 1 0 0 0 1]
-  557. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  558. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  559. copy_slot_masked               m(8) = Mask($9)
-  560. copy_4_slots_unmasked          $1..4 = m(0..3)
-  561. copy_4_slots_unmasked          $5..8 = m(4..7)
-  562. copy_slot_unmasked             $9 = m(8)
-  563. copy_4_slots_unmasked          $10..13 = splat_4(0..3)
-  564. copy_4_slots_unmasked          $14..17 = splat_4(4..7)
-  565. copy_slot_unmasked             $18 = splat_4(8)
-  566. div_n_floats                   $1..9 /= $10..18
+  503. copy_4_slots_unmasked          $10..13 = splat_4(0..3)
+  504. copy_4_slots_unmasked          $14..17 = splat_4(4..7)
+  505. copy_slot_unmasked             $18 = splat_4(8)
+  506. sub_n_floats                   $1..9 -= $10..18
+  507. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  508. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  509. copy_slot_masked               m(8) = Mask($9)
+  510. copy_slot_unmasked             $1 = ok
+  511. copy_4_slots_unmasked          $2..5 = m(0..3)
+  512. copy_4_slots_unmasked          $6..9 = m(4..7)
+  513. copy_slot_unmasked             $10 = m(8)
+  514. copy_constant                  $11 = 0xC0000000 (-2.0)
+  515. copy_constant                  $12 = 0xC0800000 (-4.0)
+  516. copy_constant                  $13 = 0xC0800000 (-4.0)
+  517. copy_constant                  $14 = 0xC0800000 (-4.0)
+  518. copy_constant                  $15 = 0xC0000000 (-2.0)
+  519. copy_constant                  $16 = 0xC0800000 (-4.0)
+  520. copy_constant                  $17 = 0xC0800000 (-4.0)
+  521. copy_constant                  $18 = 0xC0800000 (-4.0)
+  522. copy_constant                  $19 = 0xC0000000 (-2.0)
+  523. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
+  524. bitwise_and_4_ints             $3..6 &= $7..10
+  525. bitwise_and_2_ints             $3..4 &= $5..6
+  526. bitwise_and_int                $3 &= $4
+  527. bitwise_and_int                $2 &= $3
+  528. bitwise_and_int                $1 &= $2
+  529. copy_slot_masked               ok = Mask($1)
+  530. zero_slot_unmasked             $1 = 0
+  531. copy_constant                  $2 = 0x40000000 (2.0)
+  532. shuffle                        $1..9 = ($1..9)[1 0 0 0 1 0 0 0 1]
+  533. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  534. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  535. copy_slot_masked               m(8) = Mask($9)
+  536. copy_4_slots_unmasked          $10..13 = splat_4(0..3)
+  537. copy_4_slots_unmasked          $14..17 = splat_4(4..7)
+  538. copy_slot_unmasked             $18 = splat_4(8)
+  539. div_n_floats                   $1..9 /= $10..18
+  540. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  541. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  542. copy_slot_masked               m(8) = Mask($9)
+  543. copy_slot_unmasked             $1 = ok
+  544. copy_4_slots_unmasked          $2..5 = m(0..3)
+  545. copy_4_slots_unmasked          $6..9 = m(4..7)
+  546. copy_slot_unmasked             $10 = m(8)
+  547. zero_slot_unmasked             $11 = 0
+  548. copy_constant                  $12 = 0x3F000000 (0.5)
+  549. shuffle                        $11..19 = ($11..19)[1 0 0 0 1 0 0 0 1]
+  550. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
+  551. bitwise_and_4_ints             $3..6 &= $7..10
+  552. bitwise_and_2_ints             $3..4 &= $5..6
+  553. bitwise_and_int                $3 &= $4
+  554. bitwise_and_int                $2 &= $3
+  555. bitwise_and_int                $1 &= $2
+  556. copy_slot_masked               ok = Mask($1)
+  557. copy_4_slots_unmasked          $1..4 = splat_4(0..3)
+  558. copy_4_slots_unmasked          $5..8 = splat_4(4..7)
+  559. copy_slot_unmasked             $9 = splat_4(8)
+  560. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  561. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  562. copy_slot_masked               m(8) = Mask($9)
+  563. zero_slot_unmasked             $10 = 0
+  564. copy_constant                  $11 = 0x40000000 (2.0)
+  565. shuffle                        $10..18 = ($10..18)[1 0 0 0 1 0 0 0 1]
+  566. add_n_floats                   $1..9 += $10..18
   567. copy_4_slots_masked            m(0..3) = Mask($1..4)
   568. copy_4_slots_masked            m(4..7) = Mask($5..8)
   569. copy_slot_masked               m(8) = Mask($9)
@@ -571,65 +571,65 @@
   571. copy_4_slots_unmasked          $2..5 = m(0..3)
   572. copy_4_slots_unmasked          $6..9 = m(4..7)
   573. copy_slot_unmasked             $10 = m(8)
-  574. zero_slot_unmasked             $11 = 0
-  575. copy_constant                  $12 = 0x3F000000 (0.5)
-  576. shuffle                        $11..19 = ($11..19)[1 0 0 0 1 0 0 0 1]
-  577. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
-  578. bitwise_and_4_ints             $3..6 &= $7..10
-  579. bitwise_and_2_ints             $3..4 &= $5..6
-  580. bitwise_and_int                $3 &= $4
-  581. bitwise_and_int                $2 &= $3
-  582. bitwise_and_int                $1 &= $2
-  583. copy_slot_masked               ok = Mask($1)
-  584. copy_4_slots_unmasked          $1..4 = splat_4(0..3)
-  585. copy_4_slots_unmasked          $5..8 = splat_4(4..7)
-  586. copy_slot_unmasked             $9 = splat_4(8)
-  587. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  588. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  589. copy_slot_masked               m(8) = Mask($9)
-  590. copy_4_slots_unmasked          $1..4 = m(0..3)
-  591. copy_4_slots_unmasked          $5..8 = m(4..7)
-  592. copy_slot_unmasked             $9 = m(8)
-  593. zero_slot_unmasked             $10 = 0
-  594. copy_constant                  $11 = 0x40000000 (2.0)
-  595. shuffle                        $10..18 = ($10..18)[1 0 0 0 1 0 0 0 1]
-  596. add_n_floats                   $1..9 += $10..18
-  597. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  598. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  599. copy_slot_masked               m(8) = Mask($9)
-  600. copy_slot_unmasked             $1 = ok
-  601. copy_4_slots_unmasked          $2..5 = m(0..3)
-  602. copy_4_slots_unmasked          $6..9 = m(4..7)
-  603. copy_slot_unmasked             $10 = m(8)
-  604. copy_constant                  $11 = 0x40C00000 (6.0)
-  605. copy_constant                  $12 = 0x40800000 (4.0)
-  606. copy_constant                  $13 = 0x40800000 (4.0)
-  607. copy_constant                  $14 = 0x40800000 (4.0)
-  608. copy_constant                  $15 = 0x40C00000 (6.0)
-  609. copy_constant                  $16 = 0x40800000 (4.0)
-  610. copy_constant                  $17 = 0x40800000 (4.0)
-  611. copy_constant                  $18 = 0x40800000 (4.0)
-  612. copy_constant                  $19 = 0x40C00000 (6.0)
-  613. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
-  614. bitwise_and_4_ints             $3..6 &= $7..10
-  615. bitwise_and_2_ints             $3..4 &= $5..6
-  616. bitwise_and_int                $3 &= $4
-  617. bitwise_and_int                $2 &= $3
-  618. bitwise_and_int                $1 &= $2
-  619. copy_slot_masked               ok = Mask($1)
-  620. copy_4_slots_unmasked          $1..4 = splat_4(0..3)
-  621. copy_4_slots_unmasked          $5..8 = splat_4(4..7)
-  622. copy_slot_unmasked             $9 = splat_4(8)
-  623. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  624. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  625. copy_slot_masked               m(8) = Mask($9)
-  626. copy_4_slots_unmasked          $1..4 = m(0..3)
-  627. copy_4_slots_unmasked          $5..8 = m(4..7)
-  628. copy_slot_unmasked             $9 = m(8)
-  629. zero_slot_unmasked             $10 = 0
-  630. copy_constant                  $11 = 0x40000000 (2.0)
-  631. shuffle                        $10..18 = ($10..18)[1 0 0 0 1 0 0 0 1]
-  632. sub_n_floats                   $1..9 -= $10..18
+  574. copy_constant                  $11 = 0x40C00000 (6.0)
+  575. copy_constant                  $12 = 0x40800000 (4.0)
+  576. copy_constant                  $13 = 0x40800000 (4.0)
+  577. copy_constant                  $14 = 0x40800000 (4.0)
+  578. copy_constant                  $15 = 0x40C00000 (6.0)
+  579. copy_constant                  $16 = 0x40800000 (4.0)
+  580. copy_constant                  $17 = 0x40800000 (4.0)
+  581. copy_constant                  $18 = 0x40800000 (4.0)
+  582. copy_constant                  $19 = 0x40C00000 (6.0)
+  583. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
+  584. bitwise_and_4_ints             $3..6 &= $7..10
+  585. bitwise_and_2_ints             $3..4 &= $5..6
+  586. bitwise_and_int                $3 &= $4
+  587. bitwise_and_int                $2 &= $3
+  588. bitwise_and_int                $1 &= $2
+  589. copy_slot_masked               ok = Mask($1)
+  590. copy_4_slots_unmasked          $1..4 = splat_4(0..3)
+  591. copy_4_slots_unmasked          $5..8 = splat_4(4..7)
+  592. copy_slot_unmasked             $9 = splat_4(8)
+  593. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  594. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  595. copy_slot_masked               m(8) = Mask($9)
+  596. zero_slot_unmasked             $10 = 0
+  597. copy_constant                  $11 = 0x40000000 (2.0)
+  598. shuffle                        $10..18 = ($10..18)[1 0 0 0 1 0 0 0 1]
+  599. sub_n_floats                   $1..9 -= $10..18
+  600. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  601. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  602. copy_slot_masked               m(8) = Mask($9)
+  603. copy_slot_unmasked             $1 = ok
+  604. copy_4_slots_unmasked          $2..5 = m(0..3)
+  605. copy_4_slots_unmasked          $6..9 = m(4..7)
+  606. copy_slot_unmasked             $10 = m(8)
+  607. copy_constant                  $11 = 0x40000000 (2.0)
+  608. copy_constant                  $12 = 0x40800000 (4.0)
+  609. copy_constant                  $13 = 0x40800000 (4.0)
+  610. copy_constant                  $14 = 0x40800000 (4.0)
+  611. copy_constant                  $15 = 0x40000000 (2.0)
+  612. copy_constant                  $16 = 0x40800000 (4.0)
+  613. copy_constant                  $17 = 0x40800000 (4.0)
+  614. copy_constant                  $18 = 0x40800000 (4.0)
+  615. copy_constant                  $19 = 0x40000000 (2.0)
+  616. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
+  617. bitwise_and_4_ints             $3..6 &= $7..10
+  618. bitwise_and_2_ints             $3..4 &= $5..6
+  619. bitwise_and_int                $3 &= $4
+  620. bitwise_and_int                $2 &= $3
+  621. bitwise_and_int                $1 &= $2
+  622. copy_slot_masked               ok = Mask($1)
+  623. copy_4_slots_unmasked          $1..4 = splat_4(0..3)
+  624. copy_4_slots_unmasked          $5..8 = splat_4(4..7)
+  625. copy_slot_unmasked             $9 = splat_4(8)
+  626. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  627. copy_4_slots_masked            m(4..7) = Mask($5..8)
+  628. copy_slot_masked               m(8) = Mask($9)
+  629. copy_4_slots_unmasked          $10..13 = splat_2(0..3)
+  630. copy_4_slots_unmasked          $14..17 = splat_2(4..7)
+  631. copy_slot_unmasked             $18 = splat_2(8)
+  632. div_n_floats                   $1..9 /= $10..18
   633. copy_4_slots_masked            m(0..3) = Mask($1..4)
   634. copy_4_slots_masked            m(4..7) = Mask($5..8)
   635. copy_slot_masked               m(8) = Mask($9)
@@ -638,13 +638,13 @@
   638. copy_4_slots_unmasked          $6..9 = m(4..7)
   639. copy_slot_unmasked             $10 = m(8)
   640. copy_constant                  $11 = 0x40000000 (2.0)
-  641. copy_constant                  $12 = 0x40800000 (4.0)
-  642. copy_constant                  $13 = 0x40800000 (4.0)
-  643. copy_constant                  $14 = 0x40800000 (4.0)
+  641. copy_constant                  $12 = 0x40000000 (2.0)
+  642. copy_constant                  $13 = 0x40000000 (2.0)
+  643. copy_constant                  $14 = 0x40000000 (2.0)
   644. copy_constant                  $15 = 0x40000000 (2.0)
-  645. copy_constant                  $16 = 0x40800000 (4.0)
-  646. copy_constant                  $17 = 0x40800000 (4.0)
-  647. copy_constant                  $18 = 0x40800000 (4.0)
+  645. copy_constant                  $16 = 0x40000000 (2.0)
+  646. copy_constant                  $17 = 0x40000000 (2.0)
+  647. copy_constant                  $18 = 0x40000000 (2.0)
   648. copy_constant                  $19 = 0x40000000 (2.0)
   649. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
   650. bitwise_and_4_ints             $3..6 &= $7..10
@@ -653,269 +653,233 @@
   653. bitwise_and_int                $2 &= $3
   654. bitwise_and_int                $1 &= $2
   655. copy_slot_masked               ok = Mask($1)
-  656. copy_4_slots_unmasked          $1..4 = splat_4(0..3)
-  657. copy_4_slots_unmasked          $5..8 = splat_4(4..7)
-  658. copy_slot_unmasked             $9 = splat_4(8)
-  659. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  660. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  661. copy_slot_masked               m(8) = Mask($9)
-  662. copy_4_slots_unmasked          $1..4 = m(0..3)
-  663. copy_4_slots_unmasked          $5..8 = m(4..7)
-  664. copy_slot_unmasked             $9 = m(8)
-  665. copy_4_slots_unmasked          $10..13 = splat_2(0..3)
-  666. copy_4_slots_unmasked          $14..17 = splat_2(4..7)
-  667. copy_slot_unmasked             $18 = splat_2(8)
-  668. div_n_floats                   $1..9 /= $10..18
-  669. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  670. copy_4_slots_masked            m(4..7) = Mask($5..8)
-  671. copy_slot_masked               m(8) = Mask($9)
-  672. copy_slot_unmasked             $1 = ok
-  673. copy_4_slots_unmasked          $2..5 = m(0..3)
-  674. copy_4_slots_unmasked          $6..9 = m(4..7)
-  675. copy_slot_unmasked             $10 = m(8)
-  676. copy_constant                  $11 = 0x40000000 (2.0)
-  677. copy_constant                  $12 = 0x40000000 (2.0)
-  678. copy_constant                  $13 = 0x40000000 (2.0)
-  679. copy_constant                  $14 = 0x40000000 (2.0)
-  680. copy_constant                  $15 = 0x40000000 (2.0)
-  681. copy_constant                  $16 = 0x40000000 (2.0)
-  682. copy_constant                  $17 = 0x40000000 (2.0)
-  683. copy_constant                  $18 = 0x40000000 (2.0)
-  684. copy_constant                  $19 = 0x40000000 (2.0)
-  685. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
-  686. bitwise_and_4_ints             $3..6 &= $7..10
-  687. bitwise_and_2_ints             $3..4 &= $5..6
-  688. bitwise_and_int                $3 &= $4
-  689. bitwise_and_int                $2 &= $3
-  690. bitwise_and_int                $1 &= $2
-  691. copy_slot_masked               ok = Mask($1)
-  692. copy_constant                  m₁(0) = 0x3F800000 (1.0)
-  693. copy_constant                  m₁(1) = 0x40000000 (2.0)
-  694. copy_constant                  m₁(2) = 0x40400000 (3.0)
-  695. copy_constant                  m₁(3) = 0x40800000 (4.0)
-  696. copy_constant                  m₁(4) = 0x40A00000 (5.0)
-  697. copy_constant                  m₁(5) = 0x40C00000 (6.0)
-  698. copy_constant                  m₁(6) = 0x40E00000 (7.0)
-  699. copy_constant                  m₁(7) = 0x41000000 (8.0)
-  700. copy_constant                  m₁(8) = 0x41100000 (9.0)
-  701. copy_constant                  m₁(9) = 0x41200000 (10.0)
-  702. copy_constant                  m₁(10) = 0x41300000 (11.0)
-  703. copy_constant                  m₁(11) = 0x41400000 (12.0)
-  704. copy_constant                  m₁(12) = 0x41500000 (13.0)
-  705. copy_constant                  m₁(13) = 0x41600000 (14.0)
-  706. copy_constant                  m₁(14) = 0x41700000 (15.0)
-  707. copy_constant                  m₁(15) = 0x41800000 (16.0)
-  708. copy_4_slots_unmasked          $1..4 = m₁(0..3)
-  709. copy_4_slots_unmasked          $5..8 = m₁(4..7)
-  710. copy_4_slots_unmasked          $9..12 = m₁(8..11)
-  711. copy_4_slots_unmasked          $13..16 = m₁(12..15)
-  712. copy_constant                  $17 = 0x41800000 (16.0)
-  713. copy_constant                  $18 = 0x41700000 (15.0)
-  714. copy_constant                  $19 = 0x41600000 (14.0)
-  715. copy_constant                  $20 = 0x41500000 (13.0)
-  716. copy_constant                  $21 = 0x41400000 (12.0)
-  717. copy_constant                  $22 = 0x41300000 (11.0)
-  718. copy_constant                  $23 = 0x41200000 (10.0)
-  719. copy_constant                  $24 = 0x41100000 (9.0)
-  720. copy_constant                  $25 = 0x41000000 (8.0)
-  721. copy_constant                  $26 = 0x40E00000 (7.0)
-  722. copy_constant                  $27 = 0x40C00000 (6.0)
-  723. copy_constant                  $28 = 0x40A00000 (5.0)
-  724. copy_constant                  $29 = 0x40800000 (4.0)
-  725. copy_constant                  $30 = 0x40400000 (3.0)
-  726. copy_constant                  $31 = 0x40000000 (2.0)
-  727. copy_constant                  $32 = 0x3F800000 (1.0)
-  728. add_n_floats                   $1..16 += $17..32
-  729. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  730. copy_4_slots_masked            m₁(4..7) = Mask($5..8)
-  731. copy_4_slots_masked            m₁(8..11) = Mask($9..12)
-  732. copy_4_slots_masked            m₁(12..15) = Mask($13..16)
-  733. copy_slot_unmasked             $1 = ok
-  734. copy_4_slots_unmasked          $2..5 = m₁(0..3)
-  735. copy_4_slots_unmasked          $6..9 = m₁(4..7)
-  736. copy_4_slots_unmasked          $10..13 = m₁(8..11)
-  737. copy_4_slots_unmasked          $14..17 = m₁(12..15)
-  738. copy_constant                  $18 = 0x41880000 (17.0)
-  739. copy_constant                  $19 = 0x41880000 (17.0)
-  740. copy_constant                  $20 = 0x41880000 (17.0)
-  741. copy_constant                  $21 = 0x41880000 (17.0)
-  742. copy_constant                  $22 = 0x41880000 (17.0)
-  743. copy_constant                  $23 = 0x41880000 (17.0)
-  744. copy_constant                  $24 = 0x41880000 (17.0)
-  745. copy_constant                  $25 = 0x41880000 (17.0)
-  746. copy_constant                  $26 = 0x41880000 (17.0)
-  747. copy_constant                  $27 = 0x41880000 (17.0)
-  748. copy_constant                  $28 = 0x41880000 (17.0)
-  749. copy_constant                  $29 = 0x41880000 (17.0)
-  750. copy_constant                  $30 = 0x41880000 (17.0)
-  751. copy_constant                  $31 = 0x41880000 (17.0)
-  752. copy_constant                  $32 = 0x41880000 (17.0)
-  753. copy_constant                  $33 = 0x41880000 (17.0)
-  754. cmpeq_n_floats                 $2..17 = equal($2..17, $18..33)
-  755. bitwise_and_4_ints             $10..13 &= $14..17
-  756. bitwise_and_4_ints             $6..9 &= $10..13
-  757. bitwise_and_4_ints             $2..5 &= $6..9
-  758. bitwise_and_2_ints             $2..3 &= $4..5
-  759. bitwise_and_int                $2 &= $3
-  760. bitwise_and_int                $1 &= $2
-  761. copy_slot_masked               ok = Mask($1)
-  762. copy_constant                  m₂(0) = 0x41200000 (10.0)
-  763. copy_constant                  m₂(1) = 0x41A00000 (20.0)
-  764. copy_constant                  m₂(2) = 0x41F00000 (30.0)
-  765. copy_constant                  m₂(3) = 0x42200000 (40.0)
-  766. copy_4_slots_unmasked          $1..4 = m₂
-  767. copy_constant                  $5 = 0x3F800000 (1.0)
-  768. copy_constant                  $6 = 0x40000000 (2.0)
-  769. copy_constant                  $7 = 0x40400000 (3.0)
-  770. copy_constant                  $8 = 0x40800000 (4.0)
-  771. sub_4_floats                   $1..4 -= $5..8
-  772. copy_4_slots_masked            m₂ = Mask($1..4)
-  773. copy_slot_unmasked             $1 = ok
-  774. copy_4_slots_unmasked          $2..5 = m₂
-  775. copy_constant                  $6 = 0x41100000 (9.0)
-  776. copy_constant                  $7 = 0x41900000 (18.0)
-  777. copy_constant                  $8 = 0x41D80000 (27.0)
-  778. copy_constant                  $9 = 0x42100000 (36.0)
-  779. cmpeq_4_floats                 $2..5 = equal($2..5, $6..9)
-  780. bitwise_and_2_ints             $2..3 &= $4..5
-  781. bitwise_and_int                $2 &= $3
-  782. bitwise_and_int                $1 &= $2
-  783. copy_slot_masked               ok = Mask($1)
-  784. copy_constant                  m₃(0) = 0x40000000 (2.0)
-  785. copy_constant                  m₃(1) = 0x40800000 (4.0)
-  786. copy_constant                  m₃(2) = 0x40C00000 (6.0)
-  787. copy_constant                  m₃(3) = 0x41000000 (8.0)
-  788. copy_4_slots_unmasked          $1..4 = m₃
-  789. copy_constant                  $5 = 0x40000000 (2.0)
-  790. copy_constant                  $6 = 0x40000000 (2.0)
-  791. copy_constant                  $7 = 0x40000000 (2.0)
-  792. copy_constant                  $8 = 0x40800000 (4.0)
-  793. div_4_floats                   $1..4 /= $5..8
-  794. copy_4_slots_masked            m₃ = Mask($1..4)
-  795. copy_slot_unmasked             $1 = ok
-  796. copy_4_slots_unmasked          $2..5 = m₃
-  797. copy_constant                  $6 = 0x3F800000 (1.0)
-  798. copy_constant                  $7 = 0x40000000 (2.0)
-  799. copy_constant                  $8 = 0x40400000 (3.0)
-  800. copy_constant                  $9 = 0x40000000 (2.0)
-  801. cmpeq_4_floats                 $2..5 = equal($2..5, $6..9)
-  802. bitwise_and_2_ints             $2..3 &= $4..5
-  803. bitwise_and_int                $2 &= $3
-  804. bitwise_and_int                $1 &= $2
-  805. copy_slot_masked               ok = Mask($1)
-  806. copy_constant                  m₄(0) = 0x3F800000 (1.0)
-  807. copy_constant                  m₄(1) = 0x40000000 (2.0)
-  808. copy_constant                  m₄(2) = 0x40E00000 (7.0)
-  809. copy_constant                  m₄(3) = 0x40800000 (4.0)
-  810. copy_4_slots_unmasked          $34..37 = m₄
-  811. swizzle_3                      $35..37 = ($35..37).yxz
-  812. copy_constant                  $38 = 0x40400000 (3.0)
-  813. copy_constant                  $39 = 0x40A00000 (5.0)
-  814. copy_constant                  $40 = 0x40400000 (3.0)
-  815. copy_constant                  $41 = 0x40000000 (2.0)
-  816. copy_2_slots_unmasked          $1..2 = $34..35
-  817. copy_2_slots_unmasked          $3..4 = $38..39
-  818. dot_2_floats                   $1 = dot($1..2, $3..4)
-  819. copy_4_slots_unmasked          $2..5 = $36..39
-  820. dot_2_floats                   $2 = dot($2..3, $4..5)
-  821. copy_2_slots_unmasked          $3..4 = $34..35
-  822. copy_2_slots_unmasked          $5..6 = $40..41
-  823. dot_2_floats                   $3 = dot($3..4, $5..6)
-  824. copy_2_slots_unmasked          $4..5 = $36..37
-  825. copy_2_slots_unmasked          $6..7 = $40..41
-  826. dot_2_floats                   $4 = dot($4..5, $6..7)
-  827. copy_4_slots_masked            m₄ = Mask($1..4)
-  828. copy_slot_unmasked             $1 = ok
-  829. copy_4_slots_unmasked          $2..5 = m₄
-  830. copy_constant                  $6 = 0x42180000 (38.0)
-  831. copy_constant                  $7 = 0x41D00000 (26.0)
-  832. copy_constant                  $8 = 0x41880000 (17.0)
-  833. copy_constant                  $9 = 0x41600000 (14.0)
-  834. cmpeq_4_floats                 $2..5 = equal($2..5, $6..9)
-  835. bitwise_and_2_ints             $2..3 &= $4..5
-  836. bitwise_and_int                $2 &= $3
-  837. bitwise_and_int                $1 &= $2
-  838. copy_slot_masked               ok = Mask($1)
-  839. copy_constant                  m₅(0) = 0x41200000 (10.0)
-  840. copy_constant                  m₅(1) = 0x40800000 (4.0)
-  841. copy_constant                  m₅(2) = 0x40000000 (2.0)
-  842. copy_constant                  m₅(3) = 0x41A00000 (20.0)
-  843. copy_constant                  m₅(4) = 0x40A00000 (5.0)
-  844. copy_constant                  m₅(5) = 0x40400000 (3.0)
-  845. copy_constant                  m₅(6) = 0x41200000 (10.0)
-  846. copy_constant                  m₅(7) = 0x40C00000 (6.0)
-  847. copy_constant                  m₅(8) = 0x40A00000 (5.0)
-  848. copy_4_slots_unmasked          $34..37 = m₅(0..3)
-  849. copy_4_slots_unmasked          $38..41 = m₅(4..7)
-  850. copy_slot_unmasked             $42 = m₅(8)
-  851. shuffle                        $35..42 = ($35..42)[2 5 0 3 6 1 4 7]
-  852. copy_constant                  $43 = 0x40400000 (3.0)
-  853. copy_constant                  $44 = 0x40400000 (3.0)
-  854. copy_constant                  $45 = 0x40800000 (4.0)
-  855. copy_constant                  $46 = 0x40000000 (2.0)
-  856. copy_constant                  $47 = 0x40400000 (3.0)
-  857. copy_constant                  $48 = 0x40800000 (4.0)
-  858. copy_constant                  $49 = 0x40800000 (4.0)
-  859. copy_constant                  $50 = 0x41100000 (9.0)
-  860. copy_constant                  $51 = 0x40000000 (2.0)
-  861. copy_3_slots_unmasked          $1..3 = $34..36
-  862. copy_3_slots_unmasked          $4..6 = $43..45
-  863. dot_3_floats                   $1 = dot($1..3, $4..6)
-  864. copy_3_slots_unmasked          $2..4 = $37..39
-  865. copy_3_slots_unmasked          $5..7 = $43..45
-  866. dot_3_floats                   $2 = dot($2..4, $5..7)
-  867. copy_4_slots_unmasked          $3..6 = $40..43
-  868. copy_2_slots_unmasked          $7..8 = $44..45
-  869. dot_3_floats                   $3 = dot($3..5, $6..8)
-  870. copy_3_slots_unmasked          $4..6 = $34..36
-  871. copy_3_slots_unmasked          $7..9 = $46..48
-  872. dot_3_floats                   $4 = dot($4..6, $7..9)
-  873. copy_3_slots_unmasked          $5..7 = $37..39
-  874. copy_3_slots_unmasked          $8..10 = $46..48
-  875. dot_3_floats                   $5 = dot($5..7, $8..10)
-  876. copy_3_slots_unmasked          $6..8 = $40..42
-  877. copy_3_slots_unmasked          $9..11 = $46..48
-  878. dot_3_floats                   $6 = dot($6..8, $9..11)
-  879. copy_3_slots_unmasked          $7..9 = $34..36
-  880. copy_3_slots_unmasked          $10..12 = $49..51
-  881. dot_3_floats                   $7 = dot($7..9, $10..12)
-  882. copy_3_slots_unmasked          $8..10 = $37..39
-  883. copy_3_slots_unmasked          $11..13 = $49..51
-  884. dot_3_floats                   $8 = dot($8..10, $11..13)
-  885. copy_3_slots_unmasked          $9..11 = $40..42
-  886. copy_3_slots_unmasked          $12..14 = $49..51
-  887. dot_3_floats                   $9 = dot($9..11, $12..14)
-  888. copy_4_slots_masked            m₅(0..3) = Mask($1..4)
-  889. copy_4_slots_masked            m₅(4..7) = Mask($5..8)
-  890. copy_slot_masked               m₅(8) = Mask($9)
-  891. copy_slot_unmasked             $1 = ok
-  892. copy_4_slots_unmasked          $2..5 = m₅(0..3)
-  893. copy_4_slots_unmasked          $6..9 = m₅(4..7)
-  894. copy_slot_unmasked             $10 = m₅(8)
-  895. copy_constant                  $11 = 0x43020000 (130.0)
-  896. copy_constant                  $12 = 0x424C0000 (51.0)
-  897. copy_constant                  $13 = 0x420C0000 (35.0)
-  898. copy_constant                  $14 = 0x42F00000 (120.0)
-  899. copy_constant                  $15 = 0x423C0000 (47.0)
-  900. copy_constant                  $16 = 0x42040000 (33.0)
-  901. copy_constant                  $17 = 0x43700000 (240.0)
-  902. copy_constant                  $18 = 0x42920000 (73.0)
-  903. copy_constant                  $19 = 0x42340000 (45.0)
-  904. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
-  905. bitwise_and_4_ints             $3..6 &= $7..10
-  906. bitwise_and_2_ints             $3..4 &= $5..6
-  907. bitwise_and_int                $3 &= $4
-  908. bitwise_and_int                $2 &= $3
-  909. bitwise_and_int                $1 &= $2
-  910. copy_slot_masked               ok = Mask($1)
-  911. copy_slot_masked               [test_matrix_op_matrix_half].result = Mask($1)
-  912. label                          label 0x00000002
-  913. copy_slot_masked               $0 = Mask($1)
-  914. label                          label 0x00000001
-  915. load_condition_mask            CondMask = $52
-  916. swizzle_4                      $0..3 = ($0..3).xxxx
-  917. copy_4_constants               $4..7 = colorRed
-  918. copy_4_constants               $8..11 = colorGreen
-  919. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  920. copy_4_slots_unmasked          [main].result = $0..3
-  921. load_src                       src.rgba = [main].result
+  656. copy_constant                  m₁(0) = 0x3F800000 (1.0)
+  657. copy_constant                  m₁(1) = 0x40000000 (2.0)
+  658. copy_constant                  m₁(2) = 0x40400000 (3.0)
+  659. copy_constant                  m₁(3) = 0x40800000 (4.0)
+  660. copy_constant                  m₁(4) = 0x40A00000 (5.0)
+  661. copy_constant                  m₁(5) = 0x40C00000 (6.0)
+  662. copy_constant                  m₁(6) = 0x40E00000 (7.0)
+  663. copy_constant                  m₁(7) = 0x41000000 (8.0)
+  664. copy_constant                  m₁(8) = 0x41100000 (9.0)
+  665. copy_constant                  m₁(9) = 0x41200000 (10.0)
+  666. copy_constant                  m₁(10) = 0x41300000 (11.0)
+  667. copy_constant                  m₁(11) = 0x41400000 (12.0)
+  668. copy_constant                  m₁(12) = 0x41500000 (13.0)
+  669. copy_constant                  m₁(13) = 0x41600000 (14.0)
+  670. copy_constant                  m₁(14) = 0x41700000 (15.0)
+  671. copy_constant                  m₁(15) = 0x41800000 (16.0)
+  672. copy_4_slots_unmasked          $1..4 = m₁(0..3)
+  673. copy_4_slots_unmasked          $5..8 = m₁(4..7)
+  674. copy_4_slots_unmasked          $9..12 = m₁(8..11)
+  675. copy_4_slots_unmasked          $13..16 = m₁(12..15)
+  676. copy_constant                  $17 = 0x41800000 (16.0)
+  677. copy_constant                  $18 = 0x41700000 (15.0)
+  678. copy_constant                  $19 = 0x41600000 (14.0)
+  679. copy_constant                  $20 = 0x41500000 (13.0)
+  680. copy_constant                  $21 = 0x41400000 (12.0)
+  681. copy_constant                  $22 = 0x41300000 (11.0)
+  682. copy_constant                  $23 = 0x41200000 (10.0)
+  683. copy_constant                  $24 = 0x41100000 (9.0)
+  684. copy_constant                  $25 = 0x41000000 (8.0)
+  685. copy_constant                  $26 = 0x40E00000 (7.0)
+  686. copy_constant                  $27 = 0x40C00000 (6.0)
+  687. copy_constant                  $28 = 0x40A00000 (5.0)
+  688. copy_constant                  $29 = 0x40800000 (4.0)
+  689. copy_constant                  $30 = 0x40400000 (3.0)
+  690. copy_constant                  $31 = 0x40000000 (2.0)
+  691. copy_constant                  $32 = 0x3F800000 (1.0)
+  692. add_n_floats                   $1..16 += $17..32
+  693. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  694. copy_4_slots_masked            m₁(4..7) = Mask($5..8)
+  695. copy_4_slots_masked            m₁(8..11) = Mask($9..12)
+  696. copy_4_slots_masked            m₁(12..15) = Mask($13..16)
+  697. copy_slot_unmasked             $1 = ok
+  698. copy_4_slots_unmasked          $2..5 = m₁(0..3)
+  699. copy_4_slots_unmasked          $6..9 = m₁(4..7)
+  700. copy_4_slots_unmasked          $10..13 = m₁(8..11)
+  701. copy_4_slots_unmasked          $14..17 = m₁(12..15)
+  702. copy_constant                  $18 = 0x41880000 (17.0)
+  703. copy_constant                  $19 = 0x41880000 (17.0)
+  704. copy_constant                  $20 = 0x41880000 (17.0)
+  705. copy_constant                  $21 = 0x41880000 (17.0)
+  706. copy_constant                  $22 = 0x41880000 (17.0)
+  707. copy_constant                  $23 = 0x41880000 (17.0)
+  708. copy_constant                  $24 = 0x41880000 (17.0)
+  709. copy_constant                  $25 = 0x41880000 (17.0)
+  710. copy_constant                  $26 = 0x41880000 (17.0)
+  711. copy_constant                  $27 = 0x41880000 (17.0)
+  712. copy_constant                  $28 = 0x41880000 (17.0)
+  713. copy_constant                  $29 = 0x41880000 (17.0)
+  714. copy_constant                  $30 = 0x41880000 (17.0)
+  715. copy_constant                  $31 = 0x41880000 (17.0)
+  716. copy_constant                  $32 = 0x41880000 (17.0)
+  717. copy_constant                  $33 = 0x41880000 (17.0)
+  718. cmpeq_n_floats                 $2..17 = equal($2..17, $18..33)
+  719. bitwise_and_4_ints             $10..13 &= $14..17
+  720. bitwise_and_4_ints             $6..9 &= $10..13
+  721. bitwise_and_4_ints             $2..5 &= $6..9
+  722. bitwise_and_2_ints             $2..3 &= $4..5
+  723. bitwise_and_int                $2 &= $3
+  724. bitwise_and_int                $1 &= $2
+  725. copy_slot_masked               ok = Mask($1)
+  726. copy_constant                  m₂(0) = 0x41200000 (10.0)
+  727. copy_constant                  m₂(1) = 0x41A00000 (20.0)
+  728. copy_constant                  m₂(2) = 0x41F00000 (30.0)
+  729. copy_constant                  m₂(3) = 0x42200000 (40.0)
+  730. copy_4_slots_unmasked          $1..4 = m₂
+  731. copy_constant                  $5 = 0x3F800000 (1.0)
+  732. copy_constant                  $6 = 0x40000000 (2.0)
+  733. copy_constant                  $7 = 0x40400000 (3.0)
+  734. copy_constant                  $8 = 0x40800000 (4.0)
+  735. sub_4_floats                   $1..4 -= $5..8
+  736. copy_4_slots_masked            m₂ = Mask($1..4)
+  737. copy_slot_unmasked             $1 = ok
+  738. copy_4_slots_unmasked          $2..5 = m₂
+  739. copy_constant                  $6 = 0x41100000 (9.0)
+  740. copy_constant                  $7 = 0x41900000 (18.0)
+  741. copy_constant                  $8 = 0x41D80000 (27.0)
+  742. copy_constant                  $9 = 0x42100000 (36.0)
+  743. cmpeq_4_floats                 $2..5 = equal($2..5, $6..9)
+  744. bitwise_and_2_ints             $2..3 &= $4..5
+  745. bitwise_and_int                $2 &= $3
+  746. bitwise_and_int                $1 &= $2
+  747. copy_slot_masked               ok = Mask($1)
+  748. copy_constant                  m₃(0) = 0x40000000 (2.0)
+  749. copy_constant                  m₃(1) = 0x40800000 (4.0)
+  750. copy_constant                  m₃(2) = 0x40C00000 (6.0)
+  751. copy_constant                  m₃(3) = 0x41000000 (8.0)
+  752. copy_4_slots_unmasked          $1..4 = m₃
+  753. copy_constant                  $5 = 0x40000000 (2.0)
+  754. copy_constant                  $6 = 0x40000000 (2.0)
+  755. copy_constant                  $7 = 0x40000000 (2.0)
+  756. copy_constant                  $8 = 0x40800000 (4.0)
+  757. div_4_floats                   $1..4 /= $5..8
+  758. copy_4_slots_masked            m₃ = Mask($1..4)
+  759. copy_slot_unmasked             $1 = ok
+  760. copy_4_slots_unmasked          $2..5 = m₃
+  761. copy_constant                  $6 = 0x3F800000 (1.0)
+  762. copy_constant                  $7 = 0x40000000 (2.0)
+  763. copy_constant                  $8 = 0x40400000 (3.0)
+  764. copy_constant                  $9 = 0x40000000 (2.0)
+  765. cmpeq_4_floats                 $2..5 = equal($2..5, $6..9)
+  766. bitwise_and_2_ints             $2..3 &= $4..5
+  767. bitwise_and_int                $2 &= $3
+  768. bitwise_and_int                $1 &= $2
+  769. copy_slot_masked               ok = Mask($1)
+  770. copy_constant                  m₄(0) = 0x3F800000 (1.0)
+  771. copy_constant                  m₄(1) = 0x40000000 (2.0)
+  772. copy_constant                  m₄(2) = 0x40E00000 (7.0)
+  773. copy_constant                  m₄(3) = 0x40800000 (4.0)
+  774. copy_4_slots_unmasked          $34..37 = m₄
+  775. swizzle_3                      $35..37 = ($35..37).yxz
+  776. copy_constant                  $38 = 0x40400000 (3.0)
+  777. copy_constant                  $39 = 0x40A00000 (5.0)
+  778. copy_constant                  $40 = 0x40400000 (3.0)
+  779. copy_constant                  $41 = 0x40000000 (2.0)
+  780. copy_2_slots_unmasked          $1..2 = $34..35
+  781. copy_2_slots_unmasked          $3..4 = $38..39
+  782. dot_2_floats                   $1 = dot($1..2, $3..4)
+  783. copy_4_slots_unmasked          $2..5 = $36..39
+  784. dot_2_floats                   $2 = dot($2..3, $4..5)
+  785. copy_2_slots_unmasked          $3..4 = $34..35
+  786. copy_2_slots_unmasked          $5..6 = $40..41
+  787. dot_2_floats                   $3 = dot($3..4, $5..6)
+  788. copy_2_slots_unmasked          $4..5 = $36..37
+  789. copy_2_slots_unmasked          $6..7 = $40..41
+  790. dot_2_floats                   $4 = dot($4..5, $6..7)
+  791. copy_4_slots_masked            m₄ = Mask($1..4)
+  792. copy_slot_unmasked             $1 = ok
+  793. copy_4_slots_unmasked          $2..5 = m₄
+  794. copy_constant                  $6 = 0x42180000 (38.0)
+  795. copy_constant                  $7 = 0x41D00000 (26.0)
+  796. copy_constant                  $8 = 0x41880000 (17.0)
+  797. copy_constant                  $9 = 0x41600000 (14.0)
+  798. cmpeq_4_floats                 $2..5 = equal($2..5, $6..9)
+  799. bitwise_and_2_ints             $2..3 &= $4..5
+  800. bitwise_and_int                $2 &= $3
+  801. bitwise_and_int                $1 &= $2
+  802. copy_slot_masked               ok = Mask($1)
+  803. copy_constant                  m₅(0) = 0x41200000 (10.0)
+  804. copy_constant                  m₅(1) = 0x40800000 (4.0)
+  805. copy_constant                  m₅(2) = 0x40000000 (2.0)
+  806. copy_constant                  m₅(3) = 0x41A00000 (20.0)
+  807. copy_constant                  m₅(4) = 0x40A00000 (5.0)
+  808. copy_constant                  m₅(5) = 0x40400000 (3.0)
+  809. copy_constant                  m₅(6) = 0x41200000 (10.0)
+  810. copy_constant                  m₅(7) = 0x40C00000 (6.0)
+  811. copy_constant                  m₅(8) = 0x40A00000 (5.0)
+  812. copy_4_slots_unmasked          $34..37 = m₅(0..3)
+  813. copy_4_slots_unmasked          $38..41 = m₅(4..7)
+  814. copy_slot_unmasked             $42 = m₅(8)
+  815. shuffle                        $35..42 = ($35..42)[2 5 0 3 6 1 4 7]
+  816. copy_constant                  $43 = 0x40400000 (3.0)
+  817. copy_constant                  $44 = 0x40400000 (3.0)
+  818. copy_constant                  $45 = 0x40800000 (4.0)
+  819. copy_constant                  $46 = 0x40000000 (2.0)
+  820. copy_constant                  $47 = 0x40400000 (3.0)
+  821. copy_constant                  $48 = 0x40800000 (4.0)
+  822. copy_constant                  $49 = 0x40800000 (4.0)
+  823. copy_constant                  $50 = 0x41100000 (9.0)
+  824. copy_constant                  $51 = 0x40000000 (2.0)
+  825. copy_3_slots_unmasked          $1..3 = $34..36
+  826. copy_3_slots_unmasked          $4..6 = $43..45
+  827. dot_3_floats                   $1 = dot($1..3, $4..6)
+  828. copy_3_slots_unmasked          $2..4 = $37..39
+  829. copy_3_slots_unmasked          $5..7 = $43..45
+  830. dot_3_floats                   $2 = dot($2..4, $5..7)
+  831. copy_4_slots_unmasked          $3..6 = $40..43
+  832. copy_2_slots_unmasked          $7..8 = $44..45
+  833. dot_3_floats                   $3 = dot($3..5, $6..8)
+  834. copy_3_slots_unmasked          $4..6 = $34..36
+  835. copy_3_slots_unmasked          $7..9 = $46..48
+  836. dot_3_floats                   $4 = dot($4..6, $7..9)
+  837. copy_3_slots_unmasked          $5..7 = $37..39
+  838. copy_3_slots_unmasked          $8..10 = $46..48
+  839. dot_3_floats                   $5 = dot($5..7, $8..10)
+  840. copy_3_slots_unmasked          $6..8 = $40..42
+  841. copy_3_slots_unmasked          $9..11 = $46..48
+  842. dot_3_floats                   $6 = dot($6..8, $9..11)
+  843. copy_3_slots_unmasked          $7..9 = $34..36
+  844. copy_3_slots_unmasked          $10..12 = $49..51
+  845. dot_3_floats                   $7 = dot($7..9, $10..12)
+  846. copy_3_slots_unmasked          $8..10 = $37..39
+  847. copy_3_slots_unmasked          $11..13 = $49..51
+  848. dot_3_floats                   $8 = dot($8..10, $11..13)
+  849. copy_3_slots_unmasked          $9..11 = $40..42
+  850. copy_3_slots_unmasked          $12..14 = $49..51
+  851. dot_3_floats                   $9 = dot($9..11, $12..14)
+  852. copy_4_slots_masked            m₅(0..3) = Mask($1..4)
+  853. copy_4_slots_masked            m₅(4..7) = Mask($5..8)
+  854. copy_slot_masked               m₅(8) = Mask($9)
+  855. copy_slot_unmasked             $1 = ok
+  856. copy_4_slots_unmasked          $2..5 = m₅(0..3)
+  857. copy_4_slots_unmasked          $6..9 = m₅(4..7)
+  858. copy_slot_unmasked             $10 = m₅(8)
+  859. copy_constant                  $11 = 0x43020000 (130.0)
+  860. copy_constant                  $12 = 0x424C0000 (51.0)
+  861. copy_constant                  $13 = 0x420C0000 (35.0)
+  862. copy_constant                  $14 = 0x42F00000 (120.0)
+  863. copy_constant                  $15 = 0x423C0000 (47.0)
+  864. copy_constant                  $16 = 0x42040000 (33.0)
+  865. copy_constant                  $17 = 0x43700000 (240.0)
+  866. copy_constant                  $18 = 0x42920000 (73.0)
+  867. copy_constant                  $19 = 0x42340000 (45.0)
+  868. cmpeq_n_floats                 $2..10 = equal($2..10, $11..19)
+  869. bitwise_and_4_ints             $3..6 &= $7..10
+  870. bitwise_and_2_ints             $3..4 &= $5..6
+  871. bitwise_and_int                $3 &= $4
+  872. bitwise_and_int                $2 &= $3
+  873. bitwise_and_int                $1 &= $2
+  874. copy_slot_masked               ok = Mask($1)
+  875. copy_slot_masked               [test_matrix_op_matrix_half].result = Mask($1)
+  876. label                          label 0x00000002
+  877. copy_slot_masked               $0 = Mask($1)
+  878. label                          label 0x00000001
+  879. load_condition_mask            CondMask = $52
+  880. swizzle_4                      $0..3 = ($0..3).xxxx
+  881. copy_4_constants               $4..7 = colorRed
+  882. copy_4_constants               $8..11 = colorGreen
+  883. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  884. copy_4_slots_unmasked          [main].result = $0..3
+  885. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/MatrixOpEqualsES3.skrp b/tests/sksl/shared/MatrixOpEqualsES3.skrp
index a79b216..e98780c 100644
--- a/tests/sksl/shared/MatrixOpEqualsES3.skrp
+++ b/tests/sksl/shared/MatrixOpEqualsES3.skrp
@@ -14,718 +14,694 @@
    14. shuffle                        $0..5 = ($0..5)[1 0 0 1 0 0]
    15. copy_4_slots_unmasked          _2_m(0..3) = $0..3
    16. copy_2_slots_unmasked          _2_m(4..5) = $4..5
-   17. copy_4_slots_unmasked          $0..3 = _2_m(0..3)
-   18. copy_2_slots_unmasked          $4..5 = _2_m(4..5)
-   19. copy_4_slots_unmasked          $6..9 = _1_splat_4(0..3)
-   20. copy_2_slots_unmasked          $10..11 = _1_splat_4(4..5)
-   21. add_n_floats                   $0..5 += $6..11
-   22. copy_4_slots_unmasked          _2_m(0..3) = $0..3
-   23. copy_2_slots_unmasked          _2_m(4..5) = $4..5
-   24. copy_slot_unmasked             $0 = _0_ok
-   25. copy_4_slots_unmasked          $1..4 = _2_m(0..3)
-   26. copy_2_slots_unmasked          $5..6 = _2_m(4..5)
-   27. copy_constant                  $7 = 0x40C00000 (6.0)
-   28. copy_constant                  $8 = 0x40800000 (4.0)
-   29. copy_constant                  $9 = 0x40800000 (4.0)
-   30. copy_constant                  $10 = 0x40C00000 (6.0)
-   31. copy_constant                  $11 = 0x40800000 (4.0)
-   32. copy_constant                  $12 = 0x40800000 (4.0)
-   33. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-   34. bitwise_and_3_ints             $1..3 &= $4..6
-   35. bitwise_and_int                $2 &= $3
-   36. bitwise_and_int                $1 &= $2
-   37. bitwise_and_int                $0 &= $1
-   38. copy_slot_unmasked             _0_ok = $0
-   39. zero_slot_unmasked             $0 = 0
-   40. copy_constant                  $1 = 0x40000000 (2.0)
-   41. shuffle                        $0..5 = ($0..5)[1 0 0 1 0 0]
-   42. copy_4_slots_unmasked          _2_m(0..3) = $0..3
-   43. copy_2_slots_unmasked          _2_m(4..5) = $4..5
-   44. copy_4_slots_unmasked          $0..3 = _2_m(0..3)
-   45. copy_2_slots_unmasked          $4..5 = _2_m(4..5)
-   46. copy_4_slots_unmasked          $6..9 = _1_splat_4(0..3)
-   47. copy_2_slots_unmasked          $10..11 = _1_splat_4(4..5)
-   48. sub_n_floats                   $0..5 -= $6..11
-   49. copy_4_slots_unmasked          _2_m(0..3) = $0..3
-   50. copy_2_slots_unmasked          _2_m(4..5) = $4..5
-   51. copy_slot_unmasked             $0 = _0_ok
-   52. copy_4_slots_unmasked          $1..4 = _2_m(0..3)
-   53. copy_2_slots_unmasked          $5..6 = _2_m(4..5)
-   54. copy_constant                  $7 = 0xC0000000 (-2.0)
-   55. copy_constant                  $8 = 0xC0800000 (-4.0)
-   56. copy_constant                  $9 = 0xC0800000 (-4.0)
-   57. copy_constant                  $10 = 0xC0000000 (-2.0)
-   58. copy_constant                  $11 = 0xC0800000 (-4.0)
-   59. copy_constant                  $12 = 0xC0800000 (-4.0)
-   60. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-   61. bitwise_and_3_ints             $1..3 &= $4..6
-   62. bitwise_and_int                $2 &= $3
-   63. bitwise_and_int                $1 &= $2
-   64. bitwise_and_int                $0 &= $1
-   65. copy_slot_unmasked             _0_ok = $0
-   66. zero_slot_unmasked             $0 = 0
-   67. copy_constant                  $1 = 0x40000000 (2.0)
-   68. shuffle                        $0..5 = ($0..5)[1 0 0 1 0 0]
-   69. copy_4_slots_unmasked          _2_m(0..3) = $0..3
-   70. copy_2_slots_unmasked          _2_m(4..5) = $4..5
-   71. copy_4_slots_unmasked          $0..3 = _2_m(0..3)
-   72. copy_2_slots_unmasked          $4..5 = _2_m(4..5)
-   73. copy_4_slots_unmasked          $6..9 = _1_splat_4(0..3)
-   74. copy_2_slots_unmasked          $10..11 = _1_splat_4(4..5)
-   75. div_n_floats                   $0..5 /= $6..11
-   76. copy_4_slots_unmasked          _2_m(0..3) = $0..3
-   77. copy_2_slots_unmasked          _2_m(4..5) = $4..5
-   78. copy_slot_unmasked             $0 = _0_ok
-   79. copy_4_slots_unmasked          $1..4 = _2_m(0..3)
-   80. copy_2_slots_unmasked          $5..6 = _2_m(4..5)
-   81. zero_slot_unmasked             $7 = 0
-   82. copy_constant                  $8 = 0x3F000000 (0.5)
-   83. shuffle                        $7..12 = ($7..12)[1 0 0 1 0 0]
-   84. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-   85. bitwise_and_3_ints             $1..3 &= $4..6
-   86. bitwise_and_int                $2 &= $3
-   87. bitwise_and_int                $1 &= $2
-   88. bitwise_and_int                $0 &= $1
-   89. copy_slot_unmasked             _0_ok = $0
-   90. copy_constant                  _3_splat_4(0) = 0x40800000 (4.0)
-   91. copy_constant                  _3_splat_4(1) = 0x40800000 (4.0)
-   92. copy_constant                  _3_splat_4(2) = 0x40800000 (4.0)
-   93. copy_constant                  _3_splat_4(3) = 0x40800000 (4.0)
-   94. copy_constant                  _3_splat_4(4) = 0x40800000 (4.0)
-   95. copy_constant                  _3_splat_4(5) = 0x40800000 (4.0)
-   96. zero_4_slots_unmasked          _4_m(0..3) = 0
-   97. zero_2_slots_unmasked          _4_m(4..5) = 0
-   98. copy_4_slots_unmasked          $0..3 = _3_splat_4(0..3)
-   99. copy_2_slots_unmasked          $4..5 = _3_splat_4(4..5)
+   17. copy_4_slots_unmasked          $6..9 = _1_splat_4(0..3)
+   18. copy_2_slots_unmasked          $10..11 = _1_splat_4(4..5)
+   19. add_n_floats                   $0..5 += $6..11
+   20. copy_4_slots_unmasked          _2_m(0..3) = $0..3
+   21. copy_2_slots_unmasked          _2_m(4..5) = $4..5
+   22. copy_slot_unmasked             $0 = _0_ok
+   23. copy_4_slots_unmasked          $1..4 = _2_m(0..3)
+   24. copy_2_slots_unmasked          $5..6 = _2_m(4..5)
+   25. copy_constant                  $7 = 0x40C00000 (6.0)
+   26. copy_constant                  $8 = 0x40800000 (4.0)
+   27. copy_constant                  $9 = 0x40800000 (4.0)
+   28. copy_constant                  $10 = 0x40C00000 (6.0)
+   29. copy_constant                  $11 = 0x40800000 (4.0)
+   30. copy_constant                  $12 = 0x40800000 (4.0)
+   31. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+   32. bitwise_and_3_ints             $1..3 &= $4..6
+   33. bitwise_and_int                $2 &= $3
+   34. bitwise_and_int                $1 &= $2
+   35. bitwise_and_int                $0 &= $1
+   36. copy_slot_unmasked             _0_ok = $0
+   37. zero_slot_unmasked             $0 = 0
+   38. copy_constant                  $1 = 0x40000000 (2.0)
+   39. shuffle                        $0..5 = ($0..5)[1 0 0 1 0 0]
+   40. copy_4_slots_unmasked          _2_m(0..3) = $0..3
+   41. copy_2_slots_unmasked          _2_m(4..5) = $4..5
+   42. copy_4_slots_unmasked          $6..9 = _1_splat_4(0..3)
+   43. copy_2_slots_unmasked          $10..11 = _1_splat_4(4..5)
+   44. sub_n_floats                   $0..5 -= $6..11
+   45. copy_4_slots_unmasked          _2_m(0..3) = $0..3
+   46. copy_2_slots_unmasked          _2_m(4..5) = $4..5
+   47. copy_slot_unmasked             $0 = _0_ok
+   48. copy_4_slots_unmasked          $1..4 = _2_m(0..3)
+   49. copy_2_slots_unmasked          $5..6 = _2_m(4..5)
+   50. copy_constant                  $7 = 0xC0000000 (-2.0)
+   51. copy_constant                  $8 = 0xC0800000 (-4.0)
+   52. copy_constant                  $9 = 0xC0800000 (-4.0)
+   53. copy_constant                  $10 = 0xC0000000 (-2.0)
+   54. copy_constant                  $11 = 0xC0800000 (-4.0)
+   55. copy_constant                  $12 = 0xC0800000 (-4.0)
+   56. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+   57. bitwise_and_3_ints             $1..3 &= $4..6
+   58. bitwise_and_int                $2 &= $3
+   59. bitwise_and_int                $1 &= $2
+   60. bitwise_and_int                $0 &= $1
+   61. copy_slot_unmasked             _0_ok = $0
+   62. zero_slot_unmasked             $0 = 0
+   63. copy_constant                  $1 = 0x40000000 (2.0)
+   64. shuffle                        $0..5 = ($0..5)[1 0 0 1 0 0]
+   65. copy_4_slots_unmasked          _2_m(0..3) = $0..3
+   66. copy_2_slots_unmasked          _2_m(4..5) = $4..5
+   67. copy_4_slots_unmasked          $6..9 = _1_splat_4(0..3)
+   68. copy_2_slots_unmasked          $10..11 = _1_splat_4(4..5)
+   69. div_n_floats                   $0..5 /= $6..11
+   70. copy_4_slots_unmasked          _2_m(0..3) = $0..3
+   71. copy_2_slots_unmasked          _2_m(4..5) = $4..5
+   72. copy_slot_unmasked             $0 = _0_ok
+   73. copy_4_slots_unmasked          $1..4 = _2_m(0..3)
+   74. copy_2_slots_unmasked          $5..6 = _2_m(4..5)
+   75. zero_slot_unmasked             $7 = 0
+   76. copy_constant                  $8 = 0x3F000000 (0.5)
+   77. shuffle                        $7..12 = ($7..12)[1 0 0 1 0 0]
+   78. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+   79. bitwise_and_3_ints             $1..3 &= $4..6
+   80. bitwise_and_int                $2 &= $3
+   81. bitwise_and_int                $1 &= $2
+   82. bitwise_and_int                $0 &= $1
+   83. copy_slot_unmasked             _0_ok = $0
+   84. copy_constant                  _3_splat_4(0) = 0x40800000 (4.0)
+   85. copy_constant                  _3_splat_4(1) = 0x40800000 (4.0)
+   86. copy_constant                  _3_splat_4(2) = 0x40800000 (4.0)
+   87. copy_constant                  _3_splat_4(3) = 0x40800000 (4.0)
+   88. copy_constant                  _3_splat_4(4) = 0x40800000 (4.0)
+   89. copy_constant                  _3_splat_4(5) = 0x40800000 (4.0)
+   90. zero_4_slots_unmasked          _4_m(0..3) = 0
+   91. zero_2_slots_unmasked          _4_m(4..5) = 0
+   92. copy_4_slots_unmasked          $0..3 = _3_splat_4(0..3)
+   93. copy_2_slots_unmasked          $4..5 = _3_splat_4(4..5)
+   94. copy_4_slots_unmasked          _4_m(0..3) = $0..3
+   95. copy_2_slots_unmasked          _4_m(4..5) = $4..5
+   96. zero_slot_unmasked             $6 = 0
+   97. copy_constant                  $7 = 0x40000000 (2.0)
+   98. shuffle                        $6..11 = ($6..11)[1 0 0 0 1 0]
+   99. add_n_floats                   $0..5 += $6..11
   100. copy_4_slots_unmasked          _4_m(0..3) = $0..3
   101. copy_2_slots_unmasked          _4_m(4..5) = $4..5
-  102. copy_4_slots_unmasked          $0..3 = _4_m(0..3)
-  103. copy_2_slots_unmasked          $4..5 = _4_m(4..5)
-  104. zero_slot_unmasked             $6 = 0
-  105. copy_constant                  $7 = 0x40000000 (2.0)
-  106. shuffle                        $6..11 = ($6..11)[1 0 0 0 1 0]
-  107. add_n_floats                   $0..5 += $6..11
-  108. copy_4_slots_unmasked          _4_m(0..3) = $0..3
-  109. copy_2_slots_unmasked          _4_m(4..5) = $4..5
-  110. copy_slot_unmasked             $0 = _0_ok
-  111. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
-  112. copy_2_slots_unmasked          $5..6 = _4_m(4..5)
-  113. copy_constant                  $7 = 0x40C00000 (6.0)
-  114. copy_constant                  $8 = 0x40800000 (4.0)
-  115. copy_constant                  $9 = 0x40800000 (4.0)
-  116. copy_constant                  $10 = 0x40800000 (4.0)
-  117. copy_constant                  $11 = 0x40C00000 (6.0)
-  118. copy_constant                  $12 = 0x40800000 (4.0)
-  119. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-  120. bitwise_and_3_ints             $1..3 &= $4..6
-  121. bitwise_and_int                $2 &= $3
-  122. bitwise_and_int                $1 &= $2
-  123. bitwise_and_int                $0 &= $1
-  124. copy_slot_unmasked             _0_ok = $0
-  125. copy_4_slots_unmasked          $0..3 = _3_splat_4(0..3)
-  126. copy_2_slots_unmasked          $4..5 = _3_splat_4(4..5)
-  127. copy_4_slots_unmasked          _4_m(0..3) = $0..3
-  128. copy_2_slots_unmasked          _4_m(4..5) = $4..5
-  129. copy_4_slots_unmasked          $0..3 = _4_m(0..3)
-  130. copy_2_slots_unmasked          $4..5 = _4_m(4..5)
-  131. zero_slot_unmasked             $6 = 0
-  132. copy_constant                  $7 = 0x40000000 (2.0)
-  133. shuffle                        $6..11 = ($6..11)[1 0 0 0 1 0]
-  134. sub_n_floats                   $0..5 -= $6..11
-  135. copy_4_slots_unmasked          _4_m(0..3) = $0..3
-  136. copy_2_slots_unmasked          _4_m(4..5) = $4..5
-  137. copy_slot_unmasked             $0 = _0_ok
-  138. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
-  139. copy_2_slots_unmasked          $5..6 = _4_m(4..5)
-  140. copy_constant                  $7 = 0x40000000 (2.0)
-  141. copy_constant                  $8 = 0x40800000 (4.0)
-  142. copy_constant                  $9 = 0x40800000 (4.0)
-  143. copy_constant                  $10 = 0x40800000 (4.0)
-  144. copy_constant                  $11 = 0x40000000 (2.0)
-  145. copy_constant                  $12 = 0x40800000 (4.0)
-  146. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-  147. bitwise_and_3_ints             $1..3 &= $4..6
-  148. bitwise_and_int                $2 &= $3
-  149. bitwise_and_int                $1 &= $2
-  150. bitwise_and_int                $0 &= $1
-  151. copy_slot_unmasked             _0_ok = $0
-  152. copy_4_slots_unmasked          $0..3 = _3_splat_4(0..3)
-  153. copy_2_slots_unmasked          $4..5 = _3_splat_4(4..5)
-  154. copy_4_slots_unmasked          _4_m(0..3) = $0..3
-  155. copy_2_slots_unmasked          _4_m(4..5) = $4..5
-  156. copy_4_slots_unmasked          $0..3 = _4_m(0..3)
-  157. copy_2_slots_unmasked          $4..5 = _4_m(4..5)
-  158. copy_constant                  $6 = 0x40000000 (2.0)
-  159. copy_constant                  $7 = 0x40000000 (2.0)
-  160. copy_constant                  $8 = 0x40000000 (2.0)
-  161. copy_constant                  $9 = 0x40000000 (2.0)
-  162. copy_constant                  $10 = 0x40000000 (2.0)
-  163. copy_constant                  $11 = 0x40000000 (2.0)
-  164. div_n_floats                   $0..5 /= $6..11
-  165. copy_4_slots_unmasked          _4_m(0..3) = $0..3
-  166. copy_2_slots_unmasked          _4_m(4..5) = $4..5
-  167. copy_slot_unmasked             $0 = _0_ok
-  168. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
-  169. copy_2_slots_unmasked          $5..6 = _4_m(4..5)
-  170. copy_constant                  $7 = 0x40000000 (2.0)
-  171. copy_constant                  $8 = 0x40000000 (2.0)
-  172. copy_constant                  $9 = 0x40000000 (2.0)
-  173. copy_constant                  $10 = 0x40000000 (2.0)
-  174. copy_constant                  $11 = 0x40000000 (2.0)
-  175. copy_constant                  $12 = 0x40000000 (2.0)
-  176. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-  177. bitwise_and_3_ints             $1..3 &= $4..6
-  178. bitwise_and_int                $2 &= $3
-  179. bitwise_and_int                $1 &= $2
-  180. bitwise_and_int                $0 &= $1
-  181. copy_slot_unmasked             _0_ok = $0
-  182. copy_constant                  _5_m(0) = 0x3F800000 (1.0)
-  183. copy_constant                  _5_m(1) = 0x40000000 (2.0)
-  184. copy_constant                  _5_m(2) = 0x40400000 (3.0)
-  185. copy_constant                  _5_m(3) = 0x40800000 (4.0)
-  186. copy_constant                  _5_m(4) = 0x40A00000 (5.0)
-  187. copy_constant                  _5_m(5) = 0x40C00000 (6.0)
-  188. copy_constant                  _5_m(6) = 0x40E00000 (7.0)
-  189. copy_constant                  _5_m(7) = 0x41000000 (8.0)
-  190. copy_constant                  _5_m(8) = 0x41100000 (9.0)
-  191. copy_constant                  _5_m(9) = 0x41200000 (10.0)
-  192. copy_constant                  _5_m(10) = 0x41300000 (11.0)
-  193. copy_constant                  _5_m(11) = 0x41400000 (12.0)
-  194. copy_4_slots_unmasked          $0..3 = _5_m(0..3)
-  195. copy_4_slots_unmasked          $4..7 = _5_m(4..7)
-  196. copy_4_slots_unmasked          $8..11 = _5_m(8..11)
-  197. copy_constant                  $12 = 0x41800000 (16.0)
-  198. copy_constant                  $13 = 0x41700000 (15.0)
-  199. copy_constant                  $14 = 0x41600000 (14.0)
-  200. copy_constant                  $15 = 0x41500000 (13.0)
-  201. copy_constant                  $16 = 0x41400000 (12.0)
-  202. copy_constant                  $17 = 0x41300000 (11.0)
-  203. copy_constant                  $18 = 0x41200000 (10.0)
-  204. copy_constant                  $19 = 0x41100000 (9.0)
-  205. copy_constant                  $20 = 0x41000000 (8.0)
-  206. copy_constant                  $21 = 0x40E00000 (7.0)
-  207. copy_constant                  $22 = 0x40C00000 (6.0)
-  208. copy_constant                  $23 = 0x40A00000 (5.0)
-  209. add_n_floats                   $0..11 += $12..23
-  210. copy_4_slots_unmasked          _5_m(0..3) = $0..3
-  211. copy_4_slots_unmasked          _5_m(4..7) = $4..7
-  212. copy_4_slots_unmasked          _5_m(8..11) = $8..11
-  213. copy_slot_unmasked             $0 = _0_ok
-  214. copy_4_slots_unmasked          $1..4 = _5_m(0..3)
-  215. copy_4_slots_unmasked          $5..8 = _5_m(4..7)
-  216. copy_4_slots_unmasked          $9..12 = _5_m(8..11)
-  217. copy_constant                  $13 = 0x41880000 (17.0)
-  218. copy_constant                  $14 = 0x41880000 (17.0)
-  219. copy_constant                  $15 = 0x41880000 (17.0)
-  220. copy_constant                  $16 = 0x41880000 (17.0)
-  221. copy_constant                  $17 = 0x41880000 (17.0)
-  222. copy_constant                  $18 = 0x41880000 (17.0)
-  223. copy_constant                  $19 = 0x41880000 (17.0)
-  224. copy_constant                  $20 = 0x41880000 (17.0)
-  225. copy_constant                  $21 = 0x41880000 (17.0)
-  226. copy_constant                  $22 = 0x41880000 (17.0)
-  227. copy_constant                  $23 = 0x41880000 (17.0)
-  228. copy_constant                  $24 = 0x41880000 (17.0)
-  229. cmpeq_n_floats                 $1..12 = equal($1..12, $13..24)
-  230. bitwise_and_4_ints             $5..8 &= $9..12
-  231. bitwise_and_4_ints             $1..4 &= $5..8
-  232. bitwise_and_2_ints             $1..2 &= $3..4
-  233. bitwise_and_int                $1 &= $2
-  234. bitwise_and_int                $0 &= $1
-  235. copy_slot_unmasked             _0_ok = $0
-  236. copy_constant                  _6_m(0) = 0x41200000 (10.0)
-  237. copy_constant                  _6_m(1) = 0x41A00000 (20.0)
-  238. copy_constant                  _6_m(2) = 0x41F00000 (30.0)
-  239. copy_constant                  _6_m(3) = 0x42200000 (40.0)
-  240. copy_constant                  _6_m(4) = 0x42480000 (50.0)
-  241. copy_constant                  _6_m(5) = 0x42700000 (60.0)
-  242. copy_constant                  _6_m(6) = 0x428C0000 (70.0)
-  243. copy_constant                  _6_m(7) = 0x42A00000 (80.0)
-  244. copy_4_slots_unmasked          $0..3 = _6_m(0..3)
-  245. copy_4_slots_unmasked          $4..7 = _6_m(4..7)
-  246. copy_constant                  $8 = 0x3F800000 (1.0)
-  247. copy_constant                  $9 = 0x40000000 (2.0)
-  248. copy_constant                  $10 = 0x40400000 (3.0)
-  249. copy_constant                  $11 = 0x40800000 (4.0)
-  250. copy_constant                  $12 = 0x40A00000 (5.0)
-  251. copy_constant                  $13 = 0x40C00000 (6.0)
-  252. copy_constant                  $14 = 0x40E00000 (7.0)
-  253. copy_constant                  $15 = 0x41000000 (8.0)
-  254. sub_n_floats                   $0..7 -= $8..15
-  255. copy_4_slots_unmasked          _6_m(0..3) = $0..3
-  256. copy_4_slots_unmasked          _6_m(4..7) = $4..7
-  257. copy_slot_unmasked             $0 = _0_ok
-  258. copy_4_slots_unmasked          $1..4 = _6_m(0..3)
-  259. copy_4_slots_unmasked          $5..8 = _6_m(4..7)
-  260. copy_constant                  $9 = 0x41100000 (9.0)
-  261. copy_constant                  $10 = 0x41900000 (18.0)
-  262. copy_constant                  $11 = 0x41D80000 (27.0)
-  263. copy_constant                  $12 = 0x42100000 (36.0)
-  264. copy_constant                  $13 = 0x42340000 (45.0)
-  265. copy_constant                  $14 = 0x42580000 (54.0)
-  266. copy_constant                  $15 = 0x427C0000 (63.0)
-  267. copy_constant                  $16 = 0x42900000 (72.0)
-  268. cmpeq_n_floats                 $1..8 = equal($1..8, $9..16)
-  269. bitwise_and_4_ints             $1..4 &= $5..8
-  270. bitwise_and_2_ints             $1..2 &= $3..4
-  271. bitwise_and_int                $1 &= $2
-  272. bitwise_and_int                $0 &= $1
-  273. copy_slot_unmasked             _0_ok = $0
-  274. copy_constant                  _7_m(0) = 0x41200000 (10.0)
-  275. copy_constant                  _7_m(1) = 0x41A00000 (20.0)
-  276. copy_constant                  _7_m(2) = 0x41F00000 (30.0)
-  277. copy_constant                  _7_m(3) = 0x42200000 (40.0)
-  278. copy_constant                  _7_m(4) = 0x41200000 (10.0)
-  279. copy_constant                  _7_m(5) = 0x41A00000 (20.0)
-  280. copy_constant                  _7_m(6) = 0x41F00000 (30.0)
-  281. copy_constant                  _7_m(7) = 0x42200000 (40.0)
-  282. copy_4_slots_unmasked          $0..3 = _7_m(0..3)
-  283. copy_4_slots_unmasked          $4..7 = _7_m(4..7)
-  284. copy_constant                  $8 = 0x41200000 (10.0)
-  285. copy_constant                  $9 = 0x41200000 (10.0)
-  286. copy_constant                  $10 = 0x41200000 (10.0)
-  287. copy_constant                  $11 = 0x41200000 (10.0)
-  288. copy_constant                  $12 = 0x40A00000 (5.0)
-  289. copy_constant                  $13 = 0x40A00000 (5.0)
-  290. copy_constant                  $14 = 0x40A00000 (5.0)
-  291. copy_constant                  $15 = 0x40A00000 (5.0)
-  292. div_n_floats                   $0..7 /= $8..15
-  293. copy_4_slots_unmasked          _7_m(0..3) = $0..3
-  294. copy_4_slots_unmasked          _7_m(4..7) = $4..7
-  295. copy_slot_unmasked             $0 = _0_ok
-  296. copy_4_slots_unmasked          $1..4 = _7_m(0..3)
-  297. copy_4_slots_unmasked          $5..8 = _7_m(4..7)
-  298. copy_constant                  $9 = 0x3F800000 (1.0)
-  299. copy_constant                  $10 = 0x40000000 (2.0)
-  300. copy_constant                  $11 = 0x40400000 (3.0)
-  301. copy_constant                  $12 = 0x40800000 (4.0)
-  302. copy_constant                  $13 = 0x40000000 (2.0)
-  303. copy_constant                  $14 = 0x40800000 (4.0)
-  304. copy_constant                  $15 = 0x40C00000 (6.0)
-  305. copy_constant                  $16 = 0x41000000 (8.0)
-  306. cmpeq_n_floats                 $1..8 = equal($1..8, $9..16)
-  307. bitwise_and_4_ints             $1..4 &= $5..8
-  308. bitwise_and_2_ints             $1..2 &= $3..4
-  309. bitwise_and_int                $1 &= $2
-  310. bitwise_and_int                $0 &= $1
-  311. copy_slot_unmasked             _0_ok = $0
-  312. copy_constant                  _8_m(0) = 0x40E00000 (7.0)
-  313. copy_constant                  _8_m(1) = 0x41100000 (9.0)
-  314. copy_constant                  _8_m(2) = 0x41300000 (11.0)
-  315. copy_constant                  _8_m(3) = 0x41000000 (8.0)
-  316. copy_constant                  _8_m(4) = 0x41200000 (10.0)
-  317. copy_constant                  _8_m(5) = 0x41400000 (12.0)
-  318. copy_4_slots_unmasked          $36..39 = _8_m(0..3)
-  319. copy_2_slots_unmasked          $40..41 = _8_m(4..5)
-  320. shuffle                        $37..41 = ($37..41)[2 0 3 1 4]
-  321. copy_constant                  $42 = 0x3F800000 (1.0)
-  322. copy_constant                  $43 = 0x40800000 (4.0)
-  323. copy_constant                  $44 = 0x40000000 (2.0)
-  324. copy_constant                  $45 = 0x40A00000 (5.0)
-  325. copy_2_slots_unmasked          $0..1 = $36..37
-  326. copy_2_slots_unmasked          $2..3 = $42..43
-  327. dot_2_floats                   $0 = dot($0..1, $2..3)
-  328. copy_2_slots_unmasked          $1..2 = $38..39
-  329. copy_2_slots_unmasked          $3..4 = $42..43
-  330. dot_2_floats                   $1 = dot($1..2, $3..4)
-  331. copy_4_slots_unmasked          $2..5 = $40..43
-  332. dot_2_floats                   $2 = dot($2..3, $4..5)
-  333. copy_2_slots_unmasked          $3..4 = $36..37
-  334. copy_2_slots_unmasked          $5..6 = $44..45
-  335. dot_2_floats                   $3 = dot($3..4, $5..6)
-  336. copy_2_slots_unmasked          $4..5 = $38..39
-  337. copy_2_slots_unmasked          $6..7 = $44..45
-  338. dot_2_floats                   $4 = dot($4..5, $6..7)
-  339. copy_2_slots_unmasked          $5..6 = $40..41
-  340. copy_2_slots_unmasked          $7..8 = $44..45
-  341. dot_2_floats                   $5 = dot($5..6, $7..8)
-  342. copy_4_slots_unmasked          _8_m(0..3) = $0..3
-  343. copy_2_slots_unmasked          _8_m(4..5) = $4..5
-  344. copy_slot_unmasked             $0 = _0_ok
-  345. copy_4_slots_unmasked          $1..4 = _8_m(0..3)
-  346. copy_2_slots_unmasked          $5..6 = _8_m(4..5)
-  347. copy_constant                  $7 = 0x421C0000 (39.0)
-  348. copy_constant                  $8 = 0x42440000 (49.0)
-  349. copy_constant                  $9 = 0x426C0000 (59.0)
-  350. copy_constant                  $10 = 0x42580000 (54.0)
-  351. copy_constant                  $11 = 0x42880000 (68.0)
-  352. copy_constant                  $12 = 0x42A40000 (82.0)
-  353. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
-  354. bitwise_and_3_ints             $1..3 &= $4..6
-  355. bitwise_and_int                $2 &= $3
-  356. bitwise_and_int                $1 &= $2
-  357. bitwise_and_int                $0 &= $1
-  358. copy_slot_unmasked             _0_ok = $0
-  359. store_condition_mask           $36 = CondMask
-  360. copy_slot_unmasked             $37 = _0_ok
-  361. zero_slot_unmasked             $0 = 0
-  362. merge_condition_mask           CondMask = $36 & $37
-  363. branch_if_no_active_lanes      branch_if_no_active_lanes +361 (label 1 at #724)
-  364. copy_constant                  ok = 0xFFFFFFFF
-  365. copy_constant                  splat_4(0) = 0x40800000 (4.0)
-  366. copy_constant                  splat_4(1) = 0x40800000 (4.0)
-  367. copy_constant                  splat_4(2) = 0x40800000 (4.0)
-  368. copy_constant                  splat_4(3) = 0x40800000 (4.0)
-  369. copy_constant                  splat_4(4) = 0x40800000 (4.0)
-  370. copy_constant                  splat_4(5) = 0x40800000 (4.0)
-  371. zero_4_slots_unmasked          m(0..3) = 0
-  372. zero_2_slots_unmasked          m(4..5) = 0
-  373. zero_slot_unmasked             $1 = 0
-  374. copy_constant                  $2 = 0x40000000 (2.0)
-  375. shuffle                        $1..6 = ($1..6)[1 0 0 1 0 0]
-  376. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  377. copy_2_slots_masked            m(4..5) = Mask($5..6)
-  378. copy_4_slots_unmasked          $1..4 = m(0..3)
-  379. copy_2_slots_unmasked          $5..6 = m(4..5)
-  380. copy_4_slots_unmasked          $7..10 = splat_4(0..3)
-  381. copy_2_slots_unmasked          $11..12 = splat_4(4..5)
-  382. add_n_floats                   $1..6 += $7..12
-  383. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  384. copy_2_slots_masked            m(4..5) = Mask($5..6)
-  385. copy_slot_unmasked             $1 = ok
-  386. copy_4_slots_unmasked          $2..5 = m(0..3)
-  387. copy_2_slots_unmasked          $6..7 = m(4..5)
-  388. copy_constant                  $8 = 0x40C00000 (6.0)
-  389. copy_constant                  $9 = 0x40800000 (4.0)
-  390. copy_constant                  $10 = 0x40800000 (4.0)
-  391. copy_constant                  $11 = 0x40C00000 (6.0)
-  392. copy_constant                  $12 = 0x40800000 (4.0)
-  393. copy_constant                  $13 = 0x40800000 (4.0)
-  394. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  395. bitwise_and_3_ints             $2..4 &= $5..7
-  396. bitwise_and_int                $3 &= $4
-  397. bitwise_and_int                $2 &= $3
-  398. bitwise_and_int                $1 &= $2
-  399. copy_slot_masked               ok = Mask($1)
-  400. zero_slot_unmasked             $1 = 0
-  401. copy_constant                  $2 = 0x40000000 (2.0)
-  402. shuffle                        $1..6 = ($1..6)[1 0 0 1 0 0]
-  403. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  404. copy_2_slots_masked            m(4..5) = Mask($5..6)
-  405. copy_4_slots_unmasked          $1..4 = m(0..3)
-  406. copy_2_slots_unmasked          $5..6 = m(4..5)
-  407. copy_4_slots_unmasked          $7..10 = splat_4(0..3)
-  408. copy_2_slots_unmasked          $11..12 = splat_4(4..5)
-  409. sub_n_floats                   $1..6 -= $7..12
-  410. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  411. copy_2_slots_masked            m(4..5) = Mask($5..6)
-  412. copy_slot_unmasked             $1 = ok
-  413. copy_4_slots_unmasked          $2..5 = m(0..3)
-  414. copy_2_slots_unmasked          $6..7 = m(4..5)
-  415. copy_constant                  $8 = 0xC0000000 (-2.0)
-  416. copy_constant                  $9 = 0xC0800000 (-4.0)
-  417. copy_constant                  $10 = 0xC0800000 (-4.0)
-  418. copy_constant                  $11 = 0xC0000000 (-2.0)
-  419. copy_constant                  $12 = 0xC0800000 (-4.0)
-  420. copy_constant                  $13 = 0xC0800000 (-4.0)
-  421. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  422. bitwise_and_3_ints             $2..4 &= $5..7
-  423. bitwise_and_int                $3 &= $4
-  424. bitwise_and_int                $2 &= $3
-  425. bitwise_and_int                $1 &= $2
-  426. copy_slot_masked               ok = Mask($1)
-  427. zero_slot_unmasked             $1 = 0
-  428. copy_constant                  $2 = 0x40000000 (2.0)
-  429. shuffle                        $1..6 = ($1..6)[1 0 0 1 0 0]
-  430. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  431. copy_2_slots_masked            m(4..5) = Mask($5..6)
-  432. copy_4_slots_unmasked          $1..4 = m(0..3)
-  433. copy_2_slots_unmasked          $5..6 = m(4..5)
-  434. copy_4_slots_unmasked          $7..10 = splat_4(0..3)
-  435. copy_2_slots_unmasked          $11..12 = splat_4(4..5)
-  436. div_n_floats                   $1..6 /= $7..12
-  437. copy_4_slots_masked            m(0..3) = Mask($1..4)
-  438. copy_2_slots_masked            m(4..5) = Mask($5..6)
-  439. copy_slot_unmasked             $1 = ok
-  440. copy_4_slots_unmasked          $2..5 = m(0..3)
-  441. copy_2_slots_unmasked          $6..7 = m(4..5)
-  442. zero_slot_unmasked             $8 = 0
-  443. copy_constant                  $9 = 0x3F000000 (0.5)
-  444. shuffle                        $8..13 = ($8..13)[1 0 0 1 0 0]
-  445. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  446. bitwise_and_3_ints             $2..4 &= $5..7
-  447. bitwise_and_int                $3 &= $4
-  448. bitwise_and_int                $2 &= $3
-  449. bitwise_and_int                $1 &= $2
-  450. copy_slot_masked               ok = Mask($1)
-  451. copy_constant                  splat_4₁(0) = 0x40800000 (4.0)
-  452. copy_constant                  splat_4₁(1) = 0x40800000 (4.0)
-  453. copy_constant                  splat_4₁(2) = 0x40800000 (4.0)
-  454. copy_constant                  splat_4₁(3) = 0x40800000 (4.0)
-  455. copy_constant                  splat_4₁(4) = 0x40800000 (4.0)
-  456. copy_constant                  splat_4₁(5) = 0x40800000 (4.0)
-  457. zero_4_slots_unmasked          m₁(0..3) = 0
-  458. zero_2_slots_unmasked          m₁(4..5) = 0
-  459. copy_4_slots_unmasked          $1..4 = splat_4₁(0..3)
-  460. copy_2_slots_unmasked          $5..6 = splat_4₁(4..5)
-  461. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  462. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
-  463. copy_4_slots_unmasked          $1..4 = m₁(0..3)
-  464. copy_2_slots_unmasked          $5..6 = m₁(4..5)
-  465. zero_slot_unmasked             $7 = 0
-  466. copy_constant                  $8 = 0x40000000 (2.0)
-  467. shuffle                        $7..12 = ($7..12)[1 0 0 0 1 0]
-  468. add_n_floats                   $1..6 += $7..12
-  469. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  470. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
-  471. copy_slot_unmasked             $1 = ok
-  472. copy_4_slots_unmasked          $2..5 = m₁(0..3)
-  473. copy_2_slots_unmasked          $6..7 = m₁(4..5)
-  474. copy_constant                  $8 = 0x40C00000 (6.0)
-  475. copy_constant                  $9 = 0x40800000 (4.0)
-  476. copy_constant                  $10 = 0x40800000 (4.0)
-  477. copy_constant                  $11 = 0x40800000 (4.0)
-  478. copy_constant                  $12 = 0x40C00000 (6.0)
-  479. copy_constant                  $13 = 0x40800000 (4.0)
-  480. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  481. bitwise_and_3_ints             $2..4 &= $5..7
-  482. bitwise_and_int                $3 &= $4
-  483. bitwise_and_int                $2 &= $3
-  484. bitwise_and_int                $1 &= $2
-  485. copy_slot_masked               ok = Mask($1)
-  486. copy_4_slots_unmasked          $1..4 = splat_4₁(0..3)
-  487. copy_2_slots_unmasked          $5..6 = splat_4₁(4..5)
-  488. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  489. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
-  490. copy_4_slots_unmasked          $1..4 = m₁(0..3)
-  491. copy_2_slots_unmasked          $5..6 = m₁(4..5)
-  492. zero_slot_unmasked             $7 = 0
-  493. copy_constant                  $8 = 0x40000000 (2.0)
-  494. shuffle                        $7..12 = ($7..12)[1 0 0 0 1 0]
-  495. sub_n_floats                   $1..6 -= $7..12
-  496. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  497. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
-  498. copy_slot_unmasked             $1 = ok
-  499. copy_4_slots_unmasked          $2..5 = m₁(0..3)
-  500. copy_2_slots_unmasked          $6..7 = m₁(4..5)
-  501. copy_constant                  $8 = 0x40000000 (2.0)
+  102. copy_slot_unmasked             $0 = _0_ok
+  103. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
+  104. copy_2_slots_unmasked          $5..6 = _4_m(4..5)
+  105. copy_constant                  $7 = 0x40C00000 (6.0)
+  106. copy_constant                  $8 = 0x40800000 (4.0)
+  107. copy_constant                  $9 = 0x40800000 (4.0)
+  108. copy_constant                  $10 = 0x40800000 (4.0)
+  109. copy_constant                  $11 = 0x40C00000 (6.0)
+  110. copy_constant                  $12 = 0x40800000 (4.0)
+  111. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+  112. bitwise_and_3_ints             $1..3 &= $4..6
+  113. bitwise_and_int                $2 &= $3
+  114. bitwise_and_int                $1 &= $2
+  115. bitwise_and_int                $0 &= $1
+  116. copy_slot_unmasked             _0_ok = $0
+  117. copy_4_slots_unmasked          $0..3 = _3_splat_4(0..3)
+  118. copy_2_slots_unmasked          $4..5 = _3_splat_4(4..5)
+  119. copy_4_slots_unmasked          _4_m(0..3) = $0..3
+  120. copy_2_slots_unmasked          _4_m(4..5) = $4..5
+  121. zero_slot_unmasked             $6 = 0
+  122. copy_constant                  $7 = 0x40000000 (2.0)
+  123. shuffle                        $6..11 = ($6..11)[1 0 0 0 1 0]
+  124. sub_n_floats                   $0..5 -= $6..11
+  125. copy_4_slots_unmasked          _4_m(0..3) = $0..3
+  126. copy_2_slots_unmasked          _4_m(4..5) = $4..5
+  127. copy_slot_unmasked             $0 = _0_ok
+  128. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
+  129. copy_2_slots_unmasked          $5..6 = _4_m(4..5)
+  130. copy_constant                  $7 = 0x40000000 (2.0)
+  131. copy_constant                  $8 = 0x40800000 (4.0)
+  132. copy_constant                  $9 = 0x40800000 (4.0)
+  133. copy_constant                  $10 = 0x40800000 (4.0)
+  134. copy_constant                  $11 = 0x40000000 (2.0)
+  135. copy_constant                  $12 = 0x40800000 (4.0)
+  136. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+  137. bitwise_and_3_ints             $1..3 &= $4..6
+  138. bitwise_and_int                $2 &= $3
+  139. bitwise_and_int                $1 &= $2
+  140. bitwise_and_int                $0 &= $1
+  141. copy_slot_unmasked             _0_ok = $0
+  142. copy_4_slots_unmasked          $0..3 = _3_splat_4(0..3)
+  143. copy_2_slots_unmasked          $4..5 = _3_splat_4(4..5)
+  144. copy_4_slots_unmasked          _4_m(0..3) = $0..3
+  145. copy_2_slots_unmasked          _4_m(4..5) = $4..5
+  146. copy_constant                  $6 = 0x40000000 (2.0)
+  147. copy_constant                  $7 = 0x40000000 (2.0)
+  148. copy_constant                  $8 = 0x40000000 (2.0)
+  149. copy_constant                  $9 = 0x40000000 (2.0)
+  150. copy_constant                  $10 = 0x40000000 (2.0)
+  151. copy_constant                  $11 = 0x40000000 (2.0)
+  152. div_n_floats                   $0..5 /= $6..11
+  153. copy_4_slots_unmasked          _4_m(0..3) = $0..3
+  154. copy_2_slots_unmasked          _4_m(4..5) = $4..5
+  155. copy_slot_unmasked             $0 = _0_ok
+  156. copy_4_slots_unmasked          $1..4 = _4_m(0..3)
+  157. copy_2_slots_unmasked          $5..6 = _4_m(4..5)
+  158. copy_constant                  $7 = 0x40000000 (2.0)
+  159. copy_constant                  $8 = 0x40000000 (2.0)
+  160. copy_constant                  $9 = 0x40000000 (2.0)
+  161. copy_constant                  $10 = 0x40000000 (2.0)
+  162. copy_constant                  $11 = 0x40000000 (2.0)
+  163. copy_constant                  $12 = 0x40000000 (2.0)
+  164. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+  165. bitwise_and_3_ints             $1..3 &= $4..6
+  166. bitwise_and_int                $2 &= $3
+  167. bitwise_and_int                $1 &= $2
+  168. bitwise_and_int                $0 &= $1
+  169. copy_slot_unmasked             _0_ok = $0
+  170. copy_constant                  _5_m(0) = 0x3F800000 (1.0)
+  171. copy_constant                  _5_m(1) = 0x40000000 (2.0)
+  172. copy_constant                  _5_m(2) = 0x40400000 (3.0)
+  173. copy_constant                  _5_m(3) = 0x40800000 (4.0)
+  174. copy_constant                  _5_m(4) = 0x40A00000 (5.0)
+  175. copy_constant                  _5_m(5) = 0x40C00000 (6.0)
+  176. copy_constant                  _5_m(6) = 0x40E00000 (7.0)
+  177. copy_constant                  _5_m(7) = 0x41000000 (8.0)
+  178. copy_constant                  _5_m(8) = 0x41100000 (9.0)
+  179. copy_constant                  _5_m(9) = 0x41200000 (10.0)
+  180. copy_constant                  _5_m(10) = 0x41300000 (11.0)
+  181. copy_constant                  _5_m(11) = 0x41400000 (12.0)
+  182. copy_4_slots_unmasked          $0..3 = _5_m(0..3)
+  183. copy_4_slots_unmasked          $4..7 = _5_m(4..7)
+  184. copy_4_slots_unmasked          $8..11 = _5_m(8..11)
+  185. copy_constant                  $12 = 0x41800000 (16.0)
+  186. copy_constant                  $13 = 0x41700000 (15.0)
+  187. copy_constant                  $14 = 0x41600000 (14.0)
+  188. copy_constant                  $15 = 0x41500000 (13.0)
+  189. copy_constant                  $16 = 0x41400000 (12.0)
+  190. copy_constant                  $17 = 0x41300000 (11.0)
+  191. copy_constant                  $18 = 0x41200000 (10.0)
+  192. copy_constant                  $19 = 0x41100000 (9.0)
+  193. copy_constant                  $20 = 0x41000000 (8.0)
+  194. copy_constant                  $21 = 0x40E00000 (7.0)
+  195. copy_constant                  $22 = 0x40C00000 (6.0)
+  196. copy_constant                  $23 = 0x40A00000 (5.0)
+  197. add_n_floats                   $0..11 += $12..23
+  198. copy_4_slots_unmasked          _5_m(0..3) = $0..3
+  199. copy_4_slots_unmasked          _5_m(4..7) = $4..7
+  200. copy_4_slots_unmasked          _5_m(8..11) = $8..11
+  201. copy_slot_unmasked             $0 = _0_ok
+  202. copy_4_slots_unmasked          $1..4 = _5_m(0..3)
+  203. copy_4_slots_unmasked          $5..8 = _5_m(4..7)
+  204. copy_4_slots_unmasked          $9..12 = _5_m(8..11)
+  205. copy_constant                  $13 = 0x41880000 (17.0)
+  206. copy_constant                  $14 = 0x41880000 (17.0)
+  207. copy_constant                  $15 = 0x41880000 (17.0)
+  208. copy_constant                  $16 = 0x41880000 (17.0)
+  209. copy_constant                  $17 = 0x41880000 (17.0)
+  210. copy_constant                  $18 = 0x41880000 (17.0)
+  211. copy_constant                  $19 = 0x41880000 (17.0)
+  212. copy_constant                  $20 = 0x41880000 (17.0)
+  213. copy_constant                  $21 = 0x41880000 (17.0)
+  214. copy_constant                  $22 = 0x41880000 (17.0)
+  215. copy_constant                  $23 = 0x41880000 (17.0)
+  216. copy_constant                  $24 = 0x41880000 (17.0)
+  217. cmpeq_n_floats                 $1..12 = equal($1..12, $13..24)
+  218. bitwise_and_4_ints             $5..8 &= $9..12
+  219. bitwise_and_4_ints             $1..4 &= $5..8
+  220. bitwise_and_2_ints             $1..2 &= $3..4
+  221. bitwise_and_int                $1 &= $2
+  222. bitwise_and_int                $0 &= $1
+  223. copy_slot_unmasked             _0_ok = $0
+  224. copy_constant                  _6_m(0) = 0x41200000 (10.0)
+  225. copy_constant                  _6_m(1) = 0x41A00000 (20.0)
+  226. copy_constant                  _6_m(2) = 0x41F00000 (30.0)
+  227. copy_constant                  _6_m(3) = 0x42200000 (40.0)
+  228. copy_constant                  _6_m(4) = 0x42480000 (50.0)
+  229. copy_constant                  _6_m(5) = 0x42700000 (60.0)
+  230. copy_constant                  _6_m(6) = 0x428C0000 (70.0)
+  231. copy_constant                  _6_m(7) = 0x42A00000 (80.0)
+  232. copy_4_slots_unmasked          $0..3 = _6_m(0..3)
+  233. copy_4_slots_unmasked          $4..7 = _6_m(4..7)
+  234. copy_constant                  $8 = 0x3F800000 (1.0)
+  235. copy_constant                  $9 = 0x40000000 (2.0)
+  236. copy_constant                  $10 = 0x40400000 (3.0)
+  237. copy_constant                  $11 = 0x40800000 (4.0)
+  238. copy_constant                  $12 = 0x40A00000 (5.0)
+  239. copy_constant                  $13 = 0x40C00000 (6.0)
+  240. copy_constant                  $14 = 0x40E00000 (7.0)
+  241. copy_constant                  $15 = 0x41000000 (8.0)
+  242. sub_n_floats                   $0..7 -= $8..15
+  243. copy_4_slots_unmasked          _6_m(0..3) = $0..3
+  244. copy_4_slots_unmasked          _6_m(4..7) = $4..7
+  245. copy_slot_unmasked             $0 = _0_ok
+  246. copy_4_slots_unmasked          $1..4 = _6_m(0..3)
+  247. copy_4_slots_unmasked          $5..8 = _6_m(4..7)
+  248. copy_constant                  $9 = 0x41100000 (9.0)
+  249. copy_constant                  $10 = 0x41900000 (18.0)
+  250. copy_constant                  $11 = 0x41D80000 (27.0)
+  251. copy_constant                  $12 = 0x42100000 (36.0)
+  252. copy_constant                  $13 = 0x42340000 (45.0)
+  253. copy_constant                  $14 = 0x42580000 (54.0)
+  254. copy_constant                  $15 = 0x427C0000 (63.0)
+  255. copy_constant                  $16 = 0x42900000 (72.0)
+  256. cmpeq_n_floats                 $1..8 = equal($1..8, $9..16)
+  257. bitwise_and_4_ints             $1..4 &= $5..8
+  258. bitwise_and_2_ints             $1..2 &= $3..4
+  259. bitwise_and_int                $1 &= $2
+  260. bitwise_and_int                $0 &= $1
+  261. copy_slot_unmasked             _0_ok = $0
+  262. copy_constant                  _7_m(0) = 0x41200000 (10.0)
+  263. copy_constant                  _7_m(1) = 0x41A00000 (20.0)
+  264. copy_constant                  _7_m(2) = 0x41F00000 (30.0)
+  265. copy_constant                  _7_m(3) = 0x42200000 (40.0)
+  266. copy_constant                  _7_m(4) = 0x41200000 (10.0)
+  267. copy_constant                  _7_m(5) = 0x41A00000 (20.0)
+  268. copy_constant                  _7_m(6) = 0x41F00000 (30.0)
+  269. copy_constant                  _7_m(7) = 0x42200000 (40.0)
+  270. copy_4_slots_unmasked          $0..3 = _7_m(0..3)
+  271. copy_4_slots_unmasked          $4..7 = _7_m(4..7)
+  272. copy_constant                  $8 = 0x41200000 (10.0)
+  273. copy_constant                  $9 = 0x41200000 (10.0)
+  274. copy_constant                  $10 = 0x41200000 (10.0)
+  275. copy_constant                  $11 = 0x41200000 (10.0)
+  276. copy_constant                  $12 = 0x40A00000 (5.0)
+  277. copy_constant                  $13 = 0x40A00000 (5.0)
+  278. copy_constant                  $14 = 0x40A00000 (5.0)
+  279. copy_constant                  $15 = 0x40A00000 (5.0)
+  280. div_n_floats                   $0..7 /= $8..15
+  281. copy_4_slots_unmasked          _7_m(0..3) = $0..3
+  282. copy_4_slots_unmasked          _7_m(4..7) = $4..7
+  283. copy_slot_unmasked             $0 = _0_ok
+  284. copy_4_slots_unmasked          $1..4 = _7_m(0..3)
+  285. copy_4_slots_unmasked          $5..8 = _7_m(4..7)
+  286. copy_constant                  $9 = 0x3F800000 (1.0)
+  287. copy_constant                  $10 = 0x40000000 (2.0)
+  288. copy_constant                  $11 = 0x40400000 (3.0)
+  289. copy_constant                  $12 = 0x40800000 (4.0)
+  290. copy_constant                  $13 = 0x40000000 (2.0)
+  291. copy_constant                  $14 = 0x40800000 (4.0)
+  292. copy_constant                  $15 = 0x40C00000 (6.0)
+  293. copy_constant                  $16 = 0x41000000 (8.0)
+  294. cmpeq_n_floats                 $1..8 = equal($1..8, $9..16)
+  295. bitwise_and_4_ints             $1..4 &= $5..8
+  296. bitwise_and_2_ints             $1..2 &= $3..4
+  297. bitwise_and_int                $1 &= $2
+  298. bitwise_and_int                $0 &= $1
+  299. copy_slot_unmasked             _0_ok = $0
+  300. copy_constant                  _8_m(0) = 0x40E00000 (7.0)
+  301. copy_constant                  _8_m(1) = 0x41100000 (9.0)
+  302. copy_constant                  _8_m(2) = 0x41300000 (11.0)
+  303. copy_constant                  _8_m(3) = 0x41000000 (8.0)
+  304. copy_constant                  _8_m(4) = 0x41200000 (10.0)
+  305. copy_constant                  _8_m(5) = 0x41400000 (12.0)
+  306. copy_4_slots_unmasked          $36..39 = _8_m(0..3)
+  307. copy_2_slots_unmasked          $40..41 = _8_m(4..5)
+  308. shuffle                        $37..41 = ($37..41)[2 0 3 1 4]
+  309. copy_constant                  $42 = 0x3F800000 (1.0)
+  310. copy_constant                  $43 = 0x40800000 (4.0)
+  311. copy_constant                  $44 = 0x40000000 (2.0)
+  312. copy_constant                  $45 = 0x40A00000 (5.0)
+  313. copy_2_slots_unmasked          $0..1 = $36..37
+  314. copy_2_slots_unmasked          $2..3 = $42..43
+  315. dot_2_floats                   $0 = dot($0..1, $2..3)
+  316. copy_2_slots_unmasked          $1..2 = $38..39
+  317. copy_2_slots_unmasked          $3..4 = $42..43
+  318. dot_2_floats                   $1 = dot($1..2, $3..4)
+  319. copy_4_slots_unmasked          $2..5 = $40..43
+  320. dot_2_floats                   $2 = dot($2..3, $4..5)
+  321. copy_2_slots_unmasked          $3..4 = $36..37
+  322. copy_2_slots_unmasked          $5..6 = $44..45
+  323. dot_2_floats                   $3 = dot($3..4, $5..6)
+  324. copy_2_slots_unmasked          $4..5 = $38..39
+  325. copy_2_slots_unmasked          $6..7 = $44..45
+  326. dot_2_floats                   $4 = dot($4..5, $6..7)
+  327. copy_2_slots_unmasked          $5..6 = $40..41
+  328. copy_2_slots_unmasked          $7..8 = $44..45
+  329. dot_2_floats                   $5 = dot($5..6, $7..8)
+  330. copy_4_slots_unmasked          _8_m(0..3) = $0..3
+  331. copy_2_slots_unmasked          _8_m(4..5) = $4..5
+  332. copy_slot_unmasked             $0 = _0_ok
+  333. copy_4_slots_unmasked          $1..4 = _8_m(0..3)
+  334. copy_2_slots_unmasked          $5..6 = _8_m(4..5)
+  335. copy_constant                  $7 = 0x421C0000 (39.0)
+  336. copy_constant                  $8 = 0x42440000 (49.0)
+  337. copy_constant                  $9 = 0x426C0000 (59.0)
+  338. copy_constant                  $10 = 0x42580000 (54.0)
+  339. copy_constant                  $11 = 0x42880000 (68.0)
+  340. copy_constant                  $12 = 0x42A40000 (82.0)
+  341. cmpeq_n_floats                 $1..6 = equal($1..6, $7..12)
+  342. bitwise_and_3_ints             $1..3 &= $4..6
+  343. bitwise_and_int                $2 &= $3
+  344. bitwise_and_int                $1 &= $2
+  345. bitwise_and_int                $0 &= $1
+  346. copy_slot_unmasked             _0_ok = $0
+  347. store_condition_mask           $36 = CondMask
+  348. copy_slot_unmasked             $37 = _0_ok
+  349. zero_slot_unmasked             $0 = 0
+  350. merge_condition_mask           CondMask = $36 & $37
+  351. branch_if_no_active_lanes      branch_if_no_active_lanes +349 (label 1 at #700)
+  352. copy_constant                  ok = 0xFFFFFFFF
+  353. copy_constant                  splat_4(0) = 0x40800000 (4.0)
+  354. copy_constant                  splat_4(1) = 0x40800000 (4.0)
+  355. copy_constant                  splat_4(2) = 0x40800000 (4.0)
+  356. copy_constant                  splat_4(3) = 0x40800000 (4.0)
+  357. copy_constant                  splat_4(4) = 0x40800000 (4.0)
+  358. copy_constant                  splat_4(5) = 0x40800000 (4.0)
+  359. zero_4_slots_unmasked          m(0..3) = 0
+  360. zero_2_slots_unmasked          m(4..5) = 0
+  361. zero_slot_unmasked             $1 = 0
+  362. copy_constant                  $2 = 0x40000000 (2.0)
+  363. shuffle                        $1..6 = ($1..6)[1 0 0 1 0 0]
+  364. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  365. copy_2_slots_masked            m(4..5) = Mask($5..6)
+  366. copy_4_slots_unmasked          $7..10 = splat_4(0..3)
+  367. copy_2_slots_unmasked          $11..12 = splat_4(4..5)
+  368. add_n_floats                   $1..6 += $7..12
+  369. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  370. copy_2_slots_masked            m(4..5) = Mask($5..6)
+  371. copy_slot_unmasked             $1 = ok
+  372. copy_4_slots_unmasked          $2..5 = m(0..3)
+  373. copy_2_slots_unmasked          $6..7 = m(4..5)
+  374. copy_constant                  $8 = 0x40C00000 (6.0)
+  375. copy_constant                  $9 = 0x40800000 (4.0)
+  376. copy_constant                  $10 = 0x40800000 (4.0)
+  377. copy_constant                  $11 = 0x40C00000 (6.0)
+  378. copy_constant                  $12 = 0x40800000 (4.0)
+  379. copy_constant                  $13 = 0x40800000 (4.0)
+  380. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  381. bitwise_and_3_ints             $2..4 &= $5..7
+  382. bitwise_and_int                $3 &= $4
+  383. bitwise_and_int                $2 &= $3
+  384. bitwise_and_int                $1 &= $2
+  385. copy_slot_masked               ok = Mask($1)
+  386. zero_slot_unmasked             $1 = 0
+  387. copy_constant                  $2 = 0x40000000 (2.0)
+  388. shuffle                        $1..6 = ($1..6)[1 0 0 1 0 0]
+  389. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  390. copy_2_slots_masked            m(4..5) = Mask($5..6)
+  391. copy_4_slots_unmasked          $7..10 = splat_4(0..3)
+  392. copy_2_slots_unmasked          $11..12 = splat_4(4..5)
+  393. sub_n_floats                   $1..6 -= $7..12
+  394. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  395. copy_2_slots_masked            m(4..5) = Mask($5..6)
+  396. copy_slot_unmasked             $1 = ok
+  397. copy_4_slots_unmasked          $2..5 = m(0..3)
+  398. copy_2_slots_unmasked          $6..7 = m(4..5)
+  399. copy_constant                  $8 = 0xC0000000 (-2.0)
+  400. copy_constant                  $9 = 0xC0800000 (-4.0)
+  401. copy_constant                  $10 = 0xC0800000 (-4.0)
+  402. copy_constant                  $11 = 0xC0000000 (-2.0)
+  403. copy_constant                  $12 = 0xC0800000 (-4.0)
+  404. copy_constant                  $13 = 0xC0800000 (-4.0)
+  405. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  406. bitwise_and_3_ints             $2..4 &= $5..7
+  407. bitwise_and_int                $3 &= $4
+  408. bitwise_and_int                $2 &= $3
+  409. bitwise_and_int                $1 &= $2
+  410. copy_slot_masked               ok = Mask($1)
+  411. zero_slot_unmasked             $1 = 0
+  412. copy_constant                  $2 = 0x40000000 (2.0)
+  413. shuffle                        $1..6 = ($1..6)[1 0 0 1 0 0]
+  414. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  415. copy_2_slots_masked            m(4..5) = Mask($5..6)
+  416. copy_4_slots_unmasked          $7..10 = splat_4(0..3)
+  417. copy_2_slots_unmasked          $11..12 = splat_4(4..5)
+  418. div_n_floats                   $1..6 /= $7..12
+  419. copy_4_slots_masked            m(0..3) = Mask($1..4)
+  420. copy_2_slots_masked            m(4..5) = Mask($5..6)
+  421. copy_slot_unmasked             $1 = ok
+  422. copy_4_slots_unmasked          $2..5 = m(0..3)
+  423. copy_2_slots_unmasked          $6..7 = m(4..5)
+  424. zero_slot_unmasked             $8 = 0
+  425. copy_constant                  $9 = 0x3F000000 (0.5)
+  426. shuffle                        $8..13 = ($8..13)[1 0 0 1 0 0]
+  427. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  428. bitwise_and_3_ints             $2..4 &= $5..7
+  429. bitwise_and_int                $3 &= $4
+  430. bitwise_and_int                $2 &= $3
+  431. bitwise_and_int                $1 &= $2
+  432. copy_slot_masked               ok = Mask($1)
+  433. copy_constant                  splat_4₁(0) = 0x40800000 (4.0)
+  434. copy_constant                  splat_4₁(1) = 0x40800000 (4.0)
+  435. copy_constant                  splat_4₁(2) = 0x40800000 (4.0)
+  436. copy_constant                  splat_4₁(3) = 0x40800000 (4.0)
+  437. copy_constant                  splat_4₁(4) = 0x40800000 (4.0)
+  438. copy_constant                  splat_4₁(5) = 0x40800000 (4.0)
+  439. zero_4_slots_unmasked          m₁(0..3) = 0
+  440. zero_2_slots_unmasked          m₁(4..5) = 0
+  441. copy_4_slots_unmasked          $1..4 = splat_4₁(0..3)
+  442. copy_2_slots_unmasked          $5..6 = splat_4₁(4..5)
+  443. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  444. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
+  445. zero_slot_unmasked             $7 = 0
+  446. copy_constant                  $8 = 0x40000000 (2.0)
+  447. shuffle                        $7..12 = ($7..12)[1 0 0 0 1 0]
+  448. add_n_floats                   $1..6 += $7..12
+  449. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  450. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
+  451. copy_slot_unmasked             $1 = ok
+  452. copy_4_slots_unmasked          $2..5 = m₁(0..3)
+  453. copy_2_slots_unmasked          $6..7 = m₁(4..5)
+  454. copy_constant                  $8 = 0x40C00000 (6.0)
+  455. copy_constant                  $9 = 0x40800000 (4.0)
+  456. copy_constant                  $10 = 0x40800000 (4.0)
+  457. copy_constant                  $11 = 0x40800000 (4.0)
+  458. copy_constant                  $12 = 0x40C00000 (6.0)
+  459. copy_constant                  $13 = 0x40800000 (4.0)
+  460. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  461. bitwise_and_3_ints             $2..4 &= $5..7
+  462. bitwise_and_int                $3 &= $4
+  463. bitwise_and_int                $2 &= $3
+  464. bitwise_and_int                $1 &= $2
+  465. copy_slot_masked               ok = Mask($1)
+  466. copy_4_slots_unmasked          $1..4 = splat_4₁(0..3)
+  467. copy_2_slots_unmasked          $5..6 = splat_4₁(4..5)
+  468. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  469. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
+  470. zero_slot_unmasked             $7 = 0
+  471. copy_constant                  $8 = 0x40000000 (2.0)
+  472. shuffle                        $7..12 = ($7..12)[1 0 0 0 1 0]
+  473. sub_n_floats                   $1..6 -= $7..12
+  474. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  475. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
+  476. copy_slot_unmasked             $1 = ok
+  477. copy_4_slots_unmasked          $2..5 = m₁(0..3)
+  478. copy_2_slots_unmasked          $6..7 = m₁(4..5)
+  479. copy_constant                  $8 = 0x40000000 (2.0)
+  480. copy_constant                  $9 = 0x40800000 (4.0)
+  481. copy_constant                  $10 = 0x40800000 (4.0)
+  482. copy_constant                  $11 = 0x40800000 (4.0)
+  483. copy_constant                  $12 = 0x40000000 (2.0)
+  484. copy_constant                  $13 = 0x40800000 (4.0)
+  485. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  486. bitwise_and_3_ints             $2..4 &= $5..7
+  487. bitwise_and_int                $3 &= $4
+  488. bitwise_and_int                $2 &= $3
+  489. bitwise_and_int                $1 &= $2
+  490. copy_slot_masked               ok = Mask($1)
+  491. copy_4_slots_unmasked          $1..4 = splat_4₁(0..3)
+  492. copy_2_slots_unmasked          $5..6 = splat_4₁(4..5)
+  493. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  494. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
+  495. copy_constant                  $7 = 0x40000000 (2.0)
+  496. copy_constant                  $8 = 0x40000000 (2.0)
+  497. copy_constant                  $9 = 0x40000000 (2.0)
+  498. copy_constant                  $10 = 0x40000000 (2.0)
+  499. copy_constant                  $11 = 0x40000000 (2.0)
+  500. copy_constant                  $12 = 0x40000000 (2.0)
+  501. div_n_floats                   $1..6 /= $7..12
   502. stack_rewind
-  503. copy_constant                  $9 = 0x40800000 (4.0)
-  504. copy_constant                  $10 = 0x40800000 (4.0)
-  505. copy_constant                  $11 = 0x40800000 (4.0)
-  506. copy_constant                  $12 = 0x40000000 (2.0)
-  507. copy_constant                  $13 = 0x40800000 (4.0)
-  508. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  509. bitwise_and_3_ints             $2..4 &= $5..7
-  510. bitwise_and_int                $3 &= $4
-  511. bitwise_and_int                $2 &= $3
-  512. bitwise_and_int                $1 &= $2
-  513. copy_slot_masked               ok = Mask($1)
-  514. copy_4_slots_unmasked          $1..4 = splat_4₁(0..3)
-  515. copy_2_slots_unmasked          $5..6 = splat_4₁(4..5)
-  516. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  517. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
-  518. copy_4_slots_unmasked          $1..4 = m₁(0..3)
-  519. copy_2_slots_unmasked          $5..6 = m₁(4..5)
-  520. copy_constant                  $7 = 0x40000000 (2.0)
-  521. copy_constant                  $8 = 0x40000000 (2.0)
-  522. copy_constant                  $9 = 0x40000000 (2.0)
-  523. copy_constant                  $10 = 0x40000000 (2.0)
-  524. copy_constant                  $11 = 0x40000000 (2.0)
-  525. copy_constant                  $12 = 0x40000000 (2.0)
-  526. div_n_floats                   $1..6 /= $7..12
-  527. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
-  528. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
-  529. copy_slot_unmasked             $1 = ok
-  530. copy_4_slots_unmasked          $2..5 = m₁(0..3)
-  531. copy_2_slots_unmasked          $6..7 = m₁(4..5)
-  532. copy_constant                  $8 = 0x40000000 (2.0)
-  533. copy_constant                  $9 = 0x40000000 (2.0)
-  534. copy_constant                  $10 = 0x40000000 (2.0)
-  535. copy_constant                  $11 = 0x40000000 (2.0)
-  536. copy_constant                  $12 = 0x40000000 (2.0)
-  537. copy_constant                  $13 = 0x40000000 (2.0)
-  538. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  539. bitwise_and_3_ints             $2..4 &= $5..7
-  540. bitwise_and_int                $3 &= $4
-  541. bitwise_and_int                $2 &= $3
-  542. bitwise_and_int                $1 &= $2
-  543. copy_slot_masked               ok = Mask($1)
-  544. copy_constant                  m₂(0) = 0x3F800000 (1.0)
-  545. copy_constant                  m₂(1) = 0x40000000 (2.0)
-  546. copy_constant                  m₂(2) = 0x40400000 (3.0)
-  547. copy_constant                  m₂(3) = 0x40800000 (4.0)
-  548. copy_constant                  m₂(4) = 0x40A00000 (5.0)
-  549. copy_constant                  m₂(5) = 0x40C00000 (6.0)
-  550. copy_constant                  m₂(6) = 0x40E00000 (7.0)
-  551. copy_constant                  m₂(7) = 0x41000000 (8.0)
-  552. copy_constant                  m₂(8) = 0x41100000 (9.0)
-  553. copy_constant                  m₂(9) = 0x41200000 (10.0)
-  554. copy_constant                  m₂(10) = 0x41300000 (11.0)
-  555. copy_constant                  m₂(11) = 0x41400000 (12.0)
-  556. copy_4_slots_unmasked          $1..4 = m₂(0..3)
-  557. copy_4_slots_unmasked          $5..8 = m₂(4..7)
-  558. copy_4_slots_unmasked          $9..12 = m₂(8..11)
-  559. copy_constant                  $13 = 0x41800000 (16.0)
-  560. copy_constant                  $14 = 0x41700000 (15.0)
-  561. copy_constant                  $15 = 0x41600000 (14.0)
-  562. copy_constant                  $16 = 0x41500000 (13.0)
-  563. copy_constant                  $17 = 0x41400000 (12.0)
-  564. copy_constant                  $18 = 0x41300000 (11.0)
-  565. copy_constant                  $19 = 0x41200000 (10.0)
-  566. copy_constant                  $20 = 0x41100000 (9.0)
-  567. copy_constant                  $21 = 0x41000000 (8.0)
-  568. copy_constant                  $22 = 0x40E00000 (7.0)
-  569. copy_constant                  $23 = 0x40C00000 (6.0)
-  570. copy_constant                  $24 = 0x40A00000 (5.0)
-  571. add_n_floats                   $1..12 += $13..24
-  572. copy_4_slots_masked            m₂(0..3) = Mask($1..4)
-  573. copy_4_slots_masked            m₂(4..7) = Mask($5..8)
-  574. copy_4_slots_masked            m₂(8..11) = Mask($9..12)
-  575. copy_slot_unmasked             $1 = ok
-  576. copy_4_slots_unmasked          $2..5 = m₂(0..3)
-  577. copy_4_slots_unmasked          $6..9 = m₂(4..7)
-  578. copy_4_slots_unmasked          $10..13 = m₂(8..11)
-  579. copy_constant                  $14 = 0x41880000 (17.0)
-  580. copy_constant                  $15 = 0x41880000 (17.0)
-  581. copy_constant                  $16 = 0x41880000 (17.0)
-  582. copy_constant                  $17 = 0x41880000 (17.0)
-  583. copy_constant                  $18 = 0x41880000 (17.0)
-  584. copy_constant                  $19 = 0x41880000 (17.0)
-  585. copy_constant                  $20 = 0x41880000 (17.0)
-  586. copy_constant                  $21 = 0x41880000 (17.0)
-  587. copy_constant                  $22 = 0x41880000 (17.0)
-  588. copy_constant                  $23 = 0x41880000 (17.0)
-  589. copy_constant                  $24 = 0x41880000 (17.0)
-  590. copy_constant                  $25 = 0x41880000 (17.0)
-  591. cmpeq_n_floats                 $2..13 = equal($2..13, $14..25)
-  592. bitwise_and_4_ints             $6..9 &= $10..13
-  593. bitwise_and_4_ints             $2..5 &= $6..9
-  594. bitwise_and_2_ints             $2..3 &= $4..5
-  595. bitwise_and_int                $2 &= $3
-  596. bitwise_and_int                $1 &= $2
-  597. copy_slot_masked               ok = Mask($1)
-  598. copy_constant                  m₃(0) = 0x41200000 (10.0)
-  599. copy_constant                  m₃(1) = 0x41A00000 (20.0)
-  600. copy_constant                  m₃(2) = 0x41F00000 (30.0)
-  601. copy_constant                  m₃(3) = 0x42200000 (40.0)
-  602. copy_constant                  m₃(4) = 0x42480000 (50.0)
-  603. copy_constant                  m₃(5) = 0x42700000 (60.0)
-  604. copy_constant                  m₃(6) = 0x428C0000 (70.0)
-  605. copy_constant                  m₃(7) = 0x42A00000 (80.0)
-  606. copy_4_slots_unmasked          $1..4 = m₃(0..3)
-  607. copy_4_slots_unmasked          $5..8 = m₃(4..7)
-  608. copy_constant                  $9 = 0x3F800000 (1.0)
-  609. copy_constant                  $10 = 0x40000000 (2.0)
-  610. copy_constant                  $11 = 0x40400000 (3.0)
-  611. copy_constant                  $12 = 0x40800000 (4.0)
-  612. copy_constant                  $13 = 0x40A00000 (5.0)
-  613. copy_constant                  $14 = 0x40C00000 (6.0)
-  614. copy_constant                  $15 = 0x40E00000 (7.0)
-  615. copy_constant                  $16 = 0x41000000 (8.0)
-  616. sub_n_floats                   $1..8 -= $9..16
-  617. copy_4_slots_masked            m₃(0..3) = Mask($1..4)
-  618. copy_4_slots_masked            m₃(4..7) = Mask($5..8)
-  619. copy_slot_unmasked             $1 = ok
-  620. copy_4_slots_unmasked          $2..5 = m₃(0..3)
-  621. copy_4_slots_unmasked          $6..9 = m₃(4..7)
-  622. copy_constant                  $10 = 0x41100000 (9.0)
-  623. copy_constant                  $11 = 0x41900000 (18.0)
-  624. copy_constant                  $12 = 0x41D80000 (27.0)
-  625. copy_constant                  $13 = 0x42100000 (36.0)
-  626. copy_constant                  $14 = 0x42340000 (45.0)
-  627. copy_constant                  $15 = 0x42580000 (54.0)
-  628. copy_constant                  $16 = 0x427C0000 (63.0)
-  629. copy_constant                  $17 = 0x42900000 (72.0)
-  630. cmpeq_n_floats                 $2..9 = equal($2..9, $10..17)
-  631. bitwise_and_4_ints             $2..5 &= $6..9
-  632. bitwise_and_2_ints             $2..3 &= $4..5
-  633. bitwise_and_int                $2 &= $3
-  634. bitwise_and_int                $1 &= $2
-  635. copy_slot_masked               ok = Mask($1)
-  636. copy_constant                  m₄(0) = 0x41200000 (10.0)
-  637. copy_constant                  m₄(1) = 0x41A00000 (20.0)
-  638. copy_constant                  m₄(2) = 0x41F00000 (30.0)
-  639. copy_constant                  m₄(3) = 0x42200000 (40.0)
-  640. copy_constant                  m₄(4) = 0x41200000 (10.0)
-  641. copy_constant                  m₄(5) = 0x41A00000 (20.0)
-  642. copy_constant                  m₄(6) = 0x41F00000 (30.0)
-  643. copy_constant                  m₄(7) = 0x42200000 (40.0)
-  644. copy_4_slots_unmasked          $1..4 = m₄(0..3)
-  645. copy_4_slots_unmasked          $5..8 = m₄(4..7)
-  646. copy_constant                  $9 = 0x41200000 (10.0)
-  647. copy_constant                  $10 = 0x41200000 (10.0)
-  648. copy_constant                  $11 = 0x41200000 (10.0)
-  649. copy_constant                  $12 = 0x41200000 (10.0)
-  650. copy_constant                  $13 = 0x40A00000 (5.0)
-  651. copy_constant                  $14 = 0x40A00000 (5.0)
-  652. copy_constant                  $15 = 0x40A00000 (5.0)
-  653. copy_constant                  $16 = 0x40A00000 (5.0)
-  654. div_n_floats                   $1..8 /= $9..16
-  655. copy_4_slots_masked            m₄(0..3) = Mask($1..4)
-  656. copy_4_slots_masked            m₄(4..7) = Mask($5..8)
-  657. copy_slot_unmasked             $1 = ok
-  658. copy_4_slots_unmasked          $2..5 = m₄(0..3)
-  659. copy_4_slots_unmasked          $6..9 = m₄(4..7)
-  660. copy_constant                  $10 = 0x3F800000 (1.0)
-  661. copy_constant                  $11 = 0x40000000 (2.0)
-  662. copy_constant                  $12 = 0x40400000 (3.0)
-  663. copy_constant                  $13 = 0x40800000 (4.0)
-  664. copy_constant                  $14 = 0x40000000 (2.0)
-  665. copy_constant                  $15 = 0x40800000 (4.0)
-  666. copy_constant                  $16 = 0x40C00000 (6.0)
-  667. copy_constant                  $17 = 0x41000000 (8.0)
-  668. cmpeq_n_floats                 $2..9 = equal($2..9, $10..17)
-  669. bitwise_and_4_ints             $2..5 &= $6..9
-  670. bitwise_and_2_ints             $2..3 &= $4..5
-  671. bitwise_and_int                $2 &= $3
-  672. bitwise_and_int                $1 &= $2
-  673. copy_slot_masked               ok = Mask($1)
-  674. copy_constant                  m₅(0) = 0x40E00000 (7.0)
-  675. copy_constant                  m₅(1) = 0x41100000 (9.0)
-  676. copy_constant                  m₅(2) = 0x41300000 (11.0)
-  677. copy_constant                  m₅(3) = 0x41000000 (8.0)
-  678. copy_constant                  m₅(4) = 0x41200000 (10.0)
-  679. copy_constant                  m₅(5) = 0x41400000 (12.0)
-  680. copy_4_slots_unmasked          $26..29 = m₅(0..3)
-  681. copy_2_slots_unmasked          $30..31 = m₅(4..5)
-  682. shuffle                        $27..31 = ($27..31)[2 0 3 1 4]
-  683. copy_constant                  $32 = 0x3F800000 (1.0)
-  684. copy_constant                  $33 = 0x40800000 (4.0)
-  685. copy_constant                  $34 = 0x40000000 (2.0)
-  686. copy_constant                  $35 = 0x40A00000 (5.0)
-  687. copy_2_slots_unmasked          $1..2 = $26..27
-  688. copy_2_slots_unmasked          $3..4 = $32..33
-  689. dot_2_floats                   $1 = dot($1..2, $3..4)
-  690. copy_2_slots_unmasked          $2..3 = $28..29
-  691. copy_2_slots_unmasked          $4..5 = $32..33
-  692. dot_2_floats                   $2 = dot($2..3, $4..5)
-  693. copy_4_slots_unmasked          $3..6 = $30..33
-  694. dot_2_floats                   $3 = dot($3..4, $5..6)
-  695. copy_2_slots_unmasked          $4..5 = $26..27
-  696. copy_2_slots_unmasked          $6..7 = $34..35
-  697. dot_2_floats                   $4 = dot($4..5, $6..7)
-  698. copy_2_slots_unmasked          $5..6 = $28..29
-  699. copy_2_slots_unmasked          $7..8 = $34..35
-  700. dot_2_floats                   $5 = dot($5..6, $7..8)
-  701. copy_2_slots_unmasked          $6..7 = $30..31
-  702. copy_2_slots_unmasked          $8..9 = $34..35
-  703. dot_2_floats                   $6 = dot($6..7, $8..9)
-  704. copy_4_slots_masked            m₅(0..3) = Mask($1..4)
-  705. copy_2_slots_masked            m₅(4..5) = Mask($5..6)
-  706. copy_slot_unmasked             $1 = ok
-  707. copy_4_slots_unmasked          $2..5 = m₅(0..3)
-  708. copy_2_slots_unmasked          $6..7 = m₅(4..5)
-  709. copy_constant                  $8 = 0x421C0000 (39.0)
-  710. copy_constant                  $9 = 0x42440000 (49.0)
-  711. copy_constant                  $10 = 0x426C0000 (59.0)
-  712. copy_constant                  $11 = 0x42580000 (54.0)
-  713. copy_constant                  $12 = 0x42880000 (68.0)
-  714. copy_constant                  $13 = 0x42A40000 (82.0)
-  715. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
-  716. bitwise_and_3_ints             $2..4 &= $5..7
-  717. bitwise_and_int                $3 &= $4
-  718. bitwise_and_int                $2 &= $3
-  719. bitwise_and_int                $1 &= $2
-  720. copy_slot_masked               ok = Mask($1)
-  721. copy_slot_masked               [test_matrix_op_matrix_half].result = Mask($1)
-  722. label                          label 0x00000002
-  723. copy_slot_masked               $0 = Mask($1)
-  724. label                          label 0x00000001
-  725. load_condition_mask            CondMask = $36
-  726. swizzle_4                      $0..3 = ($0..3).xxxx
-  727. copy_4_constants               $4..7 = colorRed
-  728. copy_4_constants               $8..11 = colorGreen
-  729. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  730. copy_4_slots_unmasked          [main].result = $0..3
-  731. load_src                       src.rgba = [main].result
+  503. copy_4_slots_masked            m₁(0..3) = Mask($1..4)
+  504. copy_2_slots_masked            m₁(4..5) = Mask($5..6)
+  505. copy_slot_unmasked             $1 = ok
+  506. copy_4_slots_unmasked          $2..5 = m₁(0..3)
+  507. copy_2_slots_unmasked          $6..7 = m₁(4..5)
+  508. copy_constant                  $8 = 0x40000000 (2.0)
+  509. copy_constant                  $9 = 0x40000000 (2.0)
+  510. copy_constant                  $10 = 0x40000000 (2.0)
+  511. copy_constant                  $11 = 0x40000000 (2.0)
+  512. copy_constant                  $12 = 0x40000000 (2.0)
+  513. copy_constant                  $13 = 0x40000000 (2.0)
+  514. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  515. bitwise_and_3_ints             $2..4 &= $5..7
+  516. bitwise_and_int                $3 &= $4
+  517. bitwise_and_int                $2 &= $3
+  518. bitwise_and_int                $1 &= $2
+  519. copy_slot_masked               ok = Mask($1)
+  520. copy_constant                  m₂(0) = 0x3F800000 (1.0)
+  521. copy_constant                  m₂(1) = 0x40000000 (2.0)
+  522. copy_constant                  m₂(2) = 0x40400000 (3.0)
+  523. copy_constant                  m₂(3) = 0x40800000 (4.0)
+  524. copy_constant                  m₂(4) = 0x40A00000 (5.0)
+  525. copy_constant                  m₂(5) = 0x40C00000 (6.0)
+  526. copy_constant                  m₂(6) = 0x40E00000 (7.0)
+  527. copy_constant                  m₂(7) = 0x41000000 (8.0)
+  528. copy_constant                  m₂(8) = 0x41100000 (9.0)
+  529. copy_constant                  m₂(9) = 0x41200000 (10.0)
+  530. copy_constant                  m₂(10) = 0x41300000 (11.0)
+  531. copy_constant                  m₂(11) = 0x41400000 (12.0)
+  532. copy_4_slots_unmasked          $1..4 = m₂(0..3)
+  533. copy_4_slots_unmasked          $5..8 = m₂(4..7)
+  534. copy_4_slots_unmasked          $9..12 = m₂(8..11)
+  535. copy_constant                  $13 = 0x41800000 (16.0)
+  536. copy_constant                  $14 = 0x41700000 (15.0)
+  537. copy_constant                  $15 = 0x41600000 (14.0)
+  538. copy_constant                  $16 = 0x41500000 (13.0)
+  539. copy_constant                  $17 = 0x41400000 (12.0)
+  540. copy_constant                  $18 = 0x41300000 (11.0)
+  541. copy_constant                  $19 = 0x41200000 (10.0)
+  542. copy_constant                  $20 = 0x41100000 (9.0)
+  543. copy_constant                  $21 = 0x41000000 (8.0)
+  544. copy_constant                  $22 = 0x40E00000 (7.0)
+  545. copy_constant                  $23 = 0x40C00000 (6.0)
+  546. copy_constant                  $24 = 0x40A00000 (5.0)
+  547. add_n_floats                   $1..12 += $13..24
+  548. copy_4_slots_masked            m₂(0..3) = Mask($1..4)
+  549. copy_4_slots_masked            m₂(4..7) = Mask($5..8)
+  550. copy_4_slots_masked            m₂(8..11) = Mask($9..12)
+  551. copy_slot_unmasked             $1 = ok
+  552. copy_4_slots_unmasked          $2..5 = m₂(0..3)
+  553. copy_4_slots_unmasked          $6..9 = m₂(4..7)
+  554. copy_4_slots_unmasked          $10..13 = m₂(8..11)
+  555. copy_constant                  $14 = 0x41880000 (17.0)
+  556. copy_constant                  $15 = 0x41880000 (17.0)
+  557. copy_constant                  $16 = 0x41880000 (17.0)
+  558. copy_constant                  $17 = 0x41880000 (17.0)
+  559. copy_constant                  $18 = 0x41880000 (17.0)
+  560. copy_constant                  $19 = 0x41880000 (17.0)
+  561. copy_constant                  $20 = 0x41880000 (17.0)
+  562. copy_constant                  $21 = 0x41880000 (17.0)
+  563. copy_constant                  $22 = 0x41880000 (17.0)
+  564. copy_constant                  $23 = 0x41880000 (17.0)
+  565. copy_constant                  $24 = 0x41880000 (17.0)
+  566. copy_constant                  $25 = 0x41880000 (17.0)
+  567. cmpeq_n_floats                 $2..13 = equal($2..13, $14..25)
+  568. bitwise_and_4_ints             $6..9 &= $10..13
+  569. bitwise_and_4_ints             $2..5 &= $6..9
+  570. bitwise_and_2_ints             $2..3 &= $4..5
+  571. bitwise_and_int                $2 &= $3
+  572. bitwise_and_int                $1 &= $2
+  573. copy_slot_masked               ok = Mask($1)
+  574. copy_constant                  m₃(0) = 0x41200000 (10.0)
+  575. copy_constant                  m₃(1) = 0x41A00000 (20.0)
+  576. copy_constant                  m₃(2) = 0x41F00000 (30.0)
+  577. copy_constant                  m₃(3) = 0x42200000 (40.0)
+  578. copy_constant                  m₃(4) = 0x42480000 (50.0)
+  579. copy_constant                  m₃(5) = 0x42700000 (60.0)
+  580. copy_constant                  m₃(6) = 0x428C0000 (70.0)
+  581. copy_constant                  m₃(7) = 0x42A00000 (80.0)
+  582. copy_4_slots_unmasked          $1..4 = m₃(0..3)
+  583. copy_4_slots_unmasked          $5..8 = m₃(4..7)
+  584. copy_constant                  $9 = 0x3F800000 (1.0)
+  585. copy_constant                  $10 = 0x40000000 (2.0)
+  586. copy_constant                  $11 = 0x40400000 (3.0)
+  587. copy_constant                  $12 = 0x40800000 (4.0)
+  588. copy_constant                  $13 = 0x40A00000 (5.0)
+  589. copy_constant                  $14 = 0x40C00000 (6.0)
+  590. copy_constant                  $15 = 0x40E00000 (7.0)
+  591. copy_constant                  $16 = 0x41000000 (8.0)
+  592. sub_n_floats                   $1..8 -= $9..16
+  593. copy_4_slots_masked            m₃(0..3) = Mask($1..4)
+  594. copy_4_slots_masked            m₃(4..7) = Mask($5..8)
+  595. copy_slot_unmasked             $1 = ok
+  596. copy_4_slots_unmasked          $2..5 = m₃(0..3)
+  597. copy_4_slots_unmasked          $6..9 = m₃(4..7)
+  598. copy_constant                  $10 = 0x41100000 (9.0)
+  599. copy_constant                  $11 = 0x41900000 (18.0)
+  600. copy_constant                  $12 = 0x41D80000 (27.0)
+  601. copy_constant                  $13 = 0x42100000 (36.0)
+  602. copy_constant                  $14 = 0x42340000 (45.0)
+  603. copy_constant                  $15 = 0x42580000 (54.0)
+  604. copy_constant                  $16 = 0x427C0000 (63.0)
+  605. copy_constant                  $17 = 0x42900000 (72.0)
+  606. cmpeq_n_floats                 $2..9 = equal($2..9, $10..17)
+  607. bitwise_and_4_ints             $2..5 &= $6..9
+  608. bitwise_and_2_ints             $2..3 &= $4..5
+  609. bitwise_and_int                $2 &= $3
+  610. bitwise_and_int                $1 &= $2
+  611. copy_slot_masked               ok = Mask($1)
+  612. copy_constant                  m₄(0) = 0x41200000 (10.0)
+  613. copy_constant                  m₄(1) = 0x41A00000 (20.0)
+  614. copy_constant                  m₄(2) = 0x41F00000 (30.0)
+  615. copy_constant                  m₄(3) = 0x42200000 (40.0)
+  616. copy_constant                  m₄(4) = 0x41200000 (10.0)
+  617. copy_constant                  m₄(5) = 0x41A00000 (20.0)
+  618. copy_constant                  m₄(6) = 0x41F00000 (30.0)
+  619. copy_constant                  m₄(7) = 0x42200000 (40.0)
+  620. copy_4_slots_unmasked          $1..4 = m₄(0..3)
+  621. copy_4_slots_unmasked          $5..8 = m₄(4..7)
+  622. copy_constant                  $9 = 0x41200000 (10.0)
+  623. copy_constant                  $10 = 0x41200000 (10.0)
+  624. copy_constant                  $11 = 0x41200000 (10.0)
+  625. copy_constant                  $12 = 0x41200000 (10.0)
+  626. copy_constant                  $13 = 0x40A00000 (5.0)
+  627. copy_constant                  $14 = 0x40A00000 (5.0)
+  628. copy_constant                  $15 = 0x40A00000 (5.0)
+  629. copy_constant                  $16 = 0x40A00000 (5.0)
+  630. div_n_floats                   $1..8 /= $9..16
+  631. copy_4_slots_masked            m₄(0..3) = Mask($1..4)
+  632. copy_4_slots_masked            m₄(4..7) = Mask($5..8)
+  633. copy_slot_unmasked             $1 = ok
+  634. copy_4_slots_unmasked          $2..5 = m₄(0..3)
+  635. copy_4_slots_unmasked          $6..9 = m₄(4..7)
+  636. copy_constant                  $10 = 0x3F800000 (1.0)
+  637. copy_constant                  $11 = 0x40000000 (2.0)
+  638. copy_constant                  $12 = 0x40400000 (3.0)
+  639. copy_constant                  $13 = 0x40800000 (4.0)
+  640. copy_constant                  $14 = 0x40000000 (2.0)
+  641. copy_constant                  $15 = 0x40800000 (4.0)
+  642. copy_constant                  $16 = 0x40C00000 (6.0)
+  643. copy_constant                  $17 = 0x41000000 (8.0)
+  644. cmpeq_n_floats                 $2..9 = equal($2..9, $10..17)
+  645. bitwise_and_4_ints             $2..5 &= $6..9
+  646. bitwise_and_2_ints             $2..3 &= $4..5
+  647. bitwise_and_int                $2 &= $3
+  648. bitwise_and_int                $1 &= $2
+  649. copy_slot_masked               ok = Mask($1)
+  650. copy_constant                  m₅(0) = 0x40E00000 (7.0)
+  651. copy_constant                  m₅(1) = 0x41100000 (9.0)
+  652. copy_constant                  m₅(2) = 0x41300000 (11.0)
+  653. copy_constant                  m₅(3) = 0x41000000 (8.0)
+  654. copy_constant                  m₅(4) = 0x41200000 (10.0)
+  655. copy_constant                  m₅(5) = 0x41400000 (12.0)
+  656. copy_4_slots_unmasked          $26..29 = m₅(0..3)
+  657. copy_2_slots_unmasked          $30..31 = m₅(4..5)
+  658. shuffle                        $27..31 = ($27..31)[2 0 3 1 4]
+  659. copy_constant                  $32 = 0x3F800000 (1.0)
+  660. copy_constant                  $33 = 0x40800000 (4.0)
+  661. copy_constant                  $34 = 0x40000000 (2.0)
+  662. copy_constant                  $35 = 0x40A00000 (5.0)
+  663. copy_2_slots_unmasked          $1..2 = $26..27
+  664. copy_2_slots_unmasked          $3..4 = $32..33
+  665. dot_2_floats                   $1 = dot($1..2, $3..4)
+  666. copy_2_slots_unmasked          $2..3 = $28..29
+  667. copy_2_slots_unmasked          $4..5 = $32..33
+  668. dot_2_floats                   $2 = dot($2..3, $4..5)
+  669. copy_4_slots_unmasked          $3..6 = $30..33
+  670. dot_2_floats                   $3 = dot($3..4, $5..6)
+  671. copy_2_slots_unmasked          $4..5 = $26..27
+  672. copy_2_slots_unmasked          $6..7 = $34..35
+  673. dot_2_floats                   $4 = dot($4..5, $6..7)
+  674. copy_2_slots_unmasked          $5..6 = $28..29
+  675. copy_2_slots_unmasked          $7..8 = $34..35
+  676. dot_2_floats                   $5 = dot($5..6, $7..8)
+  677. copy_2_slots_unmasked          $6..7 = $30..31
+  678. copy_2_slots_unmasked          $8..9 = $34..35
+  679. dot_2_floats                   $6 = dot($6..7, $8..9)
+  680. copy_4_slots_masked            m₅(0..3) = Mask($1..4)
+  681. copy_2_slots_masked            m₅(4..5) = Mask($5..6)
+  682. copy_slot_unmasked             $1 = ok
+  683. copy_4_slots_unmasked          $2..5 = m₅(0..3)
+  684. copy_2_slots_unmasked          $6..7 = m₅(4..5)
+  685. copy_constant                  $8 = 0x421C0000 (39.0)
+  686. copy_constant                  $9 = 0x42440000 (49.0)
+  687. copy_constant                  $10 = 0x426C0000 (59.0)
+  688. copy_constant                  $11 = 0x42580000 (54.0)
+  689. copy_constant                  $12 = 0x42880000 (68.0)
+  690. copy_constant                  $13 = 0x42A40000 (82.0)
+  691. cmpeq_n_floats                 $2..7 = equal($2..7, $8..13)
+  692. bitwise_and_3_ints             $2..4 &= $5..7
+  693. bitwise_and_int                $3 &= $4
+  694. bitwise_and_int                $2 &= $3
+  695. bitwise_and_int                $1 &= $2
+  696. copy_slot_masked               ok = Mask($1)
+  697. copy_slot_masked               [test_matrix_op_matrix_half].result = Mask($1)
+  698. label                          label 0x00000002
+  699. copy_slot_masked               $0 = Mask($1)
+  700. label                          label 0x00000001
+  701. load_condition_mask            CondMask = $36
+  702. swizzle_4                      $0..3 = ($0..3).xxxx
+  703. copy_4_constants               $4..7 = colorRed
+  704. copy_4_constants               $8..11 = colorGreen
+  705. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  706. copy_4_slots_unmasked          [main].result = $0..3
+  707. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/MatrixScalarMath.skrp b/tests/sksl/shared/MatrixScalarMath.skrp
index 624be83..e711a06 100644
--- a/tests/sksl/shared/MatrixScalarMath.skrp
+++ b/tests/sksl/shared/MatrixScalarMath.skrp
@@ -45,381 +45,380 @@
    45. copy_slot_unmasked             $4 = _1_one
    46. mul_float                      $3 *= $4
    47. copy_4_slots_unmasked          _2_m2 = $0..3
-   48. copy_4_slots_unmasked          $0..3 = _2_m2
-   49. copy_constant                  $4 = 0x3F800000 (1.0)
-   50. swizzle_4                      $4..7 = ($4..7).xxxx
-   51. add_4_floats                   $0..3 += $4..7
-   52. copy_4_slots_unmasked          _2_m2 = $0..3
-   53. store_condition_mask           $43 = CondMask
-   54. store_condition_mask           $30 = CondMask
-   55. store_condition_mask           $17 = CondMask
-   56. store_condition_mask           $12 = CondMask
-   57. copy_slot_unmasked             $13 = _2_m2(0)
-   58. copy_slot_unmasked             $14 = _0_expected(0)
-   59. cmpeq_float                    $13 = equal($13, $14)
-   60. copy_2_slots_unmasked          $14..15 = _2_m2(0..1)
-   61. swizzle_1                      $14 = ($14..15).y
-   62. copy_2_slots_unmasked          $15..16 = _0_expected(0..1)
-   63. swizzle_1                      $15 = ($15..16).y
-   64. cmpeq_float                    $14 = equal($14, $15)
-   65. bitwise_and_int                $13 &= $14
-   66. copy_slot_unmasked             $14 = _2_m2(2)
-   67. copy_slot_unmasked             $15 = _0_expected(2)
-   68. cmpeq_float                    $14 = equal($14, $15)
-   69. bitwise_and_int                $13 &= $14
-   70. copy_2_slots_unmasked          $14..15 = _2_m2(2..3)
-   71. swizzle_1                      $14 = ($14..15).y
-   72. copy_2_slots_unmasked          $15..16 = _0_expected(2..3)
-   73. swizzle_1                      $15 = ($15..16).y
-   74. cmpeq_float                    $14 = equal($14, $15)
-   75. bitwise_and_int                $13 &= $14
-   76. zero_slot_unmasked             $18 = 0
-   77. merge_condition_mask           CondMask = $12 & $13
-   78. branch_if_no_active_lanes      branch_if_no_active_lanes +94 (label 4 at #172)
-   79. copy_slot_unmasked             op = minus
-   80. copy_4_slots_unmasked          m11, m12, m21, m22 = f1, f2, f3, f4
-   81. copy_slot_unmasked             $19 = f1
-   82. copy_constant                  $20 = 0x3F800000 (1.0)
-   83. sub_float                      $19 -= $20
-   84. copy_slot_unmasked             $20 = f2
-   85. copy_constant                  $21 = 0x3F800000 (1.0)
-   86. sub_float                      $20 -= $21
-   87. copy_slot_unmasked             $21 = f3
-   88. copy_constant                  $22 = 0x3F800000 (1.0)
-   89. sub_float                      $21 -= $22
-   90. copy_slot_unmasked             $22 = f4
-   91. copy_constant                  $23 = 0x3F800000 (1.0)
-   92. sub_float                      $22 -= $23
-   93. copy_4_slots_unmasked          expected = $19..22
-   94. copy_constant                  $19 = colorRed(0)
-   95. copy_slot_unmasked             one = $19
-   96. copy_slot_unmasked             $19 = m11
-   97. copy_slot_unmasked             $20 = one
-   98. mul_float                      $19 *= $20
-   99. copy_slot_unmasked             $20 = m12
-  100. copy_slot_unmasked             $21 = one
-  101. mul_float                      $20 *= $21
-  102. copy_slot_unmasked             $21 = m21
-  103. copy_slot_unmasked             $22 = one
-  104. mul_float                      $21 *= $22
-  105. copy_slot_unmasked             $22 = m22
-  106. copy_slot_unmasked             $23 = one
-  107. mul_float                      $22 *= $23
-  108. copy_4_slots_unmasked          m2 = $19..22
-  109. store_loop_mask                $19 = LoopMask
-  110. copy_slot_unmasked             $20 = op
-  111. store_loop_mask                $21 = LoopMask
-  112. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  113. case_op                        if ($20 == 0x00000001) { LoopMask = true; $21 = false; }
-  114. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 6 at #121)
-  115. copy_4_slots_unmasked          $22..25 = m2
-  116. copy_constant                  $26 = 0x3F800000 (1.0)
-  117. swizzle_4                      $26..29 = ($26..29).xxxx
-  118. add_4_floats                   $22..25 += $26..29
-  119. copy_4_slots_masked            m2 = Mask($22..25)
-  120. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  121. label                          label 0x00000006
-  122. case_op                        if ($20 == 0x00000002) { LoopMask = true; $21 = false; }
-  123. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 7 at #130)
-  124. copy_4_slots_unmasked          $22..25 = m2
-  125. copy_constant                  $26 = 0x3F800000 (1.0)
-  126. swizzle_4                      $26..29 = ($26..29).xxxx
-  127. sub_4_floats                   $22..25 -= $26..29
-  128. copy_4_slots_masked            m2 = Mask($22..25)
-  129. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  130. label                          label 0x00000007
-  131. case_op                        if ($20 == 0x00000003) { LoopMask = true; $21 = false; }
-  132. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 8 at #139)
-  133. copy_4_slots_unmasked          $22..25 = m2
-  134. copy_constant                  $26 = 0x40000000 (2.0)
-  135. swizzle_4                      $26..29 = ($26..29).xxxx
-  136. mul_4_floats                   $22..25 *= $26..29
-  137. copy_4_slots_masked            m2 = Mask($22..25)
-  138. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  139. label                          label 0x00000008
-  140. case_op                        if ($20 == 0x00000004) { LoopMask = true; $21 = false; }
-  141. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 9 at #148)
-  142. copy_4_slots_unmasked          $22..25 = m2
-  143. copy_constant                  $26 = 0x3F000000 (0.5)
-  144. swizzle_4                      $26..29 = ($26..29).xxxx
-  145. mul_4_floats                   $22..25 *= $26..29
-  146. copy_4_slots_masked            m2 = Mask($22..25)
-  147. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  148. label                          label 0x00000009
-  149. load_loop_mask                 LoopMask = $19
-  150. copy_slot_unmasked             $19 = m2(0)
-  151. copy_slot_unmasked             $20 = expected(0)
-  152. cmpeq_float                    $19 = equal($19, $20)
-  153. copy_2_slots_unmasked          $20..21 = m2(0..1)
-  154. swizzle_1                      $20 = ($20..21).y
-  155. copy_2_slots_unmasked          $21..22 = expected(0..1)
-  156. swizzle_1                      $21 = ($21..22).y
-  157. cmpeq_float                    $20 = equal($20, $21)
-  158. bitwise_and_int                $19 &= $20
-  159. copy_slot_unmasked             $20 = m2(2)
-  160. copy_slot_unmasked             $21 = expected(2)
-  161. cmpeq_float                    $20 = equal($20, $21)
-  162. bitwise_and_int                $19 &= $20
-  163. copy_2_slots_unmasked          $20..21 = m2(2..3)
-  164. swizzle_1                      $20 = ($20..21).y
-  165. copy_2_slots_unmasked          $21..22 = expected(2..3)
-  166. swizzle_1                      $21 = ($21..22).y
-  167. cmpeq_float                    $20 = equal($20, $21)
-  168. bitwise_and_int                $19 &= $20
-  169. copy_slot_masked               [test].result = Mask($19)
-  170. label                          label 0x00000005
-  171. copy_slot_masked               $18 = Mask($19)
-  172. label                          label 0x00000004
-  173. load_condition_mask            CondMask = $12
-  174. zero_slot_unmasked             $31 = 0
-  175. merge_condition_mask           CondMask = $17 & $18
-  176. branch_if_no_active_lanes      branch_if_no_active_lanes +94 (label 3 at #270)
-  177. copy_slot_unmasked             op = star
-  178. copy_4_slots_unmasked          m11, m12, m21, m22 = f1, f2, f3, f4
-  179. copy_slot_unmasked             $32 = f1
-  180. copy_constant                  $33 = 0x40000000 (2.0)
-  181. mul_float                      $32 *= $33
-  182. copy_slot_unmasked             $33 = f2
-  183. copy_constant                  $34 = 0x40000000 (2.0)
-  184. mul_float                      $33 *= $34
-  185. copy_slot_unmasked             $34 = f3
-  186. copy_constant                  $35 = 0x40000000 (2.0)
-  187. mul_float                      $34 *= $35
-  188. copy_slot_unmasked             $35 = f4
-  189. copy_constant                  $36 = 0x40000000 (2.0)
-  190. mul_float                      $35 *= $36
-  191. copy_4_slots_unmasked          expected = $32..35
-  192. copy_constant                  $32 = colorRed(0)
-  193. copy_slot_unmasked             one = $32
-  194. copy_slot_unmasked             $32 = m11
-  195. copy_slot_unmasked             $33 = one
-  196. mul_float                      $32 *= $33
-  197. copy_slot_unmasked             $33 = m12
-  198. copy_slot_unmasked             $34 = one
-  199. mul_float                      $33 *= $34
-  200. copy_slot_unmasked             $34 = m21
-  201. copy_slot_unmasked             $35 = one
-  202. mul_float                      $34 *= $35
-  203. copy_slot_unmasked             $35 = m22
-  204. copy_slot_unmasked             $36 = one
-  205. mul_float                      $35 *= $36
-  206. copy_4_slots_unmasked          m2 = $32..35
-  207. store_loop_mask                $32 = LoopMask
-  208. copy_slot_unmasked             $33 = op
-  209. store_loop_mask                $34 = LoopMask
-  210. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  211. case_op                        if ($33 == 0x00000001) { LoopMask = true; $34 = false; }
-  212. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 11 at #219)
-  213. copy_4_slots_unmasked          $35..38 = m2
-  214. copy_constant                  $39 = 0x3F800000 (1.0)
-  215. swizzle_4                      $39..42 = ($39..42).xxxx
-  216. add_4_floats                   $35..38 += $39..42
-  217. copy_4_slots_masked            m2 = Mask($35..38)
-  218. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  219. label                          label 0x0000000B
-  220. case_op                        if ($33 == 0x00000002) { LoopMask = true; $34 = false; }
-  221. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 12 at #228)
-  222. copy_4_slots_unmasked          $35..38 = m2
-  223. copy_constant                  $39 = 0x3F800000 (1.0)
-  224. swizzle_4                      $39..42 = ($39..42).xxxx
-  225. sub_4_floats                   $35..38 -= $39..42
-  226. copy_4_slots_masked            m2 = Mask($35..38)
-  227. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  228. label                          label 0x0000000C
-  229. case_op                        if ($33 == 0x00000003) { LoopMask = true; $34 = false; }
-  230. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 13 at #237)
-  231. copy_4_slots_unmasked          $35..38 = m2
-  232. copy_constant                  $39 = 0x40000000 (2.0)
-  233. swizzle_4                      $39..42 = ($39..42).xxxx
-  234. mul_4_floats                   $35..38 *= $39..42
-  235. copy_4_slots_masked            m2 = Mask($35..38)
-  236. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  237. label                          label 0x0000000D
-  238. case_op                        if ($33 == 0x00000004) { LoopMask = true; $34 = false; }
-  239. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 14 at #246)
-  240. copy_4_slots_unmasked          $35..38 = m2
-  241. copy_constant                  $39 = 0x3F000000 (0.5)
-  242. swizzle_4                      $39..42 = ($39..42).xxxx
-  243. mul_4_floats                   $35..38 *= $39..42
-  244. copy_4_slots_masked            m2 = Mask($35..38)
-  245. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  246. label                          label 0x0000000E
-  247. load_loop_mask                 LoopMask = $32
-  248. copy_slot_unmasked             $32 = m2(0)
-  249. copy_slot_unmasked             $33 = expected(0)
-  250. cmpeq_float                    $32 = equal($32, $33)
-  251. copy_2_slots_unmasked          $33..34 = m2(0..1)
-  252. swizzle_1                      $33 = ($33..34).y
-  253. copy_2_slots_unmasked          $34..35 = expected(0..1)
-  254. swizzle_1                      $34 = ($34..35).y
-  255. cmpeq_float                    $33 = equal($33, $34)
-  256. bitwise_and_int                $32 &= $33
-  257. copy_slot_unmasked             $33 = m2(2)
-  258. copy_slot_unmasked             $34 = expected(2)
-  259. cmpeq_float                    $33 = equal($33, $34)
-  260. bitwise_and_int                $32 &= $33
-  261. copy_2_slots_unmasked          $33..34 = m2(2..3)
-  262. swizzle_1                      $33 = ($33..34).y
-  263. copy_2_slots_unmasked          $34..35 = expected(2..3)
-  264. swizzle_1                      $34 = ($34..35).y
-  265. cmpeq_float                    $33 = equal($33, $34)
-  266. bitwise_and_int                $32 &= $33
-  267. copy_slot_masked               [test].result = Mask($32)
-  268. label                          label 0x0000000A
-  269. copy_slot_masked               $31 = Mask($32)
-  270. label                          label 0x00000003
-  271. load_condition_mask            CondMask = $17
-  272. zero_slot_unmasked             $44 = 0
-  273. merge_condition_mask           CondMask = $30 & $31
-  274. branch_if_no_active_lanes      branch_if_no_active_lanes +94 (label 2 at #368)
-  275. copy_slot_unmasked             op = slash
-  276. copy_4_slots_unmasked          m11, m12, m21, m22 = f1, f2, f3, f4
-  277. copy_slot_unmasked             $45 = f1
-  278. copy_constant                  $46 = 0x3F000000 (0.5)
-  279. mul_float                      $45 *= $46
-  280. copy_slot_unmasked             $46 = f2
-  281. copy_constant                  $47 = 0x3F000000 (0.5)
-  282. mul_float                      $46 *= $47
-  283. copy_slot_unmasked             $47 = f3
-  284. copy_constant                  $48 = 0x3F000000 (0.5)
-  285. mul_float                      $47 *= $48
-  286. copy_slot_unmasked             $48 = f4
-  287. copy_constant                  $49 = 0x3F000000 (0.5)
-  288. mul_float                      $48 *= $49
-  289. copy_4_slots_unmasked          expected = $45..48
-  290. copy_constant                  $45 = colorRed(0)
-  291. copy_slot_unmasked             one = $45
-  292. copy_slot_unmasked             $45 = m11
-  293. copy_slot_unmasked             $46 = one
-  294. mul_float                      $45 *= $46
-  295. copy_slot_unmasked             $46 = m12
-  296. copy_slot_unmasked             $47 = one
-  297. mul_float                      $46 *= $47
-  298. copy_slot_unmasked             $47 = m21
-  299. copy_slot_unmasked             $48 = one
-  300. mul_float                      $47 *= $48
-  301. copy_slot_unmasked             $48 = m22
-  302. copy_slot_unmasked             $49 = one
-  303. mul_float                      $48 *= $49
-  304. copy_4_slots_unmasked          m2 = $45..48
-  305. store_loop_mask                $45 = LoopMask
-  306. copy_slot_unmasked             $46 = op
-  307. store_loop_mask                $47 = LoopMask
-  308. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  309. case_op                        if ($46 == 0x00000001) { LoopMask = true; $47 = false; }
-  310. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 16 at #317)
-  311. copy_4_slots_unmasked          $48..51 = m2
-  312. copy_constant                  $52 = 0x3F800000 (1.0)
-  313. swizzle_4                      $52..55 = ($52..55).xxxx
-  314. add_4_floats                   $48..51 += $52..55
-  315. copy_4_slots_masked            m2 = Mask($48..51)
-  316. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  317. label                          label 0x00000010
-  318. case_op                        if ($46 == 0x00000002) { LoopMask = true; $47 = false; }
-  319. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 17 at #326)
-  320. copy_4_slots_unmasked          $48..51 = m2
-  321. copy_constant                  $52 = 0x3F800000 (1.0)
-  322. swizzle_4                      $52..55 = ($52..55).xxxx
-  323. sub_4_floats                   $48..51 -= $52..55
-  324. copy_4_slots_masked            m2 = Mask($48..51)
-  325. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  326. label                          label 0x00000011
-  327. case_op                        if ($46 == 0x00000003) { LoopMask = true; $47 = false; }
-  328. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 18 at #335)
-  329. copy_4_slots_unmasked          $48..51 = m2
-  330. copy_constant                  $52 = 0x40000000 (2.0)
-  331. swizzle_4                      $52..55 = ($52..55).xxxx
-  332. mul_4_floats                   $48..51 *= $52..55
-  333. copy_4_slots_masked            m2 = Mask($48..51)
-  334. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  335. label                          label 0x00000012
-  336. case_op                        if ($46 == 0x00000004) { LoopMask = true; $47 = false; }
-  337. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 19 at #344)
-  338. copy_4_slots_unmasked          $48..51 = m2
-  339. copy_constant                  $52 = 0x3F000000 (0.5)
-  340. swizzle_4                      $52..55 = ($52..55).xxxx
-  341. mul_4_floats                   $48..51 *= $52..55
-  342. copy_4_slots_masked            m2 = Mask($48..51)
-  343. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
-  344. label                          label 0x00000013
-  345. load_loop_mask                 LoopMask = $45
-  346. copy_slot_unmasked             $45 = m2(0)
-  347. copy_slot_unmasked             $46 = expected(0)
-  348. cmpeq_float                    $45 = equal($45, $46)
-  349. copy_2_slots_unmasked          $46..47 = m2(0..1)
-  350. swizzle_1                      $46 = ($46..47).y
-  351. copy_2_slots_unmasked          $47..48 = expected(0..1)
-  352. swizzle_1                      $47 = ($47..48).y
-  353. cmpeq_float                    $46 = equal($46, $47)
-  354. bitwise_and_int                $45 &= $46
-  355. copy_slot_unmasked             $46 = m2(2)
-  356. copy_slot_unmasked             $47 = expected(2)
-  357. cmpeq_float                    $46 = equal($46, $47)
-  358. bitwise_and_int                $45 &= $46
-  359. copy_2_slots_unmasked          $46..47 = m2(2..3)
-  360. swizzle_1                      $46 = ($46..47).y
-  361. copy_2_slots_unmasked          $47..48 = expected(2..3)
-  362. swizzle_1                      $47 = ($47..48).y
-  363. cmpeq_float                    $46 = equal($46, $47)
-  364. bitwise_and_int                $45 &= $46
-  365. copy_slot_masked               [test].result = Mask($45)
-  366. label                          label 0x0000000F
-  367. copy_slot_masked               $44 = Mask($45)
-  368. label                          label 0x00000002
-  369. load_condition_mask            CondMask = $30
-  370. zero_slot_unmasked             $0 = 0
-  371. merge_condition_mask           CondMask = $43 & $44
-  372. branch_if_no_active_lanes      branch_if_no_active_lanes +46 (label 1 at #418)
-  373. copy_constant                  $1 = colorRed(0)
-  374. copy_constant                  $2 = 0x41200000 (10.0)
-  375. mul_float                      $1 *= $2
-  376. copy_slot_unmasked             ten = $1
-  377. copy_slot_unmasked             $2 = $1
-  378. copy_slot_unmasked             $3 = ten
-  379. copy_slot_unmasked             $4 = $3
-  380. copy_4_slots_unmasked          mat = $1..4
-  381. copy_constant                  $5 = 0x3F800000 (1.0)
-  382. copy_constant                  $6 = testInputs(0)
-  383. div_float                      $5 /= $6
-  384. swizzle_4                      $5..8 = ($5..8).xxxx
-  385. mul_4_floats                   $1..4 *= $5..8
-  386. copy_4_slots_unmasked          div = $1..4
-  387. copy_4_slots_unmasked          $1..4 = mat
-  388. copy_constant                  $5 = 0x3F800000 (1.0)
-  389. copy_constant                  $6 = testInputs(0)
-  390. div_float                      $5 /= $6
-  391. swizzle_4                      $5..8 = ($5..8).xxxx
-  392. mul_4_floats                   $1..4 *= $5..8
-  393. copy_4_slots_masked            mat = Mask($1..4)
-  394. copy_4_slots_unmasked          $1..4 = div
-  395. copy_constant                  $5 = 0x41000000 (8.0)
-  396. swizzle_4                      $5..8 = ($5..8).xxxx
-  397. add_4_floats                   $1..4 += $5..8
-  398. abs_4_floats                   $1..4 = abs($1..4)
-  399. copy_constant                  $5 = 0x3C23D70A (0.01)
-  400. swizzle_4                      $5..8 = ($5..8).xxxx
-  401. cmplt_4_floats                 $1..4 = lessThan($1..4, $5..8)
-  402. bitwise_and_2_ints             $1..2 &= $3..4
-  403. bitwise_and_int                $1 &= $2
-  404. copy_4_slots_unmasked          $2..5 = mat
-  405. copy_constant                  $6 = 0x41000000 (8.0)
-  406. swizzle_4                      $6..9 = ($6..9).xxxx
-  407. add_4_floats                   $2..5 += $6..9
-  408. abs_4_floats                   $2..5 = abs($2..5)
-  409. copy_constant                  $6 = 0x3C23D70A (0.01)
-  410. swizzle_4                      $6..9 = ($6..9).xxxx
-  411. cmplt_4_floats                 $2..5 = lessThan($2..5, $6..9)
-  412. bitwise_and_2_ints             $2..3 &= $4..5
-  413. bitwise_and_int                $2 &= $3
-  414. bitwise_and_int                $1 &= $2
-  415. copy_slot_masked               [divisionTest].result = Mask($1)
-  416. label                          label 0x00000014
-  417. copy_slot_masked               $0 = Mask($1)
-  418. label                          label 0x00000001
-  419. load_condition_mask            CondMask = $43
-  420. swizzle_4                      $0..3 = ($0..3).xxxx
-  421. copy_4_constants               $4..7 = colorRed
-  422. copy_4_constants               $8..11 = colorGreen
-  423. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  424. copy_4_slots_unmasked          [main].result = $0..3
-  425. load_src                       src.rgba = [main].result
+   48. copy_constant                  $4 = 0x3F800000 (1.0)
+   49. swizzle_4                      $4..7 = ($4..7).xxxx
+   50. add_4_floats                   $0..3 += $4..7
+   51. copy_4_slots_unmasked          _2_m2 = $0..3
+   52. store_condition_mask           $43 = CondMask
+   53. store_condition_mask           $30 = CondMask
+   54. store_condition_mask           $17 = CondMask
+   55. store_condition_mask           $12 = CondMask
+   56. copy_slot_unmasked             $13 = _2_m2(0)
+   57. copy_slot_unmasked             $14 = _0_expected(0)
+   58. cmpeq_float                    $13 = equal($13, $14)
+   59. copy_2_slots_unmasked          $14..15 = _2_m2(0..1)
+   60. swizzle_1                      $14 = ($14..15).y
+   61. copy_2_slots_unmasked          $15..16 = _0_expected(0..1)
+   62. swizzle_1                      $15 = ($15..16).y
+   63. cmpeq_float                    $14 = equal($14, $15)
+   64. bitwise_and_int                $13 &= $14
+   65. copy_slot_unmasked             $14 = _2_m2(2)
+   66. copy_slot_unmasked             $15 = _0_expected(2)
+   67. cmpeq_float                    $14 = equal($14, $15)
+   68. bitwise_and_int                $13 &= $14
+   69. copy_2_slots_unmasked          $14..15 = _2_m2(2..3)
+   70. swizzle_1                      $14 = ($14..15).y
+   71. copy_2_slots_unmasked          $15..16 = _0_expected(2..3)
+   72. swizzle_1                      $15 = ($15..16).y
+   73. cmpeq_float                    $14 = equal($14, $15)
+   74. bitwise_and_int                $13 &= $14
+   75. zero_slot_unmasked             $18 = 0
+   76. merge_condition_mask           CondMask = $12 & $13
+   77. branch_if_no_active_lanes      branch_if_no_active_lanes +94 (label 4 at #171)
+   78. copy_slot_unmasked             op = minus
+   79. copy_4_slots_unmasked          m11, m12, m21, m22 = f1, f2, f3, f4
+   80. copy_slot_unmasked             $19 = f1
+   81. copy_constant                  $20 = 0x3F800000 (1.0)
+   82. sub_float                      $19 -= $20
+   83. copy_slot_unmasked             $20 = f2
+   84. copy_constant                  $21 = 0x3F800000 (1.0)
+   85. sub_float                      $20 -= $21
+   86. copy_slot_unmasked             $21 = f3
+   87. copy_constant                  $22 = 0x3F800000 (1.0)
+   88. sub_float                      $21 -= $22
+   89. copy_slot_unmasked             $22 = f4
+   90. copy_constant                  $23 = 0x3F800000 (1.0)
+   91. sub_float                      $22 -= $23
+   92. copy_4_slots_unmasked          expected = $19..22
+   93. copy_constant                  $19 = colorRed(0)
+   94. copy_slot_unmasked             one = $19
+   95. copy_slot_unmasked             $19 = m11
+   96. copy_slot_unmasked             $20 = one
+   97. mul_float                      $19 *= $20
+   98. copy_slot_unmasked             $20 = m12
+   99. copy_slot_unmasked             $21 = one
+  100. mul_float                      $20 *= $21
+  101. copy_slot_unmasked             $21 = m21
+  102. copy_slot_unmasked             $22 = one
+  103. mul_float                      $21 *= $22
+  104. copy_slot_unmasked             $22 = m22
+  105. copy_slot_unmasked             $23 = one
+  106. mul_float                      $22 *= $23
+  107. copy_4_slots_unmasked          m2 = $19..22
+  108. store_loop_mask                $19 = LoopMask
+  109. copy_slot_unmasked             $20 = op
+  110. store_loop_mask                $21 = LoopMask
+  111. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  112. case_op                        if ($20 == 0x00000001) { LoopMask = true; $21 = false; }
+  113. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 6 at #120)
+  114. copy_4_slots_unmasked          $22..25 = m2
+  115. copy_constant                  $26 = 0x3F800000 (1.0)
+  116. swizzle_4                      $26..29 = ($26..29).xxxx
+  117. add_4_floats                   $22..25 += $26..29
+  118. copy_4_slots_masked            m2 = Mask($22..25)
+  119. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  120. label                          label 0x00000006
+  121. case_op                        if ($20 == 0x00000002) { LoopMask = true; $21 = false; }
+  122. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 7 at #129)
+  123. copy_4_slots_unmasked          $22..25 = m2
+  124. copy_constant                  $26 = 0x3F800000 (1.0)
+  125. swizzle_4                      $26..29 = ($26..29).xxxx
+  126. sub_4_floats                   $22..25 -= $26..29
+  127. copy_4_slots_masked            m2 = Mask($22..25)
+  128. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  129. label                          label 0x00000007
+  130. case_op                        if ($20 == 0x00000003) { LoopMask = true; $21 = false; }
+  131. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 8 at #138)
+  132. copy_4_slots_unmasked          $22..25 = m2
+  133. copy_constant                  $26 = 0x40000000 (2.0)
+  134. swizzle_4                      $26..29 = ($26..29).xxxx
+  135. mul_4_floats                   $22..25 *= $26..29
+  136. copy_4_slots_masked            m2 = Mask($22..25)
+  137. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  138. label                          label 0x00000008
+  139. case_op                        if ($20 == 0x00000004) { LoopMask = true; $21 = false; }
+  140. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 9 at #147)
+  141. copy_4_slots_unmasked          $22..25 = m2
+  142. copy_constant                  $26 = 0x3F000000 (0.5)
+  143. swizzle_4                      $26..29 = ($26..29).xxxx
+  144. mul_4_floats                   $22..25 *= $26..29
+  145. copy_4_slots_masked            m2 = Mask($22..25)
+  146. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  147. label                          label 0x00000009
+  148. load_loop_mask                 LoopMask = $19
+  149. copy_slot_unmasked             $19 = m2(0)
+  150. copy_slot_unmasked             $20 = expected(0)
+  151. cmpeq_float                    $19 = equal($19, $20)
+  152. copy_2_slots_unmasked          $20..21 = m2(0..1)
+  153. swizzle_1                      $20 = ($20..21).y
+  154. copy_2_slots_unmasked          $21..22 = expected(0..1)
+  155. swizzle_1                      $21 = ($21..22).y
+  156. cmpeq_float                    $20 = equal($20, $21)
+  157. bitwise_and_int                $19 &= $20
+  158. copy_slot_unmasked             $20 = m2(2)
+  159. copy_slot_unmasked             $21 = expected(2)
+  160. cmpeq_float                    $20 = equal($20, $21)
+  161. bitwise_and_int                $19 &= $20
+  162. copy_2_slots_unmasked          $20..21 = m2(2..3)
+  163. swizzle_1                      $20 = ($20..21).y
+  164. copy_2_slots_unmasked          $21..22 = expected(2..3)
+  165. swizzle_1                      $21 = ($21..22).y
+  166. cmpeq_float                    $20 = equal($20, $21)
+  167. bitwise_and_int                $19 &= $20
+  168. copy_slot_masked               [test].result = Mask($19)
+  169. label                          label 0x00000005
+  170. copy_slot_masked               $18 = Mask($19)
+  171. label                          label 0x00000004
+  172. load_condition_mask            CondMask = $12
+  173. zero_slot_unmasked             $31 = 0
+  174. merge_condition_mask           CondMask = $17 & $18
+  175. branch_if_no_active_lanes      branch_if_no_active_lanes +94 (label 3 at #269)
+  176. copy_slot_unmasked             op = star
+  177. copy_4_slots_unmasked          m11, m12, m21, m22 = f1, f2, f3, f4
+  178. copy_slot_unmasked             $32 = f1
+  179. copy_constant                  $33 = 0x40000000 (2.0)
+  180. mul_float                      $32 *= $33
+  181. copy_slot_unmasked             $33 = f2
+  182. copy_constant                  $34 = 0x40000000 (2.0)
+  183. mul_float                      $33 *= $34
+  184. copy_slot_unmasked             $34 = f3
+  185. copy_constant                  $35 = 0x40000000 (2.0)
+  186. mul_float                      $34 *= $35
+  187. copy_slot_unmasked             $35 = f4
+  188. copy_constant                  $36 = 0x40000000 (2.0)
+  189. mul_float                      $35 *= $36
+  190. copy_4_slots_unmasked          expected = $32..35
+  191. copy_constant                  $32 = colorRed(0)
+  192. copy_slot_unmasked             one = $32
+  193. copy_slot_unmasked             $32 = m11
+  194. copy_slot_unmasked             $33 = one
+  195. mul_float                      $32 *= $33
+  196. copy_slot_unmasked             $33 = m12
+  197. copy_slot_unmasked             $34 = one
+  198. mul_float                      $33 *= $34
+  199. copy_slot_unmasked             $34 = m21
+  200. copy_slot_unmasked             $35 = one
+  201. mul_float                      $34 *= $35
+  202. copy_slot_unmasked             $35 = m22
+  203. copy_slot_unmasked             $36 = one
+  204. mul_float                      $35 *= $36
+  205. copy_4_slots_unmasked          m2 = $32..35
+  206. store_loop_mask                $32 = LoopMask
+  207. copy_slot_unmasked             $33 = op
+  208. store_loop_mask                $34 = LoopMask
+  209. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  210. case_op                        if ($33 == 0x00000001) { LoopMask = true; $34 = false; }
+  211. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 11 at #218)
+  212. copy_4_slots_unmasked          $35..38 = m2
+  213. copy_constant                  $39 = 0x3F800000 (1.0)
+  214. swizzle_4                      $39..42 = ($39..42).xxxx
+  215. add_4_floats                   $35..38 += $39..42
+  216. copy_4_slots_masked            m2 = Mask($35..38)
+  217. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  218. label                          label 0x0000000B
+  219. case_op                        if ($33 == 0x00000002) { LoopMask = true; $34 = false; }
+  220. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 12 at #227)
+  221. copy_4_slots_unmasked          $35..38 = m2
+  222. copy_constant                  $39 = 0x3F800000 (1.0)
+  223. swizzle_4                      $39..42 = ($39..42).xxxx
+  224. sub_4_floats                   $35..38 -= $39..42
+  225. copy_4_slots_masked            m2 = Mask($35..38)
+  226. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  227. label                          label 0x0000000C
+  228. case_op                        if ($33 == 0x00000003) { LoopMask = true; $34 = false; }
+  229. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 13 at #236)
+  230. copy_4_slots_unmasked          $35..38 = m2
+  231. copy_constant                  $39 = 0x40000000 (2.0)
+  232. swizzle_4                      $39..42 = ($39..42).xxxx
+  233. mul_4_floats                   $35..38 *= $39..42
+  234. copy_4_slots_masked            m2 = Mask($35..38)
+  235. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  236. label                          label 0x0000000D
+  237. case_op                        if ($33 == 0x00000004) { LoopMask = true; $34 = false; }
+  238. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 14 at #245)
+  239. copy_4_slots_unmasked          $35..38 = m2
+  240. copy_constant                  $39 = 0x3F000000 (0.5)
+  241. swizzle_4                      $39..42 = ($39..42).xxxx
+  242. mul_4_floats                   $35..38 *= $39..42
+  243. copy_4_slots_masked            m2 = Mask($35..38)
+  244. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  245. label                          label 0x0000000E
+  246. load_loop_mask                 LoopMask = $32
+  247. copy_slot_unmasked             $32 = m2(0)
+  248. copy_slot_unmasked             $33 = expected(0)
+  249. cmpeq_float                    $32 = equal($32, $33)
+  250. copy_2_slots_unmasked          $33..34 = m2(0..1)
+  251. swizzle_1                      $33 = ($33..34).y
+  252. copy_2_slots_unmasked          $34..35 = expected(0..1)
+  253. swizzle_1                      $34 = ($34..35).y
+  254. cmpeq_float                    $33 = equal($33, $34)
+  255. bitwise_and_int                $32 &= $33
+  256. copy_slot_unmasked             $33 = m2(2)
+  257. copy_slot_unmasked             $34 = expected(2)
+  258. cmpeq_float                    $33 = equal($33, $34)
+  259. bitwise_and_int                $32 &= $33
+  260. copy_2_slots_unmasked          $33..34 = m2(2..3)
+  261. swizzle_1                      $33 = ($33..34).y
+  262. copy_2_slots_unmasked          $34..35 = expected(2..3)
+  263. swizzle_1                      $34 = ($34..35).y
+  264. cmpeq_float                    $33 = equal($33, $34)
+  265. bitwise_and_int                $32 &= $33
+  266. copy_slot_masked               [test].result = Mask($32)
+  267. label                          label 0x0000000A
+  268. copy_slot_masked               $31 = Mask($32)
+  269. label                          label 0x00000003
+  270. load_condition_mask            CondMask = $17
+  271. zero_slot_unmasked             $44 = 0
+  272. merge_condition_mask           CondMask = $30 & $31
+  273. branch_if_no_active_lanes      branch_if_no_active_lanes +94 (label 2 at #367)
+  274. copy_slot_unmasked             op = slash
+  275. copy_4_slots_unmasked          m11, m12, m21, m22 = f1, f2, f3, f4
+  276. copy_slot_unmasked             $45 = f1
+  277. copy_constant                  $46 = 0x3F000000 (0.5)
+  278. mul_float                      $45 *= $46
+  279. copy_slot_unmasked             $46 = f2
+  280. copy_constant                  $47 = 0x3F000000 (0.5)
+  281. mul_float                      $46 *= $47
+  282. copy_slot_unmasked             $47 = f3
+  283. copy_constant                  $48 = 0x3F000000 (0.5)
+  284. mul_float                      $47 *= $48
+  285. copy_slot_unmasked             $48 = f4
+  286. copy_constant                  $49 = 0x3F000000 (0.5)
+  287. mul_float                      $48 *= $49
+  288. copy_4_slots_unmasked          expected = $45..48
+  289. copy_constant                  $45 = colorRed(0)
+  290. copy_slot_unmasked             one = $45
+  291. copy_slot_unmasked             $45 = m11
+  292. copy_slot_unmasked             $46 = one
+  293. mul_float                      $45 *= $46
+  294. copy_slot_unmasked             $46 = m12
+  295. copy_slot_unmasked             $47 = one
+  296. mul_float                      $46 *= $47
+  297. copy_slot_unmasked             $47 = m21
+  298. copy_slot_unmasked             $48 = one
+  299. mul_float                      $47 *= $48
+  300. copy_slot_unmasked             $48 = m22
+  301. copy_slot_unmasked             $49 = one
+  302. mul_float                      $48 *= $49
+  303. copy_4_slots_unmasked          m2 = $45..48
+  304. store_loop_mask                $45 = LoopMask
+  305. copy_slot_unmasked             $46 = op
+  306. store_loop_mask                $47 = LoopMask
+  307. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  308. case_op                        if ($46 == 0x00000001) { LoopMask = true; $47 = false; }
+  309. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 16 at #316)
+  310. copy_4_slots_unmasked          $48..51 = m2
+  311. copy_constant                  $52 = 0x3F800000 (1.0)
+  312. swizzle_4                      $52..55 = ($52..55).xxxx
+  313. add_4_floats                   $48..51 += $52..55
+  314. copy_4_slots_masked            m2 = Mask($48..51)
+  315. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  316. label                          label 0x00000010
+  317. case_op                        if ($46 == 0x00000002) { LoopMask = true; $47 = false; }
+  318. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 17 at #325)
+  319. copy_4_slots_unmasked          $48..51 = m2
+  320. copy_constant                  $52 = 0x3F800000 (1.0)
+  321. swizzle_4                      $52..55 = ($52..55).xxxx
+  322. sub_4_floats                   $48..51 -= $52..55
+  323. copy_4_slots_masked            m2 = Mask($48..51)
+  324. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  325. label                          label 0x00000011
+  326. case_op                        if ($46 == 0x00000003) { LoopMask = true; $47 = false; }
+  327. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 18 at #334)
+  328. copy_4_slots_unmasked          $48..51 = m2
+  329. copy_constant                  $52 = 0x40000000 (2.0)
+  330. swizzle_4                      $52..55 = ($52..55).xxxx
+  331. mul_4_floats                   $48..51 *= $52..55
+  332. copy_4_slots_masked            m2 = Mask($48..51)
+  333. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  334. label                          label 0x00000012
+  335. case_op                        if ($46 == 0x00000004) { LoopMask = true; $47 = false; }
+  336. branch_if_no_active_lanes      branch_if_no_active_lanes +7 (label 19 at #343)
+  337. copy_4_slots_unmasked          $48..51 = m2
+  338. copy_constant                  $52 = 0x3F000000 (0.5)
+  339. swizzle_4                      $52..55 = ($52..55).xxxx
+  340. mul_4_floats                   $48..51 *= $52..55
+  341. copy_4_slots_masked            m2 = Mask($48..51)
+  342. mask_off_loop_mask             LoopMask &= ~(CondMask & LoopMask & RetMask)
+  343. label                          label 0x00000013
+  344. load_loop_mask                 LoopMask = $45
+  345. copy_slot_unmasked             $45 = m2(0)
+  346. copy_slot_unmasked             $46 = expected(0)
+  347. cmpeq_float                    $45 = equal($45, $46)
+  348. copy_2_slots_unmasked          $46..47 = m2(0..1)
+  349. swizzle_1                      $46 = ($46..47).y
+  350. copy_2_slots_unmasked          $47..48 = expected(0..1)
+  351. swizzle_1                      $47 = ($47..48).y
+  352. cmpeq_float                    $46 = equal($46, $47)
+  353. bitwise_and_int                $45 &= $46
+  354. copy_slot_unmasked             $46 = m2(2)
+  355. copy_slot_unmasked             $47 = expected(2)
+  356. cmpeq_float                    $46 = equal($46, $47)
+  357. bitwise_and_int                $45 &= $46
+  358. copy_2_slots_unmasked          $46..47 = m2(2..3)
+  359. swizzle_1                      $46 = ($46..47).y
+  360. copy_2_slots_unmasked          $47..48 = expected(2..3)
+  361. swizzle_1                      $47 = ($47..48).y
+  362. cmpeq_float                    $46 = equal($46, $47)
+  363. bitwise_and_int                $45 &= $46
+  364. copy_slot_masked               [test].result = Mask($45)
+  365. label                          label 0x0000000F
+  366. copy_slot_masked               $44 = Mask($45)
+  367. label                          label 0x00000002
+  368. load_condition_mask            CondMask = $30
+  369. zero_slot_unmasked             $0 = 0
+  370. merge_condition_mask           CondMask = $43 & $44
+  371. branch_if_no_active_lanes      branch_if_no_active_lanes +46 (label 1 at #417)
+  372. copy_constant                  $1 = colorRed(0)
+  373. copy_constant                  $2 = 0x41200000 (10.0)
+  374. mul_float                      $1 *= $2
+  375. copy_slot_unmasked             ten = $1
+  376. copy_slot_unmasked             $2 = $1
+  377. copy_slot_unmasked             $3 = ten
+  378. copy_slot_unmasked             $4 = $3
+  379. copy_4_slots_unmasked          mat = $1..4
+  380. copy_constant                  $5 = 0x3F800000 (1.0)
+  381. copy_constant                  $6 = testInputs(0)
+  382. div_float                      $5 /= $6
+  383. swizzle_4                      $5..8 = ($5..8).xxxx
+  384. mul_4_floats                   $1..4 *= $5..8
+  385. copy_4_slots_unmasked          div = $1..4
+  386. copy_4_slots_unmasked          $1..4 = mat
+  387. copy_constant                  $5 = 0x3F800000 (1.0)
+  388. copy_constant                  $6 = testInputs(0)
+  389. div_float                      $5 /= $6
+  390. swizzle_4                      $5..8 = ($5..8).xxxx
+  391. mul_4_floats                   $1..4 *= $5..8
+  392. copy_4_slots_masked            mat = Mask($1..4)
+  393. copy_4_slots_unmasked          $1..4 = div
+  394. copy_constant                  $5 = 0x41000000 (8.0)
+  395. swizzle_4                      $5..8 = ($5..8).xxxx
+  396. add_4_floats                   $1..4 += $5..8
+  397. abs_4_floats                   $1..4 = abs($1..4)
+  398. copy_constant                  $5 = 0x3C23D70A (0.01)
+  399. swizzle_4                      $5..8 = ($5..8).xxxx
+  400. cmplt_4_floats                 $1..4 = lessThan($1..4, $5..8)
+  401. bitwise_and_2_ints             $1..2 &= $3..4
+  402. bitwise_and_int                $1 &= $2
+  403. copy_4_slots_unmasked          $2..5 = mat
+  404. copy_constant                  $6 = 0x41000000 (8.0)
+  405. swizzle_4                      $6..9 = ($6..9).xxxx
+  406. add_4_floats                   $2..5 += $6..9
+  407. abs_4_floats                   $2..5 = abs($2..5)
+  408. copy_constant                  $6 = 0x3C23D70A (0.01)
+  409. swizzle_4                      $6..9 = ($6..9).xxxx
+  410. cmplt_4_floats                 $2..5 = lessThan($2..5, $6..9)
+  411. bitwise_and_2_ints             $2..3 &= $4..5
+  412. bitwise_and_int                $2 &= $3
+  413. bitwise_and_int                $1 &= $2
+  414. copy_slot_masked               [divisionTest].result = Mask($1)
+  415. label                          label 0x00000014
+  416. copy_slot_masked               $0 = Mask($1)
+  417. label                          label 0x00000001
+  418. load_condition_mask            CondMask = $43
+  419. swizzle_4                      $0..3 = ($0..3).xxxx
+  420. copy_4_constants               $4..7 = colorRed
+  421. copy_4_constants               $8..11 = colorGreen
+  422. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  423. copy_4_slots_unmasked          [main].result = $0..3
+  424. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/OutParams.skrp b/tests/sksl/shared/OutParams.skrp
index b70843a..2149e0c 100644
--- a/tests/sksl/shared/OutParams.skrp
+++ b/tests/sksl/shared/OutParams.skrp
@@ -30,278 +30,275 @@
    30. copy_constant                  $0 = colorWhite(1)
    31. copy_slot_unmasked             $1 = $0
    32. copy_2_slots_unmasked          v₁ = $0..1
-   33. copy_slot_unmasked             h3(0) = $0
-   34. copy_slot_unmasked             h3(2) = $1
-   35. label                          label 0x00000005
-   36. copy_constant                  $0 = colorWhite(3)
-   37. swizzle_4                      $0..3 = ($0..3).xxxx
-   38. copy_4_slots_unmasked          v₃ = $0..3
-   39. copy_2_slots_unmasked          h4(2..3) = $0..1
-   40. copy_2_slots_unmasked          h4(0..1) = $2..3
-   41. label                          label 0x00000006
-   42. zero_4_slots_unmasked          h2x2 = 0
-   43. zero_slot_unmasked             $0 = 0
-   44. copy_constant                  $1 = colorWhite(0)
-   45. swizzle_4                      $0..3 = ($0..3).yxxy
-   46. copy_4_slots_unmasked          v₄ = $0..3
-   47. copy_4_slots_unmasked          h2x2 = $0..3
-   48. label                          label 0x00000007
-   49. zero_4_slots_unmasked          h3x3(0..3) = 0
-   50. zero_4_slots_unmasked          h3x3(4..7) = 0
-   51. zero_slot_unmasked             h3x3(8) = 0
-   52. zero_slot_unmasked             $0 = 0
-   53. copy_constant                  $1 = colorWhite(1)
-   54. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
-   55. copy_4_slots_unmasked          v₅(0..3) = $0..3
-   56. copy_4_slots_unmasked          v₅(4..7) = $4..7
-   57. copy_slot_unmasked             v₅(8) = $8
-   58. copy_4_slots_unmasked          h3x3(0..3) = $0..3
-   59. copy_4_slots_unmasked          h3x3(4..7) = $4..7
-   60. copy_slot_unmasked             h3x3(8) = $8
-   61. label                          label 0x00000008
-   62. zero_4_slots_unmasked          h4x4(0..3) = 0
-   63. zero_4_slots_unmasked          h4x4(4..7) = 0
-   64. zero_4_slots_unmasked          h4x4(8..11) = 0
-   65. zero_4_slots_unmasked          h4x4(12..15) = 0
-   66. zero_slot_unmasked             $0 = 0
-   67. copy_constant                  $1 = colorWhite(2)
-   68. shuffle                        $0..15 = ($0..15)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1]
-   69. copy_4_slots_unmasked          v₆(0..3) = $0..3
-   70. copy_4_slots_unmasked          v₆(4..7) = $4..7
-   71. copy_4_slots_unmasked          v₆(8..11) = $8..11
-   72. copy_4_slots_unmasked          v₆(12..15) = $12..15
-   73. copy_4_slots_unmasked          h4x4(0..3) = $0..3
-   74. copy_4_slots_unmasked          h4x4(4..7) = $4..7
-   75. copy_4_slots_unmasked          h4x4(8..11) = $8..11
-   76. copy_4_slots_unmasked          h4x4(12..15) = $12..15
-   77. label                          label 0x00000009
-   78. copy_constant                  $0 = colorWhite(2)
-   79. swizzle_3                      $0..2 = ($0..2).xxx
-   80. copy_3_slots_unmasked          v₂ = $0..2
-   81. copy_3_slots_unmasked          h3x3(3..5) = $0..2
-   82. label                          label 0x0000000A
-   83. copy_constant                  $0 = colorWhite(0)
-   84. copy_slot_unmasked             v = $0
-   85. copy_slot_unmasked             h4x4(15) = $0
-   86. label                          label 0x0000000B
-   87. copy_constant                  $0 = colorWhite(0)
-   88. copy_slot_unmasked             v = $0
-   89. copy_slot_unmasked             h2x2(0) = $0
-   90. label                          label 0x0000000C
-   91. zero_slot_unmasked             i = 0
-   92. copy_constant                  $0 = colorWhite(0)
-   93. cast_to_int_from_float         $0 = FloatToInt($0)
-   94. copy_slot_unmasked             v₇ = $0
-   95. copy_slot_unmasked             i = $0
-   96. label                          label 0x0000000D
-   97. zero_2_slots_unmasked          i2 = 0
-   98. copy_constant                  $0 = colorWhite(1)
-   99. cast_to_int_from_float         $0 = FloatToInt($0)
-  100. copy_slot_unmasked             $1 = $0
-  101. copy_2_slots_unmasked          v₈ = $0..1
-  102. copy_2_slots_unmasked          i2 = $0..1
-  103. label                          label 0x0000000E
-  104. zero_3_slots_unmasked          i3 = 0
-  105. copy_constant                  $0 = colorWhite(2)
-  106. cast_to_int_from_float         $0 = FloatToInt($0)
-  107. swizzle_3                      $0..2 = ($0..2).xxx
-  108. copy_3_slots_unmasked          v₉ = $0..2
-  109. copy_3_slots_unmasked          i3 = $0..2
-  110. label                          label 0x0000000F
-  111. zero_4_slots_unmasked          i4 = 0
-  112. copy_constant                  $0 = colorWhite(3)
-  113. cast_to_int_from_float         $0 = FloatToInt($0)
-  114. swizzle_4                      $0..3 = ($0..3).xxxx
-  115. copy_4_slots_unmasked          v₁₀ = $0..3
-  116. copy_4_slots_unmasked          i4 = $0..3
-  117. label                          label 0x00000010
-  118. copy_constant                  $0 = colorWhite(2)
-  119. cast_to_int_from_float         $0 = FloatToInt($0)
-  120. swizzle_3                      $0..2 = ($0..2).xxx
-  121. copy_3_slots_unmasked          v₉ = $0..2
-  122. copy_3_slots_unmasked          i4(0..2) = $0..2
-  123. label                          label 0x00000011
-  124. copy_constant                  $0 = colorWhite(0)
-  125. cast_to_int_from_float         $0 = FloatToInt($0)
-  126. copy_slot_unmasked             v₇ = $0
-  127. copy_slot_unmasked             i2(1) = $0
-  128. label                          label 0x00000012
-  129. zero_slot_unmasked             f = 0
-  130. copy_constant                  $0 = colorWhite(0)
-  131. copy_slot_unmasked             v₁₁ = $0
-  132. copy_slot_unmasked             f = $0
-  133. label                          label 0x00000013
-  134. zero_2_slots_unmasked          f2 = 0
-  135. copy_constant                  $0 = colorWhite(1)
-  136. copy_slot_unmasked             $1 = $0
-  137. copy_2_slots_unmasked          v₁₂ = $0..1
-  138. copy_2_slots_unmasked          f2 = $0..1
-  139. label                          label 0x00000014
-  140. zero_3_slots_unmasked          f3 = 0
-  141. copy_constant                  $0 = colorWhite(2)
-  142. swizzle_3                      $0..2 = ($0..2).xxx
-  143. copy_3_slots_unmasked          v₁₃ = $0..2
-  144. copy_3_slots_unmasked          f3 = $0..2
-  145. label                          label 0x00000015
-  146. zero_4_slots_unmasked          f4 = 0
-  147. copy_constant                  $0 = colorWhite(3)
-  148. swizzle_4                      $0..3 = ($0..3).xxxx
-  149. copy_4_slots_unmasked          v₁₄ = $0..3
-  150. copy_4_slots_unmasked          f4 = $0..3
-  151. label                          label 0x00000016
-  152. copy_constant                  $0 = colorWhite(1)
-  153. copy_slot_unmasked             $1 = $0
-  154. copy_2_slots_unmasked          v₁₂ = $0..1
-  155. copy_2_slots_unmasked          f3(0..1) = $0..1
-  156. label                          label 0x00000017
-  157. copy_constant                  $0 = colorWhite(0)
-  158. copy_slot_unmasked             v₁₁ = $0
-  159. copy_slot_unmasked             f2(0) = $0
-  160. label                          label 0x00000018
-  161. zero_4_slots_unmasked          f2x2 = 0
-  162. zero_slot_unmasked             $0 = 0
-  163. copy_constant                  $1 = colorWhite(0)
-  164. swizzle_4                      $0..3 = ($0..3).yxxy
-  165. copy_4_slots_unmasked          v₁₅ = $0..3
-  166. copy_4_slots_unmasked          f2x2 = $0..3
-  167. label                          label 0x00000019
-  168. zero_4_slots_unmasked          f3x3(0..3) = 0
-  169. zero_4_slots_unmasked          f3x3(4..7) = 0
-  170. zero_slot_unmasked             f3x3(8) = 0
-  171. zero_slot_unmasked             $0 = 0
-  172. copy_constant                  $1 = colorWhite(1)
-  173. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
-  174. copy_4_slots_unmasked          v₁₆(0..3) = $0..3
-  175. copy_4_slots_unmasked          v₁₆(4..7) = $4..7
-  176. copy_slot_unmasked             v₁₆(8) = $8
-  177. copy_4_slots_unmasked          f3x3(0..3) = $0..3
-  178. copy_4_slots_unmasked          f3x3(4..7) = $4..7
-  179. copy_slot_unmasked             f3x3(8) = $8
-  180. label                          label 0x0000001A
-  181. zero_4_slots_unmasked          f4x4(0..3) = 0
-  182. zero_4_slots_unmasked          f4x4(4..7) = 0
-  183. zero_4_slots_unmasked          f4x4(8..11) = 0
-  184. zero_4_slots_unmasked          f4x4(12..15) = 0
-  185. zero_slot_unmasked             $0 = 0
-  186. copy_constant                  $1 = colorWhite(2)
-  187. shuffle                        $0..15 = ($0..15)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1]
-  188. copy_4_slots_unmasked          v₁₇(0..3) = $0..3
-  189. copy_4_slots_unmasked          v₁₇(4..7) = $4..7
-  190. copy_4_slots_unmasked          v₁₇(8..11) = $8..11
-  191. copy_4_slots_unmasked          v₁₇(12..15) = $12..15
-  192. copy_4_slots_unmasked          f4x4(0..3) = $0..3
-  193. copy_4_slots_unmasked          f4x4(4..7) = $4..7
-  194. copy_4_slots_unmasked          f4x4(8..11) = $8..11
-  195. copy_4_slots_unmasked          f4x4(12..15) = $12..15
-  196. label                          label 0x0000001B
-  197. copy_constant                  $0 = colorWhite(0)
-  198. copy_slot_unmasked             v₁₁ = $0
-  199. copy_slot_unmasked             f2x2(0) = $0
-  200. label                          label 0x0000001C
-  201. zero_slot_unmasked             b = 0
-  202. copy_constant                  $0 = colorWhite(0)
-  203. zero_slot_unmasked             $1 = 0
-  204. cmpne_float                    $0 = notEqual($0, $1)
-  205. copy_slot_unmasked             v₁₈ = $0
-  206. copy_slot_unmasked             b = $0
-  207. label                          label 0x0000001D
-  208. zero_2_slots_unmasked          b2 = 0
-  209. copy_constant                  $0 = colorWhite(1)
-  210. zero_slot_unmasked             $1 = 0
-  211. cmpne_float                    $0 = notEqual($0, $1)
-  212. copy_slot_unmasked             $1 = $0
-  213. copy_2_slots_unmasked          v₁₉ = $0..1
-  214. copy_2_slots_unmasked          b2 = $0..1
-  215. label                          label 0x0000001E
-  216. zero_3_slots_unmasked          b3 = 0
-  217. copy_constant                  $0 = colorWhite(2)
-  218. zero_slot_unmasked             $1 = 0
-  219. cmpne_float                    $0 = notEqual($0, $1)
-  220. swizzle_3                      $0..2 = ($0..2).xxx
-  221. copy_3_slots_unmasked          v₂₀ = $0..2
-  222. copy_3_slots_unmasked          b3 = $0..2
-  223. label                          label 0x0000001F
-  224. zero_4_slots_unmasked          b4 = 0
-  225. copy_constant                  $0 = colorWhite(3)
-  226. zero_slot_unmasked             $1 = 0
-  227. cmpne_float                    $0 = notEqual($0, $1)
-  228. swizzle_4                      $0..3 = ($0..3).xxxx
-  229. copy_4_slots_unmasked          v₂₁ = $0..3
-  230. copy_4_slots_unmasked          b4 = $0..3
-  231. label                          label 0x00000020
-  232. copy_constant                  $0 = colorWhite(1)
-  233. zero_slot_unmasked             $1 = 0
-  234. cmpne_float                    $0 = notEqual($0, $1)
-  235. copy_slot_unmasked             $1 = $0
-  236. copy_2_slots_unmasked          v₁₉ = $0..1
-  237. copy_slot_unmasked             b4(0) = $0
-  238. copy_slot_unmasked             b4(3) = $1
-  239. label                          label 0x00000021
-  240. copy_constant                  $0 = colorWhite(0)
-  241. zero_slot_unmasked             $1 = 0
-  242. cmpne_float                    $0 = notEqual($0, $1)
-  243. copy_slot_unmasked             v₁₈ = $0
-  244. copy_slot_unmasked             b3(2) = $0
-  245. label                          label 0x00000022
-  246. copy_constant                  ok = 0xFFFFFFFF
-  247. copy_slot_unmasked             $0 = ok
-  248. copy_constant                  $1 = 0x3F800000 (1.0)
-  249. copy_slot_unmasked             $2 = h
-  250. copy_slot_unmasked             $3 = h2(0)
-  251. mul_float                      $2 *= $3
-  252. copy_slot_unmasked             $3 = h3(0)
-  253. mul_float                      $2 *= $3
-  254. copy_slot_unmasked             $3 = h4(0)
-  255. mul_float                      $2 *= $3
-  256. copy_slot_unmasked             $3 = h2x2(0)
-  257. mul_float                      $2 *= $3
-  258. copy_slot_unmasked             $3 = h3x3(0)
-  259. mul_float                      $2 *= $3
-  260. copy_slot_unmasked             $3 = h4x4(0)
-  261. mul_float                      $2 *= $3
-  262. cmpeq_float                    $1 = equal($1, $2)
-  263. bitwise_and_int                $0 &= $1
-  264. copy_slot_unmasked             ok = $0
-  265. copy_constant                  $1 = 0x3F800000 (1.0)
-  266. copy_slot_unmasked             $2 = f
-  267. copy_slot_unmasked             $3 = f2(0)
-  268. mul_float                      $2 *= $3
-  269. copy_slot_unmasked             $3 = f3(0)
-  270. mul_float                      $2 *= $3
-  271. copy_slot_unmasked             $3 = f4(0)
-  272. mul_float                      $2 *= $3
-  273. copy_slot_unmasked             $3 = f2x2(0)
-  274. mul_float                      $2 *= $3
-  275. copy_slot_unmasked             $3 = f3x3(0)
-  276. mul_float                      $2 *= $3
-  277. copy_slot_unmasked             $3 = f4x4(0)
-  278. mul_float                      $2 *= $3
-  279. cmpeq_float                    $1 = equal($1, $2)
-  280. bitwise_and_int                $0 &= $1
-  281. copy_slot_unmasked             ok = $0
-  282. copy_constant                  $1 = 0x00000001 (1.401298e-45)
-  283. copy_slot_unmasked             $2 = i
-  284. copy_slot_unmasked             $3 = i2(0)
-  285. mul_int                        $2 *= $3
-  286. copy_slot_unmasked             $3 = i3(0)
-  287. mul_int                        $2 *= $3
-  288. copy_slot_unmasked             $3 = i4(0)
-  289. mul_int                        $2 *= $3
-  290. cmpeq_int                      $1 = equal($1, $2)
-  291. bitwise_and_int                $0 &= $1
-  292. copy_slot_unmasked             ok = $0
-  293. copy_slot_unmasked             $1 = b
-  294. copy_slot_unmasked             $2 = b2(0)
-  295. bitwise_and_int                $1 &= $2
-  296. copy_slot_unmasked             $2 = b3(0)
-  297. bitwise_and_int                $1 &= $2
-  298. copy_slot_unmasked             $2 = b4(0)
-  299. bitwise_and_int                $1 &= $2
-  300. bitwise_and_int                $0 &= $1
-  301. copy_slot_unmasked             ok = $0
-  302. swizzle_4                      $0..3 = ($0..3).xxxx
-  303. copy_4_constants               $4..7 = colorRed
-  304. copy_4_constants               $8..11 = colorGreen
-  305. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  306. copy_4_slots_unmasked          [main].result = $0..3
-  307. load_src                       src.rgba = [main].result
+   33. swizzle_copy_2_slots_masked    (h3).xz = Mask($0..1)
+   34. label                          label 0x00000005
+   35. copy_constant                  $0 = colorWhite(3)
+   36. swizzle_4                      $0..3 = ($0..3).xxxx
+   37. copy_4_slots_unmasked          v₃ = $0..3
+   38. swizzle_copy_4_slots_masked    (h4).zwxy = Mask($0..3)
+   39. label                          label 0x00000006
+   40. zero_4_slots_unmasked          h2x2 = 0
+   41. zero_slot_unmasked             $0 = 0
+   42. copy_constant                  $1 = colorWhite(0)
+   43. swizzle_4                      $0..3 = ($0..3).yxxy
+   44. copy_4_slots_unmasked          v₄ = $0..3
+   45. copy_4_slots_unmasked          h2x2 = $0..3
+   46. label                          label 0x00000007
+   47. zero_4_slots_unmasked          h3x3(0..3) = 0
+   48. zero_4_slots_unmasked          h3x3(4..7) = 0
+   49. zero_slot_unmasked             h3x3(8) = 0
+   50. zero_slot_unmasked             $0 = 0
+   51. copy_constant                  $1 = colorWhite(1)
+   52. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
+   53. copy_4_slots_unmasked          v₅(0..3) = $0..3
+   54. copy_4_slots_unmasked          v₅(4..7) = $4..7
+   55. copy_slot_unmasked             v₅(8) = $8
+   56. copy_4_slots_unmasked          h3x3(0..3) = $0..3
+   57. copy_4_slots_unmasked          h3x3(4..7) = $4..7
+   58. copy_slot_unmasked             h3x3(8) = $8
+   59. label                          label 0x00000008
+   60. zero_4_slots_unmasked          h4x4(0..3) = 0
+   61. zero_4_slots_unmasked          h4x4(4..7) = 0
+   62. zero_4_slots_unmasked          h4x4(8..11) = 0
+   63. zero_4_slots_unmasked          h4x4(12..15) = 0
+   64. zero_slot_unmasked             $0 = 0
+   65. copy_constant                  $1 = colorWhite(2)
+   66. shuffle                        $0..15 = ($0..15)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1]
+   67. copy_4_slots_unmasked          v₆(0..3) = $0..3
+   68. copy_4_slots_unmasked          v₆(4..7) = $4..7
+   69. copy_4_slots_unmasked          v₆(8..11) = $8..11
+   70. copy_4_slots_unmasked          v₆(12..15) = $12..15
+   71. copy_4_slots_unmasked          h4x4(0..3) = $0..3
+   72. copy_4_slots_unmasked          h4x4(4..7) = $4..7
+   73. copy_4_slots_unmasked          h4x4(8..11) = $8..11
+   74. copy_4_slots_unmasked          h4x4(12..15) = $12..15
+   75. label                          label 0x00000009
+   76. copy_constant                  $0 = colorWhite(2)
+   77. swizzle_3                      $0..2 = ($0..2).xxx
+   78. copy_3_slots_unmasked          v₂ = $0..2
+   79. copy_3_slots_unmasked          h3x3(3..5) = $0..2
+   80. label                          label 0x0000000A
+   81. copy_constant                  $0 = colorWhite(0)
+   82. copy_slot_unmasked             v = $0
+   83. copy_slot_unmasked             h4x4(15) = $0
+   84. label                          label 0x0000000B
+   85. copy_constant                  $0 = colorWhite(0)
+   86. copy_slot_unmasked             v = $0
+   87. copy_slot_unmasked             h2x2(0) = $0
+   88. label                          label 0x0000000C
+   89. zero_slot_unmasked             i = 0
+   90. copy_constant                  $0 = colorWhite(0)
+   91. cast_to_int_from_float         $0 = FloatToInt($0)
+   92. copy_slot_unmasked             v₇ = $0
+   93. copy_slot_unmasked             i = $0
+   94. label                          label 0x0000000D
+   95. zero_2_slots_unmasked          i2 = 0
+   96. copy_constant                  $0 = colorWhite(1)
+   97. cast_to_int_from_float         $0 = FloatToInt($0)
+   98. copy_slot_unmasked             $1 = $0
+   99. copy_2_slots_unmasked          v₈ = $0..1
+  100. copy_2_slots_unmasked          i2 = $0..1
+  101. label                          label 0x0000000E
+  102. zero_3_slots_unmasked          i3 = 0
+  103. copy_constant                  $0 = colorWhite(2)
+  104. cast_to_int_from_float         $0 = FloatToInt($0)
+  105. swizzle_3                      $0..2 = ($0..2).xxx
+  106. copy_3_slots_unmasked          v₉ = $0..2
+  107. copy_3_slots_unmasked          i3 = $0..2
+  108. label                          label 0x0000000F
+  109. zero_4_slots_unmasked          i4 = 0
+  110. copy_constant                  $0 = colorWhite(3)
+  111. cast_to_int_from_float         $0 = FloatToInt($0)
+  112. swizzle_4                      $0..3 = ($0..3).xxxx
+  113. copy_4_slots_unmasked          v₁₀ = $0..3
+  114. copy_4_slots_unmasked          i4 = $0..3
+  115. label                          label 0x00000010
+  116. copy_constant                  $0 = colorWhite(2)
+  117. cast_to_int_from_float         $0 = FloatToInt($0)
+  118. swizzle_3                      $0..2 = ($0..2).xxx
+  119. copy_3_slots_unmasked          v₉ = $0..2
+  120. copy_3_slots_unmasked          i4(0..2) = $0..2
+  121. label                          label 0x00000011
+  122. copy_constant                  $0 = colorWhite(0)
+  123. cast_to_int_from_float         $0 = FloatToInt($0)
+  124. copy_slot_unmasked             v₇ = $0
+  125. copy_slot_unmasked             i2(1) = $0
+  126. label                          label 0x00000012
+  127. zero_slot_unmasked             f = 0
+  128. copy_constant                  $0 = colorWhite(0)
+  129. copy_slot_unmasked             v₁₁ = $0
+  130. copy_slot_unmasked             f = $0
+  131. label                          label 0x00000013
+  132. zero_2_slots_unmasked          f2 = 0
+  133. copy_constant                  $0 = colorWhite(1)
+  134. copy_slot_unmasked             $1 = $0
+  135. copy_2_slots_unmasked          v₁₂ = $0..1
+  136. copy_2_slots_unmasked          f2 = $0..1
+  137. label                          label 0x00000014
+  138. zero_3_slots_unmasked          f3 = 0
+  139. copy_constant                  $0 = colorWhite(2)
+  140. swizzle_3                      $0..2 = ($0..2).xxx
+  141. copy_3_slots_unmasked          v₁₃ = $0..2
+  142. copy_3_slots_unmasked          f3 = $0..2
+  143. label                          label 0x00000015
+  144. zero_4_slots_unmasked          f4 = 0
+  145. copy_constant                  $0 = colorWhite(3)
+  146. swizzle_4                      $0..3 = ($0..3).xxxx
+  147. copy_4_slots_unmasked          v₁₄ = $0..3
+  148. copy_4_slots_unmasked          f4 = $0..3
+  149. label                          label 0x00000016
+  150. copy_constant                  $0 = colorWhite(1)
+  151. copy_slot_unmasked             $1 = $0
+  152. copy_2_slots_unmasked          v₁₂ = $0..1
+  153. copy_2_slots_unmasked          f3(0..1) = $0..1
+  154. label                          label 0x00000017
+  155. copy_constant                  $0 = colorWhite(0)
+  156. copy_slot_unmasked             v₁₁ = $0
+  157. copy_slot_unmasked             f2(0) = $0
+  158. label                          label 0x00000018
+  159. zero_4_slots_unmasked          f2x2 = 0
+  160. zero_slot_unmasked             $0 = 0
+  161. copy_constant                  $1 = colorWhite(0)
+  162. swizzle_4                      $0..3 = ($0..3).yxxy
+  163. copy_4_slots_unmasked          v₁₅ = $0..3
+  164. copy_4_slots_unmasked          f2x2 = $0..3
+  165. label                          label 0x00000019
+  166. zero_4_slots_unmasked          f3x3(0..3) = 0
+  167. zero_4_slots_unmasked          f3x3(4..7) = 0
+  168. zero_slot_unmasked             f3x3(8) = 0
+  169. zero_slot_unmasked             $0 = 0
+  170. copy_constant                  $1 = colorWhite(1)
+  171. shuffle                        $0..8 = ($0..8)[1 0 0 0 1 0 0 0 1]
+  172. copy_4_slots_unmasked          v₁₆(0..3) = $0..3
+  173. copy_4_slots_unmasked          v₁₆(4..7) = $4..7
+  174. copy_slot_unmasked             v₁₆(8) = $8
+  175. copy_4_slots_unmasked          f3x3(0..3) = $0..3
+  176. copy_4_slots_unmasked          f3x3(4..7) = $4..7
+  177. copy_slot_unmasked             f3x3(8) = $8
+  178. label                          label 0x0000001A
+  179. zero_4_slots_unmasked          f4x4(0..3) = 0
+  180. zero_4_slots_unmasked          f4x4(4..7) = 0
+  181. zero_4_slots_unmasked          f4x4(8..11) = 0
+  182. zero_4_slots_unmasked          f4x4(12..15) = 0
+  183. zero_slot_unmasked             $0 = 0
+  184. copy_constant                  $1 = colorWhite(2)
+  185. shuffle                        $0..15 = ($0..15)[1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1]
+  186. copy_4_slots_unmasked          v₁₇(0..3) = $0..3
+  187. copy_4_slots_unmasked          v₁₇(4..7) = $4..7
+  188. copy_4_slots_unmasked          v₁₇(8..11) = $8..11
+  189. copy_4_slots_unmasked          v₁₇(12..15) = $12..15
+  190. copy_4_slots_unmasked          f4x4(0..3) = $0..3
+  191. copy_4_slots_unmasked          f4x4(4..7) = $4..7
+  192. copy_4_slots_unmasked          f4x4(8..11) = $8..11
+  193. copy_4_slots_unmasked          f4x4(12..15) = $12..15
+  194. label                          label 0x0000001B
+  195. copy_constant                  $0 = colorWhite(0)
+  196. copy_slot_unmasked             v₁₁ = $0
+  197. copy_slot_unmasked             f2x2(0) = $0
+  198. label                          label 0x0000001C
+  199. zero_slot_unmasked             b = 0
+  200. copy_constant                  $0 = colorWhite(0)
+  201. zero_slot_unmasked             $1 = 0
+  202. cmpne_float                    $0 = notEqual($0, $1)
+  203. copy_slot_unmasked             v₁₈ = $0
+  204. copy_slot_unmasked             b = $0
+  205. label                          label 0x0000001D
+  206. zero_2_slots_unmasked          b2 = 0
+  207. copy_constant                  $0 = colorWhite(1)
+  208. zero_slot_unmasked             $1 = 0
+  209. cmpne_float                    $0 = notEqual($0, $1)
+  210. copy_slot_unmasked             $1 = $0
+  211. copy_2_slots_unmasked          v₁₉ = $0..1
+  212. copy_2_slots_unmasked          b2 = $0..1
+  213. label                          label 0x0000001E
+  214. zero_3_slots_unmasked          b3 = 0
+  215. copy_constant                  $0 = colorWhite(2)
+  216. zero_slot_unmasked             $1 = 0
+  217. cmpne_float                    $0 = notEqual($0, $1)
+  218. swizzle_3                      $0..2 = ($0..2).xxx
+  219. copy_3_slots_unmasked          v₂₀ = $0..2
+  220. copy_3_slots_unmasked          b3 = $0..2
+  221. label                          label 0x0000001F
+  222. zero_4_slots_unmasked          b4 = 0
+  223. copy_constant                  $0 = colorWhite(3)
+  224. zero_slot_unmasked             $1 = 0
+  225. cmpne_float                    $0 = notEqual($0, $1)
+  226. swizzle_4                      $0..3 = ($0..3).xxxx
+  227. copy_4_slots_unmasked          v₂₁ = $0..3
+  228. copy_4_slots_unmasked          b4 = $0..3
+  229. label                          label 0x00000020
+  230. copy_constant                  $0 = colorWhite(1)
+  231. zero_slot_unmasked             $1 = 0
+  232. cmpne_float                    $0 = notEqual($0, $1)
+  233. copy_slot_unmasked             $1 = $0
+  234. copy_2_slots_unmasked          v₁₉ = $0..1
+  235. swizzle_copy_2_slots_masked    (b4).xw = Mask($0..1)
+  236. label                          label 0x00000021
+  237. copy_constant                  $0 = colorWhite(0)
+  238. zero_slot_unmasked             $1 = 0
+  239. cmpne_float                    $0 = notEqual($0, $1)
+  240. copy_slot_unmasked             v₁₈ = $0
+  241. copy_slot_unmasked             b3(2) = $0
+  242. label                          label 0x00000022
+  243. copy_constant                  ok = 0xFFFFFFFF
+  244. copy_slot_unmasked             $0 = ok
+  245. copy_constant                  $1 = 0x3F800000 (1.0)
+  246. copy_slot_unmasked             $2 = h
+  247. copy_slot_unmasked             $3 = h2(0)
+  248. mul_float                      $2 *= $3
+  249. copy_slot_unmasked             $3 = h3(0)
+  250. mul_float                      $2 *= $3
+  251. copy_slot_unmasked             $3 = h4(0)
+  252. mul_float                      $2 *= $3
+  253. copy_slot_unmasked             $3 = h2x2(0)
+  254. mul_float                      $2 *= $3
+  255. copy_slot_unmasked             $3 = h3x3(0)
+  256. mul_float                      $2 *= $3
+  257. copy_slot_unmasked             $3 = h4x4(0)
+  258. mul_float                      $2 *= $3
+  259. cmpeq_float                    $1 = equal($1, $2)
+  260. bitwise_and_int                $0 &= $1
+  261. copy_slot_unmasked             ok = $0
+  262. copy_constant                  $1 = 0x3F800000 (1.0)
+  263. copy_slot_unmasked             $2 = f
+  264. copy_slot_unmasked             $3 = f2(0)
+  265. mul_float                      $2 *= $3
+  266. copy_slot_unmasked             $3 = f3(0)
+  267. mul_float                      $2 *= $3
+  268. copy_slot_unmasked             $3 = f4(0)
+  269. mul_float                      $2 *= $3
+  270. copy_slot_unmasked             $3 = f2x2(0)
+  271. mul_float                      $2 *= $3
+  272. copy_slot_unmasked             $3 = f3x3(0)
+  273. mul_float                      $2 *= $3
+  274. copy_slot_unmasked             $3 = f4x4(0)
+  275. mul_float                      $2 *= $3
+  276. cmpeq_float                    $1 = equal($1, $2)
+  277. bitwise_and_int                $0 &= $1
+  278. copy_slot_unmasked             ok = $0
+  279. copy_constant                  $1 = 0x00000001 (1.401298e-45)
+  280. copy_slot_unmasked             $2 = i
+  281. copy_slot_unmasked             $3 = i2(0)
+  282. mul_int                        $2 *= $3
+  283. copy_slot_unmasked             $3 = i3(0)
+  284. mul_int                        $2 *= $3
+  285. copy_slot_unmasked             $3 = i4(0)
+  286. mul_int                        $2 *= $3
+  287. cmpeq_int                      $1 = equal($1, $2)
+  288. bitwise_and_int                $0 &= $1
+  289. copy_slot_unmasked             ok = $0
+  290. copy_slot_unmasked             $1 = b
+  291. copy_slot_unmasked             $2 = b2(0)
+  292. bitwise_and_int                $1 &= $2
+  293. copy_slot_unmasked             $2 = b3(0)
+  294. bitwise_and_int                $1 &= $2
+  295. copy_slot_unmasked             $2 = b4(0)
+  296. bitwise_and_int                $1 &= $2
+  297. bitwise_and_int                $0 &= $1
+  298. copy_slot_unmasked             ok = $0
+  299. swizzle_4                      $0..3 = ($0..3).xxxx
+  300. copy_4_constants               $4..7 = colorRed
+  301. copy_4_constants               $8..11 = colorGreen
+  302. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  303. copy_4_slots_unmasked          [main].result = $0..3
+  304. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/OutParamsTricky.skrp b/tests/sksl/shared/OutParamsTricky.skrp
index 4fa4ed0..a1902d8 100644
--- a/tests/sksl/shared/OutParamsTricky.skrp
+++ b/tests/sksl/shared/OutParamsTricky.skrp
@@ -7,38 +7,37 @@
     7. copy_4_slots_unmasked          color = result
     8. copy_constant                  x = 0x3F800000 (1.0)
     9. copy_constant                  y = 0x40000000 (2.0)
-   10. copy_slot_unmasked             color₁(0) = color(0)
-   11. copy_slot_unmasked             color₁(1) = color(2)
-   12. copy_constant                  z = 0x40A00000 (5.0)
-   13. copy_2_slots_unmasked          $0..1 = color₁
-   14. swizzle_2                      $0..1 = ($0..1).yx
-   15. copy_2_slots_unmasked          color₁ = $0..1
-   16. copy_2_slots_unmasked          $0..1 = x, y
-   17. add_float                      $0 += $1
-   18. copy_slot_unmasked             [tricky].result(1) = z
-   19. copy_slot_unmasked             [tricky].result(0) = $0
-   20. copy_2_slots_unmasked          $0..1 = color₁
-   21. copy_slot_unmasked             color(0) = $0
-   22. copy_slot_unmasked             color(2) = $1
+   10. copy_4_slots_unmasked          $0..3 = color
+   11. swizzle_1                      $1 = ($1..2).y
+   12. copy_2_slots_unmasked          color₁ = $0..1
+   13. copy_constant                  z = 0x40A00000 (5.0)
+   14. copy_2_slots_unmasked          $0..1 = color₁
+   15. swizzle_2                      $0..1 = ($0..1).yx
+   16. copy_2_slots_unmasked          color₁ = $0..1
+   17. copy_2_slots_unmasked          $0..1 = x, y
+   18. add_float                      $0 += $1
+   19. copy_slot_unmasked             [tricky].result(1) = z
+   20. copy_slot_unmasked             [tricky].result(0) = $0
+   21. copy_2_slots_unmasked          $0..1 = color₁
+   22. swizzle_copy_2_slots_masked    (color(0..2)).xz = Mask($0..1)
    23. copy_2_slots_unmasked          $0..1 = [tricky].result
    24. label                          label 0x00000001
    25. copy_2_slots_unmasked          t = $0..1
-   26. copy_slot_unmasked             color(1) = $0
-   27. copy_slot_unmasked             color(3) = $1
-   28. copy_4_slots_unmasked          $0..3 = color
-   29. copy_4_slots_unmasked          result = $0..3
-   30. label                          label 0x00000000
-   31. copy_4_slots_unmasked          $0..3 = result
-   32. copy_constant                  $4 = 0x40000000 (2.0)
-   33. copy_constant                  $5 = 0x40400000 (3.0)
-   34. zero_slot_unmasked             $6 = 0
-   35. copy_constant                  $7 = 0x40A00000 (5.0)
-   36. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
-   37. bitwise_and_2_ints             $0..1 &= $2..3
-   38. bitwise_and_int                $0 &= $1
-   39. swizzle_4                      $0..3 = ($0..3).xxxx
-   40. copy_4_constants               $4..7 = colorRed
-   41. copy_4_constants               $8..11 = colorGreen
-   42. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-   43. copy_4_slots_unmasked          [main].result = $0..3
-   44. load_src                       src.rgba = [main].result
+   26. swizzle_copy_2_slots_masked    (color).yw = Mask($0..1)
+   27. copy_4_slots_unmasked          $0..3 = color
+   28. copy_4_slots_unmasked          result = $0..3
+   29. label                          label 0x00000000
+   30. copy_4_slots_unmasked          $0..3 = result
+   31. copy_constant                  $4 = 0x40000000 (2.0)
+   32. copy_constant                  $5 = 0x40400000 (3.0)
+   33. zero_slot_unmasked             $6 = 0
+   34. copy_constant                  $7 = 0x40A00000 (5.0)
+   35. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
+   36. bitwise_and_2_ints             $0..1 &= $2..3
+   37. bitwise_and_int                $0 &= $1
+   38. swizzle_4                      $0..3 = ($0..3).xxxx
+   39. copy_4_constants               $4..7 = colorRed
+   40. copy_4_constants               $8..11 = colorGreen
+   41. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+   42. copy_4_slots_unmasked          [main].result = $0..3
+   43. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/StructsInFunctions.skrp b/tests/sksl/shared/StructsInFunctions.skrp
index 75c7c8a..b87142b 100644
--- a/tests/sksl/shared/StructsInFunctions.skrp
+++ b/tests/sksl/shared/StructsInFunctions.skrp
@@ -45,178 +45,177 @@
    45. copy_2_slots_unmasked          $0..1 = [returns_a_struct].result.x, [returns_a_struct].result.y
    46. label                          label 0x00000004
    47. copy_2_slots_unmasked          n1.a.x, n1.a.y = $0..1
-   48. copy_2_slots_unmasked          $0..1 = n1.a.x, n1.a.y
-   49. copy_2_slots_unmasked          n1.b.x, n1.b.y = $0..1
-   50. copy_4_slots_unmasked          $0..3 = n1.a.x, n1.a.y, n1.b.x, n1.b.y
-   51. copy_4_slots_unmasked          n2.a.x, n2.a.y, n2.b.x, n2.b.y = $0..3
-   52. copy_4_slots_unmasked          n3.a.x, n3.a.y, n3.b.x, n3.b.y = $0..3
-   53. copy_2_slots_unmasked          s.x₃, s.y₃ = n3.b.x, n3.b.y
-   54. copy_slot_unmasked             $0 = s.x₃
-   55. copy_constant                  $1 = 0x3F800000 (1.0)
-   56. add_float                      $0 += $1
-   57. copy_slot_unmasked             s.x₃ = $0
-   58. copy_slot_unmasked             $0 = s.y₃
-   59. copy_constant                  $1 = 0x00000001 (1.401298e-45)
-   60. add_int                        $0 += $1
-   61. copy_slot_unmasked             s.y₃ = $0
-   62. copy_2_slots_unmasked          $0..1 = s.x₃, s.y₃
-   63. copy_2_slots_unmasked          n3.b.x, n3.b.y = $0..1
-   64. label                          label 0x00000005
-   65. copy_constant                  c1.f4(0) = 0x3F800000 (1.0)
-   66. copy_constant                  c1.f4(1) = 0x40000000 (2.0)
-   67. copy_constant                  c1.f4(2) = 0x40400000 (3.0)
-   68. copy_constant                  c1.f4(3) = 0x40800000 (4.0)
-   69. copy_constant                  c1.i3(0) = 0x00000005 (7.006492e-45)
-   70. copy_constant                  c1.i3(1) = 0x00000006 (8.407791e-45)
-   71. copy_constant                  c1.i3(2) = 0x00000007 (9.809089e-45)
-   72. copy_constant                  $0 = colorGreen(1)
-   73. copy_constant                  c2.f4(1) = 0x40000000 (2.0)
-   74. copy_constant                  c2.f4(2) = 0x40400000 (3.0)
-   75. copy_constant                  c2.f4(3) = 0x40800000 (4.0)
-   76. copy_constant                  c2.i3(0) = 0x00000005 (7.006492e-45)
-   77. copy_constant                  c2.i3(1) = 0x00000006 (8.407791e-45)
-   78. copy_constant                  c2.i3(2) = 0x00000007 (9.809089e-45)
-   79. copy_slot_unmasked             c2.f4(0) = $0
-   80. copy_constant                  $0 = colorGreen(0)
-   81. copy_constant                  c3.f4(1) = 0x40000000 (2.0)
-   82. copy_constant                  c3.f4(2) = 0x40400000 (3.0)
-   83. copy_constant                  c3.f4(3) = 0x40800000 (4.0)
-   84. copy_constant                  c3.i3(0) = 0x00000005 (7.006492e-45)
-   85. copy_constant                  c3.i3(1) = 0x00000006 (8.407791e-45)
-   86. copy_constant                  c3.i3(2) = 0x00000007 (9.809089e-45)
-   87. copy_slot_unmasked             c3.f4(0) = $0
-   88. store_condition_mask           $14 = CondMask
-   89. copy_slot_unmasked             $15 = x
-   90. copy_constant                  $16 = 0x40400000 (3.0)
-   91. cmpeq_float                    $15 = equal($15, $16)
-   92. copy_slot_unmasked             $16 = s.x₁
-   93. copy_constant                  $17 = 0x40000000 (2.0)
-   94. cmpeq_float                    $16 = equal($16, $17)
-   95. bitwise_and_int                $15 &= $16
-   96. copy_slot_unmasked             $16 = s.y₁
-   97. copy_constant                  $17 = 0x00000003 (4.203895e-45)
-   98. cmpeq_int                      $16 = equal($16, $17)
-   99. bitwise_and_int                $15 &= $16
-  100. copy_slot_unmasked             $16 = s.x₁
-  101. copy_slot_unmasked             $17 = expected.x
-  102. cmpeq_float                    $16 = equal($16, $17)
-  103. copy_slot_unmasked             $17 = s.y₁
-  104. copy_slot_unmasked             $18 = expected.y
-  105. cmpeq_int                      $17 = equal($17, $18)
-  106. bitwise_and_int                $16 &= $17
-  107. bitwise_and_int                $15 &= $16
-  108. copy_slot_unmasked             $16 = s.x₁
-  109. copy_constant                  $12 = 0x40000000 (2.0)
-  110. copy_constant                  $13 = 0x00000003 (4.203895e-45)
-  111. copy_slot_unmasked             $17 = $12
-  112. cmpeq_float                    $16 = equal($16, $17)
-  113. copy_slot_unmasked             $17 = s.y₁
-  114. copy_slot_unmasked             $18 = $13
-  115. cmpeq_int                      $17 = equal($17, $18)
-  116. bitwise_and_int                $16 &= $17
-  117. bitwise_and_int                $15 &= $16
-  118. zero_slot_unmasked             $0 = 0
-  119. merge_condition_mask           CondMask = $14 & $15
-  120. branch_if_no_active_lanes      branch_if_no_active_lanes +18 (label 6 at #138)
-  121. copy_slot_unmasked             $1 = s.x₁
-  122. branch_if_no_active_lanes      branch_if_no_active_lanes +8 (label 7 at #130)
-  123. zero_2_slots_unmasked          s.x, s.y = 0
-  124. copy_constant                  $12 = 0x3F800000 (1.0)
-  125. copy_slot_masked               s.x = Mask($12)
-  126. copy_constant                  $12 = 0x00000002 (2.802597e-45)
-  127. copy_slot_masked               s.y = Mask($12)
-  128. copy_2_slots_unmasked          $12..13 = s.x, s.y
-  129. copy_2_slots_masked            [returns_a_struct].result.x, [returns_a_struct].result.y = Mask($12..13)
-  130. label                          label 0x00000007
-  131. copy_slot_unmasked             $2 = $12
-  132. cmpne_float                    $1 = notEqual($1, $2)
-  133. copy_slot_unmasked             $2 = s.y₁
-  134. copy_slot_unmasked             $3 = $13
-  135. cmpne_int                      $2 = notEqual($2, $3)
-  136. bitwise_or_int                 $1 |= $2
-  137. copy_slot_masked               $0 = Mask($1)
-  138. label                          label 0x00000006
-  139. load_condition_mask            CondMask = $14
-  140. copy_slot_unmasked             $1 = n1.a.x
-  141. copy_slot_unmasked             $2 = n2.a.x
-  142. cmpeq_float                    $1 = equal($1, $2)
-  143. copy_slot_unmasked             $2 = n1.a.y
-  144. copy_slot_unmasked             $3 = n2.a.y
-  145. cmpeq_int                      $2 = equal($2, $3)
-  146. bitwise_and_int                $1 &= $2
-  147. copy_slot_unmasked             $2 = n1.b.x
-  148. copy_slot_unmasked             $3 = n2.b.x
-  149. cmpeq_float                    $2 = equal($2, $3)
-  150. copy_slot_unmasked             $3 = n1.b.y
-  151. copy_slot_unmasked             $4 = n2.b.y
-  152. cmpeq_int                      $3 = equal($3, $4)
-  153. bitwise_and_int                $2 &= $3
-  154. bitwise_and_int                $1 &= $2
-  155. bitwise_and_int                $0 &= $1
-  156. copy_slot_unmasked             $1 = n1.a.x
-  157. copy_slot_unmasked             $2 = n3.a.x
-  158. cmpne_float                    $1 = notEqual($1, $2)
-  159. copy_slot_unmasked             $2 = n1.a.y
-  160. copy_slot_unmasked             $3 = n3.a.y
-  161. cmpne_int                      $2 = notEqual($2, $3)
-  162. bitwise_or_int                 $1 |= $2
-  163. copy_slot_unmasked             $2 = n1.b.x
-  164. copy_slot_unmasked             $3 = n3.b.x
-  165. cmpne_float                    $2 = notEqual($2, $3)
-  166. copy_slot_unmasked             $3 = n1.b.y
-  167. copy_slot_unmasked             $4 = n3.b.y
-  168. cmpne_int                      $3 = notEqual($3, $4)
-  169. bitwise_or_int                 $2 |= $3
-  170. bitwise_or_int                 $1 |= $2
-  171. bitwise_and_int                $0 &= $1
-  172. copy_slot_unmasked             $1 = n3.a.x
-  173. copy_constant                  $14 = 0x3F800000 (1.0)
-  174. copy_constant                  $15 = 0x00000002 (2.802597e-45)
-  175. copy_constant                  $16 = 0x40000000 (2.0)
-  176. copy_constant                  $17 = 0x00000003 (4.203895e-45)
-  177. copy_slot_unmasked             $2 = $14
-  178. cmpeq_float                    $1 = equal($1, $2)
-  179. copy_slot_unmasked             $2 = n3.a.y
-  180. copy_slot_unmasked             $3 = $15
-  181. cmpeq_int                      $2 = equal($2, $3)
-  182. bitwise_and_int                $1 &= $2
-  183. copy_slot_unmasked             $2 = n3.b.x
-  184. copy_slot_unmasked             $3 = $16
-  185. cmpeq_float                    $2 = equal($2, $3)
-  186. copy_slot_unmasked             $3 = n3.b.y
-  187. copy_slot_unmasked             $4 = $17
-  188. cmpeq_int                      $3 = equal($3, $4)
-  189. bitwise_and_int                $2 &= $3
-  190. bitwise_and_int                $1 &= $2
-  191. bitwise_and_int                $0 &= $1
-  192. copy_4_slots_unmasked          $1..4 = c1.f4
-  193. copy_4_slots_unmasked          $5..8 = c2.f4
-  194. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  195. bitwise_and_2_ints             $1..2 &= $3..4
-  196. bitwise_and_int                $1 &= $2
-  197. copy_3_slots_unmasked          $2..4 = c1.i3
-  198. copy_3_slots_unmasked          $5..7 = c2.i3
-  199. cmpeq_3_ints                   $2..4 = equal($2..4, $5..7)
-  200. bitwise_and_int                $3 &= $4
-  201. bitwise_and_int                $2 &= $3
-  202. bitwise_and_int                $1 &= $2
-  203. bitwise_and_int                $0 &= $1
-  204. copy_4_slots_unmasked          $1..4 = c2.f4
-  205. copy_4_slots_unmasked          $5..8 = c3.f4
-  206. cmpne_4_floats                 $1..4 = notEqual($1..4, $5..8)
-  207. bitwise_or_2_ints              $1..2 |= $3..4
-  208. bitwise_or_int                 $1 |= $2
-  209. copy_3_slots_unmasked          $2..4 = c2.i3
-  210. copy_3_slots_unmasked          $5..7 = c3.i3
-  211. cmpne_3_ints                   $2..4 = notEqual($2..4, $5..7)
-  212. bitwise_or_int                 $3 |= $4
-  213. bitwise_or_int                 $2 |= $3
-  214. bitwise_or_int                 $1 |= $2
-  215. bitwise_and_int                $0 &= $1
-  216. copy_slot_unmasked             valid = $0
-  217. swizzle_4                      $0..3 = ($0..3).xxxx
-  218. copy_4_constants               $4..7 = colorRed
-  219. copy_4_constants               $8..11 = colorGreen
-  220. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  221. copy_4_slots_unmasked          [main].result = $0..3
-  222. load_src                       src.rgba = [main].result
+   48. copy_2_slots_unmasked          n1.b.x, n1.b.y = $0..1
+   49. copy_4_slots_unmasked          $0..3 = n1.a.x, n1.a.y, n1.b.x, n1.b.y
+   50. copy_4_slots_unmasked          n2.a.x, n2.a.y, n2.b.x, n2.b.y = $0..3
+   51. copy_4_slots_unmasked          n3.a.x, n3.a.y, n3.b.x, n3.b.y = $0..3
+   52. copy_2_slots_unmasked          s.x₃, s.y₃ = n3.b.x, n3.b.y
+   53. copy_slot_unmasked             $0 = s.x₃
+   54. copy_constant                  $1 = 0x3F800000 (1.0)
+   55. add_float                      $0 += $1
+   56. copy_slot_unmasked             s.x₃ = $0
+   57. copy_slot_unmasked             $0 = s.y₃
+   58. copy_constant                  $1 = 0x00000001 (1.401298e-45)
+   59. add_int                        $0 += $1
+   60. copy_slot_unmasked             s.y₃ = $0
+   61. copy_2_slots_unmasked          $0..1 = s.x₃, s.y₃
+   62. copy_2_slots_unmasked          n3.b.x, n3.b.y = $0..1
+   63. label                          label 0x00000005
+   64. copy_constant                  c1.f4(0) = 0x3F800000 (1.0)
+   65. copy_constant                  c1.f4(1) = 0x40000000 (2.0)
+   66. copy_constant                  c1.f4(2) = 0x40400000 (3.0)
+   67. copy_constant                  c1.f4(3) = 0x40800000 (4.0)
+   68. copy_constant                  c1.i3(0) = 0x00000005 (7.006492e-45)
+   69. copy_constant                  c1.i3(1) = 0x00000006 (8.407791e-45)
+   70. copy_constant                  c1.i3(2) = 0x00000007 (9.809089e-45)
+   71. copy_constant                  $0 = colorGreen(1)
+   72. copy_constant                  c2.f4(1) = 0x40000000 (2.0)
+   73. copy_constant                  c2.f4(2) = 0x40400000 (3.0)
+   74. copy_constant                  c2.f4(3) = 0x40800000 (4.0)
+   75. copy_constant                  c2.i3(0) = 0x00000005 (7.006492e-45)
+   76. copy_constant                  c2.i3(1) = 0x00000006 (8.407791e-45)
+   77. copy_constant                  c2.i3(2) = 0x00000007 (9.809089e-45)
+   78. copy_slot_unmasked             c2.f4(0) = $0
+   79. copy_constant                  $0 = colorGreen(0)
+   80. copy_constant                  c3.f4(1) = 0x40000000 (2.0)
+   81. copy_constant                  c3.f4(2) = 0x40400000 (3.0)
+   82. copy_constant                  c3.f4(3) = 0x40800000 (4.0)
+   83. copy_constant                  c3.i3(0) = 0x00000005 (7.006492e-45)
+   84. copy_constant                  c3.i3(1) = 0x00000006 (8.407791e-45)
+   85. copy_constant                  c3.i3(2) = 0x00000007 (9.809089e-45)
+   86. copy_slot_unmasked             c3.f4(0) = $0
+   87. store_condition_mask           $14 = CondMask
+   88. copy_slot_unmasked             $15 = x
+   89. copy_constant                  $16 = 0x40400000 (3.0)
+   90. cmpeq_float                    $15 = equal($15, $16)
+   91. copy_slot_unmasked             $16 = s.x₁
+   92. copy_constant                  $17 = 0x40000000 (2.0)
+   93. cmpeq_float                    $16 = equal($16, $17)
+   94. bitwise_and_int                $15 &= $16
+   95. copy_slot_unmasked             $16 = s.y₁
+   96. copy_constant                  $17 = 0x00000003 (4.203895e-45)
+   97. cmpeq_int                      $16 = equal($16, $17)
+   98. bitwise_and_int                $15 &= $16
+   99. copy_slot_unmasked             $16 = s.x₁
+  100. copy_slot_unmasked             $17 = expected.x
+  101. cmpeq_float                    $16 = equal($16, $17)
+  102. copy_slot_unmasked             $17 = s.y₁
+  103. copy_slot_unmasked             $18 = expected.y
+  104. cmpeq_int                      $17 = equal($17, $18)
+  105. bitwise_and_int                $16 &= $17
+  106. bitwise_and_int                $15 &= $16
+  107. copy_slot_unmasked             $16 = s.x₁
+  108. copy_constant                  $12 = 0x40000000 (2.0)
+  109. copy_constant                  $13 = 0x00000003 (4.203895e-45)
+  110. copy_slot_unmasked             $17 = $12
+  111. cmpeq_float                    $16 = equal($16, $17)
+  112. copy_slot_unmasked             $17 = s.y₁
+  113. copy_slot_unmasked             $18 = $13
+  114. cmpeq_int                      $17 = equal($17, $18)
+  115. bitwise_and_int                $16 &= $17
+  116. bitwise_and_int                $15 &= $16
+  117. zero_slot_unmasked             $0 = 0
+  118. merge_condition_mask           CondMask = $14 & $15
+  119. branch_if_no_active_lanes      branch_if_no_active_lanes +18 (label 6 at #137)
+  120. copy_slot_unmasked             $1 = s.x₁
+  121. branch_if_no_active_lanes      branch_if_no_active_lanes +8 (label 7 at #129)
+  122. zero_2_slots_unmasked          s.x, s.y = 0
+  123. copy_constant                  $12 = 0x3F800000 (1.0)
+  124. copy_slot_masked               s.x = Mask($12)
+  125. copy_constant                  $12 = 0x00000002 (2.802597e-45)
+  126. copy_slot_masked               s.y = Mask($12)
+  127. copy_2_slots_unmasked          $12..13 = s.x, s.y
+  128. copy_2_slots_masked            [returns_a_struct].result.x, [returns_a_struct].result.y = Mask($12..13)
+  129. label                          label 0x00000007
+  130. copy_slot_unmasked             $2 = $12
+  131. cmpne_float                    $1 = notEqual($1, $2)
+  132. copy_slot_unmasked             $2 = s.y₁
+  133. copy_slot_unmasked             $3 = $13
+  134. cmpne_int                      $2 = notEqual($2, $3)
+  135. bitwise_or_int                 $1 |= $2
+  136. copy_slot_masked               $0 = Mask($1)
+  137. label                          label 0x00000006
+  138. load_condition_mask            CondMask = $14
+  139. copy_slot_unmasked             $1 = n1.a.x
+  140. copy_slot_unmasked             $2 = n2.a.x
+  141. cmpeq_float                    $1 = equal($1, $2)
+  142. copy_slot_unmasked             $2 = n1.a.y
+  143. copy_slot_unmasked             $3 = n2.a.y
+  144. cmpeq_int                      $2 = equal($2, $3)
+  145. bitwise_and_int                $1 &= $2
+  146. copy_slot_unmasked             $2 = n1.b.x
+  147. copy_slot_unmasked             $3 = n2.b.x
+  148. cmpeq_float                    $2 = equal($2, $3)
+  149. copy_slot_unmasked             $3 = n1.b.y
+  150. copy_slot_unmasked             $4 = n2.b.y
+  151. cmpeq_int                      $3 = equal($3, $4)
+  152. bitwise_and_int                $2 &= $3
+  153. bitwise_and_int                $1 &= $2
+  154. bitwise_and_int                $0 &= $1
+  155. copy_slot_unmasked             $1 = n1.a.x
+  156. copy_slot_unmasked             $2 = n3.a.x
+  157. cmpne_float                    $1 = notEqual($1, $2)
+  158. copy_slot_unmasked             $2 = n1.a.y
+  159. copy_slot_unmasked             $3 = n3.a.y
+  160. cmpne_int                      $2 = notEqual($2, $3)
+  161. bitwise_or_int                 $1 |= $2
+  162. copy_slot_unmasked             $2 = n1.b.x
+  163. copy_slot_unmasked             $3 = n3.b.x
+  164. cmpne_float                    $2 = notEqual($2, $3)
+  165. copy_slot_unmasked             $3 = n1.b.y
+  166. copy_slot_unmasked             $4 = n3.b.y
+  167. cmpne_int                      $3 = notEqual($3, $4)
+  168. bitwise_or_int                 $2 |= $3
+  169. bitwise_or_int                 $1 |= $2
+  170. bitwise_and_int                $0 &= $1
+  171. copy_slot_unmasked             $1 = n3.a.x
+  172. copy_constant                  $14 = 0x3F800000 (1.0)
+  173. copy_constant                  $15 = 0x00000002 (2.802597e-45)
+  174. copy_constant                  $16 = 0x40000000 (2.0)
+  175. copy_constant                  $17 = 0x00000003 (4.203895e-45)
+  176. copy_slot_unmasked             $2 = $14
+  177. cmpeq_float                    $1 = equal($1, $2)
+  178. copy_slot_unmasked             $2 = n3.a.y
+  179. copy_slot_unmasked             $3 = $15
+  180. cmpeq_int                      $2 = equal($2, $3)
+  181. bitwise_and_int                $1 &= $2
+  182. copy_slot_unmasked             $2 = n3.b.x
+  183. copy_slot_unmasked             $3 = $16
+  184. cmpeq_float                    $2 = equal($2, $3)
+  185. copy_slot_unmasked             $3 = n3.b.y
+  186. copy_slot_unmasked             $4 = $17
+  187. cmpeq_int                      $3 = equal($3, $4)
+  188. bitwise_and_int                $2 &= $3
+  189. bitwise_and_int                $1 &= $2
+  190. bitwise_and_int                $0 &= $1
+  191. copy_4_slots_unmasked          $1..4 = c1.f4
+  192. copy_4_slots_unmasked          $5..8 = c2.f4
+  193. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  194. bitwise_and_2_ints             $1..2 &= $3..4
+  195. bitwise_and_int                $1 &= $2
+  196. copy_3_slots_unmasked          $2..4 = c1.i3
+  197. copy_3_slots_unmasked          $5..7 = c2.i3
+  198. cmpeq_3_ints                   $2..4 = equal($2..4, $5..7)
+  199. bitwise_and_int                $3 &= $4
+  200. bitwise_and_int                $2 &= $3
+  201. bitwise_and_int                $1 &= $2
+  202. bitwise_and_int                $0 &= $1
+  203. copy_4_slots_unmasked          $1..4 = c2.f4
+  204. copy_4_slots_unmasked          $5..8 = c3.f4
+  205. cmpne_4_floats                 $1..4 = notEqual($1..4, $5..8)
+  206. bitwise_or_2_ints              $1..2 |= $3..4
+  207. bitwise_or_int                 $1 |= $2
+  208. copy_3_slots_unmasked          $2..4 = c2.i3
+  209. copy_3_slots_unmasked          $5..7 = c3.i3
+  210. cmpne_3_ints                   $2..4 = notEqual($2..4, $5..7)
+  211. bitwise_or_int                 $3 |= $4
+  212. bitwise_or_int                 $2 |= $3
+  213. bitwise_or_int                 $1 |= $2
+  214. bitwise_and_int                $0 &= $1
+  215. copy_slot_unmasked             valid = $0
+  216. swizzle_4                      $0..3 = ($0..3).xxxx
+  217. copy_4_constants               $4..7 = colorRed
+  218. copy_4_constants               $8..11 = colorGreen
+  219. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  220. copy_4_slots_unmasked          [main].result = $0..3
+  221. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/SwizzleAsLValue.skrp b/tests/sksl/shared/SwizzleAsLValue.skrp
index b21a0d1..f0d9e84 100644
--- a/tests/sksl/shared/SwizzleAsLValue.skrp
+++ b/tests/sksl/shared/SwizzleAsLValue.skrp
@@ -12,43 +12,38 @@
    12. copy_constant                  $1 = 0x40800000 (4.0)
    13. mul_float                      $0 *= $1
    14. copy_slot_unmasked             color(1) = $0
-   15. copy_2_slots_unmasked          $1..2 = color(2..3)
+   15. copy_3_slots_unmasked          $0..2 = color(1..3)
    16. copy_constant                  $3 = 0x3F000000 (0.5)
    17. swizzle_3                      $3..5 = ($3..5).xxx
    18. mul_3_floats                   $0..2 *= $3..5
    19. copy_3_slots_unmasked          color(1..3) = $0..2
-   20. copy_slot_unmasked             $0 = color(2)
-   21. copy_slot_unmasked             $1 = color(1)
-   22. copy_slot_unmasked             $2 = color(3)
-   23. copy_slot_unmasked             $3 = color(0)
-   24. copy_constant                  $4 = 0x3E800000 (0.25)
-   25. zero_2_slots_unmasked          $5..6 = 0
-   26. copy_constant                  $7 = 0x3F400000 (0.75)
-   27. add_4_floats                   $0..3 += $4..7
-   28. copy_slot_unmasked             color(2) = $0
-   29. copy_slot_unmasked             color(1) = $1
-   30. copy_slot_unmasked             color(3) = $2
-   31. copy_slot_unmasked             color(0) = $3
-   32. copy_slot_unmasked             $0 = color(0)
-   33. copy_slot_unmasked             $1 = color(3)
-   34. copy_constant                  $2 = 0x3F800000 (1.0)
-   35. cmple_float                    $1 = lessThanEqual($1, $2)
-   36. zero_slot_unmasked             $2 = 0
-   37. copy_slot_unmasked             $3 = color(2)
-   38. mix_int                        $1 = mix($2, $3, $1)
-   39. add_float                      $0 += $1
-   40. copy_slot_unmasked             color(0) = $0
-   41. copy_4_slots_unmasked          $0..3 = color
-   42. copy_constant                  $4 = 0x3F800000 (1.0)
-   43. copy_constant                  $5 = 0x3F800000 (1.0)
-   44. copy_constant                  $6 = 0x3E800000 (0.25)
-   45. copy_constant                  $7 = 0x3F800000 (1.0)
-   46. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
-   47. bitwise_and_2_ints             $0..1 &= $2..3
-   48. bitwise_and_int                $0 &= $1
-   49. swizzle_4                      $0..3 = ($0..3).xxxx
-   50. copy_4_constants               $4..7 = colorRed
-   51. copy_4_constants               $8..11 = colorGreen
-   52. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-   53. copy_4_slots_unmasked          [main].result = $0..3
-   54. load_src                       src.rgba = [main].result
+   20. copy_4_slots_unmasked          $0..3 = color
+   21. swizzle_4                      $0..3 = ($0..3).zywx
+   22. copy_constant                  $4 = 0x3E800000 (0.25)
+   23. zero_2_slots_unmasked          $5..6 = 0
+   24. copy_constant                  $7 = 0x3F400000 (0.75)
+   25. add_4_floats                   $0..3 += $4..7
+   26. swizzle_copy_4_slots_masked    (color).zywx = Mask($0..3)
+   27. copy_slot_unmasked             $0 = color(0)
+   28. copy_slot_unmasked             $1 = color(3)
+   29. copy_constant                  $2 = 0x3F800000 (1.0)
+   30. cmple_float                    $1 = lessThanEqual($1, $2)
+   31. zero_slot_unmasked             $2 = 0
+   32. copy_slot_unmasked             $3 = color(2)
+   33. mix_int                        $1 = mix($2, $3, $1)
+   34. add_float                      $0 += $1
+   35. copy_slot_unmasked             color(0) = $0
+   36. copy_4_slots_unmasked          $0..3 = color
+   37. copy_constant                  $4 = 0x3F800000 (1.0)
+   38. copy_constant                  $5 = 0x3F800000 (1.0)
+   39. copy_constant                  $6 = 0x3E800000 (0.25)
+   40. copy_constant                  $7 = 0x3F800000 (1.0)
+   41. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
+   42. bitwise_and_2_ints             $0..1 &= $2..3
+   43. bitwise_and_int                $0 &= $1
+   44. swizzle_4                      $0..3 = ($0..3).xxxx
+   45. copy_4_constants               $4..7 = colorRed
+   46. copy_4_constants               $8..11 = colorGreen
+   47. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+   48. copy_4_slots_unmasked          [main].result = $0..3
+   49. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/SwizzleOpt.skrp b/tests/sksl/shared/SwizzleOpt.skrp
index 839ddb6..5058e3c 100644
--- a/tests/sksl/shared/SwizzleOpt.skrp
+++ b/tests/sksl/shared/SwizzleOpt.skrp
@@ -205,28 +205,22 @@
   205. copy_constant                  $1 = 0x3F800000 (1.0)
   206. copy_2_constants               $2..3 = colorRed(1..2)
   207. copy_4_slots_unmasked          v = $0..3
-  208. copy_slot_unmasked             v(3) = $0
-  209. copy_slot_unmasked             v(2) = $1
-  210. copy_slot_unmasked             v(1) = $2
-  211. copy_slot_unmasked             v(0) = $3
-  212. copy_2_slots_unmasked          $0..1 = v(1..2)
-  213. copy_slot_unmasked             v(0) = $0
-  214. copy_slot_unmasked             v(3) = $1
+  208. swizzle_copy_4_slots_masked    (v).wzyx = Mask($0..3)
+  209. copy_2_slots_unmasked          $0..1 = v(1..2)
+  210. swizzle_copy_2_slots_masked    (v).xw = Mask($0..1)
+  211. copy_4_slots_unmasked          $0..3 = v
+  212. swizzle_2                      $0..1 = ($0..3).ww
+  213. copy_constant                  $2 = 0x3F800000 (1.0)
+  214. swizzle_copy_3_slots_masked    (v(0..2)).zyx = Mask($0..2)
   215. copy_4_slots_unmasked          $0..3 = v
-  216. swizzle_2                      $0..1 = ($0..3).ww
-  217. copy_constant                  $2 = 0x3F800000 (1.0)
-  218. copy_slot_unmasked             v(2) = $0
-  219. copy_slot_unmasked             v(1) = $1
-  220. copy_slot_unmasked             v(0) = $2
-  221. copy_4_slots_unmasked          $0..3 = v
-  222. copy_constant                  $4 = 0x3F800000 (1.0)
-  223. swizzle_4                      $4..7 = ($4..7).xxxx
-  224. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
-  225. bitwise_and_2_ints             $0..1 &= $2..3
-  226. bitwise_and_int                $0 &= $1
-  227. swizzle_4                      $0..3 = ($0..3).xxxx
-  228. copy_4_constants               $4..7 = colorRed
-  229. copy_4_constants               $8..11 = colorGreen
-  230. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  231. copy_4_slots_unmasked          [main].result = $0..3
-  232. load_src                       src.rgba = [main].result
+  216. copy_constant                  $4 = 0x3F800000 (1.0)
+  217. swizzle_4                      $4..7 = ($4..7).xxxx
+  218. cmpeq_4_floats                 $0..3 = equal($0..3, $4..7)
+  219. bitwise_and_2_ints             $0..1 &= $2..3
+  220. bitwise_and_int                $0 &= $1
+  221. swizzle_4                      $0..3 = ($0..3).xxxx
+  222. copy_4_constants               $4..7 = colorRed
+  223. copy_4_constants               $8..11 = colorGreen
+  224. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  225. copy_4_slots_unmasked          [main].result = $0..3
+  226. load_src                       src.rgba = [main].result
diff --git a/tests/sksl/shared/VectorScalarMath.skrp b/tests/sksl/shared/VectorScalarMath.skrp
index d63f542..87269fe 100644
--- a/tests/sksl/shared/VectorScalarMath.skrp
+++ b/tests/sksl/shared/VectorScalarMath.skrp
@@ -208,338 +208,332 @@
   208. swizzle_4                      $4..7 = ($4..7).xxxx
   209. add_4_floats                   $0..3 += $4..7
   210. copy_4_slots_unmasked          _3_x = $0..3
-  211. copy_4_slots_unmasked          $0..3 = _3_x
-  212. copy_constant                  $4 = 0x40000000 (2.0)
-  213. swizzle_4                      $4..7 = ($4..7).xxxx
-  214. mul_4_floats                   $0..3 *= $4..7
-  215. copy_4_slots_unmasked          _3_x = $0..3
-  216. copy_4_slots_unmasked          $0..3 = _3_x
-  217. copy_constant                  $4 = 0x40800000 (4.0)
-  218. swizzle_4                      $4..7 = ($4..7).xxxx
-  219. sub_4_floats                   $0..3 -= $4..7
-  220. copy_4_slots_unmasked          _3_x = $0..3
-  221. copy_4_slots_unmasked          $0..3 = _3_x
-  222. copy_constant                  $4 = 0x3F000000 (0.5)
-  223. swizzle_4                      $4..7 = ($4..7).xxxx
-  224. mul_4_floats                   $0..3 *= $4..7
-  225. copy_4_slots_unmasked          _3_x = $0..3
-  226. copy_slot_unmasked             $0 = _0_ok
-  227. copy_4_slots_unmasked          $1..4 = _3_x
-  228. copy_constant                  $5 = 0x40000000 (2.0)
-  229. copy_constant                  $6 = 0x41000000 (8.0)
-  230. copy_constant                  $7 = 0x41800000 (16.0)
-  231. copy_constant                  $8 = 0x40800000 (4.0)
-  232. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  233. bitwise_and_2_ints             $1..2 &= $3..4
-  234. bitwise_and_int                $1 &= $2
-  235. bitwise_and_int                $0 &= $1
-  236. copy_slot_unmasked             _0_ok = $0
-  237. copy_4_slots_unmasked          $0..3 = _3_x
-  238. copy_constant                  $4 = 0x40000000 (2.0)
-  239. swizzle_4                      $4..7 = ($4..7).xxxx
-  240. add_4_floats                   $0..3 += $4..7
-  241. copy_4_slots_unmasked          _3_x = $0..3
-  242. copy_constant                  $4 = 0x40000000 (2.0)
-  243. swizzle_4                      $4..7 = ($4..7).xxxx
-  244. mul_4_floats                   $0..3 *= $4..7
-  245. copy_4_slots_unmasked          _3_x = $0..3
-  246. copy_constant                  $4 = 0x40800000 (4.0)
-  247. swizzle_4                      $4..7 = ($4..7).xxxx
-  248. sub_4_floats                   $0..3 -= $4..7
-  249. copy_4_slots_unmasked          _3_x = $0..3
-  250. copy_constant                  $4 = 0x3F000000 (0.5)
-  251. swizzle_4                      $4..7 = ($4..7).xxxx
-  252. mul_4_floats                   $0..3 *= $4..7
-  253. copy_4_slots_unmasked          _3_x = $0..3
-  254. copy_slot_unmasked             $0 = _0_ok
-  255. copy_4_slots_unmasked          $1..4 = _3_x
-  256. copy_constant                  $5 = 0x40000000 (2.0)
-  257. copy_constant                  $6 = 0x41000000 (8.0)
-  258. copy_constant                  $7 = 0x41800000 (16.0)
-  259. copy_constant                  $8 = 0x40800000 (4.0)
-  260. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
-  261. bitwise_and_2_ints             $1..2 &= $3..4
-  262. bitwise_and_int                $1 &= $2
-  263. bitwise_and_int                $0 &= $1
-  264. copy_slot_unmasked             _0_ok = $0
-  265. store_condition_mask           $12 = CondMask
-  266. copy_slot_unmasked             $13 = _0_ok
-  267. zero_slot_unmasked             $0 = 0
-  268. merge_condition_mask           CondMask = $12 & $13
-  269. branch_if_no_active_lanes      branch_if_no_active_lanes +269 (label 1 at #538)
-  270. copy_constant                  ok = 0xFFFFFFFF
-  271. copy_4_constants               $1..4 = colorRed
+  211. copy_constant                  $4 = 0x40000000 (2.0)
+  212. swizzle_4                      $4..7 = ($4..7).xxxx
+  213. mul_4_floats                   $0..3 *= $4..7
+  214. copy_4_slots_unmasked          _3_x = $0..3
+  215. copy_constant                  $4 = 0x40800000 (4.0)
+  216. swizzle_4                      $4..7 = ($4..7).xxxx
+  217. sub_4_floats                   $0..3 -= $4..7
+  218. copy_4_slots_unmasked          _3_x = $0..3
+  219. copy_constant                  $4 = 0x3F000000 (0.5)
+  220. swizzle_4                      $4..7 = ($4..7).xxxx
+  221. mul_4_floats                   $0..3 *= $4..7
+  222. copy_4_slots_unmasked          _3_x = $0..3
+  223. copy_slot_unmasked             $0 = _0_ok
+  224. copy_4_slots_unmasked          $1..4 = _3_x
+  225. copy_constant                  $5 = 0x40000000 (2.0)
+  226. copy_constant                  $6 = 0x41000000 (8.0)
+  227. copy_constant                  $7 = 0x41800000 (16.0)
+  228. copy_constant                  $8 = 0x40800000 (4.0)
+  229. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  230. bitwise_and_2_ints             $1..2 &= $3..4
+  231. bitwise_and_int                $1 &= $2
+  232. bitwise_and_int                $0 &= $1
+  233. copy_slot_unmasked             _0_ok = $0
+  234. copy_4_slots_unmasked          $0..3 = _3_x
+  235. copy_constant                  $4 = 0x40000000 (2.0)
+  236. swizzle_4                      $4..7 = ($4..7).xxxx
+  237. add_4_floats                   $0..3 += $4..7
+  238. copy_4_slots_unmasked          _3_x = $0..3
+  239. copy_constant                  $4 = 0x40000000 (2.0)
+  240. swizzle_4                      $4..7 = ($4..7).xxxx
+  241. mul_4_floats                   $0..3 *= $4..7
+  242. copy_4_slots_unmasked          _3_x = $0..3
+  243. copy_constant                  $4 = 0x40800000 (4.0)
+  244. swizzle_4                      $4..7 = ($4..7).xxxx
+  245. sub_4_floats                   $0..3 -= $4..7
+  246. copy_4_slots_unmasked          _3_x = $0..3
+  247. copy_constant                  $4 = 0x3F000000 (0.5)
+  248. swizzle_4                      $4..7 = ($4..7).xxxx
+  249. mul_4_floats                   $0..3 *= $4..7
+  250. copy_4_slots_unmasked          _3_x = $0..3
+  251. copy_slot_unmasked             $0 = _0_ok
+  252. copy_4_slots_unmasked          $1..4 = _3_x
+  253. copy_constant                  $5 = 0x40000000 (2.0)
+  254. copy_constant                  $6 = 0x41000000 (8.0)
+  255. copy_constant                  $7 = 0x41800000 (16.0)
+  256. copy_constant                  $8 = 0x40800000 (4.0)
+  257. cmpeq_4_floats                 $1..4 = equal($1..4, $5..8)
+  258. bitwise_and_2_ints             $1..2 &= $3..4
+  259. bitwise_and_int                $1 &= $2
+  260. bitwise_and_int                $0 &= $1
+  261. copy_slot_unmasked             _0_ok = $0
+  262. store_condition_mask           $12 = CondMask
+  263. copy_slot_unmasked             $13 = _0_ok
+  264. zero_slot_unmasked             $0 = 0
+  265. merge_condition_mask           CondMask = $12 & $13
+  266. branch_if_no_active_lanes      branch_if_no_active_lanes +266 (label 1 at #532)
+  267. copy_constant                  ok = 0xFFFFFFFF
+  268. copy_4_constants               $1..4 = colorRed
+  269. cast_to_int_from_4_floats      $1..4 = FloatToInt($1..4)
+  270. copy_4_slots_unmasked          inputRed = $1..4
+  271. copy_4_constants               $1..4 = colorGreen
   272. cast_to_int_from_4_floats      $1..4 = FloatToInt($1..4)
-  273. copy_4_slots_unmasked          inputRed = $1..4
-  274. copy_4_constants               $1..4 = colorGreen
-  275. cast_to_int_from_4_floats      $1..4 = FloatToInt($1..4)
-  276. copy_4_slots_unmasked          inputGreen = $1..4
-  277. copy_4_slots_unmasked          $1..4 = inputRed
-  278. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  279. swizzle_4                      $5..8 = ($5..8).xxxx
-  280. add_4_ints                     $1..4 += $5..8
-  281. copy_4_slots_unmasked          x = $1..4
-  282. copy_slot_unmasked             $1 = ok
-  283. copy_4_slots_unmasked          $2..5 = x
-  284. copy_constant                  $6 = 0x00000003 (4.203895e-45)
-  285. copy_constant                  $7 = 0x00000002 (2.802597e-45)
-  286. copy_constant                  $8 = 0x00000002 (2.802597e-45)
-  287. copy_constant                  $9 = 0x00000003 (4.203895e-45)
-  288. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  289. bitwise_and_2_ints             $2..3 &= $4..5
-  290. bitwise_and_int                $2 &= $3
-  291. bitwise_and_int                $1 &= $2
-  292. copy_slot_masked               ok = Mask($1)
-  293. copy_4_slots_unmasked          $1..4 = inputGreen
-  294. swizzle_4                      $1..4 = ($1..4).ywxz
-  295. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  296. swizzle_4                      $5..8 = ($5..8).xxxx
-  297. sub_4_ints                     $1..4 -= $5..8
-  298. copy_4_slots_masked            x = Mask($1..4)
-  299. copy_slot_unmasked             $1 = ok
-  300. copy_4_slots_unmasked          $2..5 = x
-  301. copy_constant                  $6 = 0xFFFFFFFF
-  302. copy_constant                  $7 = 0xFFFFFFFF
-  303. copy_constant                  $8 = 0xFFFFFFFE
-  304. copy_constant                  $9 = 0xFFFFFFFE
-  305. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  306. bitwise_and_2_ints             $2..3 &= $4..5
-  307. bitwise_and_int                $2 &= $3
-  308. bitwise_and_int                $1 &= $2
-  309. copy_slot_masked               ok = Mask($1)
-  310. copy_4_slots_unmasked          $1..4 = inputRed
-  311. copy_slot_unmasked             $5 = inputGreen(1)
-  312. swizzle_4                      $5..8 = ($5..8).xxxx
-  313. add_4_ints                     $1..4 += $5..8
-  314. copy_4_slots_masked            x = Mask($1..4)
-  315. copy_slot_unmasked             $1 = ok
-  316. copy_4_slots_unmasked          $2..5 = x
-  317. copy_constant                  $6 = 0x00000002 (2.802597e-45)
-  318. copy_constant                  $7 = 0x00000001 (1.401298e-45)
-  319. copy_constant                  $8 = 0x00000001 (1.401298e-45)
-  320. copy_constant                  $9 = 0x00000002 (2.802597e-45)
-  321. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  322. bitwise_and_2_ints             $2..3 &= $4..5
-  323. bitwise_and_int                $2 &= $3
-  324. bitwise_and_int                $1 &= $2
-  325. copy_slot_masked               ok = Mask($1)
-  326. copy_4_slots_unmasked          $1..4 = inputGreen
-  327. swizzle_3                      $1..3 = ($1..4).wyw
-  328. copy_constant                  $4 = 0x00000009 (1.261169e-44)
-  329. swizzle_3                      $4..6 = ($4..6).xxx
-  330. mul_3_ints                     $1..3 *= $4..6
-  331. copy_3_slots_masked            x(0..2) = Mask($1..3)
-  332. copy_slot_unmasked             $1 = ok
-  333. copy_4_slots_unmasked          $2..5 = x
-  334. copy_constant                  $6 = 0x00000009 (1.261169e-44)
-  335. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  336. copy_constant                  $8 = 0x00000009 (1.261169e-44)
-  337. copy_constant                  $9 = 0x00000002 (2.802597e-45)
-  338. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  339. bitwise_and_2_ints             $2..3 &= $4..5
-  340. bitwise_and_int                $2 &= $3
-  341. bitwise_and_int                $1 &= $2
-  342. copy_slot_masked               ok = Mask($1)
-  343. copy_2_slots_unmasked          $1..2 = x(2..3)
-  344. copy_constant                  $3 = 0x00000004 (5.605194e-45)
-  345. copy_slot_unmasked             $4 = $3
-  346. div_2_ints                     $1..2 /= $3..4
-  347. copy_2_slots_masked            x(0..1) = Mask($1..2)
-  348. copy_slot_unmasked             $1 = ok
-  349. copy_4_slots_unmasked          $2..5 = x
-  350. copy_constant                  $6 = 0x00000002 (2.802597e-45)
-  351. zero_slot_unmasked             $7 = 0
-  352. copy_constant                  $8 = 0x00000009 (1.261169e-44)
-  353. copy_constant                  $9 = 0x00000002 (2.802597e-45)
-  354. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  355. bitwise_and_2_ints             $2..3 &= $4..5
-  356. bitwise_and_int                $2 &= $3
-  357. bitwise_and_int                $1 &= $2
-  358. copy_slot_masked               ok = Mask($1)
-  359. copy_4_slots_unmasked          $1..4 = inputRed
-  360. copy_constant                  $5 = 0x00000005 (7.006492e-45)
-  361. swizzle_4                      $5..8 = ($5..8).xxxx
-  362. mul_4_ints                     $1..4 *= $5..8
-  363. swizzle_4                      $1..4 = ($1..4).yxwz
-  364. copy_4_slots_masked            x = Mask($1..4)
-  365. copy_slot_unmasked             $1 = ok
-  366. copy_4_slots_unmasked          $2..5 = x
-  367. zero_slot_unmasked             $6 = 0
-  368. copy_constant                  $7 = 0x00000005 (7.006492e-45)
-  369. copy_constant                  $8 = 0x00000005 (7.006492e-45)
-  370. zero_slot_unmasked             $9 = 0
-  371. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  372. bitwise_and_2_ints             $2..3 &= $4..5
-  373. bitwise_and_int                $2 &= $3
-  374. bitwise_and_int                $1 &= $2
-  375. copy_slot_masked               ok = Mask($1)
-  376. copy_constant                  $1 = 0x00000002 (2.802597e-45)
-  377. swizzle_4                      $1..4 = ($1..4).xxxx
-  378. copy_4_slots_unmasked          $5..8 = inputRed
-  379. add_4_ints                     $1..4 += $5..8
-  380. copy_4_slots_masked            x = Mask($1..4)
-  381. copy_slot_unmasked             $1 = ok
-  382. copy_4_slots_unmasked          $2..5 = x
-  383. copy_constant                  $6 = 0x00000003 (4.203895e-45)
-  384. copy_constant                  $7 = 0x00000002 (2.802597e-45)
-  385. copy_constant                  $8 = 0x00000002 (2.802597e-45)
-  386. copy_constant                  $9 = 0x00000003 (4.203895e-45)
-  387. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  388. bitwise_and_2_ints             $2..3 &= $4..5
-  389. bitwise_and_int                $2 &= $3
-  390. bitwise_and_int                $1 &= $2
-  391. copy_slot_masked               ok = Mask($1)
-  392. copy_constant                  $1 = 0x0000000A (1.401298e-44)
-  393. swizzle_4                      $1..4 = ($1..4).xxxx
-  394. copy_4_slots_unmasked          $5..8 = inputGreen
-  395. swizzle_4                      $5..8 = ($5..8).ywxz
-  396. sub_4_ints                     $1..4 -= $5..8
-  397. copy_4_slots_masked            x = Mask($1..4)
-  398. copy_slot_unmasked             $1 = ok
-  399. copy_4_slots_unmasked          $2..5 = x
-  400. copy_constant                  $6 = 0x00000009 (1.261169e-44)
-  401. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  402. copy_constant                  $8 = 0x0000000A (1.401298e-44)
-  403. copy_constant                  $9 = 0x0000000A (1.401298e-44)
-  404. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  405. bitwise_and_2_ints             $2..3 &= $4..5
-  406. bitwise_and_int                $2 &= $3
-  407. bitwise_and_int                $1 &= $2
-  408. copy_slot_masked               ok = Mask($1)
-  409. copy_slot_unmasked             $1 = inputRed(0)
-  410. swizzle_4                      $1..4 = ($1..4).xxxx
-  411. copy_4_slots_unmasked          $5..8 = inputGreen
-  412. add_4_ints                     $1..4 += $5..8
-  413. copy_4_slots_masked            x = Mask($1..4)
-  414. copy_slot_unmasked             $1 = ok
-  415. copy_4_slots_unmasked          $2..5 = x
-  416. copy_constant                  $6 = 0x00000001 (1.401298e-45)
-  417. copy_constant                  $7 = 0x00000002 (2.802597e-45)
-  418. copy_constant                  $8 = 0x00000001 (1.401298e-45)
-  419. copy_constant                  $9 = 0x00000002 (2.802597e-45)
-  420. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  421. bitwise_and_2_ints             $2..3 &= $4..5
-  422. bitwise_and_int                $2 &= $3
-  423. bitwise_and_int                $1 &= $2
-  424. copy_slot_masked               ok = Mask($1)
-  425. copy_constant                  $1 = 0x00000008 (1.121039e-44)
-  426. swizzle_3                      $1..3 = ($1..3).xxx
-  427. copy_4_slots_unmasked          $4..7 = inputGreen
-  428. swizzle_3                      $4..6 = ($4..7).wyw
-  429. mul_3_ints                     $1..3 *= $4..6
-  430. copy_3_slots_masked            x(0..2) = Mask($1..3)
-  431. copy_slot_unmasked             $1 = ok
-  432. copy_4_slots_unmasked          $2..5 = x
-  433. copy_constant                  $6 = 0x00000008 (1.121039e-44)
-  434. copy_constant                  $7 = 0x00000008 (1.121039e-44)
-  435. copy_constant                  $8 = 0x00000008 (1.121039e-44)
-  436. copy_constant                  $9 = 0x00000002 (2.802597e-45)
-  437. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  438. bitwise_and_2_ints             $2..3 &= $4..5
-  439. bitwise_and_int                $2 &= $3
-  440. bitwise_and_int                $1 &= $2
-  441. copy_slot_masked               ok = Mask($1)
-  442. copy_constant                  $1 = 0x00000024 (5.044674e-44)
-  443. copy_slot_unmasked             $2 = $1
-  444. copy_2_slots_unmasked          $3..4 = x(2..3)
-  445. div_2_ints                     $1..2 /= $3..4
-  446. copy_2_slots_masked            x(0..1) = Mask($1..2)
-  447. copy_slot_unmasked             $1 = ok
-  448. copy_4_slots_unmasked          $2..5 = x
-  449. copy_constant                  $6 = 0x00000004 (5.605194e-45)
-  450. copy_constant                  $7 = 0x00000012 (2.522337e-44)
-  451. copy_constant                  $8 = 0x00000008 (1.121039e-44)
-  452. copy_constant                  $9 = 0x00000002 (2.802597e-45)
-  453. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  454. bitwise_and_2_ints             $2..3 &= $4..5
-  455. bitwise_and_int                $2 &= $3
-  456. bitwise_and_int                $1 &= $2
-  457. copy_slot_masked               ok = Mask($1)
-  458. copy_constant                  $1 = 0x00000025 (5.184804e-44)
-  459. swizzle_4                      $1..4 = ($1..4).xxxx
-  460. copy_4_slots_unmasked          $5..8 = x
-  461. div_4_ints                     $1..4 /= $5..8
-  462. swizzle_4                      $1..4 = ($1..4).yxwz
-  463. copy_4_slots_masked            x = Mask($1..4)
-  464. copy_slot_unmasked             $1 = ok
-  465. copy_4_slots_unmasked          $2..5 = x
-  466. copy_constant                  $6 = 0x00000002 (2.802597e-45)
-  467. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  468. copy_constant                  $8 = 0x00000012 (2.522337e-44)
-  469. copy_constant                  $9 = 0x00000004 (5.605194e-45)
-  470. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  471. bitwise_and_2_ints             $2..3 &= $4..5
-  472. bitwise_and_int                $2 &= $3
-  473. bitwise_and_int                $1 &= $2
-  474. copy_slot_masked               ok = Mask($1)
-  475. copy_4_slots_unmasked          $1..4 = x
-  476. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  477. swizzle_4                      $5..8 = ($5..8).xxxx
-  478. add_4_ints                     $1..4 += $5..8
-  479. copy_4_slots_masked            x = Mask($1..4)
-  480. copy_4_slots_unmasked          $1..4 = x
-  481. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  273. copy_4_slots_unmasked          inputGreen = $1..4
+  274. copy_4_slots_unmasked          $1..4 = inputRed
+  275. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  276. swizzle_4                      $5..8 = ($5..8).xxxx
+  277. add_4_ints                     $1..4 += $5..8
+  278. copy_4_slots_unmasked          x = $1..4
+  279. copy_slot_unmasked             $1 = ok
+  280. copy_4_slots_unmasked          $2..5 = x
+  281. copy_constant                  $6 = 0x00000003 (4.203895e-45)
+  282. copy_constant                  $7 = 0x00000002 (2.802597e-45)
+  283. copy_constant                  $8 = 0x00000002 (2.802597e-45)
+  284. copy_constant                  $9 = 0x00000003 (4.203895e-45)
+  285. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  286. bitwise_and_2_ints             $2..3 &= $4..5
+  287. bitwise_and_int                $2 &= $3
+  288. bitwise_and_int                $1 &= $2
+  289. copy_slot_masked               ok = Mask($1)
+  290. copy_4_slots_unmasked          $1..4 = inputGreen
+  291. swizzle_4                      $1..4 = ($1..4).ywxz
+  292. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  293. swizzle_4                      $5..8 = ($5..8).xxxx
+  294. sub_4_ints                     $1..4 -= $5..8
+  295. copy_4_slots_masked            x = Mask($1..4)
+  296. copy_slot_unmasked             $1 = ok
+  297. copy_4_slots_unmasked          $2..5 = x
+  298. copy_constant                  $6 = 0xFFFFFFFF
+  299. copy_constant                  $7 = 0xFFFFFFFF
+  300. copy_constant                  $8 = 0xFFFFFFFE
+  301. copy_constant                  $9 = 0xFFFFFFFE
+  302. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  303. bitwise_and_2_ints             $2..3 &= $4..5
+  304. bitwise_and_int                $2 &= $3
+  305. bitwise_and_int                $1 &= $2
+  306. copy_slot_masked               ok = Mask($1)
+  307. copy_4_slots_unmasked          $1..4 = inputRed
+  308. copy_slot_unmasked             $5 = inputGreen(1)
+  309. swizzle_4                      $5..8 = ($5..8).xxxx
+  310. add_4_ints                     $1..4 += $5..8
+  311. copy_4_slots_masked            x = Mask($1..4)
+  312. copy_slot_unmasked             $1 = ok
+  313. copy_4_slots_unmasked          $2..5 = x
+  314. copy_constant                  $6 = 0x00000002 (2.802597e-45)
+  315. copy_constant                  $7 = 0x00000001 (1.401298e-45)
+  316. copy_constant                  $8 = 0x00000001 (1.401298e-45)
+  317. copy_constant                  $9 = 0x00000002 (2.802597e-45)
+  318. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  319. bitwise_and_2_ints             $2..3 &= $4..5
+  320. bitwise_and_int                $2 &= $3
+  321. bitwise_and_int                $1 &= $2
+  322. copy_slot_masked               ok = Mask($1)
+  323. copy_4_slots_unmasked          $1..4 = inputGreen
+  324. swizzle_3                      $1..3 = ($1..4).wyw
+  325. copy_constant                  $4 = 0x00000009 (1.261169e-44)
+  326. swizzle_3                      $4..6 = ($4..6).xxx
+  327. mul_3_ints                     $1..3 *= $4..6
+  328. copy_3_slots_masked            x(0..2) = Mask($1..3)
+  329. copy_slot_unmasked             $1 = ok
+  330. copy_4_slots_unmasked          $2..5 = x
+  331. copy_constant                  $6 = 0x00000009 (1.261169e-44)
+  332. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  333. copy_constant                  $8 = 0x00000009 (1.261169e-44)
+  334. copy_constant                  $9 = 0x00000002 (2.802597e-45)
+  335. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  336. bitwise_and_2_ints             $2..3 &= $4..5
+  337. bitwise_and_int                $2 &= $3
+  338. bitwise_and_int                $1 &= $2
+  339. copy_slot_masked               ok = Mask($1)
+  340. copy_2_slots_unmasked          $1..2 = x(2..3)
+  341. copy_constant                  $3 = 0x00000004 (5.605194e-45)
+  342. copy_slot_unmasked             $4 = $3
+  343. div_2_ints                     $1..2 /= $3..4
+  344. copy_2_slots_masked            x(0..1) = Mask($1..2)
+  345. copy_slot_unmasked             $1 = ok
+  346. copy_4_slots_unmasked          $2..5 = x
+  347. copy_constant                  $6 = 0x00000002 (2.802597e-45)
+  348. zero_slot_unmasked             $7 = 0
+  349. copy_constant                  $8 = 0x00000009 (1.261169e-44)
+  350. copy_constant                  $9 = 0x00000002 (2.802597e-45)
+  351. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  352. bitwise_and_2_ints             $2..3 &= $4..5
+  353. bitwise_and_int                $2 &= $3
+  354. bitwise_and_int                $1 &= $2
+  355. copy_slot_masked               ok = Mask($1)
+  356. copy_4_slots_unmasked          $1..4 = inputRed
+  357. copy_constant                  $5 = 0x00000005 (7.006492e-45)
+  358. swizzle_4                      $5..8 = ($5..8).xxxx
+  359. mul_4_ints                     $1..4 *= $5..8
+  360. swizzle_4                      $1..4 = ($1..4).yxwz
+  361. copy_4_slots_masked            x = Mask($1..4)
+  362. copy_slot_unmasked             $1 = ok
+  363. copy_4_slots_unmasked          $2..5 = x
+  364. zero_slot_unmasked             $6 = 0
+  365. copy_constant                  $7 = 0x00000005 (7.006492e-45)
+  366. copy_constant                  $8 = 0x00000005 (7.006492e-45)
+  367. zero_slot_unmasked             $9 = 0
+  368. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  369. bitwise_and_2_ints             $2..3 &= $4..5
+  370. bitwise_and_int                $2 &= $3
+  371. bitwise_and_int                $1 &= $2
+  372. copy_slot_masked               ok = Mask($1)
+  373. copy_constant                  $1 = 0x00000002 (2.802597e-45)
+  374. swizzle_4                      $1..4 = ($1..4).xxxx
+  375. copy_4_slots_unmasked          $5..8 = inputRed
+  376. add_4_ints                     $1..4 += $5..8
+  377. copy_4_slots_masked            x = Mask($1..4)
+  378. copy_slot_unmasked             $1 = ok
+  379. copy_4_slots_unmasked          $2..5 = x
+  380. copy_constant                  $6 = 0x00000003 (4.203895e-45)
+  381. copy_constant                  $7 = 0x00000002 (2.802597e-45)
+  382. copy_constant                  $8 = 0x00000002 (2.802597e-45)
+  383. copy_constant                  $9 = 0x00000003 (4.203895e-45)
+  384. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  385. bitwise_and_2_ints             $2..3 &= $4..5
+  386. bitwise_and_int                $2 &= $3
+  387. bitwise_and_int                $1 &= $2
+  388. copy_slot_masked               ok = Mask($1)
+  389. copy_constant                  $1 = 0x0000000A (1.401298e-44)
+  390. swizzle_4                      $1..4 = ($1..4).xxxx
+  391. copy_4_slots_unmasked          $5..8 = inputGreen
+  392. swizzle_4                      $5..8 = ($5..8).ywxz
+  393. sub_4_ints                     $1..4 -= $5..8
+  394. copy_4_slots_masked            x = Mask($1..4)
+  395. copy_slot_unmasked             $1 = ok
+  396. copy_4_slots_unmasked          $2..5 = x
+  397. copy_constant                  $6 = 0x00000009 (1.261169e-44)
+  398. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  399. copy_constant                  $8 = 0x0000000A (1.401298e-44)
+  400. copy_constant                  $9 = 0x0000000A (1.401298e-44)
+  401. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  402. bitwise_and_2_ints             $2..3 &= $4..5
+  403. bitwise_and_int                $2 &= $3
+  404. bitwise_and_int                $1 &= $2
+  405. copy_slot_masked               ok = Mask($1)
+  406. copy_slot_unmasked             $1 = inputRed(0)
+  407. swizzle_4                      $1..4 = ($1..4).xxxx
+  408. copy_4_slots_unmasked          $5..8 = inputGreen
+  409. add_4_ints                     $1..4 += $5..8
+  410. copy_4_slots_masked            x = Mask($1..4)
+  411. copy_slot_unmasked             $1 = ok
+  412. copy_4_slots_unmasked          $2..5 = x
+  413. copy_constant                  $6 = 0x00000001 (1.401298e-45)
+  414. copy_constant                  $7 = 0x00000002 (2.802597e-45)
+  415. copy_constant                  $8 = 0x00000001 (1.401298e-45)
+  416. copy_constant                  $9 = 0x00000002 (2.802597e-45)
+  417. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  418. bitwise_and_2_ints             $2..3 &= $4..5
+  419. bitwise_and_int                $2 &= $3
+  420. bitwise_and_int                $1 &= $2
+  421. copy_slot_masked               ok = Mask($1)
+  422. copy_constant                  $1 = 0x00000008 (1.121039e-44)
+  423. swizzle_3                      $1..3 = ($1..3).xxx
+  424. copy_4_slots_unmasked          $4..7 = inputGreen
+  425. swizzle_3                      $4..6 = ($4..7).wyw
+  426. mul_3_ints                     $1..3 *= $4..6
+  427. copy_3_slots_masked            x(0..2) = Mask($1..3)
+  428. copy_slot_unmasked             $1 = ok
+  429. copy_4_slots_unmasked          $2..5 = x
+  430. copy_constant                  $6 = 0x00000008 (1.121039e-44)
+  431. copy_constant                  $7 = 0x00000008 (1.121039e-44)
+  432. copy_constant                  $8 = 0x00000008 (1.121039e-44)
+  433. copy_constant                  $9 = 0x00000002 (2.802597e-45)
+  434. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  435. bitwise_and_2_ints             $2..3 &= $4..5
+  436. bitwise_and_int                $2 &= $3
+  437. bitwise_and_int                $1 &= $2
+  438. copy_slot_masked               ok = Mask($1)
+  439. copy_constant                  $1 = 0x00000024 (5.044674e-44)
+  440. copy_slot_unmasked             $2 = $1
+  441. copy_2_slots_unmasked          $3..4 = x(2..3)
+  442. div_2_ints                     $1..2 /= $3..4
+  443. copy_2_slots_masked            x(0..1) = Mask($1..2)
+  444. copy_slot_unmasked             $1 = ok
+  445. copy_4_slots_unmasked          $2..5 = x
+  446. copy_constant                  $6 = 0x00000004 (5.605194e-45)
+  447. copy_constant                  $7 = 0x00000012 (2.522337e-44)
+  448. copy_constant                  $8 = 0x00000008 (1.121039e-44)
+  449. copy_constant                  $9 = 0x00000002 (2.802597e-45)
+  450. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  451. bitwise_and_2_ints             $2..3 &= $4..5
+  452. bitwise_and_int                $2 &= $3
+  453. bitwise_and_int                $1 &= $2
+  454. copy_slot_masked               ok = Mask($1)
+  455. copy_constant                  $1 = 0x00000025 (5.184804e-44)
+  456. swizzle_4                      $1..4 = ($1..4).xxxx
+  457. copy_4_slots_unmasked          $5..8 = x
+  458. div_4_ints                     $1..4 /= $5..8
+  459. swizzle_4                      $1..4 = ($1..4).yxwz
+  460. copy_4_slots_masked            x = Mask($1..4)
+  461. copy_slot_unmasked             $1 = ok
+  462. copy_4_slots_unmasked          $2..5 = x
+  463. copy_constant                  $6 = 0x00000002 (2.802597e-45)
+  464. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  465. copy_constant                  $8 = 0x00000012 (2.522337e-44)
+  466. copy_constant                  $9 = 0x00000004 (5.605194e-45)
+  467. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  468. bitwise_and_2_ints             $2..3 &= $4..5
+  469. bitwise_and_int                $2 &= $3
+  470. bitwise_and_int                $1 &= $2
+  471. copy_slot_masked               ok = Mask($1)
+  472. copy_4_slots_unmasked          $1..4 = x
+  473. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  474. swizzle_4                      $5..8 = ($5..8).xxxx
+  475. add_4_ints                     $1..4 += $5..8
+  476. copy_4_slots_masked            x = Mask($1..4)
+  477. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  478. swizzle_4                      $5..8 = ($5..8).xxxx
+  479. mul_4_ints                     $1..4 *= $5..8
+  480. copy_4_slots_masked            x = Mask($1..4)
+  481. copy_constant                  $5 = 0x00000004 (5.605194e-45)
   482. swizzle_4                      $5..8 = ($5..8).xxxx
-  483. mul_4_ints                     $1..4 *= $5..8
+  483. sub_4_ints                     $1..4 -= $5..8
   484. copy_4_slots_masked            x = Mask($1..4)
-  485. copy_4_slots_unmasked          $1..4 = x
-  486. copy_constant                  $5 = 0x00000004 (5.605194e-45)
-  487. swizzle_4                      $5..8 = ($5..8).xxxx
-  488. sub_4_ints                     $1..4 -= $5..8
-  489. copy_4_slots_masked            x = Mask($1..4)
-  490. copy_4_slots_unmasked          $1..4 = x
-  491. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  492. swizzle_4                      $5..8 = ($5..8).xxxx
-  493. div_4_ints                     $1..4 /= $5..8
-  494. copy_4_slots_masked            x = Mask($1..4)
-  495. copy_slot_unmasked             $1 = ok
-  496. copy_4_slots_unmasked          $2..5 = x
-  497. copy_constant                  $6 = 0x00000002 (2.802597e-45)
-  498. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  499. copy_constant                  $8 = 0x00000012 (2.522337e-44)
-  500. copy_constant                  $9 = 0x00000004 (5.605194e-45)
-  501. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  485. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  486. swizzle_4                      $5..8 = ($5..8).xxxx
+  487. div_4_ints                     $1..4 /= $5..8
+  488. copy_4_slots_masked            x = Mask($1..4)
+  489. copy_slot_unmasked             $1 = ok
+  490. copy_4_slots_unmasked          $2..5 = x
+  491. copy_constant                  $6 = 0x00000002 (2.802597e-45)
+  492. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  493. copy_constant                  $8 = 0x00000012 (2.522337e-44)
+  494. copy_constant                  $9 = 0x00000004 (5.605194e-45)
+  495. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  496. bitwise_and_2_ints             $2..3 &= $4..5
+  497. bitwise_and_int                $2 &= $3
+  498. bitwise_and_int                $1 &= $2
+  499. copy_slot_masked               ok = Mask($1)
+  500. copy_4_slots_unmasked          $1..4 = x
+  501. copy_constant                  $5 = 0x00000002 (2.802597e-45)
   502. stack_rewind
-  503. bitwise_and_2_ints             $2..3 &= $4..5
-  504. bitwise_and_int                $2 &= $3
-  505. bitwise_and_int                $1 &= $2
-  506. copy_slot_masked               ok = Mask($1)
-  507. copy_4_slots_unmasked          $1..4 = x
-  508. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  509. swizzle_4                      $5..8 = ($5..8).xxxx
-  510. add_4_ints                     $1..4 += $5..8
-  511. copy_4_slots_masked            x = Mask($1..4)
-  512. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  513. swizzle_4                      $5..8 = ($5..8).xxxx
-  514. mul_4_ints                     $1..4 *= $5..8
-  515. copy_4_slots_masked            x = Mask($1..4)
-  516. copy_constant                  $5 = 0x00000004 (5.605194e-45)
-  517. swizzle_4                      $5..8 = ($5..8).xxxx
-  518. sub_4_ints                     $1..4 -= $5..8
-  519. copy_4_slots_masked            x = Mask($1..4)
-  520. copy_constant                  $5 = 0x00000002 (2.802597e-45)
-  521. swizzle_4                      $5..8 = ($5..8).xxxx
-  522. div_4_ints                     $1..4 /= $5..8
-  523. copy_4_slots_masked            x = Mask($1..4)
-  524. copy_slot_unmasked             $1 = ok
-  525. copy_4_slots_unmasked          $2..5 = x
-  526. copy_constant                  $6 = 0x00000002 (2.802597e-45)
-  527. copy_constant                  $7 = 0x00000009 (1.261169e-44)
-  528. copy_constant                  $8 = 0x00000012 (2.522337e-44)
-  529. copy_constant                  $9 = 0x00000004 (5.605194e-45)
-  530. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
-  531. bitwise_and_2_ints             $2..3 &= $4..5
-  532. bitwise_and_int                $2 &= $3
-  533. bitwise_and_int                $1 &= $2
-  534. copy_slot_masked               ok = Mask($1)
-  535. copy_slot_masked               [test_int].result = Mask($1)
-  536. label                          label 0x00000002
-  537. copy_slot_masked               $0 = Mask($1)
-  538. label                          label 0x00000001
-  539. load_condition_mask            CondMask = $12
-  540. swizzle_4                      $0..3 = ($0..3).xxxx
-  541. copy_4_constants               $4..7 = colorRed
-  542. copy_4_constants               $8..11 = colorGreen
-  543. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
-  544. copy_4_slots_unmasked          [main].result = $0..3
-  545. load_src                       src.rgba = [main].result
+  503. swizzle_4                      $5..8 = ($5..8).xxxx
+  504. add_4_ints                     $1..4 += $5..8
+  505. copy_4_slots_masked            x = Mask($1..4)
+  506. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  507. swizzle_4                      $5..8 = ($5..8).xxxx
+  508. mul_4_ints                     $1..4 *= $5..8
+  509. copy_4_slots_masked            x = Mask($1..4)
+  510. copy_constant                  $5 = 0x00000004 (5.605194e-45)
+  511. swizzle_4                      $5..8 = ($5..8).xxxx
+  512. sub_4_ints                     $1..4 -= $5..8
+  513. copy_4_slots_masked            x = Mask($1..4)
+  514. copy_constant                  $5 = 0x00000002 (2.802597e-45)
+  515. swizzle_4                      $5..8 = ($5..8).xxxx
+  516. div_4_ints                     $1..4 /= $5..8
+  517. copy_4_slots_masked            x = Mask($1..4)
+  518. copy_slot_unmasked             $1 = ok
+  519. copy_4_slots_unmasked          $2..5 = x
+  520. copy_constant                  $6 = 0x00000002 (2.802597e-45)
+  521. copy_constant                  $7 = 0x00000009 (1.261169e-44)
+  522. copy_constant                  $8 = 0x00000012 (2.522337e-44)
+  523. copy_constant                  $9 = 0x00000004 (5.605194e-45)
+  524. cmpeq_4_ints                   $2..5 = equal($2..5, $6..9)
+  525. bitwise_and_2_ints             $2..3 &= $4..5
+  526. bitwise_and_int                $2 &= $3
+  527. bitwise_and_int                $1 &= $2
+  528. copy_slot_masked               ok = Mask($1)
+  529. copy_slot_masked               [test_int].result = Mask($1)
+  530. label                          label 0x00000002
+  531. copy_slot_masked               $0 = Mask($1)
+  532. label                          label 0x00000001
+  533. load_condition_mask            CondMask = $12
+  534. swizzle_4                      $0..3 = ($0..3).xxxx
+  535. copy_4_constants               $4..7 = colorRed
+  536. copy_4_constants               $8..11 = colorGreen
+  537. mix_4_ints                     $0..3 = mix($4..7, $8..11, $0..3)
+  538. copy_4_slots_unmasked          [main].result = $0..3
+  539. load_src                       src.rgba = [main].result