aco: use Builder::copy more

fossil-db (Navi):
Totals from 6973 (5.07% of 137413) affected shaders:
SGPRs: 381768 -> 381776 (+0.00%)
VGPRs: 306092 -> 306096 (+0.00%); split: -0.00%, +0.00%
CodeSize: 24440844 -> 24421196 (-0.08%); split: -0.09%, +0.01%
MaxWaves: 86581 -> 86583 (+0.00%)
Instrs: 4682161 -> 4679578 (-0.06%); split: -0.06%, +0.00%
Cycles: 68793116 -> 68261648 (-0.77%); split: -0.83%, +0.05%

fossil-db (Polaris):
Totals from 8154 (5.87% of 138881) affected shaders:
VGPRs: 338916 -> 338920 (+0.00%); split: -0.00%, +0.00%
CodeSize: 23540428 -> 23540488 (+0.00%); split: -0.00%, +0.00%
MaxWaves: 49090 -> 49091 (+0.00%)
Instrs: 4576085 -> 4576101 (+0.00%); split: -0.00%, +0.00%
Cycles: 51720704 -> 51720888 (+0.00%); split: -0.00%, +0.00%

Most of the Navi cycle/instruction changes are from 8/16-bit parallel-rdp
shaders. These appear to improve because the p_create_vector emitted by
lower_subdword_phis() was blocking constant propagation.
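
For context, Builder::copy is a dispatch helper: it inspects the
destination register class (and, for SGPR destinations, the constant's
value) and picks the move itself, so callers no longer hard-code
s_mov_b32/s_mov_b64/v_mov_b32/s_movk_i32 or a p_create_vector. A minimal
sketch of that kind of dispatch (illustrative only, not the actual
generated helper in aco_builder.h; copy_sketch is a hypothetical name):

    Builder::Result copy_sketch(Builder& bld, Definition dst, Operand op)
    {
       if (dst.regClass() == s1 && op.isConstant()) {
          int32_t imm = (int32_t)op.constantValue();
          /* small constants fit the 16-bit sign-extended s_movk_i32
           * immediate and avoid a 32-bit literal */
          if (imm >= INT16_MIN && imm <= INT16_MAX)
             return bld.sopk(aco_opcode::s_movk_i32, dst, (uint16_t)imm);
       }
       if (dst.regClass() == s1)
          return bld.sop1(aco_opcode::s_mov_b32, dst, op);
       if (dst.regClass() == s2)
          return bld.sop1(aco_opcode::s_mov_b64, dst, op);
       if (dst.regClass() == v1)
          return bld.vop1(aco_opcode::v_mov_b32, dst, op);
       /* sub-dword and multi-dword copies go through a pseudo
        * instruction that a later pass lowers to real moves */
       return bld.pseudo(aco_opcode::p_parallelcopy, dst, op);
    }

The Operand(x, bld.lm == s2) pattern used below is the two-argument
constructor that selects a 64-bit constant when the lane mask is s2
(wave64), replacing the explicit Operand(UINT64_MAX)/Operand(UINT32_MAX)
selection at each call site.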

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7216>
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 30c408a..638157c 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -381,7 +381,7 @@
 
       /* exec seems to need to be manually initialized with combined shaders */
       if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
-         bld.sop1(Builder::s_mov, bld.exec(Definition(exec_mask)), bld.lm == s2 ? Operand(UINT64_MAX) : Operand(UINT32_MAX));
+         bld.copy(bld.exec(Definition(exec_mask)), Operand(UINT32_MAX, bld.lm == s2));
          instructions[0]->definitions.pop_back();
       }
 
@@ -653,15 +653,15 @@
    Operand offset = instr->operands[1];
    if (need_check) {
       /* if exec is zero, then use UINT32_MAX as an offset and make this store a no-op */
-      Temp nonempty = bld.sopc(Builder::s_cmp_lg, bld.def(s1, scc), cur_exec, Operand(0u));
+      Temp nonempty = bld.sopc(Builder::s_cmp_lg, bld.def(s1, scc), cur_exec, Operand(0u, bld.lm == s2));
 
       if (offset.isLiteral())
-         offset = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1), offset);
+         offset = bld.copy(bld.def(s1), offset);
 
       offset = bld.sop2(aco_opcode::s_cselect_b32, bld.hint_m0(bld.def(s1)),
                         offset, Operand(UINT32_MAX), bld.scc(nonempty));
    } else if (offset.isConstant() && offset.constantValue() > 0xFFFFF) {
-      offset = bld.sop1(aco_opcode::s_mov_b32, bld.hint_m0(bld.def(s1)), offset);
+      offset = bld.copy(bld.hint_m0(bld.def(s1)), offset);
    }
    if (!offset.isConstant())
       offset.setFixed(m0);
@@ -1076,7 +1076,7 @@
       unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
       Block& succ = ctx.program->blocks[succ_idx];
       if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
-         ctx.info[idx].exec.back().first = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand(0u));
+         ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm, exec), Operand(0u, bld.lm == s2));
       }
 
       bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]);
@@ -1105,7 +1105,7 @@
       unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
       Block& succ = ctx.program->blocks[succ_idx];
       if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
-         ctx.info[idx].exec.back().first = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand(0u));
+         ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm, exec), Operand(0u, bld.lm == s2));
       }
 
       bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]);
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 4de0fe6..6b82f82 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -279,7 +279,7 @@
    bool post_shift = info.post_shift != 0;
 
    if (!pre_shift && !increment && !multiply && !post_shift) {
-      bld.vop1(aco_opcode::v_mov_b32, Definition(dst), a);
+      bld.copy(Definition(dst), a);
       return;
    }
 
@@ -299,7 +299,7 @@
    if (multiply) {
       multiply_dst = post_shift ? bld.tmp(v1) : dst;
       bld.vop3(aco_opcode::v_mul_hi_u32, Definition(multiply_dst), increment_dst,
-               bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand((uint32_t)info.multiplier)));
+               bld.copy(bld.def(v1), Operand((uint32_t)info.multiplier)));
    }
 
    if (post_shift) {
@@ -1007,7 +1007,7 @@
       then = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), cond, then);
 
    if (cond.id() == els.id())
-      bld.sop1(Builder::s_mov, Definition(dst), then);
+      bld.copy(Definition(dst), then);
    else
       bld.sop2(Builder::s_or, Definition(dst), bld.def(s1, scc), then,
                bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), els, cond));
@@ -1200,26 +1200,12 @@
    }
    case nir_op_mov: {
       Temp src = get_alu_src(ctx, instr->src[0]);
-      aco_ptr<Instruction> mov;
-      if (dst.type() == RegType::sgpr) {
-         if (src.type() == RegType::vgpr)
-            bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
-         else if (src.regClass() == s1)
-            bld.sop1(aco_opcode::s_mov_b32, Definition(dst), src);
-         else if (src.regClass() == s2)
-            bld.sop1(aco_opcode::s_mov_b64, Definition(dst), src);
-         else
-            unreachable("wrong src register class for nir_op_imov");
-      } else {
-         if (dst.regClass() == v1)
-            bld.vop1(aco_opcode::v_mov_b32, Definition(dst), src);
-         else if (dst.regClass() == v1b ||
-                  dst.regClass() == v2b ||
-                  dst.regClass() == v2)
-            bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src);
-         else
-            unreachable("wrong src register class for nir_op_imov");
-      }
+      if (src.bytes() != dst.bytes())
+         unreachable("wrong src or dst register class for nir_op_mov");
+      if (src.type() == RegType::vgpr && dst.type() == RegType::sgpr)
+         bld.pseudo(aco_opcode::p_as_uniform, Definition(dst), src);
+      else
+         bld.copy(Definition(dst), src);
       break;
    }
    case nir_op_inot: {
@@ -2113,11 +2099,11 @@
          bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src);
       } else if (dst.regClass() == v2) {
          Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         Temp tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0x3FF00000u));
+         Temp tmp = bld.copy(bld.def(v1), Operand(0x3FF00000u));
          Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, emit_extract_vector(ctx, src, 1, v1), cond);
 
          cond = bld.vopc(aco_opcode::v_cmp_le_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src);
-         tmp = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0xBFF00000u));
+         tmp = bld.copy(bld.def(v1), Operand(0xBFF00000u));
          upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond);
 
          bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), upper);
@@ -2492,7 +2478,7 @@
          src = bool_to_scalar_condition(ctx, src);
          bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand(0x3f800000u), Operand(0u), bld.scc(src));
       } else if (dst.regClass() == v2) {
-         Temp one = bld.vop1(aco_opcode::v_mov_b32, bld.def(v2), Operand(0x3FF00000u));
+         Temp one = bld.copy(bld.def(v2), Operand(0x3FF00000u));
          Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), one, src);
          bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), upper);
       } else {
@@ -2902,13 +2888,12 @@
       assert(dst.regClass() == bld.lm);
       int val = instr->value[0].b ? -1 : 0;
       Operand op = bld.lm.size() == 1 ? Operand((uint32_t) val) : Operand((uint64_t) val);
-      bld.sop1(Builder::s_mov, Definition(dst), op);
+      bld.copy(Definition(dst), op);
    } else if (instr->def.bit_size == 8) {
-      /* ensure that the value is correctly represented in the low byte of the register */
-      bld.sopk(aco_opcode::s_movk_i32, Definition(dst), instr->value[0].u8);
+      bld.copy(Definition(dst), Operand((uint32_t)instr->value[0].u8));
    } else if (instr->def.bit_size == 16) {
-      /* ensure that the value is correctly represented in the low half of the register */
-      bld.sopk(aco_opcode::s_movk_i32, Definition(dst), instr->value[0].u16);
+      /* sign-extend to use s_movk_i32 instead of a literal */
+      bld.copy(Definition(dst), Operand((uint32_t)instr->value[0].i16));
    } else if (dst.size() == 1) {
       bld.copy(Definition(dst), Operand(instr->value[0].u32));
    } else {
@@ -3209,7 +3194,7 @@
 Operand load_lds_size_m0(Builder& bld)
 {
    /* TODO: m0 does not need to be initialized on GFX9+ */
-   return bld.m0((Temp)bld.sopk(aco_opcode::s_movk_i32, bld.def(s1, m0), 0xffff));
+   return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand(0xffffffffu)));
 }
 
 Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
@@ -4601,7 +4586,7 @@
                index = bld.vadd32(bld.def(v1), start_instance, instance_id);
             }
          } else {
-            index = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), start_instance);
+            index = bld.copy(bld.def(v1), start_instance);
          }
       } else {
          index = bld.vadd32(bld.def(v1),
@@ -5582,7 +5567,7 @@
    bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(compare),
                 Operand(0u), emit_extract_vector(ctx, fmask_desc_ptr, 1, s1)).def(0).setHint(vcc);
 
-   Temp sample_index_v = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), sample_index);
+   Temp sample_index_v = bld.copy(bld.def(v1), sample_index);
 
    /* Replace the MSAA sample index. */
    return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), sample_index_v, final_sample, compare);
@@ -5985,7 +5970,7 @@
 
    /* LOD */
    assert(nir_src_as_uint(instr->src[1]) == 0);
-   Temp lod = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
+   Temp lod = bld.copy(bld.def(v1), Operand(0u));
 
    /* Resource */
    Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), ACO_DESC_IMAGE, NULL, true, false);
@@ -6802,7 +6787,7 @@
 
    Temp sample_id = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
                              get_arg(ctx, ctx->args->ac.ancillary), Operand(8u), Operand(4u));
-   Temp ps_iter_mask = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(ps_iter_masks[log2_ps_iter_samples]));
+   Temp ps_iter_mask = bld.copy(bld.def(v1), Operand(ps_iter_masks[log2_ps_iter_samples]));
    Temp mask = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), sample_id, ps_iter_mask);
    Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
    bld.vop2(aco_opcode::v_and_b32, Definition(dst), mask, get_arg(ctx, ctx->args->ac.sample_coverage));
@@ -7167,15 +7152,10 @@
    Builder bld(ctx->program, ctx->block);
    Definition dst(get_ssa_temp(ctx, &instr->dest.ssa));
    assert(dst.regClass().type() != RegType::vgpr);
-   if (src.regClass().type() == RegType::vgpr) {
+   if (src.regClass().type() == RegType::vgpr)
       bld.pseudo(aco_opcode::p_as_uniform, dst, src);
-   } else if (src.regClass() == s1) {
-      bld.sop1(aco_opcode::s_mov_b32, dst, src);
-   } else if (src.regClass() == s2) {
-      bld.sop1(aco_opcode::s_mov_b64, dst, src);
-   } else {
-      isel_err(&instr->instr, "Unimplemented NIR instr bit size");
-   }
+   else
+      bld.copy(dst, src);
 }
 
 void emit_addition_uniform_reduce(isel_context *ctx, nir_op op, Definition dst, nir_src src, Temp count)
@@ -7768,7 +7748,7 @@
          bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc),
                   get_arg(ctx, ctx->args->ac.tg_size), Operand(0x6u | (0x6u << 16)));
       } else {
-         bld.sop1(aco_opcode::s_mov_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand(0x0u));
+         bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand(0x0u));
       }
       break;
    }
@@ -7781,7 +7761,7 @@
          bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc), Operand(0x3fu),
                   get_arg(ctx, ctx->args->ac.tg_size));
       else
-         bld.sop1(aco_opcode::s_mov_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand(0x1u));
+         bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand(0x1u));
       break;
    }
    case nir_intrinsic_ballot: {
@@ -7882,12 +7862,8 @@
          Temp tmp = bld.sopc(Builder::s_bitcmp1, bld.def(s1, scc), src,
                              bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)));
          bool_to_vector_condition(ctx, emit_wqm(ctx, tmp), dst);
-      } else if (src.regClass() == s1) {
-         bld.sop1(aco_opcode::s_mov_b32, Definition(dst), src);
-      } else if (src.regClass() == s2) {
-         bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src);
       } else {
-         isel_err(&instr->instr, "Unimplemented NIR instr bit size");
+         bld.copy(Definition(dst), src);
       }
       break;
    }
@@ -8705,7 +8681,7 @@
             pack = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(pack_const), pack);
       }
       if (pack_const && pack == Temp())
-         offset = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(pack_const));
+         offset = bld.copy(bld.def(v1), Operand(pack_const));
       else if (pack == Temp())
          has_offset = false;
       else
@@ -8809,7 +8785,7 @@
    aco_ptr<MIMG_instruction> tex;
    if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels) {
       if (!has_lod)
-         lod = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
+         lod = bld.copy(bld.def(v1), Operand(0u));
 
       bool div_by_6 = instr->op == nir_texop_txs &&
                       instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
@@ -8861,7 +8837,7 @@
       tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1));
       tex->operands[0] = Operand(resource);
       tex->operands[1] = Operand(s4); /* no sampler */
-      tex->operands[2] = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
+      tex->operands[2] = bld.copy(bld.def(v1), Operand(0u));
       tex->dim = dim;
       tex->dmask = 0x3;
       tex->da = da;
@@ -9259,10 +9235,8 @@
 
    if (num_defined == 0) {
       Builder bld(ctx->program, ctx->block);
-      if (dst.regClass() == s1) {
-         bld.sop1(aco_opcode::s_mov_b32, Definition(dst), Operand(0u));
-      } else if (dst.regClass() == v1) {
-         bld.vop1(aco_opcode::v_mov_b32, Definition(dst), Operand(0u));
+      if (dst.bytes() == 4) {
+         bld.copy(Definition(dst), Operand(0u));
       } else {
          aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)};
          for (unsigned i = 0; i < dst.size(); i++)
@@ -11385,7 +11359,7 @@
    bld.reset(ctx->block);
 
    Temp gds_addr = bld.copy(bld.def(v1), Operand(0u));
-   Operand m = bld.m0((Temp)bld.sopk(aco_opcode::s_movk_i32, bld.def(s1, m0), 0x100));
+   Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand(0x100u)));
    bld.ds(aco_opcode::ds_add_u32, gds_addr, as_vgpr(ctx, sg_prm_cnt), m, 0u, 0u, true);
 
    begin_divergent_if_else(ctx, &ic_last_lane);
diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp
index f21d954..42e2e89 100644
--- a/src/amd/compiler/aco_lower_phis.cpp
+++ b/src/amd/compiler/aco_lower_phis.cpp
@@ -128,7 +128,7 @@
    bld.reset(&block->instructions, std::prev(it.base()));
 
    if (prev.isUndefined()) {
-      bld.sop1(Builder::s_mov, dst, cur);
+      bld.copy(dst, cur);
       return;
    }
 
@@ -150,16 +150,16 @@
       if (!cur_is_constant)
          bld.sop2(Builder::s_orn2, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm));
       else if (cur.constantValue64(true))
-         bld.sop1(Builder::s_mov, dst, program->wave_size == 64 ? Operand(UINT64_MAX) : Operand(UINT32_MAX));
+         bld.copy(dst, Operand(UINT32_MAX, bld.lm == s2));
       else
          bld.sop1(Builder::s_not, dst, bld.def(s1, scc), Operand(exec, bld.lm));
    } else {
       if (!cur_is_constant)
          bld.sop2(Builder::s_and, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm));
       else if (cur.constantValue64(true))
-         bld.sop1(Builder::s_mov, dst, Operand(exec, bld.lm));
+         bld.copy(dst, Operand(exec, bld.lm));
       else
-         bld.sop1(Builder::s_mov, dst, program->wave_size == 64 ? Operand((uint64_t)0u) : Operand(0u));
+         bld.copy(dst, Operand(0u, bld.lm == s2));
    }
 }
 
@@ -266,7 +266,7 @@
 
       assert(phi_src.regClass().type() == RegType::sgpr);
       Temp tmp = bld.tmp(RegClass(RegType::vgpr, phi_src.size()));
-      insert_before_logical_end(pred, bld.pseudo(aco_opcode::p_create_vector, Definition(tmp), phi_src).get_ptr());
+      insert_before_logical_end(pred, bld.copy(Definition(tmp), phi_src).get_ptr());
       Temp new_phi_src = bld.tmp(phi->definitions[0].regClass());
       insert_before_logical_end(pred, bld.pseudo(aco_opcode::p_extract_vector, Definition(new_phi_src), tmp, Operand(0u)).get_ptr());
 
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 3c4898e..8c6766b 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1003,9 +1003,10 @@
       }
    }
 
+   if (op.bytes() == 4 && op.constantEquals(0x3e22f983) && ctx->program->chip_class >= GFX8)
+      op.setFixed(PhysReg{248}); /* it can be an inline constant on GFX8+ */
+
    if (dst.regClass() == s1) {
-      if (op.constantEquals(0x3e22f983) && ctx->program->chip_class >= GFX8)
-         op.setFixed(PhysReg{248}); /* it can be an inline constant on GFX8+ */
       bld.sop1(aco_opcode::s_mov_b32, dst, op);
    } else if (dst.regClass() == s2) {
       bld.sop1(aco_opcode::s_mov_b64, dst, op);
@@ -1066,8 +1067,14 @@
       if (def.physReg() == scc) {
          bld.sopc(aco_opcode::s_cmp_lg_i32, def, op, Operand(0u));
          *preserve_scc = true;
-      } else if (def.bytes() == 8 && def.getTemp().type() == RegType::sgpr) {
-         bld.sop1(aco_opcode::s_mov_b64, def, Operand(op.physReg(), s2));
+      } else if (op.isConstant()) {
+         copy_constant(ctx, bld, def, op);
+      } else if (def.regClass() == v1) {
+         bld.vop1(aco_opcode::v_mov_b32, def, op);
+      } else if (def.regClass() == s1) {
+         bld.sop1(aco_opcode::s_mov_b32, def, op);
+      } else if (def.regClass() == s2) {
+         bld.sop1(aco_opcode::s_mov_b64, def, op);
       } else if (def.regClass().is_subdword() && ctx->program->chip_class < GFX8) {
          if (op.physReg().byte()) {
             assert(def.physReg().byte() == 0);
@@ -1098,14 +1105,6 @@
          } else {
             bld.vop1(aco_opcode::v_mov_b32, def, op);
          }
-      } else if (op.isConstant()) {
-         copy_constant(ctx, bld, def, op);
-      } else if (def.regClass() == v1) {
-         bld.vop1(aco_opcode::v_mov_b32, def, op);
-      } else if (def.regClass() == s1) {
-         bld.sop1(aco_opcode::s_mov_b32, def, op);
-      } else if (def.regClass() == s2) {
-         bld.sop1(aco_opcode::s_mov_b64, def, op);
       } else if (def.regClass().is_subdword()) {
          bld.vop1_sdwa(aco_opcode::v_mov_b32, def, op);
       } else {