Replace templates with “auto” parameters in OpVectorToMask

Pass the element type, vlmul, and vma as regular function arguments
instead of template parameters in OpVectorToMask, OpVectorToMaskvv,
OpVectorToMaskvx, and CollectBitmaskResult.

Test: m berberis_all

Change-Id: Ic8c08edd26ecdeba4ff4bdfe9a4e3b6ce6dcdf04
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index e5df9aa..7494b51 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -1676,23 +1676,23 @@
               args.dst, arg2, /*dst_mask=*/args.src1);
         }
       case Decoder::VOpFVfOpcode::kVmfeqvf:
-        return OpVectorToMaskvx<intrinsics::Vfeqvx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vfeqvx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpFVfOpcode::kVmflevf:
-        return OpVectorToMaskvx<intrinsics::Vflevx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vflevx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpFVfOpcode::kVmfltvf:
-        return OpVectorToMaskvx<intrinsics::Vfltvx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vfltvx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpFVfOpcode::kVmfnevf:
-        return OpVectorToMaskvx<intrinsics::Vfnevx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vfnevx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpFVfOpcode::kVmfgtvf:
-        return OpVectorToMaskvx<intrinsics::Vfgtvx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vfgtvx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpFVfOpcode::kVmfgevf:
-        return OpVectorToMaskvx<intrinsics::Vfgevx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vfgevx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpFVfOpcode::kVfdivvf:
         return OpVectorSameWidth<intrinsics::Vfdivvf<ElementType>,
                                  ElementType,
@@ -2232,17 +2232,17 @@
           }
           return OpVectorVmvfs<ElementType>(args.dst, args.src1);
         case Decoder::VOpFVvOpcode::kVmfeqvv:
-          return OpVectorToMaskvv<intrinsics::Vfeqvv<ElementType>, ElementType, vlmul, kVma>(
-              args.dst, args.src1, args.src2);
+          return OpVectorToMaskvv<intrinsics::Vfeqvv<ElementType>>(
+              args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
         case Decoder::VOpFVvOpcode::kVmflevv:
-          return OpVectorToMaskvv<intrinsics::Vflevv<ElementType>, ElementType, vlmul, kVma>(
-              args.dst, args.src1, args.src2);
+          return OpVectorToMaskvv<intrinsics::Vflevv<ElementType>>(
+              args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
         case Decoder::VOpFVvOpcode::kVmfltvv:
-          return OpVectorToMaskvv<intrinsics::Vfltvv<ElementType>, ElementType, vlmul, kVma>(
-              args.dst, args.src1, args.src2);
+          return OpVectorToMaskvv<intrinsics::Vfltvv<ElementType>>(
+              args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
         case Decoder::VOpFVvOpcode::kVmfnevv:
-          return OpVectorToMaskvv<intrinsics::Vfnevv<ElementType>, ElementType, vlmul, kVma>(
-              args.dst, args.src1, args.src2);
+          return OpVectorToMaskvv<intrinsics::Vfnevv<ElementType>>(
+              args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
         case Decoder::VOpFVvOpcode::kVfdivvv:
           return OpVectorSameWidth<intrinsics::Vfdivvv<ElementType>,
                                    ElementType,
@@ -2385,27 +2385,37 @@
                            kVta,
                            kVma>(args.dst, args.src, SignedType{args.imm});
       case Decoder::VOpIViOpcode::kVmseqvi:
-        return OpVectorToMaskvx<intrinsics::Vseqvx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src, SignedType{args.imm});
+        return OpVectorToMaskvx<intrinsics::Vseqvx<SignedType>>(
+            args.dst, args.src, SignedType{args.imm}, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIViOpcode::kVmsnevi:
-        return OpVectorToMaskvx<intrinsics::Vsnevx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src, SignedType{args.imm});
+        return OpVectorToMaskvx<intrinsics::Vsnevx<SignedType>>(
+            args.dst, args.src, SignedType{args.imm}, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIViOpcode::kVmsleuvi:
         // Note: Vmsleu.vi actually have signed immediate which means that we first need to
         // expand it to the width of element as signed value and then bit-cast to unsigned.
-        return OpVectorToMaskvx<intrinsics::Vslevx<UnsignedType>, UnsignedType, vlmul, kVma>(
-            args.dst, args.src, BitCastToUnsigned(SignedType{args.imm}));
+        return OpVectorToMaskvx<intrinsics::Vslevx<UnsignedType>>(
+            args.dst,
+            args.src,
+            BitCastToUnsigned(SignedType{args.imm}),
+            ToUnsigned(kElementType),
+            vlmul,
+            kValue<kVma>);
       case Decoder::VOpIViOpcode::kVmslevi:
-        return OpVectorToMaskvx<intrinsics::Vslevx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src, SignedType{args.imm});
+        return OpVectorToMaskvx<intrinsics::Vslevx<SignedType>>(
+            args.dst, args.src, SignedType{args.imm}, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIViOpcode::kVmsgtuvi:
        // Note: Vmsgtu.vi actually have signed immediate which means that we first need to
         // expand it to the width of element as signed value and then bit-cast to unsigned.
-        return OpVectorToMaskvx<intrinsics::Vsgtvx<UnsignedType>, UnsignedType, vlmul, kVma>(
-            args.dst, args.src, BitCastToUnsigned(SignedType{args.imm}));
+        return OpVectorToMaskvx<intrinsics::Vsgtvx<UnsignedType>>(
+            args.dst,
+            args.src,
+            BitCastToUnsigned(SignedType{args.imm}),
+            ToUnsigned(kElementType),
+            vlmul,
+            kValue<kVma>);
       case Decoder::VOpIViOpcode::kVmsgtvi:
-        return OpVectorToMaskvx<intrinsics::Vsgtvx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src, SignedType{args.imm});
+        return OpVectorToMaskvx<intrinsics::Vsgtvx<SignedType>>(
+            args.dst, args.src, SignedType{args.imm}, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIViOpcode::kVsadduvi:
         // Note: Vsaddu.vi actually have signed immediate which means that we first need to
         // expand it to the width of element as signed value and then bit-cast to unsigned.
@@ -2562,23 +2572,23 @@
                            kVta,
                            kVma>(args.dst, args.src1, args.src2);
       case Decoder::VOpIVvOpcode::kVmseqvv:
-        return OpVectorToMaskvv<intrinsics::Vseqvv<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorToMaskvv<intrinsics::Vseqvv<ElementType>>(
+            args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVvOpcode::kVmsnevv:
-        return OpVectorToMaskvv<intrinsics::Vsnevv<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorToMaskvv<intrinsics::Vsnevv<ElementType>>(
+            args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVvOpcode::kVmsltuvv:
-        return OpVectorToMaskvv<intrinsics::Vsltvv<UnsignedType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorToMaskvv<intrinsics::Vsltvv<UnsignedType>>(
+            args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVvOpcode::kVmsltvv:
-        return OpVectorToMaskvv<intrinsics::Vsltvv<SignedType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorToMaskvv<intrinsics::Vsltvv<SignedType>>(
+            args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVvOpcode::kVmsleuvv:
-        return OpVectorToMaskvv<intrinsics::Vslevv<UnsignedType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorToMaskvv<intrinsics::Vslevv<UnsignedType>>(
+            args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVvOpcode::kVmslevv:
-        return OpVectorToMaskvv<intrinsics::Vslevv<SignedType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorToMaskvv<intrinsics::Vslevv<SignedType>>(
+            args.dst, args.src1, args.src2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVvOpcode::kVsadduvv:
         return OpVectorvv<intrinsics::Vaddvv<SaturatingUnsignedType>,
                           SaturatingUnsignedType,
@@ -2746,29 +2756,29 @@
                            kVta,
                            kVma>(args.dst, args.src1, arg2);
       case Decoder::VOpIVxOpcode::kVmseqvx:
-        return OpVectorToMaskvx<intrinsics::Vseqvx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vseqvx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmsnevx:
-        return OpVectorToMaskvx<intrinsics::Vsnevx<ElementType>, ElementType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vsnevx<ElementType>>(
+            args.dst, args.src1, arg2, kElementType, vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmsltuvx:
-        return OpVectorToMaskvx<intrinsics::Vsltvx<UnsignedType>, UnsignedType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vsltvx<UnsignedType>>(
+            args.dst, args.src1, arg2, ToUnsigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmsltvx:
-        return OpVectorToMaskvx<intrinsics::Vsltvx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vsltvx<SignedType>>(
+            args.dst, args.src1, arg2, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmsleuvx:
-        return OpVectorToMaskvx<intrinsics::Vslevx<UnsignedType>, UnsignedType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vslevx<UnsignedType>>(
+            args.dst, args.src1, arg2, ToUnsigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmslevx:
-        return OpVectorToMaskvx<intrinsics::Vslevx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vslevx<SignedType>>(
+            args.dst, args.src1, arg2, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmsgtuvx:
-        return OpVectorToMaskvx<intrinsics::Vsgtvx<UnsignedType>, UnsignedType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vsgtvx<UnsignedType>>(
+            args.dst, args.src1, arg2, ToUnsigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVmsgtvx:
-        return OpVectorToMaskvx<intrinsics::Vsgtvx<SignedType>, SignedType, vlmul, kVma>(
-            args.dst, args.src1, arg2);
+        return OpVectorToMaskvx<intrinsics::Vsgtvx<SignedType>>(
+            args.dst, args.src1, arg2, ToSigned(kElementType), vlmul, kValue<kVma>);
       case Decoder::VOpIVxOpcode::kVsadduvx:
         return OpVectorvx<intrinsics::Vaddvx<SaturatingUnsignedType>,
                           SaturatingUnsignedType,
@@ -3712,42 +3722,42 @@
     }
   }
 
-  template <auto Intrinsic,
-            typename ElementType,
-            VectorRegisterGroupMultiplier vlmul,
-            const auto kVma,
-            CsrName... kExtraCsrs>
-  void OpVectorToMaskvv(uint8_t dst, uint8_t src1, uint8_t src2) {
-    return OpVectorToMask<Intrinsic,
-                          ElementType,
-                          NumberOfRegistersInvolved(vlmul),
-                          kVma,
-                          kExtraCsrs...>(dst, Vec{src1}, Vec{src2});
+  template <auto Intrinsic, CsrName... kExtraCsrs>
+  void OpVectorToMaskvv(uint8_t dst,
+                        uint8_t src1,
+                        uint8_t src2,
+                        const auto kElementType,
+                        const auto kVlmul,
+                        const auto kVma) {
+    return OpVectorToMask<Intrinsic, kExtraCsrs...>(
+        dst, kElementType, NumberOfRegistersInvolved(kVlmul), kVma, Vec{src1}, Vec{src2});
   }
 
-  template <auto Intrinsic,
-            typename ElementType,
-            VectorRegisterGroupMultiplier vlmul,
-            const auto kVma,
-            CsrName... kExtraCsrs>
-  void OpVectorToMaskvx(uint8_t dst, uint8_t src1, auto arg2) {
-    return OpVectorToMask<Intrinsic,
-                          ElementType,
-                          NumberOfRegistersInvolved(vlmul),
-                          kVma,
-                          kExtraCsrs...>(dst, Vec{src1}, MaybeTruncateTo<ElementType>(arg2));
+  template <auto Intrinsic, CsrName... kExtraCsrs>
+  void OpVectorToMaskvx(uint8_t dst,
+                        uint8_t src1,
+                        auto arg2,
+                        const auto kElementType,
+                        const auto kVlmul,
+                        const auto kVma) {
+    using ElementType = WrappedTypeFromId<kElementType>;
+    return OpVectorToMask<Intrinsic, kExtraCsrs...>(dst,
+                                                    kElementType,
+                                                    NumberOfRegistersInvolved(kVlmul),
+                                                    kVma,
+                                                    Vec{src1},
+                                                    MaybeTruncateTo<ElementType>(arg2));
   }
 
-  template <auto Intrinsic,
-            typename ElementType,
-            size_t kRegistersInvolved,
-            const auto kVma,
-            CsrName... kExtraCsrs,
-            typename... Args>
-  void OpVectorToMask(uint8_t dst, Args... args) {
+  template <auto Intrinsic, CsrName... kExtraCsrs, const auto kVma>
+  void OpVectorToMask(uint8_t dst,
+                      const auto kElementType,
+                      const auto kRegistersInvolved,
+                      const Value<kVma>,
+                      auto... args) {
     // All args, except dst must be aligned at kRegistersInvolved amount. We'll merge them
     // together and then do a combined check for all of them at once.
-    if (!IsAligned<kRegistersInvolved>(OrValuesOnlyForType<Vec>(args...))) {
+    if (!IsAligned(OrValuesOnlyForType<Vec>(args...), kRegistersInvolved)) {
       return Undefined();
     }
     SIMD128Register original_result(state_->cpu.v[dst]);
@@ -3760,8 +3770,9 @@
     if (vstart >= vl) [[unlikely]] {
       result_before_vl_masking = original_result;
     } else {
-      result_before_vl_masking = CollectBitmaskResult<ElementType, kRegistersInvolved>(
-          [this, vstart, vl, args...](auto index) {
+      using ElementType = WrappedTypeFromId<kElementType>;
+      result_before_vl_masking = CollectBitmaskResult(
+          kElementType, kRegistersInvolved, [this, vstart, vl, args...](auto index) {
             return Intrinsic(this->GetCsr<kExtraCsrs>()...,
                              this->GetVectorArgument<ElementType, TailProcessing::kAgnostic, kVma>(
                                  args, vstart, vl, index, intrinsics::NoInactiveProcessing{})...);
@@ -4733,19 +4744,21 @@
   // Note that we are not handling tail here! These bits remain undefined and should be handled
   // later.
   // TODO(b/317757595): Add separate tests to verify the logic.
-  template <typename ElementType, size_t kRegistersInvolved, typename Intrinsic>
-  SIMD128Register CollectBitmaskResult(Intrinsic intrinsic) {
+  SIMD128Register CollectBitmaskResult(const auto kElementType,
+                                       const size_t kRegistersInvolved,
+                                       auto intrinsic) {
     // We employ two distinct tactics to handle all possibilities:
     //   1. For 8bit/16bit types we get full UInt8/UInt16 result and thus use SIMD128Register.Set.
     //   2. For 32bit/64bit types we only get 2bit or 4bit from each call and thus need to use
     //      shifts to accumulate the result.
     //      But since each of up to 8 results is at most 4bits total bitmask is 32bit (or less).
-    std::conditional_t<sizeof(ElementType) < sizeof(UInt32), SIMD128Register, UInt32>
+    std::conditional_t<SizeOf(kElementType) < sizeof(UInt32), SIMD128Register, UInt32>
         bitmask_result{};
     for (UInt32 index = UInt32{0}; index < UInt32(kRegistersInvolved); index += UInt32{1}) {
+      using ElementType = WrappedTypeFromId<kElementType>;
       const auto [raw_result] =
           intrinsics::SimdMaskToBitMask<ElementType>(std::get<0>(intrinsic(index)));
-      if constexpr (sizeof(ElementType) < sizeof(Int32)) {
+      if constexpr (SizeOf(kElementType) < sizeof(Int32)) {
         bitmask_result.Set(raw_result, index);
       } else {
         constexpr UInt32 kElemNum =