Remove DEFINE_2OP_ARITHMETIC_INTRINSIC_VS intrinsics.

As a result of refactoring, when masking was moved partially to
separate intrinsics and partially to the interpreter, we ended up with
pretty bizarre intrinsics that take two scalars and produce the desired
result in all elements of a vector. The interpreter then takes one
element from that result.

Such intrinsics technically work but are entirely useless. To design
sensible handling of reduction instructions, we first need to run real
apps on the emulator and collect statistics: what kinds of reductions
are in actual use, whether they use masking or not, etc.

Test: berberis_all

Change-Id: I34929a3f38ca06e4dad3683e3052c236dc673616
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index f3e9337..267ee3b 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -1790,29 +1790,53 @@
     }
     switch (args.opcode) {
       case Decoder::VOpMVvOpcode::kVredsumvs:
-        return OpVectorvs<intrinsics::Vredsumvs<ElementType>, ElementType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{(args + ...)}; },
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredandvs:
-        return OpVectorvs<intrinsics::Vredandvs<ElementType>, ElementType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{(args & ...)}; },
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredorvs:
-        return OpVectorvs<intrinsics::Vredorvs<ElementType>, ElementType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{(args | ...)}; },
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredxorvs:
-        return OpVectorvs<intrinsics::Vredxorvs<ElementType>, ElementType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{(args ^ ...)}; },
+                          ElementType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredminuvs:
-        return OpVectorvs<intrinsics::Vredminvs<UnsignedType>, UnsignedType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{std::min(args...)}; },
+                          UnsignedType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredminvs:
-        return OpVectorvs<intrinsics::Vredminvs<SignedType>, SignedType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{std::min(args...)}; },
+                          SignedType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredmaxuvs:
-        return OpVectorvs<intrinsics::Vredmaxvs<UnsignedType>, UnsignedType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{std::max(args...)}; },
+                          UnsignedType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVredmaxvs:
-        return OpVectorvs<intrinsics::Vredmaxvs<SignedType>, SignedType, vlmul, vta, vma>(
-            args.dst, args.src1, args.src2);
+        return OpVectorvs<[](auto... args) { return std::tuple{std::max(args...)}; },
+                          SignedType,
+                          vlmul,
+                          vta,
+                          vma>(args.dst, args.src1, args.src2);
       case Decoder::VOpMVvOpcode::kVWXUnary0:
         switch (args.vwxunary0_opcode) {
           case Decoder::VWXUnary0Opcode::kVmvxs:
@@ -2682,7 +2706,6 @@
     if (vl == 0) [[unlikely]] {
       return;
     }
-    SIMD128Register result;
     auto mask = GetMaskForVectorOperations<vma>();
     ElementType arg1 = SIMD128Register{state_->cpu.v[src1]}.Get<ElementType>(0);
     for (size_t index = 0; index < kRegistersInvolved; ++index) {
@@ -2698,11 +2721,10 @@
             continue;
           }
         }
-        result = std::get<0>(Intrinsic(arg1, arg2.Get<ElementType>(element_index)));
-        arg1 = result.Get<ElementType>(0);
+        arg1 = std::get<0>(Intrinsic(arg1, arg2.Get<ElementType>(element_index)));
       }
     }
-    result.Set(state_->cpu.v[dst]);
+    SIMD128Register result{state_->cpu.v[dst]};
     result.Set(arg1, 0);
     result = std::get<0>(intrinsics::VectorMasking<ElementType, vta>(result, result, 0, 1));
     state_->cpu.v[dst] = result.Get<__uint128_t>();
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index 67c35b7..bc98d0c 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -539,10 +539,6 @@
                               (),                                                                 \
                               (src1, src2, src3))
 
-#define DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(name, ...)                 \
-  DEFINE_ARITHMETIC_INTRINSIC(V##name##vs, return ({ __VA_ARGS__; }); \
-                              , (ElementType src1, ElementType src2), (), (src1, src2))
-
 #define DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(name, ...)                 \
   DEFINE_ARITHMETIC_INTRINSIC(V##name##vx, return ({ __VA_ARGS__; }); \
                               , (SIMD128Register src1, ElementType src2), (), (src1, src2))
@@ -696,12 +692,6 @@
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(min, std::min(args...))
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(max, std::max(args...))
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(max, std::max(args...))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redsum, (args + ...))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redand, (args & ...))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redor, (args | ...))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redxor, (args ^ ...))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redmin, std::min(args...))
-DEFINE_2OP_ARITHMETIC_INTRINSIC_VS(redmax, std::max(args...))
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(mul, auto [arg1, arg2] = std::tuple{args...}; (arg2 * arg1))
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VX(mul, auto [arg1, arg2] = std::tuple{args...}; (arg2 * arg1))
 DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(mulh, auto [arg1, arg2] = std::tuple{args...};