Implement Vwadd[u].wx instructions.
Test: berberis_host_tests
Change-Id: Id2754314d9124bf7285f9e86683584a86cb48fb1
diff --git a/decoder/include/berberis/decoder/riscv64/decoder.h b/decoder/include/berberis/decoder/riscv64/decoder.h
index 5c77347..3ad40ea 100644
--- a/decoder/include/berberis/decoder/riscv64/decoder.h
+++ b/decoder/include/berberis/decoder/riscv64/decoder.h
@@ -489,6 +489,8 @@
kVnmsubvx = 0b101011,
kVmaccvx = 0b101101,
kVnmsacvx = 0b101111,
+ kVwadduwx = 0b110100,
+ kVwaddwx = 0b110101,
};
enum class VSUmOpOpcode : uint8_t {
diff --git a/interpreter/riscv64/interpreter.h b/interpreter/riscv64/interpreter.h
index f04c681..1fc5750 100644
--- a/interpreter/riscv64/interpreter.h
+++ b/interpreter/riscv64/interpreter.h
@@ -2342,6 +2342,22 @@
case Decoder::VOpMVxOpcode::kVmulhvx:
return OpVectorvx<intrinsics::Vmulhvx<SignedType>, SignedType, vlmul, vta, vma>(
args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2));
+ case Decoder::VOpMVxOpcode::kVwaddwx:
+ if constexpr (sizeof(ElementType) == sizeof(Int64) ||
+ vlmul == VectorRegisterGroupMultiplier::k8registers) {
+ return Unimplemented();
+ } else {
+ return OpVectorWidenwx<intrinsics::Vwaddwx<SignedType>, SignedType, vlmul, vta, vma>(
+ args.dst, args.src1, MaybeTruncateTo<SignedType>(arg2));
+ }
+ case Decoder::VOpMVxOpcode::kVwadduwx:
+ if constexpr (sizeof(ElementType) == sizeof(Int64) ||
+ vlmul == VectorRegisterGroupMultiplier::k8registers) {
+ return Unimplemented();
+ } else {
+ return OpVectorWidenwx<intrinsics::Vwaddwx<UnsignedType>, UnsignedType, vlmul, vta, vma>(
+ args.dst, args.src1, MaybeTruncateTo<UnsignedType>(arg2));
+ }
default:
Unimplemented();
}
@@ -2935,6 +2951,22 @@
template <auto Intrinsic,
typename ElementType,
+ VectorRegisterGroupMultiplier vlmul,
+ TailProcessing vta,
+ auto vma,
+ CsrName... kExtraCsrs>
+ void OpVectorWidenwx(uint8_t dst, uint8_t src1, ElementType arg2) {  // ".wx" form: src1 is wrapped in WideVec, arg2 is a scalar.
+ return OpVectorWiden<Intrinsic,  // Thin forwarder: all work happens in OpVectorWiden.
+ ElementType,
+ NumRegistersInvolvedForWideOperand(vlmul),  // Presumably the register count of the wide (2*SEW) group - confirm.
+ NumberOfRegistersInvolved(vlmul),  // Presumably the register count of the SEW-wide group - confirm.
+ vta,
+ vma,
+ kExtraCsrs...>(dst, WideVec{src1}, arg2);  // WideVec presumably marks src1 as already holding wide elements.
+ }
+
+ template <auto Intrinsic,
+ typename ElementType,
size_t kDestRegistersInvolved,
size_t kRegistersInvolved,
TailProcessing vta,
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 7e9f93c..b557205 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -9729,6 +9729,64 @@
kVectorCalculationsSource);
}
+TEST_F(Riscv64InterpreterTest, TestVwaddwx) {  // Masked vwadd.wx; the tables are presumably expected results per element width.
+ TestWideningVectorInstruction(0xd500e457, // vwadd.wx v8, v16, ra, v0.t
+ {{0x80aa, 0x82ac, 0x84ae, 0x86b0, 0x88b2, 0x8ab4, 0x8cb6, 0x8eb8},  // 1st table: 16-bit values.
+ {0x90ba, 0x92bc, 0x94be, 0x96c0, 0x98c2, 0x9ac4, 0x9cc6, 0x9ec8},
+ {0xa0ca, 0xa2cc, 0xa4ce, 0xa6d0, 0xa8d2, 0xaad4, 0xacd6, 0xaed8},
+ {0xb0da, 0xb2dc, 0xb4de, 0xb6e0, 0xb8e2, 0xbae4, 0xbce6, 0xbee8},
+ {0xc0ea, 0xc2ec, 0xc4ee, 0xc6f0, 0xc8f2, 0xcaf4, 0xccf6, 0xcef8},
+ {0xd0fa, 0xd2fc, 0xd4fe, 0xd700, 0xd902, 0xdb04, 0xdd06, 0xdf08},
+ {0xe10a, 0xe30c, 0xe50e, 0xe710, 0xe912, 0xeb14, 0xed16, 0xef18},
+ {0xf11a, 0xf31c, 0xf51e, 0xf720, 0xf922, 0xfb24, 0xfd26, 0xff28}},
+ {{0x8302'2baa, 0x8706'2fae, 0x8b0a'33b2, 0x8f0e'37b6},  // 2nd table: 32-bit values.
+ {0x9312'3bba, 0x9716'3fbe, 0x9b1a'43c2, 0x9f1e'47c6},
+ {0xa322'4bca, 0xa726'4fce, 0xab2a'53d2, 0xaf2e'57d6},
+ {0xb332'5bda, 0xb736'5fde, 0xbb3a'63e2, 0xbf3e'67e6},
+ {0xc342'6bea, 0xc746'6fee, 0xcb4a'73f2, 0xcf4e'77f6},
+ {0xd352'7bfa, 0xd756'7ffe, 0xdb5a'8402, 0xdf5e'8806},
+ {0xe362'8c0a, 0xe766'900e, 0xeb6a'9412, 0xef6e'9816},
+ {0xf372'9c1a, 0xf776'a01e, 0xfb7a'a422, 0xff7e'a826}},
+ {{0x8706'8504'2dad'2baa, 0x8f0e'8d0c'35b5'33b2},  // 3rd table: 64-bit values.
+ {0x9716'9514'3dbd'3bba, 0x9f1e'9d1c'45c5'43c2},
+ {0xa726'a524'4dcd'4bca, 0xaf2e'ad2c'55d5'53d2},
+ {0xb736'b534'5ddd'5bda, 0xbf3e'bd3c'65e5'63e2},
+ {0xc746'c544'6ded'6bea, 0xcf4e'cd4c'75f5'73f2},
+ {0xd756'd554'7dfd'7bfa, 0xdf5e'dd5c'8605'8402},
+ {0xe766'e564'8e0d'8c0a, 0xef6e'ed6c'9615'9412},
+ {0xf776'f574'9e1d'9c1a, 0xff7e'fd7c'a625'a422}},
+ kVectorCalculationsSource);
+}
+
+TEST_F(Riscv64InterpreterTest, TestVwadduwx) {  // Masked vwaddu.wx; the tables are presumably expected results per element width.
+ TestWideningVectorInstruction(0xd100e457, // vwaddu.wx v8, v16, ra, v0.t
+ {{0x81aa, 0x83ac, 0x85ae, 0x87b0, 0x89b2, 0x8bb4, 0x8db6, 0x8fb8},  // 1st table: 16-bit values.
+ {0x91ba, 0x93bc, 0x95be, 0x97c0, 0x99c2, 0x9bc4, 0x9dc6, 0x9fc8},
+ {0xa1ca, 0xa3cc, 0xa5ce, 0xa7d0, 0xa9d2, 0xabd4, 0xadd6, 0xafd8},
+ {0xb1da, 0xb3dc, 0xb5de, 0xb7e0, 0xb9e2, 0xbbe4, 0xbde6, 0xbfe8},
+ {0xc1ea, 0xc3ec, 0xc5ee, 0xc7f0, 0xc9f2, 0xcbf4, 0xcdf6, 0xcff8},
+ {0xd1fa, 0xd3fc, 0xd5fe, 0xd800, 0xda02, 0xdc04, 0xde06, 0xe008},
+ {0xe20a, 0xe40c, 0xe60e, 0xe810, 0xea12, 0xec14, 0xee16, 0xf018},
+ {0xf21a, 0xf41c, 0xf61e, 0xf820, 0xfa22, 0xfc24, 0xfe26, 0x0028}},
+ {{0x8303'2baa, 0x8707'2fae, 0x8b0b'33b2, 0x8f0f'37b6},  // 2nd table: 32-bit values.
+ {0x9313'3bba, 0x9717'3fbe, 0x9b1b'43c2, 0x9f1f'47c6},
+ {0xa323'4bca, 0xa727'4fce, 0xab2b'53d2, 0xaf2f'57d6},
+ {0xb333'5bda, 0xb737'5fde, 0xbb3b'63e2, 0xbf3f'67e6},
+ {0xc343'6bea, 0xc747'6fee, 0xcb4b'73f2, 0xcf4f'77f6},
+ {0xd353'7bfa, 0xd757'7ffe, 0xdb5b'8402, 0xdf5f'8806},
+ {0xe363'8c0a, 0xe767'900e, 0xeb6b'9412, 0xef6f'9816},
+ {0xf373'9c1a, 0xf777'a01e, 0xfb7b'a422, 0xff7f'a826}},
+ {{0x8706'8505'2dad'2baa, 0x8f0e'8d0d'35b5'33b2},  // 3rd table: 64-bit values.
+ {0x9716'9515'3dbd'3bba, 0x9f1e'9d1d'45c5'43c2},
+ {0xa726'a525'4dcd'4bca, 0xaf2e'ad2d'55d5'53d2},
+ {0xb736'b535'5ddd'5bda, 0xbf3e'bd3d'65e5'63e2},
+ {0xc746'c545'6ded'6bea, 0xcf4e'cd4d'75f5'73f2},
+ {0xd756'd555'7dfd'7bfa, 0xdf5e'dd5d'8605'8402},
+ {0xe766'e565'8e0d'8c0a, 0xef6e'ed6d'9615'9412},
+ {0xf776'f575'9e1d'9c1a, 0xff7e'fd7d'a625'a422}},
+ kVectorCalculationsSource);
+}
+
TEST_F(Riscv64InterpreterTest, TestVwsubu) {
TestWideningVectorInstruction(0xc90c2457, // vwsubu.vv v8, v16, v24, v0.t
{{0x0000, 0xffef, 0xfffe, 0xffed, 0xfffb, 0xffeb, 0xfffa, 0xffe9},
diff --git a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
index da8d993..fb6fbd5 100644
--- a/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
+++ b/intrinsics/riscv64/include/berberis/intrinsics/riscv64/vector_intrinsics.h
@@ -446,6 +446,24 @@
return result;
}
+// 2*SEW = 2*SEW op SEW: the first operand is read as wide elements, the second is read narrow and widened.
+// TODO(b/260725458): Pass lambda as template argument after C++20 would become available.
+template <typename ElementType, typename Lambda, typename... ParameterType>
+inline std::tuple<SIMD128Register> VectorArithmeticWidenwv(Lambda lambda,
+ ParameterType... parameters) {
+ static_assert(((std::is_same_v<ParameterType, SIMD128Register> ||  // Each operand is a vector register...
+ std::is_same_v<ParameterType, ElementType>)&&...));  // ...or a scalar of the narrow element type.
+ SIMD128Register result;
+ constexpr int kElementsCount = static_cast<int>(8 / sizeof(ElementType));  // Narrow elements per 8 bytes.
+ for (int index = 0; index < kElementsCount; ++index) {
+ auto [src1, src2] = std::tuple{parameters...};  // Exactly two operands are required: wide first, narrow second.
+ result.Set(lambda(VectorElement<WideType<ElementType>>(src1, index),  // src1 element is used at wide width as-is.
+ Widen(VectorElement<ElementType>(src2, index))),  // src2 element is widened before the operation.
+ index);
+ }
+ return result;
+}
+
template <typename ElementType>
SIMD128Register VectorExtend(SIMD128Register src) {
SIMD128Register result;
@@ -707,6 +725,10 @@
DEFINE_W_ARITHMETIC_INTRINSIC(Vw##name##vv, Widenvv, return ({ __VA_ARGS__; }); \
, (SIMD128Register src1, SIMD128Register src2), (src1, src2))
+#define DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WX(name, ...) \
+ DEFINE_W_ARITHMETIC_INTRINSIC(Vw##name##wx, Widenwv, return ({ __VA_ARGS__; }); \
+ , (SIMD128Register src1, ElementType src2), (src1, src2))  // Vw<name>wx(vector src1, scalar src2); "Widenwv" presumably selects VectorArithmeticWidenwv.
+
DEFINE_1OP_ARITHMETIC_INTRINSIC_V(copy, auto [arg] = std::tuple{args...}; arg)
DEFINE_1OP_ARITHMETIC_INTRINSIC_X(copy, auto [arg] = std::tuple{args...}; arg)
DEFINE_2OP_ARITHMETIC_INTRINSIC_VV(add, (args + ...))
@@ -841,6 +863,7 @@
DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV(mulsu, auto [arg1, arg2] = std::tuple{args...};
(BitCastToUnsigned(Widen(BitCastToSigned(Narrow(arg2))))) *
(Widen(BitCastToUnsigned(Narrow(arg1)))))
+DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WX(add, (args + ...))
DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WV(sr, auto [arg1, arg2] = std::tuple{args...};
(arg1 >> arg2))
@@ -861,6 +884,7 @@
#undef DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WV
#undef DEFINE_2OP_NARROW_ARITHMETIC_INTRINSIC_WX
#undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_VV
+#undef DEFINE_2OP_WIDEN_ARITHMETIC_INTRINSIC_WX
} // namespace berberis::intrinsics