[Berberis][Intrinsics] Add intrinsics for interpreter part3
Instead of bringing up the text assembler and macro assembler to generate
assembler intrinsics, write the necessary functions for the interpreter directly.
This CL adds Rev8, Rol, Ror, Sext, Sh1/2/3add(uw), and Zext.
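
For reference (not part of this change), the RISC-V Zba/Zbb operations covered
here reduce to the following, with rs1/rs2 as 64-bit register values:

  rev8            byte-reverse rs1 (i.e. __builtin_bswap64(rs1))
  rol / ror       rotate rs1 left/right by the low 6 bits of rs2
  sext.b/sext.h   sign-extend the low 8/16 bits of rs1
  zext            zero-extend rs1 (the intrinsic handles the 8/16/32-bit forms)
  shNadd          (rs1 << N) + rs2
  shNadd.uw       ((rs1 & 0xffff'ffff) << N) + rs2
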
Bug: b/358425719
Test: test berberis_interpreter_riscv64_to_arm64_insn_tests_static
Change-Id: I5c709b5c93ebde827b27888c609841c3249e29e0
diff --git a/interpreter/riscv64/interpreter_arm64_test.cc b/interpreter/riscv64/interpreter_arm64_test.cc
index d811998..5d5aff5 100644
--- a/interpreter/riscv64/interpreter_arm64_test.cc
+++ b/interpreter/riscv64/interpreter_arm64_test.cc
@@ -280,6 +280,18 @@
bit_cast<uint64_t>(int64_t{-10})}});
// Minu
TestOp(0x0a3150b3, {{50, 1, 1}});
+ // Ror
+ TestOp(0x603150b3, {{0xf000'0000'0000'000fULL, 4, 0xff00'0000'0000'0000ULL}});
+ TestOp(0x603150b3, {{0xf000'0000'0000'000fULL, 8, 0x0ff0'0000'0000'0000ULL}});
+ // Rol
+ TestOp(0x603110b3, {{0xff00'0000'0000'0000ULL, 4, 0xf000'0000'0000'000fULL}});
+ TestOp(0x603110b3, {{0x000f'ff00'0000'000fULL, 8, 0x0fff'0000'0000'0f00ULL}});
+ // Sh1add
+ TestOp(0x203120b3, {{0x0008'0000'0000'0001, 0x1001'0001'0000'0000ULL, 0x1011'0001'0000'0002ULL}});
+ // Sh2add
+ TestOp(0x203140b3, {{0x0008'0000'0000'0001, 0x0001'0001'0000'0000ULL, 0x0021'0001'0000'0004ULL}});
+ // Sh3add
+ TestOp(0x203160b3, {{0x0008'0000'0000'0001, 0x1001'0011'0000'0000ULL, 0x1041'0011'0000'0008ULL}});
// Bclr
TestOp(0x483110b3, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
TestOp(0x483110b3, {{0b1000'0001'0000'0001ULL, 8, 0b1000'0000'0000'0001ULL}});
@@ -326,6 +338,17 @@
TestOpImm(0x40015093, {{0xf000'0000'0000'0000ULL, 12, 0xffff'0000'0000'0000ULL}});
// Rori
TestOpImm(0x60015093, {{0xf000'0000'0000'000fULL, 4, 0xff00'0000'0000'0000ULL}});
+ // Rev8
+ TestOpImm(0x6b815093, {{0x0000'0000'0000'000fULL, 0, 0x0f00'0000'0000'0000ULL}});
+ TestOpImm(0x6b815093, {{0xf000'0000'0000'0000ULL, 0, 0x0000'0000'0000'00f0ULL}});
+ TestOpImm(0x6b815093, {{0x00f0'0000'0000'0000ULL, 0, 0x0000'0000'0000'f000ULL}});
+ TestOpImm(0x6b815093, {{0x0000'000f'0000'0000ULL, 0, 0x0000'0000'0f00'0000ULL}});
+
+ // Sext.b
+ TestOpImm(0x60411093, {{0b1111'1110, 0, 0xffff'ffff'ffff'fffe}}); // -2
+ // Sext.h
+ TestOpImm(0x60511093, {{0b1111'1110, 0, 0xfe}});
+ TestOpImm(0x60511093, {{0b1111'1111'1111'1110, 0, 0xffff'ffff'ffff'fffe}});
// Bclri
TestOpImm(0x48011093, {{0b1000'0001'0000'0001ULL, 0, 0b1000'0001'0000'0000ULL}});
TestOpImm(0x48011093, {{0b1000'0001'0000'0001ULL, 8, 0b1000'0000'0000'0001ULL}});
diff --git a/intrinsics/gen_intrinsics.py b/intrinsics/gen_intrinsics.py
index 36a1099..89a1795 100755
--- a/intrinsics/gen_intrinsics.py
+++ b/intrinsics/gen_intrinsics.py
@@ -408,7 +408,8 @@
# TODO(b/363057506): Add float support and clean up the logic here.
arm64_allowlist = ['AmoAdd', 'AmoAnd', 'AmoMax', 'AmoMin', 'AmoOr', 'AmoSwap', 'AmoXor', 'Bclr',
'Bclri', 'Bext', 'Bexti', 'Binv', 'Binvi', 'Bset', 'Bseti', 'Div', 'Max',
- 'Min', 'Rem']
+ 'Min', 'Rem', 'Rev8', 'Rol', 'Ror', 'Sext', 'Sh1add', 'Sh1adduw', 'Sh2add',
+ 'Sh2adduw', 'Sh3add', 'Sh3adduw', 'Zext']
if (option == 'arm64') and (name not in arm64_allowlist):
_get_placeholder_return_stmt(intr, f)
else:
diff --git a/intrinsics/riscv64_to_arm64/include/berberis/intrinsics/intrinsics.h b/intrinsics/riscv64_to_arm64/include/berberis/intrinsics/intrinsics.h
index d51960f..22cf62d 100644
--- a/intrinsics/riscv64_to_arm64/include/berberis/intrinsics/intrinsics.h
+++ b/intrinsics/riscv64_to_arm64/include/berberis/intrinsics/intrinsics.h
@@ -91,6 +91,73 @@
return {in1 % in2};
};
+inline std::tuple<uint64_t> Rev8(uint64_t in1) {
+ return {__builtin_bswap64(in1)};
+};
+
+template <typename T, enum PreferredIntrinsicsImplementation>
+inline std::tuple<T> Rol(T in1, int8_t in2) {
+ static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>);
+ // We need unsigned shifts, so that shifted-in bits are filled with zeroes.
+  if constexpr (std::is_same_v<T, int32_t>) {
+    return {(static_cast<uint32_t>(in1) << (in2 % 32)) |
+            (static_cast<uint32_t>(in1) >> ((32 - in2 % 32) % 32))};  // Avoid UB shift by 32.
+  } else {
+    return {(static_cast<uint64_t>(in1) << (in2 % 64)) |
+            (static_cast<uint64_t>(in1) >> ((64 - in2 % 64) % 64))};  // Avoid UB shift by 64.
+  }
+};
+
+template <typename T, enum PreferredIntrinsicsImplementation>
+inline std::tuple<T> Ror(T in1, int8_t in2) {
+ static_assert(std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>);
+ // We need unsigned shifts, so that shifted-in bits are filled with zeroes.
+  if constexpr (std::is_same_v<T, int32_t>) {
+    return {(static_cast<uint32_t>(in1) >> (in2 % 32)) |
+            (static_cast<uint32_t>(in1) << ((32 - in2 % 32) % 32))};  // Avoid UB shift by 32.
+  } else {
+    return {(static_cast<uint64_t>(in1) >> (in2 % 64)) |
+            (static_cast<uint64_t>(in1) << ((64 - in2 % 64) % 64))};  // Avoid UB shift by 64.
+  }
+};
+
+template <typename T, enum PreferredIntrinsicsImplementation>
+inline std::tuple<int64_t> Sext(T in1) {
+ static_assert(std::is_same_v<T, int8_t> || std::is_same_v<T, int16_t>);
+ return {static_cast<int64_t>(in1)};
+};
+
+inline std::tuple<uint64_t> Sh1add(uint64_t in1, uint64_t in2) {
+ return {uint64_t{in1} * 2 + in2};
+};
+
+inline std::tuple<uint64_t> Sh1adduw(uint32_t in1, uint64_t in2) {
+ return Sh1add(uint64_t{in1}, in2);
+};
+
+inline std::tuple<uint64_t> Sh2add(uint64_t in1, uint64_t in2) {
+ return {uint64_t{in1} * 4 + in2};
+};
+
+inline std::tuple<uint64_t> Sh2adduw(uint32_t in1, uint64_t in2) {
+ return Sh2add(uint64_t{in1}, in2);
+};
+
+inline std::tuple<uint64_t> Sh3add(uint64_t in1, uint64_t in2) {
+ return {uint64_t{in1} * 8 + in2};
+};
+
+inline std::tuple<uint64_t> Sh3adduw(uint32_t in1, uint64_t in2) {
+ return Sh3add(uint64_t{in1}, in2);
+};
+
+template <typename T, enum PreferredIntrinsicsImplementation>
+inline std::tuple<uint64_t> Zext(T in1) {
+ static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint16_t> ||
+ std::is_same_v<T, uint8_t>);
+ return {static_cast<uint64_t>(in1)};
+};
+
} // namespace intrinsics
} // namespace berberis
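
For reference, a minimal standalone sketch (not part of the patch) that mirrors
the semantics checked by the new Ror, Rev8 and Sh1add test cases above. The
helper names here (Rotr64, Sh1Add) are local to this sketch, not berberis APIs,
and it assumes a GCC/Clang toolchain for __builtin_bswap64.

  #include <cassert>
  #include <cstdint>

  namespace {

  // Rotate right, mirroring Zbb "ror" for RV64; the "% 64" on the left shift
  // keeps a rotate amount of 0 from becoming an undefined shift by 64.
  uint64_t Rotr64(uint64_t value, unsigned shamt) {
    shamt %= 64;
    return (value >> shamt) | (value << ((64 - shamt) % 64));
  }

  // Zba "sh1add": rd = (rs1 << 1) + rs2.
  uint64_t Sh1Add(uint64_t rs1, uint64_t rs2) { return (rs1 << 1) + rs2; }

  }  // namespace

  int main() {
    // Same values as the corresponding cases in interpreter_arm64_test.cc.
    assert(Rotr64(0xf000'0000'0000'000fULL, 4) == 0xff00'0000'0000'0000ULL);
    assert(__builtin_bswap64(0x0000'0000'0000'000fULL) == 0x0f00'0000'0000'0000ULL);
    assert(Sh1Add(0x0008'0000'0000'0001ULL, 0x1001'0001'0000'0000ULL) ==
           0x1011'0001'0000'0002ULL);
    return 0;
  }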