[intrinsics] Emit mem opcodes and add full FCsr support
This also enables the FCsr tests now that the intrinsics are turned on.
Bug: 291126259
Test: mm and berberis_host_tests
(cherry picked from https://googleplex-android-review.googlesource.com/q/commit:728d170ac92a1dd3a5b14e249e191b75e8add1f9)
Merged-In: I194d5897e4ef57f655d2957a6b01974837eb277c
Change-Id: I194d5897e4ef57f655d2957a6b01974837eb277c
diff --git a/backend/Android.bp b/backend/Android.bp
index f05d744..5481d72 100644
--- a/backend/Android.bp
+++ b/backend/Android.bp
@@ -149,6 +149,7 @@
name: "libberberis_backend_headers_riscv64_to_x86_64",
defaults: ["berberis_defaults_64"],
host_supported: true,
+ export_include_dirs: ["riscv64_to_x86_64/include"],
header_libs: [
"libberberis_backend_headers",
"libberberis_guest_state_riscv64_headers",
diff --git a/backend/include/berberis/backend/x86_64/machine_ir.h b/backend/include/berberis/backend/x86_64/machine_ir.h
index d8d4a67..259c9fd 100644
--- a/backend/include/berberis/backend/x86_64/machine_ir.h
+++ b/backend/include/berberis/backend/x86_64/machine_ir.h
@@ -43,6 +43,7 @@
kMachineOpPseudoJump,
kMachineOpPseudoReadFlags,
kMachineOpPseudoWriteFlags,
+#include "berberis/backend/x86_64/machine_opcode_guest-inl.h"
#include "machine_opcode_x86_64-inl.h" // NOLINT generated file!
};
diff --git a/backend/riscv64_to_x86_64/include/berberis/backend/x86_64/machine_opcode_guest-inl.h b/backend/riscv64_to_x86_64/include/berberis/backend/x86_64/machine_opcode_guest-inl.h
new file mode 100644
index 0000000..7707972
--- /dev/null
+++ b/backend/riscv64_to_x86_64/include/berberis/backend/x86_64/machine_opcode_guest-inl.h
@@ -0,0 +1,7 @@
+
+ kMachineOpMacroFeGetExceptionsTranslateMemBaseDispReg,
+ kMachineOpMacroFeSetExceptionsAndRoundImmTranslateMemBaseDispImm,
+ kMachineOpMacroFeSetExceptionsAndRoundTranslateRegMemBaseDispRegReg,
+ kMachineOpMacroFeSetExceptionsImmTranslateMemBaseDispImm,
+ kMachineOpMacroFeSetExceptionsTranslateRegMemBaseDispReg,
+ kMachineOpMacroFeSetRoundImmTranslateMemBaseDispMemBaseDispImm,
diff --git a/heavy_optimizer/riscv64/frontend.h b/heavy_optimizer/riscv64/frontend.h
index 7294bec..54cbe09 100644
--- a/heavy_optimizer/riscv64/frontend.h
+++ b/heavy_optimizer/riscv64/frontend.h
@@ -499,9 +499,8 @@
HeavyOptimizerFrontend::GetCsr<CsrName::kFCsr>() {
auto csr_reg = AllocTempReg();
auto tmp = AllocTempReg();
- bool inline_successful = TryInlineIntrinsicForHeavyOptimizer<&intrinsics::FeGetExceptions>(
+ InlineIntrinsicForHeavyOptimizer<&intrinsics::FeGetExceptions>(
&builder_, tmp, GetFlagsRegister());
- CHECK(inline_successful);
Gen<x86_64::MovzxbqRegMemBaseDisp>(
csr_reg, x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>);
Gen<x86_64::ShlbRegImm>(csr_reg, 5, GetFlagsRegister());
@@ -540,52 +539,38 @@
}
template <>
-inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFCsr>(uint8_t /* imm */) {
- Unimplemented();
- // TODO(b/291126436) Figure out how to pass Mem arg to FeSetExceptionsAndRoundImmTranslate.
- // // Note: instructions Csrrci or Csrrsi couldn't affect Frm because immediate only has five
- // bits.
- // // But these instruction don't pass their immediate-specified argument into `SetCsr`, they
- // combine
- // // it with register first. Fixing that can only be done by changing code in the semantics
- // player.
- // //
- // // But Csrrwi may clear it. And we actually may only arrive here from Csrrwi.
- // // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good
- // idea
- // // to rely on that: it's very subtle and it only affects code generation speed.
- // Gen<x86_64::MovbMemBaseDispImm>(x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>,
- // static_cast<int8_t>(imm >> 5)); bool successful =
- // TryInlineIntrinsicForHeavyOptimizer<&intrinsics::FeSetExceptionsAndRoundImmTranslate>(
- // &builder_,
- // GetFlagsRegister(),
- // x86_64::kMachineRegRBP,
- // static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area)),
- // imm);
- // CHECK(successful);
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFCsr>(uint8_t imm) {
+ // Note: instructions Csrrci or Csrrsi couldn't affect Frm because immediate only has five bits.
+ // But these instructions don't pass their immediate-specified argument into `SetCsr`, they combine
+ // it with register first. Fixing that can only be done by changing code in the semantics player.
+ //
+ // But Csrrwi may clear it. And we actually may only arrive here from Csrrwi.
+ // Thus, technically, we know that imm >> 5 is always zero, but it doesn't look like a good idea
+ // to rely on that: it's very subtle and it only affects code generation speed.
+ Gen<x86_64::MovbMemBaseDispImm>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>, static_cast<int8_t>(imm >> 5));
+ InlineIntrinsicForHeavyOptimizerVoid<&intrinsics::FeSetExceptionsAndRoundImm>(
+ &builder_, GetFlagsRegister(), imm);
}
template <>
-inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFCsr>(Register /* arg */) {
- Unimplemented();
- // TODO(b/291126436) Figure out how to pass Mem arg to FeSetExceptionsAndRoundTranslate.
- // auto tmp1 = AllocTempReg();
- // auto tmp2 = AllocTempReg();
- // Gen<PseudoCopy>(tmp1, arg, 1);
- // Gen<x86_64::AndlRegImm>(tmp1, 0b1'1111, GetFlagsRegister());
- // Gen<x86_64::ShldlRegRegImm>(tmp2, arg, int8_t{32 - 5}, GetFlagsRegister());
- // Gen<x86_64::AndbRegImm>(tmp2, kCsrMask<CsrName::kFrm>, GetFlagsRegister());
- // Gen<x86_64::MovbMemBaseDispReg>(x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>,
- // tmp2);
- // bool successful =
- // TryInlineIntrinsicForHeavyOptimizer<&intrinsics::FeSetExceptionsAndRoundTranslate>(
- // &builder_,
- // GetFlagsRegister(),
- // tmp1,
- // x86_64::kMachineRegRBP,
- // static_cast<int>(offsetof(ThreadState, intrinsics_scratch_area)),
- // tmp1);
- // CHECK(successful);
+inline void HeavyOptimizerFrontend::SetCsr<CsrName::kFCsr>(Register arg) {
+ // Check size to be sure we can use Andb and Movb below.
+ static_assert(sizeof(kCsrMask<CsrName::kFrm>) == 1);
+
+ auto exceptions = AllocTempReg();
+ auto rounding_mode = AllocTempReg();
+ Gen<PseudoCopy>(exceptions, arg, 1);
+ Gen<x86_64::AndlRegImm>(exceptions, 0b1'1111, GetFlagsRegister());
+ // We don't care about the data in rounding_mode because we will shift in the
+ // data we need.
+ Gen<PseudoDefReg>(rounding_mode);
+ Gen<x86_64::ShldlRegRegImm>(rounding_mode, arg, int8_t{32 - 5}, GetFlagsRegister());
+ Gen<x86_64::AndbRegImm>(rounding_mode, kCsrMask<CsrName::kFrm>, GetFlagsRegister());
+ Gen<x86_64::MovbMemBaseDispReg>(
+ x86_64::kMachineRegRBP, kCsrFieldOffset<CsrName::kFrm>, rounding_mode);
+ InlineIntrinsicForHeavyOptimizerVoid<&intrinsics::FeSetExceptionsAndRound>(
+ &builder_, GetFlagsRegister(), exceptions, rounding_mode);
}
template <>
diff --git a/heavy_optimizer/riscv64/inline_intrinsic.h b/heavy_optimizer/riscv64/inline_intrinsic.h
index 443a1f7..a869f6f 100644
--- a/heavy_optimizer/riscv64/inline_intrinsic.h
+++ b/heavy_optimizer/riscv64/inline_intrinsic.h
@@ -29,6 +29,7 @@
#include "berberis/backend/x86_64/machine_insn_intrinsics.h"
#include "berberis/backend/x86_64/machine_ir.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
+#include "berberis/base/checks.h"
#include "berberis/base/config.h"
#include "berberis/base/dependent_false.h"
#include "berberis/intrinsics/common_to_x86/intrinsics_bindings.h"
@@ -490,6 +491,16 @@
builder, result, flag_register, args...);
}
+template <auto kFunction, typename ResType, typename FlagRegister, typename... ArgType>
+void InlineIntrinsicForHeavyOptimizer(x86_64::MachineIRBuilder* builder,
+ ResType result,
+ FlagRegister flag_register,
+ ArgType... args) {
+ bool success = TryInlineIntrinsicForHeavyOptimizer<kFunction, ResType, FlagRegister, ArgType...>(
+ builder, result, flag_register, args...);
+ CHECK(success);
+}
+
template <auto kFunction, typename FlagRegister, typename... ArgType>
bool TryInlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
FlagRegister flag_register,
@@ -505,6 +516,15 @@
builder, std::monostate{}, flag_register, args...);
}
+template <auto kFunction, typename FlagRegister, typename... ArgType>
+void InlineIntrinsicForHeavyOptimizerVoid(x86_64::MachineIRBuilder* builder,
+ FlagRegister flag_register,
+ ArgType... args) {
+ bool success = TryInlineIntrinsicForHeavyOptimizerVoid<kFunction, FlagRegister, ArgType...>(
+ builder, flag_register, args...);
+ CHECK(success);
+}
+
} // namespace berberis
#endif // BERBERIS_HEAVY_OPTIMIZER_RISCV64_INLINE_INTRINSIC_H_
diff --git a/intrinsics/gen_intrinsics.py b/intrinsics/gen_intrinsics.py
index 2d56a66..74d2fed 100755
--- a/intrinsics/gen_intrinsics.py
+++ b/intrinsics/gen_intrinsics.py
@@ -785,7 +785,9 @@
def _gen_opcode_generator(asm, opcode_generators):
name = asm['name']
- opcode = 'Undefined' if any([arg.get('class').startswith("Mem") and arg.get('usage') == 'def_early_clobber' for arg in asm['args']]) else name
+ num_mem_args = sum(1 for arg in asm['args'] if arg.get('class').startswith("Mem") and arg.get('usage') == 'def_early_clobber')
+ opcode = 'Undefined' if num_mem_args > 2 else (asm_defs.get_mem_macro_name(asm, '').replace("Mem", "MemBaseDisp")) if num_mem_args > 0 else name
+
if name not in opcode_generators:
opcode_generators[name] = True
yield """
diff --git a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
index f3196ee..cb5c7fe 100644
--- a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
+++ b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
@@ -1126,8 +1126,6 @@
TestFrm(0x0020f173, 0, 0);
}
-#if defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
-
TEST_F(TESTSUITE, FCsrRegister) {
fenv_t saved_environment;
EXPECT_EQ(fegetenv(&saved_environment), 0);
@@ -1171,8 +1169,6 @@
EXPECT_EQ(fesetenv(&saved_environment), 0);
}
-#endif // defined(TESTING_INTERPRETER) || defined(TESTING_LITE_TRANSLATOR)
-
TEST_F(TESTSUITE, FFlagsRegister) {
fenv_t saved_environment;
EXPECT_EQ(fegetenv(&saved_environment), 0);