[heavy_optimizer/riscv64] Fix typo in lr/sc

The address alignment calculation had a typo: the mask was computed for
a 128-bit reservation value rather than the 64-bit one. Moved the lr/sc
unit tests into the shared insn tests and disabled them for the lite
translator.
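
For illustration, a minimal sketch of the two alignment masks (assuming
Reservation is the 64-bit guest reservation value type, so
sizeof(Reservation) == 8; the address is made up):

    #include <cstdint>
    #include <cstdio>

    using Reservation = uint64_t;  // assumption: 64-bit reservation value

    int main() {
      uint64_t addr = 0x1000'001c;
      // Old (typo): ~0xf aligns down to 16 bytes, as if the reservation
      // value were 128-bit.
      uint64_t old_aligned = addr & ~uint64_t{0xf};  // 0x1000'0010
      // Fixed: ~(sizeof(Reservation) - 1) == ~0x7 aligns down to 8 bytes.
      uint64_t new_aligned =
          addr & ~uint64_t{sizeof(Reservation) - 1};  // 0x1000'0018
      // With the typo, an lr/sc on the upper 8 bytes of a 16-byte region
      // loaded its reservation value from the wrong address.
      printf("%#llx %#llx\n", (unsigned long long)old_aligned,
             (unsigned long long)new_aligned);
      return 0;
    }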

Bug: 291126436
Test: run berberis_hello_world_static in emulator
(cherry picked from https://googleplex-android-review.googlesource.com/q/commit:6c8f342a775233b7f5306946d3925f2a851fe0da)

Merged-In: I08245c9cf83811a2ab5e62cfcd5c006e5e8652af
Change-Id: I08245c9cf83811a2ab5e62cfcd5c006e5e8652af
diff --git a/heavy_optimizer/riscv64/frontend.cc b/heavy_optimizer/riscv64/frontend.cc
index 69d7d21..3761e0a 100644
--- a/heavy_optimizer/riscv64/frontend.cc
+++ b/heavy_optimizer/riscv64/frontend.cc
@@ -877,12 +877,11 @@
                           {x86_64::kMachineRegRBP, x86_64::CallImm::kIntRegType},
                       }});
 
-  // Load monitor value and store it in CPUState.
-  auto monitor = AllocTempSimdReg();
-  MachineReg reservation_reg = monitor.machine_reg();
-  Gen<x86_64::MovqRegMemBaseDisp>(reservation_reg, aligned_addr, 0);
+  // Load reservation value and store it in CPUState.
+  auto reservation = AllocTempReg();
+  Gen<x86_64::MovqRegMemBaseDisp>(reservation, aligned_addr, 0);
   int32_t value_offset = GetThreadStateReservationValueOffset();
-  Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, value_offset, reservation_reg);
+  Gen<x86_64::MovqMemBaseDispReg>(x86_64::kMachineRegRBP, value_offset, reservation);
 }
 
 Register HeavyOptimizerFrontend::MemoryRegionReservationExchange(Register aligned_addr,
diff --git a/heavy_optimizer/riscv64/frontend.h b/heavy_optimizer/riscv64/frontend.h
index 9dbbdfc..577bdfd 100644
--- a/heavy_optimizer/riscv64/frontend.h
+++ b/heavy_optimizer/riscv64/frontend.h
@@ -172,7 +172,7 @@
     Register aligned_addr = AllocTempReg();
     Gen<PseudoCopy>(aligned_addr, addr, 8);
     // The immediate is sign extended to 64-bit.
-    Gen<x86_64::AndqRegImm>(aligned_addr, ~int32_t{0xf}, GetFlagsRegister());
+    Gen<x86_64::AndqRegImm>(aligned_addr, ~int32_t{sizeof(Reservation) - 1}, GetFlagsRegister());
 
     MemoryRegionReservationLoad(aligned_addr);
 
@@ -193,7 +193,7 @@
     auto aligned_addr = AllocTempReg();
     Gen<PseudoCopy>(aligned_addr, addr, 8);
     // The immediate is sign extended to 64-bit.
-    Gen<x86_64::AndqRegImm>(aligned_addr, ~int32_t{0xf}, GetFlagsRegister());
+    Gen<x86_64::AndqRegImm>(aligned_addr, ~int32_t{sizeof(Reservation) - 1}, GetFlagsRegister());
 
     // Load current monitor value before we clobber it.
     auto reservation_value = AllocTempReg();
diff --git a/heavy_optimizer/riscv64/heavy_optimizer_insn_exec_tests.cc b/heavy_optimizer/riscv64/heavy_optimizer_insn_exec_tests.cc
index c8fb724..861bad8 100644
--- a/heavy_optimizer/riscv64/heavy_optimizer_insn_exec_tests.cc
+++ b/heavy_optimizer/riscv64/heavy_optimizer_insn_exec_tests.cc
@@ -25,6 +25,7 @@
 #include "berberis/guest_state/guest_addr.h"
 #include "berberis/guest_state/guest_state.h"
 #include "berberis/heavy_optimizer/riscv64/heavy_optimize_region.h"
+#include "berberis/runtime_primitives/memory_region_reservation.h"
 #include "berberis/test_utils/scoped_exec_region.h"
 #include "berberis/test_utils/testing_run_generated_code.h"
 
diff --git a/interpreter/riscv64/interpreter_test.cc b/interpreter/riscv64/interpreter_test.cc
index 607e0f6..456a075 100644
--- a/interpreter/riscv64/interpreter_test.cc
+++ b/interpreter/riscv64/interpreter_test.cc
@@ -54,58 +54,6 @@
     InterpretInsn(&state_);
   }
 
-  void TestAtomicLoad(uint32_t insn_bytes,
-                      const uint64_t* const data_to_load,
-                      uint64_t expected_result) {
-    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
-    SetXReg<1>(state_.cpu, ToGuestAddr(data_to_load));
-    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
-    EXPECT_EQ(GetXReg<2>(state_.cpu), expected_result);
-    EXPECT_EQ(state_.cpu.reservation_address, ToGuestAddr(data_to_load));
-    // We always reserve the full 64-bit range of the reservation address.
-    EXPECT_EQ(state_.cpu.reservation_value, *data_to_load);
-  }
-
-  template <typename T>
-  void TestAtomicStore(uint32_t insn_bytes, T expected_result) {
-    store_area_[0] = ~uint64_t{0};
-    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
-    SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
-    SetXReg<2>(state_.cpu, kScalarDataToStore);
-    SetXReg<3>(state_.cpu, 0xdeadbeef);
-    state_.cpu.reservation_address = ToGuestAddr(&store_area_);
-    state_.cpu.reservation_value = store_area_[0];
-    MemoryRegionReservation::SetOwner(ToGuestAddr(&store_area_), &state_.cpu);
-    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
-    EXPECT_EQ(static_cast<T>(store_area_[0]), expected_result);
-    EXPECT_EQ(GetXReg<3>(state_.cpu), 0u);
-  }
-
-  void TestAtomicStoreNoLoadFailure(uint32_t insn_bytes) {
-    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
-    SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
-    SetXReg<2>(state_.cpu, kScalarDataToStore);
-    SetXReg<3>(state_.cpu, 0xdeadbeef);
-    store_area_[0] = 0;
-    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
-    EXPECT_EQ(store_area_[0], 0u);
-    EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
-  }
-
-  void TestAtomicStoreDifferentLoadFailure(uint32_t insn_bytes) {
-    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
-    SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
-    SetXReg<2>(state_.cpu, kScalarDataToStore);
-    SetXReg<3>(state_.cpu, 0xdeadbeef);
-    state_.cpu.reservation_address = ToGuestAddr(&kScalarDataToStore);
-    state_.cpu.reservation_value = 0;
-    MemoryRegionReservation::SetOwner(ToGuestAddr(&kScalarDataToStore), &state_.cpu);
-    store_area_[0] = 0;
-    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
-    EXPECT_EQ(store_area_[0], 0u);
-    EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
-  }
-
   // Vector instructions.
   template <size_t kNFfields>
   void TestVle(uint32_t insn_bytes) {
@@ -365,10 +313,6 @@
   // Undisturbed result is put in registers v8, v9, …, v15 and is expected to get read back.
   static constexpr __m128i kUndisturbedResult = {0x5555'5555'5555'5555, 0x5555'5555'5555'5555};
 
-  static constexpr uint64_t kScalarDataToLoad{0xffffeeeeddddccccULL};
-  static constexpr uint64_t kScalarDataToStore = kScalarDataToLoad;
-  // Store area for store instructions.  We need at least 16 uint64_t to handle 8×128bit registers,
-  // plus 2× of that to test strided instructions.
   alignas(16) uint64_t store_area_[32];
   ThreadState state_;
 };
@@ -422,47 +366,7 @@
   close(pipefd[0]);
   close(pipefd[1]);
 }
 
-TEST_F(Riscv64InterpreterTest, AtomicLoadInstructions) {
-  // Validate sign-extension of returned value.
-  const uint64_t kNegative32BitValue = 0x0000'0000'8000'0000ULL;
-  const uint64_t kSignExtendedNegative = 0xffff'ffff'8000'0000ULL;
-  const uint64_t kPositive32BitValue = 0xffff'ffff'0000'0000ULL;
-  const uint64_t kSignExtendedPositive = 0ULL;
-  static_assert(static_cast<int32_t>(kSignExtendedPositive) >= 0);
-  static_assert(static_cast<int32_t>(kSignExtendedNegative) < 0);
-
-  // Lrw - sign extends from 32 to 64.
-  TestAtomicLoad(0x1000a12f, &kPositive32BitValue, kSignExtendedPositive);
-  TestAtomicLoad(0x1000a12f, &kNegative32BitValue, kSignExtendedNegative);
-
-  // Lrd
-  TestAtomicLoad(0x1000b12f, &kScalarDataToLoad, kScalarDataToLoad);
-}
-
-TEST_F(Riscv64InterpreterTest, AtomicStoreInstructions) {
-  // Scw
-  TestAtomicStore(0x1820a1af, static_cast<uint32_t>(kScalarDataToStore));
-
-  // Scd
-  TestAtomicStore(0x1820b1af, kScalarDataToStore);
-}
-
-TEST_F(Riscv64InterpreterTest, AtomicStoreInstructionNoLoadFailure) {
-  // Scw
-  TestAtomicStoreNoLoadFailure(0x1820a1af);
-
-  // Scd
-  TestAtomicStoreNoLoadFailure(0x1820b1af);
-}
-
-TEST_F(Riscv64InterpreterTest, AtomicStoreInstructionDifferentLoadFailure) {
-  // Scw
-  TestAtomicStoreDifferentLoadFailure(0x1820a1af);
-
-  // Scd
-  TestAtomicStoreDifferentLoadFailure(0x1820b1af);
-}
 TEST_F(Riscv64InterpreterTest, TestVle) {
   TestVle<1>(0x2808407);   // vl1re8.v v8, (x1)
   TestVle<2>(0x22808407);  // vl2re8.v v8, (x1)
diff --git a/lite_translator/riscv64_to_x86_64/lite_translate_insn_exec_tests.cc b/lite_translator/riscv64_to_x86_64/lite_translate_insn_exec_tests.cc
index 5f5d44f..d9146e1 100644
--- a/lite_translator/riscv64_to_x86_64/lite_translate_insn_exec_tests.cc
+++ b/lite_translator/riscv64_to_x86_64/lite_translate_insn_exec_tests.cc
@@ -24,6 +24,7 @@
 #include "berberis/guest_state/guest_addr.h"
 #include "berberis/guest_state/guest_state.h"
 #include "berberis/lite_translator/lite_translate_region.h"
+#include "berberis/runtime_primitives/memory_region_reservation.h"
 #include "berberis/test_utils/scoped_exec_region.h"
 #include "berberis/test_utils/testing_run_generated_code.h"
 
diff --git a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
index cb5c7fe..93a8f53 100644
--- a/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
+++ b/test_utils/include/berberis/test_utils/insn_tests_riscv64-inl.h
@@ -347,6 +347,66 @@
     }
   }
 
+#if (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
+
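+  // Runs the lr.w/lr.d in insn_bytes and checks the result and reservation.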
+  void TestAtomicLoad(uint32_t insn_bytes,
+                      const uint64_t* const data_to_load,
+                      uint64_t expected_result) {
+    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+    SetXReg<1>(state_.cpu, ToGuestAddr(data_to_load));
+    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+    EXPECT_EQ(GetXReg<2>(state_.cpu), expected_result);
+    EXPECT_EQ(state_.cpu.reservation_address, ToGuestAddr(data_to_load));
+    // We always reserve the full 64-bit region at the reservation address.
+    EXPECT_EQ(state_.cpu.reservation_value, *data_to_load);
+  }
+
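+  // Runs the sc.w/sc.d in insn_bytes with a matching reservation; expects success.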
+  template <typename T>
+  void TestAtomicStore(uint32_t insn_bytes, T expected_result) {
+    store_area_ = ~uint64_t{0};
+    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+    SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
+    SetXReg<2>(state_.cpu, kDataToStore);
+    SetXReg<3>(state_.cpu, 0xdeadbeef);
+    state_.cpu.reservation_address = ToGuestAddr(&store_area_);
+    state_.cpu.reservation_value = store_area_;
+    MemoryRegionReservation::SetOwner(ToGuestAddr(&store_area_), &state_.cpu);
+    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+    EXPECT_EQ(static_cast<T>(store_area_), expected_result);
+    EXPECT_EQ(GetXReg<3>(state_.cpu), 0u);
+  }
+
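+  // Runs sc.w/sc.d without any prior reservation; expects failure.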
+  void TestAtomicStoreNoLoadFailure(uint32_t insn_bytes) {
+    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+    SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
+    SetXReg<2>(state_.cpu, kDataToStore);
+    SetXReg<3>(state_.cpu, 0xdeadbeef);
+    store_area_ = 0;
+    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+    EXPECT_EQ(store_area_, 0u);
+    EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
+  }
+
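+  // Runs sc.w/sc.d with a reservation on a different address; expects failure.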
+  void TestAtomicStoreDifferentLoadFailure(uint32_t insn_bytes) {
+    state_.cpu.insn_addr = ToGuestAddr(&insn_bytes);
+    SetXReg<1>(state_.cpu, ToGuestAddr(&store_area_));
+    SetXReg<2>(state_.cpu, kDataToStore);
+    SetXReg<3>(state_.cpu, 0xdeadbeef);
+    state_.cpu.reservation_address = ToGuestAddr(&kDataToStore);
+    state_.cpu.reservation_value = 0;
+    MemoryRegionReservation::SetOwner(ToGuestAddr(&kDataToStore), &state_.cpu);
+    store_area_ = 0;
+    EXPECT_TRUE(RunOneInstruction(&state_, state_.cpu.insn_addr + 4));
+    EXPECT_EQ(store_area_, 0u);
+    EXPECT_EQ(GetXReg<3>(state_.cpu), 1u);
+  }
+
+#endif  // (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
+
   void TestAmo(uint32_t insn_bytes,
                uint64_t arg1,
                uint64_t arg2,
@@ -1721,6 +1777,51 @@
   TestFma(0x223170cf, {std::tuple{1.0, 2.0, 3.0, -5.0}});
 }
 
+#if (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
+
+TEST_F(TESTSUITE, AtomicLoadInstructions) {
+  // Validate sign-extension of returned value.
+  const uint64_t kNegative32BitValue = 0x0000'0000'8000'0000ULL;
+  const uint64_t kSignExtendedNegative = 0xffff'ffff'8000'0000ULL;
+  const uint64_t kPositive32BitValue = 0xffff'ffff'0000'0000ULL;
+  const uint64_t kSignExtendedPositive = 0ULL;
+  static_assert(static_cast<int32_t>(kSignExtendedPositive) >= 0);
+  static_assert(static_cast<int32_t>(kSignExtendedNegative) < 0);
+
+  // Lrw - sign extends from 32 to 64.
+  TestAtomicLoad(0x1000a12f, &kPositive32BitValue, kSignExtendedPositive);
+  TestAtomicLoad(0x1000a12f, &kNegative32BitValue, kSignExtendedNegative);
+
+  // Lrd
+  TestAtomicLoad(0x1000b12f, &kDataToLoad, kDataToLoad);
+}
+
+TEST_F(TESTSUITE, AtomicStoreInstructions) {
+  // Scw
+  TestAtomicStore(0x1820a1af, static_cast<uint32_t>(kDataToStore));
+
+  // Scd
+  TestAtomicStore(0x1820b1af, kDataToStore);
+}
+
+TEST_F(TESTSUITE, AtomicStoreInstructionNoLoadFailure) {
+  // Scw
+  TestAtomicStoreNoLoadFailure(0x1820a1af);
+
+  // Scd
+  TestAtomicStoreNoLoadFailure(0x1820b1af);
+}
+
+TEST_F(TESTSUITE, AtomicStoreInstructionDifferentLoadFailure) {
+  // Scw
+  TestAtomicStoreDifferentLoadFailure(0x1820a1af);
+
+  // Scd
+  TestAtomicStoreDifferentLoadFailure(0x1820b1af);
+}
+
+#endif  // (defined(TESTING_INTERPRETER) || defined(TESTING_HEAVY_OPTIMIZER))
+
 TEST_F(TESTSUITE, AmoInstructions) {
   // Verifying that all aq and rl combinations work for Amoswap, but only test relaxed one for most
   // other instructions for brevity.