Merge "Move some profman messages from LOG(ERROR) to LOG(WARNING)"
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index af5fa40..52e27af 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -222,7 +222,7 @@
     return result;
   }
 
-  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
   void TestStringReference(uint32_t string_offset);
   void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
 
@@ -290,15 +290,16 @@
     kUnpatchedPcRelativeRawCode);
 const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u;
 
-void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
-                                                      uint32_t element_offset) {
-  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void Thumb2RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+                                                   uint32_t string_entry_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+  bss_begin_ = bss_begin;
   const LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
-      LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
+      LinkerPatch::StringBssEntryPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
+      LinkerPatch::StringBssEntryPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
   };
-  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
-                       dex_cache_arrays_begin_ + element_offset);
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
 }
 
 void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
@@ -534,23 +535,23 @@
   EXPECT_TRUE(CheckThunk(thunk_offset));
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference1) {
-  TestDexCacheReference(0x00ff0000u, 0x00fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry1) {
+  TestStringBssEntry(0x00ff0000u, 0x00fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference2) {
-  TestDexCacheReference(0x02ff0000u, 0x05fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry2) {
+  TestStringBssEntry(0x02ff0000u, 0x05fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference3) {
-  TestDexCacheReference(0x08ff0000u, 0x08fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry3) {
+  TestStringBssEntry(0x08ff0000u, 0x08fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
-TEST_F(Thumb2RelativePatcherTest, DexCacheReference4) {
-  TestDexCacheReference(0xd0ff0000u, 0x60fcu);
+TEST_F(Thumb2RelativePatcherTest, StringBssEntry4) {
+  TestStringBssEntry(0xd0ff0000u, 0x60fcu);
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b6549ee..5d02d44 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -249,12 +249,14 @@
     return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn);
   }
 
-  void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
-    dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+  void TestNopsAdrpLdr(size_t num_nops, uint32_t bss_begin, uint32_t string_entry_offset) {
+    constexpr uint32_t kStringIndex = 1u;
+    string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+    bss_begin_ = bss_begin;
     auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u);  // Unpatched.
     const LinkerPatch patches[] = {
-        LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
-        LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
+        LinkerPatch::StringBssEntryPatch(num_nops * 4u     , nullptr, num_nops * 4u, kStringIndex),
+        LinkerPatch::StringBssEntryPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex),
     };
     AddCompiledMethod(MethodRef(1u),
                       ArrayRef<const uint8_t>(code),
@@ -262,7 +264,7 @@
     Link();
 
     uint32_t method1_offset = GetMethodOffset(1u);
-    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+    uint32_t target_offset = bss_begin_ + string_entry_offset;
     auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
     EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
   }
@@ -293,14 +295,16 @@
 
   void PrepareNopsAdrpInsn2Ldr(size_t num_nops,
                                uint32_t insn2,
-                               uint32_t dex_cache_arrays_begin,
-                               uint32_t element_offset) {
-    dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+                               uint32_t bss_begin,
+                               uint32_t string_entry_offset) {
+    constexpr uint32_t kStringIndex = 1u;
+    string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+    bss_begin_ = bss_begin;
     auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u);  // Unpatched.
     InsertInsn(&code, num_nops * 4u + 4u, insn2);
     const LinkerPatch patches[] = {
-        LinkerPatch::DexCacheArrayPatch(num_nops * 4u     , nullptr, num_nops * 4u, element_offset),
-        LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
+        LinkerPatch::StringBssEntryPatch(num_nops * 4u     , nullptr, num_nops * 4u, kStringIndex),
+        LinkerPatch::StringBssEntryPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex),
     };
     AddCompiledMethod(MethodRef(1u),
                       ArrayRef<const uint8_t>(code),
@@ -376,15 +380,15 @@
   void TestAdrpInsn2Ldr(uint32_t insn2,
                         uint32_t adrp_offset,
                         bool has_thunk,
-                        uint32_t dex_cache_arrays_begin,
-                        uint32_t element_offset) {
+                        uint32_t bss_begin,
+                        uint32_t string_entry_offset) {
     uint32_t method1_offset =
         kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
     CHECK_ALIGNED(adrp_offset, 4u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
-    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
-    uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+    PrepareNopsAdrpInsn2Ldr(num_nops, insn2, bss_begin, string_entry_offset);
+    uint32_t target_offset = bss_begin_ + string_entry_offset;
     if (has_thunk) {
       TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn);
     } else {
@@ -395,33 +399,33 @@
 
   void TestAdrpLdurLdr(uint32_t adrp_offset,
                        bool has_thunk,
-                       uint32_t dex_cache_arrays_begin,
-                       uint32_t element_offset) {
-    TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+                       uint32_t bss_begin,
+                       uint32_t string_entry_offset) {
+    TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, bss_begin, string_entry_offset);
   }
 
   void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn,
                            int32_t pcrel_disp,
                            uint32_t adrp_offset,
                            bool has_thunk,
-                           uint32_t dex_cache_arrays_begin,
-                           uint32_t element_offset) {
+                           uint32_t bss_begin,
+                           uint32_t string_entry_offset) {
     ASSERT_LT(pcrel_disp, 0x100000);
     ASSERT_GE(pcrel_disp, -0x100000);
     ASSERT_EQ(pcrel_disp & 0x3, 0);
     uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5);
-    TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+    TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset);
   }
 
   void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn,
                            uint32_t sprel_disp_in_load_units,
                            uint32_t adrp_offset,
                            bool has_thunk,
-                           uint32_t dex_cache_arrays_begin,
-                           uint32_t element_offset) {
+                           uint32_t bss_begin,
+                           uint32_t string_entry_offset) {
     ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
     uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
-    TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+    TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset);
   }
 
   void TestAdrpInsn2Add(uint32_t insn2,
@@ -726,19 +730,19 @@
   EXPECT_TRUE(CheckThunk(thunk_offset));
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry1) {
   TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry2) {
   TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry3) {
   TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu);
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) {
+TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry4) {
   TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u);
 }
 
@@ -763,7 +767,7 @@
   test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2)
 
 #define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \
-  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+  TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## Ldur ## disp) { \
     bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \
     TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \
   }
@@ -771,7 +775,7 @@
 TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238)
 
 #define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \
-  TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference ## adrp_offset ## Ldur ## disp) { \
+  TEST_F(Arm64RelativePatcherTestDenver64, StringBssEntry ## adrp_offset ## Ldur ## disp) { \
     TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \
   }
 
@@ -779,7 +783,7 @@
 
 // LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
 #define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \
-  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \
+  TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WPcRel ## disp) { \
     TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
@@ -787,7 +791,7 @@
 
 // LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
 #define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \
-  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \
+  TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XPcRel ## disp) { \
     bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \
     bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \
     TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \
@@ -797,14 +801,14 @@
 
 // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
 #define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \
-  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \
+  TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WSpRel ## disp) { \
     TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
 TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4)
 
 #define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \
-  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \
+  TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XSpRel ## disp) { \
     TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \
   }
 
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
index 474eb73..63ad8a5 100644
--- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -37,7 +37,7 @@
   }
 
   void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
-  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
   void TestStringReference(uint32_t string_offset);
 };
 
@@ -69,14 +69,15 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
-void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
-                                                        uint32_t element_offset) {
-  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void Mips32r6RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+                                                     uint32_t string_entry_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+  bss_begin_ = bss_begin;
   LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset)
+      LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex)
   };
-  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
-                       dex_cache_arrays_begin_ + element_offset);
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
 }
 
 void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) {
@@ -88,8 +89,8 @@
   CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
 }
 
-TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
-  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+TEST_F(Mips32r6RelativePatcherTest, StringBssEntry) {
+  TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234);
 }
 
 TEST_F(Mips32r6RelativePatcherTest, StringReference) {
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
index b0d1294..961b312 100644
--- a/compiler/linker/mips/relative_patcher_mips_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -20,10 +20,6 @@
 namespace art {
 namespace linker {
 
-// We'll maximize the range of a single load instruction for dex cache array accesses
-// by aligning offset -32768 with the offset of the first used element.
-static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
-
 class MipsRelativePatcherTest : public RelativePatcherTest {
  public:
   MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
@@ -41,7 +37,7 @@
   }
 
   void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
-  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
   void TestStringReference(uint32_t string_offset);
 };
 
@@ -65,9 +61,7 @@
   ASSERT_TRUE(result.first);
 
   uint32_t diff = target_offset - (result.second + kAnchorOffset);
-  if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) {
-    diff += kDexCacheArrayLwOffset;
-  }
+  CHECK_NE(patches[0].GetType(), LinkerPatch::Type::kDexCacheArray);
   diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
 
   const uint8_t expected_code[] = {
@@ -79,14 +73,15 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
-void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
-                                                    uint32_t element_offset) {
-  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void MipsRelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+                                                 uint32_t string_entry_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+  bss_begin_ = bss_begin;
   LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset)
+      LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex)
   };
-  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
-                       dex_cache_arrays_begin_ + element_offset);
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
 }
 
 void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) {
@@ -98,8 +93,8 @@
   CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset);
 }
 
-TEST_F(MipsRelativePatcherTest, DexCacheReference) {
-  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+TEST_F(MipsRelativePatcherTest, StringBssEntry) {
+  TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234);
 }
 
 TEST_F(MipsRelativePatcherTest, StringReference) {
diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc
index c317058..9c9e24a 100644
--- a/compiler/linker/mips64/relative_patcher_mips64_test.cc
+++ b/compiler/linker/mips64/relative_patcher_mips64_test.cc
@@ -39,7 +39,7 @@
   }
 
   void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
-  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
+  void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset);
   void TestStringReference(uint32_t string_offset);
 };
 
@@ -76,18 +76,19 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
-void Mips64RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
-                                                      uint32_t element_offset) {
-  dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+void Mips64RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin,
+                                                   uint32_t string_entry_offset) {
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, string_entry_offset);
+  bss_begin_ = bss_begin;
   LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(kLiteralOffset, nullptr, kAnchorOffset, element_offset)
+      LinkerPatch::StringBssEntryPatch(kLiteralOffset, nullptr, kAnchorOffset, kStringIndex)
   };
-  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches),
-                       dex_cache_arrays_begin_ + element_offset);
+  CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset);
 }
 
-TEST_F(Mips64RelativePatcherTest, DexCacheReference) {
-  TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234);
+TEST_F(Mips64RelativePatcherTest, StringBssEntry) {
+  TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234);
 }
 
 TEST_F(Mips64RelativePatcherTest, CallOther) {
diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h
index 247b290..bdc1ee1 100644
--- a/compiler/linker/multi_oat_relative_patcher.h
+++ b/compiler/linker/multi_oat_relative_patcher.h
@@ -102,7 +102,7 @@
     relative_patcher_->PatchCall(code, literal_offset, patch_offset, target_offset);
   }
 
-  // Wrapper around RelativePatcher::PatchDexCacheReference(), doing offset adjustment.
+  // Wrapper around RelativePatcher::PatchPcRelativeReference(), doing offset adjustment.
   void PatchPcRelativeReference(std::vector<uint8_t>* code,
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc
index 951588a..615b2b9 100644
--- a/compiler/linker/multi_oat_relative_patcher_test.cc
+++ b/compiler/linker/multi_oat_relative_patcher_test.cc
@@ -282,7 +282,7 @@
   uint32_t method2_patch_offset = 0x7654u;
   uint32_t method2_target_offset = 0xccccu;
   LinkerPatch method2_patch =
-      LinkerPatch::DexCacheArrayPatch(method2_literal_offset, nullptr, 0u, 1234u);
+      LinkerPatch::StringBssEntryPatch(method2_literal_offset, nullptr, 0u, 1u);
   patcher_.PatchPcRelativeReference(
       &code, method2_patch, method2_patch_offset, method2_target_offset);
   DCHECK_EQ(method2_literal_offset, mock_->last_literal_offset_);
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index d9a87a0..bff6808 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -31,6 +31,7 @@
 #include "method_reference.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
+#include "string_reference.h"
 #include "vector_output_stream.h"
 
 namespace art {
@@ -61,7 +62,7 @@
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
         method_offset_map_(),
         patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)),
-        dex_cache_arrays_begin_(0u),
+        bss_begin_(0u),
         compiled_method_refs_(),
         compiled_methods_(),
         patched_code_(),
@@ -157,8 +158,9 @@
                 result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta();
             patcher_->PatchCall(&patched_code_, patch.LiteralOffset(),
                                 offset + patch.LiteralOffset(), target_offset);
-          } else if (patch.GetType() == LinkerPatch::Type::kDexCacheArray) {
-            uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset();
+          } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
+            uint32_t target_offset =
+                bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_);
             patcher_->PatchPcRelativeReference(&patched_code_,
                                                patch,
                                                offset + patch.LiteralOffset(),
@@ -276,7 +278,7 @@
   std::unique_ptr<const InstructionSetFeatures> features_;
   MethodOffsetMap method_offset_map_;
   std::unique_ptr<RelativePatcher> patcher_;
-  uint32_t dex_cache_arrays_begin_;
+  uint32_t bss_begin_;
   SafeMap<uint32_t, uint32_t> string_index_to_offset_map_;
   std::vector<MethodReference> compiled_method_refs_;
   std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
index 2a44b79..0bd9de8 100644
--- a/compiler/linker/x86/relative_patcher_x86_test.cc
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -107,9 +107,11 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
-TEST_F(X86RelativePatcherTest, DexCacheReference) {
-  dex_cache_arrays_begin_ = 0x12345678;
-  constexpr size_t kElementOffset = 0x1234;
+TEST_F(X86RelativePatcherTest, StringBssEntry) {
+  bss_begin_ = 0x12345678;
+  constexpr size_t kStringEntryOffset = 0x1234;
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset);
   static const uint8_t raw_code[] = {
       0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
       0x5b,                                 // pop ebx
@@ -118,15 +120,14 @@
   constexpr uint32_t anchor_offset = 5u;  // After call +0.
   ArrayRef<const uint8_t> code(raw_code);
   LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset),
+      LinkerPatch::StringBssEntryPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex),
   };
   AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
   ASSERT_TRUE(result.first);
-  uint32_t diff =
-      dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset);
+  uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + anchor_offset);
   static const uint8_t expected_code[] = {
       0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
       0x5b,                                 // pop ebx
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
index 2b46453..6d6bb40 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
@@ -127,19 +127,20 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
-TEST_F(X86_64RelativePatcherTest, DexCacheReference) {
-  dex_cache_arrays_begin_ = 0x12345678;
-  constexpr size_t kElementOffset = 0x1234;
+TEST_F(X86_64RelativePatcherTest, StringBssEntry) {
+  bss_begin_ = 0x12345678;
+  constexpr size_t kStringEntryOffset = 0x1234;
+  constexpr uint32_t kStringIndex = 1u;
+  string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset);
   LinkerPatch patches[] = {
-      LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset),
+      LinkerPatch::StringBssEntryPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kStringIndex),
   };
   AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches));
   Link();
 
   auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
   ASSERT_TRUE(result.first);
-  uint32_t diff =
-      dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size());
+  uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + kDexCacheLoadCode.size());
   static const uint8_t expected_code[] = {
       0x8b, 0x05,
       static_cast<uint8_t>(diff),
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4067aa3..963df5a 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -173,6 +173,51 @@
   return false;
 }
 
+// Detect up to two non-constant instructions a and b, and an accumulated constant c.
+static bool IsAddConstHelper(HInstruction* instruction,
+                             /*out*/ HInstruction** a,
+                             /*out*/ HInstruction** b,
+                             /*out*/ int64_t* c,
+                             int32_t depth) {
+  static constexpr int32_t kMaxDepth = 8;  // don't search too deep; deeper adds count as operands
+  int64_t value = 0;
+  if (IsInt64AndGet(instruction, &value)) {
+    *c += value;  // fold this constant into the running sum
+    return true;
+  } else if (instruction->IsAdd() && depth <= kMaxDepth) {  // nested add: recurse into inputs
+    return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
+           IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
+  } else if (*a == nullptr) {
+    *a = instruction;  // first non-constant operand
+    return true;
+  } else if (*b == nullptr) {
+    *b = instruction;  // second non-constant operand
+    return true;
+  }
+  return false;  // too many non-const operands
+}
+
+// Detect a + b + c for an optional constant c. Returns false iff instruction is not an add.
+static bool IsAddConst(HInstruction* instruction,
+                       /*out*/ HInstruction** a,
+                       /*out*/ HInstruction** b,
+                       /*out*/ int64_t* c) {
+  if (instruction->IsAdd()) {
+    // Try to find two non-constant operands a + b and an accumulated constant c.
+    if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
+        IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
+        *b != nullptr) {
+      return true;
+    }
+    // Otherwise fall back to the add's direct inputs as a + b, with c reset to 0.
+    *a = instruction->InputAt(0);
+    *b = instruction->InputAt(1);
+    *c = 0;
+    return true;
+  }
+  return false;
+}
+
 // Test vector restrictions.
 static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
   return (restrictions & tested) != 0;
@@ -1215,24 +1260,23 @@
   // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
   // (note whether the sign bit in higher precision is shifted in has no effect
   // on the narrow precision computed by the idiom).
-  int64_t value = 0;
+  int64_t distance = 0;
   if ((instruction->IsShr() ||
        instruction->IsUShr()) &&
-      IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) {
-    //
-    // TODO: make following code less sensitive to associativity and commutativity differences.
-    //
-    HInstruction* x = instruction->InputAt(0);
-    // Test for an optional rounding part (x + 1) >> 1.
-    bool is_rounded = false;
-    if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) {
-      x = x->InputAt(0);
-      is_rounded = true;
-    }
-    // Test for a core addition (a + b) >> 1 (possibly rounded), either unsigned or signed.
-    if (x->IsAdd()) {
-      HInstruction* a = x->InputAt(0);
-      HInstruction* b = x->InputAt(1);
+      IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
+    // Test for (a + b + c) >> 1 for optional constant c.
+    HInstruction* a = nullptr;
+    HInstruction* b = nullptr;
+    int64_t       c = 0;
+    if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+      // Accept c == 1 (rounded) or c == 0 (not rounded).
+      bool is_rounded = false;
+      if (c == 1) {
+        is_rounded = true;
+      } else if (c != 0) {
+        return false;
+      }
+      // Accept consistent zero or sign extension on operands a and b.
       HInstruction* r = nullptr;
       HInstruction* s = nullptr;
       bool is_unsigned = false;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index b0218b5..ef843c6 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2070,7 +2070,10 @@
   DCHECK_EQ(Thread::Current(), thread_running_gc_);
   mirror::Object* ref = obj->GetFieldObject<
       mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset);
-  mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false, /*kFromGCThread*/true>(ref);
+  mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false, /*kFromGCThread*/true>(
+      ref,
+      /*holder*/ obj,
+      offset);
   if (to_ref == ref) {
     return;
   }
diff --git a/runtime/gc/verification.cc b/runtime/gc/verification.cc
index 7b31c8a..c14f250 100644
--- a/runtime/gc/verification.cc
+++ b/runtime/gc/verification.cc
@@ -86,8 +86,8 @@
   std::ostringstream oss;
   oss << "GC tried to mark invalid reference " << ref << std::endl;
   oss << DumpObjectInfo(ref, "ref") << "\n";
+  oss << DumpObjectInfo(holder.Ptr(), "holder");
   if (holder != nullptr) {
-    oss << DumpObjectInfo(holder.Ptr(), "holder");
     mirror::Class* holder_klass = holder->GetClass<kVerifyNone, kWithoutReadBarrier>();
     if (IsValidClass(holder_klass)) {
       oss << "field_offset=" << offset.Uint32Value();
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index db495f6..4e6b4bd 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -49,6 +49,34 @@
     }
   }
 
+  /// CHECK-START: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I10:i\d+>>  IntConstant 10                      loop:none
+  /// CHECK-DAG: <<M10:i\d+>>  IntConstant -10                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M10>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add3>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // The +10 and -10 cancel out, so this must still be recognized as a plain halving add.
+      bo[i] = (short) (((b1[i] + 10) + (b2[i] - 10)) >> 1);
+    }
+  }
+
   /// CHECK-START: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
@@ -99,6 +127,59 @@
     }
   }
 
+  /// CHECK-START: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<Get2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // The rounding +1 is written first, (1 + a) + b, yet must match the rounding idiom.
+      bo[i] = (short) (((1 + b1[i]) + b2[i]) >> 1);
+    }
+  }
+
+  /// CHECK-START: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<I10:i\d+>>  IntConstant 10                      loop:none
+  /// CHECK-DAG: <<M9:i\d+>>   IntConstant -9                      loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<Get1>>,<<I10>>]              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get2>>,<<M9>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add3>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // The +10 and -9 net to +1, so this must be recognized as a rounding halving add.
+      bo[i] = (short) (((b1[i] + 10) + (b2[i] - 9)) >> 1);
+    }
+  }
+
   /// CHECK-START: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
@@ -126,6 +207,34 @@
     }
   }
 
+  /// CHECK-START: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<UMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add1:i\d+>> Add [<<And2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add2:i\d+>> Add [<<And1>>,<<Add1>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Shr:i\d+>>  Shr [<<Add2>>,<<I1>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) {
+    int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
+    for (int i = 0; i < min_length; i++) {
+      // The +1 is attached to the second operand, a + (b + 1); note '+' binds tighter than '>>'.
+      bo[i] = (short) ((b1[i] & 0xffff) + ((b2[i] & 0xffff) + 1) >> 1);
+    }
+  }
+
   /// CHECK-START: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<I1:i\d+>>   IntConstant 1                       loop:none
   /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                   loop:none
@@ -200,6 +309,11 @@
       short e = (short) ((sB1[i] + sB2[i]) >> 1);
       expectEquals(e, sBo[i]);
     }
+    halving_add_signed_alt(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i]) >> 1);
+      expectEquals(e, sBo[i]);
+    }
     halving_add_unsigned(sB1, sB2, sBo);
     for (int i = 0; i < M; i++) {
       short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff)) >> 1);
@@ -210,11 +324,26 @@
       short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
       expectEquals(e, sBo[i]);
     }
+    rounding_halving_add_signed_alt(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
+    rounding_halving_add_signed_alt2(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) ((sB1[i] + sB2[i] + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
     rounding_halving_add_unsigned(sB1, sB2, sBo);
     for (int i = 0; i < M; i++) {
       short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
       expectEquals(e, sBo[i]);
     }
+    rounding_halving_add_unsigned_alt(sB1, sB2, sBo);
+    for (int i = 0; i < M; i++) {
+      short e = (short) (((sB1[i] & 0xffff) + (sB2[i] & 0xffff) + 1) >> 1);
+      expectEquals(e, sBo[i]);
+    }
     halving_add_signed_constant(sB1, sBo);
     for (int i = 0; i < M; i++) {
       short e = (short) ((sB1[i] + 0x7fff) >> 1);