Merge "Add symbolizer option to generate debug information only."
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index fd84d05..2294ddb 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -114,8 +114,7 @@
 else
 ART_TARGET_CLANG := false
 endif
-# b/25130937
-ART_TARGET_CLANG_arm := false
+ART_TARGET_CLANG_arm :=
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
 ART_TARGET_CLANG_mips64 :=
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index fda4f5d..33242f1 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -517,7 +517,8 @@
 valgrind-$$(gtest_rule): $$(gtest_exe) $$(gtest_deps) $(ART_VALGRIND_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  VALGRIND_LIB=$(HOST_OUT)/lib64/valgrind \
-	  $(HOST_OUT_EXECUTABLES)/valgrind --leak-check=full --error-exitcode=1 $$< && \
+	  $(HOST_OUT_EXECUTABLES)/valgrind --leak-check=full --error-exitcode=1 \
+	    --suppressions=art/test/valgrind-suppressions.txt $$< && \
 	    $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 
   ART_TEST_HOST_VALGRIND_GTEST$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += valgrind-$$(gtest_rule)
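
The valgrind invocation now reads a suppressions file. Its contents are not part of this change, but for orientation, valgrind suppression entries take the following general shape; the name and frames below are purely illustrative, not the actual contents of art/test/valgrind-suppressions.txt:

```
{
   art_test_leak_suppression_example
   Memcheck:Leak
   match-leak-kinds: definite
   fun:malloc
   ...
   fun:_ZN3art7Runtime*
}
```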
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
index badbd93..f9d33c1 100644
--- a/compiler/debug/elf_debug_frame_writer.h
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -175,18 +175,6 @@
   CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT || format == dwarf::DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
-  if (method_infos.empty()) {
-    return;
-  }
-
-  std::vector<uint32_t> binary_search_table;
-  std::vector<uintptr_t> patch_locations;
-  if (format == dwarf::DW_EH_FRAME_FORMAT) {
-    binary_search_table.reserve(2 * method_infos.size());
-  } else {
-    patch_locations.reserve(method_infos.size());
-  }
-
   // The methods can be written in any order.
   // Let's therefore sort them in the lexicographical order of the opcodes.
   // This has no effect on its own. However, if the final .debug_frame section is
@@ -194,9 +182,14 @@
   std::vector<const MethodDebugInfo*> sorted_method_infos;
   sorted_method_infos.reserve(method_infos.size());
   for (size_t i = 0; i < method_infos.size(); i++) {
-    sorted_method_infos.push_back(&method_infos[i]);
+    if (!method_infos[i].cfi.empty() && !method_infos[i].deduped) {
+      sorted_method_infos.push_back(&method_infos[i]);
+    }
   }
-  std::sort(
+  if (sorted_method_infos.empty()) {
+    return;
+  }
+  std::stable_sort(
       sorted_method_infos.begin(),
       sorted_method_infos.end(),
       [](const MethodDebugInfo* lhs, const MethodDebugInfo* rhs) {
@@ -205,6 +198,14 @@
         return std::lexicographical_compare(l.begin(), l.end(), r.begin(), r.end());
       });
 
+  std::vector<uint32_t> binary_search_table;
+  std::vector<uintptr_t> patch_locations;
+  if (format == dwarf::DW_EH_FRAME_FORMAT) {
+    binary_search_table.reserve(2 * sorted_method_infos.size());
+  } else {
+    patch_locations.reserve(sorted_method_infos.size());
+  }
+
   // Write .eh_frame/.debug_frame section.
   auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
@@ -221,26 +222,21 @@
     buffer_address += buffer.size();
     buffer.clear();
     for (const MethodDebugInfo* mi : sorted_method_infos) {
-      if (!mi->deduped) {  // Only one FDE per unique address.
-        ArrayRef<const uint8_t> opcodes = mi->cfi;
-        if (!opcodes.empty()) {
-          const Elf_Addr code_address = mi->code_address +
-              (mi->is_code_address_text_relative ? builder->GetText()->GetAddress() : 0);
-          if (format == dwarf::DW_EH_FRAME_FORMAT) {
-            binary_search_table.push_back(
-                dchecked_integral_cast<uint32_t>(code_address));
-            binary_search_table.push_back(
-                dchecked_integral_cast<uint32_t>(buffer_address));
-          }
-          WriteFDE(is64bit, cfi_address, cie_address,
-                   code_address, mi->code_size,
-                   opcodes, format, buffer_address, &buffer,
-                   &patch_locations);
-          cfi_section->WriteFully(buffer.data(), buffer.size());
-          buffer_address += buffer.size();
-          buffer.clear();
-        }
+      DCHECK(!mi->deduped);
+      DCHECK(!mi->cfi.empty());
+      const Elf_Addr code_address = mi->code_address +
+          (mi->is_code_address_text_relative ? builder->GetText()->GetAddress() : 0);
+      if (format == dwarf::DW_EH_FRAME_FORMAT) {
+        binary_search_table.push_back(dchecked_integral_cast<uint32_t>(code_address));
+        binary_search_table.push_back(dchecked_integral_cast<uint32_t>(buffer_address));
       }
+      WriteFDE(is64bit, cfi_address, cie_address,
+               code_address, mi->code_size,
+               mi->cfi, format, buffer_address, &buffer,
+               &patch_locations);
+      cfi_section->WriteFully(buffer.data(), buffer.size());
+      buffer_address += buffer.size();
+      buffer.clear();
     }
     cfi_section->End();
   }
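
The rewritten writer filters out deduped and CFI-less methods up front, then stable-sorts the remainder by the raw bytes of their unwind opcodes. A minimal sketch of that idea, using plain byte vectors in place of MethodDebugInfo (names here are hypothetical):

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

// Sorting unwind programs by their raw opcode bytes places identical or
// near-identical FDEs next to each other, which helps a compressor running
// over the final .debug_frame section. std::stable_sort keeps methods with
// equal opcodes in their original order, so the output stays deterministic.
void SortByOpcodes(std::vector<const std::vector<uint8_t>*>* infos) {
  std::stable_sort(
      infos->begin(), infos->end(),
      [](const std::vector<uint8_t>* lhs, const std::vector<uint8_t>* rhs) {
        return std::lexicographical_compare(lhs->begin(), lhs->end(),
                                            rhs->begin(), rhs->end());
      });
}
```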
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index af74d4c..a6e6f8b 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -46,6 +46,7 @@
 static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
   std::vector<const char*> names;
   if (mi->code_item != nullptr) {
+    DCHECK(mi->dex_file != nullptr);
     const uint8_t* stream = mi->dex_file->GetDebugInfoStream(mi->code_item);
     if (stream != nullptr) {
       DecodeUnsignedLeb128(&stream);  // line.
@@ -133,6 +134,7 @@
 
     const char* last_dex_class_desc = nullptr;
     for (auto mi : compilation_unit.methods) {
+      DCHECK(mi->dex_file != nullptr);
       const DexFile* dex = mi->dex_file;
       const DexFile::CodeItem* dex_code = mi->code_item;
       const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index);
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index ed26d96..66e135f 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
 #define ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
 
+#include <unordered_set>
 #include <vector>
 
 #include "compiled_method.h"
@@ -81,11 +82,14 @@
       case kX86_64:
         break;
     }
+    std::unordered_set<uint64_t> seen_addresses(compilation_unit.methods.size());
     dwarf::DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
     for (const MethodDebugInfo* mi : compilation_unit.methods) {
       // Ignore function if we have already generated line table for the same address.
       // It would confuse the debugger and the DWARF specification forbids it.
-      if (mi->deduped) {
+      // We allow a method's line table to be replicated in different compilation units.
+      // This ensures that each compilation unit contains the line table for all its methods.
+      if (!seen_addresses.insert(mi->code_address).second) {
         continue;
       }
 
@@ -114,9 +118,44 @@
         continue;
       }
 
+      // Compensate for the compiler's off-by-one-instruction error.
+      //
+      // The compiler generates a stackmap with the PC *after* the branch instruction
+      // (because this is the PC which is easier to obtain when unwinding).
+      //
+      // However, the debugger is more clever and it will ask us for the line-number
+      // mapping at the location of the branch instruction (since the following
+      // instruction could belong to another line, this is the correct thing to do).
+      //
+      // So we really want to just decrement the PC by one instruction so that the
+      // branch instruction is covered as well. However, we do not know the size
+      // of the previous instruction, and we cannot subtract just a fixed amount
+      // (the debugger would trust us that the PC is valid; it might try to set a
+      // breakpoint there at some point, and setting a breakpoint mid-instruction
+      // would make the process crash in a spectacular way).
+      //
+      // Therefore, we say that the PC which the compiler gave us for the stackmap
+      // is the end of its associated address range, and we use the PC from the
+      // previous stack map as the start of the range. This ensures that the PC is
+      // valid and that the branch instruction is covered.
+      //
+      // This gives us correct line-number mapping at call sites (which is
+      // important for backtraces), but there is nothing we can do for non-call
+      // sites (so stepping through optimized code in a debugger is not possible).
+      //
+      // We do not adjust the stackmaps if the code was compiled as debuggable.
+      // In that case, the stackmaps should accurately cover all instructions.
+      if (!mi->is_native_debuggable) {
+        for (size_t i = pc2dex_map.size() - 1; i > 0; --i) {
+          pc2dex_map[i].from_ = pc2dex_map[i - 1].from_;
+        }
+        pc2dex_map[0].from_ = 0;
+      }
+
       Elf_Addr method_address = base_address + mi->code_address;
 
       PositionInfos dex2line_map;
+      DCHECK(mi->dex_file != nullptr);
       const DexFile* dex = mi->dex_file;
       if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &dex2line_map)) {
         continue;
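
A compact sketch of the range shift described in the comment above, using a simplified stand-in for the pc2dex entries:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for the pc2dex entries used above.
struct SrcMapElem {
  uint32_t from_;  // Native PC.
  uint32_t to_;    // Dex PC.
};

// Each stackmap PC becomes the *end* of its address range: entry i inherits
// its start from entry i-1, and the first range starts at 0. For example,
// PCs {4, 10, 16} become the ranges [0,4), [4,10), [10,16), so the branch
// instruction preceding each recorded PC is covered.
void ShiftRanges(std::vector<SrcMapElem>* map) {
  if (map->empty()) {
    return;
  }
  for (size_t i = map->size() - 1; i > 0; --i) {
    (*map)[i].from_ = (*map)[i - 1].from_;
  }
  (*map)[0].from_ = 0;
}
```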
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index 2d4fff4..4712d47 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -96,12 +96,21 @@
   std::vector<VariableLocation> variable_locations;
 
   // Get stack maps sorted by pc (they might not be sorted internally).
+  // TODO(dsrbecky) Remove this once stackmaps get sorted by pc.
   const CodeInfo code_info(method_info->code_info);
   const StackMapEncoding encoding = code_info.ExtractEncoding();
   std::map<uint32_t, uint32_t> stack_maps;  // low_pc -> stack_map_index.
   for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
     StackMap stack_map = code_info.GetStackMapAt(s, encoding);
     DCHECK(stack_map.IsValid());
+    if (!stack_map.HasDexRegisterMap(encoding)) {
+      // The compiler creates stackmaps without register maps at the start of
+      // basic blocks in order to keep instruction-accurate line number mapping.
+      // However, we never stop at those (breakpoint locations always have a map).
+      // Therefore, for the purpose of local variables, we ignore them.
+      // The main reason for this is to save space by avoiding undefined gaps.
+      continue;
+    }
     const uint32_t pc_offset = stack_map.GetNativePcOffset(encoding);
     DCHECK_LE(pc_offset, method_info->code_size);
     DCHECK_LE(compilation_unit_code_address, method_info->code_address);
@@ -128,6 +137,8 @@
     // Check that the stack map is in the requested range.
     uint32_t dex_pc = stack_map.GetDexPc(encoding);
     if (!(dex_pc_low <= dex_pc && dex_pc < dex_pc_high)) {
+      // The variable is not in scope at this PC. Therefore omit the entry.
+      // Note that this differs from a None() entry, which means in scope but at an unknown location.
       continue;
     }
 
@@ -136,13 +147,12 @@
     DexRegisterLocation reg_hi = DexRegisterLocation::None();
     DCHECK_LT(stack_map_index, dex_register_maps.size());
     DexRegisterMap dex_register_map = dex_register_maps[stack_map_index];
-    if (dex_register_map.IsValid()) {
-      reg_lo = dex_register_map.GetDexRegisterLocation(
-          vreg, method_info->code_item->registers_size_, code_info, encoding);
-      if (is64bitValue) {
-        reg_hi = dex_register_map.GetDexRegisterLocation(
-            vreg + 1, method_info->code_item->registers_size_, code_info, encoding);
-      }
+    DCHECK(dex_register_map.IsValid());
+    reg_lo = dex_register_map.GetDexRegisterLocation(
+        vreg, method_info->code_item->registers_size_, code_info, encoding);
+    if (is64bitValue) {
+      reg_hi = dex_register_map.GetDexRegisterLocation(
+          vreg + 1, method_info->code_item->registers_size_, code_info, encoding);
     }
 
     // Add location entry for this address range.
@@ -152,9 +162,6 @@
         variable_locations.back().high_pc == low_pc) {
       // Merge with the previous entry (extend its range).
       variable_locations.back().high_pc = high_pc;
-    } else if (!variable_locations.empty() && reg_lo == DexRegisterLocation::None()) {
-      // Unknown location - use the last known location as best-effort guess.
-      variable_locations.back().high_pc = high_pc;
     } else {
       variable_locations.push_back({low_pc, high_pc, reg_lo, reg_hi});
     }
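
With the best-effort fallback removed, the merging above boils down to: extend the previous entry when the same location continues across abutting PC ranges, otherwise start a new one. A simplified sketch with a hypothetical Loc type:

```cpp
#include <cstdint>
#include <vector>

// Simplified, hypothetical mirror of the VariableLocation merging above.
struct Loc {
  uint32_t low_pc;
  uint32_t high_pc;
  int reg;  // Stand-in for the (reg_lo, reg_hi) pair.
};

// Extend the previous entry when the same location continues across abutting
// ranges; otherwise start a new entry. Out-of-scope PCs are now simply
// skipped by the caller, so no "best-effort" padding entries are produced.
void AddRange(std::vector<Loc>* locs, uint32_t low, uint32_t high, int reg) {
  if (!locs->empty() && locs->back().reg == reg && locs->back().high_pc == low) {
    locs->back().high_pc = high;  // Merge: just extend the range.
  } else {
    locs->push_back(Loc{low, high, reg});
  }
}
```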
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 0ca7370..4dd8024 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -39,35 +39,31 @@
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     dwarf::CFIFormat cfi_format,
                     bool write_oat_patches) {
-  // Add methods to .symtab.
+  // Write .strtab and .symtab.
   WriteDebugSymbols(builder, method_infos, true /* with_signature */);
-  // Generate CFI (stack unwinding information).
-  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
-  // Write DWARF .debug_* sections.
-  WriteDebugSections(builder, method_infos, write_oat_patches);
-}
 
-template<typename ElfTypes>
-static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                               const ArrayRef<const MethodDebugInfo>& method_infos,
-                               bool write_oat_patches) {
+  // Write .debug_frame.
+  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
+
   // Group the methods into compilation units based on source file.
   std::vector<ElfCompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
   for (const MethodDebugInfo& mi : method_infos) {
-    auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index);
-    const char* source_file = mi.dex_file->GetSourceFile(dex_class_def);
-    if (compilation_units.empty() || source_file != last_source_file) {
-      compilation_units.push_back(ElfCompilationUnit());
+    if (mi.dex_file != nullptr) {
+      auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index);
+      const char* source_file = mi.dex_file->GetSourceFile(dex_class_def);
+      if (compilation_units.empty() || source_file != last_source_file) {
+        compilation_units.push_back(ElfCompilationUnit());
+      }
+      ElfCompilationUnit& cu = compilation_units.back();
+      cu.methods.push_back(&mi);
+      // All methods must have the same addressing mode otherwise the min/max below does not work.
+      DCHECK_EQ(cu.methods.front()->is_code_address_text_relative, mi.is_code_address_text_relative);
+      cu.is_code_address_text_relative = mi.is_code_address_text_relative;
+      cu.code_address = std::min(cu.code_address, mi.code_address);
+      cu.code_end = std::max(cu.code_end, mi.code_address + mi.code_size);
+      last_source_file = source_file;
     }
-    ElfCompilationUnit& cu = compilation_units.back();
-    cu.methods.push_back(&mi);
-    // All methods must have the same addressing mode otherwise the min/max below does not work.
-    DCHECK_EQ(cu.methods.front()->is_code_address_text_relative, mi.is_code_address_text_relative);
-    cu.is_code_address_text_relative = mi.is_code_address_text_relative;
-    cu.code_address = std::min(cu.code_address, mi.code_address);
-    cu.code_end = std::max(cu.code_end, mi.code_address + mi.code_size);
-    last_source_file = source_file;
   }
 
   // Write .debug_line section.
@@ -185,6 +181,31 @@
   }
 }
 
+std::vector<MethodDebugInfo> MakeTrampolineInfos(const OatHeader& header) {
+  std::map<const char*, uint32_t> trampolines = {
+    { "interpreterToInterpreterBridge", header.GetInterpreterToInterpreterBridgeOffset() },
+    { "interpreterToCompiledCodeBridge", header.GetInterpreterToCompiledCodeBridgeOffset() },
+    { "jniDlsymLookup", header.GetJniDlsymLookupOffset() },
+    { "quickGenericJniTrampoline", header.GetQuickGenericJniTrampolineOffset() },
+    { "quickImtConflictTrampoline", header.GetQuickImtConflictTrampolineOffset() },
+    { "quickResolutionTrampoline", header.GetQuickResolutionTrampolineOffset() },
+    { "quickToInterpreterBridge", header.GetQuickToInterpreterBridgeOffset() },
+  };
+  std::vector<MethodDebugInfo> result;
+  for (const auto& it : trampolines) {
+    if (it.second != 0) {
+      MethodDebugInfo info = MethodDebugInfo();
+      info.trampoline_name = it.first;
+      info.isa = header.GetInstructionSet();
+      info.is_code_address_text_relative = true;
+      info.code_address = it.second - header.GetExecutableOffset();
+      info.code_size = 0;  // The symbol lasts until the next symbol.
+      result.push_back(std::move(info));
+    }
+  }
+  return result;
+}
+
 // Explicit instantiations
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
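
MakeTrampolineInfos stores each trampoline address relative to the start of .text by subtracting the header's executable offset, and sets code_size to 0 so the resulting ELF symbol runs until the next symbol. A worked example with made-up offsets:

```cpp
#include <cassert>
#include <cstdint>

// The oat header stores trampoline offsets relative to the start of the oat
// file, while the symbols are text-relative; subtracting the executable
// offset converts between the two. The offsets below are invented.
uint64_t TrampolineTextOffset(uint32_t trampoline_offset, uint32_t exec_offset) {
  return trampoline_offset - exec_offset;
}

int main() {
  // With GetExecutableOffset() == 0x1000 and a bridge at oat offset 0x1160,
  // the symbol lands 0x160 bytes into .text; code_size == 0 then makes it
  // extend until the next symbol.
  assert(TrampolineTextOffset(0x1160, 0x1000) == 0x160);
  return 0;
}
```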
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 7f5d24d..736370e 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
 #define ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
 
+#include <vector>
+
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "debug/dwarf/dwarf_constants.h"
@@ -24,6 +26,7 @@
 #include "utils/array_ref.h"
 
 namespace art {
+class OatHeader;
 namespace mirror {
 class Class;
 }
@@ -55,6 +58,8 @@
     const ArrayRef<mirror::Class*>& types)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+std::vector<MethodDebugInfo> MakeTrampolineInfos(const OatHeader& oat_header);
+
 }  // namespace debug
 }  // namespace art
 
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 0a199da..045eddd 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -39,7 +39,7 @@
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
                               const ArrayRef<const MethodDebugInfo>& method_infos,
                               bool with_signature) {
-  bool generated_mapping_symbol = false;
+  uint64_t mapping_symbol_address = std::numeric_limits<uint64_t>::max();
   auto* strtab = builder->GetStrTab();
   auto* symtab = builder->GetSymTab();
 
@@ -64,12 +64,20 @@
     if (info.deduped) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(info.dex_method_index, *info.dex_file, with_signature);
-    if (deduped_addresses.find(info.code_address) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
+    size_t name_offset;
+    if (info.trampoline_name != nullptr) {
+      name_offset = strtab->Write(info.trampoline_name);
+    } else {
+      DCHECK(info.dex_file != nullptr);
+      std::string name = PrettyMethod(info.dex_method_index, *info.dex_file, with_signature);
+      if (deduped_addresses.find(info.code_address) != deduped_addresses.end()) {
+        name += " [DEDUPED]";
+      }
+      // If we write method names without signature, we might see the same name multiple times.
+      name_offset = (name == last_name ? last_name_offset : strtab->Write(name));
+      last_name = std::move(name);
+      last_name_offset = name_offset;
     }
-    // If we write method names without signature, we might see the same name multiple times.
-    size_t name_offset = (name == last_name ? last_name_offset : strtab->Write(name));
 
     const auto* text = info.is_code_address_text_relative ? builder->GetText() : nullptr;
     uint64_t address = info.code_address + (text != nullptr ? text->GetAddress() : 0);
@@ -82,14 +90,11 @@
     // Note that even if we generate just a single mapping symbol, ARM's Streamline
     // requires it to match function symbol.  Just address 0 does not work.
     if (info.isa == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
+      if (address < mapping_symbol_address || !kGenerateSingleArmMappingSymbol) {
         symtab->Add(strtab->Write("$t"), text, address & ~1, 0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
+        mapping_symbol_address = address;
       }
     }
-
-    last_name = std::move(name);
-    last_name_offset = name_offset;
   }
   strtab->End();
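
Replacing the generated_mapping_symbol flag with a running minimum lets the writer re-emit the "$t" mapping symbol whenever a lower Thumb2 address appears, so in single-symbol mode the symbol ends up at the lowest code address even when methods are not visited in address order. A sketch of that logic (the tracker type is illustrative, not ART's):

```cpp
#include <cstdint>
#include <limits>

// Bit 0 of a Thumb2 code address is the mode flag, so the emitted symbol
// value clears it, matching `address & ~1` in the writer above.
class MappingSymbolTracker {
 public:
  // Returns true and sets *symbol_value if a "$t" symbol should be emitted.
  bool MaybeEmit(uint64_t code_address, uint64_t* symbol_value) {
    if (code_address < lowest_) {
      lowest_ = code_address;
      *symbol_value = code_address & ~uint64_t{1};  // Clear the Thumb bit.
      return true;
    }
    return false;
  }

 private:
  uint64_t lowest_ = std::numeric_limits<uint64_t>::max();
};
```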
 
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
index 1ccc705..ed1da2c 100644
--- a/compiler/debug/method_debug_info.h
+++ b/compiler/debug/method_debug_info.h
@@ -24,7 +24,8 @@
 namespace debug {
 
 struct MethodDebugInfo {
-  const DexFile* dex_file;
+  const char* trampoline_name;
+  const DexFile* dex_file;  // Native methods (trampolines) do not reference a dex file.
   size_t class_def_index;
   uint32_t dex_method_index;
   uint32_t access_flags;
@@ -37,7 +38,7 @@
   uint64_t code_address;
   uint32_t code_size;
   uint32_t frame_size_in_bytes;
-  const uint8_t* code_info;
+  const void* code_info;
   ArrayRef<const uint8_t> cfi;
 };
 
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 2b511fc..c2f19c9 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -811,7 +811,8 @@
       if (compiler_options.GenerateAnyDebugInfo() && code_size != 0) {
         bool has_code_info = method_header->IsOptimized();
         // Record debug information for this function if we are doing that.
-        debug::MethodDebugInfo info;
+        debug::MethodDebugInfo info = debug::MethodDebugInfo();
+        info.trampoline_name = nullptr;
         info.dex_file = dex_file_;
         info.class_def_index = class_def_index_;
         info.dex_method_index = it.GetMemberIndex();
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 74aab4e..5e7a4a3 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -202,6 +202,10 @@
 
   ~OatWriter();
 
+  void AddMethodDebugInfos(const std::vector<debug::MethodDebugInfo>& infos) {
+    method_info_.insert(method_info_.end(), infos.begin(), infos.end());
+  }
+
   ArrayRef<const debug::MethodDebugInfo> GetMethodDebugInfo() const {
     return ArrayRef<const debug::MethodDebugInfo>(method_info_);
   }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index aa9b01f..0b7fefa 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -5727,6 +5727,71 @@
   HandleBitwiseOperation(instruction);
 }
 
+
+void LocationsBuilderARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+         || instruction->GetResultType() == Primitive::kPrimLong);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    ShifterOperand second_reg(second.AsRegister<Register>());
+    Register out_reg = out.AsRegister<Register>();
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ bic(out_reg, first_reg, second_reg);
+        break;
+      case HInstruction::kOr:
+        __ orn(out_reg, first_reg, second_reg);
+        break;
+      // There is no EON on arm.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+    return;
+
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    Register first_low = first.AsRegisterPairLow<Register>();
+    Register first_high = first.AsRegisterPairHigh<Register>();
+    ShifterOperand second_low(second.AsRegisterPairLow<Register>());
+    ShifterOperand second_high(second.AsRegisterPairHigh<Register>());
+    Register out_low = out.AsRegisterPairLow<Register>();
+    Register out_high = out.AsRegisterPairHigh<Register>();
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ bic(out_low, first_low, second_low);
+        __ bic(out_high, first_high, second_high);
+        break;
+      case HInstruction::kOr:
+        __ orn(out_low, first_low, second_low);
+        __ orn(out_high, first_high, second_high);
+        break;
+      // There is no EON on arm.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+  }
+}
+
 void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
   // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier).
   if (value == 0xffffffffu) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 985dc05..89b9e2c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1862,7 +1862,7 @@
   HandleBinaryOp(instruction);
 }
 
-void LocationsBuilderARM64::VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instr) {
+void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
   DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1871,8 +1871,7 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64BitwiseNegatedRight(
-    HArm64BitwiseNegatedRight* instr) {
+void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
   Register dst = OutputRegister(instr);
   Register lhs = InputRegisterAt(instr, 0);
   Register rhs = InputRegisterAt(instr, 1);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index b9638f2..4f1e90c 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -440,13 +440,13 @@
   void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
+
+  void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_arm64
-  void VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instruction) OVERRIDE {
-    StartAttributeStream("kind") << instruction->GetOpKind();
-  }
-
   void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
     if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b95ece5..049901b 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -70,6 +70,10 @@
   void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE;
   void VisitLessThan(HLessThan* condition) OVERRIDE;
   void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE;
+  void VisitBelow(HBelow* condition) OVERRIDE;
+  void VisitBelowOrEqual(HBelowOrEqual* condition) OVERRIDE;
+  void VisitAbove(HAbove* condition) OVERRIDE;
+  void VisitAboveOrEqual(HAboveOrEqual* condition) OVERRIDE;
   void VisitDiv(HDiv* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitNeg(HNeg* instruction) OVERRIDE;
@@ -559,6 +563,36 @@
   block->RemoveInstruction(check);
 }
 
+static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* arena, HInstruction* cond) {
+  HInstruction* lhs = cond->InputAt(0);
+  HInstruction* rhs = cond->InputAt(1);
+  switch (cond->GetKind()) {
+    case HInstruction::kEqual:
+      return new (arena) HEqual(rhs, lhs);
+    case HInstruction::kNotEqual:
+      return new (arena) HNotEqual(rhs, lhs);
+    case HInstruction::kLessThan:
+      return new (arena) HGreaterThan(rhs, lhs);
+    case HInstruction::kLessThanOrEqual:
+      return new (arena) HGreaterThanOrEqual(rhs, lhs);
+    case HInstruction::kGreaterThan:
+      return new (arena) HLessThan(rhs, lhs);
+    case HInstruction::kGreaterThanOrEqual:
+      return new (arena) HLessThanOrEqual(rhs, lhs);
+    case HInstruction::kBelow:
+      return new (arena) HAbove(rhs, lhs);
+    case HInstruction::kBelowOrEqual:
+      return new (arena) HAboveOrEqual(rhs, lhs);
+    case HInstruction::kAbove:
+      return new (arena) HBelow(rhs, lhs);
+    case HInstruction::kAboveOrEqual:
+      return new (arena) HBelowOrEqual(rhs, lhs);
+    default:
+      LOG(FATAL) << "Unknown ConditionType " << cond->GetKind();
+  }
+  return nullptr;
+}
+
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input_const = equal->GetConstantRight();
   if (input_const != nullptr) {
@@ -982,13 +1016,47 @@
   VisitCondition(condition);
 }
 
-// TODO: unsigned comparisons too?
+void InstructionSimplifierVisitor::VisitBelow(HBelow* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitBelowOrEqual(HBelowOrEqual* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitAbove(HAbove* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitAboveOrEqual(HAboveOrEqual* condition) {
+  VisitCondition(condition);
+}
 
 void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
-  // Try to fold an HCompare into this HCondition.
+  // Reverse the condition if the left input is a constant; our code generators
+  // prefer the constant on the right-hand side.
+  if (condition->GetLeft()->IsConstant() && !condition->GetRight()->IsConstant()) {
+    HBasicBlock* block = condition->GetBlock();
+    HCondition* replacement = GetOppositeConditionSwapOps(block->GetGraph()->GetArena(), condition);
+    // If it is a fp we must set the opposite bias.
+    if (replacement != nullptr) {
+      if (condition->IsLtBias()) {
+        replacement->SetBias(ComparisonBias::kGtBias);
+      } else if (condition->IsGtBias()) {
+        replacement->SetBias(ComparisonBias::kLtBias);
+      }
+      block->ReplaceAndRemoveInstructionWith(condition, replacement);
+      RecordSimplification();
+
+      condition = replacement;
+    }
+  }
 
   HInstruction* left = condition->GetLeft();
   HInstruction* right = condition->GetRight();
+
+  // Try to fold an HCompare into this HCondition.
+
   // We can only replace an HCondition which compares a Compare to 0.
   // Both 'dx' and 'jack' generate a compare to 0 when compiling a
   // condition with a long, float or double comparison as input.
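
The bias flip performed in VisitCondition above can be sanity-checked against the dex cmpg/cmpl semantics. The following self-contained model (not ART's actual compare helper) verifies that swapping operands and mirroring the comparison preserves the NaN result exactly when the bias is flipped as well:

```cpp
#include <cassert>
#include <cmath>

// Model of the dex cmpg/cmpl result: returns -1/0/+1, and `gt_bias` picks
// what an unordered (NaN) compare yields.
int Compare(float a, float b, bool gt_bias) {
  if (a < b) return -1;
  if (a > b) return 1;
  if (a == b) return 0;
  return gt_bias ? 1 : -1;  // Unordered.
}

int main() {
  const float nan = std::nanf("");
  // (3.0f < x) under lt-bias must agree with the swapped (x > 3.0f) under
  // gt-bias, including for NaN -- hence SetBias with the opposite bias above.
  assert((Compare(3.0f, nan, /* gt_bias */ false) < 0) ==
         (Compare(nan, 3.0f, /* gt_bias */ true) > 0));
  // And symmetrically with the biases exchanged.
  assert((Compare(3.0f, nan, /* gt_bias */ true) < 0) ==
         (Compare(nan, 3.0f, /* gt_bias */ false) > 0));
  return 0;
}
```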
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index db1f9a7..cd026b8 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -26,5 +26,18 @@
   }
 }
 
+void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 379b95d..14c940e 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -36,6 +36,8 @@
   }
 
   void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitAnd(HAnd* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index c2bbdcc..f00d960 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -180,51 +180,10 @@
   return true;
 }
 
-bool InstructionSimplifierArm64Visitor::TryMergeNegatedInput(HBinaryOperation* op) {
-  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
-  HInstruction* left = op->GetLeft();
-  HInstruction* right = op->GetRight();
-
-  // Only consider the case where there is exactly one Not, with 2 Not's De
-  // Morgan's laws should be applied instead.
-  if (left->IsNot() ^ right->IsNot()) {
-    HInstruction* hnot = (left->IsNot() ? left : right);
-    HInstruction* hother = (left->IsNot() ? right : left);
-
-    // Only do the simplification if the Not has only one use and can thus be
-    // safely removed. Even though ARM64 negated bitwise operations do not have
-    // an immediate variant (only register), we still do the simplification when
-    // `hother` is a constant, because it removes an instruction if the constant
-    // cannot be encoded as an immediate:
-    //   mov r0, #large_constant
-    //   neg r2, r1
-    //   and r0, r0, r2
-    // becomes:
-    //   mov r0, #large_constant
-    //   bic r0, r0, r1
-    if (hnot->HasOnlyOneNonEnvironmentUse()) {
-      // Replace code looking like
-      //    NOT tmp, mask
-      //    AND dst, src, tmp   (respectively ORR, EOR)
-      // with
-      //    BIC dst, src, mask  (respectively ORN, EON)
-      HInstruction* src = hnot->AsNot()->GetInput();
-
-      HArm64BitwiseNegatedRight* neg_op = new (GetGraph()->GetArena())
-          HArm64BitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());
-
-      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
-      hnot->GetBlock()->RemoveInstruction(hnot);
-      RecordSimplification();
-      return true;
-    }
-  }
-
-  return false;
-}
-
 void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
-  TryMergeNegatedInput(instruction);
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
@@ -248,7 +207,9 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitOr(HOr* instruction) {
-  TryMergeNegatedInput(instruction);
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
@@ -284,7 +245,9 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) {
-  TryMergeNegatedInput(instruction);
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 }  // namespace arm64
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index cf84587..338120b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,10 +51,6 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
-  // For bitwise operations (And/Or/Xor) with a negated input, try to use
-  // a negated bitwise instruction.
-  bool TryMergeNegatedInput(HBinaryOperation* op);
-
   // HInstruction visitors, sorted alphabetically.
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 45d196f..a11b5bd 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -186,4 +186,47 @@
   return false;
 }
 
+
+bool TryMergeNegatedInput(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // Only consider the case where there is exactly one Not; with two Nots,
+  // De Morgan's laws should be applied instead.
+  if (left->IsNot() ^ right->IsNot()) {
+    HInstruction* hnot = (left->IsNot() ? left : right);
+    HInstruction* hother = (left->IsNot() ? right : left);
+
+    // Only do the simplification if the Not has only one use and can thus be
+    // safely removed. Even though ARM64 negated bitwise operations do not have
+    // an immediate variant (only register), we still do the simplification when
+    // `hother` is a constant, because it removes an instruction if the constant
+    // cannot be encoded as an immediate:
+    //   mov r0, #large_constant
+    //   neg r2, r1
+    //   and r0, r0, r2
+    // becomes:
+    //   mov r0, #large_constant
+    //   bic r0, r0, r1
+    if (hnot->HasOnlyOneNonEnvironmentUse()) {
+      // Replace code looking like
+      //    NOT tmp, mask
+      //    AND dst, src, tmp   (respectively ORR, EOR)
+      // with
+      //    BIC dst, src, mask  (respectively ORN, EON)
+      HInstruction* src = hnot->AsNot()->GetInput();
+
+      HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetArena())
+          HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());
+
+      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
+      hnot->GetBlock()->RemoveInstruction(hnot);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 }  // namespace art
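
The IR node produced by TryMergeNegatedInput computes x OP ~y, which maps onto the BIC/ORN/EON encodings named in the comment. A few standalone spot checks of those semantics (sketch, not ART code):

```cpp
#include <cassert>
#include <cstdint>

uint32_t Bic(uint32_t x, uint32_t y) { return x & ~y; }  // AND + NOT -> BIC
uint32_t Orn(uint32_t x, uint32_t y) { return x | ~y; }  // OR  + NOT -> ORN
uint32_t Eon(uint32_t x, uint32_t y) { return x ^ ~y; }  // XOR + NOT -> EON (arm64 only)

int main() {
  assert(Bic(0xFFu, 0x0Fu) == 0xF0u);        // Clear the masked bits.
  assert(Orn(0x00u, 0x0Fu) == 0xFFFFFFF0u);  // Same as ~0x0F.
  assert(Eon(0x0Fu, 0x0Fu) == 0xFFFFFFFFu);  // x ^ ~x is all ones.
  return 0;
}
```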
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 9832ecc..b1fe8f4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -22,6 +22,9 @@
 namespace art {
 
 bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
+// For bitwise operations (And/Or/Xor) with a negated input, try to use
+// a negated bitwise instruction.
+bool TryMergeNegatedInput(HBinaryOperation* op);
 
 }  // namespace art
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index f1a6e3d..c306cf9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1548,11 +1548,11 @@
   __ Beqz(argument, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadWord,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMipsWordSize,
                                             pStringCompareTo).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1707,10 +1707,10 @@
   }
 
   __ LoadFromOffset(kLoadWord,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pIndexOf).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
 
   if (slow_path != nullptr) {
@@ -1793,10 +1793,10 @@
   __ Beqz(byte_array, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadWord,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromBytes).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
@@ -1826,10 +1826,10 @@
   // all include a null check on `data` before calling that method.
 
   __ LoadFromOffset(kLoadWord,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromChars).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -1855,10 +1855,10 @@
   __ Beqz(string_to_copy, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadWord,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromString).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 5ec5b86..cf973aa 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1356,10 +1356,10 @@
   __ Beqzc(argument, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1506,11 +1506,11 @@
   }
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
 
   if (slow_path != nullptr) {
@@ -1583,12 +1583,12 @@
   __ Beqzc(byte_array, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
                                             pAllocStringFromBytes).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
@@ -1617,12 +1617,12 @@
   //
   // all include a null check on `data` before calling that method.
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
                                             pAllocStringFromChars).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -1648,12 +1648,12 @@
   __ Beqzc(string_to_copy, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
                                             pAllocStringFromString).Int32Value());
   CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 77ded29..98766a3 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2181,7 +2181,9 @@
     DCHECK(upper_bound_rti.IsSupertypeOf(rti))
         << " upper_bound_rti: " << upper_bound_rti
         << " rti: " << rti;
-    DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact());
+    DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact())
+        << " upper_bound_rti: " << upper_bound_rti
+        << " rti: " << rti;
   }
 }
 
@@ -2215,6 +2217,10 @@
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     DCHECK(IsValidHandle(type_handle));
+    if (!is_exact) {
+      DCHECK(!type_handle->CannotBeAssignedFromOtherTypes())
+          << "Callers of ReferenceTypeInfo::Create should ensure is_exact is properly computed";
+    }
   }
   return ReferenceTypeInfo(type_handle, is_exact);
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index ecb690f..1bb5f5d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -509,6 +509,8 @@
   // before cursor.
   HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
 
+  ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; }
+
  private:
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -1265,6 +1267,7 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
+  M(BitwiseNegatedRight, Instruction)                                   \
   M(MultiplyAccumulate, Instruction)
 #endif
 
@@ -1279,7 +1282,6 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64BitwiseNegatedRight, Instruction)                              \
   M(Arm64DataProcWithShifterOp, Instruction)                            \
   M(Arm64IntermediateAddress, Instruction)
 #endif
@@ -2964,6 +2966,8 @@
   virtual IfCondition GetOppositeCondition() const = 0;
 
   bool IsGtBias() const { return GetBias() == ComparisonBias::kGtBias; }
+  bool IsLtBias() const { return GetBias() == ComparisonBias::kLtBias; }
+
   ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
   void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); }
 
@@ -2974,13 +2978,23 @@
   bool IsFPConditionTrueIfNaN() const {
     DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
-    return IsGtBias() ? ((if_cond == kCondGT) || (if_cond == kCondGE)) : (if_cond == kCondNE);
+    if (if_cond == kCondNE) {
+      return true;
+    } else if (if_cond == kCondEQ) {
+      return false;
+    }
+    return ((if_cond == kCondGT) || (if_cond == kCondGE)) && IsGtBias();
   }
 
   bool IsFPConditionFalseIfNaN() const {
     DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
-    return IsGtBias() ? ((if_cond == kCondLT) || (if_cond == kCondLE)) : (if_cond == kCondEQ);
+    if (if_cond == kCondEQ) {
+      return true;
+    } else if (if_cond == kCondNE) {
+      return false;
+    }
+    return ((if_cond == kCondLT) || (if_cond == kCondLE)) && IsGtBias();
   }
 
  protected:
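
The rewritten IsFPConditionTrueIfNaN/IsFPConditionFalseIfNaN consult kCondNE and kCondEQ before the bias because IEEE-754 unordered comparisons fix their results for NaN regardless of cmpg/cmpl. A quick standalone check:

```cpp
#include <cassert>
#include <cmath>

int main() {
  // For a NaN input, != is always true and == is always false, independent
  // of gt/lt bias -- which is why the two predicates above special-case
  // kCondNE and kCondEQ before checking the bias.
  const float nan = std::nanf("");
  assert(nan != 1.0f);
  assert(!(nan == 1.0f));
  assert(!(nan < 1.0f) && !(nan > 1.0f) && !(nan <= 1.0f) && !(nan >= 1.0f));
  return 0;
}
```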
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 75a71e7..173852a 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,66 +118,6 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
-class HArm64BitwiseNegatedRight : public HBinaryOperation {
- public:
-  HArm64BitwiseNegatedRight(Primitive::Type result_type,
-                            InstructionKind op,
-                            HInstruction* left,
-                            HInstruction* right,
-                            uint32_t dex_pc = kNoDexPc)
-    : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
-      op_kind_(op) {
-    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
-  }
-
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x & ~y) {
-    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
-                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
-                  "Inconsistent negated bitwise types");
-    switch (op_kind_) {
-      case HInstruction::kAnd:
-        return x & ~y;
-      case HInstruction::kOr:
-        return x | ~y;
-      case HInstruction::kXor:
-        return x ^ ~y;
-      default:
-        LOG(FATAL) << "Unreachable";
-        UNREACHABLE();
-    }
-  }
-
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
-                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << DebugName() << " is not defined for float values";
-    UNREACHABLE();
-  }
-  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
-                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << DebugName() << " is not defined for double values";
-    UNREACHABLE();
-  }
-
-  InstructionKind GetOpKind() const { return op_kind_; }
-
-  DECLARE_INSTRUCTION(Arm64BitwiseNegatedRight);
-
- private:
-  // Specifies the bitwise operation, which will be then negated.
-  const InstructionKind op_kind_;
-
-  DISALLOW_COPY_AND_ASSIGN(HArm64BitwiseNegatedRight);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index b04b622..c10c718 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -53,6 +53,66 @@
   DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
 };
 
+class HBitwiseNegatedRight : public HBinaryOperation {
+ public:
+  HBitwiseNegatedRight(Primitive::Type result_type,
+                       InstructionKind op,
+                       HInstruction* left,
+                       HInstruction* right,
+                       uint32_t dex_pc = kNoDexPc)
+    : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
+      op_kind_(op) {
+    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
+  }
+
+  template <typename T, typename U>
+  auto Compute(T x, U y) const -> decltype(x & ~y) {
+    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
+                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
+                  "Inconsistent negated bitwise types");
+    switch (op_kind_) {
+      case HInstruction::kAnd:
+        return x & ~y;
+      case HInstruction::kOr:
+        return x | ~y;
+      case HInstruction::kXor:
+        return x ^ ~y;
+      default:
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(BitwiseNegatedRight);
+
+ private:
+  // Specifies the bitwise operation, whose right input will then be negated.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index cc1a806..7a82063 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -919,7 +919,8 @@
   if (compiler_options.GetGenerateDebugInfo()) {
     const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
     const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
-    debug::MethodDebugInfo info;
+    debug::MethodDebugInfo info = debug::MethodDebugInfo();
+    info.trampoline_name = nullptr;
     info.dex_file = dex_file;
     info.class_def_index = class_def_idx;
     info.dex_method_index = method_idx;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index deaa415..75356c8 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -432,11 +432,10 @@
   } else if (klass != nullptr) {
     ScopedObjectAccess soa(Thread::Current());
     ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
-    is_exact = is_exact || klass->CannotBeAssignedFromOtherTypes();
+    is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
   } else {
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -518,8 +517,7 @@
     HUnresolvedInstanceFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
-    instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -527,8 +525,7 @@
     HUnresolvedStaticFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
-    instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -724,12 +721,11 @@
   if (handle->IsObjectArrayClass()) {
     ReferenceTypeInfo::TypeHandle component_handle =
         handle_cache->NewHandle(handle->GetComponentType());
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(component_handle, /* is_exact */ false));
+    bool is_exact = component_handle->CannotBeAssignedFromOtherTypes();
+    instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(component_handle, is_exact));
   } else {
     // We don't know what the parent actually is, so we fallback to object.
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(handle_cache->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -811,8 +807,7 @@
   if (first_input_index_not_null == input_count) {
     // All inputs are NullConstants, set the type to object.
     // This may happen in the presence of inlining.
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
     return;
   }
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 44e7fc9..ce4f38a 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -46,6 +46,7 @@
 #include "class_linker.h"
 #include "compiler.h"
 #include "compiler_callbacks.h"
+#include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
 #include "dex/pass_manager.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
@@ -1034,6 +1035,9 @@
     key_value_store_->Put(
         OatHeader::kDebuggableKey,
         compiler_options_->debuggable_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+    key_value_store_->Put(
+        OatHeader::kNativeDebuggableKey,
+        compiler_options_->native_debuggable_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
     if (compiler_options_->IsExtractOnly()) {
       key_value_store_->Put(OatHeader::kCompilationType, OatHeader::kExtractOnlyValue);
     } else if (UseProfileGuidedCompilation()) {
@@ -1687,6 +1691,8 @@
         std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
         std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
+        oat_writer->AddMethodDebugInfos(debug::MakeTrampolineInfos(oat_writer->GetOatHeader()));
+
         // We need to mirror the layout of the ELF file in the compressed debug-info.
         // Therefore PrepareDebugInfo() relies on the SetLoadedSectionSizes() call further above.
         elf_writer->PrepareDebugInfo(oat_writer->GetMethodDebugInfo());
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 00a691b..79fde5e 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -32,6 +32,8 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
+#include "debug/elf_debug_writer.h"
+#include "debug/method_debug_info.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "disassembler.h"
@@ -108,14 +110,6 @@
       no_bits_(no_bits) {
   }
 
-  typedef void (OatSymbolizer::*Callback)(const DexFile::ClassDef&,
-                                          uint32_t,
-                                          const OatFile::OatMethod&,
-                                          const DexFile&,
-                                          uint32_t,
-                                          const DexFile::CodeItem*,
-                                          uint32_t);
-
   bool Symbolize() {
     const InstructionSet isa = oat_file_->GetOatHeader().GetInstructionSet();
     const InstructionSetFeatures* features = InstructionSetFeatures::FromBitmap(
@@ -131,8 +125,6 @@
     auto* rodata = builder_->GetRoData();
     auto* text = builder_->GetText();
     auto* bss = builder_->GetBss();
-    auto* strtab = builder_->GetStrTab();
-    auto* symtab = builder_->GetSymTab();
 
     const uint8_t* rodata_begin = oat_file_->Begin();
     const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
@@ -165,69 +157,31 @@
         elf_file->GetPath(), rodata_size, text_size, oat_file_->BssSize());
     builder_->WriteDynamicSection();
 
-    Walk(&art::OatSymbolizer<ElfTypes>::RegisterForDedup);
+    Walk();
+    for (const auto& trampoline : debug::MakeTrampolineInfos(oat_file_->GetOatHeader())) {
+      method_debug_infos_.push_back(trampoline);
+    }
 
-    NormalizeState();
-
-    strtab->Start();
-    strtab->Write("");  // strtab should start with empty string.
-    AddTrampolineSymbols();
-    Walk(&art::OatSymbolizer<ElfTypes>::AddSymbol);
-    strtab->End();
-
-    symtab->Start();
-    symtab->Write();
-    symtab->End();
+    debug::WriteDebugInfo(builder_.get(),
+                          ArrayRef<const debug::MethodDebugInfo>(method_debug_infos_),
+                          dwarf::DW_DEBUG_FRAME_FORMAT,
+                          true /* write_oat_patches */);
 
     builder_->End();
 
     return builder_->Good();
   }
 
-  void AddTrampolineSymbol(const char* name, uint32_t code_offset) {
-    if (code_offset != 0) {
-      uint32_t name_offset = builder_->GetStrTab()->Write(name);
-      uint64_t symbol_value = code_offset - oat_file_->GetOatHeader().GetExecutableOffset();
-      // Specifying 0 as the symbol size means that the symbol lasts until the next symbol or until
-      // the end of the section in case of the last symbol.
-      builder_->GetSymTab()->Add(
-          name_offset,
-          builder_->GetText(),
-          builder_->GetText()->GetAddress() + symbol_value,
-          /* size */ 0,
-          STB_GLOBAL,
-          STT_FUNC);
-    }
-  }
-
-  void AddTrampolineSymbols() {
-    const OatHeader& oat_header = oat_file_->GetOatHeader();
-    AddTrampolineSymbol("interpreterToInterpreterBridge",
-                        oat_header.GetInterpreterToInterpreterBridgeOffset());
-    AddTrampolineSymbol("interpreterToCompiledCodeBridge",
-                        oat_header.GetInterpreterToCompiledCodeBridgeOffset());
-    AddTrampolineSymbol("jniDlsymLookup",
-                        oat_header.GetJniDlsymLookupOffset());
-    AddTrampolineSymbol("quickGenericJniTrampoline",
-                        oat_header.GetQuickGenericJniTrampolineOffset());
-    AddTrampolineSymbol("quickImtConflictTrampoline",
-                        oat_header.GetQuickImtConflictTrampolineOffset());
-    AddTrampolineSymbol("quickResolutionTrampoline",
-                        oat_header.GetQuickResolutionTrampolineOffset());
-    AddTrampolineSymbol("quickToInterpreterBridge",
-                        oat_header.GetQuickToInterpreterBridgeOffset());
-  }
-
-  void Walk(Callback callback) {
+  void Walk() {
     std::vector<const OatFile::OatDexFile*> oat_dex_files = oat_file_->GetOatDexFiles();
     for (size_t i = 0; i < oat_dex_files.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files[i];
       CHECK(oat_dex_file != nullptr);
-      WalkOatDexFile(oat_dex_file, callback);
+      WalkOatDexFile(oat_dex_file);
     }
   }
 
-  void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file, Callback callback) {
+  void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file) {
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
     if (dex_file == nullptr) {
@@ -236,13 +190,12 @@
     for (size_t class_def_index = 0;
         class_def_index < dex_file->NumClassDefs();
         class_def_index++) {
-      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
       const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(class_def_index);
       OatClassType type = oat_class.GetType();
       switch (type) {
         case kOatClassAllCompiled:
         case kOatClassSomeCompiled:
-          WalkOatClass(oat_class, *dex_file, class_def, callback);
+          WalkOatClass(oat_class, *dex_file, class_def_index);
           break;
 
         case kOatClassNoneCompiled:
@@ -253,8 +206,10 @@
     }
   }
 
-  void WalkOatClass(const OatFile::OatClass& oat_class, const DexFile& dex_file,
-                    const DexFile::ClassDef& class_def, Callback callback) {
+  void WalkOatClass(const OatFile::OatClass& oat_class,
+                    const DexFile& dex_file,
+                    uint32_t class_def_index) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
     const uint8_t* class_data = dex_file.GetClassData(class_def);
     if (class_data == nullptr) {  // empty class such as a marker interface?
       return;
@@ -262,117 +217,62 @@
     // Note: even if this is an interface or a native class, we still have to walk it, as there
     //       might be a static initializer.
     ClassDataItemIterator it(dex_file, class_data);
-    SkipAllFields(&it);
     uint32_t class_method_idx = 0;
-    while (it.HasNextDirectMethod()) {
-      const OatFile::OatMethod oat_method = oat_class.GetOatMethod(class_method_idx);
-      WalkOatMethod(class_def, class_method_idx, oat_method, dex_file, it.GetMemberIndex(),
-                    it.GetMethodCodeItem(), it.GetMethodAccessFlags(), callback);
-      class_method_idx++;
-      it.Next();
-    }
-    while (it.HasNextVirtualMethod()) {
-      const OatFile::OatMethod oat_method = oat_class.GetOatMethod(class_method_idx);
-      WalkOatMethod(class_def, class_method_idx, oat_method, dex_file, it.GetMemberIndex(),
-                    it.GetMethodCodeItem(), it.GetMethodAccessFlags(), callback);
-      class_method_idx++;
-      it.Next();
+    for (; it.HasNextStaticField(); it.Next()) { /* skip */ }
+    for (; it.HasNextInstanceField(); it.Next()) { /* skip */ }
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      WalkOatMethod(oat_class.GetOatMethod(class_method_idx++),
+                    dex_file,
+                    class_def_index,
+                    it.GetMemberIndex(),
+                    it.GetMethodCodeItem(),
+                    it.GetMethodAccessFlags());
     }
     DCHECK(!it.HasNext());
   }
 
-  void WalkOatMethod(const DexFile::ClassDef& class_def, uint32_t class_method_index,
-                     const OatFile::OatMethod& oat_method, const DexFile& dex_file,
-                     uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
-                     uint32_t method_access_flags, Callback callback) {
+  void WalkOatMethod(const OatFile::OatMethod& oat_method,
+                     const DexFile& dex_file,
+                     uint32_t class_def_index,
+                     uint32_t dex_method_index,
+                     const DexFile::CodeItem* code_item,
+                     uint32_t method_access_flags) {
     if ((method_access_flags & kAccAbstract) != 0) {
       // Abstract method, no code.
       return;
     }
-    if (oat_method.GetCodeOffset() == 0) {
+    const OatHeader& oat_header = oat_file_->GetOatHeader();
+    const OatQuickMethodHeader* method_header = oat_method.GetOatQuickMethodHeader();
+    if (method_header == nullptr || method_header->GetCodeSize() == 0) {
       // No code.
       return;
     }
 
-    (this->*callback)(class_def, class_method_index, oat_method, dex_file, dex_method_idx, code_item,
-                      method_access_flags);
-  }
-
-  void RegisterForDedup(const DexFile::ClassDef& class_def ATTRIBUTE_UNUSED,
-                        uint32_t class_method_index ATTRIBUTE_UNUSED,
-                        const OatFile::OatMethod& oat_method,
-                        const DexFile& dex_file ATTRIBUTE_UNUSED,
-                        uint32_t dex_method_idx ATTRIBUTE_UNUSED,
-                        const DexFile::CodeItem* code_item ATTRIBUTE_UNUSED,
-                        uint32_t method_access_flags ATTRIBUTE_UNUSED) {
-    state_[oat_method.GetCodeOffset()]++;
-  }
-
-  void NormalizeState() {
-    for (auto& x : state_) {
-      if (x.second == 1) {
-        state_[x.first] = 0;
-      }
-    }
-  }
-
-  enum class DedupState {  // private
-    kNotDeduplicated,
-    kDeduplicatedFirst,
-    kDeduplicatedOther
-  };
-  DedupState IsDuplicated(uint32_t offset) {
-    if (state_[offset] == 0) {
-      return DedupState::kNotDeduplicated;
-    }
-    if (state_[offset] == 1) {
-      return DedupState::kDeduplicatedOther;
-    }
-    state_[offset] = 1;
-    return DedupState::kDeduplicatedFirst;
-  }
-
-  void AddSymbol(const DexFile::ClassDef& class_def ATTRIBUTE_UNUSED,
-                 uint32_t class_method_index ATTRIBUTE_UNUSED,
-                 const OatFile::OatMethod& oat_method,
-                 const DexFile& dex_file,
-                 uint32_t dex_method_idx,
-                 const DexFile::CodeItem* code_item ATTRIBUTE_UNUSED,
-                 uint32_t method_access_flags ATTRIBUTE_UNUSED) {
-    DedupState dedup = IsDuplicated(oat_method.GetCodeOffset());
-    if (dedup != DedupState::kDeduplicatedOther) {
-      std::string pretty_name = PrettyMethod(dex_method_idx, dex_file, true);
-
-      if (dedup == DedupState::kDeduplicatedFirst) {
-        pretty_name = "[Dedup]" + pretty_name;
-      }
-
-      int name_offset = builder_->GetStrTab()->Write(pretty_name);
-      uint64_t address = oat_method.GetCodeOffset() -
-                         oat_file_->GetOatHeader().GetExecutableOffset() +
-                         builder_->GetText()->GetAddress();
-      builder_->GetSymTab()->Add(name_offset,
-                                 builder_->GetText(),
-                                 address,
-                                 oat_method.GetQuickCodeSize(),
-                                 STB_GLOBAL,
-                                 STT_FUNC);
-    }
+    debug::MethodDebugInfo info = debug::MethodDebugInfo();
+    info.trampoline_name = nullptr;
+    info.dex_file = &dex_file;
+    info.class_def_index = class_def_index;
+    info.dex_method_index = dex_method_index;
+    info.access_flags = method_access_flags;
+    info.code_item = code_item;
+    info.isa = oat_header.GetInstructionSet();
+    info.deduped = !seen_offsets_.insert(oat_method.GetCodeOffset()).second;
+    info.is_native_debuggable = oat_header.IsNativeDebuggable();
+    info.is_optimized = method_header->IsOptimized();
+    info.is_code_address_text_relative = true;
+    info.code_address = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    info.code_size = method_header->GetCodeSize();
+    info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
+    info.code_info = info.is_optimized ? method_header->GetOptimizedCodeInfoPtr() : nullptr;
+    info.cfi = ArrayRef<uint8_t>();
+    method_debug_infos_.push_back(info);
   }
 
  private:
-  static void SkipAllFields(ClassDataItemIterator* it) {
-    while (it->HasNextStaticField()) {
-      it->Next();
-    }
-    while (it->HasNextInstanceField()) {
-      it->Next();
-    }
-  }
-
   const OatFile* oat_file_;
   std::unique_ptr<ElfBuilder<ElfTypes> > builder_;
-  std::unordered_map<uint32_t, uint32_t> state_;
+  std::vector<debug::MethodDebugInfo> method_debug_infos_;
+  std::unordered_set<uint32_t> seen_offsets_;
   const std::string output_name_;
   bool no_bits_;
 };
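
The symbolizer rewrite above replaces the two-pass dedup bookkeeping (RegisterForDedup / NormalizeState / IsDuplicated) with a single pass: the first method at a given code offset records it, and any later method at the same offset is flagged as deduped. A small sketch of that idiom, assuming only the standard library:

    #include <cstdint>
    #include <unordered_set>

    // insert() returns {iterator, true} for a new offset and
    // {iterator, false} for one already recorded, so a method sharing a
    // previously seen offset is marked deduped in one pass.
    bool MarkDeduped(std::unordered_set<uint32_t>* seen_offsets,
                     uint32_t code_offset) {
      return !seen_offsets->insert(code_offset).second;
    }
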
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 500fa14..84660a3 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -501,7 +501,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
-    LOCAL_CLANG_arm64 := true
+    LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
     LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
   else # host
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f33eebe..64135d8 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -952,85 +952,6 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
-ENTRY art_quick_alloc_object_tlab
-    // Fast path tlab allocation.
-    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
-    // r2, r3, r12: free.
-#if defined(USE_READ_BARRIER)
-    eor    r0, r0, r0                                         // Read barrier not supported here.
-    sub    r0, r0, #1                                         // Return -1.
-    bx     lr
-#endif
-    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
-                                                              // Load the class (r2)
-    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-    cbz    r2, .Lart_quick_alloc_object_tlab_slow_path        // Check null class
-                                                              // Check class status.
-    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
-    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
-    bne    .Lart_quick_alloc_object_tlab_slow_path
-                                                              // Add a fake dependence from the
-                                                              // following access flag and size
-                                                              // loads to the status load.
-                                                              // This is to prevent those loads
-                                                              // from being reordered above the
-                                                              // status load and reading wrong
-                                                              // values (an alternative is to use
-                                                              // a load-acquire for the status).
-    eor    r3, r3, r3
-    add    r2, r2, r3
-                                                              // Check access flags has
-                                                              // kAccClassIsFinalizable.
-    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
-    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
-    bne    .Lart_quick_alloc_object_tlab_slow_path
-                                                              // Load thread_local_pos (r12) and
-                                                              // thread_local_end (r3) with ldrd.
-                                                              // Check constraints for ldrd.
-#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
-#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
-#endif
-    ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
-    sub    r12, r3, r12                                       // Compute the remaining buf size.
-    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3).
-    cmp    r3, r12                                            // Check if it fits. OK to do this
-                                                              // before rounding up the object size
-                                                              // assuming the buf size alignment.
-    bhi    .Lart_quick_alloc_object_tlab_slow_path
-    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
-                                                              // Round up the object size by the
-                                                              // object alignment. (addr + 7) & ~7.
-    add    r3, r3, #OBJECT_ALIGNMENT_MASK
-    and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
-                                                              // Reload old thread_local_pos (r0)
-                                                              // for the return value.
-    ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
-    add    r1, r0, r3
-    str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
-    ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
-    add    r1, r1, #1
-    str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
-    POISON_HEAP_REF r2
-    str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
-                                                              // Fence. This is "ish" not "ishst" so
-                                                              // that the code after this allocation
-                                                              // site will see the right values in
-                                                              // the fields of the class.
-                                                              // Alternatively we could use "ishst"
-                                                              // if we use load-acquire for the
-                                                              // class status load.)
-    dmb    ish
-    bx     lr
-.Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
-    mov    r2, r9                                             // Pass Thread::Current.
-    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_tlab
-
-
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -1134,6 +1055,127 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free.
+// Need to preserve r0 and r1 for the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    r2, \slowPathLabel                                 // Check null class
+                                                              // Check class status.
+    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    \slowPathLabel
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    r3, r3, r3
+    add    r2, r2, r3
+                                                              // Check that the access flags have
+                                                              // kAccClassIsFinalizable set.
+    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    \slowPathLabel
+                                                              // Load thread_local_pos (r12) and
+                                                              // thread_local_end (r3) with ldrd.
+                                                              // Check constraints for ldrd.
+#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
+#error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
+#endif
+    ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
+    sub    r12, r3, r12                                       // Compute the remaining buf size.
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3).
+    cmp    r3, r12                                            // Check if it fits. OK to do this
+                                                              // before rounding up the object size
+                                                              // assuming the buf size alignment.
+    bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
+                                                              // Round up the object size by the
+                                                              // object alignment. (addr + 7) & ~7.
+    add    r3, r3, #OBJECT_ALIGNMENT_MASK
+    and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
+                                                              // Reload old thread_local_pos (r0)
+                                                              // for the return value.
+    ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
+    add    r1, r0, r3
+    str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
+    ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
+    add    r1, r1, #1
+    str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF r2
+    str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that the code after this allocation
+                                                              // site will see the right values in
+                                                              // the fields of the class.
+                                                              // Alternatively we could use "ishst"
+                                                              // if we use load-acquire for the
+                                                              // class status load.
+    dmb    ish
+    bx     lr
+.endm
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
+    // r2, r3, r12: free.
+#if defined(USE_READ_BARRIER)
+    eor    r0, r0, r0                                         // Read barrier not supported here.
+    sub    r0, r0, #1                                         // Return -1.
+    bx     lr
+#endif
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+ENTRY art_quick_alloc_object_region_tlab
+    // Fast path tlab allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free.
+#if !defined(USE_READ_BARRIER)
+    eor    r0, r0, r0                                         // Read barrier must be enabled here.
+    sub    r0, r0, #1                                         // Return -1.
+    bx     lr
+#endif
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+                                                              // Read barrier for class load.
+    ldr    r3, [r9, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    push   {r0, r1, r3, lr}                                   // Save registers. r3 is pushed only
+                                                              // to align sp to 16 bytes.
+    mov    r0, r2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    r2, r0                                             // Get the (marked) class back.
+    pop    {r0, r1, r3, lr}
+    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_region_tlab
+
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
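
The ALLOC_OBJECT_TLAB_FAST_PATH macro above is a bump-pointer allocator over the thread-local [thread_local_pos, thread_local_end) buffer. A rough C++ sketch of just that core, assuming the class checks have already passed and objects are 8-byte aligned; the field names are illustrative, not art::Thread's.

    #include <cstddef>
    #include <cstdint>

    struct Thread {               // illustrative, not art::Thread
      char* tlab_pos;             // thread_local_pos
      char* tlab_end;             // thread_local_end
      uint64_t tlab_objects;      // thread_local_objects
    };

    void* AllocObjectTlabFast(Thread* self, size_t object_size) {
      size_t aligned = (object_size + 7u) & ~size_t{7};  // (addr + 7) & ~7
      if (aligned > static_cast<size_t>(self->tlab_end - self->tlab_pos)) {
        return nullptr;  // does not fit: take the slow path into the runtime
      }
      void* result = self->tlab_pos;   // old pos becomes the new object
      self->tlab_pos += aligned;       // bump the pointer
      self->tlab_objects++;
      return result;  // caller stores the class pointer, then fences (dmb ish)
    }
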
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index e848008..e4c2558 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1537,7 +1537,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -1638,6 +1638,9 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
     /*
      * Called by managed code when the thread has been asked to suspend.
      */
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 6c7d510..dbf0abb 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1313,7 +1313,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
 
@@ -1421,6 +1421,9 @@
 
 END art_quick_alloc_object_rosalloc
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index b4e2fcc..f1e605a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1367,7 +1367,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
 
@@ -1467,6 +1467,9 @@
 
 END art_quick_alloc_object_rosalloc
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fbacdbc..290769b 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -219,7 +219,8 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// This is defined separately for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 4be00ce..125570d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -897,8 +897,8 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 69caec8..dee8d3c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,6 +809,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 DEFINE_FUNCTION art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -943,6 +944,8 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_FUNCTION art_quick_alloc_object_tlab
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 879364e..d5f0dff 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -101,6 +101,11 @@
 ADD_TEST_EQ(THREAD_ID_OFFSET,
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
+// Offset of field Thread::tls32_.is_gc_marking.
+#define THREAD_IS_GC_MARKING_OFFSET 52
+ADD_TEST_EQ(THREAD_IS_GC_MARKING_OFFSET,
+            art::Thread::IsGcMarkingOffset<__SIZEOF_POINTER__>().Int32Value())
+
 // Offset of field Thread::tlsPtr_.card_table.
 #define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
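
The THREAD_IS_GC_MARKING_OFFSET addition follows the file's pattern: a literal #define that assembly code can use, paired with an ADD_TEST_EQ check so the C++ build breaks if the literal ever drifts from the real field offset. A self-contained sketch of the same cross-check, with a made-up struct and offset:

    #include <cstddef>

    struct ExampleTls32 {        // made-up layout, not art::Thread
      int thin_lock_thread_id;
      int is_gc_marking;
    };

    // The assembler sees only the literal; the static_assert (conceptually
    // what ADD_TEST_EQ does) keeps it honest against the actual C++ layout.
    #define EXAMPLE_IS_GC_MARKING_OFFSET 4
    static_assert(offsetof(ExampleTls32, is_gc_marking) ==
                      EXAMPLE_IS_GC_MARKING_OFFSET,
                  "asm constant out of sync with C++ field offset");
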
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
index ec0b3c4..b8f0d92 100644
--- a/runtime/interpreter/mterp/arm/binopLit8.S
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -13,7 +13,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
diff --git a/runtime/interpreter/mterp/arm/binopWide.S b/runtime/interpreter/mterp/arm/binopWide.S
index 1d511ec..4d88001 100644
--- a/runtime/interpreter/mterp/arm/binopWide.S
+++ b/runtime/interpreter/mterp/arm/binopWide.S
@@ -19,9 +19,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if $chkzero
diff --git a/runtime/interpreter/mterp/arm/binopWide2addr.S b/runtime/interpreter/mterp/arm/binopWide2addr.S
index 81db48b..bb16335 100644
--- a/runtime/interpreter/mterp/arm/binopWide2addr.S
+++ b/runtime/interpreter/mterp/arm/binopWide2addr.S
@@ -16,8 +16,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if $chkzero
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
index 4c5ffc5..981c036 100644
--- a/runtime/interpreter/mterp/arm/entry.S
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -47,8 +47,8 @@
     /* set up "named" registers */
     mov     rSELF, r0
     ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to insns[] (i.e. - the dalivk byte code).
-    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs.
+    VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
     ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
     add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
     add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
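
Most of the mterp changes that follow replace the open-coded "add rX, rFP, index, lsl #2" with the VREG_INDEX_TO_ADDR macro. Both compute the address of a 32-bit virtual-register slot; the macro centralizes the scaling so the vreg representation is defined in one place. In rough C++ terms:

    #include <cstdint>

    // Each Dalvik vreg is one 32-bit slot, so slot address = frame + index.
    // The ARM form is "add rX, rFP, index, lsl #2" (index * 4 bytes),
    // now spelled VREG_INDEX_TO_ADDR so the scaling lives in one macro.
    uint32_t* VregIndexToAddr(uint32_t* frame, uint32_t index) {
      return frame + index;
    }
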
diff --git a/runtime/interpreter/mterp/arm/fbinop2addr.S b/runtime/interpreter/mterp/arm/fbinop2addr.S
index b052a29..53c87a0 100644
--- a/runtime/interpreter/mterp/arm/fbinop2addr.S
+++ b/runtime/interpreter/mterp/arm/fbinop2addr.S
@@ -7,14 +7,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     $instr                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
diff --git a/runtime/interpreter/mterp/arm/fbinopWide2addr.S b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
index 4e7401d..9766e2c 100644
--- a/runtime/interpreter/mterp/arm/fbinopWide2addr.S
+++ b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
@@ -8,11 +8,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
diff --git a/runtime/interpreter/mterp/arm/funop.S b/runtime/interpreter/mterp/arm/funop.S
index d7a0859..1b8bb8b 100644
--- a/runtime/interpreter/mterp/arm/funop.S
+++ b/runtime/interpreter/mterp/arm/funop.S
@@ -6,11 +6,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     $instr                              @ s1<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
diff --git a/runtime/interpreter/mterp/arm/funopNarrower.S b/runtime/interpreter/mterp/arm/funopNarrower.S
index 9daec28..b9f758b 100644
--- a/runtime/interpreter/mterp/arm/funopNarrower.S
+++ b/runtime/interpreter/mterp/arm/funopNarrower.S
@@ -6,11 +6,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     $instr                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
diff --git a/runtime/interpreter/mterp/arm/funopWider.S b/runtime/interpreter/mterp/arm/funopWider.S
index 450ba3a..854cdc9 100644
--- a/runtime/interpreter/mterp/arm/funopWider.S
+++ b/runtime/interpreter/mterp/arm/funopWider.S
@@ -6,11 +6,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     $instr                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_aget_wide.S b/runtime/interpreter/mterp/arm/op_aget_wide.S
index e1430b4..853a7a4 100644
--- a/runtime/interpreter/mterp/arm/op_aget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_aget_wide.S
@@ -19,7 +19,7 @@
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_wide.S b/runtime/interpreter/mterp/arm/op_aput_wide.S
index 49839d1..0057507 100644
--- a/runtime/interpreter/mterp/arm/op_aput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_aput_wide.S
@@ -15,7 +15,7 @@
     ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
     add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
     cmp     r1, r3                      @ compare unsigned index, length
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
index 2b4c0ea..e57b19c 100644
--- a/runtime/interpreter/mterp/arm/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -23,8 +23,8 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     cmp     r1, r3                      @ compare (vBB+1, vCC+1)
diff --git a/runtime/interpreter/mterp/arm/op_const.S b/runtime/interpreter/mterp/arm/op_const.S
index de3e3c3..39890a0 100644
--- a/runtime/interpreter/mterp/arm/op_const.S
+++ b/runtime/interpreter/mterp/arm/op_const.S
@@ -1,7 +1,7 @@
     /* const vAA, #+BBBBbbbb */
     mov     r3, rINST, lsr #8           @ r3<- AA
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r1, 2                         @ r1<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_const_16.S b/runtime/interpreter/mterp/arm/op_const_16.S
index 59c6dac..a30cf3a 100644
--- a/runtime/interpreter/mterp/arm/op_const_16.S
+++ b/runtime/interpreter/mterp/arm/op_const_16.S
@@ -1,5 +1,5 @@
     /* const/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     SET_VREG r0, r3                     @ vAA<- r0
diff --git a/runtime/interpreter/mterp/arm/op_const_4.S b/runtime/interpreter/mterp/arm/op_const_4.S
index c177bb9..c97b0e9 100644
--- a/runtime/interpreter/mterp/arm/op_const_4.S
+++ b/runtime/interpreter/mterp/arm/op_const_4.S
@@ -1,8 +1,7 @@
     /* const/4 vA, #+B */
-    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    sbfx    r1, rINST, #12, #4          @ r1<- sssssssB (sign-extended)
     ubfx    r0, rINST, #8, #4           @ r0<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  @ ip<- opcode from rINST
     SET_VREG r1, r0                     @ fp[A]<- r1
     GOTO_OPCODE ip                      @ execute next instruction
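
The op_const_4 change folds the two-instruction sign extension (shift the B nibble up to the top bits, then arithmetic-shift it back down) into a single sbfx bitfield extract. Both computations, sketched in C++ under the usual assumption that right shift of a signed value is arithmetic, as it is on ARM:

    #include <cstdint>

    // Old sequence: lsl #16 puts B in bits [31:28], asr #28 sign-extends.
    int32_t SignExtendB_Shifts(uint32_t inst) {
      return static_cast<int32_t>(inst << 16) >> 28;
    }

    // New sequence: sbfx rD, rINST, #12, #4 extracts bits [15:12] signed.
    int32_t SignExtendB_Sbfx(uint32_t inst) {
      uint32_t b = (inst >> 12) & 0xFu;             // the 4-bit field
      return static_cast<int32_t>((b ^ 8u) - 8u);   // sign-extend 4 bits
    }
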
diff --git a/runtime/interpreter/mterp/arm/op_const_high16.S b/runtime/interpreter/mterp/arm/op_const_high16.S
index 460d546..536276d 100644
--- a/runtime/interpreter/mterp/arm/op_const_high16.S
+++ b/runtime/interpreter/mterp/arm/op_const_high16.S
@@ -1,5 +1,5 @@
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r0, r0, lsl #16             @ r0<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
index 1a3d0b2..1255c07 100644
--- a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
@@ -1,7 +1,7 @@
     /* const/string vAA, String@BBBBBBBB */
     EXPORT_PC
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r2, 2                         @ r2<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
     mov     r1, rINST, lsr #8           @ r1<- AA
     orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
     add     r2, rFP, #OFF_FP_SHADOWFRAME
diff --git a/runtime/interpreter/mterp/arm/op_const_wide.S b/runtime/interpreter/mterp/arm/op_const_wide.S
index 12394b6..8310a4c 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide.S
@@ -8,7 +8,7 @@
     orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
     CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
     FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_16.S b/runtime/interpreter/mterp/arm/op_const_wide_16.S
index 3811d86..28abb51 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide_16.S
@@ -1,10 +1,10 @@
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r1, r0, asr #31             @ r1<- ssssssss
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_32.S b/runtime/interpreter/mterp/arm/op_const_wide_32.S
index 0b6f1cc..c10bb04 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide_32.S
@@ -5,7 +5,7 @@
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     mov     r1, r0, asr #31             @ r1<- ssssssss
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_high16.S b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
index b9796eb..d7e38ec 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide_high16.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
@@ -5,7 +5,7 @@
     mov     r1, r1, lsl #16             @ r1<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide.S b/runtime/interpreter/mterp/arm/op_iget_wide.S
index 859ffac..e287d51 100644
--- a/runtime/interpreter/mterp/arm/op_iget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_iget_wide.S
@@ -16,7 +16,7 @@
     cmp      r3, #0
     bne      MterpException                @ bail out
     CLEAR_SHADOW_PAIR r2, ip, lr           @ Zero out the shadow regs
-    add      r3, rFP, r2, lsl #2           @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2              @ r3<- &fp[A]
     stmia    r3, {r0-r1}                   @ fp[A]<- r0/r1
     ADVANCE 2
     GET_INST_OPCODE ip                     @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
index 07f854a..5a7177d 100644
--- a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
@@ -7,7 +7,7 @@
     beq     common_errNullObject        @ object was null
     ldrd    r0, [r3, ip]                @ r0<- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
     CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
diff --git a/runtime/interpreter/mterp/arm/op_instance_of.S b/runtime/interpreter/mterp/arm/op_instance_of.S
index d76f0b0..019929e 100644
--- a/runtime/interpreter/mterp/arm/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm/op_instance_of.S
@@ -11,10 +11,9 @@
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
     mov       r3, rSELF                 @ r3<- self
-    mov       r9, rINST, lsr #8         @ r9<- A+
-    and       r9, r9, #15               @ r9<- A
     bl        MterpInstanceOf           @ (index, &obj, method, self)
     ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      r9, rINST, #8, #4         @ r9<- A
     PREFETCH_INST 2
     cmp       r1, #0                    @ exception pending?
     bne       MterpException
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide.S b/runtime/interpreter/mterp/arm/op_iput_wide.S
index 8bbd63e..3dda187 100644
--- a/runtime/interpreter/mterp/arm/op_iput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_iput_wide.S
@@ -5,7 +5,7 @@
     mov      r1, rINST, lsr #12         @ r1<- B
     GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
     ubfx     r2, rINST, #8, #4          @ r2<- A
-    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[A]
     ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
index a2fc9e1..88e6ea1 100644
--- a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
@@ -5,7 +5,7 @@
     ubfx    r0, rINST, #8, #4           @ r0<- A
     cmp     r2, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
     ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     strd    r0, [r2, r3]                @ obj.field<- r0/r1
diff --git a/runtime/interpreter/mterp/arm/op_long_to_double.S b/runtime/interpreter/mterp/arm/op_long_to_double.S
index 1d48a2a..cac12d4 100644
--- a/runtime/interpreter/mterp/arm/op_long_to_double.S
+++ b/runtime/interpreter/mterp/arm/op_long_to_double.S
@@ -8,8 +8,8 @@
      */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     vldr    d0, [r3]                    @ d0<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
 
diff --git a/runtime/interpreter/mterp/arm/op_move_result_wide.S b/runtime/interpreter/mterp/arm/op_move_result_wide.S
index 1845ccf..87929ea 100644
--- a/runtime/interpreter/mterp/arm/op_move_result_wide.S
+++ b/runtime/interpreter/mterp/arm/op_move_result_wide.S
@@ -1,7 +1,7 @@
     /* move-result-wide vAA */
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_move_wide.S b/runtime/interpreter/mterp/arm/op_move_wide.S
index f5d156d..ff353ea 100644
--- a/runtime/interpreter/mterp/arm/op_move_wide.S
+++ b/runtime/interpreter/mterp/arm/op_move_wide.S
@@ -2,8 +2,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_16.S b/runtime/interpreter/mterp/arm/op_move_wide_16.S
index 8a55c4b..9812b66 100644
--- a/runtime/interpreter/mterp/arm/op_move_wide_16.S
+++ b/runtime/interpreter/mterp/arm/op_move_wide_16.S
@@ -2,8 +2,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 2                         @ r3<- BBBB
     FETCH r2, 1                         @ r2<- AAAA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     lr, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR lr, r2           @ lr<- &fp[AAAA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_from16.S b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
index b65259d..d2cc60c 100644
--- a/runtime/interpreter/mterp/arm/op_move_wide_from16.S
+++ b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
@@ -2,8 +2,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 1                         @ r3<- BBBB
     mov     rINST, rINST, lsr #8        @ rINST<- AA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
index 9e83778..8f40f19 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -20,8 +20,8 @@
     FETCH r0, 1                         @ r0<- CCBB
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     mul     ip, r2, r1                  @  ip<- ZxW
@@ -29,7 +29,7 @@
     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
index 789dbd3..7ef24c5 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -9,8 +9,8 @@
     /* mul-long/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
     mul     ip, r2, r1                  @  ip<- ZxW
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
index cfab530..ceae878 100644
--- a/runtime/interpreter/mterp/arm/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -9,6 +9,6 @@
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
     ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
     b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
index 3a50908..4f2f89d 100644
--- a/runtime/interpreter/mterp/arm/op_sget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -12,7 +12,7 @@
     bl    artGet64StaticFromCode
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
-    add   lr, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
     cmp   r3, #0                        @ Fail to resolve?
     bne   MterpException                @ bail out
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
index 12ea248..82ec6ed 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -9,12 +9,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r1, r1, asl r2              @ r1<- r1 << r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
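For reference, the 64-bit shift in this opcode is composed from 32-bit operations; a commented sketch of the three instructions above, assuming r0/r1 hold the low/high words of vBB and r2 holds the masked shift count s:

    mov     r1, r1, asl r2              @ hi<- hi << s
    rsb     r3, r2, #32                 @ r3<- 32 - s
    orr     r1, r1, r0, lsr r3          @ hi<- hi | (lo >> (32-s))

i.e. hi = (hi << s) | (lo >> (32-s)); the s >= 32 case and the final lo << s are handled by instructions outside this hunk.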
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
index 4799e77..f361a7d 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -7,7 +7,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r1, r1, asl r2              @ r1<- r1 << r2
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
index 88a13d6..a0afe5b 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -9,12 +9,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
index 78d8bb7..976110e 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -7,7 +7,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
index adbcffa..8d8ed8c 100644
--- a/runtime/interpreter/mterp/arm/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -8,7 +8,7 @@
     FETCH   r0, 1                       @ r0<- field ref BBBB
     ldr     r1, [rFP, #OFF_FP_METHOD]
     mov     r2, rINST, lsr #8           @ r3<- AA
-    add     r2, rFP, r2, lsl #2
+    VREG_INDEX_TO_ADDR r2, r2
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
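From the argument setup above (r0 = field ref BBBB, r1 = referrer method, r2 = &fp[AA], r3 = self), the helper evidently takes the 64-bit value by pointer; an assumed C-level shape, recorded here as a comment only:

    @ assumed prototype, inferred from the r0-r3 setup above (not verified):
    @   int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
    @                                       uint64_t* new_val /* &fp[AA] */, Thread* self)

This is why the handler must pass a real vreg address, and why VREG_INDEX_TO_ADDR has to remain an address computation rather than a value load.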
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
index f98ec63..c817bc9 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -9,12 +9,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
index 840283d..2735f87 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -7,7 +7,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
diff --git a/runtime/interpreter/mterp/arm/unopNarrower.S b/runtime/interpreter/mterp/arm/unopNarrower.S
index a5fc027..2d0453a 100644
--- a/runtime/interpreter/mterp/arm/unopNarrower.S
+++ b/runtime/interpreter/mterp/arm/unopNarrower.S
@@ -12,7 +12,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $preinstr                           @ optional op; may set condition codes
diff --git a/runtime/interpreter/mterp/arm/unopWide.S b/runtime/interpreter/mterp/arm/unopWide.S
index a074234..cd5defd 100644
--- a/runtime/interpreter/mterp/arm/unopWide.S
+++ b/runtime/interpreter/mterp/arm/unopWide.S
@@ -9,8 +9,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/unopWider.S b/runtime/interpreter/mterp/arm/unopWider.S
index 23b6b9d..9d50489 100644
--- a/runtime/interpreter/mterp/arm/unopWider.S
+++ b/runtime/interpreter/mterp/arm/unopWider.S
@@ -10,7 +10,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     GET_VREG r0, r3                     @ r0<- vB
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     $preinstr                           @ optional op; may set condition codes
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index f9073ab..23e656e 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -46,7 +46,7 @@
     /* set up "named" registers */
     mov     xSELF, x0
     ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to insns[] (i.e. - the dalivk byte code).
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.
     add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
     add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
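The two add lines above pin down the shadow-frame layout this patch relies on: the reference array sits immediately after the vregs, both using 4-byte slots. A sketch, with n = SHADOWFRAME_NUMBER_OF_VREGS:

    // x2 + SHADOWFRAME_VREGS_OFFSET         -> vregs[0..n-1]  (xFP points here)
    // x2 + SHADOWFRAME_VREGS_OFFSET + 4*n   -> refs[0..n-1]   (xREFS points here)
    // so vreg i lives at xFP + 4*i and its reference mirror at xREFS + 4*i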
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 7223750..7101ba9 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -272,7 +272,7 @@
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
-    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING: handle shadow frame vreg zero if store */
 .endm
 
 /*
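Given the definition just above, the arm64 rewrites later in this patch are pure textual substitutions; for example, the op_sput_wide line

    VREG_INDEX_TO_ADDR x2, w2               // x2<- &fp[AA]

expands back to the removed "add x2, xFP, w2, lsl #2", so no generated code changes until the macro body itself does.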
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide.S b/runtime/interpreter/mterp/arm64/op_iput_wide.S
index 4ce9525..e1ab127 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide.S
@@ -5,7 +5,7 @@
     lsr      w1, wINST, #12             // w1<- B
     GET_VREG w1, w1                     // w1<- fp[B], the object pointer
     ubfx     w2, wINST, #8, #4          // w2<- A
-    add      x2, xFP, x2, lsl #2        // w2<- &fp[A]
+    VREG_INDEX_TO_ADDR x2, x2           // x2<- &fp[A]
     ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
index 1d034ec..a79b1a6 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S
@@ -8,7 +8,7 @@
     FETCH   w0, 1                       // w0<- field ref BBBB
     ldr     x1, [xFP, #OFF_FP_METHOD]
     lsr     w2, wINST, #8               // w3<- AA
-    add     x2, xFP, w2, lsl #2
+    VREG_INDEX_TO_ADDR x2, w2
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
diff --git a/runtime/interpreter/mterp/mips/entry.S b/runtime/interpreter/mterp/mips/entry.S
index cef08fe..5771a4f 100644
--- a/runtime/interpreter/mterp/mips/entry.S
+++ b/runtime/interpreter/mterp/mips/entry.S
@@ -49,7 +49,7 @@
     /* set up "named" registers */
     move    rSELF, a0
     lw      a0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
-    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to insns[] (i.e. - the dalivk byte code).
+    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to vregs.
     EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
     lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
     addu    rPC, a1, CODEITEM_INSNS_OFFSET        # Point to base of insns[]
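EAS2 is assumed to be the MIPS counterpart of the arm64 "add xREFS, xFP, w0, lsl #2" above, i.e. a shift-by-2 add:

    # assumed effect of EAS2(rREFS, rFP, a0):
    #   rREFS<- rFP + (a0 << 2)             # refs[] follows vregs[], 4 bytes per slot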
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index b26a63a..092474d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -343,8 +343,8 @@
     /* set up "named" registers */
     mov     rSELF, r0
     ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to insns[] (i.e. - the dalivk byte code).
-    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs.
+    VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
     ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
     add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
     add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
@@ -435,8 +435,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -452,8 +452,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 1                         @ r3<- BBBB
     mov     rINST, rINST, lsr #8        @ rINST<- AA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
@@ -469,8 +469,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 2                         @ r3<- BBBB
     FETCH r2, 1                         @ r2<- AAAA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     lr, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR lr, r2           @ lr<- &fp[AAAA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
@@ -563,7 +563,7 @@
     /* move-result-wide vAA */
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -655,7 +655,7 @@
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
     ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
     b       MterpReturn
 
@@ -687,10 +687,9 @@
 .L_op_const_4: /* 0x12 */
 /* File: arm/op_const_4.S */
     /* const/4 vA, #+B */
-    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    sbfx    r1, rINST, #12, #4          @ r1<- sssssssB (sign-extended)
     ubfx    r0, rINST, #8, #4           @ r0<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  @ ip<- opcode from rINST
     SET_VREG r1, r0                     @ fp[A]<- r1
     GOTO_OPCODE ip                      @ execute next instruction
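Worked check of the sbfx rewrite above, assuming the const/4 encoding places B in rINST bits 15:12 (as the "lsr #12" extractions elsewhere in this file imply). For rINST = 0xF112, i.e. const/4 v1, #-1:

    @ old: two instructions
    @   0xF112 << 16           = 0xF1120000
    @   asr #28                = 0xFFFFFFFF  (sssssssB, sign-extended)
    @ new: one instruction
    @   sbfx(0xF112, #12, #4)  = sign_extend(0xF) = 0xFFFFFFFF

Same result, one instruction shorter, and r1 is no longer written twice.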
@@ -700,7 +699,7 @@
 .L_op_const_16: /* 0x13 */
 /* File: arm/op_const_16.S */
     /* const/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     SET_VREG r0, r3                     @ vAA<- r0
@@ -713,8 +712,8 @@
 /* File: arm/op_const.S */
     /* const vAA, #+BBBBbbbb */
     mov     r3, rINST, lsr #8           @ r3<- AA
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r1, 2                         @ r1<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -726,7 +725,7 @@
 .L_op_const_high16: /* 0x15 */
 /* File: arm/op_const_high16.S */
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r0, r0, lsl #16             @ r0<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
@@ -739,12 +738,12 @@
 .L_op_const_wide_16: /* 0x16 */
 /* File: arm/op_const_wide_16.S */
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r1, r0, asr #31             @ r1<- ssssssss
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -760,7 +759,7 @@
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     mov     r1, r0, asr #31             @ r1<- ssssssss
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
@@ -780,7 +779,7 @@
     orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
     CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
     FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -796,7 +795,7 @@
     mov     r1, r1, lsl #16             @ r1<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -825,8 +824,8 @@
 /* File: arm/op_const_string_jumbo.S */
     /* const/string vAA, String@BBBBBBBB */
     EXPORT_PC
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r2, 2                         @ r2<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
     mov     r1, rINST, lsr #8           @ r1<- AA
     orr     r0, r0, r2, lsl #16         @ r1<- BBBBbbbb
     add     r2, rFP, #OFF_FP_SHADOWFRAME
@@ -938,10 +937,9 @@
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
     mov       r3, rSELF                 @ r3<- self
-    mov       r9, rINST, lsr #8         @ r9<- A+
-    and       r9, r9, #15               @ r9<- A
     bl        MterpInstanceOf           @ (index, &obj, method, self)
     ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      r9, rINST, #8, #4         @ r9<- A
     PREFETCH_INST 2
     cmp       r1, #0                    @ exception pending?
     bne       MterpException
@@ -1509,8 +1507,8 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     cmp     r1, r3                      @ compare (vBB+1, vCC+1)
@@ -2089,7 +2087,7 @@
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -2314,7 +2312,7 @@
     ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
     add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
     cmp     r1, r3                      @ compare unsigned index, length
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
@@ -2533,7 +2531,7 @@
     cmp      r3, #0
     bne      MterpException                @ bail out
     CLEAR_SHADOW_PAIR r2, ip, lr           @ Zero out the shadow regs
-    add      r3, rFP, r2, lsl #2           @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2              @ r3<- &fp[A]
     stmia    r3, {r0-r1}                   @ fp[A]<- r0/r1
     ADVANCE 2
     GET_INST_OPCODE ip                     @ extract opcode from rINST
@@ -2736,7 +2734,7 @@
     mov      r1, rINST, lsr #12         @ r1<- B
     GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
     ubfx     r2, rINST, #8, #4          @ r2<- A
-    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[A]
     ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
@@ -2923,7 +2921,7 @@
     bl    artGet64StaticFromCode
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
-    add   lr, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
     cmp   r3, #0                        @ Fail to resolve?
     bne   MterpException                @ bail out
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
@@ -3135,7 +3133,7 @@
     FETCH   r0, 1                       @ r0<- field ref BBBB
     ldr     r1, [rFP, #OFF_FP_METHOD]
     mov     r2, rINST, lsr #8           @ r3<- AA
-    add     r2, rFP, r2, lsl #2
+    VREG_INDEX_TO_ADDR r2, r2
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
@@ -3668,8 +3666,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3696,8 +3694,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3750,8 +3748,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3779,7 +3777,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     GET_VREG r0, r3                     @ r0<- vB
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
                                @ optional op; may set condition codes
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3803,11 +3801,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     fsitos  s1, s0                              @ s1<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
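The recurring "lsr #8 + and #15" -> ubfx rewrite in these conversion opcodes is the unsigned twin of the const/4 change: ubfx r9, rINST, #8, #4 extracts bits 11:8 zero-extended. Worked check, assuming A occupies rINST bits 11:8:

    @ rINST = 0x3512:  (0x3512 >> 8) & 15  = 0x35 & 15 = 5
    @ ubfx(0x3512, #8, #4)                 = bits 11:8 = 5

Moving the extraction after the flds also appears intended to overlap the VFP load; r9's final value is unchanged.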
@@ -3828,11 +3825,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     fsitod  d0, s0                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -3880,7 +3876,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
                                @ optional op; may set condition codes
@@ -3904,8 +3900,8 @@
      */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     vldr    d0, [r3]                    @ d0<- vAA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
 
@@ -3935,11 +3931,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     ftosizs s1, s0                              @ s1<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -3964,7 +3959,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     GET_VREG r0, r3                     @ r0<- vB
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
                                @ optional op; may set condition codes
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3989,11 +3984,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     vcvt.f64.f32  d0, s0                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -4015,11 +4009,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     ftosizd  s0, d0                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -4043,8 +4036,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -4070,11 +4063,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     vcvt.f32.f64  s0, d0                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -4626,9 +4618,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4670,9 +4662,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4715,8 +4707,8 @@
     FETCH r0, 1                         @ r0<- CCBB
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     mul     ip, r2, r1                  @  ip<- ZxW
@@ -4724,7 +4716,7 @@
     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
@@ -4755,9 +4747,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 1
@@ -4800,9 +4792,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 1
@@ -4844,9 +4836,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4888,9 +4880,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4932,9 +4924,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4966,12 +4958,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r1, r1, asl r2              @ r1<- r1 << r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
@@ -4998,12 +4990,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -5030,12 +5022,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -5355,9 +5347,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -5808,8 +5800,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -5848,8 +5840,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -5881,8 +5873,8 @@
     /* mul-long/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
     mul     ip, r2, r1                  @  ip<- ZxW
@@ -5917,8 +5909,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 1
@@ -5958,8 +5950,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 1
@@ -5998,8 +5990,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6038,8 +6030,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6078,8 +6070,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6109,7 +6101,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r1, r1, asl r2              @ r1<- r1 << r2
@@ -6136,7 +6128,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
@@ -6163,7 +6155,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
@@ -6191,14 +6183,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fadds   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6219,14 +6209,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fsubs   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6247,14 +6235,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fmuls   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6275,14 +6261,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fdivs   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6343,11 +6327,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6372,11 +6355,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6401,11 +6383,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6430,11 +6411,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6467,8 +6447,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6783,7 +6763,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -6821,7 +6801,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -6860,7 +6840,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -6967,7 +6947,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7005,7 +6985,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7043,7 +7023,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7081,7 +7061,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7119,7 +7099,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7157,7 +7137,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7207,7 +7187,7 @@
     beq     common_errNullObject        @ object was null
     ldrd    r0, [r3, ip]                @ r0<- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
     CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
@@ -7263,7 +7243,7 @@
     ubfx    r0, rINST, #8, #4           @ r0<- A
     cmp     r2, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
     ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     strd    r0, [r2, r3]                @ obj.field<- r0/r1
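
The mterp_arm.S hunks above fold the two-instruction `mov`/`and` sequence into a single `ubfx` (unsigned bit-field extract), which pulls the 4-bit register index A out of bits 8-11 of the instruction word in one step, and they reschedule the FP loads after the address computations. A minimal Java sketch of the extraction, assuming the Dalvik 12x encoding (vA in bits 8-11, vB in bits 12-15); the class name and sample code unit are illustrative only:

    public class BitfieldExtract {
      static int extractAOld(int inst) {
        int a = inst >>> 8;  // mov r9, rINST, lsr #8   @ r9<- A+
        return a & 15;       // and r9, r9, #15         @ r9<- A
      }

      static int extractANew(int inst) {
        // ubfx r9, rINST, #8, #4: extract the 4-bit field starting at
        // bit 8, zero-extended -- the same result in one instruction.
        return (inst >>> 8) & ((1 << 4) - 1);
      }

      public static void main(String[] args) {
        int inst = 0xABCD;  // hypothetical code unit
        System.out.println(extractAOld(inst) + " == " + extractANew(inst));  // 11 == 11
      }
    }
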
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index c7c0fb5..cdb27e8 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -279,7 +279,7 @@
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
-    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING: handle shadow frame vreg zero if store */
 .endm
 
 /*
@@ -338,7 +338,7 @@
     /* set up "named" registers */
     mov     xSELF, x0
     ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to insns[] (i.e. - the dalivk byte code).
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.
     add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
     add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
@@ -2552,7 +2552,7 @@
     lsr      w1, wINST, #12             // w1<- B
     GET_VREG w1, w1                     // w1<- fp[B], the object pointer
     ubfx     w2, wINST, #8, #4          // w2<- A
-    add      x2, xFP, x2, lsl #2        // w2<- &fp[A]
+    VREG_INDEX_TO_ADDR x2, x2           // x2<- &fp[A]
     ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
@@ -2941,7 +2941,7 @@
     FETCH   w0, 1                       // w0<- field ref BBBB
     ldr     x1, [xFP, #OFF_FP_METHOD]
     lsr     w2, wINST, #8               // w3<- AA
-    add     x2, xFP, w2, lsl #2
+    VREG_INDEX_TO_ADDR x2, w2
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
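
The mterp_arm64.S hunks replace open-coded `add x2, xFP, x2, lsl #2` sequences with the `VREG_INDEX_TO_ADDR` macro, so every `&fp[vreg]` computation goes through the one definition that carries the shadow-frame WARNING. A rough Java model of that layout, assuming 4-byte vreg slots with the reference array immediately after them (as the xFP/xREFS setup above suggests); all names here are ours:

    public class ShadowFrameModel {
      final int[] vregs;     // primitive slots, what xFP points at
      final Object[] refs;   // parallel reference array, what xREFS points at

      ShadowFrameModel(int numVregs) {
        vregs = new int[numVregs];
        refs = new Object[numVregs];
      }

      // VREG_INDEX_TO_ADDR: xFP + (vreg << 2), i.e. scale the register
      // index to a byte offset from the frame base.
      static long vregIndexToAddr(long fpBase, int vreg) {
        return fpBase + ((long) vreg << 2);
      }

      // Per the macro's WARNING, a primitive store must also clear the
      // matching reference slot -- the job CLEAR_SHADOW_PAIR does above.
      void setVReg(int vreg, int value) {
        vregs[vreg] = value;
        refs[vreg] = null;
      }
    }
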
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index 7ae1ab1..b134129 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -542,7 +542,7 @@
     /* set up "named" registers */
     move    rSELF, a0
     lw      a0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
-    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to insns[] (i.e. - the dalivk byte code).
+    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to vregs.
     EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
     lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
     addu    rPC, a1, CODEITEM_INSNS_OFFSET        # Point to base of insns[]
@@ -4373,8 +4373,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4412,8 +4412,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4451,8 +4451,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4491,8 +4491,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4526,8 +4526,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4567,8 +4567,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4602,8 +4602,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4642,8 +4642,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4681,8 +4681,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4720,8 +4720,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4759,8 +4759,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4798,8 +4798,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4837,8 +4837,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
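
The corrected mips comments are worth spelling out: only a zero divisor needs an explicit check, because the hardware (and the Java/dex semantics) define (INT_MIN / -1) to wrap rather than trap. A short Java demonstration:

    public class DivCornerCase {
      public static void main(String[] args) {
        try {
          int unused = 1 / 0;  // the one case the interpreter guards against
        } catch (ArithmeticException expected) {
          System.out.println("div by zero throws: " + expected.getMessage());
        }
        // INT_MIN / -1 overflows the int range; the result is defined to
        // wrap back to INT_MIN, so no extra check is needed.
        System.out.println(Integer.MIN_VALUE / -1);  // -2147483648
      }
    }
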
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 4d94130..701c600 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -183,7 +183,7 @@
         break;
       }
       case LockWord::kFatLocked: {
-        // Already inflated, return the has stored in the monitor.
+        // Already inflated, return the hash stored in the monitor.
         Monitor* monitor = lw.FatLockMonitor();
         DCHECK(monitor != nullptr);
         return monitor->GetHashCode();
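
The comment being fixed here describes the fat-lock path: once a lock is inflated, the identity hash lives in the Monitor rather than inline in the lock word. A hedged Java sketch of that dispatch, with stand-in state names and types for LockWord's:

    public class HashCodeLookup {
      enum LockState { UNLOCKED, THIN_LOCKED, FAT_LOCKED, HASHED }

      static final class Monitor { int hashCode; }  // stand-in type

      static int identityHash(LockState state, int inlineHash, Monitor monitor) {
        switch (state) {
          case HASHED:
            return inlineHash;        // hash stored inline in the lock word
          case FAT_LOCKED:
            return monitor.hashCode;  // already inflated: ask the monitor
          default:
            throw new IllegalStateException("would install a hash or inflate");
        }
      }
    }
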
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 2ac1052..ed99cba 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -468,6 +468,10 @@
   return IsKeyEnabled(OatHeader::kDebuggableKey);
 }
 
+bool OatHeader::IsNativeDebuggable() const {
+  return IsKeyEnabled(OatHeader::kNativeDebuggableKey);
+}
+
 bool OatHeader::IsExtractOnly() const {
   return KeyHasValue(kCompilationType,
                      kExtractOnlyValue,
diff --git a/runtime/oat.h b/runtime/oat.h
index 0660e19..1d6c076 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -38,6 +38,7 @@
   static constexpr const char* kDex2OatHostKey = "dex2oat-host";
   static constexpr const char* kPicKey = "pic";
   static constexpr const char* kDebuggableKey = "debuggable";
+  static constexpr const char* kNativeDebuggableKey = "native-debuggable";
   static constexpr const char* kCompilationType = "compilation-type";
   static constexpr const char* kClassPathKey = "classpath";
   static constexpr const char* kBootClassPath = "bootclasspath";
@@ -110,6 +111,7 @@
   size_t GetHeaderSize() const;
   bool IsPic() const;
   bool IsDebuggable() const;
+  bool IsNativeDebuggable() const;
   bool IsExtractOnly() const;
   bool IsProfileGuideCompiled() const;
 
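
oat.cc/oat.h add a "native-debuggable" key beside the existing "debuggable" one, both read through IsKeyEnabled from the oat header's key/value store. A minimal sketch of that pattern, under the assumption that an enabled key is simply stored with the value "true":

    import java.util.HashMap;
    import java.util.Map;

    public class OatKeyValueStore {
      static final String kDebuggableKey = "debuggable";
      static final String kNativeDebuggableKey = "native-debuggable";

      private final Map<String, String> store = new HashMap<>();

      void put(String key, String value) { store.put(key, value); }

      // Assumed encoding: a key is enabled iff it maps to "true".
      boolean isKeyEnabled(String key) {
        return "true".equals(store.get(key));
      }

      boolean isNativeDebuggable() {
        return isKeyEnabled(kNativeDebuggableKey);
      }
    }
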
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 2b7eca2..daabc6e 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -63,16 +63,24 @@
     return gc_map_offset_ == 0 && vmap_table_offset_ != 0;
   }
 
-  CodeInfo GetOptimizedCodeInfo() const {
+  const void* GetOptimizedCodeInfoPtr() const {
     DCHECK(IsOptimized());
     const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
-    return CodeInfo(data);
+    return data;
+  }
+
+  CodeInfo GetOptimizedCodeInfo() const {
+    return CodeInfo(GetOptimizedCodeInfoPtr());
   }
 
   const uint8_t* GetCode() const {
     return code_;
   }
 
+  uint32_t GetCodeSize() const {
+    return code_size_;
+  }
+
   const uint8_t* GetNativeGcMap() const {
     return (gc_map_offset_ == 0) ? nullptr : code_ - gc_map_offset_;
   }
@@ -111,7 +119,7 @@
   uint32_t GetFrameSizeInBytes() const {
     uint32_t result = frame_info_.FrameSizeInBytes();
     if (kCheckFrameSize) {
-      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
+      DCHECK_ALIGNED(result, kStackAlignment);
     }
     return result;
   }
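
The frame-size hunk replaces an "at least kStackAlignment" assertion with "a multiple of kStackAlignment", which is the property the stack actually needs. The two predicates are not equivalent, as this Java check shows (16 is an assumed value for kStackAlignment):

    public class AlignmentCheck {
      static final int kStackAlignment = 16;  // assumed for illustration

      static boolean atLeast(int size) {
        return size >= kStackAlignment;              // old DCHECK_LE condition
      }

      static boolean isAligned(int size) {
        return (size & (kStackAlignment - 1)) == 0;  // DCHECK_ALIGNED condition
      }

      public static void main(String[] args) {
        System.out.println(atLeast(24) + " " + isAligned(24));  // true false
        System.out.println(atLeast(32) + " " + isAligned(32));  // true true
      }
    }
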
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index e95f2c5..fd6cc10 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1866,7 +1866,7 @@
 
 void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::string>* argv)
     const {
-  if (GetInstrumentation()->InterpretOnly() || UseJit()) {
+  if (GetInstrumentation()->InterpretOnly()) {
     argv->push_back("--compiler-filter=interpret-only");
   }
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 4c81d4f..afb11d3 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -57,6 +57,10 @@
 static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
 static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
 
+// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
+// some history.
+static constexpr bool kDumpUnattachedThreadNativeStack = true;
+
 ThreadList::ThreadList()
     : suspend_all_count_(0),
       debug_suspend_all_count_(0),
@@ -138,9 +142,7 @@
   // refactor DumpState to avoid skipping analysis.
   Thread::DumpState(os, nullptr, tid);
   DumpKernelStack(os, tid, "  kernel: ", false);
-  // TODO: Reenable this when the native code in system_server can handle it.
-  // Currently "adb shell kill -3 `pid system_server`" will cause it to exit.
-  if (false) {
+  if (kDumpUnattachedThreadNativeStack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
   os << "\n";
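
Rather than re-enabling the dump by deleting the dead `if (false)` branch outright, thread_list.cc names the switch as a constexpr flag, keeping a one-line way to turn it back off. The same idiom in Java, where a compile-time constant makes the guarded branch dead code the compiler can drop:

    public class DumpFlags {
      // Named compile-time flag, analogous to the constexpr bool above.
      static final boolean kDumpUnattachedThreadNativeStack = true;

      static void dumpUnattachedThread(int tid) {
        System.out.println("kernel stack for tid " + tid);
        if (kDumpUnattachedThreadNativeStack) {  // elided when the flag is false
          System.out.println("native stack for tid " + tid);
        }
      }
    }
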
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 8640148..dd4ffe4 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -1601,6 +1601,34 @@
     return (short) (value & 0x17fff);
   }
 
+  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Const42>>,<<Arg>>]
+
+  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
+
+  public static int intReverseCondition(int i) {
+    return (42 > i) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
+  /// CHECK-DAG:      <<CMP:i\d+>>      Compare [<<Const42>>,<<Result>>]
+
+  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
+
+  public static int intReverseConditionNaN(int i) {
+    return (42 != Math.sqrt(i)) ? 13 : 54;
+  }
+
   public static int runSmaliTest(String name, boolean input) {
     try {
       Class<?> c = Class.forName("SmaliTests");
@@ -1611,7 +1639,7 @@
     }
   }
 
  public static void main(String[] args) {
     int arg = 123456;
 
     assertLongEquals(Add0(arg), arg);
@@ -1740,6 +1768,9 @@
     assertIntEquals(intAnd0x17fffToShort(Integer.MIN_VALUE), 0);
     assertIntEquals(intAnd0x17fffToShort(Integer.MAX_VALUE), Short.MAX_VALUE);
 
+    assertIntEquals(intReverseCondition(41), 13);
+    assertIntEquals(intReverseConditionNaN(-5), 13);
+
     for (String condition : new String[] { "Equal", "NotEqual" }) {
       for (String constant : new String[] { "True", "False" }) {
         for (String side : new String[] { "Rhs", "Lhs" }) {
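
The new 458 checks above exercise condition reversal in the instruction simplifier: the comparison is mirrored and its operands swapped (`LessThanOrEqual [42, i]` becomes `GreaterThanOrEqual [i, 42]`), and the floating-point variant must respect NaN, for which every ordered comparison and equality is false. A worked Java check of the expected values used in main:

    public class ReverseCondition {
      static int intReverseCondition(int i) {
        // "42 > i" and "i < 42" are the same predicate with operands
        // swapped; the simplifier canonicalizes the operand order.
        return (42 > i) ? 13 : 54;
      }

      static int intReverseConditionNaN(int i) {
        // sqrt of a negative input is NaN; "42 != NaN" is true, so the
        // first branch is taken.
        return (42 != Math.sqrt(i)) ? 13 : 54;
      }

      public static void main(String[] args) {
        System.out.println(intReverseCondition(41));     // 13 (42 > 41)
        System.out.println(intReverseConditionNaN(-5));  // 13 (NaN)
      }
    }
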
diff --git a/test/537-checker-jump-over-jump/src/Main.java b/test/537-checker-jump-over-jump/src/Main.java
index cf9a69d..7a58e8b 100644
--- a/test/537-checker-jump-over-jump/src/Main.java
+++ b/test/537-checker-jump-over-jump/src/Main.java
@@ -24,7 +24,7 @@
   //
   /// CHECK:                            If
   /// CHECK-NEXT:                       cmp
-  /// CHECK-NEXT:                       jnl/ge
+  /// CHECK-NEXT:                       jle/ng
   //
   /// CHECK-DAG:   <<Fibs:l\d+>>        StaticFieldGet
   /// CHECK-DAG:                        NullCheck [<<Fibs>>]
diff --git a/test/564-checker-negbitwise/src/Main.java b/test/564-checker-negbitwise/src/Main.java
index 3de7be7..ccb8ff4 100644
--- a/test/564-checker-negbitwise/src/Main.java
+++ b/test/564-checker-negbitwise/src/Main.java
@@ -45,7 +45,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Mask:i\d+>>        ParameterValue
-  /// CHECK:       <<NegOp:i\d+>>       Arm64BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
   /// CHECK:                            Return [<<NegOp>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (after)
@@ -55,6 +55,27 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) disassembly (after)
   /// CHECK:                            bic w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        And
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) disassembly (after)
+  /// CHECK:                            bic.w r{{\d+}}, r{{\d+}}, r{{\d+}}
+
   public static int $opt$noinline$notAnd(int base, int mask) {
     if (doThrow) throw new Error();
     return base & ~mask;
@@ -74,7 +95,7 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Base:j\d+>>        ParameterValue
   /// CHECK:       <<Mask:j\d+>>        ParameterValue
-  /// CHECK:       <<NegOp:j\d+>>       Arm64BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
+  /// CHECK:       <<NegOp:j\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
   /// CHECK:                            Return [<<NegOp>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (after)
@@ -84,6 +105,27 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) disassembly (after)
   /// CHECK:                            orn x{{\d+}}, x{{\d+}}, x{{\d+}}
 
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<Not:j\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:j\d+>>          Or [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:j\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Or
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) disassembly (after)
+  /// CHECK:                            orn.w r{{\d+}}, r{{\d+}}, r{{\d+}}
+
   public static long $opt$noinline$notOr(long base, long mask) {
     if (doThrow) throw new Error();
     return base | ~mask;
@@ -103,7 +145,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Mask:i\d+>>        ParameterValue
-  /// CHECK:       <<NegOp:i\d+>>       Arm64BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Xor
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Xor
   /// CHECK:                            Return [<<NegOp>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (after)
@@ -113,39 +155,63 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) disassembly (after)
   /// CHECK:                            eon w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
   public static int $opt$noinline$notXor(int base, int mask) {
     if (doThrow) throw new Error();
     return base ^ ~mask;
   }
 
   /**
-   * Check that the transformation is also done when the base is a constant.
+   * Check that the transformation is done when the argument is a constant.
    */
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) instruction_simplifier_arm64 (before)
-  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Constant:i\d+>>    IntConstant
-  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
-  /// CHECK:       <<Op:i\d+>>          Xor [<<Not>>,<<Constant>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<Base>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Not>>,<<Constant>>]
   /// CHECK:                            Return [<<Op>>]
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) instruction_simplifier_arm64 (after)
-  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Constant:i\d+>>    IntConstant
-  /// CHECK:       <<NegOp:i\d+>>       Arm64BitwiseNegatedRight [<<Constant>>,<<Mask>>] kind:Xor
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Constant>>,<<Base>>] kind:And
   /// CHECK:                            Return [<<NegOp>>]
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Not
-  /// CHECK-NOT:                        Xor
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) disassembly (after)
-  /// CHECK:                            mov <<Reg:w\d+>>, #0xf
-  /// CHECK:                            eon w{{\d+}}, <<Reg>>, w{{\d+}}
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Base>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Not>>,<<Constant>>]
+  /// CHECK:                            Return [<<Op>>]
 
-  public static int $opt$noinline$notXorConstant(int mask) {
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Constant>>,<<Base>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  public static int $opt$noinline$notAndConstant(int mask) {
     if (doThrow) throw new Error();
-    return 0xf ^ ~mask;
+    return 0xf & ~mask;
   }
 
   /**
@@ -173,7 +239,31 @@
   /// CHECK:                            Return [<<Add>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64BitwiseNegatedRight
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
 
   public static int $opt$noinline$notAndMultipleUses(int base, int mask) {
     if (doThrow) throw new Error();
@@ -189,7 +279,10 @@
   // have been applied then Not/Not/Or is replaced by And/Not.
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$deMorganOr(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64BitwiseNegatedRight
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$deMorganOr(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
 
   public static int $opt$noinline$deMorganOr(int a, int b) {
     if (doThrow) throw new Error();
@@ -200,7 +293,7 @@
     assertIntEquals(0xe,   $opt$noinline$notAnd(0xf, 0x1));
     assertLongEquals(~0x0, $opt$noinline$notOr(0xf, 0x1));
     assertIntEquals(~0xe,  $opt$noinline$notXor(0xf, 0x1));
-    assertIntEquals(~0xe,  $opt$noinline$notXorConstant(0x1));
+    assertIntEquals(0xe,   $opt$noinline$notAndConstant(0x1));
     assertIntEquals(0xe,   $opt$noinline$notAndMultipleUses(0xf, 0x1));
     assertIntEquals(~0x1,  $opt$noinline$deMorganOr(0x3, 0x1));
   }
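
The 564 updates rename the arm64-specific Arm64BitwiseNegatedRight node to the shared BitwiseNegatedRight and add ARM coverage: `base & ~mask` fuses to `bic` and `base | ~mask` to `orn`, while Thumb-2 lacks an `eon`, so the xor form stays unfused on ARM, and a negation with more than one use is never fused. The source patterns in plain Java:

    public class NegBitwise {
      static int notAnd(int base, int mask) {
        return base & ~mask;   // BitwiseNegatedRight kind:And -> bic
      }

      static long notOr(long base, long mask) {
        return base | ~mask;   // BitwiseNegatedRight kind:Or -> orn
      }

      static int notXor(int base, int mask) {
        return base ^ ~mask;   // eon on arm64 only; ARM keeps Not + Xor
      }

      static int notAndMultipleUses(int base, int mask) {
        int not = ~mask;       // two uses: fusing would not remove the mvn
        return (not & 0x1) + (base & not);
      }
    }
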
diff --git a/test/580-checker-round/expected.txt b/test/580-checker-round/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/580-checker-round/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/580-checker-round/info.txt b/test/580-checker-round/info.txt
new file mode 100644
index 0000000..d6397fd
--- /dev/null
+++ b/test/580-checker-round/info.txt
@@ -0,0 +1 @@
+Unit test for float/double rounding.
diff --git a/test/580-checker-round/src/Main.java b/test/580-checker-round/src/Main.java
new file mode 100644
index 0000000..9e248ef
--- /dev/null
+++ b/test/580-checker-round/src/Main.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.round32(float) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:MathRoundFloat
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int round32(float f) {
+    return Math.round(f);
+  }
+
+  /// CHECK-START: long Main.round64(double) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:MathRoundDouble
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long round64(double d) {
+    return Math.round(d);
+  }
+
+  public static void main(String args[]) {
+    // A few obvious numbers.
+    expectEquals32(-2147483648, round32(Float.NEGATIVE_INFINITY));
+    expectEquals32(-2, round32(-1.51f));
+    expectEquals32(-1, round32(-1.2f));
+    expectEquals32(-1, round32(-1.0f));
+    expectEquals32(-1, round32(-0.51f));
+    expectEquals32(0, round32(-0.2f));
+    expectEquals32(0, round32(-0.0f));
+    expectEquals32(0, round32(+0.0f));
+    expectEquals32(0, round32(+0.2f));
+    expectEquals32(1, round32(+0.5f));
+    expectEquals32(1, round32(+1.0f));
+    expectEquals32(1, round32(+1.2f));
+    expectEquals32(2, round32(+1.5f));
+    expectEquals32(2147483647, round32(Float.POSITIVE_INFINITY));
+
+    // Some others.
+    for (int i = -100; i <= 100; ++i) {
+      expectEquals32(i - 1, round32((float) i - 0.51f));
+      expectEquals32(i, round32((float) i));
+      expectEquals32(i + 1, round32((float) i + 0.5f));
+    }
+    for (float f = -1.5f; f <= -1.499f; f = Math.nextAfter(f, Float.POSITIVE_INFINITY)) {
+      expectEquals32(-1, round32(f));
+    }
+
+    // Some harder.
+    float[] fvals = {
+      -16777215.5f,
+      -16777215.0f,
+      -0.4999f,
+      0.4999f,
+      16777215.0f,
+      16777215.5f
+    };
+    int[] ivals = {
+      -16777216,
+      -16777215,
+      0,
+      0,
+      16777215,
+      16777216
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectEquals32(ivals[i], round32(fvals[i]));
+    }
+
+    // A few NaN numbers.
+    float[] fnans = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fnans.length; i++) {
+      expectEquals32(0, round32(fnans[i]));
+    }
+
+    // A few obvious numbers.
+    expectEquals64(-9223372036854775808L, round64(Double.NEGATIVE_INFINITY));
+    expectEquals64(-2L, round64(-1.51d));
+    expectEquals64(-1L, round64(-1.2d));
+    expectEquals64(-1L, round64(-1.0d));
+    expectEquals64(-1L, round64(-0.51d));
+    expectEquals64(0L, round64(-0.2d));
+    expectEquals64(0L, round64(-0.0d));
+    expectEquals64(0L, round64(+0.0d));
+    expectEquals64(0L, round64(+0.2d));
+    expectEquals64(1L, round64(+0.5d));
+    expectEquals64(1L, round64(+1.0d));
+    expectEquals64(1L, round64(+1.2d));
+    expectEquals64(2L, round64(+1.5d));
+    expectEquals64(9223372036854775807L, round64(Double.POSITIVE_INFINITY));
+
+    // Some others.
+    for (long l = -100; l <= 100; ++l) {
+      expectEquals64(l - 1, round64((double) l - 0.51d));
+      expectEquals64(l, round64((double) l));
+      expectEquals64(l + 1, round64((double) l + 0.5d));
+    }
+    for (double d = -1.5d; d <= -1.49999999999d; d = Math.nextAfter(d, Double.POSITIVE_INFINITY)) {
+      expectEquals64(-1L, round64(d));
+    }
+
+    // Some harder.
+    double[] dvals = {
+      -9007199254740991.5d,
+      -9007199254740991.0d,
+      -0.49999999999999994d,
+      0.49999999999999994d,
+      9007199254740991.0d,
+      9007199254740991.5d
+    };
+    long[] lvals = {
+      -9007199254740992L,
+      -9007199254740991L,
+      0L,
+      0L,
+      9007199254740991L,
+      9007199254740992L
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectEquals64(lvals[i], round64(dvals[i]));
+    }
+
+    // A few NaN numbers.
+    double[] dnans = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dnans.length; i++) {
+      expectEquals64(0L, round64(dnans[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
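
The 580 test pins down the edge behavior the MathRoundFloat/MathRoundDouble intrinsics must reproduce: round to the nearest integer with ties going toward positive infinity, NaN mapping to 0, and infinities clamping to the integer extremes. A few spot checks in Java:

    public class RoundEdges {
      public static void main(String[] args) {
        System.out.println(Math.round(-0.5f));   //  0: ties round toward +infinity
        System.out.println(Math.round(-0.51f));  // -1: nearest integer
        System.out.println(Math.round(Float.NaN));                // 0
        System.out.println(Math.round(Float.POSITIVE_INFINITY));  // 2147483647
        System.out.println(Math.round(Float.NEGATIVE_INFINITY));  // -2147483648
      }
    }
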
diff --git a/test/581-rtp/expected.txt b/test/581-rtp/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/581-rtp/expected.txt
diff --git a/test/581-rtp/info.txt b/test/581-rtp/info.txt
new file mode 100644
index 0000000..b57449a
--- /dev/null
+++ b/test/581-rtp/info.txt
@@ -0,0 +1,2 @@
+Regression test for the reference type propagation pass
+of the optimizing compiler that used to break invariants.
diff --git a/test/581-rtp/src/Main.java b/test/581-rtp/src/Main.java
new file mode 100644
index 0000000..09f6f6c
--- /dev/null
+++ b/test/581-rtp/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Main {
+
+  /// CHECK-START: void Main.main(String[]) builder (after)
+  /// CHECK: StaticFieldGet klass:Main[] exact:true
+  /// CHECK: ArrayGet klass:Main exact:true
+  /// CHECK: BoundType klass:Main exact:true
+  public static void main(String[] args) {
+    Object o = null;
+    Main f = a[0];
+    for (int i = 0; i < 2; ++i) {
+      // We used to crash in the fixed point iteration of
+      // the reference type propagation while handling the instanceof:
+      // we were expecting `o` to get the same exact-ness as the
+      // `HBoundType` but the typing of the `ArrayGet` used to not
+      // propagate the exact-ness.
+      if (o instanceof Main) {
+        field = o;
+      }
+      o = f;
+    }
+    if (field != null) {
+      throw new Error("Expected null");
+    }
+  }
+
+  static Main[] a = new Main[1];
+  static Object field;
+}
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 2db1e6c..762d9a4 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -329,9 +329,17 @@
     INT_OPTS="-Xusejit:true"
     if [ "$VERIFY" = "y" ] ; then
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime"
+      if [ "$PREBUILD" = "n" ]; then
+        # Make sure that even with noprebuild we still JIT, since DexClassLoader
+        # will try to compile the dex file.
+        INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime"
+      fi
     else
       COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none"
       DEX_VERIFY="${DEX_VERIFY} -Xverify:none"
+      if [ "$PREBUILD" = "n" ]; then
+        INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none"
+      fi
     fi
 fi
 
diff --git a/test/valgrind-suppressions.txt b/test/valgrind-suppressions.txt
new file mode 100644
index 0000000..acab6e5
--- /dev/null
+++ b/test/valgrind-suppressions.txt
@@ -0,0 +1,15 @@
+{
+   b/27596582
+   Memcheck:Cond
+   fun:index
+   fun:expand_dynamic_string_token
+   fun:_dl_map_object
+   fun:map_doit
+   fun:_dl_catch_error
+   fun:do_preload
+   fun:dl_main
+   fun:_dl_sysdep_start
+   fun:_dl_start_final
+   fun:_dl_start
+   obj:/lib/x86_64-linux-gnu/ld-2.19.so
+}
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
index 95f0c2d..75d1eff 100644
--- a/tools/libcore_failures_concurrent_collector.txt
+++ b/tools/libcore_failures_concurrent_collector.txt
@@ -16,5 +16,11 @@
   names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
           "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
   bug: 25883050
+},
+{
+  description: "libcore.java.lang.OldSystemTest#test_gc failure on armv8-concurrent-collector.",
+  result: EXEC_FAILED,
+  names: ["libcore.java.lang.OldSystemTest#test_gc"],
+  bug: 26155567
 }
 ]