Merge "Have Checker print lines of unmatched test cases."
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 9a416e2..e9c22d2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -40,6 +40,7 @@
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
+	optimizing/block_builder.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
@@ -53,6 +54,7 @@
 	optimizing/induction_var_analysis.cc \
 	optimizing/induction_var_range.cc \
 	optimizing/inliner.cc \
+	optimizing/instruction_builder.cc \
 	optimizing/instruction_simplifier.cc \
 	optimizing/intrinsics.cc \
 	optimizing/licm.cc \
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
new file mode 100644
index 0000000..5e70a82
--- /dev/null
+++ b/compiler/optimizing/block_builder.cc
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "block_builder.h"
+
+#include "bytecode_utils.h"
+
+namespace art {
+
+HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) {
+  return MaybeCreateBlockAt(dex_pc, dex_pc);
+}
+
+HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t semantic_dex_pc,
+                                                    uint32_t store_dex_pc) {
+  HBasicBlock* block = branch_targets_[store_dex_pc];
+  if (block == nullptr) {
+    block = new (arena_) HBasicBlock(graph_, semantic_dex_pc);
+    branch_targets_[store_dex_pc] = block;
+  }
+  DCHECK_EQ(block->GetDexPc(), semantic_dex_pc);
+  return block;
+}
+
+bool HBasicBlockBuilder::CreateBranchTargets() {
+  // Create the first block for the dex instructions, single successor of the entry block.
+  MaybeCreateBlockAt(0u);
+
+  if (code_item_.tries_size_ != 0) {
+    // Create branch targets at the start/end of the TryItem range. These are
+    // places where the program might fall through into/out of a block and
+    // where TryBoundary instructions will be inserted later. Other edges which
+    // enter/exit the try blocks are a result of branches/switches.
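+    // For example (illustrative values): a TryItem covering code units
+    // [0x04, 0x10) leads to MaybeCreateBlockAt(0x04) and, provided 0x10 is
+    // still within the CodeItem, MaybeCreateBlockAt(0x10).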
+    for (size_t idx = 0; idx < code_item_.tries_size_; ++idx) {
+      const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item_, idx);
+      uint32_t dex_pc_start = try_item->start_addr_;
+      uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
+      MaybeCreateBlockAt(dex_pc_start);
+      if (dex_pc_end < code_item_.insns_size_in_code_units_) {
+        // TODO: Do not create a block if the last instruction cannot fall through.
+        MaybeCreateBlockAt(dex_pc_end);
+      } else if (dex_pc_end == code_item_.insns_size_in_code_units_) {
+        // The TryItem spans until the very end of the CodeItem and therefore
+        // cannot have any code afterwards.
+      } else {
+        // The TryItem spans beyond the end of the CodeItem. This is invalid code.
+        return false;
+      }
+    }
+
+    // Create branch targets for exception handlers.
+    const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+    uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
+    for (uint32_t idx = 0; idx < handlers_size; ++idx) {
+      CatchHandlerIterator iterator(handlers_ptr);
+      for (; iterator.HasNext(); iterator.Next()) {
+        MaybeCreateBlockAt(iterator.GetHandlerAddress());
+      }
+      handlers_ptr = iterator.EndDataPointer();
+    }
+  }
+
+  // Iterate over all instructions and find branching instructions. Create blocks for
+  // the locations these instructions branch to.
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+    const Instruction& instruction = it.CurrentInstruction();
+
+    if (instruction.IsBranch()) {
+      number_of_branches_++;
+      MaybeCreateBlockAt(dex_pc + instruction.GetTargetOffset());
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
+        MaybeCreateBlockAt(dex_pc + s_it.CurrentTargetOffset());
+
+        // Create N-1 blocks where we will insert comparisons of the input value
+        // against the Switch's case keys.
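+        // Illustrative sketch (assumed case keys, not part of the original
+        // comment): a switch over keys {1, 5, 9} becomes a chain of
+        // compare-and-branch blocks, where B0 is the block of the SWITCH
+        // itself and B1/B2 are the N-1 = 2 blocks created here:
+        //   B0: if (value == 1) goto case_1; else goto B1;
+        //   B1: if (value == 5) goto case_5; else goto B2;
+        //   B2: if (value == 9) goto case_9; else goto fall_through;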
+        if (table.ShouldBuildDecisionTree() && !s_it.IsLast()) {
+          // Store the block under the dex_pc of the current key within the
+          // switch data payload (for uniqueness), but give it the dex_pc of
+          // the SWITCH instruction to which it semantically belongs.
+          MaybeCreateBlockAt(dex_pc, s_it.GetDexPcForCurrentIndex());
+        }
+      }
+    } else if (instruction.Opcode() == Instruction::MOVE_EXCEPTION) {
+      // End the basic block after MOVE_EXCEPTION. This simplifies the later
+      // stage of TryBoundary-block insertion.
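+      // For example, a handler starting with `0x20: move-exception v0` gets a
+      // block boundary at 0x21, leaving the MOVE_EXCEPTION as the only
+      // instruction in its block.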
+    } else {
+      continue;
+    }
+
+    if (instruction.CanFlowThrough()) {
+      if (it.IsLast()) {
+        // In the normal case we should never hit this, but someone can artificially forge a dex
+        // file to fall through out of the method code. In this case we bail out of compilation.
+        return false;
+      } else {
+        MaybeCreateBlockAt(dex_pc + it.CurrentInstruction().SizeInCodeUnits());
+      }
+    }
+  }
+
+  return true;
+}
+
+void HBasicBlockBuilder::ConnectBasicBlocks() {
+  HBasicBlock* block = graph_->GetEntryBlock();
+  graph_->AddBlock(block);
+
+  bool is_throwing_block = false;
+  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
+    uint32_t dex_pc = it.CurrentDexPc();
+
+    // Check if this dex_pc address starts a new basic block.
+    HBasicBlock* next_block = GetBlockAt(dex_pc);
+    if (next_block != nullptr) {
+      if (block != nullptr) {
+        // The last instruction did not end its basic block, but a new one
+        // starts here, so the previous block must fall through into this one.
+        block->AddSuccessor(next_block);
+      }
+      block = next_block;
+      is_throwing_block = false;
+      graph_->AddBlock(block);
+    }
+
+    if (block == nullptr) {
+      // Ignore dead code.
+      continue;
+    }
+
+    const Instruction& instruction = it.CurrentInstruction();
+
+    if (!is_throwing_block && IsThrowingDexInstruction(instruction)) {
+      DCHECK(!ContainsElement(throwing_blocks_, block));
+      is_throwing_block = true;
+      throwing_blocks_.push_back(block);
+    }
+
+    if (instruction.IsBranch()) {
+      uint32_t target_dex_pc = dex_pc + instruction.GetTargetOffset();
+      block->AddSuccessor(GetBlockAt(target_dex_pc));
+    } else if (instruction.IsReturn() || (instruction.Opcode() == Instruction::THROW)) {
+      block->AddSuccessor(graph_->GetExitBlock());
+    } else if (instruction.IsSwitch()) {
+      DexSwitchTable table(instruction, dex_pc);
+      for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
+        uint32_t target_dex_pc = dex_pc + s_it.CurrentTargetOffset();
+        block->AddSuccessor(GetBlockAt(target_dex_pc));
+
+        if (table.ShouldBuildDecisionTree() && !s_it.IsLast()) {
+          uint32_t next_case_dex_pc = s_it.GetDexPcForCurrentIndex();
+          HBasicBlock* next_case_block = GetBlockAt(next_case_dex_pc);
+          block->AddSuccessor(next_case_block);
+          block = next_case_block;
+          graph_->AddBlock(block);
+        }
+      }
+    } else {
+      // Remaining code only applies to instructions which end their basic block.
+      continue;
+    }
+
+    if (instruction.CanFlowThrough()) {
+      uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+      block->AddSuccessor(GetBlockAt(next_dex_pc));
+    }
+
+    // The basic block ends here. Do not add any more instructions.
+    block = nullptr;
+  }
+
+  graph_->AddBlock(graph_->GetExitBlock());
+}
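+// Illustrative walk-through of the two passes above (assumed bytecode, added
+// for exposition; not part of the original change):
+//
+//   0x00: if-eqz v0, +4  // blocks created at 0x04 (target), 0x02 (fall-through)
+//   0x02: return v0      // block 0x02 gets the exit block as successor
+//   0x03: nop            // no block starts here -> ignored as dead code
+//   0x04: return v0      // block 0x04 gets the exit block as successor
+//
+// CreateBranchTargets() creates the blocks at 0x00, 0x02 and 0x04;
+// ConnectBasicBlocks() then links entry->0x00, 0x00->{0x04, 0x02} and both
+// return blocks to the exit block.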
+
+// Returns the TryItem stored for `block` or nullptr if there is no info for it.
+static const DexFile::TryItem* GetTryItem(
+    HBasicBlock* block,
+    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
+  auto iterator = try_block_info.find(block->GetBlockId());
+  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
+}
+
+// Iterates over the exception handlers of `try_item`, finds the corresponding
+// catch blocks and makes them successors of `try_boundary`. The order of
+// successors matches the order in which runtime exception delivery searches
+// for a handler.
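+// For example (illustrative): with `try {...} catch (IOException e) {...}
+// catch (Throwable t) {...}`, the IOException handler precedes the Throwable
+// handler in the successor list, matching the order in which the runtime
+// searches for a match.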
+static void LinkToCatchBlocks(HTryBoundary* try_boundary,
+                              const DexFile::CodeItem& code_item,
+                              const DexFile::TryItem* try_item,
+                              const ArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) {
+  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
+    try_boundary->AddExceptionHandler(catch_blocks.Get(it.GetHandlerAddress()));
+  }
+}
+
+bool HBasicBlockBuilder::MightHaveLiveNormalPredecessors(HBasicBlock* catch_block) {
+  if (kIsDebugBuild) {
+    DCHECK_NE(catch_block->GetDexPc(), kNoDexPc) << "Should not be called on synthetic blocks";
+    DCHECK(!graph_->GetEntryBlock()->GetSuccessors().empty())
+        << "Basic blocks must have been created and connected";
+    for (HBasicBlock* predecessor : catch_block->GetPredecessors()) {
+      DCHECK(!predecessor->IsSingleTryBoundary())
+          << "TryBoundary blocks must not have not been created yet";
+    }
+  }
+
+  const Instruction& first = GetDexInstructionAt(code_item_, catch_block->GetDexPc());
+  if (first.Opcode() == Instruction::MOVE_EXCEPTION) {
+    // The verifier guarantees that a catch block which begins with
+    // MOVE_EXCEPTION has no live normal predecessors.
+    return false;
+  } else if (catch_block->GetPredecessors().empty()) {
+    // Normal control-flow edges have already been created. Since the block's
+    // list of predecessors is empty, it cannot have any live or dead normal
+    // predecessors.
+    return false;
+  }
+
+  // The catch block has normal predecessors but we do not know which are live
+  // and which will be removed during the initial DCE. Return `true` to signal
+  // that it may have live normal predecessors.
+  return true;
+}
+
+void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
+  if (code_item_.tries_size_ == 0) {
+    return;
+  }
+
+  // Keep a map of all try blocks and their respective TryItems. We do not use
+  // the block's pointer but rather its id to ensure deterministic iteration.
+  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+
+  // Obtain TryItem information for blocks with throwing instructions. Blocks
+  // which are both try and catch are simplified later, when landing pads are
+  // inserted for their catch handlers.
+  for (HBasicBlock* block : graph_->GetBlocks()) {
+    if (block->GetDexPc() == kNoDexPc) {
+      continue;
+    }
+
+    // Do not bother creating exceptional edges for try blocks which have no
+    // throwing instructions. In that case we simply assume that the block is
+    // not covered by a TryItem. This prevents us from creating a throw-catch
+    // loop for synchronized blocks.
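+    // (Illustration: the catch-all handler emitted for a synchronized region
+    // does monitor-exit and rethrows; since the handler itself is covered by
+    // the TryItem, treating it as a throwing try block would make it its own
+    // exception handler.)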
+    if (ContainsElement(throwing_blocks_, block)) {
+      // Try to find a TryItem covering the block.
+      const int32_t try_item_idx = DexFile::FindTryItem(code_item_, block->GetDexPc());
+      if (try_item_idx != -1) {
+        // Block throwing and in a TryItem. Store the try block information.
+        try_block_info.Put(block->GetBlockId(), DexFile::GetTryItems(code_item_, try_item_idx));
+      }
+    }
+  }
+
+  // Map from a handler dex_pc to the corresponding catch block.
+  ArenaSafeMap<uint32_t, HBasicBlock*> catch_blocks(
+      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+
+  // Iterate over catch blocks, create artificial landing pads if necessary to
+  // simplify the CFG, and set metadata.
+  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
+  for (uint32_t idx = 0; idx < handlers_size; ++idx) {
+    CatchHandlerIterator iterator(handlers_ptr);
+    for (; iterator.HasNext(); iterator.Next()) {
+      uint32_t address = iterator.GetHandlerAddress();
+      if (catch_blocks.find(address) != catch_blocks.end()) {
+        // Catch block already processed.
+        continue;
+      }
+
+      // Check if we should create an artificial landing pad for the catch block.
+      // We create one if the catch block is also a try block because we do not
+      // have a strategy for inserting TryBoundaries on exceptional edges.
+      // We also create one if the block might have normal predecessors so as to
+      // simplify register allocation.
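+      // Resulting shape when a landing pad is created (sketch): exceptional
+      // edges will later target the pad, which jumps straight to the original
+      // block, while normal predecessors keep their direct edges:
+      //   [landing pad: HGoto] --> [original block] <-- normal-flow edges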
+      HBasicBlock* catch_block = GetBlockAt(address);
+      bool is_try_block = (try_block_info.find(catch_block->GetBlockId()) != try_block_info.end());
+      if (is_try_block || MightHaveLiveNormalPredecessors(catch_block)) {
+        HBasicBlock* new_catch_block = new (arena_) HBasicBlock(graph_, address);
+        new_catch_block->AddInstruction(new (arena_) HGoto(address));
+        new_catch_block->AddSuccessor(catch_block);
+        graph_->AddBlock(new_catch_block);
+        catch_block = new_catch_block;
+      }
+
+      catch_blocks.Put(address, catch_block);
+      catch_block->SetTryCatchInformation(
+        new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
+    }
+    handlers_ptr = iterator.EndDataPointer();
+  }
+
+  // Do a pass over the try blocks and insert entering TryBoundaries where at
+  // least one predecessor is not covered by the same TryItem as the try block.
+  // We do not split each edge separately, but rather create one boundary block
+  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
+      if (GetTryItem(predecessor, try_block_info) != entry.second) {
+        // Found a predecessor not covered by the same TryItem. Insert an
+        // entering boundary block.
+        HTryBoundary* try_entry =
+            new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
+        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
+        LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+        break;
+      }
+    }
+  }
+
+  // Do a second pass over the try blocks and insert exit TryBoundaries where
+  // the successor is not in the same TryItem.
+  for (auto entry : try_block_info) {
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+    // NOTE: Do not use iterators because SplitEdge would invalidate them.
+    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
+      HBasicBlock* successor = try_block->GetSuccessors()[i];
+
+      // If the successor is a try block, all of its predecessors must be
+      // covered by the same TryItem. Otherwise the previous pass would have
+      // created a non-throwing boundary block.
+      if (GetTryItem(successor, try_block_info) != nullptr) {
+        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+        continue;
+      }
+
+      // Insert TryBoundary and link to catch blocks.
+      HTryBoundary* try_exit =
+          new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
+      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
+      LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+    }
+  }
+}
+
+bool HBasicBlockBuilder::Build() {
+  DCHECK(graph_->GetBlocks().empty());
+
+  graph_->SetEntryBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+  graph_->SetExitBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+
+  // TODO(dbrazdil): Do CreateBranchTargets and ConnectBasicBlocks in one pass.
+  if (!CreateBranchTargets()) {
+    return false;
+  }
+
+  ConnectBasicBlocks();
+  InsertTryBoundaryBlocks();
+
+  return true;
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
new file mode 100644
index 0000000..1be0b4c
--- /dev/null
+++ b/compiler/optimizing/block_builder.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "dex_file.h"
+#include "nodes.h"
+
+namespace art {
+
+class HBasicBlockBuilder : public ValueObject {
+ public:
+  HBasicBlockBuilder(HGraph* graph,
+                     const DexFile* const dex_file,
+                     const DexFile::CodeItem& code_item)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        branch_targets_(code_item.insns_size_in_code_units_,
+                        nullptr,
+                        arena_->Adapter(kArenaAllocGraphBuilder)),
+        throwing_blocks_(kDefaultNumberOfThrowingBlocks, arena_->Adapter(kArenaAllocGraphBuilder)),
+        number_of_branches_(0u) {}
+
+  // Creates basic blocks in `graph_` at branch target dex_pc positions of the
+  // `code_item_`. Blocks are connected but left unpopulated with instructions.
+  // TryBoundary blocks are inserted at positions where control flow
+  // enters/exits a try block.
+  bool Build();
+
+  size_t GetNumberOfBranches() const { return number_of_branches_; }
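+  // Returns the block stored under `dex_pc`, or nullptr if no block was
+  // created there. For switch decision trees the key may differ from the
+  // block's semantic dex_pc (see MaybeCreateBlockAt below).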
+  HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
+
+ private:
+  // Creates a basic block starting at the given `dex_pc`, if one does not
+  // exist yet.
+  HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
+
+  // Creates a basic block for bytecode instructions at `semantic_dex_pc` and
+  // stores it under the `store_dex_pc` key. This is used when multiple blocks
+  // share the same semantic dex_pc, e.g. when building switch decision trees.
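+  // (Illustrative example: for a SWITCH at dex_pc 0x10, the N-1 decision
+  // blocks are stored under the dex_pcs of the case keys inside the switch
+  // payload, yet all report 0x10 as their semantic dex_pc.)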
+  HBasicBlock* MaybeCreateBlockAt(uint32_t semantic_dex_pc, uint32_t store_dex_pc);
+
+  bool CreateBranchTargets();
+  void ConnectBasicBlocks();
+  void InsertTryBoundaryBlocks();
+
+  // Helper method which decides whether `catch_block` may have live normal
+  // predecessors and thus whether a synthetic catch block needs to be created
+  // to avoid mixing normal and exceptional predecessors.
+  // Should only be called during InsertTryBoundaryBlocks on blocks at catch
+  // handler dex_pcs.
+  bool MightHaveLiveNormalPredecessors(HBasicBlock* catch_block);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  ArenaVector<HBasicBlock*> branch_targets_;
+  ArenaVector<HBasicBlock*> throwing_blocks_;
+  size_t number_of_branches_;
+
+  static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
+
+  DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
+};
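+
+// Minimal usage sketch (illustrative only; the real driver is
+// HGraphBuilder::BuildGraph() in builder.cc):
+//
+//   HBasicBlockBuilder builder(graph, &dex_file, code_item);
+//   if (!builder.Build()) {
+//     // Bail out: the bytecode is invalid, e.g. it falls through out of the
+//     // method code.
+//   }
+//   size_t number_of_branches = builder.GetNumberOfBranches();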
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index b6b8322..86742e6 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -20,148 +20,49 @@
 #include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 #include "base/logging.h"
-#include "class_linker.h"
 #include "dex/verified_method.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
 #include "primitive.h"
-#include "scoped_thread_state_change.h"
-#include "ssa_builder.h"
 #include "thread.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
-void HGraphBuilder::InitializeLocals(uint16_t count) {
-  graph_->SetNumberOfVRegs(count);
-  locals_.resize(count);
-  for (int i = 0; i < count; i++) {
-    HLocal* local = new (arena_) HLocal(i);
-    entry_block_->AddInstruction(local);
-    locals_[i] = local;
-  }
-}
-
-void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) {
-  // dex_compilation_unit_ is null only when unit testing.
-  if (dex_compilation_unit_ == nullptr) {
-    return;
-  }
-
-  graph_->SetNumberOfInVRegs(number_of_parameters);
-  const char* shorty = dex_compilation_unit_->GetShorty();
-  int locals_index = locals_.size() - number_of_parameters;
-  int parameter_index = 0;
-
-  const DexFile::MethodId& referrer_method_id =
-      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
-  if (!dex_compilation_unit_->IsStatic()) {
-    // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
-                                                              referrer_method_id.class_idx_,
-                                                              parameter_index++,
-                                                              Primitive::kPrimNot,
-                                                              true);
-    entry_block_->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    number_of_parameters--;
-  }
-
-  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
-  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
-  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
-    HParameterValue* parameter = new (arena_) HParameterValue(
-        *dex_file_,
-        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
-        parameter_index++,
-        Primitive::GetType(shorty[shorty_pos]),
-        false);
-    ++shorty_pos;
-    entry_block_->AddInstruction(parameter);
-    HLocal* local = GetLocalAt(locals_index++);
-    // Store the parameter value in the local that the dex code will use
-    // to reference that parameter.
-    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter, local->GetDexPc()));
-    bool is_wide = (parameter->GetType() == Primitive::kPrimLong)
-        || (parameter->GetType() == Primitive::kPrimDouble);
-    if (is_wide) {
-      i++;
-      locals_index++;
-      parameter_index++;
-    }
-  }
-}
-
-template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
-  int32_t target_offset = instruction.GetTargetOffset();
-  HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset);
-  HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(branch_target != nullptr);
-  DCHECK(fallthrough_target != nullptr);
-  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(first, second, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_->AddSuccessor(branch_target);
-  current_block_->AddSuccessor(fallthrough_target);
-  current_block_ = nullptr;
-}
-
-template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
-  int32_t target_offset = instruction.GetTargetOffset();
-  HBasicBlock* branch_target = FindBlockStartingAt(dex_pc + target_offset);
-  HBasicBlock* fallthrough_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(branch_target != nullptr);
-  DCHECK(fallthrough_target != nullptr);
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-  current_block_->AddSuccessor(branch_target);
-  current_block_->AddSuccessor(fallthrough_target);
-  current_block_ = nullptr;
-}
-
 void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
   if (compilation_stats_ != nullptr) {
     compilation_stats_->RecordStat(compilation_stat);
   }
 }
 
-bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item,
-                                    size_t number_of_branches) {
+bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
+  if (compiler_driver_ == nullptr) {
+    // Note that the compiler driver is null when unit testing.
+    return false;
+  }
+
   const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
   CompilerFilter::Filter compiler_filter = compiler_options.GetCompilerFilter();
   if (compiler_filter == CompilerFilter::kEverything) {
     return false;
   }
 
-  if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) {
+  if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) {
     VLOG(compiler) << "Skip compilation of huge method "
                    << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << ": " << code_item.insns_size_in_code_units_ << " code units";
+                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
   }
 
   // If it's large and contains no branches, it's likely to be machine generated initialization.
-  if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_)
+  if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_)
       && (number_of_branches == 0)) {
     VLOG(compiler) << "Skip compilation of large method with no branch "
                    << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << ": " << code_item.insns_size_in_code_units_ << " code units";
+                   << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
   }
@@ -169,2707 +70,39 @@
   return false;
 }
 
-void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) {
-  if (code_item.tries_size_ == 0) {
-    return;
-  }
-
-  // Create branch targets at the start/end of the TryItem range. These are
-  // places where the program might fall through into/out of the a block and
-  // where TryBoundary instructions will be inserted later. Other edges which
-  // enter/exit the try blocks are a result of branches/switches.
-  for (size_t idx = 0; idx < code_item.tries_size_; ++idx) {
-    const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item, idx);
-    uint32_t dex_pc_start = try_item->start_addr_;
-    uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
-    FindOrCreateBlockStartingAt(dex_pc_start);
-    if (dex_pc_end < code_item.insns_size_in_code_units_) {
-      // TODO: Do not create block if the last instruction cannot fall through.
-      FindOrCreateBlockStartingAt(dex_pc_end);
-    } else {
-      // The TryItem spans until the very end of the CodeItem (or beyond if
-      // invalid) and therefore cannot have any code afterwards.
-    }
-  }
-
-  // Create branch targets for exception handlers.
-  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item, 0);
-  uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
-  for (uint32_t idx = 0; idx < handlers_size; ++idx) {
-    CatchHandlerIterator iterator(handlers_ptr);
-    for (; iterator.HasNext(); iterator.Next()) {
-      uint32_t address = iterator.GetHandlerAddress();
-      HBasicBlock* block = FindOrCreateBlockStartingAt(address);
-      block->SetTryCatchInformation(
-        new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
-    }
-    handlers_ptr = iterator.EndDataPointer();
-  }
-}
-
-// Returns the TryItem stored for `block` or nullptr if there is no info for it.
-static const DexFile::TryItem* GetTryItem(
-    HBasicBlock* block,
-    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
-  auto iterator = try_block_info.find(block->GetBlockId());
-  return (iterator == try_block_info.end()) ? nullptr : iterator->second;
-}
-
-void HGraphBuilder::LinkToCatchBlocks(HTryBoundary* try_boundary,
-                                      const DexFile::CodeItem& code_item,
-                                      const DexFile::TryItem* try_item) {
-  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
-    try_boundary->AddExceptionHandler(FindBlockStartingAt(it.GetHandlerAddress()));
-  }
-}
-
-void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) {
-  if (code_item.tries_size_ == 0) {
-    return;
-  }
-
-  // Keep a map of all try blocks and their respective TryItems. We do not use
-  // the block's pointer but rather its id to ensure deterministic iteration.
-  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
-      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
-
-  // Obtain TryItem information for blocks with throwing instructions, and split
-  // blocks which are both try & catch to simplify the graph.
-  // NOTE: We are appending new blocks inside the loop, so we need to use index
-  // because iterators can be invalidated. We remember the initial size to avoid
-  // iterating over the new blocks which cannot throw.
-  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
-    HBasicBlock* block = graph_->GetBlocks()[i];
-
-    // Do not bother creating exceptional edges for try blocks which have no
-    // throwing instructions. In that case we simply assume that the block is
-    // not covered by a TryItem. This prevents us from creating a throw-catch
-    // loop for synchronized blocks.
-    if (block->HasThrowingInstructions()) {
-      // Try to find a TryItem covering the block.
-      DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dex_pc to find its TryItem.";
-      const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc());
-      if (try_item_idx != -1) {
-        // Block throwing and in a TryItem. Store the try block information.
-        HBasicBlock* throwing_block = block;
-        if (block->IsCatchBlock()) {
-          // Simplify blocks which are both try and catch, otherwise we would
-          // need a strategy for splitting exceptional edges. We split the block
-          // after the move-exception (if present) and mark the first part not
-          // throwing. The normal-flow edge between them will be split later.
-          throwing_block = block->SplitCatchBlockAfterMoveException();
-          // Move-exception does not throw and the block has throwing insructions
-          // so it must have been possible to split it.
-          DCHECK(throwing_block != nullptr);
-        }
-
-        try_block_info.Put(throwing_block->GetBlockId(),
-                           DexFile::GetTryItems(code_item, try_item_idx));
-      }
-    }
-  }
-
-  // Do a pass over the try blocks and insert entering TryBoundaries where at
-  // least one predecessor is not covered by the same TryItem as the try block.
-  // We do not split each edge separately, but rather create one boundary block
-  // that all predecessors are relinked to. This preserves loop headers (b/23895756).
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
-    for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
-      if (GetTryItem(predecessor, try_block_info) != entry.second) {
-        // Found a predecessor not covered by the same TryItem. Insert entering
-        // boundary block.
-        HTryBoundary* try_entry =
-            new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
-        try_block->CreateImmediateDominator()->AddInstruction(try_entry);
-        LinkToCatchBlocks(try_entry, code_item, entry.second);
-        break;
-      }
-    }
-  }
-
-  // Do a second pass over the try blocks and insert exit TryBoundaries where
-  // the successor is not in the same TryItem.
-  for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
-    // NOTE: Do not use iterators because SplitEdge would invalidate them.
-    for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
-      HBasicBlock* successor = try_block->GetSuccessors()[i];
-
-      // If the successor is a try block, all of its predecessors must be
-      // covered by the same TryItem. Otherwise the previous pass would have
-      // created a non-throwing boundary block.
-      if (GetTryItem(successor, try_block_info) != nullptr) {
-        DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
-        continue;
-      }
-
-      // Preserve the invariant that Return(Void) always jumps to Exit by moving
-      // it outside the try block if necessary.
-      HInstruction* last_instruction = try_block->GetLastInstruction();
-      if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
-        DCHECK_EQ(successor, exit_block_);
-        successor = try_block->SplitBefore(last_instruction);
-      }
-
-      // Insert TryBoundary and link to catch blocks.
-      HTryBoundary* try_exit =
-          new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
-      graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
-      LinkToCatchBlocks(try_exit, code_item, entry.second);
-    }
-  }
-}
-
-GraphAnalysisResult HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item,
-                                              StackHandleScopeCollection* handles) {
+GraphAnalysisResult HGraphBuilder::BuildGraph() {
   DCHECK(graph_->GetBlocks().empty());
 
-  const uint16_t* code_ptr = code_item.insns_;
-  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
-  code_start_ = code_ptr;
+  graph_->SetNumberOfVRegs(code_item_.registers_size_);
+  graph_->SetNumberOfInVRegs(code_item_.ins_size_);
+  graph_->SetMaximumNumberOfOutVRegs(code_item_.outs_size_);
+  graph_->SetHasTryCatch(code_item_.tries_size_ != 0);
 
-  // Setup the graph with the entry block and exit block.
-  entry_block_ = new (arena_) HBasicBlock(graph_, 0);
-  graph_->AddBlock(entry_block_);
-  exit_block_ = new (arena_) HBasicBlock(graph_, kNoDexPc);
-  graph_->SetEntryBlock(entry_block_);
-  graph_->SetExitBlock(exit_block_);
-
-  graph_->SetHasTryCatch(code_item.tries_size_ != 0);
-
-  InitializeLocals(code_item.registers_size_);
-  graph_->SetMaximumNumberOfOutVRegs(code_item.outs_size_);
-
-  // Compute the number of dex instructions, blocks, and branches. We will
-  // check these values against limits given to the compiler.
-  size_t number_of_branches = 0;
-
-  // To avoid splitting blocks, we compute ahead of time the instructions that
-  // start a new block, and create these blocks.
-  if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
+  // 1) Create basic blocks and link them together. Basic blocks are left
+  //    unpopulated, with the exception of synthetic blocks, e.g. those
+  //    containing HTryBoundary instructions.
+  if (!block_builder_.Build()) {
     return kAnalysisInvalidBytecode;
   }
 
-  // Note that the compiler driver is null when unit testing.
-  if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
-    return kAnalysisInvalidBytecode;
+  // 2) Decide whether to skip this method based on its code size and number
+  //    of branches.
+  if (SkipCompilation(block_builder_.GetNumberOfBranches())) {
+    return kAnalysisSkipped;
   }
 
-  // Find locations where we want to generate extra stackmaps for native debugging.
-  // This allows us to generate the info only at interesting points (for example,
-  // at start of java statement) rather than before every dex instruction.
-  const bool native_debuggable = compiler_driver_ != nullptr &&
-                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
-  ArenaBitVector* native_debug_info_locations;
-  if (native_debuggable) {
-    const uint32_t num_instructions = code_item.insns_size_in_code_units_;
-    native_debug_info_locations =
-        ArenaBitVector::Create(arena_, num_instructions, false, kArenaAllocGraphBuilder);
-    FindNativeDebugInfoLocations(code_item, native_debug_info_locations);
-  }
-
-  CreateBlocksForTryCatch(code_item);
-
-  InitializeParameters(code_item.ins_size_);
-
-  size_t dex_pc = 0;
-  while (code_ptr < code_end) {
-    // Update the current block if dex_pc starts a new block.
-    MaybeUpdateCurrentBlock(dex_pc);
-    const Instruction& instruction = *Instruction::At(code_ptr);
-    if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
-      if (current_block_ != nullptr) {
-        current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc));
-      }
-    }
-    if (!AnalyzeDexInstruction(instruction, dex_pc)) {
-      return kAnalysisInvalidBytecode;
-    }
-    dex_pc += instruction.SizeInCodeUnits();
-    code_ptr += instruction.SizeInCodeUnits();
-  }
-
-  // Add Exit to the exit block.
-  exit_block_->AddInstruction(new (arena_) HExit());
-  // Add the suspend check to the entry block.
-  entry_block_->AddInstruction(new (arena_) HSuspendCheck(0));
-  entry_block_->AddInstruction(new (arena_) HGoto());
-  // Add the exit block at the end.
-  graph_->AddBlock(exit_block_);
-
-  // Iterate over blocks covered by TryItems and insert TryBoundaries at entry
-  // and exit points. This requires all control-flow instructions and
-  // non-exceptional edges to have been created.
-  InsertTryBoundaryBlocks(code_item);
-
+  // 3) Build the dominator tree and fill in loop and try/catch metadata.
   GraphAnalysisResult result = graph_->BuildDominatorTree();
   if (result != kAnalysisSuccess) {
     return result;
   }
 
-  graph_->InitializeInexactObjectRTI(handles);
-  return SsaBuilder(graph_, handles).BuildSsa();
-}
-
-void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) {
-  HBasicBlock* block = FindBlockStartingAt(dex_pc);
-  if (block == nullptr) {
-    return;
+  // 4) Populate basic blocks with instructions.
+  if (!instruction_builder_.Build()) {
+    return kAnalysisInvalidBytecode;
   }
 
-  if (current_block_ != nullptr) {
-    // Branching instructions clear current_block, so we know
-    // the last instruction of the current block is not a branching
-    // instruction. We add an unconditional goto to the found block.
-    current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-    current_block_->AddSuccessor(block);
-  }
-  graph_->AddBlock(block);
-  current_block_ = block;
-}
-
-void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item,
-                                                 ArenaBitVector* locations) {
-  // The callback gets called when the line number changes.
-  // In other words, it marks the start of new java statement.
-  struct Callback {
-    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
-      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
-      return false;
-    }
-  };
-  dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations);
-  // Instruction-specific tweaks.
-  const Instruction* const begin = Instruction::At(code_item.insns_);
-  const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_);
-  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
-    switch (inst->Opcode()) {
-      case Instruction::MOVE_EXCEPTION: {
-        // Stop in native debugger after the exception has been moved.
-        // The compiler also expects the move at the start of basic block so
-        // we do not want to interfere by inserting native-debug-info before it.
-        locations->ClearBit(inst->GetDexPc(code_item.insns_));
-        const Instruction* next = inst->Next();
-        if (next < end) {
-          locations->SetBit(next->GetDexPc(code_item.insns_));
-        }
-        break;
-      }
-      default:
-        break;
-    }
-  }
-}
-
-bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
-                                         const uint16_t* code_end,
-                                         size_t* number_of_branches) {
-  branch_targets_.resize(code_end - code_ptr, nullptr);
-
-  // Create the first block for the dex instructions, single successor of the entry block.
-  HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0);
-  branch_targets_[0] = block;
-  entry_block_->AddSuccessor(block);
-
-  // Iterate over all instructions and find branching instructions. Create blocks for
-  // the locations these instructions branch to.
-  uint32_t dex_pc = 0;
-  while (code_ptr < code_end) {
-    const Instruction& instruction = *Instruction::At(code_ptr);
-    if (instruction.IsBranch()) {
-      (*number_of_branches)++;
-      int32_t target = instruction.GetTargetOffset() + dex_pc;
-      // Create a block for the target instruction.
-      FindOrCreateBlockStartingAt(target);
-
-      dex_pc += instruction.SizeInCodeUnits();
-      code_ptr += instruction.SizeInCodeUnits();
-
-      if (instruction.CanFlowThrough()) {
-        if (code_ptr >= code_end) {
-          // In the normal case we should never hit this, but someone can artificially forge a dex
-          // file to fall through out of the method code. In this case we bail out of compilation.
-          return false;
-        } else {
-          FindOrCreateBlockStartingAt(dex_pc);
-        }
-      }
-    } else if (instruction.IsSwitch()) {
-      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
-
-      uint16_t num_entries = table.GetNumEntries();
-
-      // In a packed-switch, the entry at index 0 is the starting key. In a sparse-switch, the
-      // entry at index 0 is the first key, and values are after *all* keys.
-      size_t offset = table.GetFirstValueIndex();
-
-      // Use a larger loop counter type to avoid overflow issues.
-      for (size_t i = 0; i < num_entries; ++i) {
-        // The target of the case.
-        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
-        FindOrCreateBlockStartingAt(target);
-
-        // Create a block for the switch-case logic. The block gets the dex_pc
-        // of the SWITCH instruction because it is part of its semantics.
-        block = new (arena_) HBasicBlock(graph_, dex_pc);
-        branch_targets_[table.GetDexPcForIndex(i)] = block;
-      }
-
-      // Fall-through. Add a block if there is more code afterwards.
-      dex_pc += instruction.SizeInCodeUnits();
-      code_ptr += instruction.SizeInCodeUnits();
-      if (code_ptr >= code_end) {
-        // In the normal case we should never hit this, but someone can artificially forge a dex
-        // file to fall through out of the method code. In this case we bail out of compilation.
-        // (A switch can fall-through so we don't need to check CanFlowThrough().)
-        return false;
-      } else {
-        FindOrCreateBlockStartingAt(dex_pc);
-      }
-    } else {
-      code_ptr += instruction.SizeInCodeUnits();
-      dex_pc += instruction.SizeInCodeUnits();
-    }
-  }
-  return true;
-}
-
-HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const {
-  DCHECK_GE(dex_pc, 0);
-  return branch_targets_[dex_pc];
-}
-
-HBasicBlock* HGraphBuilder::FindOrCreateBlockStartingAt(int32_t dex_pc) {
-  HBasicBlock* block = FindBlockStartingAt(dex_pc);
-  if (block == nullptr) {
-    block = new (arena_) HBasicBlock(graph_, dex_pc);
-    branch_targets_[dex_pc] = block;
-  }
-  return block;
-}
-
-template<typename T>
-void HGraphBuilder::Unop_12x(const Instruction& instruction,
-                             Primitive::Type type,
-                             uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Conversion_12x(const Instruction& instruction,
-                                   Primitive::Type input_type,
-                                   Primitive::Type result_type,
-                                   uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), input_type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_23x_shift(const Instruction& instruction,
-                                    Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
-                                  Primitive::Type type,
-                                  ComparisonBias bias,
-                                  uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x_shift(const Instruction& instruction, Primitive::Type type,
-                                    uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_12x(const Instruction& instruction,
-                              Primitive::Type type,
-                              uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegA(), type, dex_pc);
-  HInstruction* second = LoadLocal(instruction.VRegB(), type, dex_pc);
-  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-template<typename T>
-void HGraphBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
-  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
-  if (reverse) {
-    std::swap(first, second);
-  }
-  current_block_->AddInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
-  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-}
-
-static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) {
-  Thread* self = Thread::Current();
-  return cu->IsConstructor()
-      && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
-}
-
-void HGraphBuilder::BuildReturn(const Instruction& instruction,
-                                Primitive::Type type,
-                                uint32_t dex_pc) {
-  if (type == Primitive::kPrimVoid) {
-    if (graph_->ShouldGenerateConstructorBarrier()) {
-      // The compilation unit is null during testing.
-      if (dex_compilation_unit_ != nullptr) {
-        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_))
-          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
-      }
-      current_block_->AddInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
-    }
-    current_block_->AddInstruction(new (arena_) HReturnVoid(dex_pc));
-  } else {
-    HInstruction* value = LoadLocal(instruction.VRegA(), type, dex_pc);
-    current_block_->AddInstruction(new (arena_) HReturn(value, dex_pc));
-  }
-  current_block_->AddSuccessor(exit_block_);
-  current_block_ = nullptr;
-}
-
-static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
-  switch (opcode) {
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_STATIC_RANGE:
-      return kStatic;
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_DIRECT_RANGE:
-      return kDirect;
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
-      return kVirtual;
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-      return kInterface;
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_SUPER:
-      return kSuper;
-    default:
-      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
-      UNREACHABLE();
-  }
-}
-
-ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-
-  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
-
-  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
-      *dex_compilation_unit_->GetDexFile(),
-      method_idx,
-      dex_compilation_unit_->GetDexCache(),
-      class_loader,
-      /* referrer */ nullptr,
-      invoke_type);
-
-  if (UNLIKELY(resolved_method == nullptr)) {
-    // Clean up any exception left by type resolution.
-    soa.Self()->ClearException();
-    return nullptr;
-  }
-
-  // Check access. The class linker has a fast path for looking into the dex cache
-  // and does not check the access if it hits it.
-  if (compiling_class.Get() == nullptr) {
-    if (!resolved_method->IsPublic()) {
-      return nullptr;
-    }
-  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
-                                                       resolved_method,
-                                                       dex_compilation_unit_->GetDexCache().Get(),
-                                                       method_idx)) {
-    return nullptr;
-  }
-
-  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
-  // We need to look at the referrer's super class vtable. We need to do this to know if we need to
-  // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of
-  // which require runtime handling.
-  if (invoke_type == kSuper) {
-    if (compiling_class.Get() == nullptr) {
-      // We could not determine the method's class we need to wait until runtime.
-      DCHECK(Runtime::Current()->IsAotCompiler());
-      return nullptr;
-    }
-    ArtMethod* current_method = graph_->GetArtMethod();
-    DCHECK(current_method != nullptr);
-    Handle<mirror::Class> methods_class(hs.NewHandle(
-        dex_compilation_unit_->GetClassLinker()->ResolveReferencedClassOfMethod(Thread::Current(),
-                                                                                method_idx,
-                                                                                current_method)));
-    if (methods_class.Get() == nullptr) {
-      // Invoking a super method requires knowing the actual super class. If we did not resolve
-      // the compiling method's declaring class (which only happens for ahead of time
-      // compilation), bail out.
-      DCHECK(Runtime::Current()->IsAotCompiler());
-      return nullptr;
-    } else {
-      ArtMethod* actual_method;
-      if (methods_class->IsInterface()) {
-        actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
-            resolved_method, class_linker->GetImagePointerSize());
-      } else {
-        uint16_t vtable_index = resolved_method->GetMethodIndex();
-        actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
-            vtable_index, class_linker->GetImagePointerSize());
-      }
-      if (actual_method != resolved_method &&
-          !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
-        // The back-end code generator relies on this check in order to ensure that it will not
-        // attempt to read the dex_cache with a dex_method_index that is not from the correct
-        // dex_file. If we didn't do this check then the dex_method_index will not be updated in the
-        // builder, which means that the code-generator (and compiler driver during sharpening and
-        // inliner, maybe) might invoke an incorrect method.
-        // TODO: The actual method could still be referenced in the current dex file, so we
-        //       could try locating it.
-        // TODO: Remove the dex_file restriction.
-        return nullptr;
-      }
-      if (!actual_method->IsInvokable()) {
-        // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
-        // could resolve the callee to the wrong method.
-        return nullptr;
-      }
-      resolved_method = actual_method;
-    }
-  }
-
-  // Check for incompatible class changes. The class linker has a fast path for
-  // looking into the dex cache and does not check incompatible class changes if it hits it.
-  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
-    return nullptr;
-  }
-
-  return resolved_method;
-}
-
-bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
-                                uint32_t dex_pc,
-                                uint32_t method_idx,
-                                uint32_t number_of_vreg_arguments,
-                                bool is_range,
-                                uint32_t* args,
-                                uint32_t register_index) {
-  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
-  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
-
-  // Remove the return type from the 'proto'.
-  size_t number_of_arguments = strlen(descriptor) - 1;
-  if (invoke_type != kStatic) {  // instance call
-    // One extra argument for 'this'.
-    number_of_arguments++;
-  }
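-  // For example, shorty "IJL" describes a method returning int and taking (long, Object);
-  // as an instance call this yields number_of_arguments == 3: 'this', the long, the reference.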
-
-  MethodReference target_method(dex_file_, method_idx);
-
-  // Special handling for string init.
-  int32_t string_init_offset = 0;
-  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
-                                                       dex_file_,
-                                                       &string_init_offset);
-  // Replace calls to String.<init> with StringFactory.
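-  // For example, a "new String(...)" constructor call is rewritten into a static factory
-  // call whose result then stands in for the original uninitialized receiver.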
-  if (is_string_init) {
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        dchecked_integral_cast<uint64_t>(string_init_offset),
-        0U
-    };
-    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
-        arena_,
-        number_of_arguments - 1,
-        Primitive::kPrimNot /* return_type */,
-        dex_pc,
-        method_idx,
-        target_method,
-        dispatch_info,
-        invoke_type,
-        kStatic /* optimized_invoke_type */,
-        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
-    return HandleStringInit(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor);
-  }
-
-  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
-
-  if (UNLIKELY(resolved_method == nullptr)) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
-    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
-                                                     number_of_arguments,
-                                                     return_type,
-                                                     dex_pc,
-                                                     method_idx,
-                                                     invoke_type);
-    return HandleInvoke(invoke,
-                        number_of_vreg_arguments,
-                        args,
-                        register_index,
-                        is_range,
-                        descriptor,
-                        nullptr /* clinit_check */);
-  }
-
-  // Potential class initialization check, in the case of a static method call.
-  HClinitCheck* clinit_check = nullptr;
-  HInvoke* invoke = nullptr;
-  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
-    // By default, consider that the called method implicitly requires
-    // an initialization check of its declaring class.
-    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
-        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    ScopedObjectAccess soa(Thread::Current());
-    if (invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(
-          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
-    } else if (invoke_type == kSuper) {
-      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
-        // Update the target method to the one resolved. Note that this may be a no-op if
-        // we resolved to the method referenced by the instruction.
-        method_idx = resolved_method->GetDexMethodIndex();
-        target_method = MethodReference(dex_file_, method_idx);
-      }
-    }
-
-    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        0u,
-        0U
-    };
-    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
-                                                number_of_arguments,
-                                                return_type,
-                                                dex_pc,
-                                                method_idx,
-                                                target_method,
-                                                dispatch_info,
-                                                invoke_type,
-                                                invoke_type,
-                                                clinit_check_requirement);
-  } else if (invoke_type == kVirtual) {
-    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
-    invoke = new (arena_) HInvokeVirtual(arena_,
-                                         number_of_arguments,
-                                         return_type,
-                                         dex_pc,
-                                         method_idx,
-                                         resolved_method->GetMethodIndex());
-  } else {
-    DCHECK_EQ(invoke_type, kInterface);
-    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
-    invoke = new (arena_) HInvokeInterface(arena_,
-                                           number_of_arguments,
-                                           return_type,
-                                           dex_pc,
-                                           method_idx,
-                                           resolved_method->GetDexMethodIndex());
-  }
-
-  return HandleInvoke(invoke,
-                      number_of_vreg_arguments,
-                      args,
-                      register_index,
-                      is_range,
-                      descriptor,
-                      clinit_check);
-}
-
-bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
-  bool finalizable;
-  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
-
-  // Only the non-resolved entrypoint handles the finalizable class case. If we
-  // need access checks, then the class has not been resolved at compile time and
-  // may therefore be finalizable.
-  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
-      ? kQuickAllocObject
-      : kQuickAllocObjectInitialized;
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-
-  if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // We currently do not support inlining allocations across dex files.
-    return false;
-  }
-
-  HLoadClass* load_class = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      outer_dex_file,
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      /*needs_access_check*/ can_throw,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
-
-  current_block_->AddInstruction(load_class);
-  HInstruction* cls = load_class;
-  if (!IsInitialized(resolved_class)) {
-    cls = new (arena_) HClinitCheck(load_class, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  current_block_->AddInstruction(new (arena_) HNewInstance(
-      cls,
-      graph_->GetCurrentMethod(),
-      dex_pc,
-      type_index,
-      *dex_compilation_unit_->GetDexFile(),
-      can_throw,
-      finalizable,
-      entrypoint));
-  return true;
-}
-
-static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
-}
-
-bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
-  if (cls.Get() == nullptr) {
-    return false;
-  }
-
-  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
-  // check whether the class is in an image for the AOT compilation.
-  if (cls->IsInitialized() &&
-      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
-    return true;
-  }
-
-  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
-    return true;
-  }
-
-  // TODO: We should walk over the inlined methods, but we don't pass
-  //       that information to the builder.
-  if (IsSubClass(GetCompilingClass(), cls.Get())) {
-    return true;
-  }
-
-  return false;
-}
-
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      ArtMethod* resolved_method,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Thread* self = Thread::Current();
-  StackHandleScope<4> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
-
-  // The index at which the method's class is stored in the DexCache's type array.
-  uint32_t storage_index = DexFile::kDexNoIndex;
-  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
-    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
-    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
-                                                               GetCompilingClass(),
-                                                               resolved_method,
-                                                               method_idx,
-                                                               &storage_index);
-  }
-
-  HClinitCheck* clinit_check = nullptr;
-
-  if (IsInitialized(resolved_method_class)) {
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-  } else if (storage_index != DexFile::kDexNoIndex) {
-    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-    HLoadClass* load_class = new (arena_) HLoadClass(
-        graph_->GetCurrentMethod(),
-        storage_index,
-        outer_dex_file,
-        is_outer_class,
-        dex_pc,
-        /*needs_access_check*/ false,
-        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
-    current_block_->AddInstruction(load_class);
-    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-    current_block_->AddInstruction(clinit_check);
-  }
-  return clinit_check;
-}
-
-bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke,
-                                         uint32_t number_of_vreg_arguments,
-                                         uint32_t* args,
-                                         uint32_t register_index,
-                                         bool is_range,
-                                         const char* descriptor,
-                                         size_t start_index,
-                                         size_t* argument_index) {
-  uint32_t descriptor_index = 1;  // Skip the return type.
-  uint32_t dex_pc = invoke->GetDexPc();
-
-  for (size_t i = start_index;
-       // Make sure we don't go over the expected arguments or over the number of
-       // dex registers given. If the instruction was seen as dead by the verifier,
-       // it hasn't been properly checked.
-       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
-       i++, (*argument_index)++) {
-    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
-    if (!is_range
-        && is_wide
-        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
-      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
-      // reject any class where this is violated. However, the verifier only does these checks
-      // on instructions that are not trivially dead, so we just bail out of the compilation.
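-      // For example, a wide argument placed in {v1, v3} instead of a sequential pair
-      // like {v1, v2} triggers this bailout.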
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of non-sequential dex register pair in wide argument";
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-      return false;
-    }
-    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    invoke->SetArgumentAt(*argument_index, arg);
-    if (is_wide) {
-      i++;
-    }
-  }
-
-  if (*argument_index != invoke->GetNumberOfArguments()) {
-    VLOG(compiler) << "Did not compile "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                   << " because of wrong number of arguments in invoke instruction";
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
-    return false;
-  }
-
-  if (invoke->IsInvokeStaticOrDirect() &&
-      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
-          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
-    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
-    (*argument_index)++;
-  }
-
-  return true;
-}
-
-bool HGraphBuilder::HandleInvoke(HInvoke* invoke,
-                                 uint32_t number_of_vreg_arguments,
-                                 uint32_t* args,
-                                 uint32_t register_index,
-                                 bool is_range,
-                                 const char* descriptor,
-                                 HClinitCheck* clinit_check) {
-  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 0;
-  size_t argument_index = 0;
-  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
-    HInstruction* arg = LoadLocal(
-        is_range ? register_index : args[0], Primitive::kPrimNot, invoke->GetDexPc());
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
-    current_block_->AddInstruction(null_check);
-    invoke->SetArgumentAt(0, null_check);
-    start_index = 1;
-    argument_index = 1;
-  }
-
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  if (clinit_check != nullptr) {
-    // Add the class initialization check as last input of `invoke`.
-    DCHECK(invoke->IsInvokeStaticOrDirect());
-    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
-        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
-    invoke->SetArgumentAt(argument_index, clinit_check);
-    argument_index++;
-  }
-
-  current_block_->AddInstruction(invoke);
-  latest_result_ = invoke;
-
-  return true;
-}
-
-bool HGraphBuilder::HandleStringInit(HInvoke* invoke,
-                                     uint32_t number_of_vreg_arguments,
-                                     uint32_t* args,
-                                     uint32_t register_index,
-                                     bool is_range,
-                                     const char* descriptor) {
-  DCHECK(invoke->IsInvokeStaticOrDirect());
-  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
-
-  size_t start_index = 1;
-  size_t argument_index = 0;
-  if (!SetupInvokeArguments(invoke,
-                            number_of_vreg_arguments,
-                            args,
-                            register_index,
-                            is_range,
-                            descriptor,
-                            start_index,
-                            &argument_index)) {
-    return false;
-  }
-
-  // Add move-result for StringFactory method.
-  uint32_t orig_this_reg = is_range ? register_index : args[0];
-  HInstruction* new_instance = LoadLocal(orig_this_reg, Primitive::kPrimNot, invoke->GetDexPc());
-  invoke->SetArgumentAt(argument_index, new_instance);
-  current_block_->AddInstruction(invoke);
-
-  latest_result_ = invoke;
-  return true;
-}
-
-static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
-  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
-  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
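-  // For example, descriptor "J" yields kPrimLong and "Ljava/lang/String;" yields kPrimNot.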
-  return Primitive::GetType(type[0]);
-}
-
-bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
-                                             uint32_t dex_pc,
-                                             bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_22c();
-  uint32_t obj_reg = instruction.VRegB_22c();
-  uint16_t field_index;
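-  // For quickened opcodes (e.g. iget-quick) the original field index is not encoded in the
-  // instruction; it has to be recovered from the interpreter metadata stream.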
-  if (instruction.IsQuickened()) {
-    if (!CanDecodeQuickenedInfo()) {
-      return false;
-    }
-    field_index = LookupQuickenedInfo(dex_pc);
-  } else {
-    field_index = instruction.VRegC_22c();
-  }
-
-  ScopedObjectAccess soa(Thread::Current());
-  ArtField* resolved_field =
-      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
-
-  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot, dex_pc);
-  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  Primitive::Type field_type = (resolved_field == nullptr)
-      ? GetFieldAccessType(*dex_file_, field_index)
-      : resolved_field->GetTypeAsPrimitiveType();
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    HInstruction* field_set = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
-                                                           value,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_set = new (arena_) HInstanceFieldSet(null_check,
-                                                 value,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_set);
-  } else {
-    HInstruction* field_get = nullptr;
-    if (resolved_field == nullptr) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
-                                                           field_type,
-                                                           field_index,
-                                                           dex_pc);
-    } else {
-      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
-      field_get = new (arena_) HInstanceFieldGet(null_check,
-                                                 field_type,
-                                                 resolved_field->GetOffset(),
-                                                 resolved_field->IsVolatile(),
-                                                 field_index,
-                                                 class_def_index,
-                                                 *dex_file_,
-                                                 dex_compilation_unit_->GetDexCache(),
-                                                 dex_pc);
-    }
-    current_block_->AddInstruction(field_get);
-    UpdateLocal(source_or_dest_reg, field_get, dex_pc);
-  }
-
-  return true;
-}
-
-static mirror::Class* GetClassFrom(CompilerDriver* driver,
-                                   const DexCompilationUnit& compilation_unit) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *compilation_unit.GetDexFile();
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-
-  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
-}
-
-mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
-}
-
-mirror::Class* HGraphBuilder::GetCompilingClass() const {
-  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
-}
-
-bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
-      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // GetOutermostCompilingClass returns null when the class is unresolved
-  // (e.g. if it derives from an unresolved class). This is bogus, given that
-  // we are compiling it.
-  // When this happens we cannot establish a direct relation between the current
-  // class and the outer class, so we return false.
-  // (Note that this is only used for optimizing invokes and field accesses.)
-  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
-}
-
-void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                                     uint32_t dex_pc,
-                                                     bool is_put,
-                                                     Primitive::Type field_type) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
-  } else {
-    current_block_->AddInstruction(
-        new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-
-bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
-                                           uint32_t dex_pc,
-                                           bool is_put) {
-  uint32_t source_or_dest_reg = instruction.VRegA_21c();
-  uint16_t field_index = instruction.VRegB_21c();
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtField* resolved_field = compiler_driver_->ResolveField(
-      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
-
-  if (resolved_field == nullptr) {
-    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
-    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
-    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-    return true;
-  }
-
-  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
-  // The index at which the field's class is stored in the DexCache's type array.
-  uint32_t storage_index;
-  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
-  if (is_outer_class) {
-    storage_index = outer_class->GetDexTypeIndex();
-  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
-    // The compiler driver cannot currently handle multiple dex caches being involved. Just bail out.
-    return false;
-  } else {
-    // TODO: This is rather expensive. Perf it and cache the results if needed.
-    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
-        outer_dex_cache.Get(),
-        GetCompilingClass(),
-        resolved_field,
-        field_index,
-        &storage_index);
-    bool can_easily_access = is_put ? pair.second : pair.first;
-    if (!can_easily_access) {
-      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
-      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-      return true;
-    }
-  }
-
-  bool is_in_cache =
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
-  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
-                                                 storage_index,
-                                                 outer_dex_file,
-                                                 is_outer_class,
-                                                 dex_pc,
-                                                 /*needs_access_check*/ false,
-                                                 is_in_cache);
-  current_block_->AddInstruction(constant);
-
-  HInstruction* cls = constant;
-
-  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
-  if (!IsInitialized(klass)) {
-    cls = new (arena_) HClinitCheck(constant, dex_pc);
-    current_block_->AddInstruction(cls);
-  }
-
-  uint16_t class_def_index = klass->GetDexClassDefIndex();
-  if (is_put) {
-    // We need to keep the class alive before loading the value.
-    HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc);
-    DCHECK_EQ(value->GetType(), field_type);
-    current_block_->AddInstruction(new (arena_) HStaticFieldSet(cls,
-                                                                value,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HStaticFieldGet(cls,
-                                                                field_type,
-                                                                resolved_field->GetOffset(),
-                                                                resolved_field->IsVolatile(),
-                                                                field_index,
-                                                                class_def_index,
-                                                                *dex_file_,
-                                                                dex_cache_,
-                                                                dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  return true;
-}
-
-void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg,
-                                       uint16_t first_vreg,
-                                       int64_t second_vreg_or_constant,
-                                       uint32_t dex_pc,
-                                       Primitive::Type type,
-                                       bool second_is_constant,
-                                       bool is_div) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  HInstruction* first = LoadLocal(first_vreg, type, dex_pc);
-  HInstruction* second = nullptr;
-  if (second_is_constant) {
-    if (type == Primitive::kPrimInt) {
-      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
-    } else {
-      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
-    }
-  } else {
-    second = LoadLocal(second_vreg_or_constant, type, dex_pc);
-  }
-
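-  // An HDivZeroCheck is needed unless the divisor is a non-zero constant. A constant
-  // zero keeps the check so the runtime still throws ArithmeticException, e.g. for "x / 0".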
-  if (!second_is_constant
-      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
-      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_pc);
-    current_block_->AddInstruction(second);
-  }
-
-  if (is_div) {
-    current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HRem(type, first, second, dex_pc));
-  }
-  UpdateLocal(out_vreg, current_block_->GetLastInstruction(), dex_pc);
-}
-
-void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
-                                     uint32_t dex_pc,
-                                     bool is_put,
-                                     Primitive::Type anticipated_type) {
-  uint8_t source_or_dest_reg = instruction.VRegA_23x();
-  uint8_t array_reg = instruction.VRegB_23x();
-  uint8_t index_reg = instruction.VRegC_23x();
-
-  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot, dex_pc);
-  object = new (arena_) HNullCheck(object, dex_pc);
-  current_block_->AddInstruction(object);
-
-  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
-  current_block_->AddInstruction(length);
-  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt, dex_pc);
-  index = new (arena_) HBoundsCheck(index, length, dex_pc);
-  current_block_->AddInstruction(index);
-  if (is_put) {
-    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type, dex_pc);
-    // TODO: Insert a type check node if the type is Object.
-    current_block_->AddInstruction(new (arena_) HArraySet(
-        object, index, value, anticipated_type, dex_pc));
-  } else {
-    current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type, dex_pc));
-    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc);
-  }
-  graph_->SetHasBoundsChecks(true);
-}
-
-void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
-                                        uint32_t type_index,
-                                        uint32_t number_of_vreg_arguments,
-                                        bool is_range,
-                                        uint32_t* args,
-                                        uint32_t register_index) {
-  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
-  bool finalizable;
-  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
-      ? kQuickAllocArrayWithAccessCheck
-      : kQuickAllocArray;
-  HInstruction* object = new (arena_) HNewArray(length,
-                                                graph_->GetCurrentMethod(),
-                                                dex_pc,
-                                                type_index,
-                                                *dex_compilation_unit_->GetDexFile(),
-                                                entrypoint);
-  current_block_->AddInstruction(object);
-
-  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
-  DCHECK_EQ(descriptor[0], '[') << descriptor;
-  char primitive = descriptor[1];
-  DCHECK(primitive == 'I'
-      || primitive == 'L'
-      || primitive == '[') << descriptor;
-  bool is_reference_array = (primitive == 'L') || (primitive == '[');
-  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
-
-  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
-    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type, dex_pc);
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    current_block_->AddInstruction(
-        new (arena_) HArraySet(object, index, value, type, dex_pc));
-  }
-  latest_result_ = object;
-}
-
-template <typename T>
-void HGraphBuilder::BuildFillArrayData(HInstruction* object,
-                                       const T* data,
-                                       uint32_t element_count,
-                                       Primitive::Type anticipated_type,
-                                       uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, anticipated_type, dex_pc));
-  }
-}
-
-void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
-  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot, dex_pc);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
-  current_block_->AddInstruction(null_check);
-
-  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
-  current_block_->AddInstruction(length);
-
-  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
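-  // The payload referenced by fill-array-data is laid out as: ident (0x0300),
-  // element_width in bytes, element_count, then the raw little-endian data bytes.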
-  const Instruction::ArrayDataPayload* payload =
-      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_start_ + payload_offset);
-  const uint8_t* data = payload->data;
-  uint32_t element_count = payload->element_count;
-
-  // This DEX instruction is implemented such that the bounds check is done
-  // before any of the stores happen.
-  HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc);
-  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
-
-  switch (payload->element_width) {
-    case 1:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int8_t*>(data),
-                         element_count,
-                         Primitive::kPrimByte,
-                         dex_pc);
-      break;
-    case 2:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int16_t*>(data),
-                         element_count,
-                         Primitive::kPrimShort,
-                         dex_pc);
-      break;
-    case 4:
-      BuildFillArrayData(null_check,
-                         reinterpret_cast<const int32_t*>(data),
-                         element_count,
-                         Primitive::kPrimInt,
-                         dex_pc);
-      break;
-    case 8:
-      BuildFillWideArrayData(null_check,
-                             reinterpret_cast<const int64_t*>(data),
-                             element_count,
-                             dex_pc);
-      break;
-    default:
-      LOG(FATAL) << "Unknown element width for " << payload->element_width;
-  }
-  graph_->SetHasBoundsChecks(true);
-}
-
-void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
-                                           const int64_t* data,
-                                           uint32_t element_count,
-                                           uint32_t dex_pc) {
-  for (uint32_t i = 0; i < element_count; ++i) {
-    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
-    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
-    current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, Primitive::kPrimLong, dex_pc));
-  }
-}
-
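-// For example, a final class such as java.lang.String maps to kExactCheck, Object[]
-// maps to kArrayObjectCheck, and any interface maps to kInterfaceCheck.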
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (cls.Get() == nullptr) {
-    return TypeCheckKind::kUnresolvedCheck;
-  } else if (cls->IsInterface()) {
-    return TypeCheckKind::kInterfaceCheck;
-  } else if (cls->IsArrayClass()) {
-    if (cls->GetComponentType()->IsObjectClass()) {
-      return TypeCheckKind::kArrayObjectCheck;
-    } else if (cls->CannotBeAssignedFromOtherTypes()) {
-      return TypeCheckKind::kExactCheck;
-    } else {
-      return TypeCheckKind::kArrayCheck;
-    }
-  } else if (cls->IsFinal()) {
-    return TypeCheckKind::kExactCheck;
-  } else if (cls->IsAbstract()) {
-    return TypeCheckKind::kAbstractClassCheck;
-  } else {
-    return TypeCheckKind::kClassHierarchyCheck;
-  }
-}
-
-void HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
-                                   uint8_t destination,
-                                   uint8_t reference,
-                                   uint16_t type_index,
-                                   uint32_t dex_pc) {
-  bool type_known_final, type_known_abstract, use_declaring_class;
-  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(),
-      *dex_compilation_unit_->GetDexFile(),
-      type_index,
-      &type_known_final,
-      &type_known_abstract,
-      &use_declaring_class);
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-
-  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
-  HLoadClass* cls = new (arena_) HLoadClass(
-      graph_->GetCurrentMethod(),
-      type_index,
-      dex_file,
-      IsOutermostCompilingClass(type_index),
-      dex_pc,
-      !can_access,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
-  current_block_->AddInstruction(cls);
-
-  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
-  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
-    current_block_->AddInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
-    UpdateLocal(destination, current_block_->GetLastInstruction(), dex_pc);
-  } else {
-    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
-    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
-    // which may throw. If it succeeds BoundType sets the new type of `object`
-    // for all subsequent uses.
-    current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
-    current_block_->AddInstruction(new (arena_) HBoundType(object, dex_pc));
-    UpdateLocal(reference, current_block_->GetLastInstruction(), dex_pc);
-  }
-}
-
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
-  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
-}
-
-void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
-                                         const Instruction& instruction,
-                                         HInstruction* value,
-                                         uint32_t dex_pc) {
-  // Add the successor blocks to the current block.
-  uint16_t num_entries = table.GetNumEntries();
-  for (size_t i = 1; i <= num_entries; i++) {
-    int32_t target_offset = table.GetEntryAt(i);
-    HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
-    DCHECK(case_target != nullptr);
-
-    // Add the target block as a successor.
-    current_block_->AddSuccessor(case_target);
-  }
-
-  // Add the default target block as the last successor.
-  HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-  DCHECK(default_target != nullptr);
-  current_block_->AddSuccessor(default_target);
-
-  // Now add the Switch instruction.
-  int32_t starting_key = table.GetEntryAt(0);
-  current_block_->AddInstruction(
-      new (arena_) HPackedSwitch(starting_key, num_entries, value, dex_pc));
-  // This block ends with control flow.
-  current_block_ = nullptr;
-}
-
-void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  // Verifier guarantees that the payload for PackedSwitch contains:
-  //   (a) number of entries (may be zero)
-  //   (b) first and lowest switch case value (entry 0, always present)
-  //   (c) list of target pcs (entries 1 <= i <= N)
-  SwitchTable table(instruction, dex_pc, false);
-
-  // Value to test against.
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-
-  // Starting key value.
-  int32_t starting_key = table.GetEntryAt(0);
-
-  // Retrieve number of entries.
-  uint16_t num_entries = table.GetNumEntries();
-  if (num_entries == 0) {
-    return;
-  }
-
-  // Don't use a packed switch if there are very few entries.
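-  // For example, assuming kSmallSwitchThreshold is a small value such as 3, a two-entry
-  // switch compiles to chained HEqual/HIf pairs rather than an HPackedSwitch.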
-  if (num_entries > kSmallSwitchThreshold) {
-    BuildSwitchJumpTable(table, instruction, value, dex_pc);
-  } else {
-    // Chained cmp-and-branch, starting from starting_key.
-    for (size_t i = 1; i <= num_entries; i++) {
-      BuildSwitchCaseHelper(instruction,
-                            i,
-                            i == num_entries,
-                            table,
-                            value,
-                            starting_key + i - 1,
-                            table.GetEntryAt(i),
-                            dex_pc);
-    }
-  }
-}
-
-void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) {
-  // Verifier guarantees that the payload for SparseSwitch contains:
-  //   (a) number of entries (may be zero)
-  //   (b) sorted key values (entries 0 <= i < N)
-  //   (c) target pcs corresponding to the switch values (entries N <= i < 2*N)
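-  //   For example, with N == 2 the keys occupy entries 0..1 and their targets entries 2..3.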
-  SwitchTable table(instruction, dex_pc, true);
-
-  // Value to test against.
-  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc);
-
-  uint16_t num_entries = table.GetNumEntries();
-
-  for (size_t i = 0; i < num_entries; i++) {
-    BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value,
-                          table.GetEntryAt(i), table.GetEntryAt(i + num_entries), dex_pc);
-  }
-}
-
-void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
-                                          bool is_last_case, const SwitchTable& table,
-                                          HInstruction* value, int32_t case_value_int,
-                                          int32_t target_offset, uint32_t dex_pc) {
-  HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
-  DCHECK(case_target != nullptr);
-
-  // The current case's value.
-  HInstruction* this_case_value = graph_->GetIntConstant(case_value_int, dex_pc);
-
-  // Compare value and this_case_value.
-  HEqual* comparison = new (arena_) HEqual(value, this_case_value, dex_pc);
-  current_block_->AddInstruction(comparison);
-  HInstruction* ifinst = new (arena_) HIf(comparison, dex_pc);
-  current_block_->AddInstruction(ifinst);
-
-  // Case hit: use the target offset to determine where to go.
-  current_block_->AddSuccessor(case_target);
-
-  // Case miss: go to the next case (or default fall-through).
-  // When there is a next case, we use the block stored with the table offset representing this
-  // case (that is where we registered them in ComputeBranchTargets).
-  // When there is no next case, we use the following instruction.
-  // TODO: Find a good way to peel the last iteration to avoid the conditional, but still have reuse.
-  if (!is_last_case) {
-    HBasicBlock* next_case_target = FindBlockStartingAt(table.GetDexPcForIndex(index));
-    DCHECK(next_case_target != nullptr);
-    current_block_->AddSuccessor(next_case_target);
-
-    // Need to manually add the block, as there is no dex-pc transition for the cases.
-    graph_->AddBlock(next_case_target);
-
-    current_block_ = next_case_target;
-  } else {
-    HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
-    DCHECK(default_target != nullptr);
-    current_block_->AddSuccessor(default_target);
-    current_block_ = nullptr;
-  }
-}
-
-bool HGraphBuilder::CanDecodeQuickenedInfo() const {
-  return interpreter_metadata_ != nullptr;
-}
-
-uint16_t HGraphBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
-  DCHECK(interpreter_metadata_ != nullptr);
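-  // The metadata is a stream of (dex_pc, value) pairs, each ULEB128-encoded, that must
-  // be consumed in the same order in which the quickened instructions appear.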
-  uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
-  DCHECK_EQ(dex_pc, dex_pc_in_map);
-  return DecodeUnsignedLeb128(&interpreter_metadata_);
-}
-
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
-  if (current_block_ == nullptr) {
-    return true;  // Dead code
-  }
-
-  switch (instruction.Opcode()) {
-    case Instruction::CONST_4: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_16: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST: {
-      int32_t register_index = instruction.VRegA();
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_HIGH16: {
-      int32_t register_index = instruction.VRegA();
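-      // CONST_HIGH16 supplies the upper 16 bits; e.g. literal 0x3F80 produces 0x3F800000,
-      // the bit pattern of 1.0f.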
-      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_16: {
-      int32_t register_index = instruction.VRegA();
-      // Get 16 bits of constant value, sign extended to 64 bits.
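-      // For example, 0xFFFF becomes 0xFFFFFFFFFFFFFFFF (-1).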
-      int64_t value = instruction.VRegB_21s();
-      value <<= 48;
-      value >>= 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_32: {
-      int32_t register_index = instruction.VRegA();
-      // Get 32 bits of constant value, sign extended to 64 bits.
-      int64_t value = instruction.VRegB_31i();
-      value <<= 32;
-      value >>= 32;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE: {
-      int32_t register_index = instruction.VRegA();
-      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_WIDE_HIGH16: {
-      int32_t register_index = instruction.VRegA();
-      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
-      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
-      UpdateLocal(register_index, constant, dex_pc);
-      break;
-    }
-
-    // Note that the SSA building will refine the types.
-    case Instruction::MOVE:
-    case Instruction::MOVE_FROM16:
-    case Instruction::MOVE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    // Note that the SSA building will refine the types.
-    case Instruction::MOVE_WIDE:
-    case Instruction::MOVE_WIDE_FROM16:
-    case Instruction::MOVE_WIDE_16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_OBJECT:
-    case Instruction::MOVE_OBJECT_16:
-    case Instruction::MOVE_OBJECT_FROM16: {
-      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot, dex_pc);
-      UpdateLocal(instruction.VRegA(), value, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_VOID_NO_BARRIER:
-    case Instruction::RETURN_VOID: {
-      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
-      break;
-    }
-
-#define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
-    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
-
-    IF_XX(HEqual, EQ);
-    IF_XX(HNotEqual, NE);
-    IF_XX(HLessThan, LT);
-    IF_XX(HLessThanOrEqual, LE);
-    IF_XX(HGreaterThan, GT);
-    IF_XX(HGreaterThanOrEqual, GE);
-
-    case Instruction::GOTO:
-    case Instruction::GOTO_16:
-    case Instruction::GOTO_32: {
-      int32_t offset = instruction.GetTargetOffset();
-      HBasicBlock* target = FindBlockStartingAt(offset + dex_pc);
-      DCHECK(target != nullptr);
-      current_block_->AddInstruction(new (arena_) HGoto(dex_pc));
-      current_block_->AddSuccessor(target);
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::RETURN: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_OBJECT: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::RETURN_WIDE: {
-      BuildReturn(instruction, return_type_, dex_pc);
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT:
-    case Instruction::INVOKE_INTERFACE:
-    case Instruction::INVOKE_STATIC:
-    case Instruction::INVOKE_SUPER:
-    case Instruction::INVOKE_VIRTUAL:
-    case Instruction::INVOKE_VIRTUAL_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_35c();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, false, args, -1)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::INVOKE_DIRECT_RANGE:
-    case Instruction::INVOKE_INTERFACE_RANGE:
-    case Instruction::INVOKE_STATIC_RANGE:
-    case Instruction::INVOKE_SUPER_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE:
-    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
-      uint16_t method_idx;
-      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
-        if (!CanDecodeQuickenedInfo()) {
-          return false;
-        }
-        method_idx = LookupQuickenedInfo(dex_pc);
-      } else {
-        method_idx = instruction.VRegB_3rc();
-      }
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t register_index = instruction.VRegC();
-      if (!BuildInvoke(instruction, dex_pc, method_idx,
-                       number_of_vreg_arguments, true, nullptr, register_index)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::NEG_INT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_LONG: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_FLOAT: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::NEG_DOUBLE: {
-      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_INT: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::NOT_LONG: {
-      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::LONG_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::FLOAT_TO_DOUBLE: {
-      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_INT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_LONG: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DOUBLE_TO_FLOAT: {
-      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_BYTE: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_SHORT: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
-      break;
-    }
-
-    case Instruction::INT_TO_CHAR: {
-      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT: {
-      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE: {
-      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE: {
-      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_INT: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE: {
-      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG: {
-      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG: {
-      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG: {
-      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG: {
-      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG: {
-      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG: {
-      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_LONG_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_DOUBLE_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_FLOAT_2ADDR: {
-      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_INT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_LONG_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_FLOAT_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::SUB_DOUBLE_2ADDR: {
-      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_LONG_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_FLOAT_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_DOUBLE_2ADDR: {
-      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, true);
-      break;
-    }
-
-    case Instruction::DIV_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, true);
-      break;
-    }
-
-    case Instruction::REM_INT_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimInt, false, false);
-      break;
-    }
-
-    case Instruction::REM_LONG_2ADDR: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                         dex_pc, Primitive::kPrimLong, false, false);
-      break;
-    }
-
-    case Instruction::REM_FLOAT_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::REM_DOUBLE_2ADDR: {
-      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_INT_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHL_LONG_2ADDR: {
-      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_LONG_2ADDR: {
-      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_LONG_2ADDR: {
-      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_LONG_2ADDR: {
-      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_LONG_2ADDR: {
-      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_LONG_2ADDR: {
-      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT16: {
-      Binop_22s<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT16: {
-      Binop_22s<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT16: {
-      Binop_22s<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT16: {
-      Binop_22s<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT: {
-      Binop_22s<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT16: {
-      Binop_22s<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::ADD_INT_LIT8: {
-      Binop_22b<HAdd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::AND_INT_LIT8: {
-      Binop_22b<HAnd>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::OR_INT_LIT8: {
-      Binop_22b<HOr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::XOR_INT_LIT8: {
-      Binop_22b<HXor>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::RSUB_INT_LIT8: {
-      Binop_22b<HSub>(instruction, true, dex_pc);
-      break;
-    }
-
-    case Instruction::MUL_INT_LIT8: {
-      Binop_22b<HMul>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::DIV_INT_LIT16:
-    case Instruction::DIV_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, true);
-      break;
-    }
-
-    case Instruction::REM_INT_LIT16:
-    case Instruction::REM_INT_LIT8: {
-      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                         dex_pc, Primitive::kPrimInt, true, false);
-      break;
-    }
-
-    case Instruction::SHL_INT_LIT8: {
-      Binop_22b<HShl>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::SHR_INT_LIT8: {
-      Binop_22b<HShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::USHR_INT_LIT8: {
-      Binop_22b<HUShr>(instruction, false, dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_INSTANCE: {
-      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
-        return false;
-      }
-      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::NEW_ARRAY: {
-      uint16_t type_index = instruction.VRegC_22c();
-      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
-      bool finalizable;
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
-          ? kQuickAllocArrayWithAccessCheck
-          : kQuickAllocArray;
-      current_block_->AddInstruction(new (arena_) HNewArray(length,
-                                                            graph_->GetCurrentMethod(),
-                                                            dex_pc,
-                                                            type_index,
-                                                            *dex_compilation_unit_->GetDexFile(),
-                                                            entrypoint));
-      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
-      uint32_t type_index = instruction.VRegB_35c();
-      uint32_t args[5];
-      instruction.GetVarArgs(args);
-      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
-      break;
-    }
-
-    case Instruction::FILLED_NEW_ARRAY_RANGE: {
-      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
-      uint32_t type_index = instruction.VRegB_3rc();
-      uint32_t register_index = instruction.VRegC_3rc();
-      BuildFilledNewArray(
-          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
-      break;
-    }
-
-    case Instruction::FILL_ARRAY_DATA: {
-      BuildFillArrayData(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_RESULT:
-    case Instruction::MOVE_RESULT_WIDE:
-    case Instruction::MOVE_RESULT_OBJECT: {
-      if (latest_result_ == nullptr) {
-        // This can only happen in dead code, which is why the verifier
-        // does not reject the method.
-      } else {
-        // An Invoke/FilledNewArray and its MoveResult could have landed in
-        // different blocks if there was a try/catch block boundary between
-        // them. For Invoke, we insert a StoreLocal after the instruction. For
-        // FilledNewArray, the local needs to be updated after the array was
-        // filled, otherwise we might overwrite an input vreg.
-        HStoreLocal* update_local =
-            new (arena_) HStoreLocal(GetLocalAt(instruction.VRegA()), latest_result_, dex_pc);
-        HBasicBlock* block = latest_result_->GetBlock();
-        if (block == current_block_) {
-          // MoveResult and the previous instruction are in the same block.
-          current_block_->AddInstruction(update_local);
-        } else {
-          // The two instructions are in different blocks. Insert the StoreLocal
-          // before the final control-flow instruction of the previous block.
-          DCHECK(block->EndsWithControlFlowInstruction());
-          DCHECK(current_block_->GetInstructions().IsEmpty());
-          block->InsertInstructionBefore(update_local, block->GetLastInstruction());
-        }
-        latest_result_ = nullptr;
-      }
-      break;
-    }
-
-    case Instruction::CMP_LONG: {
-      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPG_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::CMPL_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
-      break;
-    }
-
-    case Instruction::NOP:
-      break;
-
-    case Instruction::IGET:
-    case Instruction::IGET_QUICK:
-    case Instruction::IGET_WIDE:
-    case Instruction::IGET_WIDE_QUICK:
-    case Instruction::IGET_OBJECT:
-    case Instruction::IGET_OBJECT_QUICK:
-    case Instruction::IGET_BOOLEAN:
-    case Instruction::IGET_BOOLEAN_QUICK:
-    case Instruction::IGET_BYTE:
-    case Instruction::IGET_BYTE_QUICK:
-    case Instruction::IGET_CHAR:
-    case Instruction::IGET_CHAR_QUICK:
-    case Instruction::IGET_SHORT:
-    case Instruction::IGET_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::IPUT:
-    case Instruction::IPUT_QUICK:
-    case Instruction::IPUT_WIDE:
-    case Instruction::IPUT_WIDE_QUICK:
-    case Instruction::IPUT_OBJECT:
-    case Instruction::IPUT_OBJECT_QUICK:
-    case Instruction::IPUT_BOOLEAN:
-    case Instruction::IPUT_BOOLEAN_QUICK:
-    case Instruction::IPUT_BYTE:
-    case Instruction::IPUT_BYTE_QUICK:
-    case Instruction::IPUT_CHAR:
-    case Instruction::IPUT_CHAR_QUICK:
-    case Instruction::IPUT_SHORT:
-    case Instruction::IPUT_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SGET:
-    case Instruction::SGET_WIDE:
-    case Instruction::SGET_OBJECT:
-    case Instruction::SGET_BOOLEAN:
-    case Instruction::SGET_BYTE:
-    case Instruction::SGET_CHAR:
-    case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
-      break;
-    }
-
-    case Instruction::SPUT:
-    case Instruction::SPUT_WIDE:
-    case Instruction::SPUT_OBJECT:
-    case Instruction::SPUT_BOOLEAN:
-    case Instruction::SPUT_BYTE:
-    case Instruction::SPUT_CHAR:
-    case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
-      break;
-    }
-
-#define ARRAY_XX(kind, anticipated_type)                                          \
-    case Instruction::AGET##kind: {                                               \
-      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);             \
-      break;                                                                      \
-    }                                                                             \
-    case Instruction::APUT##kind: {                                               \
-      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);              \
-      break;                                                                      \
-    }
-
-    ARRAY_XX(, Primitive::kPrimInt);
-    ARRAY_XX(_WIDE, Primitive::kPrimLong);
-    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
-    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
-    ARRAY_XX(_BYTE, Primitive::kPrimByte);
-    ARRAY_XX(_CHAR, Primitive::kPrimChar);
-    ARRAY_XX(_SHORT, Primitive::kPrimShort);
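For reference, ARRAY_XX(_WIDE, Primitive::kPrimLong) above expands to the
following two cases; the remaining invocations expand analogously:

    case Instruction::AGET_WIDE: {
      BuildArrayAccess(instruction, dex_pc, false, Primitive::kPrimLong);
      break;
    }
    case Instruction::APUT_WIDE: {
      BuildArrayAccess(instruction, dex_pc, true, Primitive::kPrimLong);
      break;
    }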
-
-    case Instruction::ARRAY_LENGTH: {
-      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot, dex_pc);
-      object = new (arena_) HNullCheck(object, dex_pc);
-      current_block_->AddInstruction(object);
-      current_block_->AddInstruction(new (arena_) HArrayLength(object, dex_pc));
-      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING: {
-      uint32_t string_index = instruction.VRegB_21c();
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_STRING_JUMBO: {
-      uint32_t string_index = instruction.VRegB_31c();
-      current_block_->AddInstruction(
-          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
-      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::CONST_CLASS: {
-      uint16_t type_index = instruction.VRegB_21c();
-      bool type_known_final;
-      bool type_known_abstract;
-      bool dont_use_is_referrers_class;
-      // `CanAccessTypeWithoutChecks` will tell whether the method being
-      // built is trying to access its own class, so that the generated
-      // code can optimize for this case. However, the optimization does not
-      // work for inlining, so we use `IsOutermostCompilingClass` instead.
-      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
-          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
-      current_block_->AddInstruction(new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          type_index,
-          *dex_file_,
-          IsOutermostCompilingClass(type_index),
-          dex_pc,
-          !can_access,
-          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
-      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
-      break;
-    }
-
-    case Instruction::MOVE_EXCEPTION: {
-      current_block_->AddInstruction(new (arena_) HLoadException(dex_pc));
-      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction(), dex_pc);
-      current_block_->AddInstruction(new (arena_) HClearException(dex_pc));
-      break;
-    }
-
-    case Instruction::THROW: {
-      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc);
-      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_pc));
-      // A throw instruction must branch to the exit block.
-      current_block_->AddSuccessor(exit_block_);
-      // We finished building this block. Set the current block to null to avoid
-      // adding dead instructions to it.
-      current_block_ = nullptr;
-      break;
-    }
-
-    case Instruction::INSTANCE_OF: {
-      uint8_t destination = instruction.VRegA_22c();
-      uint8_t reference = instruction.VRegB_22c();
-      uint16_t type_index = instruction.VRegC_22c();
-      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::CHECK_CAST: {
-      uint8_t reference = instruction.VRegA_21c();
-      uint16_t type_index = instruction.VRegB_21c();
-      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
-      break;
-    }
-
-    case Instruction::MONITOR_ENTER: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::OperationKind::kEnter,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::MONITOR_EXIT: {
-      current_block_->AddInstruction(new (arena_) HMonitorOperation(
-          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot, dex_pc),
-          HMonitorOperation::OperationKind::kExit,
-          dex_pc));
-      break;
-    }
-
-    case Instruction::PACKED_SWITCH: {
-      BuildPackedSwitch(instruction, dex_pc);
-      break;
-    }
-
-    case Instruction::SPARSE_SWITCH: {
-      BuildSparseSwitch(instruction, dex_pc);
-      break;
-    }
-
-    default:
-      VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
-                     << " because of unhandled instruction "
-                     << instruction.Name();
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
-      return false;
-  }
-  return true;
-}  // NOLINT(readability/fn_size)
-
-HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
-  return locals_[register_index];
-}
-
-void HGraphBuilder::UpdateLocal(uint32_t register_index,
-                                HInstruction* instruction,
-                                uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc));
-}
-
-HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index,
-                                       Primitive::Type type,
-                                       uint32_t dex_pc) const {
-  HLocal* local = GetLocalAt(register_index);
-  current_block_->AddInstruction(new (arena_) HLoadLocal(local, type, dex_pc));
-  return current_block_->GetLastInstruction();
+  // 5) Type the graph and eliminate dead/redundant phis.
+  return ssa_builder_.BuildSsa();
 }
 
 }  // namespace art
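For context on the removed switch above: the Unop_12x/Binop_23x helper templates
it calls all follow the same load-operands / emit-node / store-result pattern
visible in LoadLocal and UpdateLocal. A minimal sketch of one of them, assuming
the HInstruction constructors take (type, inputs..., dex_pc) as the call sites
suggest; this is not necessarily the exact removed implementation:

template<typename T>
void HGraphBuilder::Binop_23x(const Instruction& instruction,
                              Primitive::Type type,
                              uint32_t dex_pc) {
  // Load the two source vregs of the 23x-format instruction, emit the binary
  // operation, and store the result into the destination vreg.
  HInstruction* first = LoadLocal(instruction.VRegB(), type, dex_pc);
  HInstruction* second = LoadLocal(instruction.VRegC(), type, dex_pc);
  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
}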
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 48f5316..4f46d5e 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -19,320 +19,90 @@
 
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
+#include "block_builder.h"
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "instruction_builder.h"
 #include "optimizing_compiler_stats.h"
 #include "primitive.h"
 #include "nodes.h"
+#include "ssa_builder.h"
 
 namespace art {
 
-class Instruction;
-
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(HGraph* graph,
                 DexCompilationUnit* dex_compilation_unit,
                 const DexCompilationUnit* const outer_compilation_unit,
                 const DexFile* dex_file,
+                const DexFile::CodeItem& code_item,
                 CompilerDriver* driver,
                 OptimizingCompilerStats* compiler_stats,
                 const uint8_t* interpreter_metadata,
-                Handle<mirror::DexCache> dex_cache)
-      : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        entry_block_(nullptr),
-        exit_block_(nullptr),
-        current_block_(nullptr),
-        graph_(graph),
+                Handle<mirror::DexCache> dex_cache,
+                StackHandleScopeCollection* handles)
+      : graph_(graph),
         dex_file_(dex_file),
+        code_item_(code_item),
         dex_compilation_unit_(dex_compilation_unit),
         compiler_driver_(driver),
-        outer_compilation_unit_(outer_compilation_unit),
-        return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
-        code_start_(nullptr),
-        latest_result_(nullptr),
         compilation_stats_(compiler_stats),
-        interpreter_metadata_(interpreter_metadata),
-        dex_cache_(dex_cache) {}
+        block_builder_(graph, dex_file, code_item),
+        ssa_builder_(graph, handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             dex_file,
+                             code_item_,
+                             Primitive::GetType(dex_compilation_unit_->GetShorty()[0]),
+                             dex_compilation_unit,
+                             outer_compilation_unit,
+                             driver,
+                             interpreter_metadata,
+                             compiler_stats,
+                             dex_cache) {}
 
   // Only for unit testing.
-  HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt)
-      : arena_(graph->GetArena()),
-        branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
-        entry_block_(nullptr),
-        exit_block_(nullptr),
-        current_block_(nullptr),
-        graph_(graph),
+  HGraphBuilder(HGraph* graph,
+                const DexFile::CodeItem& code_item,
+                StackHandleScopeCollection* handles,
+                Primitive::Type return_type = Primitive::kPrimInt)
+      : graph_(graph),
         dex_file_(nullptr),
+        code_item_(code_item),
         dex_compilation_unit_(nullptr),
         compiler_driver_(nullptr),
-        outer_compilation_unit_(nullptr),
-        return_type_(return_type),
-        code_start_(nullptr),
-        latest_result_(nullptr),
-        compilation_stats_(nullptr),
-        interpreter_metadata_(nullptr),
         null_dex_cache_(),
-        dex_cache_(null_dex_cache_) {}
+        compilation_stats_(nullptr),
+        block_builder_(graph, nullptr, code_item),
+        ssa_builder_(graph, handles),
+        instruction_builder_(graph,
+                             &block_builder_,
+                             &ssa_builder_,
+                             /* dex_file */ nullptr,
+                             code_item_,
+                             return_type,
+                             /* dex_compilation_unit */ nullptr,
+                             /* outer_compilation_unit */ nullptr,
+                             /* compiler_driver */ nullptr,
+                             /* interpreter_metadata */ nullptr,
+                             /* compiler_stats */ nullptr,
+                             null_dex_cache_) {}
 
-  GraphAnalysisResult BuildGraph(const DexFile::CodeItem& code,
-                                 StackHandleScopeCollection* handles);
+  GraphAnalysisResult BuildGraph();
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The maximum number of entries in a packed switch for which we emit a
-  // compare/jump series rather than a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 3;
-
  private:
-  // Analyzes the dex instruction and adds to the graph the HInstructions
-  // needed to execute it. Returns whether the instruction can be handled.
-  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc);
-
-  // Finds all instructions that start a new block, and populates branch_targets_ with
-  // the newly created blocks.
-  // As a side effect, also computes the number of dex instructions, blocks, and
-  // branches.
-  // Returns true if all the branches fall inside the method code, false otherwise.
-  // (In normal cases this should always return true, but one could artificially
-  // create a code unit in which branches fall through out of it.)
-  bool ComputeBranchTargets(const uint16_t* start,
-                            const uint16_t* end,
-                            size_t* number_of_branches);
-  void MaybeUpdateCurrentBlock(size_t dex_pc);
-  void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations);
-  HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const;
-  HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc);
-
-  // Adds new blocks to `branch_targets_` starting at the limits of TryItems and
-  // their exception handlers.
-  void CreateBlocksForTryCatch(const DexFile::CodeItem& code_item);
-
-  // Splits edges which cross the boundaries of TryItems, inserts TryBoundary
-  // instructions and links them to the corresponding catch blocks.
-  void InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item);
-
-  // Iterates over the exception handlers of `try_item`, finds the corresponding
-  // catch blocks and makes them successors of `try_boundary`. The order of
-  // successors matches the order in which runtime exception delivery searches
-  // for a handler.
-  void LinkToCatchBlocks(HTryBoundary* try_boundary,
-                         const DexFile::CodeItem& code_item,
-                         const DexFile::TryItem* try_item);
-
-  bool CanDecodeQuickenedInfo() const;
-  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
-
-  void InitializeLocals(uint16_t count);
-  HLocal* GetLocalAt(uint32_t register_index) const;
-  void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const;
-  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
-  void InitializeParameters(uint16_t number_of_parameters);
-
-  // Returns whether the current method needs an access check for the type.
-  // The output parameter `finalizable` is set to whether the type is finalizable.
-  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
-
-  template<typename T>
-  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  void Binop_23x_cmp(const Instruction& instruction,
-                     Primitive::Type type,
-                     ComparisonBias bias,
-                     uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T>
-  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
-
-  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
-  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
-
-  void Conversion_12x(const Instruction& instruction,
-                      Primitive::Type input_type,
-                      Primitive::Type result_type,
-                      uint32_t dex_pc);
-
-  void BuildCheckedDivRem(uint16_t out_reg,
-                          uint16_t first_reg,
-                          int64_t second_reg_or_constant,
-                          uint32_t dex_pc,
-                          Primitive::Type type,
-                          bool second_is_lit,
-                          bool is_div);
-
-  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
-
-  // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
-                                        uint32_t dex_pc,
-                                        bool is_put,
-                                        Primitive::Type field_type);
-  // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
-
-  void BuildArrayAccess(const Instruction& instruction,
-                        uint32_t dex_pc,
-                        bool is_put,
-                        Primitive::Type anticipated_type);
-
-  // Builds an invocation node and returns whether the instruction is supported.
-  bool BuildInvoke(const Instruction& instruction,
-                   uint32_t dex_pc,
-                   uint32_t method_idx,
-                   uint32_t number_of_vreg_arguments,
-                   bool is_range,
-                   uint32_t* args,
-                   uint32_t register_index);
-
-  // Builds a new array node and the instructions that fill it.
-  void BuildFilledNewArray(uint32_t dex_pc,
-                           uint32_t type_index,
-                           uint32_t number_of_vreg_arguments,
-                           bool is_range,
-                           uint32_t* args,
-                           uint32_t register_index);
-
-  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. Currently only used for non-reference and non-floating-point
-  // arrays.
-  template <typename T>
-  void BuildFillArrayData(HInstruction* object,
-                          const T* data,
-                          uint32_t element_count,
-                          Primitive::Type anticipated_type,
-                          uint32_t dex_pc);
-
-  // Fills the given object with data as specified in the fill-array-data
-  // instruction. The data must be for long and double arrays.
-  void BuildFillWideArrayData(HInstruction* object,
-                              const int64_t* data,
-                              uint32_t element_count,
-                              uint32_t dex_pc);
-
-  // Builds a `HInstanceOf`, or a `HCheckCast` instruction.
-  void BuildTypeCheck(const Instruction& instruction,
-                      uint8_t destination,
-                      uint8_t reference,
-                      uint16_t type_index,
-                      uint32_t dex_pc);
-
-  // Builds an instruction sequence for a packed switch statement.
-  void BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc);
-
-  // Builds a switch instruction from a packed switch statement.
-  void BuildSwitchJumpTable(const SwitchTable& table,
-                            const Instruction& instruction,
-                            HInstruction* value,
-                            uint32_t dex_pc);
-
-  // Builds an instruction sequence for a sparse switch statement.
-  void BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc);
-
-  void BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
-                             bool is_last_case, const SwitchTable& table,
-                             HInstruction* value, int32_t case_value_int,
-                             int32_t target_offset, uint32_t dex_pc);
-
-  bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches);
-
   void MaybeRecordStat(MethodCompilationStat compilation_stat);
+  bool SkipCompilation(size_t number_of_branches);
 
-  // Returns the outer-most compiling method's class.
-  mirror::Class* GetOutermostCompilingClass() const;
-
-  // Returns the class whose method is being compiled.
-  mirror::Class* GetCompilingClass() const;
-
-  // Returns whether `type_index` points to the outer-most compiling method's class.
-  bool IsOutermostCompilingClass(uint16_t type_index) const;
-
-  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
-                                     uint32_t dex_pc,
-                                     HInvoke* invoke);
-
-  bool SetupInvokeArguments(HInvoke* invoke,
-                            uint32_t number_of_vreg_arguments,
-                            uint32_t* args,
-                            uint32_t register_index,
-                            bool is_range,
-                            const char* descriptor,
-                            size_t start_index,
-                            size_t* argument_index);
-
-  bool HandleInvoke(HInvoke* invoke,
-                    uint32_t number_of_vreg_arguments,
-                    uint32_t* args,
-                    uint32_t register_index,
-                    bool is_range,
-                    const char* descriptor,
-                    HClinitCheck* clinit_check);
-
-  bool HandleStringInit(HInvoke* invoke,
-                        uint32_t number_of_vreg_arguments,
-                        uint32_t* args,
-                        uint32_t register_index,
-                        bool is_range,
-                        const char* descriptor);
-
-  HClinitCheck* ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      ArtMethod* method,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Build a HNewInstance instruction.
-  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
-
-  // Returns whether the compiler can assume `cls` is initialized.
-  bool IsInitialized(Handle<mirror::Class> cls) const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Tries to resolve a method using the class linker. Returns null if the
-  // method could not be resolved.
-  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
-
-  ArenaAllocator* const arena_;
-
-  // A list with one entry per code unit of the dex code, holding block
-  // information for the method. If an entry contains a block, then the dex
-  // instruction starting at that entry is the first instruction of a new block.
-  ArenaVector<HBasicBlock*> branch_targets_;
-
-  ArenaVector<HLocal*> locals_;
-
-  HBasicBlock* entry_block_;
-  HBasicBlock* exit_block_;
-  HBasicBlock* current_block_;
   HGraph* const graph_;
-
-  // The dex file where the method being compiled is.
   const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
 
   // The compilation unit of the current method being compiled. Note that
   // it can be an inlined method.
@@ -340,29 +110,13 @@
 
   CompilerDriver* const compiler_driver_;
 
-  // The compilation unit of the outermost method being compiled. That is the
-  // method being compiled (and not inlined), and potentially inlining other
-  // methods.
-  const DexCompilationUnit* const outer_compilation_unit_;
-
-  // The return type of the method being compiled.
-  const Primitive::Type return_type_;
-
-  // The pointer in the dex file where the instructions of the code item
-  // being currently compiled start.
-  const uint16_t* code_start_;
-
-  // The last invoke or fill-new-array being built. Only to be
-  // used by move-result instructions.
-  HInstruction* latest_result_;
+  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
 
   OptimizingCompilerStats* compilation_stats_;
 
-  const uint8_t* interpreter_metadata_;
-
-  // Dex cache for dex_file_.
-  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
-  Handle<mirror::DexCache> dex_cache_;
+  HBasicBlockBuilder block_builder_;
+  SsaBuilder ssa_builder_;
+  HInstructionBuilder instruction_builder_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
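With the helpers moved out, HGraphBuilder is reduced to a thin driver over the
three members above. A minimal sketch of how BuildGraph() could sequence them;
the Build() entry points on the block and instruction builders and the
kAnalysisInvalidBytecode error value are assumptions, as only
ssa_builder_.BuildSsa() appears in this patch:

GraphAnalysisResult HGraphBuilder::BuildGraph() {
  // 1) Create basic blocks at branch targets and try/catch boundaries.
  if (!block_builder_.Build()) {          // assumed entry point
    return kAnalysisInvalidBytecode;      // assumed error value
  }
  // 2) Populate the blocks with HInstructions.
  if (!instruction_builder_.Build()) {    // assumed entry point
    return kAnalysisInvalidBytecode;
  }
  // 3) Type the graph and eliminate dead/redundant phis.
  return ssa_builder_.BuildSsa();
}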
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
new file mode 100644
index 0000000..6dfffce
--- /dev/null
+++ b/compiler/optimizing/bytecode_utils.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+
+#include "base/arena_object.h"
+#include "dex_file.h"
+#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
+
+namespace art {
+
+class CodeItemIterator : public ValueObject {
+ public:
+  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u)
+      : code_ptr_(code_item.insns_ + start_dex_pc),
+        code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
+        dex_pc_(start_dex_pc) {}
+
+  bool Done() const { return code_ptr_ >= code_end_; }
+  bool IsLast() const { return code_ptr_ + CurrentInstruction().SizeInCodeUnits() >= code_end_; }
+
+  const Instruction& CurrentInstruction() const { return *Instruction::At(code_ptr_); }
+  uint32_t CurrentDexPc() const { return dex_pc_; }
+
+  void Advance() {
+    DCHECK(!Done());
+    size_t instruction_size = CurrentInstruction().SizeInCodeUnits();
+    code_ptr_ += instruction_size;
+    dex_pc_ += instruction_size;
+  }
+
+ private:
+  const uint16_t* code_ptr_;
+  const uint16_t* const code_end_;
+  uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeItemIterator);
+};
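A short usage sketch for the iterator; CountDexInstructions is a hypothetical
helper, not part of this patch:

inline size_t CountDexInstructions(const DexFile::CodeItem& code_item) {
  size_t count = 0;
  // CurrentInstruction() and CurrentDexPc() are valid whenever !Done().
  for (CodeItemIterator it(code_item); !it.Done(); it.Advance()) {
    ++count;
  }
  return count;
}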
+
+class DexSwitchTable : public ValueObject {
+ public:
+  DexSwitchTable(const Instruction& instruction, uint32_t dex_pc)
+      : instruction_(instruction),
+        dex_pc_(dex_pc),
+        sparse_(instruction.Opcode() == Instruction::SPARSE_SWITCH) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+    DCHECK_EQ(table[0], sparse_ ? static_cast<uint16_t>(Instruction::kSparseSwitchSignature)
+                                : static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  void CheckIndex(size_t index) const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+    }
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    CheckIndex(index);
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    CheckIndex(index);
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+  // Index of the first value in the table.
+  size_t GetFirstValueIndex() const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      return num_entries_;
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      return 1;
+    }
+  }
+
+  bool IsSparse() const { return sparse_; }
+
+  bool ShouldBuildDecisionTree() {
+    return IsSparse() || GetNumEntries() <= kSmallSwitchThreshold;
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This cannot be const because it has to be computed from the given
+  // instruction, and a complicated expression in the initializer list would
+  // hurt readability.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  // The maximum number of entries in a packed switch for which we emit a
+  // compare/jump series rather than a jump table.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
+
+  DISALLOW_COPY_AND_ASSIGN(DexSwitchTable);
+};
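To make the two payload layouts concrete, this is what the constructor parses,
derived directly from its reads of table[0], table[1] and values_:

// packed-switch payload (16-bit code units):
//   ushort ident                  kPackedSwitchSignature
//   ushort num_entries
//   int    first_key              values_[0]
//   int    targets[num_entries]   values_[1] .. values_[num_entries]
//
// sparse-switch payload:
//   ushort ident                  kSparseSwitchSignature
//   ushort num_entries
//   int    keys[num_entries]      values_[0] .. values_[num_entries - 1]
//   int    targets[num_entries]   values_[num_entries] .. values_[2 * num_entries - 1]

This is also why GetFirstValueIndex() returns 1 for packed tables and
num_entries_ for sparse ones.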
+
+class DexSwitchTableIterator {
+ public:
+  explicit DexSwitchTableIterator(const DexSwitchTable& table)
+      : table_(table),
+        num_entries_(static_cast<size_t>(table_.GetNumEntries())),
+        first_target_offset_(table_.GetFirstValueIndex()),
+        index_(0u) {}
+
+  bool Done() const { return index_ >= num_entries_; }
+  bool IsLast() const { return index_ == num_entries_ - 1; }
+
+  void Advance() {
+    DCHECK(!Done());
+    index_++;
+  }
+
+  int32_t CurrentKey() const {
+    return table_.IsSparse() ? table_.GetEntryAt(index_) : table_.GetEntryAt(0) + index_;
+  }
+
+  int32_t CurrentTargetOffset() const {
+    return table_.GetEntryAt(index_ + first_target_offset_);
+  }
+
+  uint32_t GetDexPcForCurrentIndex() const { return table_.GetDexPcForIndex(index_); }
+
+ private:
+  const DexSwitchTable& table_;
+  const size_t num_entries_;
+  const size_t first_target_offset_;
+
+  size_t index_;
+};
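A combined usage sketch; ForEachSwitchTarget is hypothetical, and it assumes
target offsets are relative to the dex pc of the switch instruction, as the
branch-target computation in the code_generator.cc hunk below suggests:

inline void ForEachSwitchTarget(const Instruction& instruction, uint32_t dex_pc) {
  DexSwitchTable table(instruction, dex_pc);
  for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
    int32_t key = it.CurrentKey();
    uint32_t target_dex_pc = dex_pc + it.CurrentTargetOffset();
    // A consumer would create or look up the block at target_dex_pc here.
    static_cast<void>(key);
    static_cast<void>(target_dex_pc);
  }
}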
+
+inline const Instruction& GetDexInstructionAt(const DexFile::CodeItem& code_item, uint32_t dex_pc) {
+  return CodeItemIterator(code_item, dex_pc).CurrentInstruction();
+}
+
+inline bool IsThrowingDexInstruction(const Instruction& instruction) {
+  // Special-case MONITOR_EXIT which is a throwing instruction but the verifier
+  // guarantees that it will never throw. This is necessary to avoid rejecting
+  // 'synchronized' blocks/methods.
+  return instruction.IsThrow() && instruction.Opcode() != Instruction::MONITOR_EXIT;
+}
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 7cf90725..65e5c3a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,6 +40,7 @@
 #include "code_generator_mips64.h"
 #endif
 
+#include "bytecode_utils.h"
 #include "compiled_method.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
@@ -298,23 +299,6 @@
   }
 }
 
-int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
-  uint16_t reg_number = local->GetRegNumber();
-  uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
-  if (reg_number >= number_of_locals) {
-    // Local is a parameter of the method. It is stored in the caller's frame.
-    // TODO: Share this logic with StackVisitor::GetVRegOffsetFromQuickCode.
-    return GetFrameSize() + InstructionSetPointerSize(GetInstructionSet())  // ART method
-                          + (reg_number - number_of_locals) * kVRegSize;
-  } else {
-    // Local is a temporary in this method. It is stored in this method's frame.
-    return GetFrameSize() - FrameEntrySpillSize()
-                          - kVRegSize  // filler.
-                          - (number_of_locals * kVRegSize)
-                          + (reg_number * kVRegSize);
-  }
-}
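For the record, plugging hypothetical numbers into the removed formulas (frame
size 64, FrameEntrySpillSize() 16, 4 locals, kVRegSize 4, 8-byte method
pointer): parameter vreg 5 lands in the caller's frame at
64 + 8 + (5 - 4) * 4 = 76, while local vreg 1 lands in this method's frame at
64 - 16 - 4 - 4 * 4 + 1 * 4 = 32.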
-
 void CodeGenerator::CreateCommonInvokeLocationSummary(
     HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) {
   ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena();
@@ -680,7 +664,7 @@
       uint32_t target = dex_pc + instruction.GetTargetOffset();
       CheckCovers(target, graph, code_info, loop_headers, &covered);
     } else if (instruction.IsSwitch()) {
-      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+      DexSwitchTable table(instruction, dex_pc);
       uint16_t num_entries = table.GetNumEntries();
       size_t offset = table.GetFirstValueIndex();
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index cad5529..1a060b1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -211,7 +211,6 @@
                                 size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
-  int32_t GetStackSlot(HLocal* local) const;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
@@ -525,8 +524,6 @@
     slow_paths_.reserve(8);
   }
 
-  virtual Location GetStackLocation(HLoadLocal* load) const = 0;
-
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 98577d6..a0c1412 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -954,30 +954,6 @@
   __ BindTrackedLabel(label);
 }
 
-Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1724,49 +1700,6 @@
   HandleCondition(comp);
 }
 
-void LocationsBuilderARM::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8434128..144d58d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -345,8 +345,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 491014d..7699ddd 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1072,31 +1072,6 @@
   }
 }
 
-Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
@@ -4010,14 +3985,6 @@
   __ Str(wzr, GetExceptionTlsAddress());
 }
 
-void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
   if (kEmitCompilerReadBarrier) {
@@ -4156,14 +4123,6 @@
   }
 }
 
-void LocationsBuilderARM64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorARM64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -4556,34 +4515,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-      UNREACHABLE();
-  }
-}
-
-void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
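
Note: the hunks above, and the matching hunks for arm, mips, mips64, x86 and x86_64 that follow, all make the same deletion. With SSA form now constructed during graph building, HLocal, HLoadLocal and HStoreLocal no longer survive to code generation, so GetStackLocation and the Visit(Load/Store)Local visitors are dead code in every backend. The rule each deleted switch encoded reduces to a one-liner; a minimal sketch against a hypothetical PrimType enum (not ART's Primitive::Type):

#include <cstddef>

enum class PrimType { kInt, kRef, kFloat, kLong, kDouble };

// 32-bit values (int, reference, float) take a single stack slot;
// 64-bit values (long, double) take a double stack slot.
constexpr size_t StackSlotsFor(PrimType type) {
  return (type == PrimType::kLong || type == PrimType::kDouble) ? 2u : 1u;
}

static_assert(StackSlotsFor(PrimType::kFloat) == 1u, "32-bit: single slot");
static_assert(StackSlotsFor(PrimType::kDouble) == 2u, "64-bit: double slot");
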
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 8ec7531..ec46a34 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -386,8 +386,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8b19f84..2df37cd 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -974,31 +974,6 @@
   }
 }
 
-Location CodeGeneratorMIPS::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -4063,14 +4038,6 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = load->NeedsEnvironment()
       ? LocationSummary::kCallOnSlowPath
@@ -4096,14 +4063,6 @@
   }
 }
 
-void LocationsBuilderMIPS::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -4611,33 +4570,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index afe7917c..5e6fec8 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,8 +290,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 2f9eca6..cc1f372 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -869,31 +869,6 @@
   }
 }
 
-Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
-  Primitive::Type type = load->GetType();
-
-  switch (type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << type;
-  }
-
-  LOG(FATAL) << "Unreachable";
-  return Location::NoLocation();
-}
-
 void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
                                      GpuRegister value,
                                      bool value_can_be_null) {
@@ -3281,14 +3256,6 @@
   __ StoreToOffset(kStoreWord, ZERO, TR, GetExceptionTlsOffset());
 }
 
-void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) {
-  load->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
 void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = load->NeedsEnvironment()
       ? LocationSummary::kCallOnSlowPath
@@ -3317,14 +3284,6 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorMIPS64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
 void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -3745,33 +3704,6 @@
   HandleShift(shr);
 }
 
-void LocationsBuilderMIPS64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
-  Primitive::Type field_type = store->InputAt(1)->GetType();
-  switch (field_type) {
-    case Primitive::kPrimNot:
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented local type " << field_type;
-  }
-}
-
-void InstructionCodeGeneratorMIPS64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderMIPS64::VisitSub(HSub* instruction) {
   HandleBinaryOp(instruction);
 }
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 94767cb..4e15cdd 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -286,8 +286,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
   size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 715b5be..94d2f0c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -892,30 +892,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const {
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -1646,49 +1622,6 @@
   __ nop();
 }
 
-void LocationsBuilderX86::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unknown local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 1fa22fc..69a6253 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -367,8 +367,6 @@
 
   void SetupBlockedRegisters() const OVERRIDE;
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cc46a07..da126e4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1118,30 +1118,6 @@
   __ Bind(GetLabelOf(block));
 }
 
-Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
-  switch (load->GetType()) {
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unexpected type " << load->GetType();
-      UNREACHABLE();
-  }
-
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-}
-
 void CodeGeneratorX86_64::Move(Location destination, Location source) {
   if (source.Equals(destination)) {
     return;
@@ -1660,49 +1636,6 @@
   __ nop();
 }
 
-void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitLocal(HLocal* local) {
-  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
-}
-
-void LocationsBuilderX86_64::VisitLoadLocal(HLoadLocal* local) {
-  local->SetLocations(nullptr);
-}
-
-void InstructionCodeGeneratorX86_64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
-  // Nothing to do, this is driven by the code generator.
-}
-
-void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(store, LocationSummary::kNoCall);
-  switch (store->InputAt(1)->GetType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-    case Primitive::kPrimFloat:
-      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    case Primitive::kPrimLong:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
-      break;
-
-    default:
-      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
-  }
-}
-
-void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
-}
-
 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 7ebce58..d7ce7c6 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -350,8 +350,6 @@
     return GetLabelOf(block)->Position();
   }
 
-  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
-
   void SetupBlockedRegisters() const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 6c55194..6412b24 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -53,17 +53,17 @@
 }
 
 static inline vixl::Register XRegisterFrom(Location location) {
-  DCHECK(location.IsRegister());
+  DCHECK(location.IsRegister()) << location;
   return vixl::Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
 static inline vixl::Register WRegisterFrom(Location location) {
-  DCHECK(location.IsRegister());
+  DCHECK(location.IsRegister()) << location;
   return vixl::Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
 static inline vixl::Register RegisterFrom(Location location, Primitive::Type type) {
-  DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type));
+  DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
 }
 
@@ -77,17 +77,17 @@
 }
 
 static inline vixl::FPRegister DRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister());
+  DCHECK(location.IsFpuRegister()) << location;
   return vixl::FPRegister::DRegFromCode(location.reg());
 }
 
 static inline vixl::FPRegister SRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister());
+  DCHECK(location.IsFpuRegister()) << location;
   return vixl::FPRegister::SRegFromCode(location.reg());
 }
 
 static inline vixl::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
-  DCHECK(Primitive::IsFloatingPointType(type));
+  DCHECK(Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
 }
 
@@ -124,7 +124,7 @@
   } else if (instr->IsNullConstant()) {
     return 0;
   } else {
-    DCHECK(instr->IsLongConstant());
+    DCHECK(instr->IsLongConstant()) << instr->DebugName();
     return instr->AsLongConstant()->GetValue();
   }
 }
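
Note: the common_arm64.h hunks change no behavior; they only stream the offending Location, type, or instruction name into each DCHECK so a failed check identifies its operand in the log. A minimal sketch of the aborting-stream pattern, assuming a simplified macro rather than ART's real base/logging.h machinery:

#include <cstdlib>
#include <iostream>
#include <sstream>

// Temporary whose destructor prints the accumulated message and aborts;
// operands streamed after the macro end up in the failure output.
class CheckFailure {
 public:
  explicit CheckFailure(const char* expr) { stream_ << "Check failed: " << expr; }
  template <typename T>
  CheckFailure& operator<<(const T& value) {
    stream_ << " " << value;
    return *this;
  }
  ~CheckFailure() {
    std::cerr << stream_.str() << std::endl;
    std::abort();
  }
 private:
  std::ostringstream stream_;
};

// The if/else shape keeps a trailing `<< ...` attached to the failure object.
#define SIMPLE_CHECK(cond) if (cond) {} else CheckFailure(#cond)

// Usage, mirroring the diff: SIMPLE_CHECK(location.IsRegister()) << location;
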
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 1e54a0a..d1a2a26 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -111,22 +111,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  2: IntConstant [5]\n"
-      "  10: SuspendCheck\n"
-      "  11: Goto 1\n"
+      "  2: IntConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  5: Neg(2) [8]\n"
-      "  8: Return(5)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  9: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  2: IntConstant [5]\n", "  2: IntConstant\n" },
-    { "  10: SuspendCheck\n",   "  10: SuspendCheck\n"
-                                "  12: IntConstant [8]\n" },
-    { "  5: Neg(2) [8]\n",      removed },
-    { "  8: Return(5)\n",       "  8: Return(12)\n" }
+    { "  2: IntConstant [3]\n", "  2: IntConstant\n"
+                                "  6: IntConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",      removed },
+    { "  4: Return(3)\n",       "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -173,22 +172,21 @@
 
   std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  4: LongConstant [7]\n"
-      "  12: SuspendCheck\n"
-      "  13: Goto 1\n"
+      "  2: LongConstant [3]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  7: Neg(4) [10]\n"
-      "  10: Return(7)\n"
+      "  3: Neg(2) [4]\n"
+      "  4: Return(3)\n"
       "BasicBlock 2, pred: 1\n"
-      "  11: Exit\n";
+      "  5: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  4: LongConstant [7]\n", "  4: LongConstant\n" },
-    { "  12: SuspendCheck\n",    "  12: SuspendCheck\n"
-                                 "  14: LongConstant [10]\n" },
-    { "  7: Neg(4) [10]\n",      removed },
-    { "  10: Return(7)\n",       "  10: Return(14)\n" }
+    { "  2: LongConstant [3]\n", "  2: LongConstant\n"
+                                 "  6: LongConstant [4]\n" },
+    { "  3: Neg(2) [4]\n",       removed },
+    { "  4: Return(3)\n",        "  4: Return(6)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -201,7 +199,7 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  4: LongConstant\n", removed },
+    { "  2: LongConstant\n", removed },
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -232,25 +230,24 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  14: SuspendCheck\n"
-    "  15: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Add(3, 5) [12]\n"
-    "  12: Return(9)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  13: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
-    { "  14: SuspendCheck\n",   "  14: SuspendCheck\n"
-                                "  16: IntConstant [12]\n" },
-    { "  9: Add(3, 5) [12]\n",  removed },
-    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+    { "  2: IntConstant [4]\n", "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n", "  3: IntConstant\n"
+                                "  7: IntConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",   removed },
+    { "  5: Return(4)\n",       "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -263,8 +260,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  3: IntConstant\n", removed },
-    { "  5: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -302,35 +299,34 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  11: IntConstant [17]\n"
-    "  13: IntConstant [17]\n"
-    "  26: SuspendCheck\n"
-    "  27: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Add(3, 5) [21]\n"
-    "  17: Add(11, 13) [21]\n"
-    "  21: Add(9, 17) [24]\n"
-    "  24: Return(21)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  25: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  5: IntConstant [7]\n"
+      "  6: IntConstant [7]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [8]\n"
+      "  7: Add(5, 6) [8]\n"
+      "  8: Add(4, 7) [9]\n"
+      "  9: Return(8)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  10: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n",   "  5: IntConstant\n" },
-    { "  11: IntConstant [17]\n", "  11: IntConstant\n" },
-    { "  13: IntConstant [17]\n", "  13: IntConstant\n" },
-    { "  26: SuspendCheck\n",     "  26: SuspendCheck\n"
-                                  "  28: IntConstant\n"
-                                  "  29: IntConstant\n"
-                                  "  30: IntConstant [24]\n" },
-    { "  9: Add(3, 5) [21]\n",    removed },
-    { "  17: Add(11, 13) [21]\n", removed },
-    { "  21: Add(9, 17) [24]\n",  removed  },
-    { "  24: Return(21)\n",       "  24: Return(30)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n" },
+    { "  5: IntConstant [7]\n",  "  5: IntConstant\n" },
+    { "  6: IntConstant [7]\n",  "  6: IntConstant\n"
+                                 "  11: IntConstant\n"
+                                 "  12: IntConstant\n"
+                                 "  13: IntConstant [9]\n" },
+    { "  4: Add(2, 3) [8]\n",    removed },
+    { "  7: Add(5, 6) [8]\n",    removed },
+    { "  8: Add(4, 7) [9]\n",    removed  },
+    { "  9: Return(8)\n",        "  9: Return(13)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -349,12 +345,12 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
+    { "  2: IntConstant\n",  removed },
     { "  3: IntConstant\n",  removed },
     { "  5: IntConstant\n",  removed },
+    { "  6: IntConstant\n",  removed },
     { "  11: IntConstant\n", removed },
-    { "  13: IntConstant\n", removed },
-    { "  28: IntConstant\n", removed },
-    { "  29: IntConstant\n", removed }
+    { "  12: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -384,25 +380,24 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  14: SuspendCheck\n"
-    "  15: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  9: Sub(3, 5) [12]\n"
-    "  12: Return(9)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  13: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n", "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n", "  5: IntConstant\n" },
-    { "  14: SuspendCheck\n",   "  14: SuspendCheck\n"
-                                "  16: IntConstant [12]\n" },
-    { "  9: Sub(3, 5) [12]\n",  removed },
-    { "  12: Return(9)\n",      "  12: Return(16)\n" }
+    { "  2: IntConstant [4]\n",  "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",  "  3: IntConstant\n"
+                                 "  7: IntConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",    removed },
+    { "  5: Return(4)\n",        "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -415,8 +410,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  3: IntConstant\n", removed },
-    { "  5: IntConstant\n", removed }
+    { "  2: IntConstant\n", removed },
+    { "  3: IntConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -448,25 +443,24 @@
     Instruction::RETURN_WIDE | 4 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  6: LongConstant [12]\n"
-    "  8: LongConstant [12]\n"
-    "  17: SuspendCheck\n"
-    "  18: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  12: Add(6, 8) [15]\n"
-    "  15: Return(12)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  16: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Add(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
-    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
-    { "  17: SuspendCheck\n",     "  17: SuspendCheck\n"
-                                  "  19: LongConstant [15]\n" },
-    { "  12: Add(6, 8) [15]\n",   removed },
-    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Add(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -479,8 +473,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
-    { "  8: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -513,25 +507,24 @@
     Instruction::RETURN_WIDE | 4 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  6: LongConstant [12]\n"
-    "  8: LongConstant [12]\n"
-    "  17: SuspendCheck\n"
-    "  18: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  12: Sub(6, 8) [15]\n"
-    "  15: Return(12)\n"
-    "BasicBlock 2, pred: 1\n"
-    "  16: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: LongConstant [4]\n"
+      "  3: LongConstant [4]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  4: Sub(2, 3) [5]\n"
+      "  5: Return(4)\n"
+      "BasicBlock 2, pred: 1\n"
+      "  6: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  6: LongConstant [12]\n", "  6: LongConstant\n" },
-    { "  8: LongConstant [12]\n", "  8: LongConstant\n" },
-    { "  17: SuspendCheck\n",     "  17: SuspendCheck\n"
-                                  "  19: LongConstant [15]\n" },
-    { "  12: Sub(6, 8) [15]\n",   removed },
-    { "  15: Return(12)\n",       "  15: Return(19)\n" }
+    { "  2: LongConstant [4]\n",  "  2: LongConstant\n" },
+    { "  3: LongConstant [4]\n",  "  3: LongConstant\n"
+                                  "  7: LongConstant [5]\n" },
+    { "  4: Sub(2, 3) [5]\n",     removed },
+    { "  5: Return(4)\n",         "  5: Return(7)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -544,8 +537,8 @@
 
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
-    { "  6: LongConstant\n", removed },
-    { "  8: LongConstant\n", removed }
+    { "  2: LongConstant\n", removed },
+    { "  3: LongConstant\n", removed }
   };
   std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff);
 
@@ -593,46 +586,45 @@
     Instruction::RETURN | 2 << 8);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"            // v0 <- 1
-    "  5: IntConstant [9]\n"            // v1 <- 2
-    "  13: IntConstant [14]\n"          // const 5
-    "  18: IntConstant [19]\n"          // const 4
-    "  23: IntConstant [24]\n"          // const 8
-    "  29: SuspendCheck\n"
-    "  30: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"             // v2 <- v0 + v1 = 1 + 2 = 3
-    "  11: Goto 3\n"                    // goto L2
-    "BasicBlock 2, pred: 3, succ: 4\n"  // L1:
-    "  14: Add(19, 13) [24]\n"          // v1 <- v0 + 3 = 7 + 5 = 12
-    "  16: Goto 4\n"                    // goto L3
-    "BasicBlock 3, pred: 1, succ: 2\n"  // L2:
-    "  19: Add(9, 18) [14]\n"           // v0 <- v2 + 2 = 3 + 4 = 7
-    "  21: Goto 2\n"                    // goto L1
-    "BasicBlock 4, pred: 2, succ: 5\n"  // L3:
-    "  24: Add(14, 23) [27]\n"          // v2 <- v1 + 4 = 12 + 8 = 20
-    "  27: Return(24)\n"                // return v2
-    "BasicBlock 5, pred: 4\n"
-    "  28: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"             // v0 <- 1
+      "  3: IntConstant [4]\n"             // v1 <- 2
+      "  6: IntConstant [7]\n"             // const 5
+      "  9: IntConstant [10]\n"            // const 4
+      "  12: IntConstant [13]\n"           // const 8
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  4: Add(2, 3) [7]\n"               // v2 <- v0 + v1 = 1 + 2 = 3
+      "  5: Goto 3\n"                      // goto L2
+      "BasicBlock 2, pred: 3, succ: 4\n"   // L1:
+      "  10: Add(7, 9) [13]\n"             // v1 <- v0 + 3 = 7 + 5 = 12
+      "  11: Goto 4\n"                     // goto L3
+      "BasicBlock 3, pred: 1, succ: 2\n"   // L2:
+      "  7: Add(4, 6) [10]\n"              // v0 <- v2 + 2 = 3 + 4 = 7
+      "  8: Goto 2\n"                      // goto L1
+      "BasicBlock 4, pred: 2, succ: 5\n"   // L3:
+      "  13: Add(10, 12) [14]\n"           // v2 <- v1 + 4 = 12 + 8 = 20
+      "  14: Return(13)\n"                 // return v2
+      "BasicBlock 5, pred: 4\n"
+      "  15: Exit\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [9]\n",   "  3: IntConstant\n" },
-    { "  5: IntConstant [9]\n",   "  5: IntConstant\n" },
-    { "  13: IntConstant [14]\n", "  13: IntConstant\n" },
-    { "  18: IntConstant [19]\n", "  18: IntConstant\n" },
-    { "  23: IntConstant [24]\n", "  23: IntConstant\n" },
-    { "  29: SuspendCheck\n",     "  29: SuspendCheck\n"
-                                  "  31: IntConstant\n"
-                                  "  32: IntConstant\n"
-                                  "  33: IntConstant\n"
-                                  "  34: IntConstant [27]\n" },
-    { "  9: Add(3, 5) [19]\n",    removed },
-    { "  14: Add(19, 13) [24]\n", removed },
-    { "  19: Add(9, 18) [14]\n",  removed },
-    { "  24: Add(14, 23) [27]\n", removed },
-    { "  27: Return(24)\n",       "  27: Return(34)\n"}
+    { "  2: IntConstant [4]\n",   "  2: IntConstant\n" },
+    { "  3: IntConstant [4]\n",   "  3: IntConstant\n" },
+    { "  6: IntConstant [7]\n",   "  6: IntConstant\n" },
+    { "  9: IntConstant [10]\n",  "  9: IntConstant\n" },
+    { "  12: IntConstant [13]\n", "  12: IntConstant\n"
+                                  "  16: IntConstant\n"
+                                  "  17: IntConstant\n"
+                                  "  18: IntConstant\n"
+                                  "  19: IntConstant [14]\n" },
+    { "  4: Add(2, 3) [7]\n",     removed },
+    { "  10: Add(7, 9) [13]\n",   removed },
+    { "  7: Add(4, 6) [10]\n",    removed },
+    { "  13: Add(10, 12) [14]\n", removed },
+    { "  14: Return(13)\n",       "  14: Return(19)\n"}
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -654,14 +646,14 @@
 
   // Expected difference after dead code elimination.
   std::string expected_after_dce =
-    "BasicBlock 0, succ: 1\n"
-    "  29: SuspendCheck\n"
-    "  34: IntConstant [27]\n"
-    "  30: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5\n"
-    "  27: Return(34)\n"
-    "BasicBlock 5, pred: 1\n"
-    "  28: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  19: IntConstant [14]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5\n"
+      "  14: Return(19)\n"
+      "BasicBlock 5, pred: 1\n"
+      "  15: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -693,31 +685,31 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [15, 22, 8]\n"
-    "  5: IntConstant [22, 8]\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  8: GreaterThanOrEqual(3, 5) [9]\n"
-    "  9: If(8)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  22: Phi(5, 3) [15]\n"
-    "  15: Add(22, 3)\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  18: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  21: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  7: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  11: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   // Expected difference after constant folding.
   diff_t expected_cf_diff = {
-    { "  3: IntConstant [15, 22, 8]\n",      "  3: IntConstant [9, 15, 22]\n" },
-    { "  5: IntConstant [22, 8]\n",          "  5: IntConstant [22]\n" },
-    { "  8: GreaterThanOrEqual(3, 5) [9]\n", removed },
-    { "  9: If(8)\n",                        "  9: If(3)\n" }
+    { "  3: IntConstant [9, 8, 5]\n",        "  3: IntConstant [6, 9, 8]\n" },
+    { "  4: IntConstant [8, 5]\n",           "  4: IntConstant [8]\n" },
+    { "  5: GreaterThanOrEqual(3, 4) [6]\n", removed },
+    { "  6: If(5)\n",                        "  6: If(3)\n" }
   };
   std::string expected_after_cf = Patch(expected_before, expected_cf_diff);
 
@@ -730,13 +722,13 @@
 
   // Expected graph after dead code elimination.
   std::string expected_after_dce =
-    "BasicBlock 0, succ: 1\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 4\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 1\n"
-    "  18: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 4\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 1\n"
+      "  11: Exit\n";
 
   TestCode(data,
            expected_before,
@@ -766,7 +758,10 @@
   HInstruction* parameter = new (&allocator_) HParameterValue(
       graph_->GetDexFile(), 0, 0, Primitive::kPrimInt, true);
   entry_block->AddInstruction(parameter);
+  entry_block->AddInstruction(new (&allocator_) HGoto());
+
   HInstruction* zero = graph_->GetIntConstant(0);
+
   HInstruction* last;
   block->AddInstruction(last = new (&allocator_) HAbove(zero, parameter));
   block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
@@ -784,70 +779,70 @@
   block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
   block->AddInstruction(last = new (&allocator_) HBelowOrEqual(parameter, zero));
   block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0));
-
-  entry_block->AddInstruction(new (&allocator_) HGoto());
   block->AddInstruction(new (&allocator_) HReturn(zero));
+
   exit_block->AddInstruction(new (&allocator_) HExit());
 
   graph_->BuildDominatorTree();
 
   const std::string expected_before =
       "BasicBlock 0, succ: 1\n"
-      "  0: ParameterValue [17, 17, 16, 15, 15, 14, 13, 13, 12, 11, 11, 10, 9, 9, "
-                           "8, 7, 7, 6, 5, 5, 4, 3, 3, 2]\n"
-      "  1: IntConstant [19, 16, 14, 12, 10, 8, 6, 4, 2]\n"
-      "  18: Goto 1\n"
+      "  0: ParameterValue [18, 18, 17, 16, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, 9, "
+                            "8, 8, 7, 6, 6, 5, 4, 4, 3]\n"
+      "  2: IntConstant [19, 17, 15, 13, 11, 9, 7, 5, 3]\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  2: Above(1, 0) [3]\n"
-      "  3: Select(0, 0, 2)\n"
-      "  4: Above(0, 1) [5]\n"
-      "  5: Select(0, 0, 4)\n"
-      "  6: AboveOrEqual(1, 0) [7]\n"
-      "  7: Select(0, 0, 6)\n"
-      "  8: AboveOrEqual(0, 1) [9]\n"
-      "  9: Select(0, 0, 8)\n"
-      "  10: Below(1, 0) [11]\n"
-      "  11: Select(0, 0, 10)\n"
-      "  12: Below(0, 1) [13]\n"
-      "  13: Select(0, 0, 12)\n"
-      "  14: BelowOrEqual(1, 0) [15]\n"
-      "  15: Select(0, 0, 14)\n"
-      "  16: BelowOrEqual(0, 1) [17]\n"
-      "  17: Select(0, 0, 16)\n"
-      "  19: Return(1)\n"
+      "  3: Above(2, 0) [4]\n"
+      "  4: Select(0, 0, 3)\n"
+      "  5: Above(0, 2) [6]\n"
+      "  6: Select(0, 0, 5)\n"
+      "  7: AboveOrEqual(2, 0) [8]\n"
+      "  8: Select(0, 0, 7)\n"
+      "  9: AboveOrEqual(0, 2) [10]\n"
+      "  10: Select(0, 0, 9)\n"
+      "  11: Below(2, 0) [12]\n"
+      "  12: Select(0, 0, 11)\n"
+      "  13: Below(0, 2) [14]\n"
+      "  14: Select(0, 0, 13)\n"
+      "  15: BelowOrEqual(2, 0) [16]\n"
+      "  16: Select(0, 0, 15)\n"
+      "  17: BelowOrEqual(0, 2) [18]\n"
+      "  18: Select(0, 0, 17)\n"
+      "  19: Return(2)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
   const std::string expected_after_cf =
       "BasicBlock 0, succ: 1\n"
-      "  0: ParameterValue [17, 17, 16, 15, 15, 13, 13, 11, 11, 10, 9, 9, 7, 7, 6, 5, 5, 4, 3, 3]\n"
-      "  1: IntConstant [13, 3, 19, 16, 10, 6, 4]\n"
-      "  21: IntConstant [15, 9]\n"
-      "  18: Goto 1\n"
+      "  0: ParameterValue [18, 18, 17, 16, 16, 14, 14, 12, 12, 11, 10, 10, "
+                            "8, 8, 7, 6, 6, 5, 4, 4]\n"
+      "  2: IntConstant [14, 4, 19, 17, 11, 7, 5]\n"
+      "  21: IntConstant [16, 10]\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  3: Select(0, 0, 1)\n"
-      "  4: Above(0, 1) [5]\n"
-      "  5: Select(0, 0, 4)\n"
-      "  6: AboveOrEqual(1, 0) [7]\n"
-      "  7: Select(0, 0, 6)\n"
-      "  9: Select(0, 0, 21)\n"
-      "  10: Below(1, 0) [11]\n"
-      "  11: Select(0, 0, 10)\n"
-      "  13: Select(0, 0, 1)\n"
-      "  15: Select(0, 0, 21)\n"
-      "  16: BelowOrEqual(0, 1) [17]\n"
-      "  17: Select(0, 0, 16)\n"
-      "  19: Return(1)\n"
+      "  4: Select(0, 0, 2)\n"
+      "  5: Above(0, 2) [6]\n"
+      "  6: Select(0, 0, 5)\n"
+      "  7: AboveOrEqual(2, 0) [8]\n"
+      "  8: Select(0, 0, 7)\n"
+      "  10: Select(0, 0, 21)\n"
+      "  11: Below(2, 0) [12]\n"
+      "  12: Select(0, 0, 11)\n"
+      "  14: Select(0, 0, 2)\n"
+      "  16: Select(0, 0, 21)\n"
+      "  17: BelowOrEqual(0, 2) [18]\n"
+      "  18: Select(0, 0, 17)\n"
+      "  19: Return(2)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
   const std::string expected_after_dce =
       "BasicBlock 0, succ: 1\n"
       "  0: ParameterValue\n"
-      "  1: IntConstant [19]\n"
-      "  18: Goto 1\n"
+      "  2: IntConstant [19]\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  19: Return(1)\n"
+      "  19: Return(2)\n"
       "BasicBlock 2, pred: 1\n"
       "  20: Exit\n";
 
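Note: the test expectations above are renumbered because the new builders assign instruction ids in a different order; the graphs themselves are unchanged. The tests keep each expectation small with the Patch(expected, diff) idiom, which the following sketch re-implements — illustrative only, the actual helper lives in ART's optimizing test utilities:

#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Each pair replaces the first occurrence of `first` with `second`; an entry
// that fails to match means the "before" expectation itself is stale.
using diff_t = std::vector<std::pair<std::string, std::string>>;

std::string Patch(const std::string& original, const diff_t& diff) {
  std::string result = original;
  for (const auto& entry : diff) {
    size_t pos = result.find(entry.first);
    assert(pos != std::string::npos && "diff entry does not match expectation");
    result.replace(pos, entry.first.size(), entry.second);
  }
  return result;
}
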
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 83e724b..fe52aac 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -78,30 +78,30 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [15, 22, 8]\n"
-    "  5: IntConstant [22, 8]\n"
-    "  19: SuspendCheck\n"
-    "  20: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  8: GreaterThanOrEqual(3, 5) [9]\n"
-    "  9: If(8)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  22: Phi(5, 3) [15]\n"
-    "  15: Add(22, 3)\n"
-    "  17: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  18: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  21: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [9, 8, 5]\n"
+      "  4: IntConstant [8, 5]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  5: GreaterThanOrEqual(3, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  7: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  8: Phi(4, 3) [9]\n"
+      "  9: Add(8, 3)\n"
+      "  10: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  11: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   // Expected difference after dead code elimination.
   diff_t expected_diff = {
-    { "  3: IntConstant [15, 22, 8]\n", "  3: IntConstant [22, 8]\n" },
-    { "  22: Phi(5, 3) [15]\n",         "  22: Phi(5, 3)\n" },
-    { "  15: Add(22, 3)\n",             removed }
+    { "  3: IntConstant [9, 8, 5]\n",  "  3: IntConstant [8, 5]\n" },
+    { "  8: Phi(4, 3) [9]\n",          "  8: Phi(4, 3)\n" },
+    { "  9: Add(8, 3)\n",              removed }
   };
   std::string expected_after = Patch(expected_before, expected_diff);
 
@@ -144,37 +144,37 @@
     Instruction::RETURN_VOID);
 
   std::string expected_before =
-    "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  13: IntConstant [14]\n"
-    "  18: IntConstant [19]\n"
-    "  23: IntConstant [24]\n"
-    "  28: SuspendCheck\n"
-    "  29: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"
-    "  11: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  14: Add(19, 13) [24]\n"
-    "  16: Goto 4\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  19: Add(9, 18) [14]\n"
-    "  21: Goto 2\n"
-    "BasicBlock 4, pred: 2, succ: 5\n"
-    "  24: Add(14, 23)\n"
-    "  26: ReturnVoid\n"
-    "BasicBlock 5, pred: 4\n"
-    "  27: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant [4]\n"
+      "  3: IntConstant [4]\n"
+      "  6: IntConstant [7]\n"
+      "  9: IntConstant [10]\n"
+      "  12: IntConstant [13]\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  4: Add(2, 3) [7]\n"
+      "  5: Goto 3\n"
+      "BasicBlock 2, pred: 3, succ: 4\n"
+      "  10: Add(7, 9) [13]\n"
+      "  11: Goto 4\n"
+      "BasicBlock 3, pred: 1, succ: 2\n"
+      "  7: Add(4, 6) [10]\n"
+      "  8: Goto 2\n"
+      "BasicBlock 4, pred: 2, succ: 5\n"
+      "  13: Add(10, 12)\n"
+      "  14: ReturnVoid\n"
+      "BasicBlock 5, pred: 4\n"
+      "  15: Exit\n";
 
   std::string expected_after =
-    "BasicBlock 0, succ: 1\n"
-    "  28: SuspendCheck\n"
-    "  29: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5\n"
-    "  26: ReturnVoid\n"
-    "BasicBlock 5, pred: 1\n"
-    "  27: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5\n"
+      "  14: ReturnVoid\n"
+      "BasicBlock 5, pred: 1\n"
+      "  15: Exit\n";
 
   TestCode(data, expected_before, expected_after);
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c790d01..9ea4b2d 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -27,6 +27,21 @@
 
 namespace art {
 
+static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) {
+  return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid();
+}
+
+static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) {
+  if (!block->IsSingleTryBoundary()) {
+    return false;
+  }
+
+  HTryBoundary* boundary = block->GetLastInstruction()->AsTryBoundary();
+  return block->GetPredecessors().size() == 1u &&
+         boundary->GetNormalFlowSuccessor()->IsExitBlock() &&
+         !boundary->IsEntry();
+}
+
 void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
   current_block_ = block;
 
@@ -85,28 +100,17 @@
                           block->GetBlockId()));
   }
 
-  // Ensure that only Return(Void) and Throw jump to Exit. An exiting
-  // TryBoundary may be between a Throw and the Exit if the Throw is in a try.
+  // Ensure that only Return(Void) and Throw jump to Exit. An exiting TryBoundary
+  // may lie between them when the Throw/Return(Void) is inside a try block.
   if (block->IsExitBlock()) {
     for (HBasicBlock* predecessor : block->GetPredecessors()) {
-      if (predecessor->IsSingleTryBoundary()
-          && !predecessor->GetLastInstruction()->AsTryBoundary()->IsEntry()) {
-        HBasicBlock* real_predecessor = predecessor->GetSinglePredecessor();
-        HInstruction* last_instruction = real_predecessor->GetLastInstruction();
-        if (!last_instruction->IsThrow()) {
-          AddError(StringPrintf("Unexpected TryBoundary between %s:%d and Exit.",
-                                last_instruction->DebugName(),
-                                last_instruction->GetId()));
-        }
-      } else {
-        HInstruction* last_instruction = predecessor->GetLastInstruction();
-        if (!last_instruction->IsReturn()
-            && !last_instruction->IsReturnVoid()
-            && !last_instruction->IsThrow()) {
-          AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
-                                last_instruction->DebugName(),
-                                last_instruction->GetId()));
-        }
+      HInstruction* last_instruction = IsExitTryBoundaryIntoExitBlock(predecessor) ?
+        predecessor->GetSinglePredecessor()->GetLastInstruction() :
+        predecessor->GetLastInstruction();
+      if (!IsAllowedToJumpToExitBlock(last_instruction)) {
+        AddError(StringPrintf("Unexpected instruction %s:%d jumps into the exit block.",
+                              last_instruction->DebugName(),
+                              last_instruction->GetId()));
       }
     }
   }
@@ -176,16 +180,15 @@
   // predecessors). Exceptional edges are synthesized and hence
   // not accounted for.
   if (block->GetSuccessors().size() > 1) {
-    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
-      if (successor->IsExitBlock() &&
-          block->IsSingleTryBoundary() &&
-          block->GetPredecessors().size() == 1u &&
-          block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) {
-        // Allowed critical edge Throw->TryBoundary->Exit.
-      } else if (successor->GetPredecessors().size() > 1) {
-        AddError(StringPrintf("Critical edge between blocks %d and %d.",
-                              block->GetBlockId(),
-                              successor->GetBlockId()));
+    if (IsExitTryBoundaryIntoExitBlock(block)) {
+      // Allowed critical edge (Throw/Return/ReturnVoid)->TryBoundary->Exit.
+    } else {
+      for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+        if (successor->GetPredecessors().size() > 1) {
+          AddError(StringPrintf("Critical edge between blocks %d and %d.",
+                                block->GetBlockId(),
+                                successor->GetBlockId()));
+        }
       }
     }
   }
@@ -505,7 +508,8 @@
 
 void GraphChecker::VisitReturn(HReturn* ret) {
   VisitInstruction(ret);
-  if (!ret->GetBlock()->GetSingleSuccessor()->IsExitBlock()) {
+  HBasicBlock* successor = ret->GetBlock()->GetSingleSuccessor();
+  if (!successor->IsExitBlock() && !IsExitTryBoundaryIntoExitBlock(successor)) {
     AddError(StringPrintf("%s:%d does not jump to the exit block.",
                           ret->DebugName(),
                           ret->GetId()));
@@ -514,7 +518,8 @@
 
 void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
   VisitInstruction(ret);
-  if (!ret->GetBlock()->GetSingleSuccessor()->IsExitBlock()) {
+  HBasicBlock* successor = ret->GetBlock()->GetSingleSuccessor();
+  if (!successor->IsExitBlock() && !IsExitTryBoundaryIntoExitBlock(successor)) {
     AddError(StringPrintf("%s:%d does not jump to the exit block.",
                           ret->DebugName(),
                           ret->GetId()));
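
Note: the graph checker refactor funnels three previously duplicated checks through two predicates: IsAllowedToJumpToExitBlock names the instructions permitted to end a predecessor of Exit, and IsExitTryBoundaryIntoExitBlock recognizes the single-predecessor exiting TryBoundary that may sit between such an instruction and Exit. A hypothetical mirror of that logic over a toy block type (the Block struct and names are illustrative, not ART's HBasicBlock API):

#include <vector>

enum class LastInsn { kReturn, kReturnVoid, kThrow, kGoto, kTryBoundaryExit };

struct Block {
  LastInsn last;
  std::vector<const Block*> preds;
};

bool IsAllowedToJumpToExit(LastInsn insn) {
  return insn == LastInsn::kThrow || insn == LastInsn::kReturn ||
         insn == LastInsn::kReturnVoid;
}

// A single-predecessor exiting TryBoundary is transparent: the rule applies
// to the instruction just before it. Anything else is checked directly.
bool ExitPredecessorOk(const Block& pred) {
  if (pred.last == LastInsn::kTryBoundaryExit && pred.preds.size() == 1u) {
    return IsAllowedToJumpToExit(pred.preds[0]->last);
  }
  return IsAllowedToJumpToExit(pred.last);
}
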
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 56dc088..6abf00e 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -357,8 +357,10 @@
                                                              Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
+  outer_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   outer_loop_body->AddInstruction(new (&allocator) HGoto());
+  inner_loop_header->AddInstruction(new (&allocator) HSuspendCheck());
   inner_loop_header->AddInstruction(new (&allocator) HIf(parameter));
   inner_loop_body->AddInstruction(new (&allocator) HGoto());
   inner_loop_exit->AddInstruction(new (&allocator) HGoto());
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fce9ab8..33803c1 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1061,17 +1061,24 @@
       caller_instruction_counter);
   callee_graph->SetArtMethod(resolved_method);
 
-  OptimizingCompilerStats inline_stats;
+  // When needed, allocate `inline_stats` on the heap instead of on the
+  // stack: otherwise Clang might produce a stack frame too large for this
+  // function, exceeding the limit enforced by the `-Wframe-larger-than`
+  // option.
+  std::unique_ptr<OptimizingCompilerStats> inline_stats =
+      (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
   HGraphBuilder builder(callee_graph,
                         &dex_compilation_unit,
                         &outer_compilation_unit_,
                         resolved_method->GetDexFile(),
+                        *code_item,
                         compiler_driver_,
-                        &inline_stats,
+                        inline_stats.get(),
                         resolved_method->GetQuickenedInfo(),
-                        dex_cache);
+                        dex_cache,
+                        handles_);
 
-  if (builder.BuildGraph(*code_item, handles_) != kAnalysisSuccess) {
+  if (builder.BuildGraph() != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be built, so cannot be inlined";
     return false;
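
Note: the inliner hunk is about frame size. A stack-allocated OptimizingCompilerStats was large enough to trip `-Wframe-larger-than`, so the object is now heap-allocated, and only when statistics are actually being collected. A sketch of the pattern under simplified, stand-in types (Stats and Compile are not ART code):

#include <memory>

// Stand-in for a large stats object such as OptimizingCompilerStats.
struct Stats {
  long counters[128] = {};
};

// Heap-allocate only when the caller collects stats: the no-stats path pays
// no allocation, and the frame stays small on both paths.
void Compile(Stats* global_stats) {
  std::unique_ptr<Stats> local_stats =
      (global_stats == nullptr) ? nullptr : std::make_unique<Stats>();
  // ... pass local_stats.get() to callees that tolerate nullptr ...
}
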
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
new file mode 100644
index 0000000..c5f2342
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.cc
@@ -0,0 +1,2681 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_builder.h"
+
+#include "bytecode_utils.h"
+#include "class_linker.h"
+#include "driver/compiler_options.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
+  if (compilation_stats_ != nullptr) {
+    compilation_stats_->RecordStat(compilation_stat);
+  }
+}
+
+HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const {
+  return block_builder_->GetBlockAt(dex_pc);
+}
+
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
+  const size_t vregs = graph_->GetNumberOfVRegs();
+  if (locals->size() != vregs) {
+    locals->resize(vregs, nullptr);
+
+    if (block->IsCatchBlock()) {
+      // We record the incoming inputs of catch phis at throwing instructions and
+      // must therefore create the phis eagerly. A phi for a vreg is deleted at the
+      // first throwing instruction where that vreg is undefined. Unused phis are
+      // removed by dead phi analysis.
+      for (size_t i = 0; i < vregs; ++i) {
+        // No point in creating the catch phi if it is already undefined at
+        // the first throwing instruction.
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena_) HPhi(
+              arena_,
+              i,
+              0,
+              current_local_value->GetType());
+          block->AddPhi(phi);
+          (*locals)[i] = phi;
+        }
+      }
+    }
+  }
+  return locals;
+}
+
+HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
+  return (*locals)[local];
+}
+
+void HInstructionBuilder::InitializeBlockLocals() {
+  current_locals_ = GetLocalsFor(current_block_);
+
+  if (current_block_->IsCatchBlock()) {
+    // Catch phis were already created and inputs collected from throwing sites.
+    if (kIsDebugBuild) {
+      // Make sure there was at least one throwing instruction which initialized
+      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
+      // visited already (from HTryBoundary scoping and reverse post order).
+      bool catch_block_visited = false;
+      for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (current == current_block_) {
+          catch_block_visited = true;
+        } else if (current->IsTryBlock()) {
+          const HTryBoundary& try_entry = current->GetTryCatchInformation()->GetTryEntry();
+          if (try_entry.HasExceptionHandler(*current_block_)) {
+            DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
+          }
+        }
+      }
+      DCHECK_EQ(current_locals_->size(), graph_->GetNumberOfVRegs())
+          << "No instructions throwing into a live catch block.";
+    }
+  } else if (current_block_->IsLoopHeader()) {
+    // If the block is a loop header, we know we have only visited the pre-header
+    // because we are visiting in reverse post order. We create phis for all
+    // initialized locals from the pre-header. Their inputs will be populated at
+    // the end of the analysis.
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      HInstruction* incoming =
+          ValueOfLocalAt(current_block_->GetLoopInformation()->GetPreHeader(), local);
+      if (incoming != nullptr) {
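+        // Create the phi with no inputs for now; SetLoopHeaderPhiInputs adds
+        // them once all predecessors have been visited.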
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            0,
+            incoming->GetType());
+        current_block_->AddPhi(phi);
+        (*current_locals_)[local] = phi;
+      }
+    }
+
+    // Save the loop header so that the last phase of the analysis knows which
+    // blocks need to be updated.
+    loop_headers_.push_back(current_block_);
+  } else if (current_block_->GetPredecessors().size() > 0) {
+    // All predecessors have already been visited because we are visiting in reverse post order.
+    // We merge the values of all locals, creating phis if those values differ.
+    for (size_t local = 0; local < current_locals_->size(); ++local) {
+      bool one_predecessor_has_no_value = false;
+      bool is_different = false;
+      HInstruction* value = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+
+      for (HBasicBlock* predecessor : current_block_->GetPredecessors()) {
+        HInstruction* current = ValueOfLocalAt(predecessor, local);
+        if (current == nullptr) {
+          one_predecessor_has_no_value = true;
+          break;
+        } else if (current != value) {
+          is_different = true;
+        }
+      }
+
+      if (one_predecessor_has_no_value) {
+        // If one predecessor has no value for this local, we trust the verifier has
+        // successfully checked that there is a store dominating any read after this block.
+        continue;
+      }
+
+      if (is_different) {
+        HInstruction* first_input = ValueOfLocalAt(current_block_->GetPredecessors()[0], local);
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            local,
+            current_block_->GetPredecessors().size(),
+            first_input->GetType());
+        for (size_t i = 0; i < current_block_->GetPredecessors().size(); i++) {
+          HInstruction* pred_value = ValueOfLocalAt(current_block_->GetPredecessors()[i], local);
+          phi->SetRawInputAt(i, pred_value);
+        }
+        current_block_->AddPhi(phi);
+        value = phi;
+      }
+      (*current_locals_)[local] = value;
+    }
+  }
+}
+
+void HInstructionBuilder::PropagateLocalsToCatchBlocks() {
+  const HTryBoundary& try_entry = current_block_->GetTryCatchInformation()->GetTryEntry();
+  for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
+    ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
+    DCHECK_EQ(handler_locals->size(), current_locals_->size());
+    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+      HInstruction* handler_value = (*handler_locals)[vreg];
+      if (handler_value == nullptr) {
+        // Vreg was undefined at a previously encountered throwing instruction
+        // and the catch phi was deleted. Do not record the local value.
+        continue;
+      }
+      DCHECK(handler_value->IsPhi());
+
+      HInstruction* local_value = (*current_locals_)[vreg];
+      if (local_value == nullptr) {
+        // This is the first instruction throwing into `catch_block` where
+        // `vreg` is undefined. Delete the catch phi.
+        catch_block->RemovePhi(handler_value->AsPhi());
+        (*handler_locals)[vreg] = nullptr;
+      } else {
+        // Vreg has been defined at all instructions throwing into `catch_block`
+        // encountered so far. Record the local value in the catch phi.
+        handler_value->AsPhi()->AddInput(local_value);
+      }
+    }
+  }
+}
+
+void HInstructionBuilder::AppendInstruction(HInstruction* instruction) {
+  current_block_->AddInstruction(instruction);
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InsertInstructionAtTop(HInstruction* instruction) {
+  if (current_block_->GetInstructions().IsEmpty()) {
+    current_block_->AddInstruction(instruction);
+  } else {
+    current_block_->InsertInstructionBefore(instruction, current_block_->GetFirstInstruction());
+  }
+  InitializeInstruction(instruction);
+}
+
+void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) {
+  if (instruction->NeedsEnvironment()) {
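+    // Record a snapshot of the current vreg values so that the runtime can
+    // reconstruct the dex registers at this instruction, e.g. for deoptimization.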
+    HEnvironment* environment = new (arena_) HEnvironment(
+        arena_,
+        current_locals_->size(),
+        graph_->GetDexFile(),
+        graph_->GetMethodIdx(),
+        instruction->GetDexPc(),
+        graph_->GetInvokeType(),
+        instruction);
+    environment->CopyFrom(*current_locals_);
+    instruction->SetRawEnvironment(environment);
+  }
+}
+
+void HInstructionBuilder::SetLoopHeaderPhiInputs() {
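+  // loop_headers_ was filled during the reverse post order traversal, so outer
+  // loop headers precede the headers of loops nested inside them; iterating
+  // backwards completes inner-loop phis before those of enclosing loops.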
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocalAt(predecessor, vreg);
+        if (value == nullptr) {
+          // Vreg is undefined at this predecessor. Mark the phi dead and leave it
+          // with fewer inputs than predecessors; SsaChecker will fail unless it is removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
+static bool IsBlockPopulated(HBasicBlock* block) {
+  if (block->IsLoopHeader()) {
+    // Suspend checks were inserted into loop headers during the building of the dominator tree.
+    DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
+    return block->GetFirstInstruction() != block->GetLastInstruction();
+  } else {
+    return !block->GetInstructions().IsEmpty();
+  }
+}
+
+bool HInstructionBuilder::Build() {
+  locals_for_.resize(graph_->GetBlocks().size(),
+                     ArenaVector<HInstruction*>(arena_->Adapter(kArenaAllocGraphBuilder)));
+
+  // Find locations where we want to generate extra stackmaps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at the start of a Java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations = nullptr;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item_.insns_size_in_code_units_;
+    native_debug_info_locations =
+        new (arena_) ArenaBitVector(arena_, num_instructions, /* expandable */ false);
+    FindNativeDebugInfoLocations(native_debug_info_locations);
+  }
+
+  for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
+    current_block_ = block_it.Current();
+    uint32_t block_dex_pc = current_block_->GetDexPc();
+
+    InitializeBlockLocals();
+
+    if (current_block_->IsEntryBlock()) {
+      InitializeParameters();
+      AppendInstruction(new (arena_) HSuspendCheck(0u));
+      AppendInstruction(new (arena_) HGoto(0u));
+      continue;
+    } else if (current_block_->IsExitBlock()) {
+      AppendInstruction(new (arena_) HExit());
+      continue;
+    } else if (current_block_->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(current_block_->GetDexPc());
+      current_block_->GetLoopInformation()->SetSuspendCheck(suspend_check);
+      // This is slightly odd because the loop header might not be empty (TryBoundary).
+      // But we're still creating the environment with locals from the top of the block.
+      InsertInstructionAtTop(suspend_check);
+    }
+
+    if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
+      // Synthetic block that does not need to be populated.
+      DCHECK(IsBlockPopulated(current_block_));
+      continue;
+    }
+
+    DCHECK(!IsBlockPopulated(current_block_));
+
+    for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) {
+      if (current_block_ == nullptr) {
+        // The previous instruction ended this block.
+        break;
+      }
+
+      uint32_t dex_pc = it.CurrentDexPc();
+      if (dex_pc != block_dex_pc && FindBlockStartingAt(dex_pc) != nullptr) {
+        // This dex_pc starts a new basic block.
+        break;
+      }
+
+      if (current_block_->IsTryBlock() && IsThrowingDexInstruction(it.CurrentInstruction())) {
+        PropagateLocalsToCatchBlocks();
+      }
+
+      if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+        AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+
+      if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc)) {
+        return false;
+      }
+    }
+
+    if (current_block_ != nullptr) {
+      // Branching instructions clear current_block_, so we know the last
+      // instruction of the current block is not a branching instruction.
+      // We add an unconditional Goto to the next block.
+      DCHECK_EQ(current_block_->GetSuccessors().size(), 1u);
+      AppendInstruction(new (arena_) HGoto());
+    }
+  }
+
+  SetLoopHeaderPhiInputs();
+
+  return true;
+}
+
+void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of a new Java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
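+      // Returning false keeps DecodeDebugPositionInfo iterating over all entries.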
+      return false;
+    }
+  };
+  dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations);
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item_.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item_.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION: {
+        // Stop in the native debugger after the exception has been moved.
+        // The compiler also expects the move at the start of a basic block, so
+        // we do not want to interfere by inserting native-debug-info before it.
+        locations->ClearBit(inst->GetDexPc(code_item_.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item_.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
+HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, Primitive::Type type) const {
+  HInstruction* value = (*current_locals_)[reg_number];
+  DCHECK(value != nullptr);
+
+  // If the operation requests a specific type, we make sure its input is of that type.
+  if (type != value->GetType()) {
+    if (Primitive::IsFloatingPointType(type)) {
+      return ssa_builder_->GetFloatOrDoubleEquivalent(value, type);
+    } else if (type == Primitive::kPrimNot) {
+      return ssa_builder_->GetReferenceTypeEquivalent(value);
+    }
+  }
+
+  return value;
+}
+
+void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_value) {
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
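+  //
+  // For example, if v0/v1 hold a wide value and another wide value is stored
+  // into v1, then v0 is invalidated by case (2) and v2 by case (1).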
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+}
+
+void HInstructionBuilder::InitializeParameters() {
+  DCHECK(current_block_->IsEntryBlock());
+
+  // dex_compilation_unit_ is null only when unit testing.
+  if (dex_compilation_unit_ == nullptr) {
+    return;
+  }
+
+  const char* shorty = dex_compilation_unit_->GetShorty();
+  uint16_t number_of_parameters = graph_->GetNumberOfInVRegs();
+  uint16_t locals_index = graph_->GetNumberOfLocalVRegs();
+  uint16_t parameter_index = 0;
+
+  const DexFile::MethodId& referrer_method_id =
+      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
+  if (!dex_compilation_unit_->IsStatic()) {
+    // Add the implicit 'this' argument, not expressed in the signature.
+    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
+                                                              referrer_method_id.class_idx_,
+                                                              parameter_index++,
+                                                              Primitive::kPrimNot,
+                                                              true);
+    AppendInstruction(parameter);
+    UpdateLocal(locals_index++, parameter);
+    number_of_parameters--;
+  }
+
+  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
+    HParameterValue* parameter = new (arena_) HParameterValue(
+        *dex_file_,
+        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
+        parameter_index++,
+        Primitive::GetType(shorty[shorty_pos]),
+        false);
+    ++shorty_pos;
+    AppendInstruction(parameter);
+    // Store the parameter value in the local that the dex code will use
+    // to reference that parameter.
+    UpdateLocal(locals_index++, parameter);
+    if (Primitive::Is64BitType(parameter->GetType())) {
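+      // A wide parameter occupies two input vregs; skip the high half.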
+      i++;
+      locals_index++;
+      parameter_index++;
+    }
+  }
+}
+
+template<typename T>
+void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(first, second, dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;  // The conditional branch ends the current block.
+}
+
+template<typename T>
+void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc);
+  AppendInstruction(comparison);
+  AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+  current_block_ = nullptr;  // The conditional branch ends the current block.
+}
+
+template<typename T>
+void HInstructionBuilder::Unop_12x(const Instruction& instruction,
+                                   Primitive::Type type,
+                                   uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Conversion_12x(const Instruction& instruction,
+                                         Primitive::Type input_type,
+                                         Primitive::Type result_type,
+                                         uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
+  AppendInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_23x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::Binop_23x_cmp(const Instruction& instruction,
+                                        Primitive::Type type,
+                                        ComparisonBias bias,
+                                        uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  AppendInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x_shift(const Instruction& instruction,
+                                          Primitive::Type type,
+                                          uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_12x(const Instruction& instruction,
+                                    Primitive::Type type,
+                                    uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  AppendInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc);
+  if (reverse) {
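+    // For example, rsub-int computes `constant - vreg`, so the operands swap.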
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
+void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+  HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc);
+  if (reverse) {
+    std::swap(first, second);
+  }
+  AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, const CompilerDriver& driver) {
+  Thread* self = Thread::Current();
+  return cu->IsConstructor()
+      && driver.RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
+}
+
+// Returns true if `block` has only one successor which starts at the next
+// dex_pc after `instruction` at `dex_pc`.
+static bool IsFallthroughInstruction(const Instruction& instruction,
+                                     uint32_t dex_pc,
+                                     HBasicBlock* block) {
+  uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
+  return block->GetSingleSuccessor()->GetDexPc() == next_dex_pc;
+}
+
+void HInstructionBuilder::BuildSwitch(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+  DexSwitchTable table(instruction, dex_pc);
+
+  if (table.GetNumEntries() == 0) {
+    // Empty switch; the code falls through to the next block.
+    DCHECK(IsFallthroughInstruction(instruction, dex_pc, current_block_));
+    AppendInstruction(new (arena_) HGoto(dex_pc));
+  } else if (table.ShouldBuildDecisionTree()) {
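+    // Build a chain of HEqual/HIf comparisons, one per case value; after each
+    // comparison but the last, continue in the block of the next comparison.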
+    for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) {
+      HInstruction* case_value = graph_->GetIntConstant(it.CurrentKey(), dex_pc);
+      HEqual* comparison = new (arena_) HEqual(value, case_value, dex_pc);
+      AppendInstruction(comparison);
+      AppendInstruction(new (arena_) HIf(comparison, dex_pc));
+
+      if (!it.IsLast()) {
+        current_block_ = FindBlockStartingAt(it.GetDexPcForCurrentIndex());
+      }
+    }
+  } else {
+    AppendInstruction(
+        new (arena_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc));
+  }
+
+  current_block_ = nullptr;  // The switch ends the current block.
+}
+
+void HInstructionBuilder::BuildReturn(const Instruction& instruction,
+                                      Primitive::Type type,
+                                      uint32_t dex_pc) {
+  if (type == Primitive::kPrimVoid) {
+    if (graph_->ShouldGenerateConstructorBarrier()) {
+      // The compilation unit is null during testing.
+      if (dex_compilation_unit_ != nullptr) {
+        DCHECK(RequiresConstructorBarrier(dex_compilation_unit_, *compiler_driver_))
+          << "Inconsistent use of ShouldGenerateConstructorBarrier. Should not generate a barrier.";
+      }
+      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+    }
+    AppendInstruction(new (arena_) HReturnVoid(dex_pc));
+  } else {
+    HInstruction* value = LoadLocal(instruction.VRegA(), type);
+    AppendInstruction(new (arena_) HReturn(value, dex_pc));
+  }
+  current_block_ = nullptr;  // The return ends the current block.
+}
+
+static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
+  switch (opcode) {
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_STATIC_RANGE:
+      return kStatic;
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_DIRECT_RANGE:
+      return kDirect;
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      return kVirtual;
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+      return kInterface;
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_SUPER:
+      return kSuper;
+    default:
+      LOG(FATAL) << "Unexpected invoke opcode: " << opcode;
+      UNREACHABLE();
+  }
+}
+
+ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case invoke-super, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable to know whether to turn
+  // this into an invoke-unresolved, which handles cross-dex invokes and abstract
+  // super methods, both of which require runtime handling.
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // We could not determine the method's class, so we need to wait until runtime.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    ArtMethod* current_method = graph_->GetArtMethod();
+    DCHECK(current_method != nullptr);
+    Handle<mirror::Class> methods_class(hs.NewHandle(
+        dex_compilation_unit_->GetClassLinker()->ResolveReferencedClassOfMethod(Thread::Current(),
+                                                                                method_idx,
+                                                                                current_method)));
+    if (methods_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time
+      // compilation), bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    } else {
+      ArtMethod* actual_method;
+      if (methods_class->IsInterface()) {
+        actual_method = methods_class->FindVirtualMethodForInterfaceSuper(
+            resolved_method, class_linker->GetImagePointerSize());
+      } else {
+        uint16_t vtable_index = resolved_method->GetMethodIndex();
+        actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+            vtable_index, class_linker->GetImagePointerSize());
+      }
+      if (actual_method != resolved_method &&
+          !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // The back-end code generator relies on this check in order to ensure that it will not
+        // attempt to read the dex_cache with a dex_method_index that is not from the correct
+        // dex_file. If we did not do this check, the dex_method_index would not be updated in
+        // the builder, which means that the code generator (and maybe the compiler driver
+        // during sharpening and inlining) might invoke an incorrect method.
+        // TODO: The actual method could still be referenced in the current dex file, so we
+        //       could try locating it.
+        // TODO: Remove the dex_file restriction.
+        return nullptr;
+      }
+      if (!actual_method->IsInvokable()) {
+        // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+        // could resolve the callee to the wrong method.
+        return nullptr;
+      }
+      resolved_method = actual_method;
+    }
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
+bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
+                                      uint32_t dex_pc,
+                                      uint32_t method_idx,
+                                      uint32_t number_of_vreg_arguments,
+                                      bool is_range,
+                                      uint32_t* args,
+                                      uint32_t register_index) {
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
+  const char* descriptor = dex_file_->GetMethodShorty(method_idx);
+  Primitive::Type return_type = Primitive::GetType(descriptor[0]);
+
+  // Remove the return type from the 'proto'.
+  size_t number_of_arguments = strlen(descriptor) - 1;
+  if (invoke_type != kStatic) {  // instance call
+    // One extra argument for 'this'.
+    number_of_arguments++;
+  }
+
+  MethodReference target_method(dex_file_, method_idx);
+
+  // Special handling for string init.
+  int32_t string_init_offset = 0;
+  bool is_string_init = compiler_driver_->IsStringInit(method_idx,
+                                                       dex_file_,
+                                                       &string_init_offset);
+  // Replace calls to String.<init> with StringFactory.
+  if (is_string_init) {
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kStringInit,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        dchecked_integral_cast<uint64_t>(string_init_offset),
+        0U
+    };
+    HInvoke* invoke = new (arena_) HInvokeStaticOrDirect(
+        arena_,
+        number_of_arguments - 1,
+        Primitive::kPrimNot /* return_type */,
+        dex_pc,
+        method_idx,
+        target_method,
+        dispatch_info,
+        invoke_type,
+        kStatic /* optimized_invoke_type */,
+        HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+    return HandleStringInit(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor);
+  }
+
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
+    HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
+                                                     number_of_arguments,
+                                                     return_type,
+                                                     dex_pc,
+                                                     method_idx,
+                                                     invoke_type);
+    return HandleInvoke(invoke,
+                        number_of_vreg_arguments,
+                        args,
+                        register_index,
+                        is_range,
+                        descriptor,
+                        nullptr /* clinit_check */);
+  }
+
+  // Potential class initialization check, in the case of a static method call.
+  HClinitCheck* clinit_check = nullptr;
+  HInvoke* invoke = nullptr;
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
+    // By default, consider that the called method implicitly requires
+    // an initialization check of its declaring class.
+    HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
+        = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
+    }
+
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
+        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+        0u,
+        0U
+    };
+    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
+                                                number_of_arguments,
+                                                return_type,
+                                                dex_pc,
+                                                method_idx,
+                                                target_method,
+                                                dispatch_info,
+                                                invoke_type,
+                                                invoke_type,
+                                                clinit_check_requirement);
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeVirtual(arena_,
+                                         number_of_arguments,
+                                         return_type,
+                                         dex_pc,
+                                         method_idx,
+                                         resolved_method->GetMethodIndex());
+  } else {
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
+    invoke = new (arena_) HInvokeInterface(arena_,
+                                           number_of_arguments,
+                                           return_type,
+                                           dex_pc,
+                                           method_idx,
+                                           resolved_method->GetDexMethodIndex());
+  }
+
+  return HandleInvoke(invoke,
+                      number_of_vreg_arguments,
+                      args,
+                      register_index,
+                      is_range,
+                      descriptor,
+                      clinit_check);
+}
+
+bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  bool finalizable;
+  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then we have not resolved the class and it may again
+  // be finalizable.
+  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      outer_dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      /*needs_access_check*/ can_throw,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
+
+  AppendInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  AppendInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      can_throw,
+      finalizable,
+      entrypoint));
+  return true;
+}
+
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for the AOT compilation.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+    return true;
+  }
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
+}
+
+HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* resolved_method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
+
+  // The index at which the method's class is stored in the DexCache's type array.
+  uint32_t storage_index = DexFile::kDexNoIndex;
+  bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() == dex_cache.Get()) {
+    // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
+    compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
+                                                               GetCompilingClass(),
+                                                               resolved_method,
+                                                               method_idx,
+                                                               &storage_index);
+  }
+
+  HClinitCheck* clinit_check = nullptr;
+
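+  // Three possible outcomes: the class is provably initialized (no check
+  // needed), its type index is available to the referrer (emit an explicit
+  // HClinitCheck), or neither (keep the caller's implicit check requirement).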
+  if (IsInitialized(resolved_method_class)) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
+  } else if (storage_index != DexFile::kDexNoIndex) {
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+    AppendInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    AppendInstruction(clinit_check);
+  }
+  return clinit_check;
+}
+
+bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
+                                               uint32_t number_of_vreg_arguments,
+                                               uint32_t* args,
+                                               uint32_t register_index,
+                                               bool is_range,
+                                               const char* descriptor,
+                                               size_t start_index,
+                                               size_t* argument_index) {
+  uint32_t descriptor_index = 1;  // Skip the return type.
+
+  for (size_t i = start_index;
+       // Make sure we don't go over the expected arguments or over the number of
+       // dex registers given. If the instruction was seen as dead by the verifier,
+       // it hasn't been properly checked.
+       (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
+       i++, (*argument_index)++) {
+    Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
+    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
+    if (!is_range
+        && is_wide
+        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
+      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
+      // reject any class where this is violated. However, the verifier only does these checks
+      // on non-trivially-dead instructions, so we just bail out of the compilation.
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of non-sequential dex register pair in wide argument";
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      return false;
+    }
+    HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
+    invoke->SetArgumentAt(*argument_index, arg);
+    if (is_wide) {
+      i++;
+    }
+  }
+
+  if (*argument_index != invoke->GetNumberOfArguments()) {
+    VLOG(compiler) << "Did not compile "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << " because of wrong number of arguments in invoke instruction";
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    return false;
+  }
+
+  if (invoke->IsInvokeStaticOrDirect() &&
+      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
+    invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
+    (*argument_index)++;
+  }
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
+                                       uint32_t number_of_vreg_arguments,
+                                       uint32_t* args,
+                                       uint32_t register_index,
+                                       bool is_range,
+                                       const char* descriptor,
+                                       HClinitCheck* clinit_check) {
+  DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
+  size_t start_index = 0;
+  size_t argument_index = 0;
+  if (invoke->GetOriginalInvokeType() != InvokeType::kStatic) {  // Instance call.
+    HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, invoke->GetDexPc());
+    AppendInstruction(null_check);
+    invoke->SetArgumentAt(0, null_check);
+    start_index = 1;
+    argument_index = 1;
+  }
+
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  if (clinit_check != nullptr) {
+    // Add the class initialization check as last input of `invoke`.
+    DCHECK(invoke->IsInvokeStaticOrDirect());
+    DCHECK(invoke->AsInvokeStaticOrDirect()->GetClinitCheckRequirement()
+        == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
+    invoke->SetArgumentAt(argument_index, clinit_check);
+    argument_index++;
+  }
+
+  AppendInstruction(invoke);
+  latest_result_ = invoke;
+
+  return true;
+}
+
+bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
+                                           uint32_t number_of_vreg_arguments,
+                                           uint32_t* args,
+                                           uint32_t register_index,
+                                           bool is_range,
+                                           const char* descriptor) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
+
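+  // Skip the original 'this' register: the StringFactory call takes no receiver.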
+  size_t start_index = 1;
+  size_t argument_index = 0;
+  if (!SetupInvokeArguments(invoke,
+                            number_of_vreg_arguments,
+                            args,
+                            register_index,
+                            is_range,
+                            descriptor,
+                            start_index,
+                            &argument_index)) {
+    return false;
+  }
+
+  AppendInstruction(invoke);
+
+  // This is a StringFactory call, not an actual String constructor. Its result
+  // replaces the empty String pre-allocated by NewInstance.
+  uint32_t orig_this_reg = is_range ? register_index : args[0];
+  HInstruction* arg_this = LoadLocal(orig_this_reg, Primitive::kPrimNot);
+
+  // Replacing the NewInstance might render it redundant. Keep a list of these
+  // to be visited once it is clear whether it has remaining uses.
+  if (arg_this->IsNewInstance()) {
+    ssa_builder_->AddUninitializedString(arg_this->AsNewInstance());
+  } else {
+    DCHECK(arg_this->IsPhi());
+    // NewInstance is not the direct input of the StringFactory call. It might
+    // be redundant but optimizing this case is not worth the effort.
+  }
+
+  // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
+  for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+    if ((*current_locals_)[vreg] == arg_this) {
+      (*current_locals_)[vreg] = invoke;
+    }
+  }
+
+  return true;
+}
+
+static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+  const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+  return Primitive::GetType(type[0]);
+}
+
+bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
+                                                   uint32_t dex_pc,
+                                                   bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_22c();
+  uint32_t obj_reg = instruction.VRegB_22c();
+  uint16_t field_index;
+  if (instruction.IsQuickened()) {
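+    // Quickened field accesses do not encode the field index inline; recover
+    // it from the quickening info, if available.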
+    if (!CanDecodeQuickenedInfo()) {
+      return false;
+    }
+    field_index = LookupQuickenedInfo(dex_pc);
+  } else {
+    field_index = instruction.VRegC_22c();
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtField* resolved_field =
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
+
+  HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
+  HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc);
+  AppendInstruction(null_check);
+
+  Primitive::Type field_type = (resolved_field == nullptr)
+      ? GetFieldAccessType(*dex_file_, field_index)
+      : resolved_field->GetTypeAsPrimitiveType();
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    HInstruction* field_set = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check,
+                                                           value,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_set = new (arena_) HInstanceFieldSet(null_check,
+                                                 value,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_set);
+  } else {
+    HInstruction* field_get = nullptr;
+    if (resolved_field == nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+      field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check,
+                                                           field_type,
+                                                           field_index,
+                                                           dex_pc);
+    } else {
+      uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+      field_get = new (arena_) HInstanceFieldGet(null_check,
+                                                 field_type,
+                                                 resolved_field->GetOffset(),
+                                                 resolved_field->IsVolatile(),
+                                                 field_index,
+                                                 class_def_index,
+                                                 *dex_file_,
+                                                 dex_compilation_unit_->GetDexCache(),
+                                                 dex_pc);
+    }
+    AppendInstruction(field_get);
+    UpdateLocal(source_or_dest_reg, field_get);
+  }
+
+  return true;
+}
+
+static mirror::Class* GetClassFrom(CompilerDriver* driver,
+                                   const DexCompilationUnit& compilation_unit) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *compilation_unit.GetDexFile();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
+
+  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
+}
+
+mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
+}
+
+mirror::Class* HInstructionBuilder::GetCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
+}
+
+bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
+      soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // GetOutermostCompilingClass returns null when the class is unresolved
+  // (e.g. if it derives from an unresolved class), which is bogus given that
+  // we are compiling it.
+  // When this happens we cannot establish a direct relation between the current
+  // class and the outer class, so we return false.
+  // (Note that this is only used for optimizing invokes and field accesses.)
+  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
+}
+
+void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                                           uint32_t dex_pc,
+                                                           bool is_put,
+                                                           Primitive::Type field_type) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    AppendInstruction(
+        new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
+                                                 uint32_t dex_pc,
+                                                 bool is_put) {
+  uint32_t source_or_dest_reg = instruction.VRegA_21c();
+  uint16_t field_index = instruction.VRegB_21c();
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<5> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  ArtField* resolved_field = compiler_driver_->ResolveField(
+      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
+
+  if (resolved_field == nullptr) {
+    MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
+    Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index);
+    BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+    return true;
+  }
+
+  Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+
+  // The index at which the field's class is stored in the DexCache's type array.
+  uint32_t storage_index;
+  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
+  } else if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // The compiler driver cannot currently handle multiple dex caches. Just bail out.
+    return false;
+  } else {
+    // TODO: This is rather expensive. Perf it and cache the results if needed.
+    std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
+        outer_dex_cache.Get(),
+        GetCompilingClass(),
+        resolved_field,
+        field_index,
+        &storage_index);
+    bool can_easily_access = is_put ? pair.second : pair.first;
+    if (!can_easily_access) {
+      MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+      BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+      return true;
+    }
+  }
+
+  bool is_in_cache =
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
+  HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
+                                                 storage_index,
+                                                 outer_dex_file,
+                                                 is_outer_class,
+                                                 dex_pc,
+                                                 /*needs_access_check*/ false,
+                                                 is_in_cache);
+  AppendInstruction(constant);
+
+  HInstruction* cls = constant;
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
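+  // If the declaring class cannot be assumed initialized, guard the access
+  // with an explicit class-initialization check.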
+  if (!IsInitialized(klass)) {
+    cls = new (arena_) HClinitCheck(constant, dex_pc);
+    AppendInstruction(cls);
+  }
+
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
+  if (is_put) {
+    // We need to keep the class alive before loading the value.
+    HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
+    DCHECK_EQ(HPhi::ToPhiType(value->GetType()), HPhi::ToPhiType(field_type));
+    AppendInstruction(new (arena_) HStaticFieldSet(cls,
+                                                   value,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HStaticFieldGet(cls,
+                                                   field_type,
+                                                   resolved_field->GetOffset(),
+                                                   resolved_field->IsVolatile(),
+                                                   field_index,
+                                                   class_def_index,
+                                                   *dex_file_,
+                                                   dex_cache_,
+                                                   dex_pc));
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  return true;
+}
+
+void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
+                                             uint16_t first_vreg,
+                                             int64_t second_vreg_or_constant,
+                                             uint32_t dex_pc,
+                                             Primitive::Type type,
+                                             bool second_is_constant,
+                                             bool is_div) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc);
+    } else {
+      second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
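+  // An explicit zero check is only needed when the divisor's value is not
+  // known at compile time, or when it is known to be zero; a known non-zero
+  // constant divisor can never trigger a division-by-zero exception.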
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
+    AppendInstruction(second);
+  }
+
+  if (is_div) {
+    AppendInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  } else {
+    AppendInstruction(new (arena_) HRem(type, first, second, dex_pc));
+  }
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
+}
+
+void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
+                                           uint32_t dex_pc,
+                                           bool is_put,
+                                           Primitive::Type anticipated_type) {
+  uint8_t source_or_dest_reg = instruction.VRegA_23x();
+  uint8_t array_reg = instruction.VRegB_23x();
+  uint8_t index_reg = instruction.VRegC_23x();
+
+  HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot);
+  object = new (arena_) HNullCheck(object, dex_pc);
+  AppendInstruction(object);
+
+  HInstruction* length = new (arena_) HArrayLength(object, dex_pc);
+  AppendInstruction(length);
+  HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
+  index = new (arena_) HBoundsCheck(index, length, dex_pc);
+  AppendInstruction(index);
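+  // Plain aget/aput opcodes are shared between int and float arrays (and the
+  // wide forms between long and double), so such accesses are recorded as
+  // ambiguous and their type is fixed up later by the SSA builder.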
+  if (is_put) {
+    HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type);
+    // TODO: Insert a type check node if the type is Object.
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  } else {
+    HArrayGet* aget = new (arena_) HArrayGet(object, index, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArrayGet(aget);
+    AppendInstruction(aget);
+    UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+                                              uint32_t type_index,
+                                              uint32_t number_of_vreg_arguments,
+                                              bool is_range,
+                                              uint32_t* args,
+                                              uint32_t register_index) {
+  HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
+  bool finalizable;
+  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+      ? kQuickAllocArrayWithAccessCheck
+      : kQuickAllocArray;
+  HInstruction* object = new (arena_) HNewArray(length,
+                                                graph_->GetCurrentMethod(),
+                                                dex_pc,
+                                                type_index,
+                                                *dex_compilation_unit_->GetDexFile(),
+                                                entrypoint);
+  AppendInstruction(object);
+
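+  // The DCHECKs below encode the assumption that only int and reference
+  // element types reach this point; other element types are expected to have
+  // been rejected before instruction building.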
+  const char* descriptor = dex_file_->StringByTypeIdx(type_index);
+  DCHECK_EQ(descriptor[0], '[') << descriptor;
+  char primitive = descriptor[1];
+  DCHECK(primitive == 'I'
+      || primitive == 'L'
+      || primitive == '[') << descriptor;
+  bool is_reference_array = (primitive == 'L') || (primitive == '[');
+  Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
+
+  for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
+    HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
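+  // Record the array so that a following move-result-object picks it up.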
+  latest_result_ = object;
+}
+
+template <typename T>
+void HInstructionBuilder::BuildFillArrayData(HInstruction* object,
+                                             const T* data,
+                                             uint32_t element_count,
+                                             Primitive::Type anticipated_type,
+                                             uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetIntConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
+void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
+  HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot);
+  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
+  AppendInstruction(null_check);
+
+  HInstruction* length = new (arena_) HArrayLength(null_check, dex_pc);
+  AppendInstruction(length);
+
+  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
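+  // The fill-array-data payload layout is: ushort ident (0x0300),
+  // ushort element_width, uint element_count, then the raw data bytes.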
+  const Instruction::ArrayDataPayload* payload =
+      reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_.insns_ + payload_offset);
+  const uint8_t* data = payload->data;
+  uint32_t element_count = payload->element_count;
+
+  // The DEX instruction performs the bounds check before doing any stores,
+  // so check against the last index up front.
+  HInstruction* last_index = graph_->GetIntConstant(element_count - 1, dex_pc);
+  AppendInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
+
+  switch (payload->element_width) {
+    case 1:
+      BuildFillArrayData(null_check,
+                         reinterpret_cast<const int8_t*>(data),
+                         element_count,
+                         Primitive::kPrimByte,
+                         dex_pc);
+      break;
+    case 2:
+      BuildFillArrayData(null_check,
+                         reinterpret_cast<const int16_t*>(data),
+                         element_count,
+                         Primitive::kPrimShort,
+                         dex_pc);
+      break;
+    case 4:
+      BuildFillArrayData(null_check,
+                         reinterpret_cast<const int32_t*>(data),
+                         element_count,
+                         Primitive::kPrimInt,
+                         dex_pc);
+      break;
+    case 8:
+      BuildFillWideArrayData(null_check,
+                             reinterpret_cast<const int64_t*>(data),
+                             element_count,
+                             dex_pc);
+      break;
+    default:
+      LOG(FATAL) << "Unknown element width: " << payload->element_width;
+  }
+  graph_->SetHasBoundsChecks(true);
+}
+
+void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
+                                                 const int64_t* data,
+                                                 uint32_t element_count,
+                                                 uint32_t dex_pc) {
+  for (uint32_t i = 0; i < element_count; ++i) {
+    HInstruction* index = graph_->GetIntConstant(i, dex_pc);
+    HInstruction* value = graph_->GetLongConstant(data[i], dex_pc);
+    HArraySet* aset = new (arena_) HArraySet(object, index, value, Primitive::kPrimLong, dex_pc);
+    ssa_builder_->MaybeAddAmbiguousArraySet(aset);
+    AppendInstruction(aset);
+  }
+}
+
+static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls.Get() == nullptr) {
+    return TypeCheckKind::kUnresolvedCheck;
+  } else if (cls->IsInterface()) {
+    return TypeCheckKind::kInterfaceCheck;
+  } else if (cls->IsArrayClass()) {
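+    // Any array of references is assignable to Object[], so casting to it
+    // only requires checking that the object is an array of references.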
+    if (cls->GetComponentType()->IsObjectClass()) {
+      return TypeCheckKind::kArrayObjectCheck;
+    } else if (cls->CannotBeAssignedFromOtherTypes()) {
+      return TypeCheckKind::kExactCheck;
+    } else {
+      return TypeCheckKind::kArrayCheck;
+    }
+  } else if (cls->IsFinal()) {
+    return TypeCheckKind::kExactCheck;
+  } else if (cls->IsAbstract()) {
+    return TypeCheckKind::kAbstractClassCheck;
+  } else {
+    return TypeCheckKind::kClassHierarchyCheck;
+  }
+}
+
+void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
+                                         uint8_t destination,
+                                         uint8_t reference,
+                                         uint16_t type_index,
+                                         uint32_t dex_pc) {
+  bool type_known_final, type_known_abstract, use_declaring_class;
+  bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(),
+      *dex_compilation_unit_->GetDexFile(),
+      type_index,
+      &type_known_final,
+      &type_known_abstract,
+      &use_declaring_class);
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+
+  HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
+  HLoadClass* cls = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      !can_access,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
+  AppendInstruction(cls);
+
+  TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
+  if (instruction.Opcode() == Instruction::INSTANCE_OF) {
+    AppendInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc));
+    UpdateLocal(destination, current_block_->GetLastInstruction());
+  } else {
+    DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
+    // We emit a CheckCast followed by a BoundType. CheckCast is a statement
+    // which may throw. If it succeeds BoundType sets the new type of `object`
+    // for all subsequent uses.
+    AppendInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc));
+    AppendInstruction(new (arena_) HBoundType(object, dex_pc));
+    UpdateLocal(reference, current_block_->GetLastInstruction());
+  }
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+  return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
+      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
+}
+
+bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
+  return interpreter_metadata_ != nullptr;
+}
+
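+// The interpreter metadata is a stream of LEB128-encoded (dex_pc, index)
+// pairs emitted in code order, so entries are consumed sequentially and the
+// dex_pc of each entry must match the quickened instruction being built.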
+uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
+  DCHECK(interpreter_metadata_ != nullptr);
+  uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
+  DCHECK_EQ(dex_pc, dex_pc_in_map);
+  return DecodeUnsignedLeb128(&interpreter_metadata_);
+}
+
+bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
+  switch (instruction.Opcode()) {
+    case Instruction::CONST_4: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_11n(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_16: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21s(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST: {
+      int32_t register_index = instruction.VRegA();
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_31i(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_HIGH16: {
+      int32_t register_index = instruction.VRegA();
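+      // The 16-bit payload provides the high half of the 32-bit constant.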
+      HIntConstant* constant = graph_->GetIntConstant(instruction.VRegB_21h() << 16, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_16: {
+      int32_t register_index = instruction.VRegA();
+      // Get 16 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_21s();
+      value <<= 48;
+      value >>= 48;
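+      // For example, a raw payload of 0x8000 yields 0xFFFFFFFFFFFF8000.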
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_32: {
+      int32_t register_index = instruction.VRegA();
+      // Get 32 bits of constant value, sign extended to 64 bits.
+      int64_t value = instruction.VRegB_31i();
+      value <<= 32;
+      value >>= 32;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE: {
+      int32_t register_index = instruction.VRegA();
+      HLongConstant* constant = graph_->GetLongConstant(instruction.VRegB_51l(), dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    case Instruction::CONST_WIDE_HIGH16: {
+      int32_t register_index = instruction.VRegA();
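+      // The 16-bit payload provides bits 63..48; the lower bits are zero.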
+      int64_t value = static_cast<int64_t>(instruction.VRegB_21h()) << 48;
+      HLongConstant* constant = graph_->GetLongConstant(value, dex_pc);
+      UpdateLocal(register_index, constant);
+      break;
+    }
+
+    // Note that the SSA builder will refine the types.
+    case Instruction::MOVE:
+    case Instruction::MOVE_FROM16:
+    case Instruction::MOVE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    // Note that the SSA builder will refine the types.
+    case Instruction::MOVE_WIDE:
+    case Instruction::MOVE_WIDE_FROM16:
+    case Instruction::MOVE_WIDE_16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::MOVE_OBJECT:
+    case Instruction::MOVE_OBJECT_16:
+    case Instruction::MOVE_OBJECT_FROM16: {
+      HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimNot);
+      UpdateLocal(instruction.VRegA(), value);
+      break;
+    }
+
+    case Instruction::RETURN_VOID_NO_BARRIER:
+    case Instruction::RETURN_VOID: {
+      BuildReturn(instruction, Primitive::kPrimVoid, dex_pc);
+      break;
+    }
+
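+    // Each IF_XX expansion covers both the two-register form (if-<cond>)
+    // and the compare-against-zero form (if-<cond>z).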
+#define IF_XX(comparison, cond) \
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
+
+    IF_XX(HEqual, EQ);
+    IF_XX(HNotEqual, NE);
+    IF_XX(HLessThan, LT);
+    IF_XX(HLessThanOrEqual, LE);
+    IF_XX(HGreaterThan, GT);
+    IF_XX(HGreaterThanOrEqual, GE);
+
+    case Instruction::GOTO:
+    case Instruction::GOTO_16:
+    case Instruction::GOTO_32: {
+      AppendInstruction(new (arena_) HGoto(dex_pc));
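+      // An unconditional branch closes the block; clear current_block_ so
+      // that no dead instructions get appended to it.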
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::RETURN: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_OBJECT: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::RETURN_WIDE: {
+      BuildReturn(instruction, return_type_, dex_pc);
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_SUPER:
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_VIRTUAL_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_35c();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, false, args, -1)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_STATIC_RANGE:
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      uint16_t method_idx;
+      if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
+        if (!CanDecodeQuickenedInfo()) {
+          return false;
+        }
+        method_idx = LookupQuickenedInfo(dex_pc);
+      } else {
+        method_idx = instruction.VRegB_3rc();
+      }
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t register_index = instruction.VRegC();
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
+                       number_of_vreg_arguments, true, nullptr, register_index)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::NEG_INT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_LONG: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_FLOAT: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::NEG_DOUBLE: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_INT: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::NOT_LONG: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::LONG_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_LONG: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DOUBLE_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_BYTE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_SHORT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
+      break;
+    }
+
+    case Instruction::INT_TO_CHAR: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE: {
+      Binop_23x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE: {
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG: {
+      Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG: {
+      Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG: {
+      Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG: {
+      Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG: {
+      Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG: {
+      Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_LONG_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_DOUBLE_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_INT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_LONG_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_FLOAT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_LONG_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_FLOAT_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_DOUBLE_2ADDR: {
+      Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::REM_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, false);
+      break;
+    }
+
+    case Instruction::REM_FLOAT_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_INT_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHL_LONG_2ADDR: {
+      Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_LONG_2ADDR: {
+      Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_LONG_2ADDR: {
+      Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_FLOAT_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_DOUBLE_2ADDR: {
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_LONG_2ADDR: {
+      Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_LONG_2ADDR: {
+      Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_LONG_2ADDR: {
+      Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT16: {
+      Binop_22s<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT16: {
+      Binop_22s<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT16: {
+      Binop_22s<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT16: {
+      Binop_22s<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT: {
+      Binop_22s<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT16: {
+      Binop_22s<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::ADD_INT_LIT8: {
+      Binop_22b<HAdd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::AND_INT_LIT8: {
+      Binop_22b<HAnd>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::OR_INT_LIT8: {
+      Binop_22b<HOr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::XOR_INT_LIT8: {
+      Binop_22b<HXor>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::RSUB_INT_LIT8: {
+      Binop_22b<HSub>(instruction, true, dex_pc);
+      break;
+    }
+
+    case Instruction::MUL_INT_LIT8: {
+      Binop_22b<HMul>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::DIV_INT_LIT16:
+    case Instruction::DIV_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, true);
+      break;
+    }
+
+    case Instruction::REM_INT_LIT16:
+    case Instruction::REM_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, false);
+      break;
+    }
+
+    case Instruction::SHL_INT_LIT8: {
+      Binop_22b<HShl>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::SHR_INT_LIT8: {
+      Binop_22b<HShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::USHR_INT_LIT8: {
+      Binop_22b<HUShr>(instruction, false, dex_pc);
+      break;
+    }
+
+    case Instruction::NEW_INSTANCE: {
+      if (!BuildNewInstance(instruction.VRegB_21c(), dex_pc)) {
+        return false;
+      }
+      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::NEW_ARRAY: {
+      uint16_t type_index = instruction.VRegC_22c();
+      HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
+      bool finalizable;
+      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
+          ? kQuickAllocArrayWithAccessCheck
+          : kQuickAllocArray;
+      AppendInstruction(new (arena_) HNewArray(length,
+                                               graph_->GetCurrentMethod(),
+                                               dex_pc,
+                                               type_index,
+                                               *dex_compilation_unit_->GetDexFile(),
+                                               entrypoint));
+      UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
+      uint32_t type_index = instruction.VRegB_35c();
+      uint32_t args[5];
+      instruction.GetVarArgs(args);
+      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+      break;
+    }
+
+    case Instruction::FILLED_NEW_ARRAY_RANGE: {
+      uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
+      uint32_t type_index = instruction.VRegB_3rc();
+      uint32_t register_index = instruction.VRegC_3rc();
+      BuildFilledNewArray(
+          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+      break;
+    }
+
+    case Instruction::FILL_ARRAY_DATA: {
+      BuildFillArrayData(instruction, dex_pc);
+      break;
+    }
+
+    case Instruction::MOVE_RESULT:
+    case Instruction::MOVE_RESULT_WIDE:
+    case Instruction::MOVE_RESULT_OBJECT: {
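+      // `latest_result_` was set by the immediately preceding invoke or
+      // filled-new-array instruction.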
+      DCHECK(latest_result_ != nullptr);
+      UpdateLocal(instruction.VRegA(), latest_result_);
+      latest_result_ = nullptr;
+      break;
+    }
+
+    case Instruction::CMP_LONG: {
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPG_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::CMPL_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc);
+      break;
+    }
+
+    case Instruction::NOP:
+      break;
+
+    case Instruction::IGET:
+    case Instruction::IGET_QUICK:
+    case Instruction::IGET_WIDE:
+    case Instruction::IGET_WIDE_QUICK:
+    case Instruction::IGET_OBJECT:
+    case Instruction::IGET_OBJECT_QUICK:
+    case Instruction::IGET_BOOLEAN:
+    case Instruction::IGET_BOOLEAN_QUICK:
+    case Instruction::IGET_BYTE:
+    case Instruction::IGET_BYTE_QUICK:
+    case Instruction::IGET_CHAR:
+    case Instruction::IGET_CHAR_QUICK:
+    case Instruction::IGET_SHORT:
+    case Instruction::IGET_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::IPUT:
+    case Instruction::IPUT_QUICK:
+    case Instruction::IPUT_WIDE:
+    case Instruction::IPUT_WIDE_QUICK:
+    case Instruction::IPUT_OBJECT:
+    case Instruction::IPUT_OBJECT_QUICK:
+    case Instruction::IPUT_BOOLEAN:
+    case Instruction::IPUT_BOOLEAN_QUICK:
+    case Instruction::IPUT_BYTE:
+    case Instruction::IPUT_BYTE_QUICK:
+    case Instruction::IPUT_CHAR:
+    case Instruction::IPUT_CHAR_QUICK:
+    case Instruction::IPUT_SHORT:
+    case Instruction::IPUT_SHORT_QUICK: {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SGET:
+    case Instruction::SGET_WIDE:
+    case Instruction::SGET_OBJECT:
+    case Instruction::SGET_BOOLEAN:
+    case Instruction::SGET_BYTE:
+    case Instruction::SGET_CHAR:
+    case Instruction::SGET_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
+        return false;
+      }
+      break;
+    }
+
+    case Instruction::SPUT:
+    case Instruction::SPUT_WIDE:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT: {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
+        return false;
+      }
+      break;
+    }
+
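+    // Expands into an AGET and an APUT case for each element kind; the
+    // anticipated type of int/float and long/double accesses is later
+    // disambiguated by the SSA builder.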
+#define ARRAY_XX(kind, anticipated_type)                                      \
+    case Instruction::AGET##kind: {                                           \
+      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);         \
+      break;                                                                  \
+    }                                                                         \
+    case Instruction::APUT##kind: {                                           \
+      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);          \
+      break;                                                                  \
+    }
+
+    ARRAY_XX(, Primitive::kPrimInt);
+    ARRAY_XX(_WIDE, Primitive::kPrimLong);
+    ARRAY_XX(_OBJECT, Primitive::kPrimNot);
+    ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean);
+    ARRAY_XX(_BYTE, Primitive::kPrimByte);
+    ARRAY_XX(_CHAR, Primitive::kPrimChar);
+    ARRAY_XX(_SHORT, Primitive::kPrimShort);
+
+    case Instruction::ARRAY_LENGTH: {
+      HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot);
+      object = new (arena_) HNullCheck(object, dex_pc);
+      AppendInstruction(object);
+      AppendInstruction(new (arena_) HArrayLength(object, dex_pc));
+      UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING: {
+      uint32_t string_index = instruction.VRegB_21c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_STRING_JUMBO: {
+      uint32_t string_index = instruction.VRegB_31c();
+      AppendInstruction(
+          new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc));
+      UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::CONST_CLASS: {
+      uint16_t type_index = instruction.VRegB_21c();
+      bool type_known_final;
+      bool type_known_abstract;
+      bool dont_use_is_referrers_class;
+      // `CanAccessTypeWithoutChecks` tells us whether the method being built
+      // is accessing its own class, so that the generated code can optimize
+      // for this case. However, the optimization does not work for inlining,
+      // so we use `IsOutermostCompilingClass` instead.
+      bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
+          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
+          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
+      AppendInstruction(new (arena_) HLoadClass(
+          graph_->GetCurrentMethod(),
+          type_index,
+          *dex_file_,
+          IsOutermostCompilingClass(type_index),
+          dex_pc,
+          !can_access,
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
+      UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+      break;
+    }
+
+    case Instruction::MOVE_EXCEPTION: {
+      AppendInstruction(new (arena_) HLoadException(dex_pc));
+      UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction());
+      AppendInstruction(new (arena_) HClearException(dex_pc));
+      break;
+    }
+
+    case Instruction::THROW: {
+      HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot);
+      AppendInstruction(new (arena_) HThrow(exception, dex_pc));
+      // We finished building this block. Set the current block to null to avoid
+      // adding dead instructions to it.
+      current_block_ = nullptr;
+      break;
+    }
+
+    case Instruction::INSTANCE_OF: {
+      uint8_t destination = instruction.VRegA_22c();
+      uint8_t reference = instruction.VRegB_22c();
+      uint16_t type_index = instruction.VRegC_22c();
+      BuildTypeCheck(instruction, destination, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::CHECK_CAST: {
+      uint8_t reference = instruction.VRegA_21c();
+      uint16_t type_index = instruction.VRegB_21c();
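+      // The destination register is only meaningful for INSTANCE_OF, so a
+      // dummy value is passed for CHECK_CAST.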
+      BuildTypeCheck(instruction, -1, reference, type_index, dex_pc);
+      break;
+    }
+
+    case Instruction::MONITOR_ENTER: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kEnter,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::MONITOR_EXIT: {
+      AppendInstruction(new (arena_) HMonitorOperation(
+          LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
+          HMonitorOperation::OperationKind::kExit,
+          dex_pc));
+      break;
+    }
+
+    case Instruction::SPARSE_SWITCH:
+    case Instruction::PACKED_SWITCH: {
+      BuildSwitch(instruction, dex_pc);
+      break;
+    }
+
+    default:
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of unhandled instruction "
+                     << instruction.Name();
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
+      return false;
+  }
+  return true;
+}  // NOLINT(readability/fn_size)
+
+}  // namespace art
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
new file mode 100644
index 0000000..612594f
--- /dev/null
+++ b/compiler/optimizing/instruction_builder.h
@@ -0,0 +1,303 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
+
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
+#include "block_builder.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
+#include "driver/dex_compilation_unit.h"
+#include "mirror/dex_cache.h"
+#include "nodes.h"
+#include "optimizing_compiler_stats.h"
+#include "ssa_builder.h"
+
+namespace art {
+
+class HInstructionBuilder : public ValueObject {
+ public:
+  HInstructionBuilder(HGraph* graph,
+                      HBasicBlockBuilder* block_builder,
+                      SsaBuilder* ssa_builder,
+                      const DexFile* dex_file,
+                      const DexFile::CodeItem& code_item,
+                      Primitive::Type return_type,
+                      DexCompilationUnit* dex_compilation_unit,
+                      const DexCompilationUnit* const outer_compilation_unit,
+                      CompilerDriver* driver,
+                      const uint8_t* interpreter_metadata,
+                      OptimizingCompilerStats* compiler_stats,
+                      Handle<mirror::DexCache> dex_cache)
+      : arena_(graph->GetArena()),
+        graph_(graph),
+        dex_file_(dex_file),
+        code_item_(code_item),
+        return_type_(return_type),
+        block_builder_(block_builder),
+        ssa_builder_(ssa_builder),
+        locals_for_(arena_->Adapter(kArenaAllocGraphBuilder)),
+        current_block_(nullptr),
+        current_locals_(nullptr),
+        latest_result_(nullptr),
+        compiler_driver_(driver),
+        dex_compilation_unit_(dex_compilation_unit),
+        outer_compilation_unit_(outer_compilation_unit),
+        interpreter_metadata_(interpreter_metadata),
+        compilation_stats_(compiler_stats),
+        dex_cache_(dex_cache),
+        loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    loop_headers_.reserve(kDefaultNumberOfLoops);
+  }
+
+  bool Build();
+
+ private:
+  void MaybeRecordStat(MethodCompilationStat compilation_stat);
+
+  void InitializeBlockLocals();
+  void PropagateLocalsToCatchBlocks();
+  void SetLoopHeaderPhiInputs();
+
+  bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc);
+  void FindNativeDebugInfoLocations(ArenaBitVector* locations);
+
+  bool CanDecodeQuickenedInfo() const;
+  uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+
+  HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
+
+  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+  HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
+  HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
+  void UpdateLocal(uint32_t register_index, HInstruction* instruction);
+
+  void AppendInstruction(HInstruction* instruction);
+  void InsertInstructionAtTop(HInstruction* instruction);
+  void InitializeInstruction(HInstruction* instruction);
+
+  void InitializeParameters();
+
+  // Returns whether the current method needs an access check for the type.
+  // The out parameter `finalizable` is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
+
+  template<typename T>
+  void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  void Binop_23x_cmp(const Instruction& instruction,
+                     Primitive::Type type,
+                     ComparisonBias bias,
+                     uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T>
+  void Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc);
+
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
+
+  void Conversion_12x(const Instruction& instruction,
+                      Primitive::Type input_type,
+                      Primitive::Type result_type,
+                      uint32_t dex_pc);
+
+  void BuildCheckedDivRem(uint16_t out_vreg,
+                          uint16_t first_vreg,
+                          int64_t second_vreg_or_constant,
+                          uint32_t dex_pc,
+                          Primitive::Type type,
+                          bool second_is_constant,
+                          bool is_div);
+
+  void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  // Builds an instance field access node and returns whether the instruction is supported.
+  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
+                                        uint32_t dex_pc,
+                                        bool is_put,
+                                        Primitive::Type field_type);
+
+  // Builds a static field access node and returns whether the instruction is supported.
+  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+
+  void BuildArrayAccess(const Instruction& instruction,
+                        uint32_t dex_pc,
+                        bool is_put,
+                        Primitive::Type anticipated_type);
+
+  // Builds an invocation node and returns whether the instruction is supported.
+  bool BuildInvoke(const Instruction& instruction,
+                   uint32_t dex_pc,
+                   uint32_t method_idx,
+                   uint32_t number_of_vreg_arguments,
+                   bool is_range,
+                   uint32_t* args,
+                   uint32_t register_index);
+
+  // Builds a new array node and the instructions that fill it.
+  void BuildFilledNewArray(uint32_t dex_pc,
+                           uint32_t type_index,
+                           uint32_t number_of_vreg_arguments,
+                           bool is_range,
+                           uint32_t* args,
+                           uint32_t register_index);
+
+  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. Currently only used for non-reference and non-floating-point
+  // arrays.
+  template <typename T>
+  void BuildFillArrayData(HInstruction* object,
+                          const T* data,
+                          uint32_t element_count,
+                          Primitive::Type anticipated_type,
+                          uint32_t dex_pc);
+
+  // Fills the given object with data as specified in the fill-array-data
+  // instruction. The data must be for a long or double array.
+  void BuildFillWideArrayData(HInstruction* object,
+                              const int64_t* data,
+                              uint32_t element_count,
+                              uint32_t dex_pc);
+
+  // Builds an `HInstanceOf` or an `HCheckCast` instruction.
+  void BuildTypeCheck(const Instruction& instruction,
+                      uint8_t destination,
+                      uint8_t reference,
+                      uint16_t type_index,
+                      uint32_t dex_pc);
+
+  // Builds an instruction sequence for a switch statement.
+  void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
+
+  // Returns the outermost compiling method's class.
+  mirror::Class* GetOutermostCompilingClass() const;
+
+  // Returns the class whose method is being compiled.
+  mirror::Class* GetCompilingClass() const;
+
+  // Returns whether `type_index` points to the outermost compiling method's class.
+  bool IsOutermostCompilingClass(uint16_t type_index) const;
+
+  void PotentiallySimplifyFakeString(uint16_t original_dex_register,
+                                     uint32_t dex_pc,
+                                     HInvoke* invoke);
+
+  bool SetupInvokeArguments(HInvoke* invoke,
+                            uint32_t number_of_vreg_arguments,
+                            uint32_t* args,
+                            uint32_t register_index,
+                            bool is_range,
+                            const char* descriptor,
+                            size_t start_index,
+                            size_t* argument_index);
+
+  bool HandleInvoke(HInvoke* invoke,
+                    uint32_t number_of_vreg_arguments,
+                    uint32_t* args,
+                    uint32_t register_index,
+                    bool is_range,
+                    const char* descriptor,
+                    HClinitCheck* clinit_check);
+
+  bool HandleStringInit(HInvoke* invoke,
+                        uint32_t number_of_vreg_arguments,
+                        uint32_t* args,
+                        uint32_t register_index,
+                        bool is_range,
+                        const char* descriptor);
+  void HandleStringInitResult(HInvokeStaticOrDirect* invoke);
+
+  HClinitCheck* ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Builds an `HNewInstance` instruction.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Returns whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Tries to resolve a method using the class linker. Returns null if the
+  // method could not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
+  ArenaAllocator* const arena_;
+  HGraph* const graph_;
+
+  // The dex file where the method being compiled is, and the bytecode data.
+  const DexFile* const dex_file_;
+  const DexFile::CodeItem& code_item_;
+
+  // The return type of the method being compiled.
+  const Primitive::Type return_type_;
+
+  HBasicBlockBuilder* block_builder_;
+  SsaBuilder* ssa_builder_;
+
+  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
+  HBasicBlock* current_block_;
+  ArenaVector<HInstruction*>* current_locals_;
+  HInstruction* latest_result_;
+
+  CompilerDriver* const compiler_driver_;
+
+  // The compilation unit of the current method being compiled. Note that
+  // it can be an inlined method.
+  DexCompilationUnit* const dex_compilation_unit_;
+
+  // The compilation unit of the outermost method being compiled. That is, the
+  // method actually being compiled (as opposed to inlined), which potentially
+  // inlines other methods.
+  const DexCompilationUnit* const outer_compilation_unit_;
+
+  const uint8_t* interpreter_metadata_;
+  OptimizingCompilerStats* compilation_stats_;
+  Handle<mirror::DexCache> dex_cache_;
+
+  ArenaVector<HBasicBlock*> loop_headers_;
+
+  static constexpr int kDefaultNumberOfLoops = 2;
+
+  DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
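A note on the state captured above: `locals_for_` keeps one vector of value definitions per basic block, indexed by dex register number. A minimal standalone sketch of that lookup, with illustrative names only (Value, Locals and ValueOfVReg are not ART identifiers):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Value {};  // stands in for HInstruction

    using Locals = std::vector<Value*>;  // one slot per dex register

    // Latest definition of `vreg` in block `block_id`, or null if undefined.
    Value* ValueOfVReg(const std::vector<Locals>& locals_for,
                       uint32_t block_id,
                       std::size_t vreg) {
      const Locals& locals = locals_for[block_id];
      return vreg < locals.size() ? locals[vreg] : nullptr;
    }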
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5de2306..a589ef0 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -368,17 +368,16 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenBitCount(HInvoke* instr, bool is_long, vixl::MacroAssembler* masm) {
-  DCHECK(instr->GetType() == Primitive::kPrimInt);
-  DCHECK((is_long && instr->InputAt(0)->GetType() == Primitive::kPrimLong) ||
-         (!is_long && instr->InputAt(0)->GetType() == Primitive::kPrimInt));
+static void GenBitCount(HInvoke* instr, Primitive::Type type, vixl::MacroAssembler* masm) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
 
-  Location out = instr->GetLocations()->Out();
   UseScratchRegisterScope temps(masm);
 
   Register src = InputRegisterAt(instr, 0);
-  Register dst = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
-  FPRegister fpr = is_long ? temps.AcquireD() : temps.AcquireS();
+  Register dst = RegisterFrom(instr->GetLocations()->Out(), type);
+  FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS();
 
   __ Fmov(fpr, src);
   __ Cnt(fpr.V8B(), fpr.V8B());
@@ -391,7 +390,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
-  GenBitCount(invoke, /* is_long */ true, GetVIXLAssembler());
+  GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
@@ -399,7 +398,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
-  GenBitCount(invoke, /* is_long */ false, GetVIXLAssembler());
+  GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
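The hunk above replaces a boolean `is_long` flag with an explicit `Primitive::Type` parameter. A minimal sketch of why that reads better, using illustrative (non-ART) names:

    #include <cassert>

    enum class Type { kInt, kLong, kFloat };

    // The type parameter lets the helper state exactly what it supports and
    // derive the register width from it, mirroring the DCHECKs above.
    int RegisterWidthBits(Type type) {
      assert(type == Type::kInt || type == Type::kLong);
      return type == Type::kLong ? 64 : 32;
    }

Call sites then read `GenBitCount(invoke, Primitive::kPrimLong, ...)` instead of `GenBitCount(invoke, /* is_long */ true, ...)`.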
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 1280587..19c6a22 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -614,8 +614,6 @@
                         Primitive::Type type,
                         bool isR6,
                         MipsAssembler* assembler) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
   Register out = locations->Out().AsRegister<Register>();
 
   // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -663,7 +661,8 @@
       __ MulR2(out, out, TMP);
     }
     __ Srl(out, out, 24);
-  } else if (type == Primitive::kPrimLong) {
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimLong);
     Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
     Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
     Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 95fdb9b..4aab3e2 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2387,10 +2387,10 @@
   if (invoke->InputAt(0)->IsConstant()) {
     // Evaluate this at compile time.
     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
-    value = is_long
+    int32_t result = is_long
         ? POPCOUNT(static_cast<uint64_t>(value))
         : POPCOUNT(static_cast<uint32_t>(value));
-    codegen->Load32BitValue(out, value);
+    codegen->Load32BitValue(out, result);
     return;
   }
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9e568f7..9ca4ef0 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2402,10 +2402,10 @@
   if (invoke->InputAt(0)->IsConstant()) {
     // Evaluate this at compile time.
     int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
-    value = is_long
+    int32_t result = is_long
         ? POPCOUNT(static_cast<uint64_t>(value))
         : POPCOUNT(static_cast<uint32_t>(value));
-    codegen->Load32BitValue(out, value);
+    codegen->Load32BitValue(out, result);
     return;
   }
 
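Both x86 hunks fix the same latent issue: the folded bit count was written back into the 64-bit `value` before being handed to a 32-bit load. A standalone model of the corrected folding (not the ART code itself; `std::popcount` stands in for the POPCOUNT macro):

    #include <bit>      // std::popcount (C++20)
    #include <cstdint>

    // The result of a popcount is at most 64, so it fits in an int32_t;
    // storing it in a fresh 32-bit variable makes the width explicit.
    int32_t FoldBitCount(int64_t value, bool is_long) {
      return is_long ? std::popcount(static_cast<uint64_t>(value))
                     : std::popcount(static_cast<uint32_t>(value));
    }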
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 3202493..bdaef1d 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -211,8 +211,8 @@
    *
    * Which becomes the following graph (numbered by lifetime position):
    *       2: constant0
-   *       4: constant4
-   *       6: constant5
+   *       4: constant5
+   *       6: constant4
    *       8: goto
    *           |
    *       12: goto
@@ -247,7 +247,7 @@
   liveness.Analyze();
 
   // Test for the 0 constant.
-  LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
+  LiveInterval* interval = graph->GetIntConstant(0)->GetLiveInterval();
   LiveRange* range = interval->GetFirstRange();
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the loop phi so instruction is live until
@@ -256,18 +256,18 @@
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 4 constant.
-  interval = liveness.GetInstructionFromSsaIndex(1)->GetLiveInterval();
+  interval = graph->GetIntConstant(4)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the end of the loop.
-  ASSERT_EQ(4u, range->GetStart());
+  ASSERT_EQ(6u, range->GetStart());
   ASSERT_EQ(24u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
   // Test for the 5 constant.
-  interval = liveness.GetInstructionFromSsaIndex(2)->GetLiveInterval();
+  interval = graph->GetIntConstant(5)->GetLiveInterval();
   range = interval->GetFirstRange();
   // The instruction is live until the return instruction after the loop.
-  ASSERT_EQ(6u, range->GetStart());
+  ASSERT_EQ(4u, range->GetStart());
   ASSERT_EQ(26u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 92a987c..bd74368 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -154,7 +154,7 @@
   // return a;
   //
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"  // entry block
     "  live in: (0000)\n"
@@ -165,11 +165,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"  // else block
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"  // then block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
@@ -291,7 +291,7 @@
   // }
   // return 5;
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi)
+  // (constant0, constant5, constant4, phi)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -310,7 +310,7 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // return block
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 5\n"  // exit block
@@ -386,7 +386,7 @@
   // Make sure we create a preheader of a loop when a header originally has two
   // incoming blocks and one back edge.
   // Bitsets are made of:
-  // (constant0, constant4, constant5, phi in block 8)
+  // (constant0, constant5, constant4, phi in block 8)
   const char* expected =
     "Block 0\n"
     "  live in: (0000)\n"
@@ -397,11 +397,11 @@
     "  live out: (0110)\n"
     "  kill: (0000)\n"
     "Block 2\n"
-    "  live in: (0100)\n"
+    "  live in: (0010)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 3\n"
-    "  live in: (0010)\n"
+    "  live in: (0100)\n"
     "  live out: (0000)\n"
     "  kill: (0000)\n"
     "Block 4\n"  // loop header
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 9504481..1086cbf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -134,46 +134,44 @@
       if (block->IsExitBlock()) {
         SetExitBlock(nullptr);
       }
+      // Mark the block as removed. This is used by the HGraphBuilder to discard
+      // the block as a branch target.
+      block->SetGraph(nullptr);
     }
   }
 }
 
 GraphAnalysisResult HGraph::BuildDominatorTree() {
-  // (1) Simplify the CFG so that catch blocks have only exceptional incoming
-  //     edges. This invariant simplifies building SSA form because Phis cannot
-  //     collect both normal- and exceptional-flow values at the same time.
-  SimplifyCatchBlocks();
-
   ArenaBitVector visited(arena_, blocks_.size(), false, kArenaAllocGraphBuilder);
 
-  // (2) Find the back edges in the graph doing a DFS traversal.
+  // (1) Find the back edges in the graph doing a DFS traversal.
   FindBackEdges(&visited);
 
-  // (3) Remove instructions and phis from blocks not visited during
+  // (2) Remove instructions and phis from blocks not visited during
   //     the initial DFS as users from other instructions, so that
   //     users can be safely removed before uses later.
   RemoveInstructionsAsUsersFromDeadBlocks(visited);
 
-  // (4) Remove blocks not visited during the initial DFS.
+  // (3) Remove blocks not visited during the initial DFS.
-  //     Step (5) requires dead blocks to be removed from the
+  //     Step (4) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
-  // (5) Simplify the CFG now, so that we don't need to recompute
+  // (4) Simplify the CFG now, so that we don't need to recompute
   //     dominators and the reverse post order.
   SimplifyCFG();
 
-  // (6) Compute the dominance information and the reverse post order.
+  // (5) Compute the dominance information and the reverse post order.
   ComputeDominanceInformation();
 
-  // (7) Analyze loops discovered through back edge analysis, and
+  // (6) Analyze loops discovered through back edge analysis, and
   //     set the loop information on each block.
   GraphAnalysisResult result = AnalyzeLoops();
   if (result != kAnalysisSuccess) {
     return result;
   }
 
-  // (8) Precompute per-block try membership before entering the SSA builder,
+  // (7) Precompute per-block try membership before entering the SSA builder,
   //     which needs the information to build catch block phis from values of
   //     locals at throwing instructions inside try blocks.
   ComputeTryBlockInformation();
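For reference, the renumbered analysis pipeline now reads:

    FindBackEdges
      -> RemoveInstructionsAsUsersFromDeadBlocks
      -> RemoveDeadBlocks
      -> SimplifyCFG
      -> ComputeDominanceInformation
      -> AnalyzeLoops
      -> ComputeTryBlockInformation

with the former step (1), SimplifyCatchBlocks, removed entirely (see the deleted code further down).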
@@ -320,85 +318,10 @@
     }
   }
 
-  // Place the suspend check at the beginning of the header, so that live registers
-  // will be known when allocating registers. Note that code generation can still
-  // generate the suspend check at the back edge, but needs to be careful with
-  // loop phi spill slots (which are not written to at back edge).
   HInstruction* first_instruction = header->GetFirstInstruction();
-  if (!first_instruction->IsSuspendCheck()) {
-    HSuspendCheck* check = new (arena_) HSuspendCheck(header->GetDexPc());
-    header->InsertInstructionBefore(check, first_instruction);
-    first_instruction = check;
-  }
-  info->SetSuspendCheck(first_instruction->AsSuspendCheck());
-}
-
-static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
-  HBasicBlock* predecessor = block.GetPredecessors()[pred_idx];
-  if (!predecessor->EndsWithTryBoundary()) {
-    // Only edges from HTryBoundary can be exceptional.
-    return false;
-  }
-  HTryBoundary* try_boundary = predecessor->GetLastInstruction()->AsTryBoundary();
-  if (try_boundary->GetNormalFlowSuccessor() == &block) {
-    // This block is the normal-flow successor of `try_boundary`, but it could
-    // also be one of its exception handlers if catch blocks have not been
-    // simplified yet. Predecessors are unordered, so we will consider the first
-    // occurrence to be the normal edge and a possible second occurrence to be
-    // the exceptional edge.
-    return !block.IsFirstIndexOfPredecessor(predecessor, pred_idx);
-  } else {
-    // This is not the normal-flow successor of `try_boundary`, hence it must be
-    // one of its exception handlers.
-    DCHECK(try_boundary->HasExceptionHandler(block));
-    return true;
-  }
-}
-
-void HGraph::SimplifyCatchBlocks() {
-  // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
-  // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
-  for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
-    HBasicBlock* catch_block = blocks_[block_id];
-    if (catch_block == nullptr || !catch_block->IsCatchBlock()) {
-      continue;
-    }
-
-    bool exceptional_predecessors_only = true;
-    for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-      if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-        exceptional_predecessors_only = false;
-        break;
-      }
-    }
-
-    if (!exceptional_predecessors_only) {
-      // Catch block has normal-flow predecessors and needs to be simplified.
-      // Splitting the block before its first instruction moves all its
-      // instructions into `normal_block` and links the two blocks with a Goto.
-      // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
-      // leaving `catch_block` with the exceptional edges only.
-      //
-      // Note that catch blocks with normal-flow predecessors cannot begin with
-      // a move-exception instruction, as guaranteed by the verifier. However,
-      // trivially dead predecessors are ignored by the verifier and such code
-      // has not been removed at this stage. We therefore ignore the assumption
-      // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628).
-      HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException();
-      if (normal_block == nullptr) {
-        // Catch block is either empty or only contains a move-exception. It must
-        // therefore be dead and will be removed during initial DCE. Do nothing.
-        DCHECK(!catch_block->EndsWithControlFlowInstruction());
-      } else {
-        // Catch block was split. Re-link normal-flow edges to the new block.
-        for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-          if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-            catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
-            --j;
-          }
-        }
-      }
-    }
+  if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) {
+    // Called from DeadBlockElimination. Update SuspendCheck pointer.
+    info->SetSuspendCheck(first_instruction->AsSuspendCheck());
   }
 }
 
@@ -447,10 +370,9 @@
         HBasicBlock* successor = normal_successors[j];
         DCHECK(!successor->IsCatchBlock());
         if (successor == exit_block_) {
-          // Throw->TryBoundary->Exit. Special case which we do not want to split
-          // because Goto->Exit is not allowed.
+          // (Throw/Return/ReturnVoid)->TryBoundary->Exit. Special case which we
+          // do not want to split because Goto->Exit is not allowed.
           DCHECK(block->IsSingleTryBoundary());
-          DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow());
         } else if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
           // SplitCriticalEdge could have invalidated the `normal_successors`
@@ -463,8 +385,10 @@
     }
     if (block->IsLoopHeader()) {
       SimplifyLoop(block);
-    } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
-      // We are being called by the dead code elimination pass, and what used to be
+    } else if (!block->IsEntryBlock() &&
+               block->GetFirstInstruction() != nullptr &&
+               block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
       // a loop got dismantled. Just remove the suspend check.
       block->RemoveInstruction(block->GetFirstInstruction());
     }
@@ -502,12 +426,25 @@
 }
 
 void HGraph::InsertConstant(HConstant* constant) {
-  // New constants are inserted before the final control-flow instruction
-  // of the graph, or at its end if called from the graph builder.
-  if (entry_block_->EndsWithControlFlowInstruction()) {
-    entry_block_->InsertInstructionBefore(constant, entry_block_->GetLastInstruction());
-  } else {
+  // New constants are inserted before the SuspendCheck at the bottom of the
+  // entry block. Note that this method can be called from the graph builder and
+  // the entry block therefore may not end with SuspendCheck->Goto yet.
+  HInstruction* insert_before = nullptr;
+
+  HInstruction* gota = entry_block_->GetLastInstruction();
+  if (gota != nullptr && gota->IsGoto()) {
+    HInstruction* suspend_check = gota->GetPrevious();
+    if (suspend_check != nullptr && suspend_check->IsSuspendCheck()) {
+      insert_before = suspend_check;
+    } else {
+      insert_before = gota;
+    }
+  }
+
+  if (insert_before == nullptr) {
     entry_block_->AddInstruction(constant);
+  } else {
+    entry_block_->InsertInstructionBefore(constant, insert_before);
   }
 }
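A minimal model of the insertion policy above, over a plain instruction list (illustrative only, not ART code):

    #include <iterator>
    #include <list>
    #include <string>

    // Insert before a trailing SuspendCheck+Goto pair if present, before a
    // lone trailing Goto otherwise, and append while the builder has not yet
    // terminated the entry block.
    void InsertConstant(std::list<std::string>& entry_block,
                        const std::string& constant) {
      auto pos = entry_block.end();
      if (!entry_block.empty() && entry_block.back() == "Goto") {
        pos = std::prev(entry_block.end());            // before the Goto
        if (pos != entry_block.begin() && *std::prev(pos) == "SuspendCheck") {
          pos = std::prev(pos);                        // before the SuspendCheck
        }
      }
      entry_block.insert(pos, constant);
    }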
 
@@ -1404,34 +1341,6 @@
   return new_block;
 }
 
-HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
-  DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only.";
-
-  HInstruction* first_insn = GetFirstInstruction();
-  HInstruction* split_before = nullptr;
-
-  if (first_insn != nullptr && first_insn->IsLoadException()) {
-    // Catch block starts with a LoadException. Split the block after
-    // the StoreLocal and ClearException which must come after the load.
-    DCHECK(first_insn->GetNext()->IsStoreLocal());
-    DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-    split_before = first_insn->GetNext()->GetNext()->GetNext();
-  } else {
-    // Catch block does not load the exception. Split at the beginning
-    // to create an empty catch block.
-    split_before = first_insn;
-  }
-
-  if (split_before == nullptr) {
-    // Catch block has no instructions after the split point (must be dead).
-    // Do not split it but rather signal error by returning nullptr.
-    return nullptr;
-  } else {
-    return SplitBefore(split_before);
-  }
-}
-
 HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) {
   DCHECK_EQ(cursor->GetBlock(), this);
 
@@ -1910,6 +1819,7 @@
 
   RemoveElement(reverse_post_order_, block);
   blocks_[block->GetBlockId()] = nullptr;
+  block->SetGraph(nullptr);
 }
 
 void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
@@ -1962,6 +1872,7 @@
            instr_it.Advance()) {
         HInstruction* current = instr_it.Current();
         if (current->NeedsEnvironment()) {
+          DCHECK(current->HasEnvironment());
           current->GetEnvironment()->SetAndCopyParentChain(
               outer_graph->GetArena(), invoke->GetEnvironment());
         }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 9425ef3..0088fed 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,7 +26,6 @@
 #include "base/arena_object.h"
 #include "base/stl_util.h"
 #include "dex/compiler_enums.h"
-#include "dex_instruction-inl.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -101,6 +100,7 @@
 };
 
 enum GraphAnalysisResult {
+  kAnalysisSkipped,
   kAnalysisInvalidBytecode,
   kAnalysisFailThrowCatchLoop,
   kAnalysisFailAmbiguousArrayOp,
@@ -427,6 +427,10 @@
     number_of_in_vregs_ = value;
   }
 
+  uint16_t GetNumberOfInVRegs() const {
+    return number_of_in_vregs_;
+  }
+
   uint16_t GetNumberOfLocalVRegs() const {
     DCHECK(!in_ssa_form_);
     return number_of_vregs_ - number_of_in_vregs_;
@@ -999,15 +1003,6 @@
   // Similar to `SplitBeforeForInlining` but does it after `cursor`.
   HBasicBlock* SplitAfterForInlining(HInstruction* cursor);
 
-  // Split catch block into two blocks after the original move-exception bytecode
-  // instruction, or at the beginning if not present. Returns the newly created,
-  // latter block, or nullptr if such block could not be created (must be dead
-  // in that case). Note that this method just updates raw block information,
-  // like predecessors, successors, dominators, and instruction list. It does not
-  // update the graph, reverse post order, loop information, nor make sure the
-  // blocks are consistent (for example ending with a control flow instruction).
-  HBasicBlock* SplitCatchBlockAfterMoveException();
-
   // Merge `other` at the end of `this`. Successors and dominated blocks of
   // `other` are changed to be successors and dominated blocks of `this`. Note
   // that this method does not update the graph, reverse post order, loop
@@ -1220,9 +1215,7 @@
   M(LessThanOrEqual, Condition)                                         \
   M(LoadClass, Instruction)                                             \
   M(LoadException, Instruction)                                         \
-  M(LoadLocal, Instruction)                                             \
   M(LoadString, Instruction)                                            \
-  M(Local, Instruction)                                                 \
   M(LongConstant, Constant)                                             \
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
@@ -1253,7 +1246,6 @@
   M(UnresolvedStaticFieldGet, Instruction)                              \
   M(UnresolvedStaticFieldSet, Instruction)                              \
   M(Select, Instruction)                                                \
-  M(StoreLocal, Instruction)                                            \
   M(Sub, BinaryOperation)                                               \
   M(SuspendCheck, Instruction)                                          \
   M(Throw, Instruction)                                                 \
@@ -2392,6 +2384,107 @@
   DISALLOW_COPY_AND_ASSIGN(HReturn);
 };
 
+class HPhi : public HInstruction {
+ public:
+  HPhi(ArenaAllocator* arena,
+       uint32_t reg_number,
+       size_t number_of_inputs,
+       Primitive::Type type,
+       uint32_t dex_pc = kNoDexPc)
+      : HInstruction(SideEffects::None(), dex_pc),
+        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
+        reg_number_(reg_number) {
+    SetPackedField<TypeField>(ToPhiType(type));
+    DCHECK_NE(GetType(), Primitive::kPrimVoid);
+    // Phis are constructed live and marked dead if conflicting or unused.
+    // Individual steps of SsaBuilder should assume that if a phi has been
+    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+    SetPackedFlag<kFlagIsLive>(true);
+    SetPackedFlag<kFlagCanBeNull>(true);
+  }
+
+  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
+  static Primitive::Type ToPhiType(Primitive::Type type) {
+    return Primitive::PrimitiveKind(type);
+  }
+
+  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
+
+  size_t InputCount() const OVERRIDE { return inputs_.size(); }
+
+  void AddInput(HInstruction* input);
+  void RemoveInputAt(size_t index);
+
+  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
+  void SetType(Primitive::Type new_type) {
+    // Make sure that only valid type changes occur. The following are allowed:
+    //  (1) int  -> float/ref (primitive type propagation),
+    //  (2) long -> double (primitive type propagation).
+    DCHECK(GetType() == new_type ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
+           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
+           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
+    SetPackedField<TypeField>(new_type);
+  }
+
+  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
+
+  uint32_t GetRegNumber() const { return reg_number_; }
+
+  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
+  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
+  bool IsDead() const { return !IsLive(); }
+  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
+
+  bool IsVRegEquivalentOf(HInstruction* other) const {
+    return other != nullptr
+        && other->IsPhi()
+        && other->AsPhi()->GetBlock() == GetBlock()
+        && other->AsPhi()->GetRegNumber() == GetRegNumber();
+  }
+
+  // Returns the next equivalent phi (starting from the current one) or null if there is none.
+  // An equivalent phi is a phi having the same dex register and type.
+  // It assumes that phis with the same dex register are adjacent.
+  HPhi* GetNextEquivalentPhiWithSameType() {
+    HInstruction* next = GetNext();
+    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
+      if (next->GetType() == GetType()) {
+        return next->AsPhi();
+      }
+      next = next->GetNext();
+    }
+    return nullptr;
+  }
+
+  DECLARE_INSTRUCTION(Phi);
+
+ protected:
+  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
+    return inputs_[index];
+  }
+
+  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+    inputs_[index] = input;
+  }
+
+ private:
+  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
+  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
+  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
+  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+  ArenaVector<HUserRecord<HInstruction*> > inputs_;
+  const uint32_t reg_number_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPhi);
+};
+
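The moved HPhi now delegates ToPhiType to Primitive::PrimitiveKind; the switch it replaces (deleted further down in this file) shows the mapping: widen sub-int types to int, leave everything else alone. A standalone sketch with an illustrative enum:

    enum class PrimType { kBoolean, kByte, kChar, kShort, kInt,
                          kLong, kFloat, kDouble, kNot };

    PrimType PrimitiveKind(PrimType type) {
      switch (type) {
        case PrimType::kBoolean:
        case PrimType::kByte:
        case PrimType::kChar:
        case PrimType::kShort:
          return PrimType::kInt;  // a phi holds sub-word integral values as int
        default:
          return type;
      }
    }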
 // The exit instruction is the only instruction of the exit block.
 // Instructions aborting the method (HThrow and HReturn) must branch to the
 // exit block.
@@ -3552,57 +3645,6 @@
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
-// A local in the graph. Corresponds to a Dex register.
-class HLocal : public HTemplateInstruction<0> {
- public:
-  explicit HLocal(uint16_t reg_number)
-      : HTemplateInstruction(SideEffects::None(), kNoDexPc), reg_number_(reg_number) {}
-
-  DECLARE_INSTRUCTION(Local);
-
-  uint16_t GetRegNumber() const { return reg_number_; }
-
- private:
-  // The Dex register number.
-  const uint16_t reg_number_;
-
-  DISALLOW_COPY_AND_ASSIGN(HLocal);
-};
-
-// Load a given local. The local is an input of this instruction.
-class HLoadLocal : public HExpression<1> {
- public:
-  HLoadLocal(HLocal* local, Primitive::Type type, uint32_t dex_pc = kNoDexPc)
-      : HExpression(type, SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(LoadLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HLoadLocal);
-};
-
-// Store a value in a given local. This instruction has two inputs: the value
-// and the local.
-class HStoreLocal : public HTemplateInstruction<2> {
- public:
-  HStoreLocal(HLocal* local, HInstruction* value, uint32_t dex_pc = kNoDexPc)
-      : HTemplateInstruction(SideEffects::None(), dex_pc) {
-    SetRawInputAt(0, local);
-    SetRawInputAt(1, value);
-  }
-
-  HLocal* GetLocal() const { return reinterpret_cast<HLocal*>(InputAt(0)); }
-
-  DECLARE_INSTRUCTION(StoreLocal);
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(HStoreLocal);
-};
-
 class HNewInstance : public HExpression<2> {
  public:
   HNewInstance(HInstruction* cls,
@@ -3923,8 +3965,7 @@
                 // potentially one other if the clinit check is explicit, and potentially
                 // one other if the method is a string factory.
                 (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
-                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) +
-                    (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
                 return_type,
                 dex_pc,
                 method_index,
@@ -4052,15 +4093,6 @@
     DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
-  HInstruction* GetAndRemoveThisArgumentOfStringInit() {
-    DCHECK(IsStringInit());
-    size_t index = InputCount() - 1;
-    HInstruction* input = InputAt(index);
-    RemoveAsUserOfInput(index);
-    inputs_.pop_back();
-    return input;
-  }
-
   // Is this a call to a static method whose declaring class has an
   // explicit initialization check in the graph?
   bool IsStaticWithExplicitClinitCheck() const {
@@ -4903,7 +4935,6 @@
                     SideEffectsForArchRuntimeCalls(input->GetType(), result_type),
                     dex_pc) {
     SetRawInputAt(0, input);
-    DCHECK_NE(input->GetType(), result_type);
     // Invariant: We should never generate a conversion to a Boolean value.
     DCHECK_NE(Primitive::kPrimBoolean, result_type);
   }
@@ -4939,115 +4970,6 @@
 
 static constexpr uint32_t kNoRegNumber = -1;
 
-class HPhi : public HInstruction {
- public:
-  HPhi(ArenaAllocator* arena,
-       uint32_t reg_number,
-       size_t number_of_inputs,
-       Primitive::Type type,
-       uint32_t dex_pc = kNoDexPc)
-      : HInstruction(SideEffects::None(), dex_pc),
-        inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
-        reg_number_(reg_number) {
-    SetPackedField<TypeField>(ToPhiType(type));
-    DCHECK_NE(GetType(), Primitive::kPrimVoid);
-    // Phis are constructed live and marked dead if conflicting or unused.
-    // Individual steps of SsaBuilder should assume that if a phi has been
-    // marked dead, it can be ignored and will be removed by SsaPhiElimination.
-    SetPackedFlag<kFlagIsLive>(true);
-    SetPackedFlag<kFlagCanBeNull>(true);
-  }
-
-  // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
-  static Primitive::Type ToPhiType(Primitive::Type type) {
-    switch (type) {
-      case Primitive::kPrimBoolean:
-      case Primitive::kPrimByte:
-      case Primitive::kPrimShort:
-      case Primitive::kPrimChar:
-        return Primitive::kPrimInt;
-      default:
-        return type;
-    }
-  }
-
-  bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
-
-  size_t InputCount() const OVERRIDE { return inputs_.size(); }
-
-  void AddInput(HInstruction* input);
-  void RemoveInputAt(size_t index);
-
-  Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
-  void SetType(Primitive::Type new_type) {
-    // Make sure that only valid type changes occur. The following are allowed:
-    //  (1) int  -> float/ref (primitive type propagation),
-    //  (2) long -> double (primitive type propagation).
-    DCHECK(GetType() == new_type ||
-           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) ||
-           (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) ||
-           (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble));
-    SetPackedField<TypeField>(new_type);
-  }
-
-  bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
-  void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
-
-  uint32_t GetRegNumber() const { return reg_number_; }
-
-  void SetDead() { SetPackedFlag<kFlagIsLive>(false); }
-  void SetLive() { SetPackedFlag<kFlagIsLive>(true); }
-  bool IsDead() const { return !IsLive(); }
-  bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); }
-
-  bool IsVRegEquivalentOf(HInstruction* other) const {
-    return other != nullptr
-        && other->IsPhi()
-        && other->AsPhi()->GetBlock() == GetBlock()
-        && other->AsPhi()->GetRegNumber() == GetRegNumber();
-  }
-
-  // Returns the next equivalent phi (starting from the current one) or null if there is none.
-  // An equivalent phi is a phi having the same dex register and type.
-  // It assumes that phis with the same dex register are adjacent.
-  HPhi* GetNextEquivalentPhiWithSameType() {
-    HInstruction* next = GetNext();
-    while (next != nullptr && next->AsPhi()->GetRegNumber() == reg_number_) {
-      if (next->GetType() == GetType()) {
-        return next->AsPhi();
-      }
-      next = next->GetNext();
-    }
-    return nullptr;
-  }
-
-  DECLARE_INSTRUCTION(Phi);
-
- protected:
-  const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    return inputs_[index];
-  }
-
-  void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    inputs_[index] = input;
-  }
-
- private:
-  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
-  static constexpr size_t kFieldTypeSize =
-      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
-  static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
-  static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
-  static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
-  static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
-  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
-
-  ArenaVector<HUserRecord<HInstruction*> > inputs_;
-  const uint32_t reg_number_;
-
-  DISALLOW_COPY_AND_ASSIGN(HPhi);
-};
-
 class HNullCheck : public HExpression<1> {
  public:
   // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
@@ -5391,7 +5313,7 @@
   // constructor.
   HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
       : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
-    DCHECK(index->GetType() == Primitive::kPrimInt);
+    DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType()));
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
   }
@@ -5415,7 +5337,7 @@
 
 class HSuspendCheck : public HTemplateInstruction<0> {
  public:
-  explicit HSuspendCheck(uint32_t dex_pc)
+  explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
       : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {}
 
   bool NeedsEnvironment() const OVERRIDE {
@@ -5922,7 +5844,7 @@
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
         field_index_(field_index) {
     SetPackedField<FieldTypeField>(field_type);
-    DCHECK_EQ(field_type, value->GetType());
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, obj);
     SetRawInputAt(1, value);
   }
@@ -5982,7 +5904,7 @@
       : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc),
         field_index_(field_index) {
     SetPackedField<FieldTypeField>(field_type);
-    DCHECK_EQ(field_type, value->GetType());
+    DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType()));
     SetRawInputAt(0, value);
   }
 
@@ -6707,74 +6629,6 @@
   FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
-class SwitchTable : public ValueObject {
- public:
-  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
-      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
-    int32_t table_offset = instruction.VRegB_31t();
-    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
-    if (sparse) {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-    } else {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-    }
-    num_entries_ = table[1];
-    values_ = reinterpret_cast<const int32_t*>(&table[2]);
-  }
-
-  uint16_t GetNumEntries() const {
-    return num_entries_;
-  }
-
-  void CheckIndex(size_t index) const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
-    }
-  }
-
-  int32_t GetEntryAt(size_t index) const {
-    CheckIndex(index);
-    return values_[index];
-  }
-
-  uint32_t GetDexPcForIndex(size_t index) const {
-    CheckIndex(index);
-    return dex_pc_ +
-        (reinterpret_cast<const int16_t*>(values_ + index) -
-         reinterpret_cast<const int16_t*>(&instruction_));
-  }
-
-  // Index of the first value in the table.
-  size_t GetFirstValueIndex() const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      return num_entries_;
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      return 1;
-    }
-  }
-
- private:
-  const Instruction& instruction_;
-  const uint32_t dex_pc_;
-
-  // Whether this is a sparse-switch table (or a packed-switch one).
-  const bool sparse_;
-
-  // This can't be const as it needs to be computed off of the given instruction, and complicated
-  // expressions in the initializer list seemed very ugly.
-  uint16_t num_entries_;
-
-  const int32_t* values_;
-
-  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
 // Create space in `blocks` for adding `number_of_new_blocks` entries
 // starting at location `at`. Blocks after `at` are moved accordingly.
 inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 886c9e2..3d6bf62 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -727,14 +727,20 @@
                             &dex_compilation_unit,
                             &dex_compilation_unit,
                             &dex_file,
+                            *code_item,
                             compiler_driver,
                             compilation_stats_.get(),
                             interpreter_metadata,
-                            dex_cache);
-      GraphAnalysisResult result = builder.BuildGraph(*code_item, &handles);
+                            dex_cache,
+                            &handles);
+      GraphAnalysisResult result = builder.BuildGraph();
       if (result != kAnalysisSuccess) {
         switch (result) {
+          case kAnalysisSkipped:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+            break;
           case kAnalysisInvalidBytecode:
+            MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
             break;
           case kAnalysisFailThrowCatchLoop:
             MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 3717926..9cc6ea4 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,8 @@
   kRemovedCheckedCast,
   kRemovedDeadInstruction,
   kRemovedNullCheck,
-  kNotCompiledBranchOutsideMethodCode,
+  kNotCompiledSkipped,
+  kNotCompiledInvalidBytecode,
   kNotCompiledThrowCatchLoop,
   kNotCompiledAmbiguousArrayOp,
   kNotCompiledHugeMethod,
@@ -115,7 +116,8 @@
       case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
       case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
       case kRemovedNullCheck: name = "RemovedNullCheck"; break;
-      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledSkipped: name = "NotCompiledSkipped"; break;
+      case kNotCompiledInvalidBytecode: name = "NotCompiledInvalidBytecode"; break;
       case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
       case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break;
       case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 0ca7305..dd5cb1c 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -91,8 +91,8 @@
   {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScopeCollection handles(soa.Self());
-    HGraphBuilder builder(graph, return_type);
-    bool graph_built = (builder.BuildGraph(*item, &handles) == kAnalysisSuccess);
+    HGraphBuilder builder(graph, *item, &handles, return_type);
+    bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
     return graph_built ? graph : nullptr;
   }
 }
@@ -109,7 +109,8 @@
   std::string result = original;
   for (const auto& p : diff) {
     std::string::size_type pos = result.find(p.first);
-    EXPECT_NE(pos, std::string::npos);
+    DCHECK_NE(pos, std::string::npos)
+        << "Could not find: \"" << p.first << "\" in \"" << result << "\"";
     result.replace(pos, p.first.size(), p.second);
   }
   return result;
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index d5b95d2..951cdfb 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -44,27 +44,27 @@
 
   const char* expected =
       "BasicBlock 0, succ: 1\n"
-      "  2: SuspendCheck\n"
-      "  3: Goto 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
       "BasicBlock 1, pred: 0, succ: 2\n"
-      "  0: ReturnVoid\n"
+      "  2: ReturnVoid\n"
       "BasicBlock 2, pred: 1\n"
-      "  1: Exit\n";
+      "  3: Exit\n";
 
   TestCode(data, expected);
 }
 
 TEST_F(PrettyPrinterTest, CFG1) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  0: Goto 2\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  1: ReturnVoid\n"
-    "BasicBlock 3, pred: 2\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Goto 2\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  3: ReturnVoid\n"
+      "BasicBlock 3, pred: 2\n"
+      "  4: Exit\n";
 
   const uint16_t data[] =
     ZERO_REGISTER_CODE_ITEM(
@@ -76,17 +76,17 @@
 
 TEST_F(PrettyPrinterTest, CFG2) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  4: SuspendCheck\n"
-    "  5: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  0: Goto 2\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  1: Goto 3\n"
-    "BasicBlock 3, pred: 2, succ: 4\n"
-    "  2: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  3: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  2: Goto 2\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  3: Goto 3\n"
+      "BasicBlock 3, pred: 2, succ: 4\n"
+      "  4: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  5: Exit\n";
 
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x100,
@@ -98,17 +98,17 @@
 
 TEST_F(PrettyPrinterTest, CFG3) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  4: SuspendCheck\n"
-    "  5: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  0: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  1: ReturnVoid\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  2: Goto 2\n"
-    "BasicBlock 4, pred: 2\n"
-    "  3: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  2: Goto 3\n"
+      "BasicBlock 2, pred: 3, succ: 4\n"
+      "  4: ReturnVoid\n"
+      "BasicBlock 3, pred: 1, succ: 2\n"
+      "  3: Goto 2\n"
+      "BasicBlock 4, pred: 2\n"
+      "  5: Exit\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::GOTO | 0x200,
@@ -134,14 +134,14 @@
 
 TEST_F(PrettyPrinterTest, CFG4) {
   const char* expected =
-    "BasicBlock 0, succ: 3\n"
-    "  2: SuspendCheck\n"
-    "  3: Goto 3\n"
-    "BasicBlock 1, pred: 3, 1, succ: 1\n"
-    "  5: SuspendCheck\n"
-    "  0: Goto 1\n"
-    "BasicBlock 3, pred: 0, succ: 1\n"
-    "  4: Goto 1\n";
+      "BasicBlock 0, succ: 3\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 3\n"
+      "BasicBlock 1, pred: 3, 1, succ: 1\n"
+      "  3: SuspendCheck\n"
+      "  4: Goto 1\n"
+      "BasicBlock 3, pred: 0, succ: 1\n"
+      "  0: Goto 1\n";
 
   const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::NOP,
@@ -157,13 +157,13 @@
 
 TEST_F(PrettyPrinterTest, CFG5) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  3: SuspendCheck\n"
-    "  4: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 3\n"
-    "  0: ReturnVoid\n"
-    "BasicBlock 3, pred: 1\n"
-    "  2: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 3\n"
+      "  2: ReturnVoid\n"
+      "BasicBlock 3, pred: 1\n"
+      "  3: Exit\n";
 
   const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
     Instruction::RETURN_VOID,
@@ -175,21 +175,21 @@
 
 TEST_F(PrettyPrinterTest, CFG6) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  1: IntConstant [5, 5]\n"
-    "  10: SuspendCheck\n"
-    "  11: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 2\n"
-    "  5: Equal(1, 1) [6]\n"
-    "  6: If(5)\n"
-    "BasicBlock 2, pred: 1, succ: 3\n"
-    "  7: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 4\n"
-    "  8: ReturnVoid\n"
-    "BasicBlock 4, pred: 3\n"
-    "  9: Exit\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  12: Goto 3\n";
+      "BasicBlock 0, succ: 1\n"
+      "  3: IntConstant [4, 4]\n"
+      "  1: SuspendCheck\n"
+      "  2: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 2\n"
+      "  4: Equal(3, 3) [5]\n"
+      "  5: If(4)\n"
+      "BasicBlock 2, pred: 1, succ: 3\n"
+      "  6: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 4\n"
+      "  7: ReturnVoid\n"
+      "BasicBlock 4, pred: 3\n"
+      "  8: Exit\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -202,22 +202,22 @@
 
 TEST_F(PrettyPrinterTest, CFG7) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  1: IntConstant [5, 5]\n"
-    "  10: SuspendCheck\n"
-    "  11: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 5, 6\n"
-    "  5: Equal(1, 1) [6]\n"
-    "  6: If(5)\n"
-    "BasicBlock 2, pred: 6, 3, succ: 3\n"
-    "  7: Goto 3\n"
-    "BasicBlock 3, pred: 5, 2, succ: 2\n"
-    "  14: SuspendCheck\n"
-    "  8: Goto 2\n"
-    "BasicBlock 5, pred: 1, succ: 3\n"
-    "  12: Goto 3\n"
-    "BasicBlock 6, pred: 1, succ: 2\n"
-    "  13: Goto 2\n";
+      "BasicBlock 0, succ: 1\n"
+      "  4: IntConstant [5, 5]\n"
+      "  2: SuspendCheck\n"
+      "  3: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 5, 6\n"
+      "  5: Equal(4, 4) [6]\n"
+      "  6: If(5)\n"
+      "BasicBlock 2, pred: 6, 3, succ: 3\n"
+      "  11: Goto 3\n"
+      "BasicBlock 3, pred: 5, 2, succ: 2\n"
+      "  8: SuspendCheck\n"
+      "  9: Goto 2\n"
+      "BasicBlock 5, pred: 1, succ: 3\n"
+      "  0: Goto 3\n"
+      "BasicBlock 6, pred: 1, succ: 2\n"
+      "  1: Goto 2\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
@@ -230,14 +230,14 @@
 
 TEST_F(PrettyPrinterTest, IntConstant) {
   const char* expected =
-    "BasicBlock 0, succ: 1\n"
-    "  1: IntConstant\n"
-    "  5: SuspendCheck\n"
-    "  6: Goto 1\n"
-    "BasicBlock 1, pred: 0, succ: 2\n"
-    "  3: ReturnVoid\n"
-    "BasicBlock 2, pred: 1\n"
-    "  4: Exit\n";
+      "BasicBlock 0, succ: 1\n"
+      "  2: IntConstant\n"
+      "  0: SuspendCheck\n"
+      "  1: Goto 1\n"
+      "BasicBlock 1, pred: 0, succ: 2\n"
+      "  3: ReturnVoid\n"
+      "BasicBlock 2, pred: 1\n"
+      "  4: Exit\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 294d00f..eeadbeb 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -16,36 +16,16 @@
 
 #include "ssa_builder.h"
 
+#include "bytecode_utils.h"
 #include "nodes.h"
 #include "reference_type_propagation.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
 
-void SsaBuilder::SetLoopHeaderPhiInputs() {
-  for (size_t i = loop_headers_.size(); i > 0; --i) {
-    HBasicBlock* block = loop_headers_[i - 1];
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      size_t vreg = phi->GetRegNumber();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* value = ValueOfLocal(predecessor, vreg);
-        if (value == nullptr) {
-          // Vreg is undefined at this predecessor. Mark it dead and leave with
-          // fewer inputs than predecessors. SsaChecker will fail if not removed.
-          phi->SetDead();
-          break;
-        } else {
-          phi->AddInput(value);
-        }
-      }
-    }
-  }
-}
-
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* equality_instr = it.Current();
       if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
@@ -70,14 +50,14 @@
       // can only be the 0 constant.
       DCHECK(int_operand->IsIntConstant()) << int_operand->DebugName();
       DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
-      equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
+      equality_instr->ReplaceInput(graph_->GetNullConstant(), int_operand == right ? 1 : 0);
     }
   }
 }
 
 void SsaBuilder::EquivalentPhisCleanup() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
+  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
     for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
@@ -99,7 +79,7 @@
 }
 
 void SsaBuilder::FixEnvironmentPhis() {
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
       HPhi* phi = it_phis.Current()->AsPhi();
@@ -253,9 +233,9 @@
 }
 
 void SsaBuilder::RunPrimitiveTypePropagation() {
-  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter());
 
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
@@ -299,8 +279,14 @@
 static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
   Primitive::Type type = aget->GetType();
   DCHECK(Primitive::IsIntOrLongType(type));
-  HArrayGet* next = aget->GetNext()->AsArrayGet();
-  return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr;
+  HInstruction* next = aget->GetNext();
+  if (next != nullptr && next->IsArrayGet()) {
+    HArrayGet* next_aget = next->AsArrayGet();
+    if (next_aget->IsEquivalentOf(aget)) {
+      return next_aget;
+    }
+  }
+  return nullptr;
 }
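The rewrite above replaces an unconditional `AsArrayGet()` on the successor with an `IsArrayGet()` check first. The general pattern, in simplified standalone form (illustrative types, not the ART hierarchy):

    struct HInstr {
      virtual ~HInstr() = default;
      virtual bool IsArrayGet() const { return false; }
      HInstr* next = nullptr;
    };

    struct ArrayGetInstr : HInstr {
      bool IsArrayGet() const override { return true; }
    };

    // Query the kind before downcasting instead of downcasting first and
    // relying on a null result for mismatched kinds.
    ArrayGetInstr* NextIfArrayGet(HInstr* insn) {
      HInstr* next = insn->next;
      if (next != nullptr && next->IsArrayGet()) {
        return static_cast<ArrayGetInstr*>(next);  // safe: kind just checked
      }
      return nullptr;
    }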
 
 static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) {
@@ -333,7 +319,7 @@
   // uses (because they are untyped) and environment uses (if --debuggable).
   // After resolving all ambiguous ArrayGets, we will re-run primitive type
   // propagation on the Phis which need to be updated.
-  ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter());
+  ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter());
 
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -451,7 +437,7 @@
 }
 
 void SsaBuilder::RemoveRedundantUninitializedStrings() {
-  if (GetGraph()->IsDebuggable()) {
+  if (graph_->IsDebuggable()) {
     // Do not perform the optimization for consistency with the interpreter
     // which always allocates an object for new-instance of String.
     return;
@@ -459,11 +445,13 @@
 
   for (HNewInstance* new_instance : uninitialized_strings_) {
     DCHECK(new_instance->IsInBlock());
+    DCHECK(new_instance->IsStringAlloc());
+
     // Replace NewInstance of String with NullConstant if not used prior to
     // calling StringFactory. In case of deoptimization, the interpreter is
     // expected to skip null check on the `this` argument of the StringFactory call.
     if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
-      new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+      new_instance->ReplaceWith(graph_->GetNullConstant());
       new_instance->GetBlock()->RemoveInstruction(new_instance);
 
       // Remove LoadClass if not needed any more.
@@ -494,57 +482,47 @@
 }
 
 GraphAnalysisResult SsaBuilder::BuildSsa() {
-  DCHECK(!GetGraph()->IsInSsaForm());
+  DCHECK(!graph_->IsInSsaForm());
 
-  // 1) Visit in reverse post order. We need to have all predecessors of a block
-  // visited (with the exception of loops) in order to create the right environment
-  // for that block. For loops, we create phis whose inputs will be set in 2).
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-
-  // 2) Set inputs of loop header phis.
-  SetLoopHeaderPhiInputs();
-
-  // 3) Propagate types of phis. At this point, phis are typed void in the general
+  // 1) Propagate types of phis. At this point, phis are typed void in the general
   // case, or float/double/reference if we created an equivalent phi. So we need
   // to propagate the types across phis to give them a correct type. If a type
   // conflict is detected in this stage, the phi is marked dead.
   RunPrimitiveTypePropagation();
 
-  // 4) Now that the correct primitive types have been assigned, we can get rid
+  // 2) Now that the correct primitive types have been assigned, we can get rid
   // of redundant phis. Note that we cannot do this phase before type propagation,
   // otherwise we could get rid of phi equivalents, whose presence is a requirement
   // for the type propagation phase. Note that this is to satisfy statement (a)
   // of the SsaBuilder (see ssa_builder.h).
-  SsaRedundantPhiElimination(GetGraph()).Run();
+  SsaRedundantPhiElimination(graph_).Run();
 
-  // 5) Fix the type for null constants which are part of an equality comparison.
+  // 3) Fix the type for null constants which are part of an equality comparison.
   // We need to do this after redundant phi elimination, to ensure the only cases
   // that we can see are reference comparison against 0. The redundant phi
   // elimination ensures we do not see a phi taking two 0 constants in a HEqual
   // or HNotEqual.
   FixNullConstantType();
 
-  // 6) Compute type of reference type instructions. The pass assumes that
+  // 4) Compute type of reference type instructions. The pass assumes that
   // NullConstant has been fixed up.
-  ReferenceTypePropagation(GetGraph(), handles_, /* is_first_run */ true).Run();
+  ReferenceTypePropagation(graph_, handles_, /* is_first_run */ true).Run();
 
-  // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float
-  // or long/double) and marked ArraySets with ambiguous input type. Now that RTP
-  // computed the type of the array input, the ambiguity can be resolved and the
-  // correct equivalents kept.
+  // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
+  // (int/float or long/double) and marked ArraySets with ambiguous input type.
+  // Now that RTP computed the type of the array input, the ambiguity can be
+  // resolved and the correct equivalents kept.
   if (!FixAmbiguousArrayOps()) {
     return kAnalysisFailAmbiguousArrayOp;
   }
 
-  // 8) Mark dead phis. This will mark phis which are not used by instructions
+  // 6) Mark dead phis. This will mark phis which are not used by instructions
   // or other live phis. If compiling as debuggable code, phis will also be kept
   // live if they have an environment use.
-  SsaDeadPhiElimination dead_phi_elimimation(GetGraph());
+  SsaDeadPhiElimination dead_phi_elimination(graph_);
   dead_phi_elimination.MarkDeadPhis();
 
-  // 9) Make sure environments use the right phi equivalent: a phi marked dead
+  // 7) Make sure environments use the right phi equivalent: a phi marked dead
   // can have a phi equivalent that is not dead. In that case we have to replace
   // it with the live equivalent because deoptimization and try/catch rely on
   // environments containing values of all live vregs at that point. Note that
@@ -553,166 +531,26 @@
   // environments to just reference one.
   FixEnvironmentPhis();
 
-  // 10) Now that the right phis are used for the environments, we can eliminate
+  // 8) Now that the right phis are used for the environments, we can eliminate
   // phis we do not need. Regardless of the debuggable status, this phase is
  // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
   // as for the code generation, which does not deal with phis of conflicting
   // input types.
   dead_phi_elimination.EliminateDeadPhis();
 
-  // 11) Step 1) replaced uses of NewInstances of String with the results of
-  // their corresponding StringFactory calls. Unless the String objects are used
-  // before they are initialized, they can be replaced with NullConstant.
-  // Note that this optimization is valid only if unsimplified code does not use
-  // the uninitialized value because we assume execution can be deoptimized at
-  // any safepoint. We must therefore perform it before any other optimizations.
+  // 9) HInstructionBuilder replaced uses of NewInstances of String with the
+  // results of their corresponding StringFactory calls. Unless the String
+  // objects are used before they are initialized, they can be replaced with
+  // NullConstant. Note that this optimization is valid only if unsimplified
+  // code does not use the uninitialized value because we assume execution can
+  // be deoptimized at any safepoint. We must therefore perform it before any
+  // other optimizations.
   RemoveRedundantUninitializedStrings();
 
-  // 12) Clear locals.
-  for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
-       !it.Done();
-       it.Advance()) {
-    HInstruction* current = it.Current();
-    if (current->IsLocal()) {
-      current->GetBlock()->RemoveInstruction(current);
-    }
-  }
-
-  GetGraph()->SetInSsaForm();
+  graph_->SetInSsaForm();
   return kAnalysisSuccess;
 }
 
-ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
-  ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
-  const size_t vregs = GetGraph()->GetNumberOfVRegs();
-  if (locals->empty() && vregs != 0u) {
-    locals->resize(vregs, nullptr);
-
-    if (block->IsCatchBlock()) {
-      ArenaAllocator* arena = GetGraph()->GetArena();
-      // We record incoming inputs of catch phis at throwing instructions and
-      // must therefore eagerly create the phis. Phis for undefined vregs will
-      // be deleted when the first throwing instruction with the vreg undefined
-      // is encountered. Unused phis will be removed by dead phi analysis.
-      for (size_t i = 0; i < vregs; ++i) {
-        // No point in creating the catch phi if it is already undefined at
-        // the first throwing instruction.
-        HInstruction* current_local_value = (*current_locals_)[i];
-        if (current_local_value != nullptr) {
-          HPhi* phi = new (arena) HPhi(
-              arena,
-              i,
-              0,
-              current_local_value->GetType());
-          block->AddPhi(phi);
-          (*locals)[i] = phi;
-        }
-      }
-    }
-  }
-  return locals;
-}
-
-HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
-  ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
-  return (*locals)[local];
-}
-
-void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
-  current_locals_ = GetLocalsFor(block);
-
-  if (block->IsCatchBlock()) {
-    // Catch phis were already created and inputs collected from throwing sites.
-    if (kIsDebugBuild) {
-      // Make sure there was at least one throwing instruction which initialized
-      // locals (guaranteed by HGraphBuilder) and that all try blocks have been
-      // visited already (from HTryBoundary scoping and reverse post order).
-      bool throwing_instruction_found = false;
-      bool catch_block_visited = false;
-      for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-        HBasicBlock* current = it.Current();
-        if (current == block) {
-          catch_block_visited = true;
-        } else if (current->IsTryBlock() &&
-                   current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) {
-          DCHECK(!catch_block_visited) << "Catch block visited before its try block.";
-          throwing_instruction_found |= current->HasThrowingInstructions();
-        }
-      }
-      DCHECK(throwing_instruction_found) << "No instructions throwing into a live catch block.";
-    }
-  } else if (block->IsLoopHeader()) {
-    // If the block is a loop header, we know we only have visited the pre header
-    // because we are visiting in reverse post order. We create phis for all initialized
-    // locals from the pre header. Their inputs will be populated at the end of
-    // the analysis.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
-      if (incoming != nullptr) {
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(),
-            local,
-            0,
-            incoming->GetType());
-        block->AddPhi(phi);
-        (*current_locals_)[local] = phi;
-      }
-    }
-    // Save the loop header so that the last phase of the analysis knows which
-    // blocks need to be updated.
-    loop_headers_.push_back(block);
-  } else if (block->GetPredecessors().size() > 0) {
-    // All predecessors have already been visited because we are visiting in reverse post order.
-    // We merge the values of all locals, creating phis if those values differ.
-    for (size_t local = 0; local < current_locals_->size(); ++local) {
-      bool one_predecessor_has_no_value = false;
-      bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessors()[0], local);
-
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* current = ValueOfLocal(predecessor, local);
-        if (current == nullptr) {
-          one_predecessor_has_no_value = true;
-          break;
-        } else if (current != value) {
-          is_different = true;
-        }
-      }
-
-      if (one_predecessor_has_no_value) {
-        // If one predecessor has no value for this local, we trust the verifier has
-        // successfully checked that there is a store dominating any read after this block.
-        continue;
-      }
-
-      if (is_different) {
-        HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
-        HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(),
-            local,
-            block->GetPredecessors().size(),
-            first_input->GetType());
-        for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
-          phi->SetRawInputAt(i, pred_value);
-        }
-        block->AddPhi(phi);
-        value = phi;
-      }
-      (*current_locals_)[local] = value;
-    }
-  }
-
-  // Visit all instructions. The instructions of interest are:
-  // - HLoadLocal: replace them with the current value of the local.
-  // - HStoreLocal: update current value of the local and remove the instruction.
-  // - Instructions that require an environment: populate their environment
-  //   with the current values of the locals.
-  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    it.Current()->Accept(this);
-  }
-}
-
 /**
  * Constants in the Dex format are not typed. So the builder types them as
  * integers, but when doing the SSA form, we might realize the constant
@@ -723,11 +561,10 @@
   // We place the floating point constant next to this constant.
   HFloatConstant* result = constant->GetNext()->AsFloatConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
+    float value = bit_cast<float, int32_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HFloatConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheFloatConstant(result);
+    graph_->CacheFloatConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -746,11 +583,10 @@
   // We place the floating point constant next to this constant.
   HDoubleConstant* result = constant->GetNext()->AsDoubleConstant();
   if (result == nullptr) {
-    HGraph* graph = constant->GetBlock()->GetGraph();
-    ArenaAllocator* allocator = graph->GetArena();
-    result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
+    double value = bit_cast<double, int64_t>(constant->GetValue());
+    result = new (graph_->GetArena()) HDoubleConstant(value);
     constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
-    graph->CacheDoubleConstant(result);
+    graph_->CacheDoubleConstant(result);
   } else {
     // If there is already a constant with the expected type, we know it is
     // the floating point equivalent of this constant.
@@ -781,7 +617,7 @@
   if (next == nullptr
       || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
       || (next->GetType() != type)) {
-    ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
+    ArenaAllocator* allocator = graph_->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
       // Copy the inputs. Note that the graph may not be correctly typed
@@ -841,7 +677,7 @@
 
 HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
   if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) {
-    return value->GetBlock()->GetGraph()->GetNullConstant();
+    return graph_->GetNullConstant();
   } else if (value->IsPhi()) {
     return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
   } else {
@@ -849,150 +685,4 @@
   }
 }
 
-void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  Primitive::Type load_type = load->GetType();
-  HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
-  // If the operation requests a specific type, we make sure its input is of that type.
-  if (load_type != value->GetType()) {
-    if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) {
-      value = GetFloatOrDoubleEquivalent(value, load_type);
-    } else if (load_type == Primitive::kPrimNot) {
-      value = GetReferenceTypeEquivalent(value);
-    }
-  }
-
-  load->ReplaceWith(value);
-  load->GetBlock()->RemoveInstruction(load);
-}
-
-void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  uint32_t reg_number = store->GetLocal()->GetRegNumber();
-  HInstruction* stored_value = store->InputAt(1);
-  Primitive::Type stored_type = stored_value->GetType();
-  DCHECK_NE(stored_type, Primitive::kPrimVoid);
-
-  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
-  // registers. Consider the following cases:
-  // (1) Storing a wide value must overwrite previous values in both `reg_number`
-  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
-  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
-  //     must invalidate it. We store `nullptr` in `reg_number-1`.
-  // Consequently, storing a wide value into the high vreg of another wide value
-  // will invalidate both `reg_number-1` and `reg_number+1`.
-
-  if (reg_number != 0) {
-    HInstruction* local_low = (*current_locals_)[reg_number - 1];
-    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
-      // The vreg we are storing into was previously the high vreg of a pair.
-      // We need to invalidate its low vreg.
-      DCHECK((*current_locals_)[reg_number] == nullptr);
-      (*current_locals_)[reg_number - 1] = nullptr;
-    }
-  }
-
-  (*current_locals_)[reg_number] = stored_value;
-  if (Primitive::Is64BitType(stored_type)) {
-    // We are storing a pair. Invalidate the instruction in the high vreg.
-    (*current_locals_)[reg_number + 1] = nullptr;
-  }
-
-  store->GetBlock()->RemoveInstruction(store);
-}
-
-void SsaBuilder::VisitInstruction(HInstruction* instruction) {
-  if (instruction->NeedsEnvironment()) {
-    HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
-        GetGraph()->GetArena(),
-        current_locals_->size(),
-        GetGraph()->GetDexFile(),
-        GetGraph()->GetMethodIdx(),
-        instruction->GetDexPc(),
-        GetGraph()->GetInvokeType(),
-        instruction);
-    environment->CopyFrom(*current_locals_);
-    instruction->SetRawEnvironment(environment);
-  }
-
-  // If in a try block, propagate values of locals into catch blocks.
-  if (instruction->CanThrowIntoCatchBlock()) {
-    const HTryBoundary& try_entry =
-        instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
-    for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
-      ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
-      DCHECK_EQ(handler_locals->size(), current_locals_->size());
-      for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-        HInstruction* handler_value = (*handler_locals)[vreg];
-        if (handler_value == nullptr) {
-          // Vreg was undefined at a previously encountered throwing instruction
-          // and the catch phi was deleted. Do not record the local value.
-          continue;
-        }
-        DCHECK(handler_value->IsPhi());
-
-        HInstruction* local_value = (*current_locals_)[vreg];
-        if (local_value == nullptr) {
-          // This is the first instruction throwing into `catch_block` where
-          // `vreg` is undefined. Delete the catch phi.
-          catch_block->RemovePhi(handler_value->AsPhi());
-          (*handler_locals)[vreg] = nullptr;
-        } else {
-          // Vreg has been defined at all instructions throwing into `catch_block`
-          // encountered so far. Record the local value in the catch phi.
-          handler_value->AsPhi()->AddInput(local_value);
-        }
-      }
-    }
-  }
-}
-
-void SsaBuilder::VisitArrayGet(HArrayGet* aget) {
-  Primitive::Type type = aget->GetType();
-  DCHECK(!Primitive::IsFloatingPointType(type));
-  if (Primitive::IsIntOrLongType(type)) {
-    ambiguous_agets_.push_back(aget);
-  }
-  VisitInstruction(aget);
-}
-
-void SsaBuilder::VisitArraySet(HArraySet* aset) {
-  Primitive::Type type = aset->GetValue()->GetType();
-  if (Primitive::IsIntOrLongType(type)) {
-    ambiguous_asets_.push_back(aset);
-  }
-  VisitInstruction(aset);
-}
-
-void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  VisitInstruction(invoke);
-
-  if (invoke->IsStringInit()) {
-    // This is a StringFactory call which acts as a String constructor. Its
-    // result replaces the empty String pre-allocated by NewInstance.
-    HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
-
-    // Replacing the NewInstance might render it redundant. Keep a list of these
-    // to be visited once it is clear whether it has remaining uses.
-    if (arg_this->IsNewInstance()) {
-      HNewInstance* new_instance = arg_this->AsNewInstance();
-      // Note that in some rare cases (b/27847265), the same NewInstance may be seen
-      // multiple times. We should only consider it once for removal, so we
-      // ensure it is not added more than once.
-      if (!ContainsElement(uninitialized_strings_, new_instance)) {
-        uninitialized_strings_.push_back(new_instance);
-      }
-    } else {
-      DCHECK(arg_this->IsPhi());
-      // NewInstance is not the direct input of the StringFactory call. It might
-      // be redundant but optimizing this case is not worth the effort.
-    }
-
-    // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
-    for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
-      if ((*current_locals_)[vreg] == arg_this) {
-        (*current_locals_)[vreg] = invoke;
-      }
-    }
-  }
-}
-
 }  // namespace art
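
The float/double equivalents created above come from reinterpreting the raw bits of an untyped Dex constant. A minimal standalone sketch of that reinterpretation, with std::memcpy standing in for art::bit_cast (the function names here are illustrative, not ART's actual helpers):

    #include <cstdint>
    #include <cstring>

    // Reinterpret an int32 constant's bits as a float, the way the SSA builder
    // derives a float equivalent for an untyped Dex constant.
    static float FloatEquivalentOf(int32_t value) {
      float result;
      static_assert(sizeof(result) == sizeof(value), "bit widths must match");
      std::memcpy(&result, &value, sizeof(result));  // bit_cast<float, int32_t>
      return result;
    }

    // Likewise for long/double.
    static double DoubleEquivalentOf(int64_t value) {
      double result;
      static_assert(sizeof(result) == sizeof(value), "bit widths must match");
      std::memcpy(&result, &value, sizeof(result));  // bit_cast<double, int64_t>
      return result;
    }
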
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2dae9c2..c37c28c 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -23,8 +23,6 @@
 
 namespace art {
 
-static constexpr int kDefaultNumberOfLoops = 2;
-
 /**
  * Transforms a graph into SSA form. The liveness guarantees of
  * this transformation are listed below. A DEX register
@@ -47,37 +45,48 @@
  *     is not set, values of Dex registers only used by environments
  *     are killed.
  */
-class SsaBuilder : public HGraphVisitor {
+class SsaBuilder : public ValueObject {
  public:
   SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
-      : HGraphVisitor(graph),
+      : graph_(graph),
         handles_(handles),
         agets_fixed_(false),
-        current_locals_(nullptr),
-        loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-        locals_for_(graph->GetBlocks().size(),
-                    ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
-                    graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
-    loop_headers_.reserve(kDefaultNumberOfLoops);
+        ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)),
+        uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
+    graph_->InitializeInexactObjectRTI(handles);
   }
 
   GraphAnalysisResult BuildSsa();
 
-  // Returns locals vector for `block`. If it is a catch block, the vector will be
-  // prepopulated with catch phis for vregs which are defined in `current_locals_`.
-  ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
-  HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
+  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
+  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
 
-  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
-  void VisitLoadLocal(HLoadLocal* load) OVERRIDE;
-  void VisitStoreLocal(HStoreLocal* store) OVERRIDE;
-  void VisitInstruction(HInstruction* instruction) OVERRIDE;
-  void VisitArrayGet(HArrayGet* aget) OVERRIDE;
-  void VisitArraySet(HArraySet* aset) OVERRIDE;
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void MaybeAddAmbiguousArrayGet(HArrayGet* aget) {
+    Primitive::Type type = aget->GetType();
+    DCHECK(!Primitive::IsFloatingPointType(type));
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_agets_.push_back(aget);
+    }
+  }
+
+  void MaybeAddAmbiguousArraySet(HArraySet* aset) {
+    Primitive::Type type = aset->GetValue()->GetType();
+    if (Primitive::IsIntOrLongType(type)) {
+      ambiguous_asets_.push_back(aset);
+    }
+  }
+
+  void AddUninitializedString(HNewInstance* string) {
+    // In some rare cases (b/27847265), the same NewInstance may be seen
+    // multiple times. We should only consider it once for removal, so we
+    // ensure it is not added more than once.
+    // Note that we cannot check whether this really is a NewInstance of String
+    // before RTP. We DCHECK that in RemoveRedundantUninitializedStrings.
+    if (!ContainsElement(uninitialized_strings_, string)) {
+      uninitialized_strings_.push_back(string);
+    }
+  }
 
  private:
   void SetLoopHeaderPhiInputs();
@@ -95,9 +104,6 @@
   bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist);
   void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist);
 
-  HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type);
-  HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
-
   HFloatConstant* GetFloatEquivalent(HIntConstant* constant);
   HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant);
   HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
@@ -105,25 +111,16 @@
 
   void RemoveRedundantUninitializedStrings();
 
+  HGraph* graph_;
   StackHandleScopeCollection* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
   bool agets_fixed_;
 
-  // Locals for the current block being visited.
-  ArenaVector<HInstruction*>* current_locals_;
-
-  // Keep track of loop headers found. The last phase of the analysis iterates
-  // over these blocks to set the inputs of their phis.
-  ArenaVector<HBasicBlock*> loop_headers_;
-
   ArenaVector<HArrayGet*> ambiguous_agets_;
   ArenaVector<HArraySet*> ambiguous_asets_;
   ArenaVector<HNewInstance*> uninitialized_strings_;
 
-  // HEnvironment for each block.
-  ArenaVector<ArenaVector<HInstruction*>> locals_for_;
-
   DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
 };
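
AddUninitializedString above guards against duplicate entries with ContainsElement before push_back, since the same NewInstance can be encountered more than once. The same record-once pattern in isolation, as a sketch using standard containers (the helper name is illustrative):

    #include <algorithm>
    #include <vector>

    // Record a value at most once, even if the builder encounters it several
    // times (cf. the b/27847265 note above).
    template <typename T>
    void AddOnce(std::vector<T>* list, const T& value) {
      if (std::find(list->begin(), list->end(), value) == list->end()) {
        list->push_back(value);
      }
    }
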
 
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index a688092..218bd53 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -163,8 +163,8 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [8]\n"
-    "  2: IntConstant 5 [8]\n"
+    "  1: IntConstant 5 [8]\n"
+    "  2: IntConstant 4 [8]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -174,7 +174,7 @@
     "BasicBlock 3, pred: 1, succ: 4\n"
     "  7: Goto\n"
     "BasicBlock 4, pred: 2, 3, succ: 5\n"
-    "  8: Phi(1, 2) [9]\n"
+    "  8: Phi(2, 1) [9]\n"
     "  9: Return(8)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
@@ -258,19 +258,19 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [5]\n"
-    "  2: IntConstant 5 [9]\n"
+    "  1: IntConstant 5 [9]\n"
+    "  2: IntConstant 4 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
     "BasicBlock 2, pred: 1, 3, succ: 4, 3\n"
-    "  5: Phi(0, 1) [6, 6]\n"
+    "  5: Phi(0, 2) [6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 2\n"
     "  8: Goto\n"
     "BasicBlock 4, pred: 2, succ: 5\n"
-    "  9: Return(2)\n"
+    "  9: Return(1)\n"
     "BasicBlock 5, pred: 4\n"
     "  10: Exit\n";
 
@@ -326,8 +326,8 @@
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [4, 4]\n"
-    "  1: IntConstant 4 [13]\n"
-    "  2: IntConstant 5 [13]\n"
+    "  1: IntConstant 5 [13]\n"
+    "  2: IntConstant 4 [13]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  4: Equal(0, 0) [5]\n"
@@ -346,7 +346,7 @@
     "BasicBlock 7, pred: 6\n"
     "  12: Exit\n"
     "BasicBlock 8, pred: 2, 3, succ: 4\n"
-    "  13: Phi(1, 2) [8, 8, 11]\n"
+    "  13: Phi(2, 1) [8, 8, 11]\n"
     "  14: Goto\n";
 
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -496,7 +496,7 @@
   // does not update the local.
   const char* expected =
     "BasicBlock 0, succ: 1\n"
-    "  0: IntConstant 0 [4, 8, 6, 6, 2, 2, 8, 4]\n"
+    "  0: IntConstant 0 [4, 4, 8, 8, 6, 6, 2, 2]\n"
     "  1: Goto\n"
     "BasicBlock 1, pred: 0, succ: 3, 2\n"
     "  2: Equal(0, 0) [3]\n"
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c571312..11a254e 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -137,8 +137,9 @@
 
 size_t StackMapStream::PrepareForFillIn() {
   int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
-  inline_info_size_ = ComputeInlineInfoSize();
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  ComputeInlineInfoEncoding();  // Needs dex_register_maps_size_.
+  inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
   uint32_t max_native_pc_offset = ComputeMaxNativePcOffset();
   size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset,
                                                            dex_pc_max_,
@@ -158,9 +159,10 @@
   // Prepare the CodeInfo variable-sized encoding.
   CodeInfoEncoding code_info_encoding;
   code_info_encoding.non_header_size = non_header_size;
-  code_info_encoding.stack_map_encoding = stack_map_encoding_;
   code_info_encoding.number_of_stack_maps = stack_maps_.size();
   code_info_encoding.stack_map_size_in_bytes = stack_map_size;
+  code_info_encoding.stack_map_encoding = stack_map_encoding_;
+  code_info_encoding.inline_info_encoding = inline_info_encoding_;
   code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
   code_info_encoding.Compress(&code_info_encoding_);
 
@@ -224,10 +226,26 @@
   return size;
 }
 
-size_t StackMapStream::ComputeInlineInfoSize() const {
-  return inline_infos_.size() * InlineInfo::SingleEntrySize()
-    // For encoding the depth.
-    + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
+void StackMapStream::ComputeInlineInfoEncoding() {
+  uint32_t method_index_max = 0;
+  uint32_t dex_pc_max = 0;
+  uint32_t invoke_type_max = 0;
+
+  uint32_t inline_info_index = 0;
+  for (const StackMapEntry& entry : stack_maps_) {
+    for (size_t j = 0; j < entry.inlining_depth; ++j) {
+      InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
+      method_index_max = std::max(method_index_max, inline_entry.method_index);
+      dex_pc_max = std::max(dex_pc_max, inline_entry.dex_pc);
+      invoke_type_max = std::max(invoke_type_max, static_cast<uint32_t>(inline_entry.invoke_type));
+    }
+  }
+  DCHECK_EQ(inline_info_index, inline_infos_.size());
+
+  inline_info_encoding_.SetFromSizes(method_index_max,
+                                     dex_pc_max,
+                                     invoke_type_max,
+                                     dex_register_maps_size_);
 }
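
ComputeInlineInfoEncoding sizes each field of an inline info entry from the maximum value that field must hold. A self-contained sketch of that sizing step; MinimumBitsToStore mirrors the intent of ART's bit-utils helper, and the exact field layout chosen by InlineInfoEncoding::SetFromSizes is assumed rather than shown:

    #include <cstddef>
    #include <cstdint>

    // Number of bits needed to store any value in [0, max_value].
    static size_t MinimumBitsToStore(uint32_t max_value) {
      size_t bits = 0;
      while (max_value != 0) {
        ++bits;
        max_value >>= 1;
      }
      return bits;
    }

    struct InlineEntryBits {
      size_t method_index_bits;
      size_t dex_pc_bits;
      size_t invoke_type_bits;
    };

    // Derive per-entry bit widths from the observed maxima; the entry size
    // obtained this way is what drives inline_info_size_ in PrepareForFillIn.
    static InlineEntryBits BitsFromMaxima(uint32_t method_index_max,
                                          uint32_t dex_pc_max,
                                          uint32_t invoke_type_max) {
      return {MinimumBitsToStore(method_index_max),
              MinimumBitsToStore(dex_pc_max),
              MinimumBitsToStore(invoke_type_max)};
    }
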
 
 void StackMapStream::FillIn(MemoryRegion region) {
@@ -321,7 +339,7 @@
     if (entry.inlining_depth != 0) {
       MemoryRegion inline_region = inline_infos_region.Subregion(
           next_inline_info_offset,
-          InlineInfo::kFixedSize + entry.inlining_depth * InlineInfo::SingleEntrySize());
+          entry.inlining_depth * inline_info_encoding_.GetEntrySize());
       next_inline_info_offset += inline_region.size();
       InlineInfo inline_info(inline_region);
 
@@ -329,16 +347,18 @@
       stack_map.SetInlineDescriptorOffset(
           stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
 
-      inline_info.SetDepth(entry.inlining_depth);
+      inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
       DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
         InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
-        inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index);
-        inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc);
-        inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type);
+        inline_info.SetMethodIndexAtDepth(inline_info_encoding_, depth, inline_entry.method_index);
+        inline_info.SetDexPcAtDepth(inline_info_encoding_, depth, inline_entry.dex_pc);
+        inline_info.SetInvokeTypeAtDepth(inline_info_encoding_, depth, inline_entry.invoke_type);
         if (inline_entry.num_dex_registers == 0) {
           // No dex map available.
-          inline_info.SetDexRegisterMapOffsetAtDepth(depth, StackMap::kNoDexRegisterMap);
+          inline_info.SetDexRegisterMapOffsetAtDepth(inline_info_encoding_,
+                                                     depth,
+                                                     StackMap::kNoDexRegisterMap);
           DCHECK(inline_entry.live_dex_registers_mask == nullptr);
         } else {
           MemoryRegion register_region = dex_register_locations_region.Subregion(
@@ -348,7 +368,8 @@
           next_dex_register_map_offset += register_region.size();
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
-            depth, register_region.start() - dex_register_locations_region.start());
+              inline_info_encoding_,
+              depth, register_region.start() - dex_register_locations_region.start());
 
           FillInDexRegisterMap(dex_register_map,
                                inline_entry.num_dex_registers,
@@ -513,14 +534,17 @@
     DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0));
     if (entry.inlining_depth != 0) {
       InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-      DCHECK_EQ(inline_info.GetDepth(), entry.inlining_depth);
+      DCHECK_EQ(inline_info.GetDepth(encoding.inline_info_encoding), entry.inlining_depth);
       for (size_t d = 0; d < entry.inlining_depth; ++d) {
         size_t inline_info_index = entry.inline_infos_start_index + d;
         DCHECK_LT(inline_info_index, inline_infos_.size());
         InlineInfoEntry inline_entry = inline_infos_[inline_info_index];
-        DCHECK_EQ(inline_info.GetDexPcAtDepth(d), inline_entry.dex_pc);
-        DCHECK_EQ(inline_info.GetMethodIndexAtDepth(d), inline_entry.method_index);
-        DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(d), inline_entry.invoke_type);
+        DCHECK_EQ(inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.dex_pc);
+        DCHECK_EQ(inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.method_index);
+        DCHECK_EQ(inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, d),
+                  inline_entry.invoke_type);
 
         CheckDexRegisterMap(code_info,
                             code_info.GetDexRegisterMapAtDepth(
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index b686748..41f72f5 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -156,7 +156,7 @@
   size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers,
                                    const BitVector* live_dex_registers_mask) const;
   size_t ComputeDexRegisterMapsSize() const;
-  size_t ComputeInlineInfoSize() const;
+  void ComputeInlineInfoEncoding();
 
   // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
@@ -200,6 +200,7 @@
   StackMapEntry current_entry_;
   InlineInfoEntry current_inline_info_;
   StackMapEncoding stack_map_encoding_;
+  InlineInfoEncoding inline_info_encoding_;
   ArenaVector<uint8_t> code_info_encoding_;
   size_t inline_info_size_;
   size_t dex_register_maps_size_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 3552487..967fd96 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -237,13 +237,13 @@
 
     ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
     InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-    ASSERT_EQ(2u, inline_info.GetDepth());
-    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(0));
-    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(1));
-    ASSERT_EQ(kDirect, inline_info.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(kStatic, inline_info.GetInvokeTypeAtDepth(1));
+    ASSERT_EQ(2u, inline_info.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kDirect, inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kStatic, inline_info.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
   }
 
   // Second stack map.
@@ -741,13 +741,13 @@
     ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if0 = ci.GetInlineInfoOf(sm0, encoding);
-    ASSERT_EQ(2u, if0.GetDepth());
-    ASSERT_EQ(2u, if0.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(3u, if0.GetDexPcAtDepth(1));
-    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(1));
+    ASSERT_EQ(2u, if0.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if0.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(3u, if0.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kStatic, if0.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, encoding, 1);
     ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
@@ -767,16 +767,16 @@
     ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if1 = ci.GetInlineInfoOf(sm1, encoding);
-    ASSERT_EQ(3u, if1.GetDepth());
-    ASSERT_EQ(2u, if1.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kDirect, if1.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(3u, if1.GetDexPcAtDepth(1));
-    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kStatic, if1.GetInvokeTypeAtDepth(1));
-    ASSERT_EQ(5u, if1.GetDexPcAtDepth(2));
-    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(2));
-    ASSERT_EQ(kVirtual, if1.GetInvokeTypeAtDepth(2));
+    ASSERT_EQ(3u, if1.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kDirect, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(3u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kStatic, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(5u, if1.GetDexPcAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(kVirtual, if1.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 2));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, encoding, 1);
     ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
@@ -786,7 +786,7 @@
     ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci, encoding));
     ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci, encoding));
 
-    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2));
+    ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, 2));
   }
 
   {
@@ -808,18 +808,18 @@
     ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
 
     InlineInfo if2 = ci.GetInlineInfoOf(sm3, encoding);
-    ASSERT_EQ(3u, if2.GetDepth());
-    ASSERT_EQ(2u, if2.GetDexPcAtDepth(0));
-    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(0));
-    ASSERT_EQ(kVirtual, if2.GetInvokeTypeAtDepth(0));
-    ASSERT_EQ(5u, if2.GetDexPcAtDepth(1));
-    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(1));
-    ASSERT_EQ(kInterface, if2.GetInvokeTypeAtDepth(1));
-    ASSERT_EQ(10u, if2.GetDexPcAtDepth(2));
-    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(2));
-    ASSERT_EQ(kStatic, if2.GetInvokeTypeAtDepth(2));
+    ASSERT_EQ(3u, if2.GetDepth(encoding.inline_info_encoding));
+    ASSERT_EQ(2u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(kVirtual, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 0));
+    ASSERT_EQ(5u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(kInterface, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 1));
+    ASSERT_EQ(10u, if2.GetDexPcAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(encoding.inline_info_encoding, 2));
+    ASSERT_EQ(kStatic, if2.GetInvokeTypeAtDepth(encoding.inline_info_encoding, 2));
 
-    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0));
+    ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, 0));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, encoding, 1);
     ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci, encoding));
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 93a0974..5c0eb3f 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -23,6 +23,7 @@
 #include <vector>
 #include <set>
 #include <map>
+#include <unordered_set>
 
 #include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
@@ -48,9 +49,9 @@
 class ImgDiagDumper {
  public:
   explicit ImgDiagDumper(std::ostream* os,
-                       const ImageHeader& image_header,
-                       const std::string& image_location,
-                       pid_t image_diff_pid)
+                         const ImageHeader& image_header,
+                         const std::string& image_location,
+                         pid_t image_diff_pid)
       : os_(os),
         image_header_(image_header),
         image_location_(image_location),
@@ -58,6 +59,8 @@
 
   bool Dump() SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
+    os << "IMAGE LOCATION: " << image_location_ << "\n\n";
+
     os << "MAGIC: " << image_header_.GetMagic() << "\n\n";
 
     os << "IMAGE BEGIN: " << reinterpret_cast<void*>(image_header_.GetImageBegin()) << "\n\n";
@@ -138,7 +141,56 @@
     return DumpImageDiffMap(image_diff_pid, boot_map);
   }
 
-    // Look at /proc/$pid/mem and only diff the things from there
+  static std::string PrettyFieldValue(ArtField* field, mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    std::ostringstream oss;
+    switch (field->GetTypeAsPrimitiveType()) {
+      case Primitive::kPrimNot: {
+        oss << obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
+            field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimBoolean: {
+        oss << static_cast<bool>(obj->GetFieldBoolean<kVerifyNone>(field->GetOffset()));
+        break;
+      }
+      case Primitive::kPrimByte: {
+        oss << static_cast<int32_t>(obj->GetFieldByte<kVerifyNone>(field->GetOffset()));
+        break;
+      }
+      case Primitive::kPrimChar: {
+        oss << obj->GetFieldChar<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimShort: {
+        oss << obj->GetFieldShort<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimInt: {
+        oss << obj->GetField32<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimLong: {
+        oss << obj->GetField64<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimFloat: {
+        oss << obj->GetField32<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimDouble: {
+        oss << obj->GetField64<kVerifyNone>(field->GetOffset());
+        break;
+      }
+      case Primitive::kPrimVoid: {
+        oss << "void";
+        break;
+      }
+    }
+    return oss.str();
+  }
+
+  // Look at /proc/$pid/mem and only diff the things from there
   bool DumpImageDiffMap(pid_t image_diff_pid, const backtrace_map_t& boot_map)
     SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
@@ -353,119 +405,119 @@
     // Look up local classes by their descriptor
     std::map<std::string, mirror::Class*> local_class_map;
 
+    std::unordered_set<mirror::Object*> dirty_objects;
+
     size_t dirty_object_bytes = 0;
-    {
-      const uint8_t* begin_image_ptr = image_begin_unaligned;
-      const uint8_t* end_image_ptr = image_mirror_end_unaligned;
+    const uint8_t* begin_image_ptr = image_begin_unaligned;
+    const uint8_t* end_image_ptr = image_mirror_end_unaligned;
 
-      const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
-      while (reinterpret_cast<const uintptr_t>(current)
-             < reinterpret_cast<const uintptr_t>(end_image_ptr)) {
-        CHECK_ALIGNED(current, kObjectAlignment);
-        mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));
+    const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
+    while (reinterpret_cast<uintptr_t>(current) < reinterpret_cast<uintptr_t>(end_image_ptr)) {
+      CHECK_ALIGNED(current, kObjectAlignment);
+      mirror::Object* obj = reinterpret_cast<mirror::Object*>(const_cast<uint8_t*>(current));
 
-        // Sanity check that we are reading a real object
-        CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-        if (kUseBakerOrBrooksReadBarrier) {
-          obj->AssertReadBarrierPointer();
+      // Sanity check that we are reading a real object
+      CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
+      if (kUseBakerOrBrooksReadBarrier) {
+        obj->AssertReadBarrierPointer();
+      }
+
+      // Iterate every page this object belongs to
+      bool on_dirty_page = false;
+      size_t page_off = 0;
+      size_t current_page_idx;
+      uintptr_t object_address;
+      do {
+        object_address = reinterpret_cast<uintptr_t>(current);
+        current_page_idx = object_address / kPageSize + page_off;
+
+        if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
+          // This object is on a dirty page
+          on_dirty_page = true;
         }
 
-        // Iterate every page this object belongs to
-        bool on_dirty_page = false;
-        size_t page_off = 0;
-        size_t current_page_idx;
-        uintptr_t object_address;
-        do {
-          object_address = reinterpret_cast<uintptr_t>(current);
-          current_page_idx = object_address / kPageSize + page_off;
+        page_off++;
+      } while ((current_page_idx * kPageSize) <
+               RoundUp(object_address + obj->SizeOf(), kObjectAlignment));
 
-          if (dirty_page_set_local.find(current_page_idx) != dirty_page_set_local.end()) {
-            // This object is on a dirty page
-            on_dirty_page = true;
+      mirror::Class* klass = obj->GetClass();
+
+      bool different_object = false;
+
+      // Check against the other object and see if they are different
+      ptrdiff_t offset = current - begin_image_ptr;
+      const uint8_t* current_remote = &remote_contents[offset];
+      mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
+          const_cast<uint8_t*>(current_remote));
+      if (memcmp(current, current_remote, obj->SizeOf()) != 0) {
+        different_objects++;
+        dirty_object_bytes += obj->SizeOf();
+        dirty_objects.insert(obj);
+
+        ++dirty_object_class_map[klass];
+
+        // Go byte-by-byte and figure out what exactly got dirtied
+        size_t dirty_byte_count_per_object = 0;
+        for (size_t i = 0; i < obj->SizeOf(); ++i) {
+          if (current[i] != current_remote[i]) {
+            dirty_byte_count_per_object++;
           }
+        }
+        dirty_object_byte_count[klass] += dirty_byte_count_per_object;
+        dirty_object_size_in_bytes[klass] += obj->SizeOf();
 
-          page_off++;
-        } while ((current_page_idx * kPageSize) <
-                 RoundUp(object_address + obj->SizeOf(), kObjectAlignment));
+        different_object = true;
 
-        mirror::Class* klass = obj->GetClass();
+        dirty_objects_by_class[klass].push_back(remote_obj);
+      } else {
+        ++clean_object_class_map[klass];
+      }
 
-        bool different_object = false;
+      std::string descriptor = GetClassDescriptor(klass);
+      if (different_object) {
+        if (klass->IsClassClass()) {
+          // this is a "Class"
+          mirror::Class* obj_as_class = reinterpret_cast<mirror::Class*>(remote_obj);
 
-        // Check against the other object and see if they are different
-        ptrdiff_t offset = current - begin_image_ptr;
-        const uint8_t* current_remote = &remote_contents[offset];
-        mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
-            const_cast<uint8_t*>(current_remote));
-        if (memcmp(current, current_remote, obj->SizeOf()) != 0) {
-          different_objects++;
-          dirty_object_bytes += obj->SizeOf();
-
-          ++dirty_object_class_map[klass];
-
-          // Go byte-by-byte and figure out what exactly got dirtied
-          size_t dirty_byte_count_per_object = 0;
+          // print the fields that are dirty
           for (size_t i = 0; i < obj->SizeOf(); ++i) {
             if (current[i] != current_remote[i]) {
-              dirty_byte_count_per_object++;
+              class_field_dirty_count[i]++;
             }
           }
-          dirty_object_byte_count[klass] += dirty_byte_count_per_object;
-          dirty_object_size_in_bytes[klass] += obj->SizeOf();
 
-          different_object = true;
+          class_dirty_objects.push_back(obj_as_class);
+        } else if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
+          // this is an ArtMethod
+          ArtMethod* art_method = reinterpret_cast<ArtMethod*>(remote_obj);
 
-          dirty_objects_by_class[klass].push_back(remote_obj);
-        } else {
-          ++clean_object_class_map[klass];
-        }
-
-        std::string descriptor = GetClassDescriptor(klass);
-        if (different_object) {
-          if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
-            // this is a "Class"
-            mirror::Class* obj_as_class  = reinterpret_cast<mirror::Class*>(remote_obj);
-
-            // print the fields that are dirty
-            for (size_t i = 0; i < obj->SizeOf(); ++i) {
-              if (current[i] != current_remote[i]) {
-                class_field_dirty_count[i]++;
-              }
+          // print the fields that are dirty
+          for (size_t i = 0; i < obj->SizeOf(); ++i) {
+            if (current[i] != current_remote[i]) {
+              art_method_field_dirty_count[i]++;
             }
-
-            class_dirty_objects.push_back(obj_as_class);
-          } else if (strcmp(descriptor.c_str(), "Ljava/lang/reflect/ArtMethod;") == 0) {
-            // this is an ArtMethod
-            ArtMethod* art_method = reinterpret_cast<ArtMethod*>(remote_obj);
-
-            // print the fields that are dirty
-            for (size_t i = 0; i < obj->SizeOf(); ++i) {
-              if (current[i] != current_remote[i]) {
-                art_method_field_dirty_count[i]++;
-              }
-            }
-
-            art_method_dirty_objects.push_back(art_method);
           }
-        } else if (on_dirty_page) {
-          // This object was either never mutated or got mutated back to the same value.
-          // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
-          false_dirty_objects.push_back(obj);
-          false_dirty_objects_map[klass].push_back(obj);
-          false_dirty_object_bytes += obj->SizeOf();
-          false_dirty_byte_count[obj->GetClass()] += obj->SizeOf();
-          false_dirty_object_count[obj->GetClass()] += 1;
-        }
 
-        if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
-          local_class_map[descriptor] = reinterpret_cast<mirror::Class*>(obj);
-          remote_class_map[descriptor] = reinterpret_cast<mirror::Class*>(remote_obj);
+          art_method_dirty_objects.push_back(art_method);
         }
-
-        // Unconditionally store the class descriptor in case we need it later
-        class_to_descriptor_map[klass] = descriptor;
-        current += RoundUp(obj->SizeOf(), kObjectAlignment);
+      } else if (on_dirty_page) {
+        // This object was either never mutated or got mutated back to the same value.
+        // TODO: Do I want to distinguish a "different" vs a "dirty" page here?
+        false_dirty_objects.push_back(obj);
+        false_dirty_objects_map[klass].push_back(obj);
+        false_dirty_object_bytes += obj->SizeOf();
+        false_dirty_byte_count[obj->GetClass()] += obj->SizeOf();
+        false_dirty_object_count[obj->GetClass()] += 1;
       }
+
+      if (strcmp(descriptor.c_str(), "Ljava/lang/Class;") == 0) {
+        local_class_map[descriptor] = reinterpret_cast<mirror::Class*>(obj);
+        remote_class_map[descriptor] = reinterpret_cast<mirror::Class*>(remote_obj);
+      }
+
+      // Unconditionally store the class descriptor in case we need it later
+      class_to_descriptor_map[klass] = descriptor;
+      current += RoundUp(obj->SizeOf(), kObjectAlignment);
     }
 
     // Looking at only dirty pages, figure out how many of those bytes belong to dirty objects.
@@ -492,6 +544,71 @@
     auto dirty_object_class_values = SortByValueDesc(dirty_object_class_map);
     auto clean_object_class_values = SortByValueDesc(clean_object_class_map);
 
+    os << "\n" << "  Dirty objects: " << dirty_objects.size() << "\n";
+    for (mirror::Object* obj : dirty_objects) {
+      const char* tabs = "    ";
+      // Attempt to find fields for all dirty bytes.
+      mirror::Class* klass = obj->GetClass();
+      if (obj->IsClass()) {
+        os << tabs << "Class " << PrettyClass(obj->AsClass()) << " " << obj << "\n";
+      } else {
+        os << tabs << "Instance of " << PrettyClass(klass) << " " << obj << "\n";
+      }
+
+      std::unordered_set<ArtField*> dirty_instance_fields;
+      std::unordered_set<ArtField*> dirty_static_fields;
+      const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+      ptrdiff_t offset = obj_bytes - begin_image_ptr;
+      uint8_t* remote_bytes = &remote_contents[offset];
+      mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(remote_bytes);
+      for (size_t i = 0, count = obj->SizeOf(); i < count; ++i) {
+        if (obj_bytes[i] != remote_bytes[i]) {
+          ArtField* field = ArtField::FindInstanceFieldWithOffset</*exact*/false>(klass, i);
+          if (field != nullptr) {
+            dirty_instance_fields.insert(field);
+          } else if (obj->IsClass()) {
+            field = ArtField::FindStaticFieldWithOffset</*exact*/false>(obj->AsClass(), i);
+            if (field != nullptr) {
+              dirty_static_fields.insert(field);
+            }
+          }
+          if (field == nullptr) {
+            if (klass->IsArrayClass()) {
+              mirror::Class* component_type = klass->GetComponentType();
+              Primitive::Type primitive_type = component_type->GetPrimitiveType();
+              size_t component_size = Primitive::ComponentSize(primitive_type);
+              size_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+              if (i >= data_offset) {
+                os << tabs << "Dirty array element " << (i - data_offset) / component_size << "\n";
+                // Skip to next element to prevent spam.
+                i += component_size - 1;
+                continue;
+              }
+            }
+            os << tabs << "No field for byte offset " << i << "\n";
+          }
+        }
+      }
+      // Dump different fields. TODO: Dump field contents.
+      if (!dirty_instance_fields.empty()) {
+        os << tabs << "Dirty instance fields " << dirty_instance_fields.size() << "\n";
+        for (ArtField* field : dirty_instance_fields) {
+          os << tabs << PrettyField(field)
+             << " original=" << PrettyFieldValue(field, obj)
+             << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+        }
+      }
+      if (!dirty_static_fields.empty()) {
+        os << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
+        for (ArtField* field : dirty_static_fields) {
+          os << tabs << PrettyField(field)
+             << " original=" << PrettyFieldValue(field, obj)
+             << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+        }
+      }
+      os << "\n";
+    }
+
     os << "\n" << "  Dirty object count by class:\n";
     for (const auto& vk_pair : dirty_object_class_values) {
       int dirty_object_count = vk_pair.first;
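
The per-object dump above attributes each differing byte to a field through a non-exact offset lookup. The byte-comparison core, reduced to a standalone sketch; the callback is where imgdiag calls ArtField::FindInstanceFieldWithOffset</*exact*/ false>:

    #include <cstddef>
    #include <cstdint>

    // Walk the local and remote copies of one object and report every byte
    // offset that differs; the caller maps each offset to a field, if any.
    template <typename Callback>
    void ForEachDirtyByte(const uint8_t* local,
                          const uint8_t* remote,
                          size_t object_size,
                          Callback&& on_dirty_offset) {
      for (size_t i = 0; i < object_size; ++i) {
        if (local[i] != remote[i]) {
          on_dirty_offset(i);
        }
      }
    }
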
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 1150ce9..9a3bb02 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -247,6 +247,10 @@
       return;
     }
 
+    uint32_t entry_point = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    // Clear Thumb2 bit.
+    const void* code_address = EntryPointToCodePointer(reinterpret_cast<void*>(entry_point));
+
     debug::MethodDebugInfo info = debug::MethodDebugInfo();
     info.trampoline_name = nullptr;
     info.dex_file = &dex_file;
@@ -259,7 +263,7 @@
     info.is_native_debuggable = oat_header.IsNativeDebuggable();
     info.is_optimized = method_header->IsOptimized();
     info.is_code_address_text_relative = true;
-    info.code_address = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    info.code_address = reinterpret_cast<uintptr_t>(code_address);
     info.code_size = method_header->GetCodeSize();
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = info.is_optimized ? method_header->GetOptimizedCodeInfoPtr() : nullptr;
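
The new lines compute a code address by subtracting the executable offset and
then stripping the Thumb2 mode bit via EntryPointToCodePointer. On ARM, bit 0
of an entry point selects Thumb state rather than being part of the address;
a sketch of what the helper boils down to on 32-bit ARM (an assumption, not
the full cross-ISA implementation):

    #include <cstdint>

    // Hypothetical stand-in for EntryPointToCodePointer on 32-bit ARM:
    // clear bit 0, which selects Thumb execution mode.
    inline const void* ToCodePointer(uintptr_t entry_point) {
      return reinterpret_cast<const void*>(entry_point & ~uintptr_t{1});
    }
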
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 3463b0d..d911497 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -343,6 +343,49 @@
   }
 }
 
+// If kExactOffset is true then we only return the field whose offset matches field_offset
+// exactly; otherwise we return the field whose storage range contains the offset.
+template <bool kExactOffset>
+static inline ArtField* FindFieldWithOffset(
+    const IterationRange<StrideIterator<ArtField>>& fields,
+    uint32_t field_offset) SHARED_REQUIRES(Locks::mutator_lock_) {
+  for (ArtField& field : fields) {
+    if (kExactOffset) {
+      if (field.GetOffset().Uint32Value() == field_offset) {
+        return &field;
+      }
+    } else {
+      const uint32_t offset = field.GetOffset().Uint32Value();
+      Primitive::Type type = field.GetTypeAsPrimitiveType();
+      const size_t field_size = Primitive::ComponentSize(type);
+      DCHECK_GT(field_size, 0u);
+      if (offset <= field_offset && field_offset < offset + field_size) {
+        return &field;
+      }
+    }
+  }
+  return nullptr;
+}
+
+template <bool kExactOffset>
+inline ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass,
+                                                       uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  ArtField* field = FindFieldWithOffset<kExactOffset>(klass->GetIFields(), field_offset);
+  if (field != nullptr) {
+    return field;
+  }
+  // We did not find the field in this class; look in the superclass.
+  return (klass->GetSuperClass() != nullptr) ?
+      FindInstanceFieldWithOffset<kExactOffset>(klass->GetSuperClass(), field_offset) : nullptr;
+}
+
+template <bool kExactOffset>
+inline ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  return FindFieldWithOffset<kExactOffset>(klass->GetSFields(), field_offset);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ART_FIELD_INL_H_
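
With kExactOffset == false, the templated lookup matches any field whose
storage range [offset, offset + size) covers the requested byte, which is
what the imgdiag change above relies on. A standalone model of that
containment test (illustrative types, not ArtField):

    #include <cstdint>
    #include <vector>

    struct FieldDesc {     // stand-in for ArtField
      uint32_t offset;     // GetOffset().Uint32Value()
      uint32_t size;       // Primitive::ComponentSize of the field's type
    };

    const FieldDesc* FindFieldContaining(const std::vector<FieldDesc>& fields,
                                         uint32_t byte_offset) {
      for (const FieldDesc& f : fields) {
        if (f.offset <= byte_offset && byte_offset < f.offset + f.size) {
          return &f;
        }
      }
      return nullptr;  // caller may retry on the superclass, as above
    }
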
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 3737e0d..ea5078e 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -47,28 +47,6 @@
   offset_ = num_bytes.Uint32Value();
 }
 
-ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  for (ArtField& field : klass->GetIFields()) {
-    if (field.GetOffset().Uint32Value() == field_offset) {
-      return &field;
-    }
-  }
-  // We did not find field in the class: look into superclass.
-  return (klass->GetSuperClass() != nullptr) ?
-      FindInstanceFieldWithOffset(klass->GetSuperClass(), field_offset) : nullptr;
-}
-
-ArtField* ArtField::FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  for (ArtField& field : klass->GetSFields()) {
-    if (field.GetOffset().Uint32Value() == field_offset) {
-      return &field;
-    }
-  }
-  return nullptr;
-}
-
 mirror::Class* ArtField::ProxyFindSystemClass(const char* descriptor) {
   DCHECK(GetDeclaringClass()->IsProxyClass());
   return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
diff --git a/runtime/art_field.h b/runtime/art_field.h
index ee1ba1f..b64b70f 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -159,9 +159,16 @@
   }
 
   // Returns an instance field with this offset in the given class or null if not found.
+  // If kExactOffset is true then we only find the matching offset, not the field containing the
+  // offset.
+  template <bool kExactOffset = true>
   static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Returns a static field with this offset in the given class or null if not found.
+  // If kExactOffset is true then we only find the matching offset, not the field containing the
+  // offset.
+  template <bool kExactOffset = true>
   static ArtField* FindStaticFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index d1ef019..3dbcd58 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -545,9 +545,6 @@
   ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Note, hotness_counter_ updates are non-atomic but it doesn't need to be precise.  Also,
-  // given that the counter is only 16 bits wide we can expect wrap-around in some
-  // situations.  Consumers of hotness_count_ must be able to deal with that.
   uint16_t IncrementCounter() {
     return ++hotness_count_;
   }
@@ -556,14 +553,6 @@
     hotness_count_ = 0;
   }
 
-  void SetCounter(int16_t hotness_count) {
-    hotness_count_ = hotness_count;
-  }
-
-  uint16_t GetCounter() const {
-    return hotness_count_;
-  }
-
   const uint8_t* GetQuickenedInfo() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns the method header for the compiled code containing 'pc'. Note that runtime
@@ -608,7 +597,7 @@
   // ifTable.
   uint16_t method_index_;
 
-  // The hotness we measure for this method. Managed by the interpreter. Not atomic, as we allow
+  // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow
   // missing increments: if the method is hot, we will see it eventually.
   uint16_t hotness_count_;
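
The surviving comment states the contract: increments may be lost and the
16-bit counter may wrap, and that is acceptable because consumers only need a
hot method to be noticed eventually. A toy model of that contract (names are
illustrative, not the ArtMethod API):

    #include <cstdint>

    struct HotnessCounter {
      uint16_t count = 0;                        // wraps modulo 2^16 by design
      uint16_t Increment() { return ++count; }   // plain, non-atomic update
      bool LooksHot(uint16_t threshold) const { return count >= threshold; }
    };
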
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index d27d2f6..942f9de 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -20,7 +20,6 @@
 #if defined(__cplusplus)
 #include "art_method.h"
 #include "gc/allocator/rosalloc.h"
-#include "jit/jit_instrumentation.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
@@ -189,13 +188,7 @@
 #define SHADOWFRAME_DEX_PC_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 4)
 ADD_TEST_EQ(SHADOWFRAME_DEX_PC_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::DexPCOffset()))
-#define SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
-ADD_TEST_EQ(SHADOWFRAME_CACHED_HOTNESS_COUNTDOWN_OFFSET,
-            static_cast<int32_t>(art::ShadowFrame::CachedHotnessCountdownOffset()))
-#define SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 10)
-ADD_TEST_EQ(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET,
-            static_cast<int32_t>(art::ShadowFrame::HotnessCountdownOffset()))
-#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 12)
+#define SHADOWFRAME_VREGS_OFFSET (SHADOWFRAME_NUMBER_OF_VREGS_OFFSET + 8)
 ADD_TEST_EQ(SHADOWFRAME_VREGS_OFFSET,
             static_cast<int32_t>(art::ShadowFrame::VRegsOffset()))
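
The ADD_TEST_EQ lines pair each hand-maintained assembly constant with the
authoritative C++ expression, so a layout change fails the build instead of
silently corrupting the interpreter. The same idea in plain static_assert
form, with a made-up struct standing in for ShadowFrame:

    #include <cstddef>
    #include <cstdint>

    struct Frame {                 // illustrative, not art::ShadowFrame
      uint32_t number_of_vregs_;   // offset 0
      uint32_t dex_pc_;            // offset 4
      uint32_t vregs_[1];          // offset 8, as the new #define expects
    };

    #define FRAME_VREGS_OFFSET 8   // constant the assembly would hard-code
    static_assert(offsetof(Frame, vregs_) == FRAME_VREGS_OFFSET,
                  "assembly offset out of sync with C++ layout");
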
 
@@ -396,12 +389,6 @@
 #define THREAD_CHECKPOINT_REQUEST 2
 ADD_TEST_EQ(THREAD_CHECKPOINT_REQUEST, static_cast<int32_t>(art::kCheckpointRequest))
 
-#define JIT_CHECK_OSR -1
-ADD_TEST_EQ(JIT_CHECK_OSR, static_cast<int32_t>(art::jit::kJitCheckForOSR))
-
-#define JIT_HOTNESS_DISABLE -2
-ADD_TEST_EQ(JIT_HOTNESS_DISABLE, static_cast<int32_t>(art::jit::kJitHotnessDisabled))
-
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
 #endif
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 116261b..16fbfaa 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -42,11 +42,12 @@
 template <bool kResolve = true>
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
                                     const InlineInfo& inline_info,
+                                    const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
   SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint32_t method_index = inline_info.GetMethodIndexAtDepth(inlining_depth);
+  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
   InvokeType invoke_type = static_cast<InvokeType>(
-        inline_info.GetInvokeTypeAtDepth(inlining_depth));
+        inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth));
   ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*));
   if (!caller->IsRuntimeMethod()) {
     return caller;
@@ -68,7 +69,10 @@
   if (inlining_depth == 0) {
     class_loader.Assign(outer_method->GetClassLoader());
   } else {
-    caller = GetResolvedMethod<kResolve>(outer_method, inline_info, inlining_depth - 1);
+    caller = GetResolvedMethod<kResolve>(outer_method,
+                                         inline_info,
+                                         encoding,
+                                         inlining_depth - 1);
     class_loader.Assign(caller->GetClassLoader());
   }
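
The signature change threads an explicit encoding through GetResolvedMethod
because inline-info rows are bit-packed, and the encoding records where each
field lives within a row. A minimal sketch of that decode step (hypothetical
layout, not the real InlineInfoEncoding):

    #include <cstdint>

    struct FieldEncoding {
      uint32_t shift;  // bit position of the field within the packed row
      uint32_t mask;   // width mask applied after shifting
    };

    inline uint32_t Decode(uint32_t packed_row, FieldEncoding e) {
      return (packed_row >> e.shift) & e.mask;
    }

    // e.g. method_index = Decode(row, encoding.method_index);
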
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 3368411..e46576e 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -280,7 +280,10 @@
         DCHECK(stack_map.IsValid());
         if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
           InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+          caller = GetResolvedMethod(outer_method,
+                                     inline_info,
+                                     encoding.inline_info_encoding,
+                                     inline_info.GetDepth(encoding.inline_info_encoding) - 1);
         }
       }
     }
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 27926e0..f3e8dba 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -332,7 +332,8 @@
       DCHECK(stack_map.IsValid());
       if (stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
         InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-        return inline_info.GetDexPcAtDepth(inline_info.GetDepth() - 1);
+        return inline_info.GetDexPcAtDepth(encoding.inline_info_encoding,
+                                           inline_info.GetDepth(encoding.inline_info_encoding)-1);
       } else {
         return stack_map.GetDexPc(encoding.stack_map_encoding);
       }
@@ -2166,9 +2167,13 @@
     uint32_t imt_index = interface_method->GetDexMethodIndex();
     ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
         imt_index % mirror::Class::kImtSize, sizeof(void*));
-    DCHECK(conflict_method->IsRuntimeMethod()) << PrettyMethod(conflict_method);
-    ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
-    method = current_table->Lookup(interface_method);
+    if (LIKELY(conflict_method->IsRuntimeMethod())) {
+      ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
+      method = current_table->Lookup(interface_method);
+    } else {
+      // The IMT slot holds a real method, not a conflict method; resolve directly.
+      method = cls->FindVirtualMethodForInterface(interface_method, sizeof(void*));
+    }
     if (method != nullptr) {
       return GetTwoWordSuccessValue(
           reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode()),
@@ -2214,39 +2219,41 @@
   uint32_t imt_index = interface_method->GetDexMethodIndex();
   ArtMethod* conflict_method = cls->GetEmbeddedImTableEntry(
       imt_index % mirror::Class::kImtSize, sizeof(void*));
-  ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
-  Runtime* runtime = Runtime::Current();
-  LinearAlloc* linear_alloc = (cls->GetClassLoader() == nullptr)
-      ? runtime->GetLinearAlloc()
-      : cls->GetClassLoader()->GetAllocator();
-  bool is_new_entry = (conflict_method == runtime->GetImtConflictMethod());
+  if (conflict_method->IsRuntimeMethod()) {
+    ImtConflictTable* current_table = conflict_method->GetImtConflictTable(sizeof(void*));
+    Runtime* runtime = Runtime::Current();
+    LinearAlloc* linear_alloc = (cls->GetClassLoader() == nullptr)
+        ? runtime->GetLinearAlloc()
+        : cls->GetClassLoader()->GetAllocator();
+    bool is_new_entry = (conflict_method == runtime->GetImtConflictMethod());
 
-  // Create a new entry if the existing one is the shared conflict method.
-  ArtMethod* new_conflict_method = is_new_entry
-      ? runtime->CreateImtConflictMethod(linear_alloc)
-      : conflict_method;
+    // Create a new entry if the existing one is the shared conflict method.
+    ArtMethod* new_conflict_method = is_new_entry
+        ? runtime->CreateImtConflictMethod(linear_alloc)
+        : conflict_method;
 
-  // Allocate a new table. Note that we will leak this table at the next conflict,
-  // but that's a tradeoff compared to making the table fixed size.
-  void* data = linear_alloc->Alloc(
-      self, ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table));
-  CHECK(data != nullptr) << "Out of memory";
-  ImtConflictTable* new_table = new (data) ImtConflictTable(
-      current_table, interface_method, method);
+    // Allocate a new table. Note that we will leak this table at the next conflict,
+    // but that's a tradeoff compared to making the table fixed size.
+    void* data = linear_alloc->Alloc(
+        self, ImtConflictTable::ComputeSizeWithOneMoreEntry(current_table));
+    CHECK(data != nullptr) << "Out of memory";
+    ImtConflictTable* new_table = new (data) ImtConflictTable(
+        current_table, interface_method, method);
 
-  // Do a fence to ensure threads see the data in the table before it is assigned
-  // to the conlict method.
-  // Note that there is a race in the presence of multiple threads and we may leak
-  // memory from the LinearAlloc, but that's a tradeoff compared to using
-  // atomic operations.
-  QuasiAtomic::ThreadFenceRelease();
-  new_conflict_method->SetImtConflictTable(new_table);
-  if (is_new_entry) {
-    // Update the IMT if we create a new conflict method. No fence needed here, as the
-    // data is consistent.
-    cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
-                                 new_conflict_method,
-                                 sizeof(void*));
+    // Do a fence to ensure threads see the data in the table before it is assigned
+    // to the conflict method.
+    // Note that there is a race in the presence of multiple threads and we may leak
+    // memory from the LinearAlloc, but that's a tradeoff compared to using
+    // atomic operations.
+    QuasiAtomic::ThreadFenceRelease();
+    new_conflict_method->SetImtConflictTable(new_table);
+    if (is_new_entry) {
+      // Update the IMT if we create a new conflict method. No fence needed here, as the
+      // data is consistent.
+      cls->SetEmbeddedImTableEntry(imt_index % mirror::Class::kImtSize,
+                                   new_conflict_method,
+                                   sizeof(void*));
+    }
   }
 
   const void* code = method->GetEntryPointFromQuickCompiledCode();
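
The retained logic grows the conflict table by filling a fresh, larger
allocation and only then publishing the pointer behind a release fence; a
racing thread may orphan one copy, which the comments accept as cheaper than
atomic operations. The publish step reduced to standard C++ (illustrative
types, not the ART runtime API):

    #include <atomic>

    struct ConflictTable;  // stand-in for ImtConflictTable

    // Release ordering plays the role of QuasiAtomic::ThreadFenceRelease():
    // a reader that observes the new pointer also observes the fully
    // initialized entries written before the store.
    void Publish(std::atomic<ConflictTable*>& slot, ConflictTable* bigger) {
      slot.store(bigger, std::memory_order_release);
    }
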
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index a4c3d41..d07f47b 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -303,8 +303,7 @@
   bool NonJitProfilingActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
-        have_exception_caught_listeners_ || have_method_unwind_listeners_ ||
-        have_branch_listeners_;
+        have_exception_caught_listeners_ || have_method_unwind_listeners_;
   }
 
   // Inform listeners that a method has been entered. A dex PC is provided as we may install
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index f365fd0..b55312f 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -22,7 +22,6 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -65,20 +64,15 @@
   currentHandlersTable = handlersTable[ \
       Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
 
-#define BRANCH_INSTRUMENTATION(offset)                                                          \
-  do {                                                                                          \
-    instrumentation->Branch(self, method, dex_pc, offset);                                      \
-    JValue result;                                                                              \
-    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {           \
-      return result;                                                                            \
-    }                                                                                           \
-  } while (false)
-
-#define HOTNESS_UPDATE()                                                                       \
-  do {                                                                                         \
-    if (jit_instrumentation_cache != nullptr) {                                                \
-      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
-    }                                                                                          \
+#define BRANCH_INSTRUMENTATION(offset)                                                            \
+  do {                                                                                            \
+    ArtMethod* method = shadow_frame.GetMethod();                                                 \
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
+    instrumentation->Branch(self, method, dex_pc, offset);                                        \
+    JValue result;                                                                                \
+    if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {             \
+      return result;                                                                              \
+    }                                                                                             \
   } while (false)
 
 #define UNREACHABLE_CODE_CHECK()                \
@@ -192,13 +186,6 @@
   UPDATE_HANDLER_TABLE();
   std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder;
   size_t lambda_captured_variable_index = 0;
-  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
-  ArtMethod* method = shadow_frame.GetMethod();
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
-  if (jit != nullptr) {
-    jit_instrumentation_cache = jit->GetInstrumentationCache();
-  }
 
   // Jump to first instruction.
   ADVANCE(0);
@@ -638,7 +625,6 @@
     int8_t offset = inst->VRegA_10t(inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -652,7 +638,6 @@
     int16_t offset = inst->VRegA_20t();
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -666,7 +651,6 @@
     int32_t offset = inst->VRegA_30t();
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -680,7 +664,6 @@
     int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -694,7 +677,6 @@
     int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
     BRANCH_INSTRUMENTATION(offset);
     if (IsBackwardBranch(offset)) {
-      HOTNESS_UPDATE();
       if (UNLIKELY(self->TestAllFlags())) {
         self->CheckSuspend();
         UPDATE_HANDLER_TABLE();
@@ -798,7 +780,6 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -818,7 +799,6 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -838,7 +818,6 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -858,7 +837,6 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -878,7 +856,6 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -898,7 +875,6 @@
       int16_t offset = inst->VRegC_22t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -917,7 +893,6 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -936,7 +911,6 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -955,7 +929,6 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -974,7 +947,6 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -993,7 +965,6 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
@@ -1012,7 +983,6 @@
       int16_t offset = inst->VRegB_21t();
       BRANCH_INSTRUMENTATION(offset);
       if (IsBackwardBranch(offset)) {
-        HOTNESS_UPDATE();
         if (UNLIKELY(self->TestAllFlags())) {
           self->CheckSuspend();
           UPDATE_HANDLER_TABLE();
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index f9941d2..0488dbf 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -18,7 +18,6 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -73,6 +72,7 @@
 
 #define BRANCH_INSTRUMENTATION(offset)                                                         \
   do {                                                                                         \
+    ArtMethod* method = shadow_frame.GetMethod();                                              \
     instrumentation->Branch(self, method, dex_pc, offset);                                     \
     JValue result;                                                                             \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
@@ -80,13 +80,6 @@
     }                                                                                          \
   } while (false)
 
-#define HOTNESS_UPDATE()                                                                       \
-  do {                                                                                         \
-    if (jit_instrumentation_cache != nullptr) {                                                \
-      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
-    }                                                                                          \
-  } while (false)
-
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -108,12 +101,6 @@
   const uint16_t* const insns = code_item->insns_;
   const Instruction* inst = Instruction::At(insns + dex_pc);
   uint16_t inst_data;
-  ArtMethod* method = shadow_frame.GetMethod();
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
-  if (jit != nullptr) {
-    jit_instrumentation_cache = jit->GetInstrumentationCache();
-  }
 
   // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
   // to keep this live for the scope of the entire function call.
@@ -577,7 +564,6 @@
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -588,7 +574,6 @@
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -599,7 +584,6 @@
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -610,7 +594,6 @@
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -621,7 +604,6 @@
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
         if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -726,7 +708,6 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -743,7 +724,6 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -760,7 +740,6 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -777,7 +756,6 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -794,7 +772,6 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -811,7 +788,6 @@
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -827,7 +803,6 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -843,7 +818,6 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -859,7 +833,6 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -875,7 +848,6 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -891,7 +863,6 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -907,7 +878,6 @@
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
           if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
diff --git a/runtime/interpreter/mterp/arm/bincmp.S b/runtime/interpreter/mterp/arm/bincmp.S
index 8fad42f..cfad714 100644
--- a/runtime/interpreter/mterp/arm/bincmp.S
+++ b/runtime/interpreter/mterp/arm/bincmp.S
@@ -1,6 +1,7 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -8,12 +9,23 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    b${condition} MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    mov${revcmp} rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
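
The rewritten fragment preloads rINST with 2 (the not-taken offset, in 16-bit
code units) under the *reverse* condition, so the taken and not-taken outcomes
share one fall-through path. The same decision in scalar form, using if-le as
the example from the comment:

    #include <cstdint>

    // Returns the next-instruction offset in code units for "if-le vA, vB".
    int32_t NextOffsetIfLe(int32_t vA, int32_t vB, int16_t branch_offset) {
      int32_t offset = branch_offset;  // assume the branch is taken
      if (vA > vB) {                   // revcmp of "le" is "gt"
        offset = 2;                    // not taken: size of the if-* opcode
      }
      return offset;                   // caller scales by 2 to get bytes
    }
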
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
index a6b131d..981c036 100644
--- a/runtime/interpreter/mterp/arm/entry.S
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -33,8 +33,10 @@
 
 ExecuteMterpImpl:
     .fnstart
-    .save {r3-r10,fp,lr}
-    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
+    .save {r4-r10,fp,lr}
+    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
+    .pad    #4
+    sub     sp, sp, #4                  @ align 64
 
     /* Remember the return register */
     str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -55,12 +57,6 @@
     /* Starting ibase */
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
-    /* Set up for backwards branches & osr profiling */
-    ldr     r0, [rFP, #OFF_FP_METHOD]
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    bl      MterpSetUpHotnessCountdown
-    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
-
     /* start executing the instruction at rPC */
     FETCH_INST                          @ load rINST from rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 62e573a..3456a75 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -114,117 +114,21 @@
     /* NOTE: no fallthrough */
 
 /*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    rINST          <= signed offset
- *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
+ * Check for a pending suspend request.  Assumes rINST is already loaded and rPC
+ * advanced; the opcode fetch and dispatch remain to be done, and flags are in lr.
  */
-MterpCommonTakenBranchNoFlags:
-    cmp     rINST, #0
-MterpCommonTakenBranch:
-    bgt     .L_forward_branch           @ don't add forward branches to hotness
-/*
- * We need to subtract 1 from positive values and we should not see 0 here,
- * so we may use the result of the comparison with -1.
- */
-#if JIT_CHECK_OSR != -1
-#  error "JIT_CHECK_OSR must be -1."
-#endif
-    cmp     rPROFILE, #JIT_CHECK_OSR
-    beq     .L_osr_check
-    subgts  rPROFILE, #1
-    beq     .L_add_batch                @ counted down to zero - report
-.L_resume_backward_branch:
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    REFRESH_IBASE
-    add     r2, rINST, rINST            @ r2<- byte offset
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+MterpCheckSuspendAndContinue:
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bne     .L_suspend_request_pending
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
-
-.L_suspend_request_pending:
+1:
     EXPORT_PC
     mov     r0, rSELF
     bl      MterpSuspendCheck           @ (self)
     cmp     r0, #0
     bne     MterpFallback
-    REFRESH_IBASE                       @ might have changed during suspend
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.L_no_count_backwards:
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    bne     .L_resume_backward_branch
-.L_osr_check:
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement
-    b       .L_resume_backward_branch
-
-.L_forward_branch:
-    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
-    beq     .L_check_osr_forward
-.L_resume_forward_branch:
-    add     r2, rINST, rINST            @ r2<- byte offset
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.L_check_osr_forward:
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement
-    b       .L_resume_forward_branch
-
-.L_add_batch:
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    ldr     r0, [rFP, #OFF_FP_METHOD]
-    mov     r2, rSELF
-    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
-    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
-    b       .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, #2
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement
-    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
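
MterpCheckSuspendAndContinue refreshes rIBASE, tests the thread flag word, and
calls out only when a suspend or checkpoint is pending. The flag test in C++
terms, reusing the constants from asm_support.h above (the suspend value of 1
is an assumption; only the checkpoint value appears in this diff):

    #include <cstdint>

    constexpr uint32_t kThreadSuspendRequest    = 1;  // assumed value
    constexpr uint32_t kThreadCheckpointRequest = 2;  // THREAD_CHECKPOINT_REQUEST

    inline bool NeedsSuspendCheck(uint32_t thread_flags) {
      return (thread_flags &
              (kThreadSuspendRequest | kThreadCheckpointRequest)) != 0;
    }
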
 
@@ -272,27 +176,9 @@
     str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
-/*
- * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmp     rPROFILE, #0
-    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
-    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
+    add     sp, sp, #4                              @ un-align 64
+    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
 
-MterpProfileActive:
-    mov     rINST, r0                               @ stash return value
-    /* Report cached hotness counts */
-    ldr     r0, [rFP, #OFF_FP_METHOD]
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rSELF
-    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
-    mov     r0, rINST                               @ restore return value
-    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
     .fnend
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index 039bcbe..298af8a 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -72,8 +72,7 @@
   r6  rSELF     self (Thread) pointer
   r7  rINST     first 16-bit code unit of current instruction
   r8  rIBASE    interpreted instruction base pointer, used for computed goto
-  r10 rPROFILE  branch profiling countdown
-  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
 
 Macros are provided for common operations.  Each macro MUST emit only
 one instruction to make instruction-counting easier.  They MUST NOT alter
@@ -91,13 +90,12 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC      r4
-#define rFP      r5
-#define rSELF    r6
-#define rINST    r7
-#define rIBASE   r8
-#define rPROFILE r10
-#define rREFS    r11
+#define rPC     r4
+#define rFP     r5
+#define rSELF   r6
+#define rINST   r7
+#define rIBASE  r8
+#define rREFS   r11
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -111,7 +109,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME OFF_FP(0)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
index 6626ff0..e57b19c 100644
--- a/runtime/interpreter/mterp/arm/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -1,6 +1,22 @@
     /*
      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
      * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
      */
     /* cmp-long vAA, vBB, vCC */
     FETCH r0, 1                         @ r0<- CCBB
@@ -11,13 +27,30 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    cmp     r0, r2
-    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
-    mov     ip, #0
-    mvnlt   ip, #0                      @ -1
-    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
-    orrne   ip, #1
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .L${opcode}_less            @ signed compare on high part
+    bgt     .L${opcode}_greater
+    subs    r1, r0, r2                  @ r1<- r0 - r2
+    bhi     .L${opcode}_greater         @ unsigned compare on low part
+    bne     .L${opcode}_less
+    b       .L${opcode}_finish          @ equal; r1 already holds 0
+%break
+
+.L${opcode}_less:
+    mvn     r1, #0                      @ r1<- -1
+    @ We would like to predicate the next mov to avoid the branch, but there is
+    @ no clean way to do it; instead, we just replicate the tail end.
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG ip, r9                     @ vAA<- ip
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.L${opcode}_greater:
+    mov     r1, #1                      @ r1<- 1
+    @ fall through to _finish
+
+.L${opcode}_finish:
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
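
The handler's comment describes the multi-compare strategy: a signed compare
of the high words decides most cases, and only a tie falls through to an
unsigned compare of the low words. The same algorithm as portable C++:

    #include <cstdint>

    // Returns -1, 0, or 1, matching cmp-long semantics.
    int32_t CmpLong(int64_t a, int64_t b) {
      const int32_t a_hi = static_cast<int32_t>(a >> 32);
      const int32_t b_hi = static_cast<int32_t>(b >> 32);
      if (a_hi != b_hi) {
        return a_hi < b_hi ? -1 : 1;   // signed compare on high words
      }
      const uint32_t a_lo = static_cast<uint32_t>(a);
      const uint32_t b_lo = static_cast<uint32_t>(b);
      if (a_lo != b_lo) {
        return a_lo < b_lo ? -1 : 1;   // unsigned compare on low words
      }
      return 0;
    }
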
diff --git a/runtime/interpreter/mterp/arm/op_goto.S b/runtime/interpreter/mterp/arm/op_goto.S
index aa42dfd..6861950 100644
--- a/runtime/interpreter/mterp/arm/op_goto.S
+++ b/runtime/interpreter/mterp/arm/op_goto.S
@@ -5,5 +5,32 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    @ If backward branch, refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    @ If backward branch, refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/op_goto_16.S b/runtime/interpreter/mterp/arm/op_goto_16.S
index 12a6bc0..91639ca 100644
--- a/runtime/interpreter/mterp/arm/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm/op_goto_16.S
@@ -5,5 +5,27 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
+#if MTERP_PROFILE_BRANCHES
     FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
diff --git a/runtime/interpreter/mterp/arm/op_goto_32.S b/runtime/interpreter/mterp/arm/op_goto_32.S
index 7325a1c..e730b52 100644
--- a/runtime/interpreter/mterp/arm/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm/op_goto_32.S
@@ -10,7 +10,31 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r3, 2                         @ r1<- AAAA (hi)
-    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
-    b       MterpCommonTakenBranch
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
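
The goto, goto/16, and goto/32 handlers above share one shape when branch
profiling is compiled in: report the branch (which may divert to on-stack
replacement), advance the PC, and run the suspend check only on non-forward
branches. A compilable outline of that flow, with stub helpers standing in
for MterpProfileBranch and MterpSuspendCheck:

    #include <cstdint>

    struct Thread;
    struct ShadowFrame;

    // Stubs for illustration; the real work happens in the mterp runtime.
    static bool ProfileBranch(Thread*, ShadowFrame*, int32_t) { return false; }
    static void SuspendCheck(Thread*) {}

    enum class Next { kDispatch, kOnStackReplacement };

    Next HandleGoto(Thread* self, ShadowFrame* frame,
                    const uint16_t** pc, int32_t offset) {
      if (ProfileBranch(self, frame, offset)) {
        return Next::kOnStackReplacement;  // bne MterpOnStackReplacement
      }
      *pc += offset;                       // FETCH_ADVANCE_INST_RB, code units
      if (offset <= 0) {                   // goto/32 may even branch to itself
        SuspendCheck(self);                // b{mi,le} MterpCheckSuspendAndContinue
      }
      return Next::kDispatch;
    }
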
diff --git a/runtime/interpreter/mterp/arm/op_if_eq.S b/runtime/interpreter/mterp/arm/op_if_eq.S
index b8b6a6e..5685686 100644
--- a/runtime/interpreter/mterp/arm/op_if_eq.S
+++ b/runtime/interpreter/mterp/arm/op_if_eq.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "condition":"eq" }
+%include "arm/bincmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_eqz.S b/runtime/interpreter/mterp/arm/op_if_eqz.S
index 7012f61..2a9c0f9 100644
--- a/runtime/interpreter/mterp/arm/op_if_eqz.S
+++ b/runtime/interpreter/mterp/arm/op_if_eqz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "condition":"eq" }
+%include "arm/zcmp.S" { "revcmp":"ne" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ge.S b/runtime/interpreter/mterp/arm/op_if_ge.S
index eb29e63..60a0307 100644
--- a/runtime/interpreter/mterp/arm/op_if_ge.S
+++ b/runtime/interpreter/mterp/arm/op_if_ge.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "condition":"ge" }
+%include "arm/bincmp.S" { "revcmp":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gez.S b/runtime/interpreter/mterp/arm/op_if_gez.S
index d9da374..981cdec 100644
--- a/runtime/interpreter/mterp/arm/op_if_gez.S
+++ b/runtime/interpreter/mterp/arm/op_if_gez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "condition":"ge" }
+%include "arm/zcmp.S" { "revcmp":"lt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gt.S b/runtime/interpreter/mterp/arm/op_if_gt.S
index a35eab8..ca50cd7 100644
--- a/runtime/interpreter/mterp/arm/op_if_gt.S
+++ b/runtime/interpreter/mterp/arm/op_if_gt.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "condition":"gt" }
+%include "arm/bincmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_gtz.S b/runtime/interpreter/mterp/arm/op_if_gtz.S
index 4ef4d8e..c621812 100644
--- a/runtime/interpreter/mterp/arm/op_if_gtz.S
+++ b/runtime/interpreter/mterp/arm/op_if_gtz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "condition":"gt" }
+%include "arm/zcmp.S" { "revcmp":"le" }
diff --git a/runtime/interpreter/mterp/arm/op_if_le.S b/runtime/interpreter/mterp/arm/op_if_le.S
index c7c31bc..7e060f2 100644
--- a/runtime/interpreter/mterp/arm/op_if_le.S
+++ b/runtime/interpreter/mterp/arm/op_if_le.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "condition":"le" }
+%include "arm/bincmp.S" { "revcmp":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lez.S b/runtime/interpreter/mterp/arm/op_if_lez.S
index 9fbf6c9..f92be23 100644
--- a/runtime/interpreter/mterp/arm/op_if_lez.S
+++ b/runtime/interpreter/mterp/arm/op_if_lez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "condition":"le" }
+%include "arm/zcmp.S" { "revcmp":"gt" }
diff --git a/runtime/interpreter/mterp/arm/op_if_lt.S b/runtime/interpreter/mterp/arm/op_if_lt.S
index 9469fbb..213344d 100644
--- a/runtime/interpreter/mterp/arm/op_if_lt.S
+++ b/runtime/interpreter/mterp/arm/op_if_lt.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "condition":"lt" }
+%include "arm/bincmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ltz.S b/runtime/interpreter/mterp/arm/op_if_ltz.S
index a4fc1b8..dfd4e44 100644
--- a/runtime/interpreter/mterp/arm/op_if_ltz.S
+++ b/runtime/interpreter/mterp/arm/op_if_ltz.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "condition":"lt" }
+%include "arm/zcmp.S" { "revcmp":"ge" }
diff --git a/runtime/interpreter/mterp/arm/op_if_ne.S b/runtime/interpreter/mterp/arm/op_if_ne.S
index c945331..4a58b4a 100644
--- a/runtime/interpreter/mterp/arm/op_if_ne.S
+++ b/runtime/interpreter/mterp/arm/op_if_ne.S
@@ -1 +1 @@
-%include "arm/bincmp.S" { "condition":"ne" }
+%include "arm/bincmp.S" { "revcmp":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_if_nez.S b/runtime/interpreter/mterp/arm/op_if_nez.S
index 2d81fda..d864ef4 100644
--- a/runtime/interpreter/mterp/arm/op_if_nez.S
+++ b/runtime/interpreter/mterp/arm/op_if_nez.S
@@ -1 +1 @@
-%include "arm/zcmp.S" { "condition":"ne" }
+%include "arm/zcmp.S" { "revcmp":"eq" }
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
index a13c803..8f40f19 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -24,13 +24,13 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    mul     ip, r2, r1                  @ ip<- ZxW
-    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
-    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
-    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2 }                @ vAA/vAA+1<- r1/r2
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
     GOTO_OPCODE ip                      @ jump to next instruction
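
The rewritten sequence keeps only the low 64 bits of the 128-bit product, which is why one unsigned long multiply plus two plain 32-bit multiplies suffice. A minimal C++ sketch of the identity the umull/mul/mla instructions implement (the function and variable names here are illustrative, not part of the patch):

    #include <cstdint>

    // Low 64 bits of a 64x64 multiply, built from 32-bit halves:
    //   a*b mod 2^64 = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
    static uint64_t MulLongLow64(uint64_t a, uint64_t b) {
      uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
      uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
      uint64_t low = static_cast<uint64_t>(a_lo) * b_lo;   // umull r9, r10, r2, r0
      uint32_t cross = a_lo * b_hi + a_hi * b_lo;          // mul ip, ... then mla r2, ...
      return low + (static_cast<uint64_t>(cross) << 32);   // add r10, r2, r10
    }

The cross terms may wrap modulo 2^32, but since they land in the high word the result is still exact modulo 2^64, matching the Dalvik mul-long semantics.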
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
index 4c1f058..7ef24c5 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -13,12 +13,12 @@
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
-    mul     ip, r2, r1                  @ ip<- ZxW
-    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
-    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    add     r2, r2, lr                  @ r2<- r2 + low(ZxW + (YxX))
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_packed_switch.S b/runtime/interpreter/mterp/arm/op_packed_switch.S
index 412c58f..4c369cb 100644
--- a/runtime/interpreter/mterp/arm/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm/op_packed_switch.S
@@ -9,6 +9,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -16,5 +17,33 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      $func                       @ r0<- code-unit branch offset
-    movs    rINST, r0
-    b       MterpCommonTakenBranch
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      $func                       @ r0<- code-unit branch offset
+    mov     rINST, r0
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
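
For reference, the $func call (MterpDoPackedSwitch in the expanded output) resolves vAA against the switch payload and hands back a code-unit offset. A hedged C++ sketch of that lookup based on the Dalvik packed-switch payload layout; the struct and function names are illustrative:

    #include <cstdint>

    // Dalvik packed-switch payload: ident 0x0100, entry count, first key,
    // then `size` branch targets measured in 16-bit code units.
    struct PackedSwitchPayload {
      uint16_t ident;      // 0x0100 for packed-switch
      uint16_t size;       // number of entries
      int32_t first_key;
      int32_t targets[1];  // really `size` entries, trailing the header
    };

    static int32_t DoPackedSwitch(const PackedSwitchPayload* payload, int32_t val) {
      uint32_t index = static_cast<uint32_t>(val - payload->first_key);
      if (index < payload->size) {
        return payload->targets[index];  // taken: offset in code units
      }
      return 3;  // not taken: skip the 3-code-unit switch instruction itself
    }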
diff --git a/runtime/interpreter/mterp/arm/zcmp.S b/runtime/interpreter/mterp/arm/zcmp.S
index 5db8b6c..3d7dec0 100644
--- a/runtime/interpreter/mterp/arm/zcmp.S
+++ b/runtime/interpreter/mterp/arm/zcmp.S
@@ -1,17 +1,29 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    b${condition} MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    cmp     r2, #0                      @ compare (vA, 0)
+    mov${revcmp} rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
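
The "revcmp" rewrite inverts the usual branching shape: rather than jumping away when the condition holds, the handler preloads rINST with the branch offset and conditionally overwrites it with the not-taken width (2 code units) when the *reverse* test fires, so both outcomes share one epilogue. A small C++ sketch of that selection for if-lez, whose revcmp is "gt"; the names are illustrative:

    #include <cstdint>

    // Offset selection performed by "mov${revcmp} rINST, #2" for if-lez:
    static int32_t SelectOffset(int32_t vAA, int32_t branch_offset) {
      int32_t offset = branch_offset;  // FETCH_S: assume the branch is taken
      if (vAA > 0) {                   // reverse of "le" is "gt"
        offset = 2;                    // not taken: width of if-lez itself
      }
      return offset;                   // epilogue advances rPC by offset * 2 bytes
    }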
diff --git a/runtime/interpreter/mterp/arm64/bincmp.S b/runtime/interpreter/mterp/arm64/bincmp.S
index 8dd4fed..2356ecb 100644
--- a/runtime/interpreter/mterp/arm64/bincmp.S
+++ b/runtime/interpreter/mterp/arm64/bincmp.S
@@ -1,6 +1,7 @@
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -9,11 +10,22 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.${condition} MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg.
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
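
The arm64 variant reaches the same single-epilogue shape with a conditional select instead of a predicated move: both candidate offsets are materialized and csel keeps the branch offset only when the *forward* condition holds. A sketch with illustrative names:

    #include <cstdint>

    // Equivalent of "csel wINST, w1, w0, ${condition}":
    static int32_t SelectOffset(bool condition_holds,
                                int32_t branch_offset,    // w1, from FETCH_S
                                int32_t not_taken = 2) {  // w0
      return condition_holds ? branch_offset : not_taken;
    }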
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index 9fbbbd3..23e656e 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -31,12 +31,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
-    add     fp, sp, #64
+    stp     xIBASE, xREFS, [sp, #-64]!
+    stp     xSELF, xINST, [sp, #16]
+    stp     xPC, xFP, [sp, #32]
+    stp     fp, lr, [sp, #48]
+    add     fp, sp, #48
 
     /* Remember the return register */
     str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -57,12 +56,6 @@
     /* Starting ibase */
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
-    /* Set up for backwards branches & osr profiling */
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    bl      MterpSetUpHotnessCountdown
-    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
-
     /* start executing the instruction at rPC */
     FETCH_INST                          // load wINST from rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 2d3a11e..aae78de 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -107,107 +107,6 @@
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
     /* NOTE: no fallthrough */
-/*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    wINST          <= signed offset
- *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
- */
-MterpCommonTakenBranchNoFlags:
-    cmp     wINST, #0
-    b.gt    .L_forward_branch           // don't add forward branches to hotness
-    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
-    subs    wPROFILE, wPROFILE, #1      // countdown
-    b.eq    .L_add_batch                // counted down to zero - report
-.L_resume_backward_branch:
-    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.ne    .L_suspend_request_pending
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_suspend_request_pending:
-    EXPORT_PC
-    mov     x0, xSELF
-    bl      MterpSuspendCheck           // (self)
-    cbnz    x0, MterpFallback
-    REFRESH_IBASE                       // might have changed during suspend
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_no_count_backwards:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.ne    .L_resume_backward_branch
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_backward_branch
-
-.L_forward_branch:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_osr_forward
-.L_resume_forward_branch:
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_check_osr_forward:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_forward_branch
-
-.L_add_batch:
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    mov     x2, xSELF
-    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
-    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
-    b       .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, #2
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    FETCH_ADVANCE_INST 2
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
 
 /*
  * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
@@ -276,36 +175,10 @@
 check2:
     mov     x0, #1                                  // signal return to caller.
 MterpDone:
-/*
- * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmp     wPROFILE, #0
-    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
-    ret
-
-MterpProfileActive:
-    mov     xINST, x0                               // stash return value
-    /* Report cached hotness counts */
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xSELF
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
-    mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    ldp     fp, lr, [sp, #48]
+    ldp     xPC, xFP, [sp, #32]
+    ldp     xSELF, xINST, [sp, #16]
+    ldp     xIBASE, xREFS, [sp], #64
     ret
 
     .cfi_endproc
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 4257200..7101ba9 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -74,7 +74,6 @@
   x23  xINST     first 16-bit code unit of current instruction
   x24  xIBASE    interpreted instruction base pointer, used for computed goto
   x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
-  x26  wPROFILE  jit profile hotness countdown
   x16  ip        scratch reg
   x17  ip2       scratch reg (used by macros)
 
@@ -93,17 +92,15 @@
 
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
-#define xPC      x20
-#define xFP      x21
-#define xSELF    x22
-#define xINST    x23
-#define wINST    w23
-#define xIBASE   x24
-#define xREFS    x25
-#define wPROFILE w26
-#define xPROFILE x26
-#define ip       x16
-#define ip2      x17
+#define xPC     x20
+#define xFP     x21
+#define xSELF   x22
+#define xINST   x23
+#define wINST   w23
+#define xIBASE  x24
+#define xREFS   x25
+#define ip      x16
+#define ip2     x17
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
@@ -117,7 +114,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME OFF_FP(0)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
diff --git a/runtime/interpreter/mterp/arm64/op_goto.S b/runtime/interpreter/mterp/arm64/op_goto.S
index 6381e94..7e2f6a9 100644
--- a/runtime/interpreter/mterp/arm64/op_goto.S
+++ b/runtime/interpreter/mterp/arm64/op_goto.S
@@ -5,5 +5,21 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+    /* tuning: use sbfx for 6t2+ targets */
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
+    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
+    // If backwards branch, refresh rIBASE
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
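
The lsl/asr pair replaces the single sbfx to stay friendly to older encodings, per the tuning note: shifting the code unit left by 16 parks the AA byte in the top bits, and an arithmetic shift right by 24 brings it back sign-extended. The same extraction in C++, with an illustrative name:

    #include <cstdint>

    // wINST holds the 16-bit code unit; AA lives in bits [15:8].  Relies on
    // two's-complement narrowing and arithmetic right shift, as on ARM.
    static int32_t GotoOffsetAA(uint32_t inst) {
      return static_cast<int32_t>(inst << 16) >> 24;  // ssssssAA
    }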
diff --git a/runtime/interpreter/mterp/arm64/op_goto_16.S b/runtime/interpreter/mterp/arm64/op_goto_16.S
index fb9a80a..b2b9924 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_16.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_16.S
@@ -6,4 +6,17 @@
      */
     /* goto/16 +AAAA */
     FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load rINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from rINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_goto_32.S b/runtime/interpreter/mterp/arm64/op_goto_32.S
index b13cb41..b785857 100644
--- a/runtime/interpreter/mterp/arm64/op_goto_32.S
+++ b/runtime/interpreter/mterp/arm64/op_goto_32.S
@@ -13,4 +13,17 @@
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
     orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load xINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from xINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/op_packed_switch.S b/runtime/interpreter/mterp/arm64/op_packed_switch.S
index 1456f1a..e8b4f04 100644
--- a/runtime/interpreter/mterp/arm64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/arm64/op_packed_switch.S
@@ -17,4 +17,17 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      $func                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update rPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S
index b303e6a..3f1e1b1 100644
--- a/runtime/interpreter/mterp/arm64/zcmp.S
+++ b/runtime/interpreter/mterp/arm64/zcmp.S
@@ -1,17 +1,29 @@
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.${condition} MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, ${condition} // Branch if true, stashing result in callee save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index cbfdcc3..10b19c5 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -21,7 +21,6 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "debugger.h"
 
 namespace art {
@@ -433,7 +432,7 @@
 }
 
 extern "C" void MterpCheckBefore(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
   if (inst->Opcode(inst_data) == Instruction::MOVE_EXCEPTION) {
@@ -445,7 +444,7 @@
 }
 
 extern "C" void MterpLogDivideByZeroException(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -453,7 +452,7 @@
 }
 
 extern "C" void MterpLogArrayIndexException(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -461,7 +460,7 @@
 }
 
 extern "C" void MterpLogNegativeArraySizeException(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -469,7 +468,7 @@
 }
 
 extern "C" void MterpLogNoSuchMethodException(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -477,7 +476,7 @@
 }
 
 extern "C" void MterpLogExceptionThrownException(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -485,7 +484,7 @@
 }
 
 extern "C" void MterpLogNullObjectException(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -493,7 +492,7 @@
 }
 
 extern "C" void MterpLogFallback(Thread* self, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -502,7 +501,7 @@
 }
 
 extern "C" void MterpLogOSR(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -510,7 +509,7 @@
 }
 
 extern "C" void MterpLogSuspendFallback(Thread* self, ShadowFrame* shadow_frame, uint32_t flags)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   UNUSED(self);
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   uint16_t inst_data = inst->Fetch16(0);
@@ -522,7 +521,7 @@
 }
 
 extern "C" bool MterpSuspendCheck(Thread* self)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   self->AllowThreadSuspension();
   return MterpShouldSwitchInterpreters();
 }
@@ -618,7 +617,7 @@
 }
 
 extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   if (UNLIKELY(arr == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
@@ -632,7 +631,7 @@
 }
 
 extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
@@ -640,85 +639,13 @@
   return obj->GetFieldObject<mirror::Object>(MemberOffset(field_offset));
 }
 
-/*
- * Create a hotness_countdown based on the current method hotness_count and profiling
- * mode.  In short, determine how many hotness events we hit before reporting back
- * to the full instrumentation via MterpAddHotnessBatch.  Called once on entry to the method,
- * and regenerated following batch updates.
- */
-extern "C" int MterpSetUpHotnessCountdown(ArtMethod* method, ShadowFrame* shadow_frame)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  uint16_t hotness_count = method->GetCounter();
-  int32_t countdown_value = jit::kJitHotnessDisabled;
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    jit::JitInstrumentationCache* cache = jit->GetInstrumentationCache();
-    int32_t warm_threshold = cache->WarmMethodThreshold();
-    int32_t hot_threshold = cache->HotMethodThreshold();
-    int32_t osr_threshold = cache->OSRMethodThreshold();
-    if (hotness_count < warm_threshold) {
-      countdown_value = warm_threshold - hotness_count;
-    } else if (hotness_count < hot_threshold) {
-      countdown_value = hot_threshold - hotness_count;
-    } else if (hotness_count < osr_threshold) {
-      countdown_value = osr_threshold - hotness_count;
-    } else {
-      countdown_value = jit::kJitCheckForOSR;
-    }
-  }
-  /*
-   * The actual hotness threshold may exceed the range of our int16_t countdown value.  This is
-   * not a problem, though.  We can just break it down into smaller chunks.
-   */
-  countdown_value = std::min(countdown_value,
-                             static_cast<int32_t>(std::numeric_limits<int16_t>::max()));
-  shadow_frame->SetCachedHotnessCountdown(countdown_value);
-  shadow_frame->SetHotnessCountdown(countdown_value);
-  return countdown_value;
-}
-
-/*
- * Report a batch of hotness events to the instrumentation and then return the new
- * countdown value to the next time we should report.
- */
-extern "C" int16_t MterpAddHotnessBatch(ArtMethod* method,
-                                        ShadowFrame* shadow_frame,
-                                        Thread* self)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    int16_t count = shadow_frame->GetCachedHotnessCountdown() - shadow_frame->GetHotnessCountdown();
-    jit->GetInstrumentationCache()->AddSamples(self, method, count);
-  }
-  return MterpSetUpHotnessCountdown(method, shadow_frame);
-}
-
-// TUNING: Unused by arm/arm64/x86/x86_64.  Remove when mips/mips64 mterps support batch updates.
 extern "C" bool  MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+  SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
   JValue* result = shadow_frame->GetResultRegister();
   uint32_t dex_pc = shadow_frame->GetDexPC();
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if ((jit != nullptr) && (offset <= 0)) {
-    jit->GetInstrumentationCache()->AddSamples(self, method, 1);
-  }
-  int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame);
-  if (countdown_value == jit::kJitCheckForOSR) {
-    return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
-  } else {
-    return false;
-  }
-}
-
-extern "C" bool MterpMaybeDoOnStackReplacement(Thread* self,
-                                               ShadowFrame* shadow_frame,
-                                               int32_t offset)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  ArtMethod* method = shadow_frame->GetMethod();
-  JValue* result = shadow_frame->GetResultRegister();
-  uint32_t dex_pc = shadow_frame->GetDexPC();
-  // Assumes caller has already determined that an OSR check is appropriate.
+  const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
+  instrumentation->Branch(self, method, dex_pc, offset);
   return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
 }
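
All of the rewritten handlers treat MterpProfileBranch as a boolean OSR gate: the hook reports the branch to instrumentation, and a true return diverts control to MterpOnStackReplacement with the offset still live in rINST/wINST. A minimal C++ sketch of that contract as the assembly consumes it; everything except the MterpProfileBranch signature is an illustrative assumption:

    #include <cstdint>

    class Thread;
    class ShadowFrame;
    extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* sf, int32_t offset);

    enum class BranchResult { kOnStackReplacement, kContinueInterpreting };

    static BranchResult HandleBranch(Thread* self, ShadowFrame* sf, int32_t offset) {
      if (MterpProfileBranch(self, sf, offset)) {   // cmp r0, #0 / bne ...
        return BranchResult::kOnStackReplacement;   // offset stays in rINST
      }
      return BranchResult::kContinueInterpreting;   // FETCH_ADVANCE_INST_RB path
    }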
 
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index a38a87b..092474d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -79,8 +79,7 @@
   r6  rSELF     self (Thread) pointer
   r7  rINST     first 16-bit code unit of current instruction
   r8  rIBASE    interpreted instruction base pointer, used for computed goto
-  r10 rPROFILE  branch profiling countdown
-  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
+  r11 rREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
 
 Macros are provided for common operations.  Each macro MUST emit only
 one instruction to make instruction-counting easier.  They MUST NOT alter
@@ -98,13 +97,12 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rPC      r4
-#define rFP      r5
-#define rSELF    r6
-#define rINST    r7
-#define rIBASE   r8
-#define rPROFILE r10
-#define rREFS    r11
+#define rPC     r4
+#define rFP     r5
+#define rSELF   r6
+#define rINST   r7
+#define rIBASE  r8
+#define rREFS   r11
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -118,7 +116,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME OFF_FP(0)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
@@ -331,8 +329,10 @@
 
 ExecuteMterpImpl:
     .fnstart
-    .save {r3-r10,fp,lr}
-    stmfd   sp!, {r3-r10,fp,lr}         @ save 10 regs, (r3 just to align 64)
+    .save {r4-r10,fp,lr}
+    stmfd   sp!, {r4-r10,fp,lr}         @ save 9 regs
+    .pad    #4
+    sub     sp, sp, #4                  @ align 64
 
     /* Remember the return register */
     str     r3, [r2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -353,12 +353,6 @@
     /* Starting ibase */
     ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
-    /* Set up for backwards branches & osr profiling */
-    ldr     r0, [rFP, #OFF_FP_METHOD]
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    bl      MterpSetUpHotnessCountdown
-    mov     rPROFILE, r0                @ Starting hotness countdown to rPROFILE
-
     /* start executing the instruction at rPC */
     FETCH_INST                          @ load rINST from rPC
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -1109,8 +1103,35 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    sbfx    rINST, rINST, #8, #8           @ rINST<- ssssssAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+    /* tuning: use sbfx for 6t2+ targets */
+#if MTERP_PROFILE_BRANCHES
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    @ If backwards branch, refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    mov     r0, rINST, lsl #16          @ r0<- AAxx0000
+    movs    rINST, r0, asr #24          @ rINST<- ssssssAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r2, rINST, rINST            @ r2<- byte offset, set flags
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    @ If backwards branch, refresh rIBASE
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1123,8 +1144,30 @@
      * double to get a byte offset.
      */
     /* goto/16 +AAAA */
+#if MTERP_PROFILE_BRANCHES
     FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH_S rINST, 1                    @ rINST<- ssssAAAA (sign-extended)
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1142,10 +1185,34 @@
      * offset to byte offset.
      */
     /* goto/32 +AAAAAAAA */
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- aaaa (lo)
-    FETCH r3, 2                         @ r1<- AAAA (hi)
-    orrs    rINST, r0, r3, lsl #16      @ rINST<- AAAAaaaa
-    b       MterpCommonTakenBranch
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- aaaa (lo)
+    FETCH r1, 2                         @ r1<- AAAA (hi)
+    orr     rINST, r0, r1, lsl #16      @ rINST<- AAAAaaaa
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1161,6 +1228,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1168,8 +1236,36 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
-    movs    rINST, r0
-    b       MterpCommonTakenBranch
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoPackedSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -1186,6 +1282,7 @@
      * for: packed-switch, sparse-switch
      */
     /* op vAA, +BBBB */
+#if MTERP_PROFILE_BRANCHES
     FETCH r0, 1                         @ r0<- bbbb (lo)
     FETCH r1, 2                         @ r1<- BBBB (hi)
     mov     r3, rINST, lsr #8           @ r3<- AA
@@ -1193,8 +1290,36 @@
     GET_VREG r1, r3                     @ r1<- vAA
     add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
-    movs    rINST, r0
-    b       MterpCommonTakenBranch
+    mov     rINST, r0
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#else
+    FETCH r0, 1                         @ r0<- bbbb (lo)
+    FETCH r1, 2                         @ r1<- BBBB (hi)
+    mov     r3, rINST, lsr #8           @ r3<- AA
+    orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
+    GET_VREG r1, r3                     @ r1<- vAA
+    add     r0, rPC, r0, lsl #1         @ r0<- PC + BBBBbbbb*2
+    bl      MterpDoSparseSwitch                       @ r0<- code-unit branch offset
+    mov     rINST, r0
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    adds    r1, rINST, rINST            @ r1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    ble     MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+#endif
 
 
 /* ------------------------------ */
@@ -1360,6 +1485,22 @@
     /*
      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
      * register based on the results of the comparison.
+     *
+     * We load the full values with LDM, but in practice many values could
+     * be resolved by only looking at the high word.  This could be made
+     * faster or slower by splitting the LDM into a pair of LDRs.
+     *
+     * If we just wanted to set condition flags, we could do this:
+     *  subs    ip, r0, r2
+     *  sbcs    ip, r1, r3
+     *  subeqs  ip, r0, r2
+     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
+     * integer value, which we can do with 2 conditional mov/mvn instructions
+     * (set 1, set -1; if they're equal we already have 0 in ip), giving
+     * us a constant 5-cycle path plus a branch at the end to the
+     * instruction epilogue code.  The multi-compare approach below needs
+     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
+     * in the worst case (the 64-bit values are equal).
      */
     /* cmp-long vAA, vBB, vCC */
     FETCH r0, 1                         @ r0<- CCBB
@@ -1370,16 +1511,13 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    cmp     r0, r2
-    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
-    mov     ip, #0
-    mvnlt   ip, #0                      @ -1
-    cmpeq   r0, r2                      @ For correct EQ/NE, we may need to repeat the first CMP
-    orrne   ip, #1
-    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    SET_VREG ip, r9                     @ vAA<- ip
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
+    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
+    blt     .Lop_cmp_long_less            @ signed compare on high part
+    bgt     .Lop_cmp_long_greater
+    subs    r1, r0, r2                  @ r1<- r0 - r2
+    bhi     .Lop_cmp_long_greater         @ unsigned compare on low part
+    bne     .Lop_cmp_long_less
+    b       .Lop_cmp_long_finish          @ equal; r1 already holds 0
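
The replacement sequence is the multi-compare approach discussed in the comment above: decide on the signed high words first and only compare the low words, unsigned, when the high words tie. In C++, with illustrative names:

    #include <cstdint>

    static int32_t CmpLong(int64_t a, int64_t b) {
      int32_t a_hi = static_cast<int32_t>(a >> 32);
      int32_t b_hi = static_cast<int32_t>(b >> 32);
      if (a_hi < b_hi) return -1;  // blt .Lop_cmp_long_less
      if (a_hi > b_hi) return 1;   // bgt .Lop_cmp_long_greater
      uint32_t a_lo = static_cast<uint32_t>(a);
      uint32_t b_lo = static_cast<uint32_t>(b);
      if (a_lo > b_lo) return 1;   // bhi: low words compare unsigned
      if (a_lo < b_lo) return -1;  // bne after the subs
      return 0;                    // equal; r1 already holds 0
    }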
 
 /* ------------------------------ */
     .balign 128
@@ -1387,8 +1525,9 @@
 /* File: arm/op_if_eq.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1396,13 +1535,24 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    beq MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    movne rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1413,8 +1563,9 @@
 /* File: arm/op_if_ne.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1422,13 +1573,24 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    bne MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    moveq rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1439,8 +1601,9 @@
 /* File: arm/op_if_lt.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1448,13 +1611,24 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    blt MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    movge rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1465,8 +1639,9 @@
 /* File: arm/op_if_ge.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1474,13 +1649,24 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    bge MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    movlt rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1491,8 +1677,9 @@
 /* File: arm/op_if_gt.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1500,13 +1687,24 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    bgt MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    movle rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1517,8 +1715,9 @@
 /* File: arm/op_if_le.S */
 /* File: arm/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1526,13 +1725,24 @@
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r0, rINST, #8, #4           @ r0<- A
     GET_VREG r3, r1                     @ r3<- vB
-    GET_VREG r0, r0                     @ r0<- vA
+    GET_VREG r2, r0                     @ r2<- vA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, r3                      @ compare (vA, vB)
-    ble MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, r3                      @ compare (vA, vB)
+    movgt   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r2, rINST, rINST            @ convert to bytes, check sign
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1543,20 +1753,32 @@
 /* File: arm/op_if_eqz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    beq MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, #0                      @ compare (vA, 0)
+    movne   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1567,20 +1789,32 @@
 /* File: arm/op_if_nez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    bne MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, #0                      @ compare (vA, 0)
+    moveq   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1591,20 +1825,32 @@
 /* File: arm/op_if_ltz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    blt MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, #0                      @ compare (vA, 0)
+    movge   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1615,20 +1861,32 @@
 /* File: arm/op_if_gez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    bge MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, #0                      @ compare (vA, 0)
+    movlt   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1639,20 +1897,32 @@
 /* File: arm/op_if_gtz.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    bgt MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, #0                      @ compare (vA, 0)
+    movle   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
 
@@ -1663,20 +1933,32 @@
 /* File: arm/op_if_lez.S */
 /* File: arm/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     mov     r0, rINST, lsr #8           @ r0<- AA
-    GET_VREG r0, r0                     @ r0<- vAA
+    GET_VREG r2, r0                     @ r2<- vAA
     FETCH_S rINST, 1                    @ rINST<- branch offset, in code units
-    cmp     r0, #0                      @ compare (vA, 0)
-    ble MterpCommonTakenBranchNoFlags
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    beq     .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    cmp     r2, #0                      @ compare (vA, 0)
+    movgt   rINST, #2
+#if MTERP_PROFILE_BRANCHES
+    @ TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     r0, rSELF
+    add     r1, rFP, #OFF_FP_SHADOWFRAME
+    mov     r2, rINST
+    bl      MterpProfileBranch          @ (self, shadow_frame, offset)
+    cmp     r0, #0
+    bne     MterpOnStackReplacement     @ Note: offset must be in rINST
+#endif
+    adds    r1, rINST, rINST            @ convert to bytes & set flags
+    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
+    FETCH_ADVANCE_INST_RB r1            @ update rPC, load rINST
+    bmi     MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
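
Every handler above ends the same way: adding the code-unit offset to itself converts it to a byte offset and sets the sign flag in one instruction, so a single bmi routes only backward branches through the suspend check. A hedged C++ analogue of that filter (helper name is ours):

    #include <cstdint>

    // Mirrors "adds rX, rINST, rINST" + "bmi MterpCheckSuspendAndContinue".
    bool BackwardBranchNeedsSuspendPoll(int32_t code_unit_offset) {
      int32_t byte_offset = code_unit_offset + code_unit_offset;  // *2, sets N
      return byte_offset < 0;  // backward branch: poll the thread flags
    }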
 
@@ -4429,15 +4711,15 @@
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
-    mul     ip, r2, r1                  @ ip<- ZxW
-    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
-    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
-    add     r2, r2, lr                  @ r2<- lr + low(ZxW + (YxX))
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
     VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2 }                @ vAA/vAA+1<- r1/r2
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
     GOTO_OPCODE ip                      @ jump to next instruction
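
With the rPROFILE hotness register gone, r9/r10 revert to ordinary scratch registers, which is presumably why the partial products move out of r1/lr above. The arithmetic itself is the standard 32x32 decomposition of a 64-bit low product; roughly, in C++ (a sketch of the math, not the shipped code):

    #include <cstdint>

    // Low 64 bits of a 64x64 multiply from 32-bit halves: umull(lo*lo) plus the
    // two cross products shifted into the high word (hi*hi only affects bit 64+).
    uint64_t MulLongLow64(uint64_t a, uint64_t b) {
      uint32_t alo = static_cast<uint32_t>(a), ahi = static_cast<uint32_t>(a >> 32);
      uint32_t blo = static_cast<uint32_t>(b), bhi = static_cast<uint32_t>(b >> 32);
      uint64_t low = static_cast<uint64_t>(alo) * blo;     // "umull r9, r10, ..."
      uint32_t cross = alo * bhi + ahi * blo;              // "mul" then "mla"
      return low + (static_cast<uint64_t>(cross) << 32);   // "add r10, r2, r10"
    }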
 
 /* ------------------------------ */
@@ -5595,14 +5877,14 @@
     VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
-    mul     ip, r2, r1                  @ ip<- ZxW
-    umull   r1, lr, r2, r0              @ r1/lr <- ZxX
-    mla     r2, r0, r3, ip              @ r2<- YxX + (ZxW)
+    mul     ip, r2, r1                  @  ip<- ZxW
+    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
+    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST                   @ r0<- &fp[A] (free up rINST)
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    add     r2, r2, lr                  @ r2<- r2 + low(ZxW + (YxX))
+    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
     GET_INST_OPCODE ip                  @ extract opcode from rINST
-    stmia   r0, {r1-r2}                 @ vAA/vAA+1<- r1/r2
+    stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
     GOTO_OPCODE ip                      @ jump to next instruction
 
 /* ------------------------------ */
@@ -7334,6 +7616,27 @@
     .balign 4
 artMterpAsmSisterStart:
 
+/* continuation for op_cmp_long */
+
+.Lop_cmp_long_less:
+    mvn     r1, #0                      @ r1<- -1
+    @ We would like to conditionally execute the next mov to avoid the branch,
+    @ but there is no clean way to do it; instead, we just replicate the tail end.
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
+
+.Lop_cmp_long_greater:
+    mov     r1, #1                      @ r1<- 1
+    @ fall through to _finish
+
+.Lop_cmp_long_finish:
+    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
+    SET_VREG r1, r9                     @ vAA<- r1
+    GET_INST_OPCODE ip                  @ extract opcode from rINST
+    GOTO_OPCODE ip                      @ jump to next instruction
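
The sister fragments above implement the Dalvik three-way long compare; the scalar logic they encode, as a hedged C++ sketch:

    #include <cstdint>

    int32_t CmpLong(int64_t vbb, int64_t vcc) {
      if (vbb < vcc) return -1;  // ".Lop_cmp_long_less" (mvn r1, #0 is -1)
      if (vbb > vcc) return 1;   // ".Lop_cmp_long_greater", falls into _finish
      return 0;                  // equal: the main handler reaches _finish with r1 == 0
    }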
+
 /* continuation for op_float_to_long */
 /*
  * Convert the float in r0 to a long in r0/r1.
@@ -11904,117 +12207,21 @@
     /* NOTE: no fallthrough */
 
 /*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    rINST          <= signed offset
- *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
+ * Check for a pending suspend request.  Assumes rINST is already loaded, rPC has been
+ * advanced, the opcode still needs to be fetched and branched to, and flags are in lr.
  */
-MterpCommonTakenBranchNoFlags:
-    cmp     rINST, #0
-MterpCommonTakenBranch:
-    bgt     .L_forward_branch           @ don't add forward branches to hotness
-/*
- * We need to subtract 1 from positive values and we should not see 0 here,
- * so we may use the result of the comparison with -1.
- */
-#if JIT_CHECK_OSR != -1
-#  error "JIT_CHECK_OSR must be -1."
-#endif
-    cmp     rPROFILE, #JIT_CHECK_OSR
-    beq     .L_osr_check
-    subgts  rPROFILE, #1
-    beq     .L_add_batch                @ counted down to zero - report
-.L_resume_backward_branch:
-    ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
-    REFRESH_IBASE
-    add     r2, rINST, rINST            @ r2<- byte offset
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
+MterpCheckSuspendAndContinue:
+    ldr     rIBASE, [rSELF, #THREAD_CURRENT_IBASE_OFFSET]  @ refresh rIBASE
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    bne     .L_suspend_request_pending
+    bne     1f
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
-
-.L_suspend_request_pending:
+1:
     EXPORT_PC
     mov     r0, rSELF
     bl      MterpSuspendCheck           @ (self)
     cmp     r0, #0
     bne     MterpFallback
-    REFRESH_IBASE                       @ might have changed during suspend
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.L_no_count_backwards:
-    cmp     rPROFILE, #JIT_CHECK_OSR    @ possible OSR re-entry?
-    bne     .L_resume_backward_branch
-.L_osr_check:
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement
-    b       .L_resume_backward_branch
-
-.L_forward_branch:
-    cmp     rPROFILE, #JIT_CHECK_OSR @ possible OSR re-entry?
-    beq     .L_check_osr_forward
-.L_resume_forward_branch:
-    add     r2, rINST, rINST            @ r2<- byte offset
-    FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    GET_INST_OPCODE ip                  @ extract opcode from rINST
-    GOTO_OPCODE ip                      @ jump to next instruction
-
-.L_check_osr_forward:
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement
-    b       .L_resume_forward_branch
-
-.L_add_batch:
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    ldr     r0, [rFP, #OFF_FP_METHOD]
-    mov     r2, rSELF
-    bl      MterpAddHotnessBatch        @ (method, shadow_frame, self)
-    mov     rPROFILE, r0                @ restore new hotness countdown to rPROFILE
-    b       .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    mov     r0, rSELF
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, #2
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  @ (self, shadow_frame, offset)
-    cmp     r0, #0
-    bne     MterpOnStackReplacement
-    FETCH_ADVANCE_INST 2
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
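
MterpCheckSuspendAndContinue is now the only suspend point on the branch path: refresh the handler base, test the preloaded thread flags, and call out only when a suspend or checkpoint request is pending. Schematically, in C++ (the flag bits below are assumed stand-ins, not ART's real values):

    #include <cstdint>

    constexpr uint32_t kSuspendRequest    = 1u << 0;  // assumed: THREAD_SUSPEND_REQUEST
    constexpr uint32_t kCheckpointRequest = 1u << 1;  // assumed: THREAD_CHECKPOINT_REQUEST

    // Returns true to keep interpreting, false to bail to MterpFallback.
    bool CheckSuspendAndContinue(uint32_t thread_flags, bool (*suspend_check)()) {
      if ((thread_flags & (kSuspendRequest | kCheckpointRequest)) == 0) {
        return true;  // fast path: dispatch the next opcode directly
      }
      // Slow path: EXPORT_PC, then MterpSuspendCheck(self); nonzero means bail.
      return !suspend_check();
    }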
 
@@ -12062,27 +12269,9 @@
     str     r1, [r2, #4]
     mov     r0, #1                                  @ signal return to caller.
 MterpDone:
-/*
- * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmp     rPROFILE, #0
-    bgt     MterpProfileActive                      @ if > 0, we may have some counts to report.
-    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
+    add     sp, sp, #4                              @ un-align 64
+    ldmfd   sp!, {r4-r10,fp,pc}                     @ restore 9 regs and return
 
-MterpProfileActive:
-    mov     rINST, r0                               @ stash return value
-    /* Report cached hotness counts */
-    ldr     r0, [rFP, #OFF_FP_METHOD]
-    add     r1, rFP, #OFF_FP_SHADOWFRAME
-    mov     r2, rSELF
-    strh    rPROFILE, [r1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    bl      MterpAddHotnessBatch                    @ (method, shadow_frame, self)
-    mov     r0, rINST                               @ restore return value
-    ldmfd   sp!, {r3-r10,fp,pc}                     @ restore 10 regs and return
 
     .fnend
     .size   ExecuteMterpImpl, .-ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 55797e6..6ae59d8 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -81,7 +81,6 @@
   x23  xINST     first 16-bit code unit of current instruction
   x24  xIBASE    interpreted instruction base pointer, used for computed goto
   x25  xREFS     base of object references in shadow frame  (ideally, we'll get rid of this later).
-  x26  wPROFILE  jit profile hotness countdown
   x16  ip        scratch reg
   x17  ip2       scratch reg (used by macros)
 
@@ -100,17 +99,15 @@
 
 /* During bringup, we'll use the shadow frame model instead of xFP */
 /* single-purpose registers, given names for clarity */
-#define xPC      x20
-#define xFP      x21
-#define xSELF    x22
-#define xINST    x23
-#define wINST    w23
-#define xIBASE   x24
-#define xREFS    x25
-#define wPROFILE w26
-#define xPROFILE x26
-#define ip       x16
-#define ip2      x17
+#define xPC     x20
+#define xFP     x21
+#define xSELF   x22
+#define xINST   x23
+#define wINST   w23
+#define xIBASE  x24
+#define xREFS   x25
+#define ip      x16
+#define ip2     x17
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep xFP at the base of the vregs.  So,
@@ -124,7 +121,7 @@
 #define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
 #define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
 #define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_SHADOWFRAME OFF_FP(0)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
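
The new OFF_FP_SHADOWFRAME spells out the frame-pointer convention: the FP register points at the vregs array inside the ShadowFrame, so header fields sit at negative offsets from it. The same address arithmetic in C++, using a simplified stand-in for art::ShadowFrame (field layout invented for illustration):

    #include <cstddef>
    #include <cstdint>

    struct ShadowFrameish {   // simplified stand-in; not the real layout
      void*    link_;
      void*    method_;
      uint32_t dex_pc_;
      uint32_t vregs_[1];     // the interpreter's FP register points here
    };

    inline ShadowFrameish* FrameFromFp(uint32_t* fp) {
      // OFF_FP(a) = a - SHADOWFRAME_VREGS_OFFSET: step back from vregs_ to the
      // frame start, then apply the field's normal positive offset.
      return reinterpret_cast<ShadowFrameish*>(
          reinterpret_cast<char*>(fp) - offsetof(ShadowFrameish, vregs_));
    }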
 
 /*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
@@ -326,12 +323,11 @@
 
 ExecuteMterpImpl:
     .cfi_startproc
-    stp     xPROFILE, x27, [sp, #-80]!
-    stp     xIBASE, xREFS, [sp, #16]
-    stp     xSELF, xINST, [sp, #32]
-    stp     xPC, xFP, [sp, #48]
-    stp     fp, lr, [sp, #64]
-    add     fp, sp, #64
+    stp     xIBASE, xREFS, [sp, #-64]!
+    stp     xSELF, xINST, [sp, #16]
+    stp     xPC, xFP, [sp, #32]
+    stp     fp, lr, [sp, #48]
+    add     fp, sp, #48
 
     /* Remember the return register */
     str     x3, [x2, #SHADOWFRAME_RESULT_REGISTER_OFFSET]
@@ -352,12 +348,6 @@
     /* Starting ibase */
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]
 
-    /* Set up for backwards branches & osr profiling */
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    bl      MterpSetUpHotnessCountdown
-    mov     wPROFILE, w0                // Starting hotness countdown to xPROFILE
-
     /* start executing the instruction at rPC */
     FETCH_INST                          // load wINST from rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -1091,8 +1081,24 @@
      * double to get a byte offset.
      */
     /* goto +AA */
-    sbfx    wINST, wINST, #8, #8           // wINST<- ssssssAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+    /* tuning: sbfx would do this in a single instruction */
+    lsl     w0, wINST, #16              // w0<- AAxx0000
+    asr     wINST, w0, #24              // wINST<- ssssssAA (sign-extended)
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]  // Preload flags for MterpCheckSuspendAndContinue
+    adds    w1, wINST, wINST            // Convert dalvik offset to byte offset, setting flags
+    FETCH_ADVANCE_INST_RB w1            // load wINST and advance xPC
+    b.mi    MterpCheckSuspendAndContinue // backward branch: check suspend, refresh xIBASE
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
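
The lsl/asr pair restored here sign-extends the 8-bit AA field out of the 16-bit instruction word, exactly what the deleted sbfx (and the sbfm feeding the profiling call) does in one instruction. In C++ terms, roughly:

    #include <cstdint>

    // "lsl w0, wINST, #16" then "asr wINST, w0, #24" => ssssssAA.
    int32_t GotoOffsetAA(uint32_t inst) {
      int32_t shifted = static_cast<int32_t>(inst << 16);  // AAxx0000
      return shifted >> 24;  // arithmetic shift, like the asr above
    }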
 
 /* ------------------------------ */
     .balign 128
@@ -1106,7 +1112,20 @@
      */
     /* goto/16 +AAAA */
     FETCH_S wINST, 1                    // wINST<- ssssAAAA (sign-extended)
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset, flags set
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -1127,7 +1146,20 @@
     FETCH w0, 1                         // w0<- aaaa (lo)
     FETCH w1, 2                         // w1<- AAAA (hi)
     orr     wINST, w0, w1, lsl #16      // wINST<- AAAAaaaa
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -1151,7 +1183,20 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoPackedSwitch                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -1176,7 +1221,20 @@
     add     x0, xPC, w0, lsl #1         // w0<- PC + BBBBbbbb*2
     bl      MterpDoSparseSwitch                       // w0<- code-unit branch offset
     sbfm    xINST, x0, 0, 31
-    b       MterpCommonTakenBranchNoFlags
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    mov     x2, xINST
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w1, wINST, wINST            // w1<- byte offset; clear V
+    FETCH_ADVANCE_INST_RB w1            // update xPC, load wINST
+    b.le    MterpCheckSuspendAndContinue
+    GET_INST_OPCODE ip                  // extract opcode from wINST
+    GOTO_OPCODE ip                      // jump to next instruction
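
Both switch handlers call a helper (MterpDoPackedSwitch / MterpDoSparseSwitch) that returns a code-unit offset, which then flows through the same branch tail as goto. For orientation, a schematic packed-switch lookup; the payload layout follows the dex format, but the code is illustrative, not ART's implementation:

    #include <cstdint>

    // payload: u2 ident(0x0100), u2 size, s4 first_key, s4 targets[size];
    // switch payloads are 4-byte aligned in the dex file.
    int32_t PackedSwitchOffset(const uint16_t* payload, int32_t test_val) {
      int32_t size = payload[1];
      int32_t first_key = static_cast<int32_t>(
          payload[2] | (static_cast<uint32_t>(payload[3]) << 16));
      int32_t index = test_val - first_key;
      if (index < 0 || index >= size) {
        return 3;  // no match: advance past the 3-code-unit switch instruction
      }
      const int32_t* targets = reinterpret_cast<const int32_t*>(payload + 4);
      return targets[index];  // signed branch offset, in code units
    }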
 
 
 /* ------------------------------ */
@@ -1307,8 +1365,9 @@
 /* File: arm64/op_if_eq.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1317,12 +1376,23 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.eq MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, eq           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1333,8 +1403,9 @@
 /* File: arm64/op_if_ne.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1343,12 +1414,23 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.ne MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, ne           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1359,8 +1441,9 @@
 /* File: arm64/op_if_lt.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1369,12 +1452,23 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.lt MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, lt           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1385,8 +1479,9 @@
 /* File: arm64/op_if_ge.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1395,12 +1490,23 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.ge MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, ge           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1411,8 +1517,9 @@
 /* File: arm64/op_if_gt.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1421,12 +1528,23 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.gt MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, gt           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1437,8 +1555,9 @@
 /* File: arm64/op_if_le.S */
 /* File: arm64/bincmp.S */
     /*
-     * Generic two-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic two-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
      */
@@ -1447,12 +1566,23 @@
     ubfx    w0, wINST, #8, #4           // w0<- A
     GET_VREG w3, w1                     // w3<- vB
     GET_VREG w2, w0                     // w2<- vA
-    FETCH_S wINST, 1                    // wINST<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Offset if branch not taken
     cmp     w2, w3                      // compare (vA, vB)
-    b.le MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, le           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31            // Sign extend branch offset
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in xINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes, check sign
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1463,20 +1593,32 @@
 /* File: arm64/op_if_eqz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.eq MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, eq           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1487,20 +1629,32 @@
 /* File: arm64/op_if_nez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.ne MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, ne           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1511,20 +1665,32 @@
 /* File: arm64/op_if_ltz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.lt MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, lt           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1535,20 +1701,32 @@
 /* File: arm64/op_if_gez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.ge MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, ge           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1559,20 +1737,32 @@
 /* File: arm64/op_if_gtz.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.gt MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, gt           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
 
@@ -1583,20 +1773,32 @@
 /* File: arm64/op_if_lez.S */
 /* File: arm64/zcmp.S */
     /*
-     * Generic one-operand compare-and-branch operation.  Provide a "condition"
-     * fragment that specifies the comparison to perform.
+     * Generic one-operand compare-and-branch operation.  Provide a "revcmp"
+     * fragment that specifies the *reverse* comparison to perform, e.g.
+     * for "if-le" you would use "gt".
      *
      * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
      */
     /* if-cmp vAA, +BBBB */
     lsr     w0, wINST, #8               // w0<- AA
     GET_VREG w2, w0                     // w2<- vAA
-    FETCH_S wINST, 1                    // w1<- branch offset, in code units
+    FETCH_S w1, 1                       // w1<- branch offset, in code units
+    mov     w0, #2                      // Branch offset if not taken
     cmp     w2, #0                      // compare (vA, 0)
-    b.le MterpCommonTakenBranchNoFlags
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_not_taken_osr
-    FETCH_ADVANCE_INST 2
+    csel    wINST, w1, w0, le           // Branch if true, stashing result in callee-save reg
+#if MTERP_PROFILE_BRANCHES
+    // TUNING: once measurements are complete, remove #if and hand-schedule.
+    EXPORT_PC
+    mov     x0, xSELF
+    add     x1, xFP, #OFF_FP_SHADOWFRAME
+    sbfm    x2, xINST, 0, 31
+    bl      MterpProfileBranch          // (self, shadow_frame, offset)
+    cbnz    w0, MterpOnStackReplacement // Note: offset must be in wINST
+#endif
+    ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
+    adds    w2, wINST, wINST            // convert to bytes & set flags
+    FETCH_ADVANCE_INST_RB w2            // update xPC, load wINST
+    b.mi    MterpCheckSuspendAndContinue
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
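
Where the ARM code uses a predicated mov to pick the not-taken advance, the arm64 handlers select it branchlessly with csel. The same choice in C++ (names ours):

    #include <cstdint>

    // "mov w0, #2; cmp w2, #0; csel wINST, w1, w0, le" for if-lez.
    int32_t ZCmpAdvance(int32_t vaa, int16_t taken_offset) {
      const int32_t kNotTaken = 2;  // code units
      return (vaa <= 0) ? taken_offset : kNotTaken;
    }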
 
@@ -11394,107 +11596,6 @@
     GET_INST_OPCODE ip
     GOTO_OPCODE ip
     /* NOTE: no fallthrough */
-/*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    wINST          <= signed offset
- *    wPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
- */
-MterpCommonTakenBranchNoFlags:
-    cmp     wINST, #0
-    b.gt    .L_forward_branch           // don't add forward branches to hotness
-    tbnz    wPROFILE, #31, .L_no_count_backwards  // go if negative
-    subs    wPROFILE, wPROFILE, #1      // countdown
-    b.eq    .L_add_batch                // counted down to zero - report
-.L_resume_backward_branch:
-    ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
-    b.ne    .L_suspend_request_pending
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_suspend_request_pending:
-    EXPORT_PC
-    mov     x0, xSELF
-    bl      MterpSuspendCheck           // (self)
-    cbnz    x0, MterpFallback
-    REFRESH_IBASE                       // might have changed during suspend
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_no_count_backwards:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.ne    .L_resume_backward_branch
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_backward_branch
-
-.L_forward_branch:
-    cmp     wPROFILE, #JIT_CHECK_OSR    // possible OSR re-entry?
-    b.eq    .L_check_osr_forward
-.L_resume_forward_branch:
-    add     w2, wINST, wINST            // w2<- byte offset
-    FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
-.L_check_osr_forward:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xINST
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    b       .L_resume_forward_branch
-
-.L_add_batch:
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    mov     x2, xSELF
-    bl      MterpAddHotnessBatch        // (method, shadow_frame, self)
-    mov     wPROFILE, w0                // restore new hotness countdown to wPROFILE
-    b       .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    mov     x0, xSELF
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, #2
-    EXPORT_PC
-    bl      MterpMaybeDoOnStackReplacement  // (self, shadow_frame, offset)
-    cbnz    x0, MterpOnStackReplacement
-    FETCH_ADVANCE_INST 2
-    GET_INST_OPCODE ip                  // extract opcode from wINST
-    GOTO_OPCODE ip                      // jump to next instruction
-
 
 /*
  * Check for suspend check request.  Assumes wINST already loaded, xPC advanced and
@@ -11563,36 +11664,10 @@
 check2:
     mov     x0, #1                                  // signal return to caller.
 MterpDone:
-/*
- * At this point, we expect wPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending wPROFILE and the cached hotness counter).  wPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmp     wPROFILE, #0
-    bgt     MterpProfileActive                      // if > 0, we may have some counts to report.
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
-    ret
-
-MterpProfileActive:
-    mov     xINST, x0                               // stash return value
-    /* Report cached hotness counts */
-    ldr     x0, [xFP, #OFF_FP_METHOD]
-    add     x1, xFP, #OFF_FP_SHADOWFRAME
-    mov     x2, xSELF
-    strh    wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET]
-    bl      MterpAddHotnessBatch                    // (method, shadow_frame, self)
-    mov     x0, xINST                               // restore return value
-    ldp     fp, lr, [sp, #64]
-    ldp     xPC, xFP, [sp, #48]
-    ldp     xSELF, xINST, [sp, #32]
-    ldp     xIBASE, xREFS, [sp, #16]
-    ldp     xPROFILE, x27, [sp], #80
+    ldp     fp, lr, [sp, #48]
+    ldp     xPC, xFP, [sp, #32]
+    ldp     xSELF, xINST, [sp, #16]
+    ldp     xIBASE, xREFS, [sp], #64
     ret
 
     .cfi_endproc
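
The arm64 hunks above complete the revert for that port: the frame shrinks from 80 to 64 bytes because xPROFILE/x27 no longer need spill slots, and the `MterpCommonTakenBranchNoFlags` block deleted earlier folded hotness counting, OSR probing, and the suspend poll into a single path. A rough C++ restatement of the control flow that block encoded — a sketch only, with simplified stand-ins for the runtime helpers, and `JIT_CHECK_OSR == -1` as the removed static check required:

```cpp
#include <cstdint>

constexpr int32_t kJitCheckOsr = -1;  // JIT_CHECK_OSR; the deleted #error pinned it to -1.

// Trivial stand-ins so the sketch compiles; the real helpers live in the mterp C++ code.
static int32_t AddHotnessBatch() { return 100; }              // report, reload countdown
static bool MaybeDoOnStackReplacement(int32_t /*off*/) { return false; }
static bool SuspendRequested() { return false; }
static void SuspendCheck() {}

// Control flow of the deleted MterpCommonTakenBranchNoFlags (profile == wPROFILE).
static void TakenBranch(int32_t offset, int32_t& profile) {
  if (offset > 0) {                        // forward branches are never counted
    if (profile == kJitCheckOsr) MaybeDoOnStackReplacement(offset);
    return;
  }
  if (profile > 0 && --profile == 0) {
    profile = AddHotnessBatch();           // counted down to zero - report
  }
  if (profile == kJitCheckOsr) {           // possible OSR re-entry?
    MaybeDoOnStackReplacement(offset);
  }
  if (SuspendRequested()) SuspendCheck();  // backward branches poll for suspension
}
```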
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 685b9b6..ebac5fc 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -124,21 +124,6 @@
     .cfi_restore \_reg
 .endm
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
-#define OFF_FP_SHADOWFRAME OFF_FP(0)
-
 /* Frame size must be 16-byte aligned.
  * Remember about 4 bytes for return address + 4 * 4 for spills
  */
@@ -170,11 +155,43 @@
 #define rINSTbl  %bl
 #define rIBASE   %edx
 #define rREFS    %ebp
-#define rPROFILE OFF_FP_COUNTDOWN_OFFSET(rFP)
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
+ * Profile branch. rINST should contain the offset. %eax is scratch.
+ */
+.macro MTERP_PROFILE_BRANCH
+#ifdef MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpProfileBranch)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    RESTORE_IBASE
+#endif
+.endm
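
The restored macro gives x86 a single choke point for branch profiling again. Two details are worth flagging. First, the guard is `#ifdef`, so the body is assembled whenever `MTERP_PROFILE_BRANCHES` is defined at all; defining it to 0 would not turn profiling off. Second, the call sequence pins down the hook's shape — roughly the following, hedged because the actual declaration lives in the mterp C++ support code, not in this diff:

```cpp
class Thread;        // runtime types, declared elsewhere in ART
class ShadowFrame;

// Inferred from the asm above: a non-zero return byte (testb %al) routes
// the interpreter to MterpOnStackReplacement.
extern "C" bool MterpProfileBranch(Thread* self, ShadowFrame* shadow_frame, int32_t offset);
```

Note also that the x86_64 twin of this macro, later in the diff, omits the trailing `RESTORE_IBASE`: there `rIBASE` lives in callee-saved `%r14` and survives the call, whereas here it is caller-saved `%edx` and must be refreshed.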
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -382,13 +399,6 @@
     lea     (rPC, %eax, 2), rPC
     EXPORT_PC
 
-    /* Set up for backwards branches & osr profiling */
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    call    SYMBOL(MterpSetUpHotnessCountdown)
-
     /* Starting ibase */
     REFRESH_IBASE
 
@@ -1089,8 +1099,12 @@
  */
     /* goto +AA */
     movsbl  rINSTbl, rINST                  # rINST <- ssssssAA
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
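
Every restored branch tail repeats the same computation, worth spelling out once: Dalvik branch offsets count 16-bit code units, so the handler doubles the offset before adding it to rPC, and the sign of the doubled value — left in the flags by `addl`, which `leal` and the `FETCH_INST` move do not clobber — steers backward and zero offsets into the suspend-check slow path. In C++ terms, roughly:

```cpp
#include <cstdint>

// Sketch of the shared branch tail: advance pc, flag backward/zero offsets.
static const uint16_t* Branch(const uint16_t* pc, int32_t offset_in_code_units,
                              bool* take_suspend_path) {
  *take_suspend_path = (offset_in_code_units <= 0);  // loops must poll for GC/suspend
  return pc + offset_in_code_units;  // uint16_t* arithmetic == offset * 2 bytes
}
```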
 
 /* ------------------------------ */
     .balign 128
@@ -1104,8 +1118,12 @@
  */
     /* goto/16 +AAAA */
     movswl  2(rPC), rINST                   # rINST <- ssssAAAA
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AAAA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AAAA * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 /* ------------------------------ */
     .balign 128
@@ -1124,8 +1142,12 @@
  */
     /* goto/32 +AAAAAAAA */
     movl    2(rPC), rINST                   # rINST <- AAAAAAAA
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AAAAAAAA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AAAAAAAA * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 /* ------------------------------ */
     .balign 128
@@ -1147,10 +1169,14 @@
     movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
     movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
     call    SYMBOL(MterpDoPackedSwitch)
-    REFRESH_IBASE
-    testl   %eax, %eax
     movl    %eax, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jle     MterpCheckSuspendAndContinue
+    GOTO_NEXT
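
`MterpDoPackedSwitch` returns a branch offset in code units — the matching target, or the instruction's own 3-unit width when `vAA` misses the table — so the switch handlers can reuse the same doubled-offset tail. A hedged sketch of the lookup, following the published Dalvik packed-switch payload layout (ident 0x0100, size, first_key, then `size` 32-bit relative targets):

```cpp
#include <cstdint>
#include <cstring>

// Sketch only; the real resolver is MterpDoPackedSwitch in the mterp C++ code.
static int32_t DoPackedSwitch(const uint16_t* data, int32_t value) {
  const uint16_t size = data[1];
  int32_t first_key;
  std::memcpy(&first_key, &data[2], sizeof(first_key));  // unaligned-safe read
  const int32_t index = value - first_key;
  if (index < 0 || index >= size) {
    return 3;  // no match: fall through past the 3-code-unit instruction
  }
  int32_t target;
  std::memcpy(&target, &data[4 + 2 * index], sizeof(target));
  return target;
}
```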
 
 /* ------------------------------ */
     .balign 128
@@ -1173,10 +1199,14 @@
     movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
     movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
     call    SYMBOL(MterpDoSparseSwitch)
-    REFRESH_IBASE
-    testl   %eax, %eax
     movl    %eax, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jle     MterpCheckSuspendAndContinue
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1393,14 +1423,16 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, rINST
     jne   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1421,14 +1453,16 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, rINST
     je   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1449,14 +1483,16 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, rINST
     jge   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1477,14 +1513,16 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, rINST
     jl   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1505,14 +1543,16 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, rINST
     jle   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1533,14 +1573,16 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $2, rINST
     jg   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1557,14 +1599,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, rINST
     jne   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1581,14 +1625,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, rINST
     je   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1605,14 +1651,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, rINST
     jge   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1629,14 +1677,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, rINST
     jl   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1653,14 +1703,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, rINST
     jle   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1677,14 +1729,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $2, rINST
     jg   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -12882,120 +12936,20 @@
     /* NOTE: no fallthrough */
 
 /*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    rINST          <= signed offset
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
+ * Check for a pending suspend request.  Assumes rINST is already loaded and rPC
+ * has been advanced; the opcode still needs to be fetched and dispatched.
  */
-MterpCommonTakenBranch:
-    jg      .L_forward_branch               # don't add forward branches to hotness
-/*
- * We need to subtract 1 from positive values and we should not see 0 here,
- * so we may use the result of the comparison with -1.
- */
-#if JIT_CHECK_OSR != -1
-#  error "JIT_CHECK_OSR must be -1."
-#endif
-    cmpw    $JIT_CHECK_OSR, rPROFILE
-    je      .L_osr_check
-    decw    rPROFILE
-    je      .L_add_batch                    # counted down to zero - report
-.L_resume_backward_branch:
+MterpCheckSuspendAndContinue:
     movl    rSELF, %eax
+    EXPORT_PC
     testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
-    leal    (rPC, rINST, 2), rPC
-    FETCH_INST
-    jnz     .L_suspend_request_pending
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
     REFRESH_IBASE
+1:
     GOTO_NEXT
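
One behavioral nuance of the restored slow path: the deleted `.L_suspend_request_pending` code tested `MterpSuspendCheck`'s return and bailed to `MterpFallback` on a non-zero result, while the code above ignores the return value, as the pre-profiling interpreter did. For reference, the shape the old call site assumed (hedged — the declaration is not part of this diff):

```cpp
class Thread;
// Non-zero return meant "leave mterp and fall back to the reference interpreter".
extern "C" bool MterpSuspendCheck(Thread* self);
```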
 
-.L_suspend_request_pending:
-    EXPORT_PC
-    movl    %eax, OUT_ARG0(%esp)            # rSELF in eax
-    call    SYMBOL(MterpSuspendCheck)       # (self)
-    testb   %al, %al
-    jnz     MterpFallback
-    REFRESH_IBASE                           # might have changed during suspend
-    GOTO_NEXT
-
-.L_no_count_backwards:
-    cmpw    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    jne     .L_resume_backward_branch
-.L_osr_check:
-    EXPORT_PC
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG2(%esp)
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jz      .L_resume_backward_branch
-    jmp     MterpOnStackReplacement
-
-.L_forward_branch:
-    cmpw    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    je      .L_check_osr_forward
-.L_resume_forward_branch:
-    leal    (rPC, rINST, 2), rPC
-    FETCH_INST
-    GOTO_NEXT
-
-.L_check_osr_forward:
-    EXPORT_PC
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG2(%esp)
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    REFRESH_IBASE
-    jz      .L_resume_forward_branch
-    jmp     MterpOnStackReplacement
-
-.L_add_batch:
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG2(%esp)
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    jmp     .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG3(%esp)
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    REFRESH_IBASE
-    jnz     MterpOnStackReplacement
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -13040,29 +12994,7 @@
     movl    %ecx, 4(%edx)
     mov     $1, %eax
 MterpDone:
-/*
- * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmpw    $0, rPROFILE
-    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
-
-    movl    %eax, rINST                     # stash return value
-    /* Report cached hotness counts */
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG2(%esp)
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    movl    rINST, %eax                     # restore return value
-
     /* pop up frame */
-MRestoreFrame:
     addl    $FRAME_SIZE, %esp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index f78bcf0..a1360e0 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -120,21 +120,6 @@
     .cfi_restore \_reg
 .endm
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
 /* Frame size must be 16-byte aligned.
  * Remember about 8 bytes for return address + 6 * 8 for spills.
  */
@@ -145,8 +130,6 @@
 #define IN_ARG2        %rdx
 #define IN_ARG1        %rsi
 #define IN_ARG0        %rdi
-/* Spill offsets relative to %esp */
-#define SELF_SPILL     (FRAME_SIZE -  8)
 /* Out Args  */
 #define OUT_ARG3       %rcx
 #define OUT_ARG2       %rdx
@@ -161,7 +144,7 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rSELF    SELF_SPILL(%rsp)
+#define rSELF    %rbp
 #define rPC      %r12
 #define rFP      %r13
 #define rINST    %ebx
@@ -171,11 +154,40 @@
 #define rINSTbl  %bl
 #define rIBASE   %r14
 #define rREFS    %r15
-#define rPROFILE %ebp
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
+ * Profile branch. rINST should contain the offset. %eax is scratch.
+ */
+.macro MTERP_PROFILE_BRANCH
+#ifdef MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    rINST, OUT_32_ARG2
+    call    SYMBOL(MterpProfileBranch)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+#endif
+.endm
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -199,8 +211,7 @@
  *
  */
 .macro REFRESH_IBASE
-    movq    rSELF, rIBASE
-    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rSELF), rIBASE
 .endm
 
 /*
@@ -366,12 +377,6 @@
     movq    IN_ARG0, rSELF
     REFRESH_IBASE
 
-    /* Set up for backwards branches & osr profiling */
-    movq    OFF_FP_METHOD(rFP), OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpSetUpHotnessCountdown)
-    movswl  %ax, rPROFILE
-
     /* start executing the instruction at rPC */
     FETCH_INST
     GOTO_NEXT
@@ -574,10 +579,9 @@
 .L_op_move_exception: /* 0x0d */
 /* File: x86_64/op_move_exception.S */
     /* move-exception vAA */
-    movq    rSELF, %rcx
-    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
+    movl    THREAD_EXCEPTION_OFFSET(rSELF), %eax
     SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
-    movl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    movl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
 
 /* ------------------------------ */
@@ -586,9 +590,9 @@
 /* File: x86_64/op_return_void.S */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
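
A recurring micro-optimization across the x86_64 hunks: with `rSELF` now in `%rbp`, the suspend-flag test reads `THREAD_FLAGS_OFFSET(rSELF)` directly and materializes the argument register only on the slow path. Conceptually:

```cpp
#include <cstdint>

struct ThreadStub { uint32_t flags; };             // stand-in for art::Thread
static void MterpSuspendCheckStub(ThreadStub*) {}  // stand-in for MterpSuspendCheck
// Illustrative mask; the real bits are THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST.
constexpr uint32_t kSuspendOrCheckpoint = 0x3;

static void ReturnPath(ThreadStub* self) {
  if ((self->flags & kSuspendOrCheckpoint) != 0) {  // fast path: one memory read
    MterpSuspendCheckStub(self);                    // arg move only when needed
  }
}
```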
@@ -606,9 +610,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
@@ -624,9 +628,9 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_WIDE_VREG %rax, rINSTq              # eax <- v[AA]
@@ -645,9 +649,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
@@ -850,8 +854,7 @@
     movq    rSELF, OUT_ARG3
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movsbl  %al, %eax
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     andb    $0xf, rINSTbl                  # rINSTbl <- A
     SET_VREG %eax, rINSTq
@@ -985,8 +988,7 @@
     GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
     testb   %al, %al
     jz      common_errNullObject
-    movq    rSELF, %rcx
-    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
+    movq    %rax, THREAD_EXCEPTION_OFFSET(rSELF)
     jmp     MterpException
 
 /* ------------------------------ */
@@ -1001,8 +1003,12 @@
  */
     /* goto +AA */
     movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 /* ------------------------------ */
     .balign 128
@@ -1016,8 +1022,12 @@
  */
     /* goto/16 +AAAA */
     movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AAAA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AAAA * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 /* ------------------------------ */
     .balign 128
@@ -1034,8 +1044,12 @@
  */
     /* goto/32 +AAAAAAAA */
     movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AAAAAAAA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AAAAAAAA * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 /* ------------------------------ */
     .balign 128
@@ -1055,9 +1069,13 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoPackedSwitch)
-    testl   %eax, %eax
     movslq  %eax, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue
+    GOTO_NEXT
 
 /* ------------------------------ */
     .balign 128
@@ -1078,9 +1096,13 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL(MterpDoSparseSwitch)
-    testl   %eax, %eax
     movslq  %eax, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1287,14 +1309,16 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $2, rINST                      # assume not taken
     jne   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1315,14 +1339,16 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $2, rINST                      # assume not taken
     je   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1343,14 +1369,16 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $2, rINST                      # assume not taken
     jge   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1371,14 +1399,16 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $2, rINST                      # assume not taken
     jl   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1399,14 +1429,16 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $2, rINST                      # assume not taken
     jle   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1427,14 +1459,16 @@
     andb    $0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $2, rINST                      # assume not taken
     jg   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1451,14 +1485,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $2, rINST                      # assume branch not taken
     jne   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1475,14 +1511,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $2, rINST                      # assume branch not taken
     je   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1499,14 +1537,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $2, rINST                      # assume branch not taken
     jge   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1523,14 +1563,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $2, rINST                      # assume branch not taken
     jl   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1547,14 +1589,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $2, rINST                      # assume branch not taken
     jle   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1571,14 +1615,16 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $2, rINST                      # assume branch not taken
     jg   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- offset * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
 
 
 /* ------------------------------ */
@@ -1721,8 +1767,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
     EXPORT_PC
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2054,8 +2099,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGet32InstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2087,8 +2131,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGet64InstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2121,8 +2164,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetObjInstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 1
@@ -2155,8 +2197,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetBooleanInstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2189,8 +2230,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetByteInstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2223,8 +2263,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetCharInstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2257,8 +2296,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL(artGetShortInstanceFromCode)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     .if 0
@@ -2451,8 +2489,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGet32StaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2482,8 +2519,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGet64StaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2514,8 +2550,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetObjStaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 1
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2546,8 +2581,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetBooleanStaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2578,8 +2612,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetByteStaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2610,8 +2643,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetCharStaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2642,8 +2674,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL(artGetShortStaticFromCode)
-    movq    rSELF, %rcx
-    cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if 0
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -2971,9 +3002,9 @@
     .balign 128
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86_64/op_return_void_no_barrier.S */
-    movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
@@ -5681,8 +5712,7 @@
     movzwl  2(rPC), OUT_32_ARG1             # eax <- field byte offset
     EXPORT_PC
     callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $0xf, rINSTbl                  # rINST <- A
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
@@ -11819,7 +11849,7 @@
 #if MTERP_LOGGING
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
+    movl    THREAD_FLAGS_OFFSET(rSELF), OUT_32_ARG2
     call    SYMBOL(MterpLogSuspendFallback)
 #endif
     jmp     MterpCommonFallback
@@ -11830,8 +11860,7 @@
  * interpreter.
  */
 MterpPossibleException:
-    movq    rSELF, %rcx
-    cmpq    $0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $0, THREAD_EXCEPTION_OFFSET(rSELF)
     jz      MterpFallback
     /* intentional fallthrough - handle pending exception. */
 
@@ -11862,113 +11891,19 @@
     /* NOTE: no fallthrough */
 
 /*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    rINST          <= signed offset
- *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
+ * Check for a pending suspend request.  Assumes rINST is already loaded and rPC
+ * has been advanced; the opcode still needs to be fetched and dispatched.
  */
-MterpCommonTakenBranch:
-    jg      .L_forward_branch               # don't add forward branches to hotness
-/*
- * We need to subtract 1 from positive values and we should not see 0 here,
- * so we may use the result of the comparison with -1.
- */
-#if JIT_CHECK_OSR != -1
-#  error "JIT_CHECK_OSR must be -1."
-#endif
-    cmpl    $JIT_CHECK_OSR, rPROFILE
-    je      .L_osr_check
-    decl    rPROFILE
-    je      .L_add_batch                    # counted down to zero - report
-.L_resume_backward_branch:
-    movq    rSELF, %rax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+MterpCheckSuspendAndContinue:
     REFRESH_IBASE
-    leaq    (rPC, rINSTq, 2), rPC
-    FETCH_INST
-    jnz     .L_suspend_request_pending
-    GOTO_NEXT
-
-.L_suspend_request_pending:
+    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
+    jz      1f
     EXPORT_PC
     movq    rSELF, OUT_ARG0
-    call    SYMBOL(MterpSuspendCheck)       # (self)
-    testb   %al, %al
-    jnz     MterpFallback
-    REFRESH_IBASE                           # might have changed during suspend
+    call    SYMBOL(MterpSuspendCheck)
+1:
     GOTO_NEXT
 
-.L_no_count_backwards:
-    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    jne     .L_resume_backward_branch
-.L_osr_check:
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movq    rINSTq, OUT_ARG2
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jz      .L_resume_backward_branch
-    jmp     MterpOnStackReplacement
-
-.L_forward_branch:
-    cmpl    $JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    je      .L_check_osr_forward
-.L_resume_forward_branch:
-    leaq    (rPC, rINSTq, 2), rPC
-    FETCH_INST
-    GOTO_NEXT
-
-.L_check_osr_forward:
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movq    rINSTq, OUT_ARG2
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jz      .L_resume_forward_branch
-    jmp     MterpOnStackReplacement
-
-.L_add_batch:
-    movl    rPROFILE, %eax
-    movq    OFF_FP_METHOD(rFP), OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
-    movq    rSELF, OUT_ARG2
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    movswl  %ax, rPROFILE
-    jmp     .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movq    rINSTq, OUT_ARG3
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -12008,28 +11943,7 @@
     movq    %rax, (%rdx)
     movl    $1, %eax
 MterpDone:
-/*
- * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    testl   rPROFILE, rPROFILE
-    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
-
-    movl    %eax, rINST                     # stash return value
-    /* Report cached hotness counts */
-    movl    rPROFILE, %eax
-    movq    OFF_FP_METHOD(rFP), OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
-    movq    rSELF, OUT_ARG2
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    movl    rINST, %eax                     # restore return value
-
     /* pop up frame */
-MRestoreFrame:
     addq    $FRAME_SIZE, %rsp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/x86/bincmp.S b/runtime/interpreter/mterp/x86/bincmp.S
index ee32278..c72a5cf 100644
--- a/runtime/interpreter/mterp/x86/bincmp.S
+++ b/runtime/interpreter/mterp/x86/bincmp.S
@@ -11,11 +11,13 @@
     GET_VREG %eax, %ecx                     # eax <- vA
     sarl    $$4, rINST                      # rINST <- B
     cmpl    VREG_ADDRESS(rINST), %eax       # compare (vA, vB)
+    movl    $$2, rINST
     j${revcmp}   1f
     movswl  2(rPC), rINST                   # Get signed branch offset
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $$JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- offset * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # offset * 2 <= 0 => suspend check
+    GOTO_NEXT
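
bincmp.S is the template the out/ files above are regenerated from: `$$` escapes a literal `$` for the assembler, and `${revcmp}` is substituted with the reversed condition per opcode (`ne` for if-eq, so the not-taken case skips straight to `1:`). A toy C++ rendering of that substitution step — the real generator is a script under runtime/interpreter/mterp, an assumption, and the names here are illustrative:

```cpp
#include <string>

static void ReplaceAll(std::string& s, const std::string& from, const std::string& to) {
  for (size_t pos = 0; (pos = s.find(from, pos)) != std::string::npos; pos += to.size()) {
    s.replace(pos, from.size(), to);
  }
}

// Expand one opcode's handler from the template text.
static std::string ExpandBincmp(std::string tmpl, const std::string& revcmp) {
  ReplaceAll(tmpl, "${revcmp}", revcmp);  // e.g. "ne" when emitting op_if_eq
  ReplaceAll(tmpl, "$$", "$");            // escaped dollar, e.g. $$2 -> $2
  return tmpl;
}
```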
diff --git a/runtime/interpreter/mterp/x86/entry.S b/runtime/interpreter/mterp/x86/entry.S
index 384dd9a..785efdc 100644
--- a/runtime/interpreter/mterp/x86/entry.S
+++ b/runtime/interpreter/mterp/x86/entry.S
@@ -64,13 +64,6 @@
     lea     (rPC, %eax, 2), rPC
     EXPORT_PC
 
-    /* Set up for backwards branches & osr profiling */
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    call    SYMBOL(MterpSetUpHotnessCountdown)
-
     /* Starting ibase */
     REFRESH_IBASE
 
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index df10ff0..3965ecd 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -131,120 +131,20 @@
     /* NOTE: no fallthrough */
 
 /*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    rINST          <= signed offset
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
+ * Check for a pending suspend request.  Assumes rINST already holds the freshly
+ * fetched instruction word and rPC has been advanced; the opcode still needs to
+ * be decoded and dispatched via GOTO_NEXT.
  */
-MterpCommonTakenBranch:
-    jg      .L_forward_branch               # don't add forward branches to hotness
-/*
- * We need to subtract 1 from positive values and we should not see 0 here,
- * so we may use the result of the comparison with -1.
- */
-#if JIT_CHECK_OSR != -1
-#  error "JIT_CHECK_OSR must be -1."
-#endif
-    cmpw    $$JIT_CHECK_OSR, rPROFILE
-    je      .L_osr_check
-    decw    rPROFILE
-    je      .L_add_batch                    # counted down to zero - report
-.L_resume_backward_branch:
+MterpCheckSuspendAndContinue:
     movl    rSELF, %eax
+    EXPORT_PC
     testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
-    leal    (rPC, rINST, 2), rPC
-    FETCH_INST
-    jnz     .L_suspend_request_pending
+    jz      1f
+    movl    %eax, OUT_ARG0(%esp)
+    call    SYMBOL(MterpSuspendCheck)
     REFRESH_IBASE
+1:
     GOTO_NEXT
 
-.L_suspend_request_pending:
-    EXPORT_PC
-    movl    %eax, OUT_ARG0(%esp)            # rSELF in eax
-    call    SYMBOL(MterpSuspendCheck)       # (self)
-    testb   %al, %al
-    jnz     MterpFallback
-    REFRESH_IBASE                           # might have changed during suspend
-    GOTO_NEXT
-
-.L_no_count_backwards:
-    cmpw    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    jne     .L_resume_backward_branch
-.L_osr_check:
-    EXPORT_PC
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG2(%esp)
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jz      .L_resume_backward_branch
-    jmp     MterpOnStackReplacement
-
-.L_forward_branch:
-    cmpw    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    je      .L_check_osr_forward
-.L_resume_forward_branch:
-    leal    (rPC, rINST, 2), rPC
-    FETCH_INST
-    GOTO_NEXT
-
-.L_check_osr_forward:
-    EXPORT_PC
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG2(%esp)
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    REFRESH_IBASE
-    jz      .L_resume_forward_branch
-    jmp     MterpOnStackReplacement
-
-.L_add_batch:
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG2(%esp)
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    jmp     .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rINST, OUT_ARG3(%esp)
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    REFRESH_IBASE
-    jnz     MterpOnStackReplacement
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -289,29 +189,7 @@
     movl    %ecx, 4(%edx)
     mov     $$1, %eax
 MterpDone:
-/*
- * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    cmpw    $$0, rPROFILE
-    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
-
-    movl    %eax, rINST                     # stash return value
-    /* Report cached hotness counts */
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG0(%esp)
-    leal    OFF_FP_SHADOWFRAME(rFP), %ecx
-    movl    %ecx, OUT_ARG1(%esp)
-    movl    rSELF, %eax
-    movl    %eax, OUT_ARG2(%esp)
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    movl    rINST, %eax                     # restore return value
-
     /* pop up frame */
-MRestoreFrame:
     addl    $$FRAME_SIZE, %esp
     .cfi_adjust_cfa_offset -FRAME_SIZE
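
With this change, MterpCheckSuspendAndContinue is the only slow path left on the
branch tail; hotness counting and the OSR checks of the old MterpCommonTakenBranch
move into MTERP_PROFILE_BRANCH. A hedged C++ model of the handler (the flag
constants are stand-ins, not the real THREAD_* values):

    #include <cstdint>

    constexpr uint32_t kSuspendRequest    = 1u << 0;  // assumed bit positions
    constexpr uint32_t kCheckpointRequest = 1u << 1;

    void CheckSuspendAndContinue(uint32_t thread_flags) {
      if ((thread_flags & (kSuspendRequest | kCheckpointRequest)) != 0) {
        // EXPORT_PC, then call MterpSuspendCheck(self); the x86 handler also
        // refreshes rIBASE afterwards, since a suspend can switch the ibase.
      }
      // GOTO_NEXT: dispatch the opcode that FETCH_INST already loaded.
    }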
 
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
index 3a2dcb7..5729b90 100644
--- a/runtime/interpreter/mterp/x86/header.S
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -117,21 +117,6 @@
     .cfi_restore \_reg
 .endm
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
-#define OFF_FP_SHADOWFRAME OFF_FP(0)
-
 /* Frame size must be 16-byte aligned.
  * Remember about 4 bytes for return address + 4 * 4 for spills
  */
@@ -163,11 +148,43 @@
 #define rINSTbl  %bl
 #define rIBASE   %edx
 #define rREFS    %ebp
-#define rPROFILE OFF_FP_COUNTDOWN_OFFSET(rFP)
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
+ * Profile branch. rINST should contain the offset. %eax is scratch.
+ */
+.macro MTERP_PROFILE_BRANCH
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    movl    rSELF, %eax
+    movl    %eax, OUT_ARG0(%esp)
+    leal    OFF_FP_SHADOWFRAME(rFP), %eax
+    movl    %eax, OUT_ARG1(%esp)
+    movl    rINST, OUT_ARG2(%esp)
+    call    SYMBOL(MterpProfileBranch)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+    RESTORE_IBASE
+#endif
+.endm
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
diff --git a/runtime/interpreter/mterp/x86/op_goto.S b/runtime/interpreter/mterp/x86/op_goto.S
index 1827d68..9a87361 100644
--- a/runtime/interpreter/mterp/x86/op_goto.S
+++ b/runtime/interpreter/mterp/x86/op_goto.S
@@ -6,5 +6,9 @@
  */
     /* goto +AA */
     movsbl  rINSTbl, rINST                  # rINST <- ssssssAA
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_goto_16.S b/runtime/interpreter/mterp/x86/op_goto_16.S
index ea5ea90..a25c31b 100644
--- a/runtime/interpreter/mterp/x86/op_goto_16.S
+++ b/runtime/interpreter/mterp/x86/op_goto_16.S
@@ -6,5 +6,9 @@
  */
     /* goto/16 +AAAA */
     movswl  2(rPC), rINST                   # rINST <- ssssAAAA
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_goto_32.S b/runtime/interpreter/mterp/x86/op_goto_32.S
index 4becaf3..159128b 100644
--- a/runtime/interpreter/mterp/x86/op_goto_32.S
+++ b/runtime/interpreter/mterp/x86/op_goto_32.S
@@ -11,5 +11,9 @@
  */
     /* goto/32 +AAAAAAAA */
     movl    2(rPC), rINST                   # rINST <- AAAAAAAA
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/op_packed_switch.S b/runtime/interpreter/mterp/x86/op_packed_switch.S
index fcb7509..e33cf75 100644
--- a/runtime/interpreter/mterp/x86/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86/op_packed_switch.S
@@ -15,7 +15,11 @@
     movl    %eax, OUT_ARG1(%esp)            # ARG1 <- vAA
     movl    %ecx, OUT_ARG0(%esp)            # ARG0 <- switchData
     call    SYMBOL($func)
-    REFRESH_IBASE
-    testl   %eax, %eax
     movl    %eax, rINST
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    REFRESH_IBASE
+    jle     MterpCheckSuspendAndContinue
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86/zcmp.S b/runtime/interpreter/mterp/x86/zcmp.S
index c116159..0f28d1a 100644
--- a/runtime/interpreter/mterp/x86/zcmp.S
+++ b/runtime/interpreter/mterp/x86/zcmp.S
@@ -7,11 +7,13 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $$0, VREG_ADDRESS(rINST)        # compare (vA, 0)
+    movl    $$2, rINST                      # assume branch not taken
     j${revcmp}   1f
     movswl  2(rPC), rINST                   # fetch signed displacement
-    testl   rINST, rINST
-    jmp     MterpCommonTakenBranch
 1:
-    cmpw    $$JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addl    rINST, rINST                    # rINST <- AA * 2
+    leal    (rPC, rINST), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/bincmp.S b/runtime/interpreter/mterp/x86_64/bincmp.S
index 6601483..a16050b 100644
--- a/runtime/interpreter/mterp/x86_64/bincmp.S
+++ b/runtime/interpreter/mterp/x86_64/bincmp.S
@@ -11,11 +11,13 @@
     andb    $$0xf, %cl                      # rcx <- A
     GET_VREG %eax, %rcx                     # eax <- vA
     cmpl    VREG_ADDRESS(rINSTq), %eax      # compare (vA, vB)
+    movl    $$2, rINST                      # assume not taken
     j${revcmp}   1f
     movswq  2(rPC), rINSTq                  # Get signed branch offset
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $$JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/entry.S b/runtime/interpreter/mterp/x86_64/entry.S
index d992956..69b2371 100644
--- a/runtime/interpreter/mterp/x86_64/entry.S
+++ b/runtime/interpreter/mterp/x86_64/entry.S
@@ -65,12 +65,6 @@
     movq    IN_ARG0, rSELF
     REFRESH_IBASE
 
-    /* Set up for backwards branches & osr profiling */
-    movq    OFF_FP_METHOD(rFP), OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    call    SYMBOL(MterpSetUpHotnessCountdown)
-    movswl  %ax, rPROFILE
-
     /* start executing the instruction at rPC */
     FETCH_INST
     GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/footer.S b/runtime/interpreter/mterp/x86_64/footer.S
index 71130d1..573256b 100644
--- a/runtime/interpreter/mterp/x86_64/footer.S
+++ b/runtime/interpreter/mterp/x86_64/footer.S
@@ -71,7 +71,7 @@
 #if MTERP_LOGGING
     movq    rSELF, OUT_ARG0
     leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movl    THREAD_FLAGS_OFFSET(OUT_ARG0), OUT_32_ARG2
+    movl    THREAD_FLAGS_OFFSET(rSELF), OUT_32_ARG2
     call    SYMBOL(MterpLogSuspendFallback)
 #endif
     jmp     MterpCommonFallback
@@ -82,8 +82,7 @@
  * interpreter.
  */
 MterpPossibleException:
-    movq    rSELF, %rcx
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     jz      MterpFallback
     /* intentional fallthrough - handle pending exception. */
 
@@ -114,113 +113,19 @@
     /* NOTE: no fallthrough */
 
 /*
- * Common handling for branches with support for Jit profiling.
- * On entry:
- *    rINST          <= signed offset
- *    rPROFILE       <= signed hotness countdown (expanded to 32 bits)
- *    condition bits <= set to establish sign of offset (use "NoFlags" entry if not)
- *
- * We have quite a few different cases for branch profiling, OSR detection and
- * suspend check support here.
- *
- * Taken backward branches:
- *    If profiling active, do hotness countdown and report if we hit zero.
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *    Is there a pending suspend request?  If so, suspend.
- *
- * Taken forward branches and not-taken backward branches:
- *    If in osr check mode, see if our target is a compiled loop header entry and do OSR if so.
- *
- * Our most common case is expected to be a taken backward branch with active jit profiling,
- * but no full OSR check and no pending suspend request.
- * Next most common case is not-taken branch with no full OSR check.
- *
+ * Check for a pending suspend request.  Assumes rINST already holds the freshly
+ * fetched instruction word and rPC has been advanced; the opcode still needs to
+ * be decoded and dispatched via GOTO_NEXT.
  */
-MterpCommonTakenBranch:
-    jg      .L_forward_branch               # don't add forward branches to hotness
-/*
- * We need to subtract 1 from positive values and we should not see 0 here,
- * so we may use the result of the comparison with -1.
- */
-#if JIT_CHECK_OSR != -1
-#  error "JIT_CHECK_OSR must be -1."
-#endif
-    cmpl    $$JIT_CHECK_OSR, rPROFILE
-    je      .L_osr_check
-    decl    rPROFILE
-    je      .L_add_batch                    # counted down to zero - report
-.L_resume_backward_branch:
-    movq    rSELF, %rax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+MterpCheckSuspendAndContinue:
     REFRESH_IBASE
-    leaq    (rPC, rINSTq, 2), rPC
-    FETCH_INST
-    jnz     .L_suspend_request_pending
-    GOTO_NEXT
-
-.L_suspend_request_pending:
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
+    jz      1f
     EXPORT_PC
     movq    rSELF, OUT_ARG0
-    call    SYMBOL(MterpSuspendCheck)       # (self)
-    testb   %al, %al
-    jnz     MterpFallback
-    REFRESH_IBASE                           # might have changed during suspend
+    call    SYMBOL(MterpSuspendCheck)
+1:
     GOTO_NEXT
 
-.L_no_count_backwards:
-    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    jne     .L_resume_backward_branch
-.L_osr_check:
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movq    rINSTq, OUT_ARG2
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jz      .L_resume_backward_branch
-    jmp     MterpOnStackReplacement
-
-.L_forward_branch:
-    cmpl    $$JIT_CHECK_OSR, rPROFILE         # possible OSR re-entry?
-    je      .L_check_osr_forward
-.L_resume_forward_branch:
-    leaq    (rPC, rINSTq, 2), rPC
-    FETCH_INST
-    GOTO_NEXT
-
-.L_check_osr_forward:
-    EXPORT_PC
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movq    rINSTq, OUT_ARG2
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jz      .L_resume_forward_branch
-    jmp     MterpOnStackReplacement
-
-.L_add_batch:
-    movl    rPROFILE, %eax
-    movq    OFF_FP_METHOD(rFP), OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
-    movq    rSELF, OUT_ARG2
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    movswl  %ax, rPROFILE
-    jmp     .L_no_count_backwards
-
-/*
- * Entered from the conditional branch handlers when OSR check request active on
- * not-taken path.  All Dalvik not-taken conditional branch offsets are 2.
- */
-.L_check_not_taken_osr:
-    movq    rSELF, OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movq    rINSTq, OUT_ARG3
-    call    SYMBOL(MterpMaybeDoOnStackReplacement) # (self, shadow_frame, offset)
-    testb   %al, %al
-    jnz     MterpOnStackReplacement
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-
 /*
  * On-stack replacement has happened, and now we've returned from the compiled method.
  */
@@ -260,28 +165,7 @@
     movq    %rax, (%rdx)
     movl    $$1, %eax
 MterpDone:
-/*
- * At this point, we expect rPROFILE to be non-zero.  If negative, hotness is disabled or we're
- * checking for OSR.  If greater than zero, we might have unreported hotness to register
- * (the difference between the ending rPROFILE and the cached hotness counter).  rPROFILE
- * should only reach zero immediately after a hotness decrement, and is then reset to either
- * a negative special state or the new non-zero countdown value.
- */
-    testl   rPROFILE, rPROFILE
-    jle     MRestoreFrame                   # if > 0, we may have some counts to report.
-
-    movl    %eax, rINST                     # stash return value
-    /* Report cached hotness counts */
-    movl    rPROFILE, %eax
-    movq    OFF_FP_METHOD(rFP), OUT_ARG0
-    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
-    movw    %ax, OFF_FP_COUNTDOWN_OFFSET(rFP)
-    movq    rSELF, OUT_ARG2
-    call    SYMBOL(MterpAddHotnessBatch)    # (method, shadow_frame, self)
-    movl    rINST, %eax                     # restore return value
-
     /* pop up frame */
-MRestoreFrame:
     addq    $$FRAME_SIZE, %rsp
     .cfi_adjust_cfa_offset -FRAME_SIZE
 
diff --git a/runtime/interpreter/mterp/x86_64/header.S b/runtime/interpreter/mterp/x86_64/header.S
index 47d30ec..eb84ea1 100644
--- a/runtime/interpreter/mterp/x86_64/header.S
+++ b/runtime/interpreter/mterp/x86_64/header.S
@@ -113,21 +113,6 @@
     .cfi_restore \_reg
 .endm
 
-/*
- * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
- * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
- */
-#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
-#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
-#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
-#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
-#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
-#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
-#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
-#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
-#define OFF_FP_COUNTDOWN_OFFSET OFF_FP(SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET)
-#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
-
 /* Frame size must be 16-byte aligned.
  * Remember about 8 bytes for return address + 6 * 8 for spills.
  */
@@ -138,8 +123,6 @@
 #define IN_ARG2        %rdx
 #define IN_ARG1        %rsi
 #define IN_ARG0        %rdi
-/* Spill offsets relative to %esp */
-#define SELF_SPILL     (FRAME_SIZE -  8)
 /* Out Args  */
 #define OUT_ARG3       %rcx
 #define OUT_ARG2       %rdx
@@ -154,7 +137,7 @@
 
 /* During bringup, we'll use the shadow frame model instead of rFP */
 /* single-purpose registers, given names for clarity */
-#define rSELF    SELF_SPILL(%rsp)
+#define rSELF    %rbp
 #define rPC      %r12
 #define rFP      %r13
 #define rINST    %ebx
@@ -164,11 +147,40 @@
 #define rINSTbl  %bl
 #define rIBASE   %r14
 #define rREFS    %r15
-#define rPROFILE %ebp
 
+/*
+ * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
+ * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
+ */
+#define OFF_FP(a) (a - SHADOWFRAME_VREGS_OFFSET)
+#define OFF_FP_NUMBER_OF_VREGS OFF_FP(SHADOWFRAME_NUMBER_OF_VREGS_OFFSET)
+#define OFF_FP_DEX_PC OFF_FP(SHADOWFRAME_DEX_PC_OFFSET)
+#define OFF_FP_LINK OFF_FP(SHADOWFRAME_LINK_OFFSET)
+#define OFF_FP_METHOD OFF_FP(SHADOWFRAME_METHOD_OFFSET)
+#define OFF_FP_RESULT_REGISTER OFF_FP(SHADOWFRAME_RESULT_REGISTER_OFFSET)
+#define OFF_FP_DEX_PC_PTR OFF_FP(SHADOWFRAME_DEX_PC_PTR_OFFSET)
+#define OFF_FP_CODE_ITEM OFF_FP(SHADOWFRAME_CODE_ITEM_OFFSET)
+#define OFF_FP_SHADOWFRAME (-SHADOWFRAME_VREGS_OFFSET)
+
+#define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
 
 /*
+ * Profile branch. rINST should contain the offset. %eax is scratch.
+ */
+.macro MTERP_PROFILE_BRANCH
+#if MTERP_PROFILE_BRANCHES
+    EXPORT_PC
+    movq    rSELF, OUT_ARG0
+    leaq    OFF_FP_SHADOWFRAME(rFP), OUT_ARG1
+    movl    rINST, OUT_32_ARG2
+    call    SYMBOL(MterpProfileBranch)
+    testb   %al, %al
+    jnz     MterpOnStackReplacement
+#endif
+.endm
+
+/*
  * "export" the PC to dex_pc field in the shadow frame, f/b/o future exception objects.  Must
  * be done *before* something throws.
  *
@@ -192,8 +204,7 @@
  *
  */
 .macro REFRESH_IBASE
-    movq    rSELF, rIBASE
-    movq    THREAD_CURRENT_IBASE_OFFSET(rIBASE), rIBASE
+    movq    THREAD_CURRENT_IBASE_OFFSET(rSELF), rIBASE
 .endm
 
 /*
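
The MTERP_PROFILE_BRANCH macro assumes the following call contract for the C++
helper (signature inferred from the argument setup and the byte-sized result test
in the assembly; a sketch, not the canonical declaration):

    #include <cstdint>

    namespace art { class Thread; class ShadowFrame; }

    // Three arguments, boolean result in %al. A nonzero return means an OSR
    // transition was performed, so the macro jumps to MterpOnStackReplacement
    // instead of continuing in the interpreter.
    extern "C" bool MterpProfileBranch(art::Thread* self,
                                       art::ShadowFrame* shadow_frame,
                                       int32_t branch_offset);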
diff --git a/runtime/interpreter/mterp/x86_64/op_aget_object.S b/runtime/interpreter/mterp/x86_64/op_aget_object.S
index 5f77a97..8baedea 100644
--- a/runtime/interpreter/mterp/x86_64/op_aget_object.S
+++ b/runtime/interpreter/mterp/x86_64/op_aget_object.S
@@ -10,8 +10,7 @@
     GET_VREG OUT_32_ARG1, %rcx              # ecx <- vCC (requested index)
     EXPORT_PC
     call    SYMBOL(artAGetObjectFromMterp)  # (array, index)
-    movq    rSELF, %rcx
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     SET_VREG_OBJECT %eax, rINSTq
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/mterp/x86_64/op_goto.S b/runtime/interpreter/mterp/x86_64/op_goto.S
index 9749901..c4fc976 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto.S
@@ -6,5 +6,9 @@
  */
     /* goto +AA */
     movsbq  rINSTbl, rINSTq                 # rINSTq <- ssssssAA
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_16.S b/runtime/interpreter/mterp/x86_64/op_goto_16.S
index 77688e0..8cb9a5c 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto_16.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto_16.S
@@ -6,5 +6,9 @@
  */
     /* goto/16 +AAAA */
     movswq  2(rPC), rINSTq                  # rINSTq <- ssssAAAA
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/op_goto_32.S b/runtime/interpreter/mterp/x86_64/op_goto_32.S
index 29d777b..4ecdacd 100644
--- a/runtime/interpreter/mterp/x86_64/op_goto_32.S
+++ b/runtime/interpreter/mterp/x86_64/op_goto_32.S
@@ -9,5 +9,9 @@
  */
     /* goto/32 +AAAAAAAA */
     movslq  2(rPC), rINSTq                  # rINSTq <- AAAAAAAA
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/op_iget.S b/runtime/interpreter/mterp/x86_64/op_iget.S
index df43efe..a0d0faf 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget.S
@@ -12,8 +12,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3
     call    SYMBOL($helper)
-    movq    rSELF, %rcx
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
     .if $is_object
diff --git a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
index 176c954..964d20a 100644
--- a/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/x86_64/op_iget_object_quick.S
@@ -7,8 +7,7 @@
     movzwl  2(rPC), OUT_32_ARG1             # eax <- field byte offset
     EXPORT_PC
     callq   SYMBOL(artIGetObjectFromMterp)  # (obj, offset)
-    movq    rSELF, %rcx
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException                  # bail out
     andb    $$0xf, rINSTbl                  # rINST <- A
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
diff --git a/runtime/interpreter/mterp/x86_64/op_instance_of.S b/runtime/interpreter/mterp/x86_64/op_instance_of.S
index 4819833..6be37f9 100644
--- a/runtime/interpreter/mterp/x86_64/op_instance_of.S
+++ b/runtime/interpreter/mterp/x86_64/op_instance_of.S
@@ -14,8 +14,7 @@
     movq    rSELF, OUT_ARG3
     call    SYMBOL(MterpInstanceOf)         # (index, &obj, method, self)
     movsbl  %al, %eax
-    movq    rSELF, %rcx
-    cmpq    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpq    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     andb    $$0xf, rINSTbl                  # rINSTbl <- A
     SET_VREG %eax, rINSTq
diff --git a/runtime/interpreter/mterp/x86_64/op_move_exception.S b/runtime/interpreter/mterp/x86_64/op_move_exception.S
index 33db878..d0a14fd 100644
--- a/runtime/interpreter/mterp/x86_64/op_move_exception.S
+++ b/runtime/interpreter/mterp/x86_64/op_move_exception.S
@@ -1,6 +1,5 @@
     /* move-exception vAA */
-    movq    rSELF, %rcx
-    movl    THREAD_EXCEPTION_OFFSET(%rcx), %eax
+    movl    THREAD_EXCEPTION_OFFSET(rSELF), %eax
     SET_VREG_OBJECT %eax, rINSTq            # fp[AA] <- exception object
-    movl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    movl    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 1
diff --git a/runtime/interpreter/mterp/x86_64/op_packed_switch.S b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
index fdf5a50..cb0acb7 100644
--- a/runtime/interpreter/mterp/x86_64/op_packed_switch.S
+++ b/runtime/interpreter/mterp/x86_64/op_packed_switch.S
@@ -13,6 +13,10 @@
     leaq    (rPC,OUT_ARG0,2), OUT_ARG0      # rcx <- PC + BBBBbbbb*2
     GET_VREG OUT_32_ARG1, rINSTq            # eax <- vAA
     call    SYMBOL($func)
-    testl   %eax, %eax
     movslq  %eax, rINSTq
-    jmp     MterpCommonTakenBranch
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue
+    GOTO_NEXT
diff --git a/runtime/interpreter/mterp/x86_64/op_return.S b/runtime/interpreter/mterp/x86_64/op_return.S
index 07e0e53..14f4f8a 100644
--- a/runtime/interpreter/mterp/x86_64/op_return.S
+++ b/runtime/interpreter/mterp/x86_64/op_return.S
@@ -6,9 +6,9 @@
     /* op vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_VREG %eax, rINSTq                   # eax <- vAA
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void.S b/runtime/interpreter/mterp/x86_64/op_return_void.S
index 6a12df3..46a5753 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void.S
@@ -1,8 +1,8 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
index 822b2e8..92e3506 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
-    movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     xorq    %rax, %rax
diff --git a/runtime/interpreter/mterp/x86_64/op_return_wide.S b/runtime/interpreter/mterp/x86_64/op_return_wide.S
index 288eb96..f2d6e04 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_wide.S
@@ -4,9 +4,9 @@
     /* return-wide vAA */
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
-    movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(rSELF)
     jz      1f
+    movq    rSELF, OUT_ARG0
     call    SYMBOL(MterpSuspendCheck)
 1:
     GET_WIDE_VREG %rax, rINSTq              # eax <- v[AA]
diff --git a/runtime/interpreter/mterp/x86_64/op_sget.S b/runtime/interpreter/mterp/x86_64/op_sget.S
index d39e6c4..38d9a5e 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget.S
@@ -11,8 +11,7 @@
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
     call    SYMBOL($helper)
-    movq    rSELF, %rcx
-    cmpl    $$0, THREAD_EXCEPTION_OFFSET(%rcx)
+    cmpl    $$0, THREAD_EXCEPTION_OFFSET(rSELF)
     jnz     MterpException
     .if $is_object
     SET_VREG_OBJECT %eax, rINSTq            # fp[A] <- value
diff --git a/runtime/interpreter/mterp/x86_64/op_throw.S b/runtime/interpreter/mterp/x86_64/op_throw.S
index 8095c25..22ed990 100644
--- a/runtime/interpreter/mterp/x86_64/op_throw.S
+++ b/runtime/interpreter/mterp/x86_64/op_throw.S
@@ -6,6 +6,5 @@
     GET_VREG %eax, rINSTq                   # eax<- vAA (exception object)
     testb   %al, %al
     jz      common_errNullObject
-    movq    rSELF, %rcx
-    movq    %rax, THREAD_EXCEPTION_OFFSET(%rcx)
+    movq    %rax, THREAD_EXCEPTION_OFFSET(rSELF)
     jmp     MterpException
diff --git a/runtime/interpreter/mterp/x86_64/zcmp.S b/runtime/interpreter/mterp/x86_64/zcmp.S
index fb8ae6a..0051407 100644
--- a/runtime/interpreter/mterp/x86_64/zcmp.S
+++ b/runtime/interpreter/mterp/x86_64/zcmp.S
@@ -7,11 +7,13 @@
  */
     /* if-cmp vAA, +BBBB */
     cmpl    $$0, VREG_ADDRESS(rINSTq)       # compare (vA, 0)
+    movl    $$2, rINST                      # assume branch not taken
     j${revcmp}   1f
     movswq  2(rPC), rINSTq                  # fetch signed displacement
-    testq   rINSTq, rINSTq
-    jmp     MterpCommonTakenBranch
 1:
-    cmpl    $$JIT_CHECK_OSR, rPROFILE
-    je      .L_check_not_taken_osr
-    ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+    MTERP_PROFILE_BRANCH
+    addq    rINSTq, rINSTq                  # rINSTq <- AA * 2
+    leaq    (rPC, rINSTq), rPC
+    FETCH_INST
+    jle     MterpCheckSuspendAndContinue    # AA * 2 <= 0 => suspend check
+    GOTO_NEXT
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 78f34ff..81be959 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -353,28 +353,35 @@
   jint src_pos = shadow_frame->GetVReg(arg_offset + 1);
   jint dst_pos = shadow_frame->GetVReg(arg_offset + 3);
   jint length = shadow_frame->GetVReg(arg_offset + 4);
-  mirror::Array* src_array = shadow_frame->GetVRegReference(arg_offset)->AsArray();
-  mirror::Array* dst_array = shadow_frame->GetVRegReference(arg_offset + 2)->AsArray();
 
-  // Null checking.
-  if (src_array == nullptr) {
+  mirror::Object* src_obj = shadow_frame->GetVRegReference(arg_offset);
+  mirror::Object* dst_obj = shadow_frame->GetVRegReference(arg_offset + 2);
+  // Null checking. For simplicity, abort transaction.
+  if (src_obj == nullptr) {
     AbortTransactionOrFail(self, "src is null in arraycopy.");
     return;
   }
-  if (dst_array == nullptr) {
+  if (dst_obj == nullptr) {
     AbortTransactionOrFail(self, "dst is null in arraycopy.");
     return;
   }
+  // Test for arrayness. Throw ArrayStoreException.
+  if (!src_obj->IsArrayInstance() || !dst_obj->IsArrayInstance()) {
+    self->ThrowNewException("Ljava/lang/ArrayStoreException;", "src or dst is not an array");
+    return;
+  }
 
-  // Bounds checking.
+  mirror::Array* src_array = src_obj->AsArray();
+  mirror::Array* dst_array = dst_obj->AsArray();
+
+  // Bounds checking. Throw IndexOutOfBoundsException.
   if (UNLIKELY(src_pos < 0) || UNLIKELY(dst_pos < 0) || UNLIKELY(length < 0) ||
       UNLIKELY(src_pos > src_array->GetLength() - length) ||
       UNLIKELY(dst_pos > dst_array->GetLength() - length)) {
-    self->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
+    self->ThrowNewExceptionF("Ljava/lang/IndexOutOfBoundsException;",
                              "src.length=%d srcPos=%d dst.length=%d dstPos=%d length=%d",
                              src_array->GetLength(), src_pos, dst_array->GetLength(), dst_pos,
                              length);
-    AbortTransactionOrFail(self, "Index out of bounds.");
     return;
   }
 
@@ -393,19 +400,11 @@
       return;
     }
 
-    // For simplicity only do this if the component types are the same. Otherwise we have to copy
-    // even more code from the object-array functions.
-    if (src_type != trg_type) {
-      AbortTransactionOrFail(self, "Types not the same in arraycopy: %s vs %s",
-                             PrettyDescriptor(src_array->GetClass()->GetComponentType()).c_str(),
-                             PrettyDescriptor(dst_array->GetClass()->GetComponentType()).c_str());
-      return;
-    }
-
     mirror::ObjectArray<mirror::Object>* src = src_array->AsObjectArray<mirror::Object>();
     mirror::ObjectArray<mirror::Object>* dst = dst_array->AsObjectArray<mirror::Object>();
     if (src == dst) {
       // Can overlap, but not have type mismatches.
+      // We cannot use ObjectArray::MemMove here, as it doesn't support transactions.
       const bool copy_forward = (dst_pos < src_pos) || (dst_pos - src_pos >= length);
       if (copy_forward) {
         for (int32_t i = 0; i < length; ++i) {
@@ -417,9 +416,15 @@
         }
       }
     } else {
-      // Can't overlap. Would need type checks, but we abort above.
-      for (int32_t i = 0; i < length; ++i) {
-        dst->Set(dst_pos + i, src->Get(src_pos + i));
+      // We're being lazy here. Optimally this could be a memcpy (if component types are
+      // assignable), but the ObjectArray implementation doesn't support transactions. The
+      // checking version, however, does.
+      if (Runtime::Current()->IsActiveTransaction()) {
+        dst->AssignableCheckingMemcpy<true>(
+            dst_pos, src, src_pos, length, true /* throw_exception */);
+      } else {
+        dst->AssignableCheckingMemcpy<false>(
+            dst_pos, src, src_pos, length, true /* throw_exception */);
       }
     }
   } else if (src_type->IsPrimitiveChar()) {
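
The reworked UnstartedSystemArraycopy validates in a fixed order before touching
the destination: null checks, then arrayness, then bounds. A self-contained C++
sketch of that ordering using plain vectors (illustrative only; the real code
operates on mirror::Array and aborts the transaction rather than throwing
standard-library exceptions):

    #include <stdexcept>
    #include <vector>

    void CheckedArrayCopy(const std::vector<int>* src, int src_pos,
                          std::vector<int>* dst, int dst_pos, int length) {
      if (src == nullptr || dst == nullptr) {
        throw std::invalid_argument("null array");  // AbortTransactionOrFail in ART
      }
      // (No arrayness check needed here: vectors are always arrays.)
      int src_len = static_cast<int>(src->size());
      int dst_len = static_cast<int>(dst->size());
      if (src_pos < 0 || dst_pos < 0 || length < 0 ||
          src_pos > src_len - length || dst_pos > dst_len - length) {
        throw std::out_of_range("IndexOutOfBoundsException");  // was ArrayIndexOOBE
      }
      // Same-array copies then pick a direction so overlapping ranges stay
      // correct; disjoint copies go through AssignableCheckingMemcpy.
    }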
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index a1ae2aa..fb53b1d 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -66,6 +66,94 @@
 #undef UNSTARTED_RUNTIME_DIRECT_LIST
 #undef UNSTARTED_RUNTIME_JNI_LIST
 #undef UNSTARTED_JNI
+
+  // Helpers for ArrayCopy.
+  //
+  // Note: as we have to use handles, we use StackHandleScope to transfer data. Hardcode a size
+  //       of three everywhere. That is enough to test all cases.
+
+  static mirror::ObjectArray<mirror::Object>* CreateObjectArray(
+      Thread* self,
+      mirror::Class* component_type,
+      const StackHandleScope<3>& data)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_type = runtime->GetClassLinker()->FindArrayClass(self, &component_type);
+    CHECK(array_type != nullptr);
+    mirror::ObjectArray<mirror::Object>* result =
+        mirror::ObjectArray<mirror::Object>::Alloc(self, array_type, 3);
+    CHECK(result != nullptr);
+    for (size_t i = 0; i < 3; ++i) {
+      result->Set(static_cast<int32_t>(i), data.GetReference(i));
+      CHECK(!self->IsExceptionPending());
+    }
+    return result;
+  }
+
+  static void CheckObjectArray(mirror::ObjectArray<mirror::Object>* array,
+                               const StackHandleScope<3>& data)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    CHECK_EQ(array->GetLength(), 3);
+    CHECK_EQ(data.NumberOfReferences(), 3U);
+    for (size_t i = 0; i < 3; ++i) {
+      EXPECT_EQ(data.GetReference(i), array->Get(static_cast<int32_t>(i))) << i;
+    }
+  }
+
+  void RunArrayCopy(Thread* self,
+                    ShadowFrame* tmp,
+                    bool expect_exception,
+                    mirror::ObjectArray<mirror::Object>* src,
+                    int32_t src_pos,
+                    mirror::ObjectArray<mirror::Object>* dst,
+                    int32_t dst_pos,
+                    int32_t length)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    JValue result;
+    tmp->SetVRegReference(0, src);
+    tmp->SetVReg(1, src_pos);
+    tmp->SetVRegReference(2, dst);
+    tmp->SetVReg(3, dst_pos);
+    tmp->SetVReg(4, length);
+    UnstartedSystemArraycopy(self, tmp, &result, 0);
+    bool exception_pending = self->IsExceptionPending();
+    EXPECT_EQ(exception_pending, expect_exception);
+    if (exception_pending) {
+      self->ClearException();
+    }
+  }
+
+  void RunArrayCopy(Thread* self,
+                    ShadowFrame* tmp,
+                    bool expect_exception,
+                    mirror::Class* src_component_class,
+                    mirror::Class* dst_component_class,
+                    const StackHandleScope<3>& src_data,
+                    int32_t src_pos,
+                    const StackHandleScope<3>& dst_data,
+                    int32_t dst_pos,
+                    int32_t length,
+                    const StackHandleScope<3>& expected_result)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    StackHandleScope<3> hs_misc(self);
+    Handle<mirror::Class> dst_component_handle(hs_misc.NewHandle(dst_component_class));
+
+    Handle<mirror::ObjectArray<mirror::Object>> src_handle(
+        hs_misc.NewHandle(CreateObjectArray(self, src_component_class, src_data)));
+
+    Handle<mirror::ObjectArray<mirror::Object>> dst_handle(
+        hs_misc.NewHandle(CreateObjectArray(self, dst_component_handle.Get(), dst_data)));
+
+    RunArrayCopy(self,
+                 tmp,
+                 expect_exception,
+                 src_handle.Get(),
+                 src_pos,
+                 dst_handle.Get(),
+                 dst_pos,
+                 length);
+    CheckObjectArray(dst_handle.Get(), expected_result);
+  }
 };
 
 TEST_F(UnstartedRuntimeTest, MemoryPeekByte) {
@@ -277,5 +365,148 @@
   ShadowFrame::DeleteDeoptimizedFrame(shadow_frame);
 }
 
+// Tests the exceptions that should be checked before modifying the destination.
+// (Doesn't check the object vs primitive case ATM.)
+TEST_F(UnstartedRuntimeTest, SystemArrayCopyObjectArrayTestExceptions) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  JValue result;
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  // Note: these tests are not GC safe. Assume there's no GC running here with the few objects we
+  //       allocate.
+  StackHandleScope<2> hs_misc(self);
+  Handle<mirror::Class> object_class(
+      hs_misc.NewHandle(mirror::Class::GetJavaLangClass()->GetSuperClass()));
+
+  StackHandleScope<3> hs_data(self);
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+  hs_data.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+  Handle<mirror::ObjectArray<mirror::Object>> array(
+      hs_misc.NewHandle(CreateObjectArray(self, object_class.Get(), hs_data)));
+
+  RunArrayCopy(self, tmp, true, array.Get(), -1, array.Get(), 0, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), -1, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 0, -1);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 0, 4);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, array.Get(), 1, 3);
+  RunArrayCopy(self, tmp, true, array.Get(), 1, array.Get(), 0, 3);
+
+  mirror::ObjectArray<mirror::Object>* class_as_array =
+      reinterpret_cast<mirror::ObjectArray<mirror::Object>*>(object_class.Get());
+  RunArrayCopy(self, tmp, true, class_as_array, 0, array.Get(), 0, 0);
+  RunArrayCopy(self, tmp, true, array.Get(), 0, class_as_array, 0, 0);
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
+TEST_F(UnstartedRuntimeTest, SystemArrayCopyObjectArrayTest) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  JValue result;
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  StackHandleScope<1> hs_object(self);
+  Handle<mirror::Class> object_class(
+      hs_object.NewHandle(mirror::Class::GetJavaLangClass()->GetSuperClass()));
+
+  // Simple test:
+  // [1,2,3]{1 @ 2} into [4,5,6] = [4,5,2]
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_dst.GetReference(0));
+    hs_expected.NewHandle(hs_dst.GetReference(1));
+    hs_expected.NewHandle(hs_src.GetReference(1));
+
+    RunArrayCopy(self,
+                 tmp,
+                 false,
+                 object_class.Get(),
+                 object_class.Get(),
+                 hs_src,
+                 1,
+                 hs_dst,
+                 2,
+                 1,
+                 hs_expected);
+  }
+
+  // Simple test:
+  // [1,2,3]{1 @ 1} into [4,5,6] = [4,2,6]  (with dst String[])
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "2"));
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_dst.GetReference(0));
+    hs_expected.NewHandle(hs_src.GetReference(1));
+    hs_expected.NewHandle(hs_dst.GetReference(2));
+
+    RunArrayCopy(self,
+                 tmp,
+                 false,
+                 object_class.Get(),
+                 mirror::String::GetJavaLangString(),
+                 hs_src,
+                 1,
+                 hs_dst,
+                 1,
+                 1,
+                 hs_expected);
+  }
+
+  // Simple test:
+  // [1,*,3] into [4,5,6] = [1,5,6] + exc
+  {
+    StackHandleScope<3> hs_src(self);
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "1"));
+    hs_src.NewHandle(mirror::String::GetJavaLangString());
+    hs_src.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "3"));
+
+    StackHandleScope<3> hs_dst(self);
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "4"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "5"));
+    hs_dst.NewHandle(mirror::String::AllocFromModifiedUtf8(self, "6"));
+
+    StackHandleScope<3> hs_expected(self);
+    hs_expected.NewHandle(hs_src.GetReference(0));
+    hs_expected.NewHandle(hs_dst.GetReference(1));
+    hs_expected.NewHandle(hs_dst.GetReference(2));
+
+    RunArrayCopy(self,
+                 tmp,
+                 true,
+                 object_class.Get(),
+                 mirror::String::GetJavaLangString(),
+                 hs_src,
+                 0,
+                 hs_dst,
+                 0,
+                 3,
+                 hs_expected);
+  }
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index b18d6a2..d751e5a 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -80,9 +80,9 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
 
-JitInstrumentationCache::JitInstrumentationCache(uint16_t hot_method_threshold,
-                                                 uint16_t warm_method_threshold,
-                                                 uint16_t osr_method_threshold)
+JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
+                                                 size_t warm_method_threshold,
+                                                 size_t osr_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
       warm_method_threshold_(warm_method_threshold),
       osr_method_threshold_(osr_method_threshold),
@@ -130,62 +130,44 @@
   }
 }
 
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, uint16_t count) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
-  // than we want resulting in samples even after the method is compiled.  Also, if the
-  // jit is no longer interested in hotness samples because we're shutting down, just return.
-  if (method->IsClassInitializer() || method->IsNative() || (thread_pool_ == nullptr)) {
-    if (thread_pool_ == nullptr) {
-      // Should only see this when shutting down.
-      DCHECK(Runtime::Current()->IsShuttingDown(self));
-    }
+  // than we want, resulting in samples even after the method is compiled.
+  if (method->IsClassInitializer() || method->IsNative()) {
     return;
   }
   DCHECK(thread_pool_ != nullptr);
-  DCHECK_GT(warm_method_threshold_, 0);
-  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
-  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
 
-  int32_t starting_count = method->GetCounter();
-  int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
-  if (starting_count < warm_method_threshold_) {
-    if (new_count >= warm_method_threshold_) {
-      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
-      if (success) {
-        VLOG(jit) << "Start profiling " << PrettyMethod(method);
-      }
-
-      if (thread_pool_ == nullptr) {
-        // Calling ProfilingInfo::Create might put us in a suspended state, which could
-        // lead to the thread pool being deleted when we are shutting down.
-        DCHECK(Runtime::Current()->IsShuttingDown(self));
-        return;
-      }
-
-      if (!success) {
-        // We failed allocating. Instead of doing the collection on the Java thread, we push
-        // an allocation to a compiler thread, that will do the collection.
-        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
-      }
+  uint16_t sample_count = method->IncrementCounter();
+  if (sample_count == warm_method_threshold_) {
+    bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+    if (success) {
+      VLOG(jit) << "Start profiling " << PrettyMethod(method);
     }
-    // Avoid jumping more than one state at a time.
-    new_count = std::min(new_count, hot_method_threshold_ - 1);
-  } else if (starting_count < hot_method_threshold_) {
-    if (new_count >= hot_method_threshold_) {
-      DCHECK(thread_pool_ != nullptr);
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
+
+    if (thread_pool_ == nullptr) {
+      // Calling ProfilingInfo::Create might put us in a suspended state, which could
+      // lead to the thread pool being deleted when we are shutting down.
+      DCHECK(Runtime::Current()->IsShuttingDown(self));
+      return;
     }
-    // Avoid jumping more than one state at a time.
-    new_count = std::min(new_count, osr_method_threshold_ - 1);
-  } else if (starting_count < osr_method_threshold_) {
-    if (new_count >= osr_method_threshold_) {
-      DCHECK(thread_pool_ != nullptr);
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+
+    if (!success) {
+      // We failed allocating. Instead of doing the collection on the Java thread, we push
+      // an allocation to a compiler thread, that will do the collection.
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
     }
   }
-  // Update hotness counter, but avoid wrap around.
-  method->SetCounter(
-      std::min(new_count, static_cast<int32_t>(std::numeric_limits<uint16_t>::max())));
+
+  if (sample_count == hot_method_threshold_) {
+    DCHECK(thread_pool_ != nullptr);
+    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
+  }
+
+  if (sample_count == osr_method_threshold_) {
+    DCHECK(thread_pool_ != nullptr);
+    thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+  }
 }
 
 JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
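
The new AddSamples collapses the old three-state countdown into a single counter
that is bumped once per event and compared for equality against each threshold,
so every transition (warm -> start profiling, hot -> compile, osr threshold ->
OSR compile) fires exactly once. A condensed model (names illustrative, not the
ART API):

    #include <cstddef>
    #include <cstdint>

    struct Method {
      uint16_t counter = 0;
      uint16_t IncrementCounter() { return ++counter; }  // mirrors ArtMethod
    };

    void AddSample(Method* m, size_t warm, size_t hot, size_t osr) {
      uint16_t n = m->IncrementCounter();
      if (n == warm) { /* allocate ProfilingInfo, start profiling */ }
      if (n == hot)  { /* enqueue JitCompileTask::kCompile */ }
      if (n == osr)  { /* enqueue JitCompileTask::kCompileOsr */ }
    }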
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 7ffd4eb..d1c5c44 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -40,8 +40,6 @@
 class Thread;
 
 namespace jit {
-static constexpr int16_t kJitCheckForOSR = -1;
-static constexpr int16_t kJitHotnessDisabled = -2;
 
 class JitInstrumentationCache;
 
@@ -86,6 +84,7 @@
 
   static constexpr uint32_t kJitEvents =
       instrumentation::Instrumentation::kMethodEntered |
+      instrumentation::Instrumentation::kBranch |
       instrumentation::Instrumentation::kInvokeVirtualOrInterface;
 
  private:
@@ -97,33 +96,25 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  JitInstrumentationCache(uint16_t hot_method_threshold,
-                          uint16_t warm_method_threshold,
-                          uint16_t osr_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples)
+  JitInstrumentationCache(size_t hot_method_threshold,
+                          size_t warm_method_threshold,
+                          size_t osr_method_threshold);
+  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool(Thread* self);
 
-  size_t OSRMethodThreshold() const {
-    return osr_method_threshold_;
-  }
-
   size_t HotMethodThreshold() const {
     return hot_method_threshold_;
   }
 
-  size_t WarmMethodThreshold() const {
-    return warm_method_threshold_;
-  }
-
   // Wait until there is no more pending compilation tasks.
   void WaitForCompilationToFinish(Thread* self);
 
  private:
-  uint16_t hot_method_threshold_;
-  uint16_t warm_method_threshold_;
-  uint16_t osr_method_threshold_;
+  size_t hot_method_threshold_;
+  size_t warm_method_threshold_;
+  size_t osr_method_threshold_;
   JitInstrumentationListener listener_;
   std::unique_ptr<ThreadPool> thread_pool_;
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 6f9d642..c3c5231 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -197,6 +197,7 @@
 }
 
 template<class T>
+template<bool kTransactionActive>
 inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src,
                                                      int32_t src_pos, int32_t count,
                                                      bool throw_exception) {
@@ -215,15 +216,15 @@
     o = src->GetWithoutChecks(src_pos + i);
     if (o == nullptr) {
       // Null is always assignable.
-      SetWithoutChecks<false>(dst_pos + i, nullptr);
+      SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
     } else {
       // TODO: use the underlying class reference to avoid uncompression when not necessary.
       Class* o_class = o->GetClass();
       if (LIKELY(lastAssignableElementClass == o_class)) {
-        SetWithoutChecks<false>(dst_pos + i, o);
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
       } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
         lastAssignableElementClass = o_class;
-        SetWithoutChecks<false>(dst_pos + i, o);
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
       } else {
         // Can't put this element into the array, break to perform write-barrier and throw
         // exception.
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 1b1295c..4257396 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -78,6 +78,7 @@
                         int32_t count) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Copy src into this array with assignability checks.
+  template<bool kTransactionActive>
   void AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
                                 int32_t count, bool throw_exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
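
Because kTransactionActive is now a template parameter (it must be a compile-time
constant for the underlying SetWithoutChecks<kTransactionActive> calls), call
sites select the instantiation from the runtime's transaction state. This is the
pattern the diff uses in unstarted_runtime.cc:

    if (Runtime::Current()->IsActiveTransaction()) {
      dst->AssignableCheckingMemcpy<true>(dst_pos, src, src_pos, length,
                                          true /* throw_exception */);
    } else {
      dst->AssignableCheckingMemcpy<false>(dst_pos, src, src_pos, length,
                                           true /* throw_exception */);
    }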
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index d9863c5..9e2d68d 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -149,7 +149,9 @@
     dstObjArray->AssignableMemcpy(dstPos, srcObjArray, srcPos, count);
     return;
   }
-  dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
+  // This code is never run under a transaction.
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+  dstObjArray->AssignableCheckingMemcpy<false>(dstPos, srcObjArray, srcPos, count, true);
 }
 
 // Template to convert general array to that of its specific primitive type.
diff --git a/runtime/oat.h b/runtime/oat.h
index 68e71c4..469a65f 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '7', '\0' };
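+  // 078: Inline info in stack maps is now bit-packed (see InlineInfoEncoding in stack_map.h).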
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '8', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 2336365..c22eb92 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -130,11 +130,19 @@
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
       InlineInfo inline_info = GetCurrentInlineInfo();
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
       DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
       bool allow_resolve = walk_kind_ != StackWalkKind::kIncludeInlinedFramesNoResolve;
       return allow_resolve
-          ? GetResolvedMethod<true>(*GetCurrentQuickFrame(), inline_info, depth_in_stack_map)
-          : GetResolvedMethod<false>(*GetCurrentQuickFrame(), inline_info, depth_in_stack_map);
+          ? GetResolvedMethod<true>(*GetCurrentQuickFrame(),
+                                    inline_info,
+                                    encoding.inline_info_encoding,
+                                    depth_in_stack_map)
+          : GetResolvedMethod<false>(*GetCurrentQuickFrame(),
+                                     inline_info,
+                                     encoding.inline_info_encoding,
+                                     depth_in_stack_map);
     } else {
       return *cur_quick_frame_;
     }
@@ -148,7 +156,10 @@
   } else if (cur_quick_frame_ != nullptr) {
     if (IsInInlinedFrame()) {
       size_t depth_in_stack_map = current_inlining_depth_ - 1;
-      return GetCurrentInlineInfo().GetDexPcAtDepth(depth_in_stack_map);
+      const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+      CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      return GetCurrentInlineInfo().GetDexPcAtDepth(encoding.inline_info_encoding,
+                                                    depth_in_stack_map);
     } else if (cur_oat_quick_method_header_ == nullptr) {
       return DexFile::kDexNoIndex;
     } else {
@@ -875,7 +886,7 @@
           if (stack_map.IsValid() && stack_map.HasInlineInfo(encoding.stack_map_encoding)) {
             InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
             DCHECK_EQ(current_inlining_depth_, 0u);
-            for (current_inlining_depth_ = inline_info.GetDepth();
+            for (current_inlining_depth_ = inline_info.GetDepth(encoding.inline_info_encoding);
                  current_inlining_depth_ != 0;
                  --current_inlining_depth_) {
               bool should_continue = VisitFrame();
diff --git a/runtime/stack.h b/runtime/stack.h
index a25874e..3659560 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -187,22 +187,6 @@
     return (dex_pc_ptr_ == nullptr) ? dex_pc_ : dex_pc_ptr_ - code_item_->insns_;
   }
 
-  int16_t GetCachedHotnessCountdown() const {
-    return cached_hotness_countdown_;
-  }
-
-  void SetCachedHotnessCountdown(int16_t cached_hotness_countdown) {
-    cached_hotness_countdown_ = cached_hotness_countdown;
-  }
-
-  int16_t GetHotnessCountdown() const {
-    return hotness_countdown_;
-  }
-
-  void SetHotnessCountdown(int16_t hotness_countdown) {
-    hotness_countdown_ = hotness_countdown;
-  }
-
   void SetDexPC(uint32_t dex_pc) {
     dex_pc_ = dex_pc;
     dex_pc_ptr_ = nullptr;
@@ -413,14 +397,6 @@
     return OFFSETOF_MEMBER(ShadowFrame, code_item_);
   }
 
-  static size_t CachedHotnessCountdownOffset() {
-    return OFFSETOF_MEMBER(ShadowFrame, cached_hotness_countdown_);
-  }
-
-  static size_t HotnessCountdownOffset() {
-    return OFFSETOF_MEMBER(ShadowFrame, hotness_countdown_);
-  }
-
   // Create ShadowFrame for interpreter using provided memory.
   static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs,
                                             ShadowFrame* link,
@@ -430,7 +406,7 @@
     return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true);
   }
 
-  const uint16_t* GetDexPCPtr() {
+  uint16_t* GetDexPCPtr() {
     return dex_pc_ptr_;
   }
 
@@ -467,13 +443,11 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   JValue* result_register_;
-  const uint16_t* dex_pc_ptr_;
+  uint16_t* dex_pc_ptr_;
   const DexFile::CodeItem* code_item_;
   LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
   const uint32_t number_of_vregs_;
   uint32_t dex_pc_;
-  int16_t cached_hotness_countdown_;
-  int16_t hotness_countdown_;
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index b51baf1..a7e7c21 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -101,6 +101,17 @@
       << ")\n";
 }
 
+void InlineInfoEncoding::Dump(VariableIndentationOutputStream* vios) const {
+  vios->Stream()
+      << "InlineInfoEncoding"
+      << " (method_index_bit_offset=" << static_cast<uint32_t>(kMethodIndexBitOffset)
+      << ", dex_pc_bit_offset=" << static_cast<uint32_t>(dex_pc_bit_offset_)
+      << ", invoke_type_bit_offset=" << static_cast<uint32_t>(invoke_type_bit_offset_)
+      << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
+      << ", total_bit_size=" << static_cast<uint32_t>(total_bit_size_)
+      << ")\n";
+}
+
 void CodeInfo::Dump(VariableIndentationOutputStream* vios,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
@@ -113,6 +124,9 @@
       << ")\n";
   ScopedIndentation indent1(vios);
   encoding.stack_map_encoding.Dump(vios);
+  if (HasInlineInfo(encoding)) {
+    encoding.inline_info_encoding.Dump(vios);
+  }
   // Display the Dex register location catalog.
   GetDexRegisterLocationCatalog(encoding).Dump(vios, *this);
   // Display stack maps along with (live) Dex register maps.
@@ -207,18 +221,22 @@
 void InlineInfo::Dump(VariableIndentationOutputStream* vios,
                       const CodeInfo& code_info,
                       uint16_t number_of_dex_registers[]) const {
-  vios->Stream() << "InlineInfo with depth " << static_cast<uint32_t>(GetDepth()) << "\n";
+  InlineInfoEncoding inline_info_encoding = code_info.ExtractEncoding().inline_info_encoding;
+  vios->Stream() << "InlineInfo with depth "
+                 << static_cast<uint32_t>(GetDepth(inline_info_encoding))
+                 << "\n";
 
-  for (size_t i = 0; i < GetDepth(); ++i) {
+  for (size_t i = 0; i < GetDepth(inline_info_encoding); ++i) {
     vios->Stream()
         << " At depth " << i
         << std::hex
-        << " (dex_pc=0x" << GetDexPcAtDepth(i)
+        << " (dex_pc=0x" << GetDexPcAtDepth(inline_info_encoding, i)
         << std::dec
-        << ", method_index=" << GetMethodIndexAtDepth(i)
-        << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(i))
+        << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, i)
+        << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(inline_info_encoding,
+                                                                            i))
         << ")\n";
-    if (HasDexRegisterMapAtDepth(i) && (number_of_dex_registers != nullptr)) {
+    if (HasDexRegisterMapAtDepth(inline_info_encoding, i) && (number_of_dex_registers != nullptr)) {
       CodeInfoEncoding encoding = code_info.ExtractEncoding();
       DexRegisterMap dex_register_map =
           code_info.GetDexRegisterMapAtDepth(i, *this, encoding, number_of_dex_registers[i]);
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 9e8884e..7c50f97 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -24,12 +24,6 @@
 
 namespace art {
 
-#define ELEMENT_BYTE_OFFSET_AFTER(PreviousElement) \
-  k ## PreviousElement ## Offset + sizeof(PreviousElement ## Type)
-
-#define ELEMENT_BIT_OFFSET_AFTER(PreviousElement) \
-  k ## PreviousElement ## BitOffset + PreviousElement ## BitSize
-
 class VariableIndentationOutputStream;
 
 // Size of a frame slot, in bytes.  This constant is a signed value,
@@ -888,102 +882,139 @@
   friend class StackMapStream;
 };
 
+class InlineInfoEncoding {
+ public:
+  void SetFromSizes(size_t method_index_max,
+                    size_t dex_pc_max,
+                    size_t invoke_type_max,
+                    size_t dex_register_map_size) {
+    total_bit_size_ = kMethodIndexBitOffset;
+    total_bit_size_ += MinimumBitsToStore(method_index_max);
+
+    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+
+    invoke_type_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(invoke_type_max);
+
+    // We also need +1 for kNoDexRegisterMap, but since the size is strictly
+    // greater than any offset we might try to encode, we already implicitly have it.
+    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
+  }
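+
+  // Worked example (hypothetical sizes): method_index_max = 5, dex_pc_max = 10,
+  // invoke_type_max = 4 and dex_register_map_size = 16 yield field widths of
+  // 3, 4, 3 and 5 bits via MinimumBitsToStore. Together with the leading
+  // is_last bit an entry needs 1 + 3 + 4 + 3 + 5 = 16 bits, so GetEntrySize()
+  // returns 2 bytes.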
+
+  ALWAYS_INLINE FieldEncoding GetMethodIndexEncoding() const {
+    return FieldEncoding(kMethodIndexBitOffset, dex_pc_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
+    return FieldEncoding(dex_pc_bit_offset_, invoke_type_bit_offset_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE FieldEncoding GetInvokeTypeEncoding() const {
+    return FieldEncoding(invoke_type_bit_offset_, dex_register_map_bit_offset_);
+  }
+  ALWAYS_INLINE FieldEncoding GetDexRegisterMapEncoding() const {
+    return FieldEncoding(dex_register_map_bit_offset_, total_bit_size_, -1 /* min_value */);
+  }
+  ALWAYS_INLINE size_t GetEntrySize() const {
+    return RoundUp(total_bit_size_, kBitsPerByte) / kBitsPerByte;
+  }
+
+  void Dump(VariableIndentationOutputStream* vios) const;
+
+ private:
+  static constexpr uint8_t kIsLastBitOffset = 0;
+  static constexpr uint8_t kMethodIndexBitOffset = 1;
+  uint8_t dex_pc_bit_offset_;
+  uint8_t invoke_type_bit_offset_;
+  uint8_t dex_register_map_bit_offset_;
+  uint8_t total_bit_size_;
+};
+
 /**
  * Inline information for a specific PC. The information is of the form:
  *
- *   [inlining_depth, entry+]
- *
- * where `entry` is of the form:
- *
- *   [dex_pc, method_index, dex_register_map_offset].
+ *   [is_last, method_index, dex_pc, invoke_type, dex_register_map_offset]+.
  */
 class InlineInfo {
  public:
-  // Memory layout: fixed contents.
-  typedef uint8_t DepthType;
-  // Memory layout: single entry contents.
-  typedef uint32_t MethodIndexType;
-  typedef uint32_t DexPcType;
-  typedef uint8_t InvokeTypeType;
-  typedef uint32_t DexRegisterMapType;
-
-  explicit InlineInfo(MemoryRegion region) : region_(region) {}
-
-  DepthType GetDepth() const {
-    return region_.LoadUnaligned<DepthType>(kDepthOffset);
+  explicit InlineInfo(MemoryRegion region) : region_(region) {
   }
 
-  void SetDepth(DepthType depth) {
-    region_.StoreUnaligned<DepthType>(kDepthOffset, depth);
+  ALWAYS_INLINE uint32_t GetDepth(const InlineInfoEncoding& encoding) const {
+    size_t depth = 0;
+    while (!GetRegionAtDepth(encoding, depth++).LoadBit(0)) { }  // Check is_last bit.
+    return depth;
   }
 
-  MethodIndexType GetMethodIndexAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<MethodIndexType>(
-        kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset);
+  ALWAYS_INLINE void SetDepth(const InlineInfoEncoding& encoding, uint32_t depth) {
+    DCHECK_GT(depth, 0u);
+    for (size_t d = 0; d < depth; ++d) {
+      GetRegionAtDepth(encoding, d).StoreBit(0, d == depth - 1);  // Set is_last bit.
+    }
   }
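+
+  // The depth is not stored explicitly: entries are laid out back to back and
+  // each entry's is_last bit terminates the chain. For a depth of three,
+  // SetDepth() stores is_last bits [0, 0, 1] and GetDepth() scans entries
+  // until it finds a set bit.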
 
-  void SetMethodIndexAtDepth(DepthType depth, MethodIndexType index) {
-    region_.StoreUnaligned<MethodIndexType>(
-        kFixedSize + depth * SingleEntrySize() + kMethodIndexOffset, index);
+  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                               uint32_t depth) const {
+    return encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  DexPcType GetDexPcAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<DexPcType>(
-        kFixedSize + depth * SingleEntrySize() + kDexPcOffset);
+  ALWAYS_INLINE void SetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                           uint32_t depth,
+                                           uint32_t index) {
+    encoding.GetMethodIndexEncoding().Store(GetRegionAtDepth(encoding, depth), index);
   }
 
-  void SetDexPcAtDepth(DepthType depth, DexPcType dex_pc) {
-    region_.StoreUnaligned<DexPcType>(
-        kFixedSize + depth * SingleEntrySize() + kDexPcOffset, dex_pc);
+  ALWAYS_INLINE uint32_t GetDexPcAtDepth(const InlineInfoEncoding& encoding,
+                                         uint32_t depth) const {
+    return encoding.GetDexPcEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  InvokeTypeType GetInvokeTypeAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<InvokeTypeType>(
-        kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset);
+  ALWAYS_INLINE void SetDexPcAtDepth(const InlineInfoEncoding& encoding,
+                                     uint32_t depth,
+                                     uint32_t dex_pc) {
+    encoding.GetDexPcEncoding().Store(GetRegionAtDepth(encoding, depth), dex_pc);
   }
 
-  void SetInvokeTypeAtDepth(DepthType depth, InvokeTypeType invoke_type) {
-    region_.StoreUnaligned<InvokeTypeType>(
-        kFixedSize + depth * SingleEntrySize() + kInvokeTypeOffset, invoke_type);
+  ALWAYS_INLINE uint32_t GetInvokeTypeAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    return encoding.GetInvokeTypeEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  DexRegisterMapType GetDexRegisterMapOffsetAtDepth(DepthType depth) const {
-    return region_.LoadUnaligned<DexRegisterMapType>(
-        kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset);
+  ALWAYS_INLINE void SetInvokeTypeAtDepth(const InlineInfoEncoding& encoding,
+                                          uint32_t depth,
+                                          uint32_t invoke_type) {
+    encoding.GetInvokeTypeEncoding().Store(GetRegionAtDepth(encoding, depth), invoke_type);
   }
 
-  void SetDexRegisterMapOffsetAtDepth(DepthType depth, DexRegisterMapType offset) {
-    region_.StoreUnaligned<DexRegisterMapType>(
-        kFixedSize + depth * SingleEntrySize() + kDexRegisterMapOffset, offset);
+  ALWAYS_INLINE uint32_t GetDexRegisterMapOffsetAtDepth(const InlineInfoEncoding& encoding,
+                                                        uint32_t depth) const {
+    return encoding.GetDexRegisterMapEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  bool HasDexRegisterMapAtDepth(DepthType depth) const {
-    return GetDexRegisterMapOffsetAtDepth(depth) != StackMap::kNoDexRegisterMap;
+  ALWAYS_INLINE void SetDexRegisterMapOffsetAtDepth(const InlineInfoEncoding& encoding,
+                                                    uint32_t depth,
+                                                    uint32_t offset) {
+    encoding.GetDexRegisterMapEncoding().Store(GetRegionAtDepth(encoding, depth), offset);
   }
 
-  static size_t SingleEntrySize() {
-    return kFixedEntrySize;
+  ALWAYS_INLINE bool HasDexRegisterMapAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    return GetDexRegisterMapOffsetAtDepth(encoding, depth) != StackMap::kNoDexRegisterMap;
   }
 
   void Dump(VariableIndentationOutputStream* vios,
-            const CodeInfo& info, uint16_t* number_of_dex_registers) const;
-
+            const CodeInfo& info,
+            uint16_t* number_of_dex_registers) const;
 
  private:
-  static constexpr int kDepthOffset = 0;
-  static constexpr int kFixedSize = ELEMENT_BYTE_OFFSET_AFTER(Depth);
-
-  static constexpr int kMethodIndexOffset = 0;
-  static constexpr int kDexPcOffset = ELEMENT_BYTE_OFFSET_AFTER(MethodIndex);
-  static constexpr int kInvokeTypeOffset = ELEMENT_BYTE_OFFSET_AFTER(DexPc);
-  static constexpr int kDexRegisterMapOffset = ELEMENT_BYTE_OFFSET_AFTER(InvokeType);
-  static constexpr int kFixedEntrySize = ELEMENT_BYTE_OFFSET_AFTER(DexRegisterMap);
+  ALWAYS_INLINE MemoryRegion GetRegionAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth) const {
+    size_t entry_size = encoding.GetEntrySize();
+    DCHECK_GT(entry_size, 0u);
+    return region_.Subregion(depth * entry_size, entry_size);
+  }
 
   MemoryRegion region_;
-
-  friend class CodeInfo;
-  friend class StackMap;
-  friend class StackMapStream;
 };
 
 // Most of the fields are encoded as ULEB128 to save space.
@@ -993,6 +1024,7 @@
   uint32_t stack_map_size_in_bytes;
   uint32_t number_of_location_catalog_entries;
   StackMapEncoding stack_map_encoding;
+  InlineInfoEncoding inline_info_encoding;
   uint8_t header_size;
 
   CodeInfoEncoding() { }
@@ -1003,9 +1035,18 @@
     number_of_stack_maps = DecodeUnsignedLeb128(&ptr);
     stack_map_size_in_bytes = DecodeUnsignedLeb128(&ptr);
     number_of_location_catalog_entries = DecodeUnsignedLeb128(&ptr);
-    static_assert(alignof(StackMapEncoding) == 1, "StackMapEncoding should not require alignment");
+    static_assert(alignof(StackMapEncoding) == 1,
+                  "StackMapEncoding should not require alignment");
     stack_map_encoding = *reinterpret_cast<const StackMapEncoding*>(ptr);
     ptr += sizeof(StackMapEncoding);
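+    // The InlineInfoEncoding is serialized only if some stack map can hold an
+    // inline info offset, i.e. if that field was given a non-zero bit size.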
+    if (stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0) {
+      static_assert(alignof(InlineInfoEncoding) == 1,
+                    "InlineInfoEncoding should not require alignment");
+      inline_info_encoding = *reinterpret_cast<const InlineInfoEncoding*>(ptr);
+      ptr += sizeof(InlineInfoEncoding);
+    } else {
+      inline_info_encoding = InlineInfoEncoding{};  // NOLINT.
+    }
     header_size = dchecked_integral_cast<uint8_t>(ptr - reinterpret_cast<const uint8_t*>(data));
   }
 
@@ -1015,8 +1056,12 @@
     EncodeUnsignedLeb128(dest, number_of_stack_maps);
     EncodeUnsignedLeb128(dest, stack_map_size_in_bytes);
     EncodeUnsignedLeb128(dest, number_of_location_catalog_entries);
-    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(&stack_map_encoding);
-    dest->insert(dest->end(), ptr, ptr + sizeof(stack_map_encoding));
+    const uint8_t* stack_map_ptr = reinterpret_cast<const uint8_t*>(&stack_map_encoding);
+    dest->insert(dest->end(), stack_map_ptr, stack_map_ptr + sizeof(StackMapEncoding));
+    if (stack_map_encoding.GetInlineInfoEncoding().BitSize() > 0) {
+      const uint8_t* inline_info_ptr = reinterpret_cast<const uint8_t*>(&inline_info_encoding);
+      dest->insert(dest->end(), inline_info_ptr, inline_info_ptr + sizeof(InlineInfoEncoding));
+    }
   }
 };
 
@@ -1110,11 +1155,11 @@
                                           InlineInfo inline_info,
                                           const CodeInfoEncoding& encoding,
                                           uint32_t number_of_dex_registers) const {
-    if (!inline_info.HasDexRegisterMapAtDepth(depth)) {
+    if (!inline_info.HasDexRegisterMapAtDepth(encoding.inline_info_encoding, depth)) {
       return DexRegisterMap();
     } else {
-      uint32_t offset = GetDexRegisterMapsOffset(encoding)
-                        + inline_info.GetDexRegisterMapOffsetAtDepth(depth);
+      uint32_t offset = GetDexRegisterMapsOffset(encoding) +
+          inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info_encoding, depth);
       size_t size = ComputeDexRegisterMapSizeOf(encoding, offset, number_of_dex_registers);
       return DexRegisterMap(region_.Subregion(offset, size));
     }
@@ -1124,9 +1169,7 @@
     DCHECK(stack_map.HasInlineInfo(encoding.stack_map_encoding));
     uint32_t offset = stack_map.GetInlineDescriptorOffset(encoding.stack_map_encoding)
                       + GetDexRegisterMapsOffset(encoding);
-    uint8_t depth = region_.LoadUnaligned<uint8_t>(offset);
-    return InlineInfo(region_.Subregion(offset,
-        InlineInfo::kFixedSize + depth * InlineInfo::SingleEntrySize()));
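+    // The number of entries is not known until the is_last bits are decoded,
+    // so hand out everything up to the end of the region.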
+    return InlineInfo(region_.Subregion(offset, region_.size() - offset));
   }
 
   StackMap GetStackMapForDexPc(uint32_t dex_pc, const CodeInfoEncoding& encoding) const {
diff --git a/test/146-bad-interface/build b/test/146-bad-interface/build
new file mode 100755
index 0000000..0dd8573
--- /dev/null
+++ b/test/146-bad-interface/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Not needed when running with --jvm.
+  # Hard-wired use of experimental jack.
+  # TODO: fix this temporary work-around for default-methods, see b/19467889
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental default-methods
diff --git a/test/146-bad-interface/expected.txt b/test/146-bad-interface/expected.txt
new file mode 100644
index 0000000..3441966
--- /dev/null
+++ b/test/146-bad-interface/expected.txt
@@ -0,0 +1 @@
+running invoke
diff --git a/test/146-bad-interface/info.txt b/test/146-bad-interface/info.txt
new file mode 100644
index 0000000..38f188e
--- /dev/null
+++ b/test/146-bad-interface/info.txt
@@ -0,0 +1 @@
+Check whether a duplicate class can invoke-interface on an unresolved method.
diff --git a/test/146-bad-interface/run b/test/146-bad-interface/run
new file mode 100755
index 0000000..ceef6b8
--- /dev/null
+++ b/test/146-bad-interface/run
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We want to run in no-dex-file-fallback mode to confirm that even though the -ex file has a
+# symbolic reference to A, there's no class-def, so we don't detect a collision.
+exec ${RUN} --secondary "${@}"
diff --git a/test/146-bad-interface/smali/invoke_inf.smali b/test/146-bad-interface/smali/invoke_inf.smali
new file mode 100644
index 0000000..c5101e0
--- /dev/null
+++ b/test/146-bad-interface/smali/invoke_inf.smali
@@ -0,0 +1,24 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LInvokeInf;
+.super Ljava/lang/Object;
+
+.method public static doInvoke(LIface;)V
+.locals 0
+    invoke-interface {p0}, LIface;->invoke()V
+    return-void
+.end method
+
diff --git a/test/146-bad-interface/src-ex/A.java b/test/146-bad-interface/src-ex/A.java
new file mode 100644
index 0000000..a30a5f2
--- /dev/null
+++ b/test/146-bad-interface/src-ex/A.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A implements Iface {
+}
diff --git a/test/146-bad-interface/src-ex/Iface.java b/test/146-bad-interface/src-ex/Iface.java
new file mode 100644
index 0000000..921e25c
--- /dev/null
+++ b/test/146-bad-interface/src-ex/Iface.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Iface {
+  public default void aPadding() {}
+  public default void bPadding() {}
+  public default void cPadding() {}
+  public default void dPadding() {}
+  public default void invoke() {
+    System.out.println("running invoke");
+  }
+  public default void wPadding() {}
+  public default void xPadding() {}
+  public default void yPadding() {}
+  public default void zPadding() {}
+}
diff --git a/test/146-bad-interface/src/Main.java b/test/146-bad-interface/src/Main.java
new file mode 100644
index 0000000..5534bb4
--- /dev/null
+++ b/test/146-bad-interface/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import dalvik.system.PathClassLoader;
+
+/**
+ * Structural hazard test.
+ */
+public class Main {
+  static final String DEX_LOCATION = System.getenv("DEX_LOCATION");
+  static final String DEX_FILES =
+      DEX_LOCATION + "/146-bad-interface-ex.jar" + ":" +
+      DEX_LOCATION + "/146-bad-interface.jar";
+  public static void main(String[] args) {
+    try {
+      PathClassLoader p = new PathClassLoader(DEX_FILES, Main.class.getClassLoader());
+      Class<?> c = Class.forName("A", true, p);
+      Object o = c.newInstance();
+      Class<?> runner = Class.forName("InvokeInf", true, p);
+      Class<?> arg = Class.forName("Iface", true, p);
+      Method r = runner.getDeclaredMethod("doInvoke", arg);
+      r.invoke(null, o);
+    } catch (Throwable t) {
+      System.out.println("Error occurred");
+      System.out.println(t);
+      t.printStackTrace();
+    }
+  }
+}
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 8ec840d1..733a1dd 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -21,11 +21,11 @@
 
 ## CHECK-START: int Builder.testMultipleTryCatch(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
-## CHECK:  <<Minus3:i\d+>>  IntConstant -3
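+# The IntConstants may appear in any order, so they are matched with CHECK-DAG.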
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK-DAG:  <<Minus3:i\d+>>  IntConstant -3
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>"
@@ -236,10 +236,10 @@
 
 ## CHECK-START: int Builder.testMultipleExits(int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnterTry:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnterTry:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry:B\d+>>"
 ## CHECK:  predecessors     "<<BEnterTry>>"
@@ -312,10 +312,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BEnter1:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BEnter1:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1>>"
@@ -403,10 +403,10 @@
 
 ## CHECK-START: int Builder.testSharedBoundary_Reverse(int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  successors       "<<BGoto:B\d+>>"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK:      successors       "<<BGoto:B\d+>>"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BGoto>>"
 ## CHECK:  successors       "<<BEnter2:B\d+>>"
@@ -504,9 +504,9 @@
 
 ## CHECK-START: int Builder.testNestedTry(int, int, int, int) builder (after)
 
-## CHECK:  name             "B0"
-## CHECK:  <<Minus1:i\d+>>  IntConstant -1
-## CHECK:  <<Minus2:i\d+>>  IntConstant -2
+## CHECK:      name             "B0"
+## CHECK-DAG:  <<Minus1:i\d+>>  IntConstant -1
+## CHECK-DAG:  <<Minus2:i\d+>>  IntConstant -2
 
 ## CHECK:  name             "<<BTry1:B\d+>>"
 ## CHECK:  predecessors     "<<BEnter1:B\d+>>"
@@ -728,8 +728,17 @@
 ## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
+
+## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
+
+## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
 ## CHECK:  Goto
 
@@ -757,10 +766,6 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BReturn>>"
-## CHECK:  predecessors     "<<BCatch>>" "<<BOutside>>"
-## CHECK:  Return
-
 ## CHECK:  name             "<<BSplit1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
 ## CHECK:  successors       "<<BEnterTry2>>"
@@ -838,7 +843,15 @@
 ## CHECK:  successors       "<<BReturn:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  name             "<<BReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BCatch:B\d+>>"
+## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
+
+## CHECK:  name             "<<BExit>>"
+## CHECK:  Exit
+
+## CHECK:  name             "<<BCatch>>"
 ## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
 ## CHECK:  successors       "<<BReturn>>"
 ## CHECK:  flags            "catch_block"
@@ -868,10 +881,6 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BReturn>>"
-## CHECK:  predecessors     "<<BCatch>>" "<<BOutside>>"
-## CHECK:  Return
-
 ## CHECK:  name             "<<BSplit1>>"
 ## CHECK:  predecessors     "<<BPSwitch0>>"
 ## CHECK:  successors       "<<BTry2>>"
@@ -968,12 +977,12 @@
 ## CHECK-START: int Builder.testCatchLoop(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BSplit2:B\d+>>"
+## CHECK:  successors       "<<BSplit:B\d+>>"
 
-## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry:B\d+>>" "<<BExitTry:B\d+>>"
-## CHECK:  successors       "<<BSplit1:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry:B\d+>>"
+## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry>>"
@@ -984,13 +993,14 @@
 ## CHECK:  predecessors     "<<BReturn>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry>>"
-## CHECK:  successors       "<<BExitTry>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry>>" "<<BExitTry>>"
+## CHECK:  successors       "<<BEnterTry>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry>>"
-## CHECK:  predecessors     "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit>>" "<<BCatch>>"
 ## CHECK:  successors       "<<BTry>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
@@ -1001,14 +1011,9 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BSplit1>>"
-## CHECK:  predecessors     "<<BSplit2>>" "<<BCatch>>"
-## CHECK:  successors       "<<BEnterTry>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit2>>"
+## CHECK:  name             "<<BSplit>>"
 ## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  successors       "<<BEnterTry>>"
 ## CHECK:  Goto
 
 .method public static testCatchLoop(III)I
@@ -1037,23 +1042,25 @@
 ## CHECK:  successors       "<<BExitTry1:B\d+>>"
 ## CHECK:  Div
 
-## CHECK:  name             "<<BCatch:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>"
-## CHECK:  successors       "<<BSplit1:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
 ## CHECK:  successors       "<<BExit:B\d+>>"
+## CHECK:  Return
 
 ## CHECK:  name             "<<BExit>>"
 ## CHECK:  predecessors     "<<BReturn>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BExitTry2>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Goto
 
 ## CHECK:  name             "<<BEnterTry1>>"
 ## CHECK:  predecessors     "B0"
@@ -1062,14 +1069,14 @@
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit:B\d+>>" "<<BCatch>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BSplit2:B\d+>>"
+## CHECK:  successors       "<<BSplit>>"
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -1079,14 +1086,9 @@
 ## CHECK:  xhandlers        "<<BCatch>>"
 ## CHECK:  TryBoundary      kind:exit
 
-## CHECK:  name             "<<BSplit1>>"
-## CHECK:  predecessors     "<<BSplit2>>" "<<BCatch>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit2>>"
+## CHECK:  name             "<<BSplit>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
-## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
 ## CHECK:  Goto
 
 .method public static testHandlerEdge1(III)I
@@ -1109,17 +1111,17 @@
 ## CHECK-START: int Builder.testHandlerEdge2(int, int, int) builder (after)
 
 ## CHECK:  name             "B0"
-## CHECK:  successors       "<<BSplit4:B\d+>>"
-
-## CHECK:  name             "<<BCatch1:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>"
 ## CHECK:  successors       "<<BSplit1:B\d+>>"
-## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BCatch2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1:B\d+>>" "<<BExitTry1:B\d+>>"
-## CHECK:  successors       "<<BSplit2:B\d+>>"
-## CHECK:  flags            "catch_block"
+## CHECK:  name             "<<BTry1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1:B\d+>>"
+## CHECK:  successors       "<<BExitTry1:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BTry2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2:B\d+>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
 
 ## CHECK:  name             "<<BReturn:B\d+>>"
 ## CHECK:  predecessors     "<<BExitTry2>>"
@@ -1129,31 +1131,31 @@
 ## CHECK:  name             "<<BExit>>"
 ## CHECK:  Exit
 
-## CHECK:  name             "<<BTry1:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry1>>"
-## CHECK:  successors       "<<BExitTry1>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch2:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>" "<<BExitTry1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
+## CHECK:  flags            "catch_block"
 
-## CHECK:  name             "<<BTry2:B\d+>>"
-## CHECK:  predecessors     "<<BEnterTry2>>"
-## CHECK:  successors       "<<BExitTry2>>"
-## CHECK:  Div
+## CHECK:  name             "<<BCatch1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry2>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BEnterTry1>>"
+## CHECK:  flags            "catch_block"
 
 ## CHECK:  name             "<<BEnterTry1>>"
-## CHECK:  predecessors     "<<BSplit1>>"
+## CHECK:  predecessors     "<<BSplit1>>" "<<BCatch1>>"
 ## CHECK:  successors       "<<BTry1>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BEnterTry2>>"
-## CHECK:  predecessors     "<<BSplit2>>"
+## CHECK:  predecessors     "<<BSplit2:B\d+>>" "<<BCatch2>>"
 ## CHECK:  successors       "<<BTry2>>"
 ## CHECK:  xhandlers        "<<BCatch1>>"
 ## CHECK:  TryBoundary      kind:entry
 
 ## CHECK:  name             "<<BExitTry1>>"
 ## CHECK:  predecessors     "<<BTry1>>"
-## CHECK:  successors       "<<BSplit3:B\d+>>"
+## CHECK:  successors       "<<BSplit2>>"
 ## CHECK:  xhandlers        "<<BCatch2>>"
 ## CHECK:  TryBoundary      kind:exit
 
@@ -1164,23 +1166,13 @@
 ## CHECK:  TryBoundary      kind:exit
 
 ## CHECK:  name             "<<BSplit1>>"
-## CHECK:  predecessors     "<<BSplit4>>" "<<BCatch1>>"
+## CHECK:  predecessors     "B0"
 ## CHECK:  successors       "<<BEnterTry1>>"
 ## CHECK:  Goto
 
 ## CHECK:  name             "<<BSplit2>>"
-## CHECK:  predecessors     "<<BCatch2>>" "<<BSplit3>>"
-## CHECK:  successors       "<<BEnterTry2>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit3>>"
 ## CHECK:  predecessors     "<<BExitTry1>>"
-## CHECK:  successors       "<<BSplit2>>"
-## CHECK:  Goto
-
-## CHECK:  name             "<<BSplit4>>"
-## CHECK:  predecessors     "B0"
-## CHECK:  successors       "<<BSplit1>>"
+## CHECK:  successors       "<<BEnterTry2>>"
 ## CHECK:  Goto
 
 .method public static testHandlerEdge2(III)I
@@ -1365,7 +1357,10 @@
     .catchall {:try_start .. :try_end} :catch_all
 .end method
 
-## CHECK-START: int Builder.testSynchronized(java.lang.Object) builder (after)
+# Test that a throw-catch loop on monitor-exit is eliminated.
+# Note that we do not test this until after DCE, which merges trivially split blocks.
+
+## CHECK-START: int Builder.testSynchronized(java.lang.Object) dead_code_elimination (after)
 ## CHECK:      flags "catch_block"
 ## CHECK-NOT:  end_block
 ## CHECK:      MonitorOperation kind:exit
diff --git a/test/510-checker-try-catch/smali/SsaBuilder.smali b/test/510-checker-try-catch/smali/SsaBuilder.smali
index 1fd5fb2..bfc0b20 100644
--- a/test/510-checker-try-catch/smali/SsaBuilder.smali
+++ b/test/510-checker-try-catch/smali/SsaBuilder.smali
@@ -25,23 +25,24 @@
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
-## CHECK-NEXT: successors       "<<BExtracted:B\d+>>"
+## CHECK-NEXT: successors       "<<BAdd:B\d+>>"
 
-## CHECK:      name             "<<BCatch:B\d+>>"
+## CHECK:      name             "<<BAdd>>"
+## CHECK-NEXT: from_bci
+## CHECK-NEXT: to_bci
+## CHECK-NEXT: predecessors     "B1" "<<BCatch:B\d+>>"
+## CHECK-NEXT: successors
+## CHECK-NEXT: xhandlers
+## CHECK-NOT:  end_block
+## CHECK:      Add
+
+## CHECK:      name             "<<BCatch>>"
 ## CHECK-NEXT: from_bci
 ## CHECK-NEXT: to_bci
 ## CHECK-NEXT: predecessors
-## CHECK-NEXT: successors       "<<BExtracted>>"
+## CHECK-NEXT: successors       "<<BAdd>>"
 ## CHECK-NEXT: xhandlers
 ## CHECK-NEXT: flags            "catch_block"
-## CHECK-NOT:  Add
-
-## CHECK:      name             "<<BExtracted>>"
-## CHECK-NEXT: from_bci
-## CHECK-NEXT: to_bci
-## CHECK-NEXT: predecessors     "B1" "<<BCatch>>"
-## CHECK-NOT:  flags            "catch_block"
-## CHECK:      Add
 
 .method public static testSimplifyCatchBlock(III)I
     .registers 4
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 62511df..9f9916d 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -215,10 +215,10 @@
 ## CHECK-DAG:     <<Const0x10:i\d+>> IntConstant 16
 ## CHECK-DAG:     <<Const0x11:i\d+>> IntConstant 17
 ## CHECK-DAG:     <<Add:i\d+>>       Add [<<Arg0>>,<<Arg1>>]
-## CHECK-DAG:     <<Phi:i\d+>>       Phi [<<Add>>,<<Const0xf>>] reg:3 is_catch_phi:false
+## CHECK-DAG:     <<Select:i\d+>>    Select [<<Const0xf>>,<<Add>>,{{z\d+}}]
 ## CHECK-DAG:                        Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
-## CHECK-DAG:                        Phi [<<Phi>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
+## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
 ## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
@@ -246,7 +246,6 @@
     add-int v2, p0, p1             # dead catch phi input, defined in the dead block (HInstruction)
     move v3, v2
     if-eqz v3, :define_phi
-    sput v3, LTestCase;->sField:I  # beat HSelect simplification (has side-effects, does not throw)
     const v3, 0xf
     :define_phi
     # v3 = Phi [Add, 0xf]          # dead catch phi input, defined in the dead block (HPhi)
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
index 2683b25..aad9689 100644
--- a/test/564-checker-bitcount/src/Main.java
+++ b/test/564-checker-bitcount/src/Main.java
@@ -20,67 +20,187 @@
   // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
   // CHECK-DAG: popcnt
 
-  /// CHECK-START: int Main.bits32(int) intrinsics_recognition (after)
-  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:IntegerBitCount
-  /// CHECK-DAG:                 Return [<<Result>>]
-  private static int bits32(int x) {
+
+  /// CHECK-START: int Main.$noinline$BitCountBoolean(boolean) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountBoolean(boolean x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x ? 1 : 0);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountByte(byte) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountByte(byte x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
     return Integer.bitCount(x);
   }
 
-  /// CHECK-START: int Main.bits64(long) intrinsics_recognition (after)
-  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:LongBitCount
-  /// CHECK-DAG:                 Return [<<Result>>]
-  private static int bits64(long x) {
+  /// CHECK-START: int Main.$noinline$BitCountShort(short) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountShort(short x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountChar(char) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountChar(char x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountInt(int) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:IntegerBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountInt(int x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
+    return Integer.bitCount(x);
+  }
+
+  /// CHECK-START: int Main.$noinline$BitCountLong(long) intrinsics_recognition (after)
+  /// CHECK-DAG:     <<Result:i\d+>>  InvokeStaticOrDirect intrinsic:LongBitCount
+  /// CHECK-DAG:                      Return [<<Result>>]
+  private static int $noinline$BitCountLong(long x) {
+    if (doThrow) { throw new Error(); }  // Try defeating inlining.
     return Long.bitCount(x);
   }
 
+  public static void testBitCountBoolean() {
+    expectEqualsInt($noinline$BitCountBoolean(false), 0);
+    expectEqualsInt($noinline$BitCountBoolean(true), 1);
+  }
+
+  public static void testBitCountByte() {
+    // Number of bits in a 32-bit integer representing the sign
+    // extension of a byte value widened to an int.
+    int signExtensionSize = Integer.SIZE - Byte.SIZE;
+    // Sign bit position in a byte.
+    int signBit = Byte.SIZE - 1;
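+    // For example, (byte) 0xF0 widens to 0xFFFFFFF0, which has 4 + 24 bits set.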
+
+    expectEqualsInt($noinline$BitCountByte((byte) 0x00), 0);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x01), 1);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x10), 1);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x11), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x03), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x70), 3);
+    expectEqualsInt($noinline$BitCountByte((byte) 0xF0), 4 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x0F), 4);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x12), 2);
+    expectEqualsInt($noinline$BitCountByte((byte) 0x9A), 4 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountByte((byte) 0xFF), 8 + signExtensionSize);
+
+    for (int i = 0; i < Byte.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountByte((byte) (1 << i)),
+                      (i < signBit) ? 1 : 1 + signExtensionSize);
+    }
+  }
+
+  public static void testBitCountShort() {
+    // Number of bits in a 32-bit integer representing the sign
+    // extension of a short value widened to an int.
+    int signExtensionSize = Integer.SIZE - Short.SIZE;
+    // Sign bit position in a short.
+    int signBit = Short.SIZE - 1;
+
+    expectEqualsInt($noinline$BitCountShort((short) 0x0000), 0);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0001), 1);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1000), 1);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1001), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0003), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x7000), 3);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0F00), 4);
+    expectEqualsInt($noinline$BitCountShort((short) 0x0011), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1100), 2);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1111), 4);
+    expectEqualsInt($noinline$BitCountShort((short) 0x1234), 5);
+    expectEqualsInt($noinline$BitCountShort((short) 0x9ABC), 9 + signExtensionSize);
+    expectEqualsInt($noinline$BitCountShort((short) 0xFFFF), 16 + signExtensionSize);
+
+    for (int i = 0; i < Short.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountShort((short) (1 << i)),
+                      (i < signBit) ? 1 : 1 + signExtensionSize);
+    }
+  }
+
+  public static void testBitCountChar() {
+    expectEqualsInt($noinline$BitCountChar((char) 0x0000), 0);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0001), 1);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1000), 1);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1001), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0003), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x7000), 3);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0F00), 4);
+    expectEqualsInt($noinline$BitCountChar((char) 0x0011), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1100), 2);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1111), 4);
+    expectEqualsInt($noinline$BitCountChar((char) 0x1234), 5);
+    expectEqualsInt($noinline$BitCountChar((char) 0x9ABC), 9);
+    expectEqualsInt($noinline$BitCountChar((char) 0xFFFF), 16);
+
+    for (int i = 0; i < Character.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountChar((char) (1 << i)), 1);
+    }
+  }
+
+  public static void testBitCountInt() {
+    expectEqualsInt($noinline$BitCountInt(0x00000000), 0);
+    expectEqualsInt($noinline$BitCountInt(0x00000001), 1);
+    expectEqualsInt($noinline$BitCountInt(0x10000000), 1);
+    expectEqualsInt($noinline$BitCountInt(0x10000001), 2);
+    expectEqualsInt($noinline$BitCountInt(0x00000003), 2);
+    expectEqualsInt($noinline$BitCountInt(0x70000000), 3);
+    expectEqualsInt($noinline$BitCountInt(0x000F0000), 4);
+    expectEqualsInt($noinline$BitCountInt(0x00001111), 4);
+    expectEqualsInt($noinline$BitCountInt(0x11110000), 4);
+    expectEqualsInt($noinline$BitCountInt(0x11111111), 8);
+    expectEqualsInt($noinline$BitCountInt(0x12345678), 13);
+    expectEqualsInt($noinline$BitCountInt(0x9ABCDEF0), 19);
+    expectEqualsInt($noinline$BitCountInt(0xFFFFFFFF), 32);
+
+    for (int i = 0; i < Integer.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountInt(1 << i), 1);
+    }
+  }
+
+  public static void testBitCountLong() {
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000000L), 0);
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000001L), 1);
+    expectEqualsInt($noinline$BitCountLong(0x1000000000000000L), 1);
+    expectEqualsInt($noinline$BitCountLong(0x1000000000000001L), 2);
+    expectEqualsInt($noinline$BitCountLong(0x0000000000000003L), 2);
+    expectEqualsInt($noinline$BitCountLong(0x7000000000000000L), 3);
+    expectEqualsInt($noinline$BitCountLong(0x000F000000000000L), 4);
+    expectEqualsInt($noinline$BitCountLong(0x0000000011111111L), 8);
+    expectEqualsInt($noinline$BitCountLong(0x1111111100000000L), 8);
+    expectEqualsInt($noinline$BitCountLong(0x1111111111111111L), 16);
+    expectEqualsInt($noinline$BitCountLong(0x123456789ABCDEF1L), 33);
+    expectEqualsInt($noinline$BitCountLong(0xFFFFFFFFFFFFFFFFL), 64);
+
+    for (int i = 0; i < Long.SIZE; i++) {
+      expectEqualsInt($noinline$BitCountLong(1L << i), 1);
+    }
+  }
+
   public static void main(String args[]) {
-    expectEquals32(bits32(0x00000000), 0);
-    expectEquals32(bits32(0x00000001), 1);
-    expectEquals32(bits32(0x10000000), 1);
-    expectEquals32(bits32(0x10000001), 2);
-    expectEquals32(bits32(0x00000003), 2);
-    expectEquals32(bits32(0x70000000), 3);
-    expectEquals32(bits32(0x000F0000), 4);
-    expectEquals32(bits32(0x00001111), 4);
-    expectEquals32(bits32(0x11110000), 4);
-    expectEquals32(bits32(0x11111111), 8);
-    expectEquals32(bits32(0x12345678), 13);
-    expectEquals32(bits32(0x9ABCDEF0), 19);
-    expectEquals32(bits32(0xFFFFFFFF), 32);
-
-    for (int i = 0; i < 32; i++) {
-      expectEquals32(bits32(1 << i), 1);
-    }
-
-    expectEquals64(bits64(0x0000000000000000L), 0);
-    expectEquals64(bits64(0x0000000000000001L), 1);
-    expectEquals64(bits64(0x1000000000000000L), 1);
-    expectEquals64(bits64(0x1000000000000001L), 2);
-    expectEquals64(bits64(0x0000000000000003L), 2);
-    expectEquals64(bits64(0x7000000000000000L), 3);
-    expectEquals64(bits64(0x000F000000000000L), 4);
-    expectEquals64(bits64(0x0000000011111111L), 8);
-    expectEquals64(bits64(0x1111111100000000L), 8);
-    expectEquals64(bits64(0x1111111111111111L), 16);
-    expectEquals64(bits64(0x123456789ABCDEF1L), 33);
-    expectEquals64(bits64(0xFFFFFFFFFFFFFFFFL), 64);
-
-    for (int i = 0; i < 64; i++) {
-      expectEquals64(bits64(1L << i), 1);
-    }
+    testBitCountBoolean();
+    testBitCountByte();
+    testBitCountShort();
+    testBitCountChar();
+    testBitCountInt();
+    testBitCountLong();
 
     System.out.println("passed");
   }
 
-  private static void expectEquals32(int expected, int result) {
+  private static void expectEqualsInt(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
-  private static void expectEquals64(long expected, long result) {
-    if (expected != result) {
-      throw new Error("Expected: " + expected + ", found: " + result);
-    }
-  }
+
+  private static boolean doThrow = false;
 }