Merge "Use signed encoding when using relative CFI addresses."
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 6192be7..98fd327 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -101,6 +101,19 @@
     return ::testing::AssertionFailure() << "key was not in the map";
   }
 
+  template <typename TMap, typename TKey, typename T>
+  ::testing::AssertionResult IsExpectedDefaultKeyValue(const T& expected,
+                                                       const TMap& map,
+                                                       const TKey& key) {
+    const T& actual = map.GetOrDefault(key);
+    if (!UsuallyEquals(expected, actual)) {
+      return ::testing::AssertionFailure()
+          << "expected " << detail::ToStringAny(expected) << " but got "
+          << detail::ToStringAny(actual);
+    }
+    return ::testing::AssertionSuccess();
+  }
+
 class CmdlineParserTest : public ::testing::Test {
  public:
   CmdlineParserTest() = default;
@@ -145,13 +158,23 @@
 
 #define EXPECT_KEY_EXISTS(map, key) EXPECT_TRUE((map).Exists(key))
 #define EXPECT_KEY_VALUE(map, key, expected) EXPECT_TRUE(IsExpectedKeyValue(expected, map, key))
+#define EXPECT_DEFAULT_KEY_VALUE(map, key, expected) EXPECT_TRUE(IsExpectedDefaultKeyValue(expected, map, key))
 
-#define EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv)               \
+#define _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv)              \
   do {                                                        \
     EXPECT_TRUE(IsResultSuccessful(parser_->Parse(argv)));    \
     EXPECT_EQ(0u, parser_->GetArgumentsMap().Size());         \
+
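+// Note: _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS deliberately leaves its "do {" block open; the
+// EXPECT_SINGLE_PARSE_* macros below close it with "} while (false)".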
+#define EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv)               \
+  _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv);                   \
   } while (false)
 
+#define EXPECT_SINGLE_PARSE_DEFAULT_VALUE(expected, argv, key)\
+  _EXPECT_SINGLE_PARSE_EMPTY_SUCCESS(argv);                   \
+    RuntimeArgumentMap args = parser_->ReleaseArgumentsMap(); \
+    EXPECT_DEFAULT_KEY_VALUE(args, key, expected);            \
+  } while (false)                                             // NOLINT [readability/namespace] [5]
+
 #define _EXPECT_SINGLE_PARSE_EXISTS(argv, key)                \
   do {                                                        \
     EXPECT_TRUE(IsResultSuccessful(parser_->Parse(argv)));    \
@@ -509,6 +532,24 @@
   }
 }  // TEST_F
 
+/* -X[no]experimental-lambdas */
+TEST_F(CmdlineParserTest, TestExperimentalLambdas) {
+  // Off by default
+  EXPECT_SINGLE_PARSE_DEFAULT_VALUE(false,
+                                    "",
+                                    M::ExperimentalLambdas);
+
+  // Disabled explicitly
+  EXPECT_SINGLE_PARSE_VALUE(false,
+                            "-Xnoexperimental-lambdas",
+                            M::ExperimentalLambdas);
+
+  // Enabled explicitly
+  EXPECT_SINGLE_PARSE_VALUE(true,
+                            "-Xexperimental-lambdas",
+                            M::ExperimentalLambdas);
+}
+
 TEST_F(CmdlineParserTest, TestIgnoreUnrecognized) {
   RuntimeParser::Builder parserBuilder;
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 67536f0..60668ed 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -103,6 +103,7 @@
 	optimizing/code_generator.cc \
 	optimizing/code_generator_arm.cc \
 	optimizing/code_generator_arm64.cc \
+	optimizing/code_generator_mips64.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/code_generator_x86_64.cc \
 	optimizing/code_generator_utils.cc \
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index e2b9987..94ba4fa 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -160,20 +160,10 @@
   return location;
 }
 
-bool GlobalValueNumbering::HasNullCheckLastInsn(const BasicBlock* pred_bb,
-                                                BasicBlockId succ_id) {
-  if (pred_bb->block_type != kDalvikByteCode || pred_bb->last_mir_insn == nullptr) {
-    return false;
-  }
-  Instruction::Code last_opcode = pred_bb->last_mir_insn->dalvikInsn.opcode;
-  return ((last_opcode == Instruction::IF_EQZ && pred_bb->fall_through == succ_id) ||
-      (last_opcode == Instruction::IF_NEZ && pred_bb->taken == succ_id));
-}
-
 bool GlobalValueNumbering::NullCheckedInAllPredecessors(
     const ScopedArenaVector<uint16_t>& merge_names) const {
   // Implicit parameters:
-  //   - *work_lvn: the LVN for which we're checking predecessors.
+  //   - *work_lvn_: the LVN for which we're checking predecessors.
   //   - merge_lvns_: the predecessor LVNs.
   DCHECK_EQ(merge_lvns_.size(), merge_names.size());
   for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
@@ -198,7 +188,7 @@
 bool GlobalValueNumbering::DivZeroCheckedInAllPredecessors(
     const ScopedArenaVector<uint16_t>& merge_names) const {
   // Implicit parameters:
-  //   - *work_lvn: the LVN for which we're checking predecessors.
+  //   - *work_lvn_: the LVN for which we're checking predecessors.
   //   - merge_lvns_: the predecessor LVNs.
   DCHECK_EQ(merge_lvns_.size(), merge_names.size());
   for (size_t i = 0, size = merge_lvns_.size(); i != size; ++i) {
@@ -217,15 +207,11 @@
   if (bb->predecessors.size() == 1u) {
     BasicBlockId pred_id = bb->predecessors[0];
     BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id);
-    if (pred_bb->last_mir_insn != nullptr) {
-      Instruction::Code opcode = pred_bb->last_mir_insn->dalvikInsn.opcode;
-      if ((opcode == Instruction::IF_NEZ && pred_bb->taken == bb_id) ||
-          (opcode == Instruction::IF_EQZ && pred_bb->fall_through == bb_id)) {
-        DCHECK(lvns_[pred_id] != nullptr);
-        uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]);
-        if (operand == cond) {
-          return true;
-        }
+    if (pred_bb->BranchesToSuccessorOnlyIfNotZero(bb_id)) {
+      DCHECK(lvns_[pred_id] != nullptr);
+      uint16_t operand = lvns_[pred_id]->GetSregValue(pred_bb->last_mir_insn->ssa_rep->uses[0]);
+      if (operand == cond) {
+        return true;
       }
     }
   }
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index bd2f187..c514f75 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -194,7 +194,9 @@
     return mir_graph_->GetBasicBlock(bb_id);
   }
 
-  static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id);
+  static bool HasNullCheckLastInsn(const BasicBlock* pred_bb, BasicBlockId succ_id) {
+    return pred_bb->BranchesToSuccessorOnlyIfNotZero(succ_id);
+  }
 
   bool NullCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
 
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 6d8a7da..b1f5d87 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -1003,7 +1003,6 @@
                 vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg));
     }
   }
-  unused_vregs_->Union(vregs_to_kill_);
   for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) {
     MIRData* data = vreg_chains_.GetMIRData(*it);
     DCHECK(!data->must_keep);
@@ -1012,6 +1011,10 @@
     KillMIR(data);
   }
 
+  // Each dependent register not in vregs_to_kill_ is either already marked unused or
+  // it's one word of a wide register where the other word has been overwritten.
+  unused_vregs_->UnionIfNotIn(dependent_vregs_, vregs_to_kill_);
+
   vreg_chains_.RemoveTrailingNops();
   return true;
 }
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index de591d0..461c844 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -137,6 +137,8 @@
     { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } }
 #define DEF_BINOP(bb, opcode, result, src1, src2) \
     { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
+#define DEF_BINOP_WIDE(bb, opcode, result, src1, src2) \
+    { bb, opcode, 0u, 0u, 4, { src1, src1 + 1, src2, src2 + 1 }, 2, { result, result + 1 } }
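+// Note: in DEF_BINOP_WIDE, wide operands and results occupy two consecutive ssa regs (low, high).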
 
   void DoPrepareIFields(const IFieldDef* defs, size_t count) {
     cu_.mir_graph->ifield_lowering_infos_.clear();
@@ -1936,7 +1938,7 @@
       DEF_CONST(3, Instruction::CONST, 0u, 1000u),
       DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
       DEF_CONST(3, Instruction::CONST, 2u, 2000u),
-      { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u} },
+      { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u } },
       DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 5u, 3u),
       DEF_CONST(3, Instruction::CONST, 7u, 3000u),
       DEF_CONST(3, Instruction::CONST, 8u, 4000u),
@@ -1983,4 +1985,85 @@
   EXPECT_EQ(0u, int_to_long->dalvikInsn.vB);
 }
 
+TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs1) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+      DEF_CONST(3, Instruction::CONST, 1u, 2000u),
+      DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u),
+      DEF_CONST(3, Instruction::CONST, 3u, 1000u),            // NOT killed (b/21702651).
+      DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u),         // Killed (RecordPass)
+      DEF_CONST(3, Instruction::CONST, 5u, 2000u),            // Killed with 9u (BackwardPass)
+      DEF_BINOP(3, Instruction::ADD_INT, 6u, 5u, 0u),         // Killed (RecordPass)
+      DEF_CONST(3, Instruction::CONST, 7u, 4000u),
+      DEF_MOVE(3, Instruction::MOVE, 8u, 0u),                 // Killed with 6u (BackwardPass)
+  };
+
+  static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 0, 3, 0, 3, 4, 0 };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  static const size_t diff_indexes[] = { 0, 1, 2, 7 };
+  ExpectValueNamesNE(diff_indexes);
+  EXPECT_EQ(value_names_[0], value_names_[3]);
+  EXPECT_EQ(value_names_[2], value_names_[4]);
+  EXPECT_EQ(value_names_[1], value_names_[5]);
+  EXPECT_EQ(value_names_[2], value_names_[6]);
+  EXPECT_EQ(value_names_[0], value_names_[8]);
+
+  static const bool eliminated[] = {
+      false, false, false, false, true, true, true, false, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs2) {
+  static const MIRDef mirs[] = {
+      DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+      DEF_CONST(3, Instruction::CONST, 1u, 2000u),
+      DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u),
+      DEF_CONST(3, Instruction::CONST, 3u, 1000u),            // Killed (BackwardPass; b/21702651)
+      DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u),         // Killed (RecordPass)
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 4000u),
+      { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 5u, 6u }, 1, { 7u } },
+      DEF_BINOP(3, Instruction::ADD_INT, 8u, 7u, 0u),
+      DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 9u, 4000u),  // Killed with 12u (BackwardPass)
+      DEF_CONST(3, Instruction::CONST, 11u, 6000u),
+      { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 9u, 10u }, 1, { 12u } },  // Killed with 9u (BP)
+  };
+
+  static const int32_t sreg_to_vreg_map[] = {
+      2, 3, 4, 1, 4, 5, 6 /* high word */, 0, 7, 0, 1 /* high word */, 8, 0
+  };
+  PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+  PrepareMIRs(mirs);
+  static const int32_t wide_sregs[] = { 5, 9 };
+  MarkAsWideSRegs(wide_sregs);
+  PerformGVN_DCE();
+
+  ASSERT_EQ(arraysize(mirs), value_names_.size());
+  static const size_t diff_indexes[] = { 0, 1, 2, 5, 6, 7, 9 };
+  ExpectValueNamesNE(diff_indexes);
+  EXPECT_EQ(value_names_[0], value_names_[3]);
+  EXPECT_EQ(value_names_[2], value_names_[4]);
+  EXPECT_EQ(value_names_[5], value_names_[8]);
+  EXPECT_EQ(value_names_[6], value_names_[10]);
+
+  static const bool eliminated[] = {
+      false, false, false, true, true, false, false, false, true, false, true,
+  };
+  static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+  for (size_t i = 0; i != arraysize(eliminated); ++i) {
+    bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+    EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 9fa5148..3834242 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -173,7 +173,7 @@
   decoded_instruction->vB = inst->HasVRegB() ? inst->VRegB() : 0;
   decoded_instruction->vB_wide = inst->HasWideVRegB() ? inst->WideVRegB() : 0;
   decoded_instruction->vC = inst->HasVRegC() ?  inst->VRegC() : 0;
-  if (inst->HasVarArgs()) {
+  if (inst->HasVarArgs35c()) {
     inst->GetVarArgs(decoded_instruction->arg);
   }
   return inst->SizeInCodeUnits();
@@ -398,12 +398,13 @@
   DCHECK(monitor_exit->Opcode() == Instruction::MONITOR_EXIT);
   int monitor_reg = monitor_exit->VRegA_11x();
   const Instruction* check_insn = Instruction::At(current_code_item_->insns_ + catch_offset);
-  DCHECK(check_insn->Opcode() == Instruction::MOVE_EXCEPTION);
-  if (check_insn->VRegA_11x() == monitor_reg) {
-    // Unexpected move-exception to the same register. Probably not the pattern we're looking for.
-    return false;
+  if (check_insn->Opcode() == Instruction::MOVE_EXCEPTION) {
+    if (check_insn->VRegA_11x() == monitor_reg) {
+      // Unexpected move-exception to the same register. Probably not the pattern we're looking for.
+      return false;
+    }
+    check_insn = check_insn->Next();
   }
-  check_insn = check_insn->Next();
   while (true) {
     int dest = -1;
     bool wide = false;
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index f038397..dbe9062 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -452,6 +452,21 @@
   MIR* GetFirstNonPhiInsn();
 
   /**
+   * @brief Checks whether the block ends with an if-nez or if-eqz that branches to
+   *        the given successor only if the register is not zero.
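+   *        For example, "if-eqz vA" reaches its fall-through successor only when vA != 0.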
+   */
+  bool BranchesToSuccessorOnlyIfNotZero(BasicBlockId succ_id) const {
+    if (last_mir_insn == nullptr) {
+      return false;
+    }
+    Instruction::Code last_opcode = last_mir_insn->dalvikInsn.opcode;
+    return ((last_opcode == Instruction::IF_EQZ && fall_through == succ_id) ||
+        (last_opcode == Instruction::IF_NEZ && taken == succ_id)) &&
+        // Make sure the other successor isn't the same (empty if), b/21614284.
+        (fall_through != taken);
+  }
+
+  /**
    * @brief Used to obtain the next MIR that follows unconditionally.
    * @details The implementation does not guarantee that a MIR does not
    * follow even if this method returns nullptr.
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 645511e..5bb0ce3 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -978,18 +978,12 @@
       BasicBlock* pred_bb = GetBasicBlock(pred_id);
       DCHECK(pred_bb != nullptr);
       MIR* null_check_insn = nullptr;
-      if (pred_bb->block_type == kDalvikByteCode) {
-        // Check to see if predecessor had an explicit null-check.
-        MIR* last_insn = pred_bb->last_mir_insn;
-        if (last_insn != nullptr) {
-          Instruction::Code last_opcode = last_insn->dalvikInsn.opcode;
-          if ((last_opcode == Instruction::IF_EQZ && pred_bb->fall_through == bb->id) ||
-              (last_opcode == Instruction::IF_NEZ && pred_bb->taken == bb->id)) {
-            // Remember the null check insn if there's no other predecessor requiring null check.
-            if (!copied_first || !vregs_to_check->IsBitSet(last_insn->dalvikInsn.vA)) {
-              null_check_insn = last_insn;
-            }
-          }
+      // Check to see if predecessor had an explicit null-check.
+      if (pred_bb->BranchesToSuccessorOnlyIfNotZero(bb->id)) {
+        // Remember the null check insn if there's no other predecessor requiring null check.
+        if (!copied_first || !vregs_to_check->IsBitSet(pred_bb->last_mir_insn->dalvikInsn.vA)) {
+          null_check_insn = pred_bb->last_mir_insn;
+          DCHECK(null_check_insn != nullptr);
         }
       }
       if (!copied_first) {
@@ -1673,15 +1667,9 @@
       if (opcode == Instruction::NEW_INSTANCE) {
         uint32_t type_idx = mir->dalvikInsn.vB;
         if (cu_->compiler_driver->IsStringTypeIndex(type_idx, cu_->dex_file)) {
-          // Change NEW_INSTANCE and throwing half of the insn (if it exists) into CONST_4 of 0
+          // Change NEW_INSTANCE into CONST_4 of 0
           mir->dalvikInsn.opcode = Instruction::CONST_4;
           mir->dalvikInsn.vB = 0;
-          MIR* check_mir = GetBasicBlock(bb->predecessors[0])->last_mir_insn;
-          if (check_mir != nullptr &&
-              static_cast<int>(check_mir->dalvikInsn.opcode) == kMirOpCheck) {
-            check_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-            check_mir->dalvikInsn.vB = 0;
-          }
         }
       } else if ((opcode == Instruction::INVOKE_DIRECT) ||
                  (opcode == Instruction::INVOKE_DIRECT_RANGE)) {
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 6d30e72..cf01884 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -471,13 +471,18 @@
         NewLIR3(kThumb2Fmrrd, r_dest.GetLowReg(), r_dest.GetHighReg(), r_src.GetReg());
       } else {
         // Handle overlap
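+        // Three cases: if the low-word copy cannot clobber src's high word, copy low then high;
+        // if it can but the reverse order is safe, copy high then low; otherwise the pair is
+        // fully swapped and one word has to go through a temporary register.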
-        if (r_src.GetHighReg() == r_dest.GetLowReg()) {
-          DCHECK_NE(r_src.GetLowReg(), r_dest.GetHighReg());
+        if (r_src.GetHighReg() != r_dest.GetLowReg()) {
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        } else if (r_src.GetLowReg() != r_dest.GetHighReg()) {
           OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
           OpRegCopy(r_dest.GetLow(), r_src.GetLow());
         } else {
+          RegStorage r_tmp = AllocTemp();
+          OpRegCopy(r_tmp, r_src.GetHigh());
           OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          OpRegCopy(r_dest.GetHigh(), r_tmp);
+          FreeTemp(r_tmp);
         }
       }
     }
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 9319c64..f5ad7c7 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -258,13 +258,19 @@
         }
       } else {
         // Here if both src and dest are core registers.
-        // Handle overlap.
-        if (r_src.GetHighReg() == r_dest.GetLowReg()) {
+        // Handle overlap
+        if (r_src.GetHighReg() != r_dest.GetLowReg()) {
+          OpRegCopy(r_dest.GetLow(), r_src.GetLow());
+          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+        } else if (r_src.GetLowReg() != r_dest.GetHighReg()) {
           OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
           OpRegCopy(r_dest.GetLow(), r_src.GetLow());
         } else {
+          RegStorage r_tmp = AllocTemp();
+          OpRegCopy(r_tmp, r_src.GetHigh());
           OpRegCopy(r_dest.GetLow(), r_src.GetLow());
-          OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
+          OpRegCopy(r_dest.GetHigh(), r_tmp);
+          FreeTemp(r_tmp);
         }
       }
     }
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 58236e2..97703a5 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -377,10 +377,10 @@
     Instruction::IGET_BYTE_QUICK,
     Instruction::IGET_CHAR_QUICK,
     Instruction::IGET_SHORT_QUICK,
-    Instruction::UNUSED_F3,
+    Instruction::INVOKE_LAMBDA,
     Instruction::UNUSED_F4,
     Instruction::UNUSED_F5,
-    Instruction::UNUSED_F6,
+    Instruction::CREATE_LAMBDA,
     Instruction::UNUSED_F7,
     Instruction::UNUSED_F8,
     Instruction::UNUSED_F9,
@@ -421,7 +421,13 @@
     Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
 };
 
-// Unsupported opcodes. null can be used when everything is supported. Size of the lists is
+// TODO: Add support for lambda opcodes to the quick compiler.
+static const int kUnsupportedLambdaOpcodes[] = {
+    Instruction::INVOKE_LAMBDA,
+    Instruction::CREATE_LAMBDA,
+};
+
+// Unsupported opcodes. Null can be used when everything is supported. Size of the lists is
 // recorded below.
 static const int* kUnsupportedOpcodes[] = {
     // 0 = kNone.
@@ -429,17 +435,17 @@
     // 1 = kArm, unused (will use kThumb2).
     kAllOpcodes,
     // 2 = kArm64.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 3 = kThumb2.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 4 = kX86.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 5 = kX86_64.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 6 = kMips.
-    nullptr,
+    kUnsupportedLambdaOpcodes,
     // 7 = kMips64.
-    nullptr
+    kUnsupportedLambdaOpcodes,
 };
 static_assert(sizeof(kUnsupportedOpcodes) == 8 * sizeof(int*), "kUnsupportedOpcodes unexpected");
 
@@ -450,21 +456,26 @@
     // 1 = kArm, unused (will use kThumb2).
     arraysize(kAllOpcodes),
     // 2 = kArm64.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 3 = kThumb2.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 4 = kX86.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 5 = kX86_64.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 6 = kMips.
-    0,
+    arraysize(kUnsupportedLambdaOpcodes),
     // 7 = kMips64.
-    0
+    arraysize(kUnsupportedLambdaOpcodes),
 };
 static_assert(sizeof(kUnsupportedOpcodesSize) == 8 * sizeof(size_t),
               "kUnsupportedOpcodesSize unexpected");
 
+static bool IsUnsupportedExperimentalLambdasOnly(size_t i) {
+  DCHECK_LT(i, arraysize(kUnsupportedOpcodes));
+  return kUnsupportedOpcodes[i] == kUnsupportedLambdaOpcodes;
+}
+
 // The maximum amount of Dalvik register in a method for which we will start compiling. Tries to
 // avoid an abort when we need to manage more SSA registers than we can.
 static constexpr size_t kMaxAllowedDalvikRegisters = INT16_MAX / 2;
@@ -487,6 +498,30 @@
   return true;
 }
 
+// If the ISA has unsupported opcodes, should we skip scanning over them?
+//
+// Most of the time we're compiling non-experimental files, so scanning just slows
+// compilation down by as much as 6% with 4 threads.
+// In the rare cases we compile experimental opcodes, the runtime has an option to enable them,
+// which will force scanning for any unsupported opcodes.
+static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
+  if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
+    // All opcodes are supported no matter what. Usually not the case
+    // since experimental opcodes are not implemented in the quick compiler.
+    return true;
+  } else if (LIKELY(!Runtime::Current()->AreExperimentalLambdasEnabled())) {
+    // Experimental opcodes are disabled.
+    //
+    // If all unsupported opcodes are experimental we don't need to do scanning.
+    return IsUnsupportedExperimentalLambdasOnly(instruction_set);
+  } else {
+    // Experimental opcodes are enabled.
+    //
+    // Do the opcode scanning if the ISA has any unsupported opcodes.
+    return false;
+  }
+}
+
 // Skip the method that we do not support currently.
 bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
                                      CompilationUnit* cu) const {
@@ -498,7 +533,7 @@
 
   // Check whether we do have limitations at all.
   if (kSupportedTypes[cu->instruction_set] == nullptr &&
-      kUnsupportedOpcodesSize[cu->instruction_set] == 0U) {
+      SkipScanningUnsupportedOpcodes(cu->instruction_set)) {
     return true;
   }
 
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 6d48598..8a009cb 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -89,14 +89,15 @@
   DCHECK(dex_gc_map_.empty());
   size_t num_entries, ref_bitmap_bits, pc_bits;
   ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
-  // There's a single byte to encode the size of each bitmap.
-  if (ref_bitmap_bits >= kBitsPerByte * 8192 /* 13-bit size */) {
+  const size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte;
+  static constexpr size_t kFormatBits = 3;
+  // We have 16 - kFormatBits bits available to encode ref_bitmap_bytes.
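+  // With kFormatBits == 3 that is 13 bits, i.e. at most 8191 bytes (65528 bits) per entry.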
+  if ((ref_bitmap_bytes >> (16u - kFormatBits)) != 0) {
     LOG(WARNING) << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers: "
                  << PrettyMethod(method_verifier->GetMethodReference().dex_method_index,
                                  *method_verifier->GetMethodReference().dex_file);
     return false;
   }
-  size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte;
   // There are 2 bytes to encode the number of entries.
   if (num_entries > std::numeric_limits<uint16_t>::max()) {
     LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: "
@@ -122,7 +123,7 @@
   size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
   dex_gc_map_.reserve(table_size);
   // Write table header.
-  dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> 5));
+  dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> (kBitsPerByte - kFormatBits)));
   dex_gc_map_.push_back(ref_bitmap_bytes & 0xFF);
   dex_gc_map_.push_back(num_entries & 0xFF);
   dex_gc_map_.push_back((num_entries >> 8) & 0xFF);
@@ -147,7 +148,7 @@
   // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
   // that the table data is well formed and all references are marked (or not) in the bitmap.
   verifier::DexPcToReferenceMap map(&data[0]);
-  DCHECK_EQ(data.size(), map.RawSize());
+  CHECK_EQ(data.size(), map.RawSize()) << map.NumEntries() << " " << map.RegWidth();
   size_t map_index = 0;
   const DexFile::CodeItem* code_item = method_verifier->CodeItem();
   for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index b25e967..e0c56fc 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -233,11 +233,32 @@
   return referrer_class == fields_class;
 }
 
+inline bool CompilerDriver::CanAssumeClassIsInitialized(mirror::Class* klass) {
+  // Being loaded is a prerequisite for being initialized, but let's do the cheap check first.
+  //
+  // NOTE: When AOT compiling an app, we eagerly initialize app classes (and potentially their
+  // super classes in the boot image) but only those that have a trivial initialization, i.e.
+  // without <clinit>() or static values in the dex file for that class or any of its super
+  // classes. So while we could see the klass as initialized during AOT compilation and have
+  // it only loaded at runtime, the needed initialization would have to be trivial and
+  // unobservable from Java, so we may as well treat it as initialized.
+  if (!klass->IsInitialized()) {
+    return false;
+  }
+  return CanAssumeClassIsLoaded(klass);
+}
+
+inline bool CompilerDriver::CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class,
+                                                                mirror::Class* klass) {
+  return (referrer_class != nullptr && referrer_class->IsSubClass(klass)) ||
+      CanAssumeClassIsInitialized(klass);
+}
+
 inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class,
                                                            ArtField* resolved_field) {
   DCHECK(resolved_field->IsStatic());
   mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-  return fields_class == referrer_class || fields_class->IsInitialized();
+  return CanReferrerAssumeClassIsInitialized(referrer_class, fields_class);
 }
 
 inline ArtMethod* CompilerDriver::ResolveMethod(
@@ -394,7 +415,7 @@
     return true;
   }
   mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-  return methods_class == referrer_class || methods_class->IsInitialized();
+  return CanReferrerAssumeClassIsInitialized(referrer_class, methods_class);
 }
 
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 22fcf87..84b6a52 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -659,7 +659,8 @@
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
   if (!IsImage()) {
-    return true;
+    // NOTE: Currently unreachable, all callers check IsImage().
+    return false;
   } else {
     return image_classes_->find(descriptor) != image_classes_->end();
   }
@@ -992,6 +993,24 @@
   }
 }
 
+bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) {
+  Runtime* runtime = Runtime::Current();
+  if (!runtime->IsAotCompiler()) {
+    DCHECK(runtime->UseJit());
+    // Having the klass reference here implies that the klass is already loaded.
+    return true;
+  }
+  if (!IsImage()) {
+    // Assume loaded only if klass is in the boot image. App classes cannot be assumed
+    // loaded because we don't even know what class loader will be used to load them.
+    bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace();
+    return class_in_image;
+  }
+  std::string temp;
+  const char* descriptor = klass->GetDescriptor(&temp);
+  return IsImageClass(descriptor);
+}
+
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
   if (IsImage() &&
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 68c905e..f737007 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -501,6 +501,16 @@
                                       uint32_t field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Can we assume that the klass is initialized?
+  bool CanAssumeClassIsInitialized(mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Can we assume that the klass is loaded?
+  bool CanAssumeClassIsLoaded(mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index cdd7636..2a555e4 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -483,10 +483,11 @@
 
 void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
                                   Primitive::Type type,
-                                  HCompare::Bias bias) {
+                                  HCompare::Bias bias,
+                                  uint32_t dex_pc) {
   HInstruction* first = LoadLocal(instruction.VRegB(), type);
   HInstruction* second = LoadLocal(instruction.VRegC(), type);
-  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias));
+  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc));
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
@@ -603,7 +604,12 @@
   const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
   bool is_instance_call = invoke_type != kStatic;
-  size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
+  // Remove the return type from the 'proto'.
+  size_t number_of_arguments = strlen(descriptor) - 1;
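+  // For example, the shorty "VLI" (void f(Object, int)) yields 2 arguments here; an instance
+  // call adds one more below for the implicit 'this'.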
+  if (is_instance_call) {
+    // One extra argument for 'this'.
+    ++number_of_arguments;
+  }
 
   MethodReference target_method(dex_file_, method_idx);
   uintptr_t direct_code;
@@ -614,7 +620,8 @@
   if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
                                            &optimized_invoke_type, &target_method, &table_index,
                                            &direct_code, &direct_method)) {
-    VLOG(compiler) << "Did not compile " << PrettyMethod(method_idx, *dex_file_)
+    VLOG(compiler) << "Did not compile "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
                    << " because a method call could not be resolved";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod);
     return false;
@@ -655,7 +662,7 @@
         (target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex())
         && (target_method.dex_file == outer_compilation_unit_->GetDexFile());
 
-    if (optimized_invoke_type == kStatic) {
+    if (optimized_invoke_type == kStatic && !is_string_init) {
       ScopedObjectAccess soa(Thread::Current());
       StackHandleScope<4> hs(soa.Self());
       Handle<mirror::DexCache> dex_cache(hs.NewHandle(
@@ -746,26 +753,45 @@
     start_index = 1;
   }
 
-  uint32_t descriptor_index = 1;
+  uint32_t descriptor_index = 1;  // Skip the return type.
   uint32_t argument_index = start_index;
   if (is_string_init) {
     start_index = 1;
   }
-  for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
+  for (size_t i = start_index;
+       // Make sure we don't go over the expected arguments or over the number of
+       // dex registers given. If the instruction was seen as dead by the verifier,
+       // it hasn't been properly checked.
+       (i < number_of_vreg_arguments) && (argument_index < number_of_arguments);
+       i++, argument_index++) {
     Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
     bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
-    // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
-    // reject any class where this is violated.
-    DCHECK(is_range || !is_wide || (args[i] + 1 == args[i + 1]))
-        << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
-        << " at " << dex_pc;
+    if (!is_range
+        && is_wide
+        && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
+      // Longs and doubles should be in pairs, that is, sequential registers. The verifier should
+      // reject any class where this is violated. However, the verifier only does these checks
+      // on instructions that are not trivially dead, so we just bail out of the compilation.
+      VLOG(compiler) << "Did not compile "
+                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << " because of non-sequential dex register pair in wide argument";
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+      return false;
+    }
     HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
     invoke->SetArgumentAt(argument_index, arg);
     if (is_wide) {
       i++;
     }
   }
-  DCHECK_EQ(argument_index, number_of_arguments);
+
+  if (argument_index != number_of_arguments) {
+    VLOG(compiler) << "Did not compile "
+                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << " because of wrong number of arguments in invoke instruction";
+    MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
+    return false;
+  }
 
   if (invoke->IsInvokeStaticOrDirect()) {
     invoke->SetArgumentAt(argument_index, graph_->GetCurrentMethod());
@@ -853,17 +879,25 @@
   return true;
 }
 
-mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const {
+static mirror::Class* GetClassFrom(CompilerDriver* driver,
+                                   const DexCompilationUnit& compilation_unit) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  const DexFile& dex_file = *compilation_unit.GetDexFile();
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
+      soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      compilation_unit.GetClassLinker()->FindDexCache(dex_file)));
 
-  return compiler_driver_->ResolveCompilingMethodsClass(
-      soa, outer_dex_cache, class_loader, outer_compilation_unit_);
+  return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
+}
+
+mirror::Class* HGraphBuilder::GetOutermostCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
+}
+
+mirror::Class* HGraphBuilder::GetCompilingClass() const {
+  return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
 }
 
 bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
@@ -903,20 +937,20 @@
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
       outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file)));
-  Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the field's class is stored in the DexCache's type array.
   uint32_t storage_index;
-  bool is_referrer_class = (referrer_class.Get() == resolved_field->GetDeclaringClass());
-  if (is_referrer_class) {
-    storage_index = referrer_class->GetDexTypeIndex();
+  bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
+  if (is_outer_class) {
+    storage_index = outer_class->GetDexTypeIndex();
   } else if (outer_dex_cache.Get() != dex_cache.Get()) {
     // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
     return false;
   } else {
     std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
         outer_dex_cache.Get(),
-        referrer_class.Get(),
+        GetCompilingClass(),
         resolved_field,
         field_index,
         &storage_index);
@@ -934,12 +968,12 @@
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
                                                  *dex_compilation_unit_->GetDexFile(),
-                                                 is_referrer_class,
+                                                 is_outer_class,
                                                  dex_pc);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
-  if (!is_initialized && !is_referrer_class) {
+  if (!is_initialized && !is_outer_class) {
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
@@ -1438,21 +1472,16 @@
     }
 
     case Instruction::RETURN: {
-      DCHECK_NE(return_type_, Primitive::kPrimNot);
-      DCHECK_NE(return_type_, Primitive::kPrimLong);
-      DCHECK_NE(return_type_, Primitive::kPrimDouble);
       BuildReturn(instruction, return_type_);
       break;
     }
 
     case Instruction::RETURN_OBJECT: {
-      DCHECK(return_type_ == Primitive::kPrimNot);
       BuildReturn(instruction, return_type_);
       break;
     }
 
     case Instruction::RETURN_WIDE: {
-      DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong);
       BuildReturn(instruction, return_type_);
       break;
     }
@@ -2080,27 +2109,27 @@
       break;
 
     case Instruction::CMP_LONG: {
-      Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias);
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias, dex_pc);
       break;
     }
 
     case Instruction::CMPG_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias);
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias, dex_pc);
       break;
     }
 
     case Instruction::CMPG_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias);
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias, dex_pc);
       break;
     }
 
     case Instruction::CMPL_FLOAT: {
-      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias);
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias, dex_pc);
       break;
     }
 
     case Instruction::CMPL_DOUBLE: {
-      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias);
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias, dex_pc);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 36503ce..052aaf8 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -117,7 +117,10 @@
   template<typename T>
   void Binop_23x_shift(const Instruction& instruction, Primitive::Type type);
 
-  void Binop_23x_cmp(const Instruction& instruction, Primitive::Type type, HCompare::Bias bias);
+  void Binop_23x_cmp(const Instruction& instruction,
+                     Primitive::Type type,
+                     HCompare::Bias bias,
+                     uint32_t dex_pc);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
@@ -222,8 +225,12 @@
 
   void MaybeRecordStat(MethodCompilationStat compilation_stat);
 
+  // Returns the outer-most compiling method's class.
   mirror::Class* GetOutermostCompilingClass() const;
 
+  // Returns the class whose method is being compiled.
+  mirror::Class* GetCompilingClass() const;
+
   // Returns whether `type_index` points to the outer-most compiling method's class.
   bool IsOutermostCompilingClass(uint16_t type_index) const;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 130f0e9..64f2c9a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -20,6 +20,7 @@
 #include "code_generator_arm64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
+#include "code_generator_mips64.h"
 #include "compiled_method.h"
 #include "dex/verified_method.h"
 #include "driver/dex_compilation_unit.h"
@@ -236,7 +237,6 @@
                                              const GrowableArray<HBasicBlock*>& block_order) {
   block_order_ = &block_order;
   DCHECK(block_order_->Get(0) == GetGraph()->GetEntryBlock());
-  DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), block_order_->Get(1)));
   ComputeSpillMask();
   first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
 
@@ -487,6 +487,11 @@
     }
     case kMips:
       return nullptr;
+    case kMips64: {
+      return new mips64::CodeGeneratorMIPS64(graph,
+          *isa_features.AsMips64InstructionSetFeatures(),
+          compiler_options);
+    }
     case kX86: {
       return new x86::CodeGeneratorX86(graph,
            *isa_features.AsX86InstructionSetFeatures(),
@@ -652,18 +657,18 @@
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path) {
   if (instruction != nullptr) {
-    // The code generated for some type conversions may call the
-    // runtime, thus normally requiring a subsequent call to this
-    // method.  However, the method verifier does not produce PC
-    // information for certain instructions, which are considered "atomic"
-    // (they cannot join a GC).
+    // The code generated for some type conversions and comparisons
+    // may call the runtime, thus normally requiring a subsequent
+    // call to this method. However, the method verifier does not
+    // produce PC information for certain instructions, which are
+    // considered "atomic" (they cannot join a GC).
     // Therefore we do not currently record PC information for such
     // instructions.  As this may change later, we added this special
     // case so that code generators may nevertheless call
     // CodeGenerator::RecordPcInfo without triggering an error in
     // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x")
     // thereafter.
-    if (instruction->IsTypeConversion()) {
+    if (instruction->IsTypeConversion() || instruction->IsCompare()) {
       return;
     }
     if (instruction->IsRem()) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5b0abd7..b1f1674 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -97,6 +97,8 @@
     return saved_fpu_stack_offsets_[reg];
   }
 
+  virtual const char* GetDescription() const = 0;
+
  protected:
   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
   static constexpr uint32_t kRegisterNotSaved = -1;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f6ae452..7169679 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -69,6 +69,8 @@
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM"; }
+
  private:
   HNullCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM);
@@ -85,6 +87,8 @@
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM"; }
+
  private:
   HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM);
@@ -118,6 +122,8 @@
     return successor_;
   }
 
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM"; }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -155,6 +161,8 @@
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM"; }
+
  private:
   HBoundsCheck* const instruction_;
   const Location index_location_;
@@ -197,6 +205,8 @@
     __ b(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM"; }
+
  private:
   // The class this slow path will load.
   HLoadClass* const cls_;
@@ -236,6 +246,8 @@
     __ b(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; }
+
  private:
   HLoadString* const instruction_;
 
@@ -286,6 +298,8 @@
     __ b(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM"; }
+
  private:
   HInstruction* const instruction_;
   const Location class_to_check_;
@@ -310,6 +324,8 @@
     arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
+
  private:
   HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3c8f117..7ec6b54 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -213,6 +213,8 @@
     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
+
  private:
   HBoundsCheck* const instruction_;
   const Location index_location_;
@@ -233,6 +235,8 @@
     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
+  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
+
  private:
   HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
@@ -278,6 +282,8 @@
     __ B(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
+
  private:
   // The class this slow path will load.
   HLoadClass* const cls_;
@@ -319,6 +325,8 @@
     __ B(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
+
  private:
   HLoadString* const instruction_;
 
@@ -337,6 +345,8 @@
     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
+
  private:
   HNullCheck* const instruction_;
 
@@ -373,6 +383,8 @@
     return successor_;
   }
 
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -429,6 +441,8 @@
     __ B(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
+
  private:
   HInstruction* const instruction_;
   const Location class_to_check_;
@@ -453,6 +467,8 @@
     arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
+
  private:
   HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
new file mode 100644
index 0000000..ab684d4
--- /dev/null
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -0,0 +1,3282 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips64.h"
+
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "art_method.h"
+#include "mirror/array-inl.h"
+#include "mirror/class-inl.h"
+#include "offsets.h"
+#include "thread.h"
+#include "utils/mips64/assembler_mips64.h"
+#include "utils/assembler.h"
+#include "utils/stack_checks.h"
+
+namespace art {
+namespace mips64 {
+
+static constexpr int kCurrentMethodStackOffset = 0;
+static constexpr GpuRegister kMethodRegisterArgument = A0;
+
+// We need extra temporary/scratch registers (in addition to AT) in some cases.
+static constexpr GpuRegister TMP = T8;
+static constexpr FpuRegister FTMP = F8;
+
+// ART Thread Register.
+static constexpr GpuRegister TR = S1;
+
+Location Mips64ReturnLocation(Primitive::Type return_type) {
+  switch (return_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      return Location::RegisterLocation(V0);
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      return Location::FpuRegisterLocation(F0);
+
+    case Primitive::kPrimVoid:
+      return Location();
+  }
+  UNREACHABLE();
+}
+
+Location InvokeDexCallingConventionVisitorMIPS64::GetReturnLocation(Primitive::Type type) const {
+  return Mips64ReturnLocation(type);
+}
+
+Location InvokeDexCallingConventionVisitorMIPS64::GetMethodLocation() const {
+  return Location::RegisterLocation(kMethodRegisterArgument);
+}
+
+Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(Primitive::Type type) {
+  Location next_location;
+  if (type == Primitive::kPrimVoid) {
+    LOG(FATAL) << "Unexpected parameter type " << type;
+  }
+
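+  // Note: the GP and FP argument register indices advance in lockstep below; an argument passed
+  // in an FP register also consumes a GP slot and vice versa.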
+  if (Primitive::IsFloatingPointType(type) &&
+      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
+    next_location = Location::FpuRegisterLocation(
+        calling_convention.GetFpuRegisterAt(float_index_++));
+    gp_index_++;
+  } else if (!Primitive::IsFloatingPointType(type) &&
+             (gp_index_ < calling_convention.GetNumberOfRegisters())) {
+    next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++));
+    float_index_++;
+  } else {
+    size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
+    next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
+                                                 : Location::StackSlot(stack_offset);
+  }
+
+  // Space on the stack is reserved for all arguments.
+  stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
+
+  // TODO: review
+
+  // TODO: shouldn't we use a whole machine word per argument on the stack?
+  // Implicit 4-byte method pointer (and such) will cause misalignment.
+
+  return next_location;
+}
+
+Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) {
+  return Mips64ReturnLocation(type);
+}
+
+#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()->
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+
+class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction,
+                            Location index_location,
+                            Location length_location)
+      : instruction_(instruction),
+        index_location_(index_location),
+        length_location_(length_location) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    __ Bind(GetEntryLabel());
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    codegen->EmitParallelMoves(index_location_,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimInt,
+                               length_location_,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimInt);
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
+  }
+
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; }
+
+ private:
+  HBoundsCheck* const instruction_;
+  const Location index_location_;
+  const Location length_location_;
+
+  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64);
+};
+
+class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    __ Bind(GetEntryLabel());
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
+  }
+
+  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; }
+
+ private:
+  HDivZeroCheck* const instruction_;
+  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64);
+};
+
+class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  LoadClassSlowPathMIPS64(HLoadClass* cls,
+                          HInstruction* at,
+                          uint32_t dex_pc,
+                          bool do_clinit)
+      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = at_->GetLocations();
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadConst32(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+    int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
+                                            : QUICK_ENTRY_POINT(pInitializeType);
+    mips64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
+
+    // Move the class to the desired location.
+    Location out = locations->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      Primitive::Type type = at_->GetType();
+      mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
+    }
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; }
+
+ private:
+  // The class this slow path will load.
+  HLoadClass* const cls_;
+
+  // The instruction where this slow path is happening.
+  // (Might be the load class or an initialization check).
+  HInstruction* const at_;
+
+  // The dex PC of `at_`.
+  const uint32_t dex_pc_;
+
+  // Whether to initialize the class.
+  const bool do_clinit_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64);
+};
+
+class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    __ LoadConst32(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+    Primitive::Type type = instruction_->GetType();
+    mips64_codegen->MoveLocation(locations->Out(),
+                                 calling_convention.GetReturnLocation(type),
+                                 type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; }
+
+ private:
+  HLoadString* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64);
+};
+
+class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : instruction_(instr) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    __ Bind(GetEntryLabel());
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
+  }
+
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; }
+
+ private:
+  HNullCheck* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64);
+};
+
+class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction,
+                                      HBasicBlock* successor)
+      : instruction_(instruction), successor_(successor) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, instruction_->GetLocations());
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, instruction_->GetLocations());
+    if (successor_ == nullptr) {
+      __ B(GetReturnLabel());
+    } else {
+      __ B(mips64_codegen->GetLabelOf(successor_));
+    }
+  }
+
+  Label* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
+
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; }
+
+ private:
+  HSuspendCheck* const instruction_;
+  // If not null, the block to branch to after the suspend check.
+  HBasicBlock* const successor_;
+
+  // If `successor_` is null, the label to branch to after the suspend check.
+  Label return_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64);
+};
+
+class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  TypeCheckSlowPathMIPS64(HInstruction* instruction,
+                          Location class_to_check,
+                          Location object_class,
+                          uint32_t dex_pc)
+      : instruction_(instruction),
+        class_to_check_(class_to_check),
+        object_class_(object_class),
+        dex_pc_(dex_pc) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    codegen->EmitParallelMoves(class_to_check_,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               object_class_,
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
+
+    if (instruction_->IsInstanceOf()) {
+      mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+                                    instruction_,
+                                    dex_pc_,
+                                    this);
+      Primitive::Type ret_type = instruction_->GetType();
+      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
+      mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
+                           uint32_t,
+                           const mirror::Class*,
+                           const mirror::Class*>();
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_, this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+    }
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location class_to_check_;
+  const Location object_class_;
+  uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
+};
+
+class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
+    : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, instruction_->GetLocations());
+    DCHECK(instruction_->IsDeoptimize());
+    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+    uint32_t dex_pc = deoptimize->GetDexPc();
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
+
+ private:
+  HInstruction* const instruction_;
+  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
+};
+
+CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
+                                         const Mips64InstructionSetFeatures& isa_features,
+                                         const CompilerOptions& compiler_options)
+    : CodeGenerator(graph,
+                    kNumberOfGpuRegisters,
+                    kNumberOfFpuRegisters,
+                    0,  // kNumberOfRegisterPairs
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+                                        arraysize(kCoreCalleeSaves)),
+                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                        arraysize(kFpuCalleeSaves)),
+                    compiler_options),
+      block_labels_(graph->GetArena(), 0),
+      location_builder_(graph, this),
+      instruction_visitor_(graph, this),
+      move_resolver_(graph->GetArena(), this),
+      isa_features_(isa_features) {
+  // Save RA (containing the return address) to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(RA));
+}
+
+#undef __
+#define __ down_cast<Mips64Assembler*>(GetAssembler())->
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
+
+void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
+  CodeGenerator::Finalize(allocator);
+}
+
+Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const {
+  return codegen_->GetAssembler();
+}
+
+void ParallelMoveResolverMIPS64::EmitMove(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverMIPS64::EmitSwap(size_t index) {
+  MoveOperands* move = moves_.Get(index);
+  codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
+}
+
+void ParallelMoveResolverMIPS64::RestoreScratch(int reg) {
+  // Pop reg
+  __ Ld(GpuRegister(reg), SP, 0);
+  __ DecreaseFrameSize(kMips64WordSize);
+}
+
+void ParallelMoveResolverMIPS64::SpillScratch(int reg) {
+  // Push reg
+  __ IncreaseFrameSize(kMips64WordSize);
+  __ Sd(GpuRegister(reg), SP, 0);
+}
+
+void ParallelMoveResolverMIPS64::Exchange(int index1, int index2, bool double_slot) {
+  LoadOperandType load_type = double_slot ? kLoadDoubleword : kLoadWord;
+  StoreOperandType store_type = double_slot ? kStoreDoubleword : kStoreWord;
+  // Allocate a scratch register other than TMP, if available.
+  // Else, spill V0 (arbitrary choice) and use it as a scratch register (it will be
+  // automatically unspilled when the scratch scope object is destroyed).
+  ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters());
+  // If V0 spills onto the stack, SP-relative offsets need to be adjusted.
+  int stack_offset = ensure_scratch.IsSpilled() ? kMips64WordSize : 0;
+  __ LoadFromOffset(load_type,
+                    GpuRegister(ensure_scratch.GetRegister()),
+                    SP,
+                    index1 + stack_offset);
+  __ LoadFromOffset(load_type,
+                    TMP,
+                    SP,
+                    index2 + stack_offset);
+  __ StoreToOffset(store_type,
+                   GpuRegister(ensure_scratch.GetRegister()),
+                   SP,
+                   index2 + stack_offset);
+  __ StoreToOffset(store_type, TMP, SP, index1 + stack_offset);
+}
+
+static dwarf::Reg DWARFReg(GpuRegister reg) {
+  return dwarf::Reg::Mips64Core(static_cast<int>(reg));
+}
+
+// TODO: mapping of floating-point registers to DWARF
+
+void CodeGeneratorMIPS64::GenerateFrameEntry() {
+  __ Bind(&frame_entry_label_);
+
+  bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kMips64) || !IsLeafMethod();
+
+  if (do_overflow_check) {
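+    // Probe the lowest address the frame may grow to; a stack overflow faults on this
+    // load and is reported at the PC recorded below.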
+    __ LoadFromOffset(kLoadWord,
+                      ZERO,
+                      SP,
+                      -static_cast<int32_t>(GetStackOverflowReservedBytes(kMips64)));
+    RecordPcInfo(nullptr, 0);
+  }
+
+  // TODO: anything related to T9/GP/GOT/PIC/.so's?
+
+  if (HasEmptyFrame()) {
+    return;
+  }
+
+  // Make sure the frame size isn't unreasonably large. Per the various APIs
+  // it looks like it should always be less than 2GB in size, which allows
+  // us to use 32-bit signed offsets from the stack pointer.
+  if (GetFrameSize() > 0x7FFFFFFF)
+    LOG(FATAL) << "Stack frame larger than 2GB";
+
+  // Spill callee-saved registers.
+  // Note that their cumulative size is small and they can be indexed using
+  // 16-bit offsets.
+
+  // TODO: increment/decrement SP in one step instead of two, or remove this comment.
+
+  uint32_t ofs = FrameEntrySpillSize();
+  __ IncreaseFrameSize(ofs);
+
+  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+    GpuRegister reg = kCoreCalleeSaves[i];
+    if (allocated_registers_.ContainsCoreRegister(reg)) {
+      ofs -= kMips64WordSize;
+      __ Sd(reg, SP, ofs);
+      __ cfi().RelOffset(DWARFReg(reg), ofs);
+    }
+  }
+
+  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+    FpuRegister reg = kFpuCalleeSaves[i];
+    if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+      ofs -= kMips64WordSize;
+      __ Sdc1(reg, SP, ofs);
+      // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
+    }
+  }
+
+  // Allocate the rest of the frame and store the current method pointer
+  // at its end.
+
+  __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
+
+  static_assert(IsInt<16>(kCurrentMethodStackOffset),
+                "kCurrentMethodStackOffset must fit into int16_t");
+  __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+}
+
+void CodeGeneratorMIPS64::GenerateFrameExit() {
+  __ cfi().RememberState();
+
+  // TODO: anything related to T9/GP/GOT/PIC/.so's?
+
+  if (!HasEmptyFrame()) {
+    // Deallocate the rest of the frame.
+
+    __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
+
+    // Restore callee-saved registers.
+    // Note that their cumulative size is small and they can be indexed using
+    // 16-bit offsets.
+
+    // TODO: increment/decrement SP in one step instead of two, or remove this comment.
+
+    uint32_t ofs = 0;
+
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      FpuRegister reg = kFpuCalleeSaves[i];
+      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+        __ Ldc1(reg, SP, ofs);
+        ofs += kMips64WordSize;
+        // TODO: __ cfi().Restore(DWARFReg(reg));
+      }
+    }
+
+    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+      GpuRegister reg = kCoreCalleeSaves[i];
+      if (allocated_registers_.ContainsCoreRegister(reg)) {
+        __ Ld(reg, SP, ofs);
+        ofs += kMips64WordSize;
+        __ cfi().Restore(DWARFReg(reg));
+      }
+    }
+
+    DCHECK_EQ(ofs, FrameEntrySpillSize());
+    __ DecreaseFrameSize(ofs);
+  }
+
+  __ Jr(RA);
+
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(GetFrameSize());
+}
+
+void CodeGeneratorMIPS64::Bind(HBasicBlock* block) {
+  __ Bind(GetLabelOf(block));
+}
+
+void CodeGeneratorMIPS64::MoveLocation(Location destination,
+                                       Location source,
+                                       Primitive::Type type) {
+  if (source.Equals(destination)) {
+    return;
+  }
+
+  // A valid move can always be inferred from the destination and source
+  // locations. When moving from and to a register, the argument type can be
+  // used to generate 32bit instead of 64bit moves.
+  bool unspecified_type = (type == Primitive::kPrimVoid);
+  DCHECK_EQ(unspecified_type, false);
+
+  if (destination.IsRegister() || destination.IsFpuRegister()) {
+    if (unspecified_type) {
+      HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
+      if (source.IsStackSlot() ||
+          (src_cst != nullptr && (src_cst->IsIntConstant()
+                                  || src_cst->IsFloatConstant()
+                                  || src_cst->IsNullConstant()))) {
+        // For stack slots and 32bit constants, a 32bit type is appropriate.
+        type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
+      } else {
+        // If the source is a double stack slot or a 64bit constant, a 64bit
+        // type is appropriate. Else the source is a register, and since the
+        // type has not been specified, we choose a 64bit type to force a 64bit
+        // move.
+        type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
+      }
+    }
+    DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(type)) ||
+           (destination.IsRegister() && !Primitive::IsFloatingPointType(type)));
+    if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
+      // Move to GPR/FPR from stack
+      LoadOperandType load_type = source.IsStackSlot() ? kLoadWord : kLoadDoubleword;
+      if (Primitive::IsFloatingPointType(type)) {
+        __ LoadFpuFromOffset(load_type,
+                             destination.AsFpuRegister<FpuRegister>(),
+                             SP,
+                             source.GetStackIndex());
+      } else {
+        // TODO: use load_type = kLoadUnsignedWord when type == Primitive::kPrimNot.
+        __ LoadFromOffset(load_type,
+                          destination.AsRegister<GpuRegister>(),
+                          SP,
+                          source.GetStackIndex());
+      }
+    } else if (source.IsConstant()) {
+      // Move to GPR/FPR from constant
+      GpuRegister gpr = AT;
+      if (!Primitive::IsFloatingPointType(type)) {
+        gpr = destination.AsRegister<GpuRegister>();
+      }
+      if (type == Primitive::kPrimInt || type == Primitive::kPrimFloat) {
+        __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+      } else {
+        __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+      }
+      if (type == Primitive::kPrimFloat) {
+        __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
+      } else if (type == Primitive::kPrimDouble) {
+        __ Dmtc1(gpr, destination.AsFpuRegister<FpuRegister>());
+      }
+    } else {
+      if (destination.IsRegister()) {
+        // Move to GPR from GPR
+        __ Move(destination.AsRegister<GpuRegister>(), source.AsRegister<GpuRegister>());
+      } else {
+        // Move to FPR from FPR
+        if (type == Primitive::kPrimFloat) {
+          __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+        } else {
+          DCHECK_EQ(type, Primitive::kPrimDouble);
+          __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>());
+        }
+      }
+    }
+  } else {  // The destination is not a register. It must be a stack slot.
+    DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
+    if (source.IsRegister() || source.IsFpuRegister()) {
+      if (unspecified_type) {
+        if (source.IsRegister()) {
+          type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
+        } else {
+          type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
+        }
+      }
+      DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(type)) &&
+             (source.IsFpuRegister() == Primitive::IsFloatingPointType(type)));
+      // Move to stack from GPR/FPR
+      StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+      if (source.IsRegister()) {
+        __ StoreToOffset(store_type,
+                         source.AsRegister<GpuRegister>(),
+                         SP,
+                         destination.GetStackIndex());
+      } else {
+        __ StoreFpuToOffset(store_type,
+                            source.AsFpuRegister<FpuRegister>(),
+                            SP,
+                            destination.GetStackIndex());
+      }
+    } else if (source.IsConstant()) {
+      // Move to stack from constant
+      HConstant* src_cst = source.GetConstant();
+      StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+      if (destination.IsStackSlot()) {
+        __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+      } else {
+        __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+      }
+      __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+    } else {
+      DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
+      DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
+      // Move to stack from stack
+      if (destination.IsStackSlot()) {
+        __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex());
+        __ StoreToOffset(kStoreWord, TMP, SP, destination.GetStackIndex());
+      } else {
+        __ LoadFromOffset(kLoadDoubleword, TMP, SP, source.GetStackIndex());
+        __ StoreToOffset(kStoreDoubleword, TMP, SP, destination.GetStackIndex());
+      }
+    }
+  }
+}
+
+void CodeGeneratorMIPS64::SwapLocations(Location loc1,
+                                        Location loc2,
+                                        Primitive::Type type ATTRIBUTE_UNUSED) {
+  DCHECK(!loc1.IsConstant());
+  DCHECK(!loc2.IsConstant());
+
+  if (loc1.Equals(loc2)) {
+    return;
+  }
+
+  bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
+  bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
+  bool is_fp_reg1 = loc1.IsFpuRegister();
+  bool is_fp_reg2 = loc2.IsFpuRegister();
+
+  if (loc2.IsRegister() && loc1.IsRegister()) {
+    // Swap 2 GPRs
+    GpuRegister r1 = loc1.AsRegister<GpuRegister>();
+    GpuRegister r2 = loc2.AsRegister<GpuRegister>();
+    __ Move(TMP, r2);
+    __ Move(r2, r1);
+    __ Move(r1, TMP);
+  } else if (is_fp_reg2 && is_fp_reg1) {
+    // Swap 2 FPRs
+    FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
+    FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
+    // TODO: Can MOV.S/MOV.D be used here to save one instruction?
+    // Need to distinguish float from double, right?
+    __ Dmfc1(TMP, r2);
+    __ Dmfc1(AT, r1);
+    __ Dmtc1(TMP, r1);
+    __ Dmtc1(AT, r2);
+  } else if (is_slot1 != is_slot2) {
+    // Swap GPR/FPR and stack slot
+    Location reg_loc = is_slot1 ? loc2 : loc1;
+    Location mem_loc = is_slot1 ? loc1 : loc2;
+    LoadOperandType load_type = mem_loc.IsStackSlot() ? kLoadWord : kLoadDoubleword;
+    StoreOperandType store_type = mem_loc.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+    // TODO: use load_type = kLoadUnsignedWord when type == Primitive::kPrimNot.
+    __ LoadFromOffset(load_type, TMP, SP, mem_loc.GetStackIndex());
+    if (reg_loc.IsFpuRegister()) {
+      __ StoreFpuToOffset(store_type,
+                          reg_loc.AsFpuRegister<FpuRegister>(),
+                          SP,
+                          mem_loc.GetStackIndex());
+      // TODO: review this MTC1/DMTC1 move
+      if (mem_loc.IsStackSlot()) {
+        __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
+      } else {
+        DCHECK(mem_loc.IsDoubleStackSlot());
+        __ Dmtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
+      }
+    } else {
+      __ StoreToOffset(store_type, reg_loc.AsRegister<GpuRegister>(), SP, mem_loc.GetStackIndex());
+      __ Move(reg_loc.AsRegister<GpuRegister>(), TMP);
+    }
+  } else if (is_slot1 && is_slot2) {
+    move_resolver_.Exchange(loc1.GetStackIndex(),
+                            loc2.GetStackIndex(),
+                            loc1.IsDoubleStackSlot());
+  } else {
+    LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2;
+  }
+}
+
+void CodeGeneratorMIPS64::Move(HInstruction* instruction,
+                               Location location,
+                               HInstruction* move_for) {
+  LocationSummary* locations = instruction->GetLocations();
+  Primitive::Type type = instruction->GetType();
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  if (instruction->IsCurrentMethod()) {
+    MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset), type);
+  } else if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  } else if (instruction->IsIntConstant()
+             || instruction->IsLongConstant()
+             || instruction->IsNullConstant()) {
+    if (location.IsRegister()) {
+      // Move to GPR from constant
+      GpuRegister dst = location.AsRegister<GpuRegister>();
+      if (instruction->IsNullConstant() || instruction->IsIntConstant()) {
+        __ LoadConst32(dst, GetInt32ValueOf(instruction->AsConstant()));
+      } else {
+        __ LoadConst64(dst, instruction->AsLongConstant()->GetValue());
+      }
+    } else {
+      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
+      // Move to stack from constant
+      if (location.IsStackSlot()) {
+        __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
+        __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
+      } else {
+        __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
+        __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
+      }
+    }
+  } else if (instruction->IsTemporary()) {
+    Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
+    MoveLocation(location, temp_location, type);
+  } else if (instruction->IsLoadLocal()) {
+    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
+    if (Primitive::Is64BitType(type)) {
+      MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
+    } else {
+      MoveLocation(location, Location::StackSlot(stack_slot), type);
+    }
+  } else {
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
+    MoveLocation(location, locations->Out(), type);
+  }
+}
+
+Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
+  Primitive::Type type = load->GetType();
+
+  switch (type) {
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type " << type;
+  }
+
+  LOG(FATAL) << "Unreachable";
+  return Location::NoLocation();
+}
+
+void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
+  Label done;
+  GpuRegister card = AT;
+  GpuRegister temp = TMP;
+  __ Beqzc(value, &done);
+  __ LoadFromOffset(kLoadDoubleword,
+                    card,
+                    TR,
+                    Thread::CardTableOffset<kMips64WordSize>().Int32Value());
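+  // Address the card for `object` (base + (object >> kCardShift)) and mark it dirty by
+  // storing the low byte of the card-table base, which is biased to equal the dirty value.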
+  __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
+  __ Daddu(temp, card, temp);
+  __ Sb(card, temp, 0);
+  __ Bind(&done);
+}
+
+void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+  // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
+  blocked_core_registers_[ZERO] = true;
+  blocked_core_registers_[K0] = true;
+  blocked_core_registers_[K1] = true;
+  blocked_core_registers_[GP] = true;
+  blocked_core_registers_[SP] = true;
+  blocked_core_registers_[RA] = true;
+
+  // AT and TMP(T8) are used as temporary/scratch registers
+  // (similar to how AT is used by MIPS assemblers).
+  blocked_core_registers_[AT] = true;
+  blocked_core_registers_[TMP] = true;
+  blocked_fpu_registers_[FTMP] = true;
+
+  // Reserve suspend and thread registers.
+  blocked_core_registers_[S0] = true;
+  blocked_core_registers_[TR] = true;
+
+  // Reserve T9 for function calls.
+  blocked_core_registers_[T9] = true;
+
+  // TODO: review; anything else?
+
+  // TODO: make these two loops conditional on is_baseline once
+  // all the issues with register saving/restoring are sorted out.
+  for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+    blocked_core_registers_[kCoreCalleeSaves[i]] = true;
+  }
+
+  for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+    blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+  }
+}
+
+Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
+  if (type == Primitive::kPrimVoid) {
+    LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  if (Primitive::IsFloatingPointType(type)) {
+    size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
+    return Location::FpuRegisterLocation(reg);
+  } else {
+    size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
+    return Location::RegisterLocation(reg);
+  }
+}
+
+size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
+  return kMips64WordSize;
+}
+
+size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+  __ LoadFromOffset(kLoadDoubleword, GpuRegister(reg_id), SP, stack_index);
+  return kMips64WordSize;
+}
+
+size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
+  return kMips64WordSize;
+}
+
+size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
+  return kMips64WordSize;
+}
+
+void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
+  stream << Mips64ManagedRegister::FromGpuRegister(GpuRegister(reg));
+}
+
+void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+  stream << Mips64ManagedRegister::FromFpuRegister(FpuRegister(reg));
+}
+
+void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset,
+                                        HInstruction* instruction,
+                                        uint32_t dex_pc,
+                                        SlowPathCode* slow_path) {
+  // TODO: anything related to T9/GP/GOT/PIC/.so's?
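+  // Entrypoints live in the Thread object: load the target from TR and call through T9.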
+  __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+  __ Jalr(T9);
+  RecordPcInfo(instruction, dex_pc, slow_path);
+  DCHECK(instruction->IsSuspendCheck()
+      || instruction->IsBoundsCheck()
+      || instruction->IsNullCheck()
+      || instruction->IsDivZeroCheck()
+      || !IsLeafMethod());
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
+                                                                      GpuRegister class_reg) {
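+  // Take the slow path unless the class status has reached kStatusInitialized.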
+  __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
+  __ LoadConst32(AT, mirror::Class::kStatusInitialized);
+  __ Bltc(TMP, AT, slow_path->GetEntryLabel());
+  // TODO: barrier needed?
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
+  __ Sync(0);  // Only stype 0 is supported.
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                          HBasicBlock* successor) {
+  SuspendCheckSlowPathMIPS64* slow_path =
+    new (GetGraph()->GetArena()) SuspendCheckSlowPathMIPS64(instruction, successor);
+  codegen_->AddSlowPath(slow_path);
+
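+  // The thread's flag bits are non-zero when a suspend or checkpoint request is pending.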
+  __ LoadFromOffset(kLoadUnsignedHalfword,
+                    TMP,
+                    TR,
+                    Thread::ThreadFlagsOffset<kMips64WordSize>().Int32Value());
+  if (successor == nullptr) {
+    __ Bnezc(TMP, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ Beqzc(TMP, codegen_->GetLabelOf(successor));
+    __ B(slow_path->GetEntryLabel());
+    // slow_path will return to GetLabelOf(successor).
+  }
+}
+
+InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph,
+                                                               CodeGeneratorMIPS64* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),
+        codegen_(codegen) {}
+
+void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) {
+  DCHECK_EQ(instruction->InputCount(), 2U);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Primitive::Type type = instruction->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      HInstruction* right = instruction->InputAt(1);
+      bool can_use_imm = false;
+      if (right->IsConstant()) {
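+        // andi/ori/xori zero-extend their 16-bit immediate, while (d)addiu sign-extends it.
+        // For subtraction the negated immediate must fit the (d)addiu form.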
+        int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
+        if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
+          can_use_imm = IsUint<16>(imm);
+        } else if (instruction->IsAdd()) {
+          can_use_imm = IsInt<16>(imm);
+        } else {
+          DCHECK(instruction->IsSub());
+          can_use_imm = IsInt<16>(-imm);
+        }
+      }
+      if (can_use_imm)
+        locations->SetInAt(1, Location::ConstantLocation(right->AsConstant()));
+      else
+        locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      Location rhs_location = locations->InAt(1);
+
+      GpuRegister rhs_reg = ZERO;
+      int64_t rhs_imm = 0;
+      bool use_imm = rhs_location.IsConstant();
+      if (use_imm) {
+        rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+      } else {
+        rhs_reg = rhs_location.AsRegister<GpuRegister>();
+      }
+
+      if (instruction->IsAnd()) {
+        if (use_imm)
+          __ Andi(dst, lhs, rhs_imm);
+        else
+          __ And(dst, lhs, rhs_reg);
+      } else if (instruction->IsOr()) {
+        if (use_imm)
+          __ Ori(dst, lhs, rhs_imm);
+        else
+          __ Or(dst, lhs, rhs_reg);
+      } else if (instruction->IsXor()) {
+        if (use_imm)
+          __ Xori(dst, lhs, rhs_imm);
+        else
+          __ Xor(dst, lhs, rhs_reg);
+      } else if (instruction->IsAdd()) {
+        if (type == Primitive::kPrimInt) {
+          if (use_imm)
+            __ Addiu(dst, lhs, rhs_imm);
+          else
+            __ Addu(dst, lhs, rhs_reg);
+        } else {
+          if (use_imm)
+            __ Daddiu(dst, lhs, rhs_imm);
+          else
+            __ Daddu(dst, lhs, rhs_reg);
+        }
+      } else {
+        DCHECK(instruction->IsSub());
+        if (type == Primitive::kPrimInt) {
+          if (use_imm)
+            __ Addiu(dst, lhs, -rhs_imm);
+          else
+            __ Subu(dst, lhs, rhs_reg);
+        } else {
+          if (use_imm)
+            __ Daddiu(dst, lhs, -rhs_imm);
+          else
+            __ Dsubu(dst, lhs, rhs_reg);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      if (instruction->IsAdd()) {
+        if (type == Primitive::kPrimFloat)
+          __ AddS(dst, lhs, rhs);
+        else
+          __ AddD(dst, lhs, rhs);
+      } else if (instruction->IsSub()) {
+        if (type == Primitive::kPrimFloat)
+          __ SubS(dst, lhs, rhs);
+        else
+          __ SubD(dst, lhs, rhs);
+      } else {
+        LOG(FATAL) << "Unexpected floating-point binary operation";
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected binary operation type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  Primitive::Type type = instr->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift type " << type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  LocationSummary* locations = instr->GetLocations();
+  Primitive::Type type = instr->GetType();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      Location rhs_location = locations->InAt(1);
+
+      GpuRegister rhs_reg = ZERO;
+      int64_t rhs_imm = 0;
+      bool use_imm = rhs_location.IsConstant();
+      if (use_imm) {
+        rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+      } else {
+        rhs_reg = rhs_location.AsRegister<GpuRegister>();
+      }
+
+      if (use_imm) {
+        uint32_t shift_value = (type == Primitive::kPrimInt)
+          ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue)
+          : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue);
+
+        if (type == Primitive::kPrimInt) {
+          if (instr->IsShl()) {
+            __ Sll(dst, lhs, shift_value);
+          } else if (instr->IsShr()) {
+            __ Sra(dst, lhs, shift_value);
+          } else {
+            __ Srl(dst, lhs, shift_value);
+          }
+        } else {
+          if (shift_value < 32) {
+            if (instr->IsShl()) {
+              __ Dsll(dst, lhs, shift_value);
+            } else if (instr->IsShr()) {
+              __ Dsra(dst, lhs, shift_value);
+            } else {
+              __ Dsrl(dst, lhs, shift_value);
+            }
+          } else {
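+            // 64-bit shifts encode amounts 0-31 only; amounts 32-63 use the *32 variants
+            // with (amount - 32).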
+            shift_value -= 32;
+            if (instr->IsShl()) {
+              __ Dsll32(dst, lhs, shift_value);
+            } else if (instr->IsShr()) {
+              __ Dsra32(dst, lhs, shift_value);
+            } else {
+              __ Dsrl32(dst, lhs, shift_value);
+            }
+          }
+        }
+      } else {
+        if (type == Primitive::kPrimInt) {
+          if (instr->IsShl()) {
+            __ Sllv(dst, lhs, rhs_reg);
+          } else if (instr->IsShr()) {
+            __ Srav(dst, lhs, rhs_reg);
+          } else {
+            __ Srlv(dst, lhs, rhs_reg);
+          }
+        } else {
+          if (instr->IsShl()) {
+            __ Dsllv(dst, lhs, rhs_reg);
+          } else if (instr->IsShr()) {
+            __ Dsrav(dst, lhs, rhs_reg);
+          } else {
+            __ Dsrlv(dst, lhs, rhs_reg);
+          }
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift operation type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitAdd(HAdd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitAdd(HAdd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitAnd(HAnd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location index = locations->InAt(1);
+  Primitive::Type type = instruction->GetType();
+
+  switch (type) {
+    case Primitive::kPrimBoolean: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+      } else {
+        __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
+        __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimByte: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
+      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+      } else {
+        __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
+        __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimShort: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
+      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
+        __ Daddu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimChar: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
+        __ Daddu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ LoadFromOffset(load_type, out, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(TMP, obj, TMP);
+        __ LoadFromOffset(load_type, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
+      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(TMP, obj, TMP);
+        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ LoadFpuFromOffset(kLoadWord, out, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(TMP, obj, TMP);
+        __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(TMP, obj, TMP);
+        __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      UNREACHABLE();
+  }
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  __ LoadFromOffset(kLoadWord, out, obj, offset);
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+  bool is_object = value_type == Primitive::kPrimNot;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
+  if (is_object) {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+      locations->SetInAt(2, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(2, Location::RequiresRegister());
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location index = locations->InAt(1);
+  Primitive::Type value_type = instruction->GetComponentType();
+  bool needs_runtime_call = locations->WillCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+
+  switch (value_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
+      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        __ StoreToOffset(kStoreByte, value, obj, offset);
+      } else {
+        __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
+        __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
+      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        __ StoreToOffset(kStoreHalfword, value, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
+        __ Daddu(TMP, obj, TMP);
+        __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      if (!needs_runtime_call) {
+        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ StoreToOffset(kStoreWord, value, obj, offset);
+        } else {
+          DCHECK(index.IsRegister()) << index;
+          __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+          __ Daddu(TMP, obj, TMP);
+          __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+        }
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (needs_write_barrier) {
+          DCHECK_EQ(value_type, Primitive::kPrimNot);
+          codegen_->MarkGCCard(obj, value);
+        }
+      } else {
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                instruction,
+                                instruction->GetDexPc(),
+                                nullptr);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
+      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(TMP, obj, TMP);
+        __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
+      FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
+      DCHECK(locations->InAt(2).IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        __ StoreFpuToOffset(kStoreWord, value, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(TMP, obj, TMP);
+        __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
+      FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
+      DCHECK(locations->InAt(2).IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset);
+      } else {
+        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(TMP, obj, TMP);
+        __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      UNREACHABLE();
+  }
+
+  // For int and object stores, the null check was already recorded in the switch above.
+  if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  BoundsCheckSlowPathMIPS64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64(
+      instruction,
+      locations->InAt(0),
+      locations->InAt(1));
+  codegen_->AddSlowPath(slow_path);
+
+  GpuRegister index = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister length = locations->InAt(1).AsRegister<GpuRegister>();
+
+  // length is limited by the maximum positive signed 32-bit integer.
+  // Unsigned comparison of length and index checks for index < 0
+  // and for length <= index simultaneously.
+  // Mips R6 requires lhs != rhs for compact branches.
+  if (index == length) {
+    __ B(slow_path->GetEntryLabel());
+  } else {
+    __ Bgeuc(index, length, slow_path->GetEntryLabel());
+  }
+}
+
+void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>();
+
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(
+      instruction,
+      locations->InAt(1),
+      Location::RegisterLocation(obj_cls),
+      instruction->GetDexPc());
+  codegen_->AddSlowPath(slow_path);
+
+  // TODO: avoid this check if we know obj is not null.
+  __ Beqzc(obj, slow_path->GetExitLabel());
+  // Compare the class of `obj` with `cls`.
+  __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
+  __ Bnec(obj_cls, cls, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (check->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) {
+  // We assume the class is not null.
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+      check->GetLoadClass(),
+      check,
+      check->GetDexPc(),
+      true);
+  codegen_->AddSlowPath(slow_path);
+  GenerateClassInitializationCheck(slow_path,
+                                   check->GetLocations()->InAt(0).AsRegister<GpuRegister>());
+}
+
+void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
+  Primitive::Type in_type = compare->InputAt(0)->GetType();
+
+  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+
+  switch (in_type) {
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Primitive::Type in_type = instruction->InputAt(0)->GetType();
+
+  //  0 if: left == right
+  //  1 if: left  > right
+  // -1 if: left  < right
+  switch (in_type) {
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+      // TODO: more efficient (direct) comparison with a constant
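+      // dst = (rhs < lhs) - (lhs < rhs), i.e. 1, 0 or -1 for lhs > rhs,
+      // lhs == rhs and lhs < rhs respectively.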
+      __ Slt(TMP, lhs, rhs);
+      __ Slt(dst, rhs, lhs);
+      __ Subu(dst, dst, TMP);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      int32_t entry_point_offset;
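+      // The bias only matters for NaN operands: the cmpg entrypoints return 1
+      // and the cmpl entrypoints return -1 when either input is NaN.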
+      if (in_type == Primitive::kPrimFloat) {
+        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat)
+                                                     : QUICK_ENTRY_POINT(pCmplFloat);
+      } else {
+        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgDouble)
+                                                     : QUICK_ENTRY_POINT(pCmplDouble);
+      }
+      codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << in_type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (instruction->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
+  if (!instruction->NeedsMaterialization()) {
+    return;
+  }
+
+  LocationSummary* locations = instruction->GetLocations();
+
+  GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+
+  IfCondition if_cond = instruction->GetCondition();
+
+  switch (if_cond) {
+    case kCondEQ:
+    case kCondNE:
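+      // XOR the operands: the result is zero iff lhs == rhs; sltiu/sltu then
+      // turn that into the materialized 1 or 0.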
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (if_cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (if_cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
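+      // Note that IsInt<16>(rhs_imm + 1) also rejects rhs_imm == INT16_MAX,
+      // for which rhs_imm + 1 would not fit in slti's immediate field.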
+      if (use_imm && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm + 1);
+        if (if_cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (if_cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  switch (div->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+      if (type == Primitive::kPrimInt)
+        __ DivR6(dst, lhs, rhs);
+      else
+        __ Ddiv(dst, lhs, rhs);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      if (type == Primitive::kPrimFloat)
+        __ DivS(dst, lhs, rhs);
+      else
+        __ DivD(dst, lhs, rhs);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected div type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) DivZeroCheckSlowPathMIPS64(instruction);
+  codegen_->AddSlowPath(slow_path);
+  Location value = instruction->GetLocations()->InAt(0);
+
+  Primitive::Type type = instruction->GetType();
+
+  if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) {
+      LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+  }
+
+  if (value.IsConstant()) {
+    int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
+    if (divisor == 0) {
+      __ B(slow_path->GetEntryLabel());
+    } else {
+      // A division by a non-zero constant is valid. We don't need to perform
+      // any check, so simply fall through.
+    }
+  } else {
+    __ Beqzc(value.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+  }
+}
+
+void LocationsBuilderMIPS64::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDoubleConstant(HDoubleConstant* cst ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS64::VisitExit(HExit* exit) {
+  exit->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
+}
+
+void LocationsBuilderMIPS64::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS64::VisitGoto(HGoto* got) {
+  got->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitGoto(HGoto* got) {
+  HBasicBlock* successor = got->GetSuccessor();
+  DCHECK(!successor->IsExitBlock());
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+  HLoopInformation* info = block->GetLoopInformation();
+
+  if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(block, successor)) {
+    __ B(codegen_->GetLabelOf(successor));
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
+                                                           Label* true_target,
+                                                           Label* false_target,
+                                                           Label* always_true_target) {
+  HInstruction* cond = instruction->InputAt(0);
+  HCondition* condition = cond->AsCondition();
+
+  if (cond->IsIntConstant()) {
+    int32_t cond_value = cond->AsIntConstant()->GetValue();
+    if (cond_value == 1) {
+      if (always_true_target != nullptr) {
+        __ B(always_true_target);
+      }
+      return;
+    } else {
+      DCHECK_EQ(cond_value, 0);
+    }
+  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+    // The condition instruction has been materialized, compare the output to 0.
+    Location cond_val = instruction->GetLocations()->InAt(0);
+    DCHECK(cond_val.IsRegister());
+    __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+  } else {
+    // The condition instruction has not been materialized, use its inputs as
+    // the comparison and its condition as the branch condition.
+    GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
+    Location rhs_location = condition->GetLocations()->InAt(1);
+    GpuRegister rhs_reg = ZERO;
+    int32_t rhs_imm = 0;
+    bool use_imm = rhs_location.IsConstant();
+    if (use_imm) {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_reg = rhs_location.AsRegister<GpuRegister>();
+    }
+
+    IfCondition if_cond = condition->GetCondition();
+    if (use_imm && rhs_imm == 0) {
+      switch (if_cond) {
+        case kCondEQ:
+          __ Beqzc(lhs, true_target);
+          break;
+        case kCondNE:
+          __ Bnezc(lhs, true_target);
+          break;
+        case kCondLT:
+          __ Bltzc(lhs, true_target);
+          break;
+        case kCondGE:
+          __ Bgezc(lhs, true_target);
+          break;
+        case kCondLE:
+          __ Blezc(lhs, true_target);
+          break;
+        case kCondGT:
+          __ Bgtzc(lhs, true_target);
+          break;
+      }
+    } else {
+      if (use_imm) {
+        rhs_reg = TMP;
+        __ LoadConst32(rhs_reg, rhs_imm);
+      }
+      // It looks like we can get here with lhs == rhs. Should that be possible at all?
+      // Mips R6 requires lhs != rhs for compact branches.
+      if (lhs == rhs_reg) {
+        DCHECK(!use_imm);
+        switch (if_cond) {
+          case kCondEQ:
+          case kCondGE:
+          case kCondLE:
+            // If lhs == rhs, the conditions EQ, GE and LE hold, so branch
+            // unconditionally.
+            __ B(true_target);
+            break;
+          case kCondNE:
+          case kCondLT:
+          case kCondGT:
+            // If lhs == rhs, the conditions NE, LT and GT are false, so emit
+            // nothing.
+            break;
+        }
+      } else {
+        switch (if_cond) {
+          case kCondEQ:
+            __ Beqc(lhs, rhs_reg, true_target);
+            break;
+          case kCondNE:
+            __ Bnec(lhs, rhs_reg, true_target);
+            break;
+          case kCondLT:
+            __ Bltc(lhs, rhs_reg, true_target);
+            break;
+          case kCondGE:
+            __ Bgec(lhs, rhs_reg, true_target);
+            break;
+          case kCondLE:
+            __ Bgec(rhs_reg, lhs, true_target);
+            break;
+          case kCondGT:
+            __ Bltc(rhs_reg, lhs, true_target);
+            break;
+        }
+      }
+    }
+  }
+  if (false_target != nullptr) {
+    __ B(false_target);
+  }
+}
+
+void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  HInstruction* cond = if_instr->InputAt(0);
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
+  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+  Label* always_true_target = true_target;
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+                                if_instr->IfTrueSuccessor())) {
+    always_true_target = nullptr;
+  }
+  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+                                if_instr->IfFalseSuccessor())) {
+    false_target = nullptr;
+  }
+  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+  HInstruction* cond = deoptimize->InputAt(0);
+  DCHECK(cond->IsCondition());
+  if (cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
+      DeoptimizationSlowPathMIPS64(deoptimize);
+  codegen_->AddSlowPath(slow_path);
+  Label* slow_path_entry = slow_path->GetEntryLabel();
+  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
+
+void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
+                                            const FieldInfo& field_info ATTRIBUTE_UNUSED) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+    locations->SetOut(Location::RequiresFpuRegister());
+  } else {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
+                                                    const FieldInfo& field_info) {
+  Primitive::Type type = field_info.GetFieldType();
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  LoadOperandType load_type = kLoadUnsignedByte;
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      load_type = kLoadUnsignedByte;
+      break;
+    case Primitive::kPrimByte:
+      load_type = kLoadSignedByte;
+      break;
+    case Primitive::kPrimShort:
+      load_type = kLoadSignedHalfword;
+      break;
+    case Primitive::kPrimChar:
+      load_type = kLoadUnsignedHalfword;
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      load_type = kLoadWord;
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      load_type = kLoadDoubleword;
+      break;
+    case Primitive::kPrimNot:
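+      // Heap references are 32-bit, so load them zero-extended into the
+      // 64-bit register.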
+      load_type = kLoadUnsignedWord;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+  if (!Primitive::IsFloatingPointType(type)) {
+    DCHECK(locations->Out().IsRegister());
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+  } else {
+    DCHECK(locations->Out().IsFpuRegister());
+    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+  }
+
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // TODO: memory barrier?
+}
+
+void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
+                                            const FieldInfo& field_info ATTRIBUTE_UNUSED) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
+                                                    const FieldInfo& field_info) {
+  Primitive::Type type = field_info.GetFieldType();
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  StoreOperandType store_type = kStoreByte;
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      store_type = kStoreByte;
+      break;
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      store_type = kStoreHalfword;
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimNot:
+      store_type = kStoreWord;
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      store_type = kStoreDoubleword;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+  if (!Primitive::IsFloatingPointType(type)) {
+    DCHECK(locations->InAt(1).IsRegister());
+    GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
+    __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+  } else {
+    DCHECK(locations->InAt(1).IsFpuRegister());
+    FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>();
+    __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+  }
+
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // TODO: memory barriers?
+  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+    DCHECK(locations->InAt(1).IsRegister());
+    GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
+    codegen_->MarkGCCard(obj, src);
+  }
+}
+
+void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary::CallKind call_kind =
+      instruction->IsClassFinal() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The output overlaps the inputs.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  Label done;
+
+  // Return 0 if `obj` is null.
+  // TODO: Avoid this check if we know `obj` is not null.
+  __ Move(out, ZERO);
+  __ Beqzc(obj, &done);
+
+  // Compare the class of `obj` with `cls`.
+  __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value());
+  if (instruction->IsClassFinal()) {
+    // Classes must be equal for the instanceof to succeed.
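+    // out = (out == cls), computed branch-free as (out ^ cls) < 1.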
+    __ Xor(out, out, cls);
+    __ Sltiu(out, out, 1);
+  } else {
+    // If the classes are not equal, we go into a slow path.
+    DCHECK(locations->OnlyCallsOnSlowPath());
+    SlowPathCodeMIPS64* slow_path =
+        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                             locations->InAt(1),
+                                                             locations->Out(),
+                                                             instruction->GetDexPc());
+    codegen_->AddSlowPath(slow_path);
+    __ Bnec(out, cls, slow_path->GetEntryLabel());
+    __ LoadConst32(out, 1);
+    __ Bind(slow_path->GetExitLabel());
+  }
+
+  __ Bind(&done);
+}
+
+void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS64::VisitNullConstant(HNullConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS64::HandleInvoke(HInvoke* invoke) {
+  InvokeDexCallingConventionVisitorMIPS64 calling_convention_visitor;
+  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+}
+
+void LocationsBuilderMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
+  HandleInvoke(invoke);
+  // The register T0 is required to be used for the hidden argument in
+  // art_quick_imt_conflict_trampoline, so add the hidden argument.
+  invoke->GetLocations()->AddTemp(Location::RegisterLocation(T0));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
+  GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+      invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value();
+  Location receiver = invoke->GetLocations()->InAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+
+  // Set the hidden argument.
+  __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<GpuRegister>(),
+                 invoke->GetDexMethodIndex());
+
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ LoadFromOffset(kLoadUnsignedWord, temp, SP, receiver.GetStackIndex());
+    __ LoadFromOffset(kLoadUnsignedWord, temp, temp, class_offset);
+  } else {
+    __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
+  }
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // temp = temp->GetImtEntryAt(method_offset);
+  __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
+  // T9 = temp->GetEntryPoint();
+  __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
+  // T9();
+  __ Jalr(T9);
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  // TODO: intrinsic function.
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
+  // TODO: intrinsic function.
+  HandleInvoke(invoke);
+
+  // While SetupBlockedRegisters() blocks registers S2-S8 due to their
+  // clobbering somewhere else, reduce further register pressure by avoiding
+  // allocation of a register for the current method pointer like on x86 baseline.
+  // TODO: remove this once all the issues with register saving/restoring are
+  // sorted out.
+  LocationSummary* locations = invoke->GetLocations();
+  Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+  if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+    locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+  }
+}
+
+static bool TryGenerateIntrinsicCode(HInvoke* invoke,
+                                     CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    // TODO: intrinsic function.
+    return true;
+  }
+  return false;
+}
+
+void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  // All registers are assumed to be correctly set up per the calling convention.
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  if (invoke->IsStringInit()) {
+    GpuRegister reg = temp.AsRegister<GpuRegister>();
+    // temp = thread->string_init_entrypoint
+    __ LoadFromOffset(kLoadDoubleword,
+                      reg,
+                      TR,
+                      invoke->GetStringInitOffset());
+    // T9 = temp->entry_point_from_quick_compiled_code_;
+    __ LoadFromOffset(kLoadDoubleword,
+                      T9,
+                      reg,
+                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                          kMips64WordSize).Int32Value());
+    // T9()
+    __ Jalr(T9);
+  } else if (invoke->IsRecursive()) {
+    __ Jalr(&frame_entry_label_, T9);
+  } else {
+    Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+    GpuRegister reg = temp.AsRegister<GpuRegister>();
+    GpuRegister method_reg;
+    if (current_method.IsRegister()) {
+      method_reg = current_method.AsRegister<GpuRegister>();
+    } else {
+      // TODO: use the appropriate DCHECK() here if possible.
+      // DCHECK(invoke->GetLocations()->Intrinsified());
+      DCHECK(!current_method.IsValid());
+      method_reg = reg;
+      __ Ld(reg, SP, kCurrentMethodStackOffset);
+    }
+
+    // temp = temp->dex_cache_resolved_methods_;
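+    // The resolved-methods array is a 32-bit heap reference, while its
+    // entries are pointer-sized, hence the two different load widths below.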
+    __ LoadFromOffset(kLoadUnsignedWord,
+                      reg,
+                      method_reg,
+                      ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+    // temp = temp[index_in_cache]
+    __ LoadFromOffset(kLoadDoubleword,
+                      reg,
+                      reg,
+                      CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()));
+    // T9 = temp[offset_of_quick_compiled_code]
+    __ LoadFromOffset(kLoadDoubleword,
+                      T9,
+                      reg,
+                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                          kMips64WordSize).Int32Value());
+    // T9()
+    __ Jalr(T9);
+  }
+
+  DCHECK(!IsLeafMethod());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // When we do not run baseline, explicit clinit checks triggered by static
+  // invokes must have been pruned by art::PrepareForRegisterAllocation.
+  DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  codegen_->GenerateStaticOrDirectCall(invoke,
+                                       locations->HasTemps()
+                                           ? locations->GetTemp(0)
+                                           : Location::NoLocation());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  // TODO: Try to generate intrinsics code.
+  LocationSummary* locations = invoke->GetLocations();
+  Location receiver = locations->InAt(0);
+  GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+      invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
+
+  // temp = object->GetClass();
+  DCHECK(receiver.IsRegister());
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // temp = temp->GetMethodAt(method_offset);
+  __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
+  // T9 = temp->GetEntryPoint();
+  __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
+  // T9();
+  __ Jalr(T9);
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
+  LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? LocationSummary::kCallOnSlowPath
+                                                              : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
+  LocationSummary* locations = cls->GetLocations();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
+  if (cls->IsReferrersClass()) {
+    DCHECK(!cls->CanCallRuntime());
+    DCHECK(!cls->MustGenerateClinitCheck());
+    __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
+                      ArtMethod::DeclaringClassOffset().Int32Value());
+  } else {
+    DCHECK(cls->CanCallRuntime());
+    __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
+                      ArtMethod::DexCacheResolvedTypesOffset().Int32Value());
+    __ LoadFromOffset(kLoadUnsignedWord, out, out,
+                      CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+        cls,
+        cls,
+        cls->GetDexPc(),
+        cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    __ Beqzc(out, slow_path->GetEntryLabel());
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
+    }
+  }
+}
+
+void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLoadException(HLoadException* load) {
+  GpuRegister out = load->GetLocations()->Out().AsRegister<GpuRegister>();
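+  // Load the pending exception from the current thread, then clear the
+  // exception field.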
+  __ LoadFromOffset(kLoadUnsignedWord, out, TR,
+                    Thread::ExceptionOffset<kMips64WordSize>().Int32Value());
+  __ StoreToOffset(kStoreWord, ZERO, TR, Thread::ExceptionOffset<kMips64WordSize>().Int32Value());
+}
+
+void LocationsBuilderMIPS64::VisitLoadLocal(HLoadLocal* load) {
+  load->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLoadLocal(HLoadLocal* load ATTRIBUTE_UNUSED) {
+  // Nothing to do, this is driven by the code generator.
+}
+
+void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) {
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = load->GetLocations();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
+  __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
+                    ArtMethod::DeclaringClassOffset().Int32Value());
+  __ LoadFromOffset(kLoadUnsignedWord, out, out,
+                    mirror::Class::DexCacheStringsOffset().Int32Value());
+  __ LoadFromOffset(kLoadUnsignedWord, out, out,
+                    CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  __ Beqzc(out, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderMIPS64::VisitLocal(HLocal* local) {
+  local->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLocal(HLocal* local) {
+  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
+}
+
+void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) {
+  codegen_->InvokeRuntime(instruction->IsEnter()
+                              ? QUICK_ENTRY_POINT(pLockObject)
+                              : QUICK_ENTRY_POINT(pUnlockObject),
+                          instruction,
+                          instruction->GetDexPc(),
+                          nullptr);
+  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+}
+
+void LocationsBuilderMIPS64::VisitMul(HMul* mul) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
+  switch (mul->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMul(HMul* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+      if (type == Primitive::kPrimInt)
+        __ MulR6(dst, lhs, rhs);
+      else
+        __ Dmul(dst, lhs, rhs);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      if (type == Primitive::kPrimFloat)
+        __ MulS(dst, lhs, rhs);
+      else
+        __ MulD(dst, lhs, rhs);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected mul type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitNeg(HNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+      if (type == Primitive::kPrimInt)
+        __ Subu(dst, ZERO, src);
+      else
+        __ Dsubu(dst, ZERO, src);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      if (type == Primitive::kPrimFloat)
+        __ NegS(dst, src);
+      else
+        __ NegD(dst, src);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected neg type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  // Move the uint16_t type index into a register.
+  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
+  codegen_->InvokeRuntime(
+      GetThreadOffset<kMips64WordSize>(instruction->GetEntrypoint()).Int32Value(),
+      instruction,
+      instruction->GetDexPc(),
+      nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
+}
+
+void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  // Move the uint16_t type index into a register.
+  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
+  codegen_->InvokeRuntime(
+      GetThreadOffset<kMips64WordSize>(instruction->GetEntrypoint()).Int32Value(),
+      instruction,
+      instruction->GetDexPc(),
+      nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+}
+
+void LocationsBuilderMIPS64::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNot(HNot* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
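+      // nor(src, 0) == ~(src | 0) == ~src.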
+      __ Nor(dst, src, ZERO);
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
+  }
+}
+
+void LocationsBuilderMIPS64::VisitBooleanNot(HBooleanNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitBooleanNot(HBooleanNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
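+  // The input is 0 or 1, so XORing it with 1 flips the boolean.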
+  __ Xori(locations->Out().AsRegister<GpuRegister>(),
+          locations->InAt(0).AsRegister<GpuRegister>(),
+          1);
+}
+
+void LocationsBuilderMIPS64::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (codegen_->CanMoveNullCheckToUser(instruction)) {
+    return;
+  }
+  Location obj = instruction->GetLocations()->InAt(0);
+
+  __ Lw(ZERO, obj.AsRegister<GpuRegister>(), 0);
+  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateExplicitNullCheck(HNullCheck* instruction) {
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathMIPS64(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  Location obj = instruction->GetLocations()->InAt(0);
+
+  __ Beqzc(obj.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNullCheck(HNullCheck* instruction) {
+  if (codegen_->GetCompilerOptions().GetImplicitNullChecks()) {
+    GenerateImplicitNullCheck(instruction);
+  } else {
+    GenerateExplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderMIPS64::VisitOr(HOr* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitOr(HOr* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorMIPS64::VisitParallelMove(HParallelMove* instruction) {
+  codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitParameterValue(HParameterValue* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  }
+  locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitParameterValue(HParameterValue* instruction
+                                                         ATTRIBUTE_UNUSED) {
+  // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderMIPS64::VisitCurrentMethod(HCurrentMethod* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitCurrentMethod(HCurrentMethod* instruction
+                                                        ATTRIBUTE_UNUSED) {
+  // Nothing to do, the method is already at its location.
+}
+
+void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind =
+      Primitive::IsFloatingPointType(type) ? LocationSummary::kCall : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(calling_convention.GetReturnLocation(type));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
+  Primitive::Type type = instruction->GetType();
+  LocationSummary* locations = instruction->GetLocations();
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+      if (type == Primitive::kPrimInt)
+        __ ModR6(dst, lhs, rhs);
+      else
+        __ Dmod(dst, lhs, rhs);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
+                                                             : QUICK_ENTRY_POINT(pFmod);
+      codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+  memory_barrier->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
+  GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+}
+
+void LocationsBuilderMIPS64::VisitReturn(HReturn* ret) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret);
+  Primitive::Type return_type = ret->InputAt(0)->GetType();
+  locations->SetInAt(0, Mips64ReturnLocation(return_type));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) {
+  codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderMIPS64::VisitReturnVoid(HReturnVoid* ret) {
+  ret->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
+  codegen_->GenerateFrameExit();
+}
+
+void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderMIPS64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderMIPS64::VisitStoreLocal(HStoreLocal* store) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
+  Primitive::Type field_type = store->InputAt(1)->GetType();
+  switch (field_type) {
+    case Primitive::kPrimNot:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << field_type;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
+}
+
+void LocationsBuilderMIPS64::VisitSub(HSub* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitSub(HSub* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorMIPS64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
+  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
+}
+
+void LocationsBuilderMIPS64::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitTemporary(HTemporary* temp ATTRIBUTE_UNUSED) {
+  // Nothing to do, this is driven by the code generator.
+}
+
+void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitThrow(HThrow* instruction) {
+  codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pDeliverException),
+                          instruction,
+                          instruction->GetDexPc(),
+                          nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) {
+  Primitive::Type input_type = conversion->GetInputType();
+  Primitive::Type result_type = conversion->GetResultType();
+  DCHECK_NE(input_type, result_type);
+
+  if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
+      (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
+    LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
+  }
+
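+  // Long-to-FP and FP-to-integral conversions are implemented through runtime
+  // entry points below, so they need a kCall location summary.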
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+    call_kind = LocationSummary::kCall;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
+  if (call_kind == LocationSummary::kNoCall) {
+    if (Primitive::IsFloatingPointType(input_type)) {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(0, Location::RequiresRegister());
+    }
+
+    if (Primitive::IsFloatingPointType(result_type)) {
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+    } else {
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    }
+  } else {
+    InvokeRuntimeCallingConvention calling_convention;
+
+    if (Primitive::IsFloatingPointType(input_type)) {
+      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+    } else {
+      locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+    }
+
+    locations->SetOut(calling_convention.GetReturnLocation(result_type));
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conversion) {
+  LocationSummary* locations = conversion->GetLocations();
+  Primitive::Type result_type = conversion->GetResultType();
+  Primitive::Type input_type = conversion->GetInputType();
+
+  DCHECK_NE(input_type, result_type);
+
+  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+    GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+
+    switch (result_type) {
+      case Primitive::kPrimChar:
+        __ Andi(dst, src, 0xFFFF);
+        break;
+      case Primitive::kPrimByte:
+        // long is never converted into types narrower than int directly,
+        // so SEB and SEH can be used without ever causing unpredictable results
+        // on 64-bit inputs
+        DCHECK(input_type != Primitive::kPrimLong);
+        __ Seb(dst, src);
+        break;
+      case Primitive::kPrimShort:
+        // long is never converted into types narrower than int directly,
+        // so SEB and SEH can be used without ever causing unpredictable results
+        // on 64-bit inputs
+        DCHECK(input_type != Primitive::kPrimLong);
+        __ Seh(dst, src);
+        break;
+      case Primitive::kPrimInt:
+      case Primitive::kPrimLong:
+        // Sign-extend 32-bit int into bits 32 through 63 for
+        // int-to-long and long-to-int conversions
+        __ Sll(dst, src, 0);
+        break;
+
+      default:
+        LOG(FATAL) << "Unexpected type conversion from " << input_type
+                   << " to " << result_type;
+    }
+  } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
+    if (input_type != Primitive::kPrimLong) {
+      FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+      GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
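+      // The integer value is first moved into the temporary FPU register FTMP so
+      // that cvt.s.w / cvt.d.w can convert it into the destination register.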
+      __ Mtc1(src, FTMP);
+      if (result_type == Primitive::kPrimFloat) {
+        __ Cvtsw(dst, FTMP);
+      } else {
+        __ Cvtdw(dst, FTMP);
+      }
+    } else {
+      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+                                                                    : QUICK_ENTRY_POINT(pL2d);
+      codegen_->InvokeRuntime(entry_offset,
+                              conversion,
+                              conversion->GetDexPc(),
+                              nullptr);
+    }
+  } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
+    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
+    int32_t entry_offset;
+    if (result_type != Primitive::kPrimLong) {
+      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
+                                                           : QUICK_ENTRY_POINT(pD2iz);
+    } else {
+      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+                                                           : QUICK_ENTRY_POINT(pD2l);
+    }
+    codegen_->InvokeRuntime(entry_offset,
+                            conversion,
+                            conversion->GetDexPc(),
+                            nullptr);
+  } else if (Primitive::IsFloatingPointType(result_type) &&
+             Primitive::IsFloatingPointType(input_type)) {
+    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+    FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+    if (result_type == Primitive::kPrimFloat) {
+      __ Cvtsd(dst, src);
+    } else {
+      __ Cvtds(dst, src);
+    }
+  } else {
+    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+                << " to " << result_type;
+  }
+}
+
+void LocationsBuilderMIPS64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void LocationsBuilderMIPS64::VisitXor(HXor* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitXor(HXor* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderMIPS64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorMIPS64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) {
+  VisitCondition(comp);
+}
+
+void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
+  VisitCondition(comp);
+}
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
new file mode 100644
index 0000000..534154f
--- /dev/null
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
+
+#include "code_generator.h"
+#include "dex/compiler_enums.h"
+#include "driver/compiler_options.h"
+#include "nodes.h"
+#include "parallel_move_resolver.h"
+#include "utils/mips64/assembler_mips64.h"
+
+namespace art {
+namespace mips64 {
+
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kMips64WordSize = kMips64PointerSize;
+
+
+// InvokeDexCallingConvention registers
+
+static constexpr GpuRegister kParameterCoreRegisters[] =
+    { A1, A2, A3, A4, A5, A6, A7 };
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+static constexpr FpuRegister kParameterFpuRegisters[] =
+    { F13, F14, F15, F16, F17, F18, F19 };
+static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+
+
+// InvokeRuntimeCallingConvention registers
+
+static constexpr GpuRegister kRuntimeParameterCoreRegisters[] =
+    { A0, A1, A2, A3, A4, A5, A6, A7 };
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
+
+static constexpr FpuRegister kRuntimeParameterFpuRegisters[] =
+    { F12, F13, F14, F15, F16, F17, F18, F19 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterFpuRegisters);
+
+
+static constexpr GpuRegister kCoreCalleeSaves[] =
+    { S0, S1, S2, S3, S4, S5, S6, S7, GP, S8, RA };  // TODO: review
+static constexpr FpuRegister kFpuCalleeSaves[] =
+    { F24, F25, F26, F27, F28, F29, F30, F31 };
+
+
+class CodeGeneratorMIPS64;
+
+class InvokeDexCallingConvention : public CallingConvention<GpuRegister, FpuRegister> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters,
+                          kParameterCoreRegistersLength,
+                          kParameterFpuRegisters,
+                          kParameterFpuRegistersLength,
+                          kMips64PointerSize) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitorMIPS64() {}
+  virtual ~InvokeDexCallingConventionVisitorMIPS64() {}
+
+  Location GetNextLocation(Primitive::Type type) OVERRIDE;
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE;
+  Location GetMethodLocation() const OVERRIDE;
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorMIPS64);
+};
+
+class InvokeRuntimeCallingConvention : public CallingConvention<GpuRegister, FpuRegister> {
+ public:
+  InvokeRuntimeCallingConvention()
+      : CallingConvention(kRuntimeParameterCoreRegisters,
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength,
+                          kMips64PointerSize) {}
+
+  Location GetReturnLocation(Primitive::Type return_type);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap {
+ public:
+  ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen)
+      : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
+
+  void EmitMove(size_t index) OVERRIDE;
+  void EmitSwap(size_t index) OVERRIDE;
+  void SpillScratch(int reg) OVERRIDE;
+  void RestoreScratch(int reg) OVERRIDE;
+
+  void Exchange(int index1, int index2, bool double_slot);
+
+  Mips64Assembler* GetAssembler() const;
+
+ private:
+  CodeGeneratorMIPS64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverMIPS64);
+};
+
+class SlowPathCodeMIPS64 : public SlowPathCode {
+ public:
+  SlowPathCodeMIPS64() : entry_label_(), exit_label_() {}
+
+  Label* GetEntryLabel() { return &entry_label_; }
+  Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+  Label entry_label_;
+  Label exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64);
+};
+
+class LocationsBuilderMIPS64 : public HGraphVisitor {
+ public:
+  LocationsBuilderMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super)     \
+  void Visit##name(H##name* instr);
+
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleInvoke(HInvoke* invoke);
+  void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
+  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_;
+
+  CodeGeneratorMIPS64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
+};
+
+class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+ public:
+  InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name, super)     \
+  void Visit##name(H##name* instr);
+
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  Mips64Assembler* GetAssembler() const { return assembler_; }
+
+ private:
+  void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+  // Generate code for the given suspend check. If not null, `successor`
+  // is the block to branch to if the suspend check is not needed, and after
+  // the suspend call.
+  void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+  void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleShift(HBinaryOperation* operation);
+  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void GenerateImplicitNullCheck(HNullCheck* instruction);
+  void GenerateExplicitNullCheck(HNullCheck* instruction);
+  void GenerateTestAndBranch(HInstruction* instruction,
+                             Label* true_target,
+                             Label* false_target,
+                             Label* always_true_target);
+
+  Mips64Assembler* const assembler_;
+  CodeGeneratorMIPS64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorMIPS64);
+};
+
+class CodeGeneratorMIPS64 : public CodeGenerator {
+ public:
+  CodeGeneratorMIPS64(HGraph* graph,
+                      const Mips64InstructionSetFeatures& isa_features,
+                      const CompilerOptions& compiler_options);
+  virtual ~CodeGeneratorMIPS64() {}
+
+  void GenerateFrameEntry() OVERRIDE;
+  void GenerateFrameExit() OVERRIDE;
+
+  void Bind(HBasicBlock* block) OVERRIDE;
+
+  void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+
+  size_t GetWordSize() const OVERRIDE { return kMips64WordSize; }
+
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
+
+  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+    return GetLabelOf(block)->Position();
+  }
+
+  HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
+  HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
+  Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
+
+  void MarkGCCard(GpuRegister object, GpuRegister value);
+
+  // Register allocation.
+
+  void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
+  // AllocateFreeRegister() is only used when allocating registers locally
+  // during CompileBaseline().
+  Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+
+  Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+
+  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
+  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
+  size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+
+  void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+  void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+  InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips64; }
+
+  const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const {
+    return isa_features_;
+  }
+
+  Label* GetLabelOf(HBasicBlock* block) const {
+    return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
+  }
+
+  void Initialize() OVERRIDE {
+    block_labels_.SetSize(GetGraph()->GetBlocks().Size());
+  }
+
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
+
+  // Code generation helpers.
+
+  void MoveLocation(Location destination, Location source, Primitive::Type type);
+
+  void SwapLocations(Location loc1, Location loc2, Primitive::Type type);
+
+  // Generate code to invoke a runtime entry point.
+  void InvokeRuntime(int32_t offset,
+                     HInstruction* instruction,
+                     uint32_t dex_pc,
+                     SlowPathCode* slow_path);
+
+  ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
+
+  bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const { return false; }
+
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
+
+ private:
+  // Labels for each block that will be compiled.
+  GrowableArray<Label> block_labels_;
+  Label frame_entry_label_;
+  LocationsBuilderMIPS64 location_builder_;
+  InstructionCodeGeneratorMIPS64 instruction_visitor_;
+  ParallelMoveResolverMIPS64 move_resolver_;
+  Mips64Assembler assembler_;
+  const Mips64InstructionSetFeatures& isa_features_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS64);
+};
+
+}  // namespace mips64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e39a1c2..4d106c4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -56,6 +56,8 @@
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
 
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; }
+
  private:
   HNullCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
@@ -71,6 +73,8 @@
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
 
+  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; }
+
  private:
   HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
@@ -90,6 +94,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; }
+
  private:
   Register reg_;
   bool is_div_;
@@ -122,6 +128,8 @@
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
 
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; }
+
  private:
   HBoundsCheck* const instruction_;
   const Location index_location_;
@@ -158,6 +166,8 @@
     return successor_;
   }
 
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -188,6 +198,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }
+
  private:
   HLoadString* const instruction_;
 
@@ -228,6 +240,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; }
+
  private:
   // The class this slow path will load.
   HLoadClass* const cls_;
@@ -293,6 +307,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; }
+
  private:
   HInstruction* const instruction_;
   const Location class_to_check_;
@@ -318,6 +334,8 @@
     codegen->RecordPcInfo(instruction_, dex_pc, this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
+
  private:
   HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index bfc827d..e55de8f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -59,6 +59,8 @@
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
 
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
+
  private:
   HNullCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
@@ -75,6 +77,8 @@
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
 
+  const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
+
  private:
   HDivZeroCheck* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
@@ -105,6 +109,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
+
  private:
   const CpuRegister cpu_reg_;
   const Primitive::Type type_;
@@ -140,6 +146,8 @@
     return successor_;
   }
 
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
+
  private:
   HSuspendCheck* const instruction_;
   HBasicBlock* const successor_;
@@ -174,6 +182,8 @@
     RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
   }
 
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
+
  private:
   HBoundsCheck* const instruction_;
   const Location index_location_;
@@ -217,6 +227,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
+
  private:
   // The class this slow path will load.
   HLoadClass* const cls_;
@@ -257,6 +269,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
+
  private:
   HLoadString* const instruction_;
 
@@ -312,6 +326,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
+
  private:
   HInstruction* const instruction_;
   const Location class_to_check_;
@@ -337,6 +353,8 @@
     codegen->RecordPcInfo(instruction_, dex_pc, this);
   }
 
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
+
  private:
   HInstruction* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
@@ -373,7 +391,7 @@
   if (invoke->IsStringInit()) {
     CpuRegister reg = temp.AsRegister<CpuRegister>();
     // temp = thread->string_init_entrypoint
-    __ gs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset()));
+    __ gs()->movq(reg, Address::Absolute(invoke->GetStringInitOffset(), true));
     // (temp + offset_of_quick_compiled_code)()
     __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
         kX86_64WordSize).SizeValue()));
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index bfed1a8..ca85cf5 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -20,6 +20,8 @@
 #include "arch/arm/instruction_set_features_arm.h"
 #include "arch/arm/registers_arm.h"
 #include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "arch/mips64/registers_mips64.h"
 #include "arch/x86/instruction_set_features_x86.h"
 #include "arch/x86/registers_x86.h"
 #include "arch/x86_64/instruction_set_features_x86_64.h"
@@ -27,6 +29,7 @@
 #include "builder.h"
 #include "code_generator_arm.h"
 #include "code_generator_arm64.h"
+#include "code_generator_mips64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
 #include "common_compiler_test.h"
@@ -40,6 +43,7 @@
 #include "ssa_liveness_analysis.h"
 #include "utils.h"
 #include "utils/arm/managed_register_arm.h"
+#include "utils/mips64/managed_register_mips64.h"
 #include "utils/x86/managed_register_x86.h"
 
 #include "gtest/gtest.h"
@@ -172,6 +176,14 @@
   if (kRuntimeISA == kArm64) {
     Run(allocator, codegenARM64, has_result, expected);
   }
+
+  std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
+      Mips64InstructionSetFeatures::FromCppDefines());
+  mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
+  codegenMIPS64.CompileBaseline(&allocator, true);
+  if (kRuntimeISA == kMips64) {
+    Run(allocator, codegenMIPS64, has_result, expected);
+  }
 }
 
 template <typename Expected>
@@ -219,6 +231,11 @@
         X86_64InstructionSetFeatures::FromCppDefines());
     x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
     RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+  } else if (kRuntimeISA == kMips64) {
+    std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
+        Mips64InstructionSetFeatures::FromCppDefines());
+    mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
+    RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
   }
 }
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 422223f..11f6362 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -627,8 +627,8 @@
     "  9: If(8)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  12: Goto 3\n"
-    "BasicBlock 3, pred: 2, 5, succ: 4\n"
-    "  22: Phi(3, 5) [15]\n"
+    "BasicBlock 3, pred: 5, 2, succ: 4\n"
+    "  22: Phi(5, 3) [15]\n"
     "  15: Add(22, 3)\n"
     "  17: ReturnVoid\n"
     "BasicBlock 4, pred: 3\n"
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 17a006c..fdfe518 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -122,10 +122,6 @@
       if (!inst->HasSideEffects()
           && !inst->CanThrow()
           && !inst->IsSuspendCheck()
-          // The current method needs to stay in the graph in case of inlining.
-          // It is always passed anyway, and keeping it in the graph does not
-          // affect the generated code.
-          && !inst->IsCurrentMethod()
           // If we added an explicit barrier then we should keep it.
           && !inst->IsMemoryBarrier()
           && !inst->HasUses()) {
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 3209d3e..ee3a61a 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -89,8 +89,8 @@
     "  9: If(8)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  12: Goto 3\n"
-    "BasicBlock 3, pred: 2, 5, succ: 4\n"
-    "  22: Phi(3, 5) [15]\n"
+    "BasicBlock 3, pred: 5, 2, succ: 4\n"
+    "  22: Phi(5, 3) [15]\n"
     "  15: Add(22, 3)\n"
     "  17: ReturnVoid\n"
     "BasicBlock 4, pred: 3\n"
@@ -101,7 +101,7 @@
   // Expected difference after dead code elimination.
   diff_t expected_diff = {
     { "  3: IntConstant [15, 22, 8]\n", "  3: IntConstant [22, 8]\n" },
-    { "  22: Phi(3, 5) [15]\n",         "  22: Phi(3, 5)\n" },
+    { "  22: Phi(5, 3) [15]\n",         "  22: Phi(5, 3)\n" },
     { "  15: Add(22, 3)\n",             removed }
   };
   std::string expected_after = Patch(expected_before, expected_diff);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index b647917..9fd8d00 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -359,9 +359,13 @@
                && is_after_pass_) {
       if (instruction->GetType() == Primitive::kPrimNot) {
         if (instruction->IsLoadClass()) {
+          ReferenceTypeInfo info = instruction->AsLoadClass()->GetLoadedClassRTI();
           ScopedObjectAccess soa(Thread::Current());
-          StartAttributeStream("klass")
-              << PrettyClass(instruction->AsLoadClass()->GetLoadedClassRTI().GetTypeHandle().Get());
+          if (info.GetTypeHandle().GetReference() != nullptr) {
+            StartAttributeStream("klass") << info.GetTypeHandle().Get();
+          } else {
+            StartAttributeStream("klass") << "unresolved";
+          }
         } else {
           ReferenceTypeInfo info = instruction->GetReferenceTypeInfo();
           if (info.IsTop()) {
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2daeeb3..678924d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -312,14 +312,15 @@
     HInstruction* input_value = equal->GetLeastConstantLeft();
     if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
       HBasicBlock* block = equal->GetBlock();
+      // The boolean is being compared against an int constant, which may hold
+      // any value, not just 0 or 1, so only the 0 and 1 cases are simplified.
       if (input_const->AsIntConstant()->IsOne()) {
         // Replace (bool_value == true) with bool_value
         equal->ReplaceWith(input_value);
         block->RemoveInstruction(equal);
         RecordSimplification();
-      } else {
+      } else if (input_const->AsIntConstant()->IsZero()) {
         // Replace (bool_value == false) with !bool_value
-        DCHECK(input_const->AsIntConstant()->IsZero());
         block->ReplaceAndRemoveInstructionWith(
             equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
         RecordSimplification();
@@ -334,14 +335,15 @@
     HInstruction* input_value = not_equal->GetLeastConstantLeft();
     if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
       HBasicBlock* block = not_equal->GetBlock();
+      // The boolean is being compared against an int constant, which may hold
+      // any value, not just 0 or 1, so only the 0 and 1 cases are simplified.
       if (input_const->AsIntConstant()->IsOne()) {
         // Replace (bool_value != true) with !bool_value
         block->ReplaceAndRemoveInstructionWith(
             not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
         RecordSimplification();
-      } else {
+      } else if (input_const->AsIntConstant()->IsZero()) {
         // Replace (bool_value != false) with bool_value
-        DCHECK(input_const->AsIntConstant()->IsZero());
         not_equal->ReplaceWith(input_value);
         block->RemoveInstruction(not_equal);
         RecordSimplification();
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 749bedf..71fadfb 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -121,6 +121,8 @@
     __ b(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM"; }
+
  private:
   // The instruction where this slow path is happening.
   HInvoke* const invoke_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index c108ad5..8bcb88b 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -130,6 +130,8 @@
     __ B(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }
+
  private:
   // The instruction where this slow path is happening.
   HInvoke* const invoke_;
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 424ac7c..b04cc5c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -158,6 +158,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86"; }
+
  private:
   // The instruction where this slow path is happening.
   HInvoke* const invoke_;
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8915314..888c7b8 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -149,6 +149,8 @@
     __ jmp(GetExitLabel());
   }
 
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathX86_64"; }
+
  private:
   // The instruction where this slow path is happening.
   HInvoke* const invoke_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 4baa05c..01eb2d7 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -180,8 +180,9 @@
   HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc());
   AddBlock(new_block);
   new_block->AddInstruction(new (arena_) HGoto());
-  block->ReplaceSuccessor(successor, new_block);
-  new_block->AddSuccessor(successor);
+  // Use `InsertBetween` to ensure the predecessor index and successor index of
+  // `block` and `successor` are preserved.
+  new_block->InsertBetween(block, successor);
   if (successor->IsLoopHeader()) {
     // If we split at a back edge boundary, make the new block the back edge.
     HLoopInformation* info = successor->GetLoopInformation();
@@ -288,7 +289,10 @@
 }
 
 HNullConstant* HGraph::GetNullConstant() {
-  if (cached_null_constant_ == nullptr) {
+  // For simplicity, don't bother reviving the cached null constant if it is
+  // not null and not in a block. Otherwise, we need to clear the instruction
+  // id and/or any invariants the graph is assuming when adding new instructions.
+  if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) {
     cached_null_constant_ = new (arena_) HNullConstant();
     InsertConstant(cached_null_constant_);
   }
@@ -296,7 +300,10 @@
 }
 
 HCurrentMethod* HGraph::GetCurrentMethod() {
-  if (cached_current_method_ == nullptr) {
+  // For simplicity, don't bother reviving the cached current method if it is
+  // not null and not in a block. Otherwise, we need to clear the instruction
+  // id and/or any invariants the graph is assuming when adding new instructions.
+  if ((cached_current_method_ == nullptr) || (cached_current_method_->GetBlock() == nullptr)) {
     cached_current_method_ = new (arena_) HCurrentMethod(
         Is64BitInstructionSet(instruction_set_) ? Primitive::kPrimLong : Primitive::kPrimInt);
     if (entry_block_->GetFirstInstruction() == nullptr) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7ef6955..26eee1c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -335,6 +335,7 @@
     }
 
     // If not found or previously deleted, create and cache a new instruction.
+    // Don't bother reviving a previously deleted instruction, for simplicity.
     if (constant == nullptr || constant->GetBlock() == nullptr) {
       constant = new (arena_) InstructionType(value);
       cache->Overwrite(value, constant);
@@ -624,6 +625,20 @@
     predecessors_.Put(predecessor_index, new_block);
   }
 
+  // Insert `this` between `predecessor` and `successor`. This method
+  // preserves the indices, and will update the first edge found between
+  // `predecessor` and `successor`.
+  void InsertBetween(HBasicBlock* predecessor, HBasicBlock* successor) {
+    size_t predecessor_index = successor->GetPredecessorIndexOf(predecessor);
+    DCHECK_NE(predecessor_index, static_cast<size_t>(-1));
+    size_t successor_index = predecessor->GetSuccessorIndexOf(successor);
+    DCHECK_NE(successor_index, static_cast<size_t>(-1));
+    successor->predecessors_.Put(predecessor_index, this);
+    predecessor->successors_.Put(successor_index, this);
+    successors_.Add(successor);
+    predecessors_.Add(predecessor);
+  }
+
   void RemovePredecessor(HBasicBlock* block) {
     predecessors_.Delete(block);
   }
@@ -2189,8 +2204,12 @@
     kLtBias,  // return -1 for NaN comparisons
   };
 
-  HCompare(Primitive::Type type, HInstruction* first, HInstruction* second, Bias bias)
-      : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias) {
+  HCompare(Primitive::Type type,
+           HInstruction* first,
+           HInstruction* second,
+           Bias bias,
+           uint32_t dex_pc)
+      : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias), dex_pc_(dex_pc) {
     DCHECK_EQ(type, first->GetType());
     DCHECK_EQ(type, second->GetType());
   }
@@ -2215,10 +2234,13 @@
 
   bool IsGtBias() { return bias_ == kGtBias; }
 
+  uint32_t GetDexPc() const { return dex_pc_; }
+
   DECLARE_INSTRUCTION(Compare);
 
  private:
   const Bias bias_;
+  const uint32_t dex_pc_;
 
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
@@ -4026,6 +4048,8 @@
     return source_.IsInvalid();
   }
 
+  Primitive::Type GetType() const { return type_; }
+
   bool Is64BitMove() const {
     return Primitive::Is64BitType(type_);
   }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 303a7cb..ad67813 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -290,6 +290,7 @@
 static bool IsInstructionSetSupported(InstructionSet instruction_set) {
   return instruction_set == kArm64
       || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat)
+      || instruction_set == kMips64
       || instruction_set == kX86
       || instruction_set == kX86_64;
 }
@@ -340,18 +341,20 @@
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_types");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, stats, "last_instruction_simplifier");
+      graph, stats, "instruction_simplifier_after_bce");
   ReferenceTypePropagation* type_propagation2 =
       new (arena) ReferenceTypePropagation(graph, handles);
+  InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
+      graph, stats, "instruction_simplifier_before_codegen");
 
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations[] = {
     intrinsics,
-    dce1,
     fold1,
     simplify1,
     type_propagation,
+    dce1,
     simplify2,
     inliner,
     // Run another type propagation phase: inlining will open up more opportunities
@@ -367,6 +370,10 @@
     bce,
     simplify3,
     dce2,
+    // The codegen has a few assumptions that only the instruction simplifier can
+    // satisfy. For example, the code generator does not expect to see an
+    // HTypeConversion from a type to the same type.
+    simplify4,
   };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index b988813..53d052b 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,6 +38,7 @@
   kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
+  kNotCompiledMalformedOpcode,
   kNotCompiledNoCodegen,
   kNotCompiledPathological,
   kNotCompiledSpaceFilter,
@@ -106,6 +107,7 @@
       case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
       case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
       case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
+      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
       case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
       case kNotCompiledPathological : return "kNotCompiledPathological";
       case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index e38e49c..7b23d02 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -80,6 +80,7 @@
                                                 InstructionSet instruction_set) {
   return instruction_set == kArm64
       || instruction_set == kX86_64
+      || instruction_set == kMips64
       || instruction_set == kArm
       || instruction_set == kX86
       || instruction_set == kThumb2;
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index fb3e7d7..0e8c058 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -115,7 +115,7 @@
     "  3: If(2)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  4: Goto\n"
-    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "BasicBlock 3, pred: 5, 2, succ: 4\n"
     "  5: ReturnVoid\n"
     "BasicBlock 4, pred: 3\n"
     "  6: Exit\n"
@@ -145,8 +145,8 @@
     "  4: If(3)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  5: Goto\n"
-    "BasicBlock 3, pred: 2, 5, succ: 4\n"
-    "  6: Phi(1, 0) [7]\n"
+    "BasicBlock 3, pred: 5, 2, succ: 4\n"
+    "  6: Phi(0, 1) [7]\n"
     "  7: Return(6)\n"
     "BasicBlock 4, pred: 3\n"
     "  8: Exit\n"
@@ -428,8 +428,8 @@
     "  10: Goto\n"
     "BasicBlock 5, pred: 3, succ: 2\n"
     "  11: Goto\n"
-    "BasicBlock 6, pred: 4, 8, succ: 7\n"
-    "  12: Phi(2, 5) [13]\n"
+    "BasicBlock 6, pred: 8, 4, succ: 7\n"
+    "  12: Phi(5, 2) [13]\n"
     "  13: Return(12)\n"
     "BasicBlock 7, pred: 6\n"
     "  14: Exit\n"
@@ -480,7 +480,7 @@
     "  4: If(3)\n"
     "BasicBlock 2, pred: 1, succ: 3\n"
     "  5: Goto\n"
-    "BasicBlock 3, pred: 2, 5, succ: 4\n"
+    "BasicBlock 3, pred: 5, 2, succ: 4\n"
     "  6: ReturnVoid\n"
     "BasicBlock 4, pred: 3\n"
     "  7: Exit\n"
@@ -517,7 +517,7 @@
     "  8: Add(0, 0)\n"
     "  9: Goto\n"
     // This block should not get a phi for local 1.
-    "BasicBlock 5, pred: 2, 4, 7, succ: 6\n"
+    "BasicBlock 5, pred: 2, 7, 4, succ: 6\n"
     "  10: ReturnVoid\n"
     "BasicBlock 6, pred: 5\n"
     "  11: Exit\n"
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index a8b55d1..a192d2f 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -25,9 +25,9 @@
 namespace art {
 namespace mips64 {
 
-void Mips64Assembler::Emit(int32_t value) {
+void Mips64Assembler::Emit(uint32_t value) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<int32_t>(value);
+  buffer_.Emit<uint32_t>(value);
 }
 
 void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd,
@@ -35,124 +35,62 @@
   CHECK_NE(rs, kNoGpuRegister);
   CHECK_NE(rt, kNoGpuRegister);
   CHECK_NE(rd, kNoGpuRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     static_cast<int32_t>(rs) << kRsShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     shamt << kShamtShift |
-                     funct;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      static_cast<uint32_t>(rt) << kRtShift |
+                      static_cast<uint32_t>(rd) << kRdShift |
+                      shamt << kShamtShift |
+                      funct;
   Emit(encoding);
 }
 
 void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm) {
   CHECK_NE(rs, kNoGpuRegister);
   CHECK_NE(rt, kNoGpuRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     static_cast<int32_t>(rs) << kRsShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     imm;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      static_cast<uint32_t>(rt) << kRtShift |
+                      imm;
   Emit(encoding);
 }
 
-void Mips64Assembler::EmitJ(int opcode, int address) {
-  int32_t encoding = opcode << kOpcodeShift |
-                     address;
+void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) {
+  CHECK_NE(rs, kNoGpuRegister);
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      static_cast<uint32_t>(rs) << kRsShift |
+                      (imm21 & 0x1FFFFF);
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) {
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      (addr26 & 0x3FFFFFF);
   Emit(encoding);
 }
 
 void Mips64Assembler::EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd,
-int funct) {
+                             int funct) {
   CHECK_NE(ft, kNoFpuRegister);
   CHECK_NE(fs, kNoFpuRegister);
   CHECK_NE(fd, kNoFpuRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     fmt << kFmtShift |
-                     static_cast<int32_t>(ft) << kFtShift |
-                     static_cast<int32_t>(fs) << kFsShift |
-                     static_cast<int32_t>(fd) << kFdShift |
-                     funct;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      fmt << kFmtShift |
+                      static_cast<uint32_t>(ft) << kFtShift |
+                      static_cast<uint32_t>(fs) << kFsShift |
+                      static_cast<uint32_t>(fd) << kFdShift |
+                      funct;
   Emit(encoding);
 }
 
-void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm) {
-  CHECK_NE(rt, kNoFpuRegister);
-  int32_t encoding = opcode << kOpcodeShift |
-                     fmt << kFmtShift |
-                     static_cast<int32_t>(rt) << kRtShift |
-                     imm;
+void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm) {
+  CHECK_NE(ft, kNoFpuRegister);
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
+                      fmt << kFmtShift |
+                      static_cast<uint32_t>(ft) << kFtShift |
+                      imm;
   Emit(encoding);
 }
 
-void Mips64Assembler::EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal) {
-  int offset;
-  if (label->IsBound()) {
-    offset = label->Position() - buffer_.Size();
-  } else {
-    // Use the offset field of the branch instruction for linking the sites.
-    offset = label->position_;
-    label->LinkTo(buffer_.Size());
-  }
-  if (equal) {
-    Beq(rt, rs, (offset >> 2) & kBranchOffsetMask);
-  } else {
-    Bne(rt, rs, (offset >> 2) & kBranchOffsetMask);
-  }
-}
-
-void Mips64Assembler::EmitJump(Label* label, bool link) {
-  int offset;
-  if (label->IsBound()) {
-    offset = label->Position() - buffer_.Size();
-  } else {
-    // Use the offset field of the jump instruction for linking the sites.
-    offset = label->position_;
-    label->LinkTo(buffer_.Size());
-  }
-  if (link) {
-    Jal((offset >> 2) & kJumpOffsetMask);
-  } else {
-    J((offset >> 2) & kJumpOffsetMask);
-  }
-}
-
-int32_t Mips64Assembler::EncodeBranchOffset(int offset, int32_t inst, bool is_jump) {
-  CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt<POPCOUNT(kBranchOffsetMask)>(offset)) << offset;
-
-  // Properly preserve only the bits supported in the instruction.
-  offset >>= 2;
-  if (is_jump) {
-    offset &= kJumpOffsetMask;
-    return (inst & ~kJumpOffsetMask) | offset;
-  } else {
-    offset &= kBranchOffsetMask;
-    return (inst & ~kBranchOffsetMask) | offset;
-  }
-}
-
-int Mips64Assembler::DecodeBranchOffset(int32_t inst, bool is_jump) {
-  // Sign-extend, then left-shift by 2.
-  if (is_jump) {
-    return (((inst & kJumpOffsetMask) << 6) >> 4);
-  } else {
-    return (((inst & kBranchOffsetMask) << 16) >> 14);
-  }
-}
-
-void Mips64Assembler::Bind(Label* label, bool is_jump) {
-  CHECK(!label->IsBound());
-  int bound_pc = buffer_.Size();
-  while (label->IsLinked()) {
-    int32_t position = label->Position();
-    int32_t next = buffer_.Load<int32_t>(position);
-    int32_t offset = is_jump ? bound_pc - position : bound_pc - position - 4;
-    int32_t encoded = Mips64Assembler::EncodeBranchOffset(offset, next, is_jump);
-    buffer_.Store<int32_t>(position, encoded);
-    label->position_ = Mips64Assembler::DecodeBranchOffset(next, is_jump);
-  }
-  label->BindTo(bound_pc);
-}
-
 void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x20);
 }
@@ -169,6 +107,10 @@
   EmitI(0x9, rs, rt, imm16);
 }
 
+void Mips64Assembler::Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x2d);
+}
+
 void Mips64Assembler::Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x19, rs, rt, imm16);
 }
@@ -181,22 +123,90 @@
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
 
-void Mips64Assembler::Mult(GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 0, 0x2f);
+}
+
+void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
 }
 
-void Mips64Assembler::Multu(GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
 }
 
-void Mips64Assembler::Div(GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
 }
 
-void Mips64Assembler::Divu(GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
 }
 
+void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0x1c, rs, rt, rd, 0, 2);
+}
+
+void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  DivR2(rs, rt);
+  Mflo(rd);
+}
+
+void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  DivR2(rs, rt);
+  Mfhi(rd);
+}
+
+void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  DivuR2(rs, rt);
+  Mflo(rd);
+}
+
+void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  DivuR2(rs, rt);
+  Mfhi(rd);
+}
+
+void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 2, 0x18);
+}
+
+void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 2, 0x1a);
+}
+
+void Mips64Assembler::ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1a);
+}
+
+void Mips64Assembler::DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 2, 0x1b);
+}
+
+void Mips64Assembler::ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1b);
+}
+
+void Mips64Assembler::Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 2, 0x1c);
+}
+
+void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 2, 0x1e);
+}
+
+void Mips64Assembler::Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1e);
+}
+
+void Mips64Assembler::Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 2, 0x1f);
+}
+
+void Mips64Assembler::Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1f);
+}
+
 void Mips64Assembler::And(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x24);
 }
@@ -225,30 +235,80 @@
   EmitR(0, rs, rt, rd, 0, 0x27);
 }
 
-void Mips64Assembler::Sll(GpuRegister rd, GpuRegister rs, int shamt) {
-  EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x00);
+void Mips64Assembler::Seb(GpuRegister rd, GpuRegister rt) {
+  EmitR(0x1f, static_cast<GpuRegister>(0), rt, rd, 0x10, 0x20);
 }
 
-void Mips64Assembler::Srl(GpuRegister rd, GpuRegister rs, int shamt) {
-  EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x02);
+void Mips64Assembler::Seh(GpuRegister rd, GpuRegister rt) {
+  EmitR(0x1f, static_cast<GpuRegister>(0), rt, rd, 0x18, 0x20);
 }
 
-void Mips64Assembler::Sra(GpuRegister rd, GpuRegister rs, int shamt) {
-  EmitR(0, rs, static_cast<GpuRegister>(0), rd, shamt, 0x03);
+void Mips64Assembler::Dext(GpuRegister rt, GpuRegister rs, int pos, int size_less_one) {
+  DCHECK(0 <= pos && pos < 32) << pos;
+  DCHECK(0 <= size_less_one && size_less_one < 32) << size_less_one;
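+  // DEXT (SPECIAL3, function 3) carries the field size minus one in the rd slot
+  // and the starting bit position in the shamt slot, hence the size_less_one
+  // parameter.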
+  EmitR(0x1f, rs, rt, static_cast<GpuRegister>(size_less_one), pos, 3);
 }
 
-void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::Sll(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x00);
+}
+
+void Mips64Assembler::Srl(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x02);
+}
+
+void Mips64Assembler::Sra(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x03);
+}
+
+void Mips64Assembler::Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
   EmitR(0, rs, rt, rd, 0, 0x04);
 }
 
-void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
 
-void Mips64Assembler::Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+void Mips64Assembler::Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
 
+void Mips64Assembler::Dsll(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x38);
+}
+
+void Mips64Assembler::Dsrl(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3a);
+}
+
+void Mips64Assembler::Dsra(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3b);
+}
+
+void Mips64Assembler::Dsll32(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3c);
+}
+
+void Mips64Assembler::Dsrl32(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3e);
+}
+
+void Mips64Assembler::Dsra32(GpuRegister rd, GpuRegister rt, int shamt) {
+  EmitR(0, static_cast<GpuRegister>(0), rt, rd, shamt, 0x3f);
+}
+
+void Mips64Assembler::Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
+  EmitR(0, rs, rt, rd, 0, 0x14);
+}
+
+void Mips64Assembler::Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
+  EmitR(0, rs, rt, rd, 0, 0x16);
+}
+
+void Mips64Assembler::Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs) {
+  EmitR(0, rs, rt, rd, 0, 0x17);
+}
+
 void Mips64Assembler::Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x20, rs, rt, imm16);
 }
@@ -281,6 +341,19 @@
   EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16);
 }
 
+void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) {
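+  // DAHI and DATI share the REGIMM opcode (1); the value placed in the rt field
+  // (6 for DAHI, 0x1e for DATI) selects which instruction is emitted.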
+  EmitI(1, rs, static_cast<GpuRegister>(6), imm16);
+}
+
+void Mips64Assembler::Dati(GpuRegister rs, uint16_t imm16) {
+  EmitI(1, rs, static_cast<GpuRegister>(0x1e), imm16);
+}
+
+void Mips64Assembler::Sync(uint32_t stype) {
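+  // SYNC is SPECIAL function 0xf; the (masked) stype goes in the shamt field.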
+  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0),
+           static_cast<GpuRegister>(0), stype & 0x1f, 0xf);
+}
+
 void Mips64Assembler::Mfhi(GpuRegister rd) {
   EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
 }
@@ -321,34 +394,121 @@
   EmitI(0xb, rs, rt, imm16);
 }
 
-void Mips64Assembler::Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
   EmitI(0x4, rs, rt, imm16);
   Nop();
 }
 
-void Mips64Assembler::Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
   EmitI(0x5, rs, rt, imm16);
   Nop();
 }
 
-void Mips64Assembler::J(uint32_t address) {
-  EmitJ(0x2, address);
+void Mips64Assembler::J(uint32_t addr26) {
+  EmitJ(0x2, addr26);
   Nop();
 }
 
-void Mips64Assembler::Jal(uint32_t address) {
-  EmitJ(0x2, address);
+void Mips64Assembler::Jal(uint32_t addr26) {
+  EmitJ(0x3, addr26);
   Nop();
 }
 
-void Mips64Assembler::Jr(GpuRegister rs) {
-  EmitR(0, rs, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), 0, 0x09);  // Jalr zero, rs
+void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) {
+  EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09);
   Nop();
 }
 
 void Mips64Assembler::Jalr(GpuRegister rs) {
-  EmitR(0, rs, static_cast<GpuRegister>(0), RA, 0, 0x09);
-  Nop();
+  Jalr(RA, rs);
+}
+
+void Mips64Assembler::Jr(GpuRegister rs) {
+  Jalr(ZERO, rs);
+}
+
+void Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) {
+  EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16);
+}
+
+void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) {
+  EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16);
+}
+
+void Mips64Assembler::Jialc(GpuRegister rt, uint16_t imm16) {
+  EmitI(0x3E, static_cast<GpuRegister>(0), rt, imm16);
+}
+
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x17, rs, rt, imm16);
+}
+
+void Mips64Assembler::Bltzc(GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rt, ZERO);
+  EmitI(0x17, rt, rt, imm16);
+}
+
+void Mips64Assembler::Bgtzc(GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rt, ZERO);
+  EmitI(0x17, static_cast<GpuRegister>(0), rt, imm16);
+}
+
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x16, rs, rt, imm16);
+}
+
+void Mips64Assembler::Bgezc(GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rt, ZERO);
+  EmitI(0x16, rt, rt, imm16);
+}
+
+void Mips64Assembler::Blezc(GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rt, ZERO);
+  EmitI(0x16, static_cast<GpuRegister>(0), rt, imm16);
+}
+
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x7, rs, rt, imm16);
+}
+
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x6, rs, rt, imm16);
+}
+
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+}
+
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  CHECK_NE(rt, ZERO);
+  CHECK_NE(rs, rt);
+  EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+}
+
+void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) {
+  CHECK_NE(rs, ZERO);
+  EmitI21(0x36, rs, imm21);
+}
+
+void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) {
+  CHECK_NE(rs, ZERO);
+  EmitI21(0x3E, rs, imm21);
 }
 
 void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
@@ -368,23 +528,19 @@
 }
 
 void Mips64Assembler::AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
-  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
-         static_cast<FpuRegister>(fd), 0x0);
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x0);
 }
 
 void Mips64Assembler::SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
-  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
-         static_cast<FpuRegister>(fd), 0x1);
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x1);
 }
 
 void Mips64Assembler::MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
-  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
-         static_cast<FpuRegister>(fd), 0x2);
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x2);
 }
 
 void Mips64Assembler::DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
-  EmitFR(0x11, 0x11, static_cast<FpuRegister>(ft), static_cast<FpuRegister>(fs),
-         static_cast<FpuRegister>(fd), 0x3);
+  EmitFR(0x11, 0x11, ft, fs, fd, 0x3);
 }
 
 void Mips64Assembler::MovS(FpuRegister fd, FpuRegister fs) {
@@ -392,16 +548,47 @@
 }
 
 void Mips64Assembler::MovD(FpuRegister fd, FpuRegister fs) {
-  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), static_cast<FpuRegister>(fs),
-         static_cast<FpuRegister>(fd), 0x6);
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x6);
+}
+
+void Mips64Assembler::NegS(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x7);
+}
+
+void Mips64Assembler::NegD(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x7);
+}
+
+void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20);
+}
+
+void Mips64Assembler::Cvtdw(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x21);
+}
+
+void Mips64Assembler::Cvtsd(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x20);
+}
+
+void Mips64Assembler::Cvtds(FpuRegister fd, FpuRegister fs) {
+  EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x21);
 }
 
 void Mips64Assembler::Mfc1(GpuRegister rt, FpuRegister fs) {
   EmitFR(0x11, 0x00, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
-void Mips64Assembler::Mtc1(FpuRegister ft, GpuRegister rs) {
-  EmitFR(0x11, 0x04, ft, static_cast<FpuRegister>(rs), static_cast<FpuRegister>(0), 0x0);
+void Mips64Assembler::Mtc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x04, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
+void Mips64Assembler::Dmfc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x01, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
+}
+
+void Mips64Assembler::Dmtc1(GpuRegister rt, FpuRegister fs) {
+  EmitFR(0x11, 0x05, static_cast<FpuRegister>(rt), fs, static_cast<FpuRegister>(0), 0x0);
 }
 
 void Mips64Assembler::Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16) {
@@ -430,45 +617,330 @@
         static_cast<GpuRegister>(0), 0, 0x0);
 }
 
-void Mips64Assembler::Move(GpuRegister rt, GpuRegister rs) {
-  EmitI(0x19, rs, rt, 0);   // Daddiu
+void Mips64Assembler::Move(GpuRegister rd, GpuRegister rs) {
+  Or(rd, rs, ZERO);
 }
 
-void Mips64Assembler::Clear(GpuRegister rt) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rt, 0, 0x20);
+void Mips64Assembler::Clear(GpuRegister rd) {
+  Move(rd, ZERO);
 }
 
-void Mips64Assembler::Not(GpuRegister rt, GpuRegister rs) {
-  EmitR(0, static_cast<GpuRegister>(0), rs, rt, 0, 0x27);
+void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) {
+  Nor(rd, rs, ZERO);
 }
 
-void Mips64Assembler::Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  Mult(rs, rt);
-  Mflo(rd);
+void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
+  if (IsUint<16>(value)) {
+    // Use OR with (unsigned) immediate to encode 16b unsigned int.
+    Ori(rd, ZERO, value);
+  } else if (IsInt<16>(value)) {
+    // Use ADD with (signed) immediate to encode 16b signed int.
+    Addiu(rd, ZERO, value);
+  } else {
+    Lui(rd, value >> 16);
+    if (value & 0xFFFF)
+      Ori(rd, rd, value);
+  }
 }
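
Editor's note, not part of the patch: a standalone sketch mirroring the selection logic of LoadConst32() above, handy when reading generated code. The function name and the single placeholder register "rd" are illustrative; the printed mnemonics are the expected emissions, not captured output.

    #include <cstdint>
    #include <cstdio>

    // Sketch of the three cases handled by LoadConst32() above.
    void DescribeLoadConst32(int32_t value) {
      uint32_t v = static_cast<uint32_t>(value);
      if (v <= 0xFFFFu) {                              // IsUint<16>(value)
        std::printf("ori   rd, zero, 0x%x\n", v);
      } else if (value >= -32768 && value <= 32767) {  // IsInt<16>(value)
        std::printf("addiu rd, zero, %d\n", value);
      } else {
        std::printf("lui   rd, 0x%x\n", (v >> 16) & 0xFFFFu);
        if (v & 0xFFFFu) {                             // low half non-zero
          std::printf("ori   rd, rd, 0x%x\n", v & 0xFFFFu);
        }
      }
    }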
 
-void Mips64Assembler::Div(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  Div(rs, rt);
-  Mflo(rd);
+void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) {
+  int bit31 = (value & UINT64_C(0x80000000)) != 0;
+
+  // Loads with 1 instruction.
+  if (IsUint<16>(value)) {
+    Ori(rd, ZERO, value);
+  } else if (IsInt<16>(value)) {
+    Daddiu(rd, ZERO, value);
+  } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) {
+    Lui(rd, value >> 16);
+  } else if (IsInt<32>(value)) {
+    // Loads with 2 instructions.
+    Lui(rd, value >> 16);
+    Ori(rd, rd, value);
+  } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) {
+    Ori(rd, ZERO, value);
+    Dahi(rd, value >> 32);
+  } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) {
+    Ori(rd, ZERO, value);
+    Dati(rd, value >> 48);
+  } else if ((value & 0xFFFF) == 0 &&
+             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
+    Lui(rd, value >> 16);
+    Dahi(rd, (value >> 32) + bit31);
+  } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
+    Lui(rd, value >> 16);
+    Dati(rd, (value >> 48) + bit31);
+  } else {
+    int shift_cnt = CTZ(value);
+    int64_t tmp = value >> shift_cnt;
+    if (IsUint<16>(tmp)) {
+      Ori(rd, ZERO, tmp);
+      if (shift_cnt < 32)
+        Dsll(rd, rd, shift_cnt);
+      else
+        Dsll32(rd, rd, shift_cnt & 31);
+    } else if (IsInt<16>(tmp)) {
+      Daddiu(rd, ZERO, tmp);
+      if (shift_cnt < 32)
+        Dsll(rd, rd, shift_cnt);
+      else
+        Dsll32(rd, rd, shift_cnt & 31);
+    } else if (IsInt<32>(tmp)) {
+      // Loads with 3 instructions.
+      Lui(rd, tmp >> 16);
+      Ori(rd, rd, tmp);
+      if (shift_cnt < 32)
+        Dsll(rd, rd, shift_cnt);
+      else
+        Dsll32(rd, rd, shift_cnt & 31);
+    } else {
+      shift_cnt = 16 + CTZ(value >> 16);
+      tmp = value >> shift_cnt;
+      if (IsUint<16>(tmp)) {
+        Ori(rd, ZERO, tmp);
+        if (shift_cnt < 32)
+          Dsll(rd, rd, shift_cnt);
+        else
+          Dsll32(rd, rd, shift_cnt & 31);
+        Ori(rd, rd, value);
+      } else if (IsInt<16>(tmp)) {
+        Daddiu(rd, ZERO, tmp);
+        if (shift_cnt < 32)
+          Dsll(rd, rd, shift_cnt);
+        else
+          Dsll32(rd, rd, shift_cnt & 31);
+        Ori(rd, rd, value);
+      } else {
+        // Loads with 3-4 instructions.
+        uint64_t tmp2 = value;
+        bool used_lui = false;
+        if (((tmp2 >> 16) & 0xFFFF) != 0 || (tmp2 & 0xFFFFFFFF) == 0) {
+          Lui(rd, tmp2 >> 16);
+          used_lui = true;
+        }
+        if ((tmp2 & 0xFFFF) != 0) {
+          if (used_lui)
+            Ori(rd, rd, tmp2);
+          else
+            Ori(rd, ZERO, tmp2);
+        }
+        if (bit31) {
+          tmp2 += UINT64_C(0x100000000);
+        }
+        if (((tmp2 >> 32) & 0xFFFF) != 0) {
+          Dahi(rd, tmp2 >> 32);
+        }
+        if (tmp2 & UINT64_C(0x800000000000)) {
+          tmp2 += UINT64_C(0x1000000000000);
+        }
+        if ((tmp2 >> 48) != 0) {
+          Dati(rd, tmp2 >> 48);
+        }
+      }
+    }
+  }
 }
 
-void Mips64Assembler::Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  Div(rs, rt);
-  Mfhi(rd);
+void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) {
+  if (IsInt<16>(value)) {
+    Addiu(rt, rs, value);
+  } else {
+    LoadConst32(rtmp, value);
+    Addu(rt, rs, rtmp);
+  }
 }
 
-void Mips64Assembler::AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value) {
-  CHECK((value >= -32768) && (value <= 32766));
-  Daddiu(rt, rs, value);
+void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
+  if (IsInt<16>(value)) {
+    Daddiu(rt, rs, value);
+  } else {
+    LoadConst64(rtmp, value);
+    Daddu(rt, rs, rtmp);
+  }
 }
 
-void Mips64Assembler::LoadImmediate64(GpuRegister rt, int32_t value) {
-  CHECK((value >= -32768) && (value <= 32766));
-  Daddiu(rt, ZERO, value);
+//
+// MIPS64R6 branches
+//
+//
+// Unconditional (pc + 32-bit signed offset):
+//
+//   auipc    at, ofs_high
+//   jic      at, ofs_low
+//   // no delay/forbidden slot
+//
+//
+// Conditional (pc + 32-bit signed offset):
+//
+//   b<cond>c   reg, +2      // skip next 2 instructions
+//   auipc      at, ofs_high
+//   jic        at, ofs_low
+//   // no delay/forbidden slot
+//
+//
+// Unconditional (pc + 32-bit signed offset) and link:
+//
+//   auipc    reg, ofs_high
+//   daddiu   reg, ofs_low
+//   jialc    reg, 0
+//   // no delay/forbidden slot
+//
+//
+// TODO: use shorter instruction sequences whenever possible.
+//
+
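Editor's note, not part of the patch: a worked example of the auipc/jic offset split used by the branch code below. Because jic (and daddiu) sign-extend their 16-bit immediate, the high half is pre-incremented whenever bit 15 of the offset is set; that is what "offset += (offset & 0x8000) << 1" followed by ">> 16" achieves. The function name is illustrative.

    #include <cstdint>

    // Sketch of the split performed by B()/Bind()/Jalr(Label*) below.
    void SplitBranchOffset(uint32_t offset, uint16_t* high, uint16_t* low) {
      offset += (offset & 0x8000) << 1;               // compensate for sign extension in jic/daddiu
      *high = static_cast<uint16_t>(offset >> 16);    // immediate for auipc
      *low = static_cast<uint16_t>(offset & 0xFFFF);  // immediate for jic/daddiu
    }
    // Example: offset 0x00018000 -> high 0x0002, low 0x8000; at run time
    // (pc + (0x0002 << 16)) + sign_extend(0x8000) = pc + 0x20000 - 0x8000 = pc + 0x18000.
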
+void Mips64Assembler::Bind(Label* label) {
+  CHECK(!label->IsBound());
+  int32_t bound_pc = buffer_.Size();
+
+  // Walk the list of branches (auipc + jic pairs) referring to and preceding this label.
+  // Embed the previously unknown pc-relative addresses in them.
+  while (label->IsLinked()) {
+    int32_t position = label->Position();
+    // Extract the branch (instruction pair)
+    uint32_t auipc = buffer_.Load<uint32_t>(position);
+    uint32_t jic = buffer_.Load<uint32_t>(position + 4);  // actually, jic or daddiu
+
+    // Extract the location of the previous pair in the list (walking the list backwards;
+    // the previous pair location was stored in the immediate operands of the instructions)
+    int32_t prev = (auipc << 16) | (jic & 0xFFFF);
+
+    // Get the pc-relative address
+    uint32_t offset = bound_pc - position;
+    offset += (offset & 0x8000) << 1;  // account for sign extension in jic/daddiu
+
+    // Embed it in the two instructions
+    auipc = (auipc & 0xFFFF0000) | (offset >> 16);
+    jic = (jic & 0xFFFF0000) | (offset & 0xFFFF);
+
+    // Save the adjusted instructions
+    buffer_.Store<uint32_t>(position, auipc);
+    buffer_.Store<uint32_t>(position + 4, jic);
+
+    // On to the previous branch in the list...
+    label->position_ = prev;
+  }
+
+  // Now make the label object contain its own location
+  // (it will be used by the branches referring to and following this label)
+  label->BindTo(bound_pc);
+}
+
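Editor's note, not part of the patch: a minimal usage sketch of the label machinery above together with the label-based R6 branches defined below, computing a 64-bit absolute value. The include path, the function name and the register constants (V0, A0, ZERO) are assumptions.

    #include "utils/mips64/assembler_mips64.h"  // assumed include path

    namespace art {
    namespace mips64 {

    // Sketch: abs(A0) -> V0 using a forward branch that Bind() patches up.
    void EmitAbs64Sketch(Mips64Assembler* asm_) {
      Label done;
      asm_->Move(V0, A0);
      asm_->Bgezc(A0, &done);     // forward branch: gets linked, patched at Bind()
      asm_->Dsubu(V0, ZERO, A0);  // A0 was negative: negate
      asm_->Bind(&done);          // resolves the auipc+jic pair emitted for the branch
    }

    }  // namespace mips64
    }  // namespace art
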
+void Mips64Assembler::B(Label* label) {
+  if (label->IsBound()) {
+    // Branch backwards (to a preceding label), distance is known
+    uint32_t offset = label->Position() - buffer_.Size();
+    CHECK_LE(static_cast<int32_t>(offset), 0);
+    offset += (offset & 0x8000) << 1;  // account for sign extension in jic
+    Auipc(AT, offset >> 16);
+    Jic(AT, offset);
+  } else {
+    // Branch forward (to a following label), distance is unknown
+    int32_t position = buffer_.Size();
+    // The first branch forward will have 0 in its pc-relative address (copied from label's
+    // position). It will be the terminator of the list of forward-reaching branches.
+    uint32_t prev = label->position_;
+    Auipc(AT, prev >> 16);
+    Jic(AT, prev);
+    // Now make the link object point to the location of this branch
+    // (this forms a linked list of branches preceding this label)
+    label->LinkTo(position);
+  }
+}
+
+void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) {
+  if (label->IsBound()) {
+    // Branch backwards (to a preceding label), distance is known
+    uint32_t offset = label->Position() - buffer_.Size();
+    CHECK_LE(static_cast<int32_t>(offset), 0);
+    offset += (offset & 0x8000) << 1;  // account for sign extension in daddiu
+    Auipc(indirect_reg, offset >> 16);
+    Daddiu(indirect_reg, indirect_reg, offset);
+    Jialc(indirect_reg, 0);
+  } else {
+    // Branch forward (to a following label), distance is unknown
+    int32_t position = buffer_.Size();
+    // The first branch forward will have 0 in its pc-relative address (copied from label's
+    // position). It will be the terminator of the list of forward-reaching branches.
+    uint32_t prev = label->position_;
+    Auipc(indirect_reg, prev >> 16);
+    Daddiu(indirect_reg, indirect_reg, prev);
+    Jialc(indirect_reg, 0);
+    // Now make the link object point to the location of this branch
+    // (this forms a linked list of branches preceding this label)
+    label->LinkTo(position);
+  }
+}
+
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) {
+  Bgec(rs, rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) {
+  Bgezc(rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) {
+  Blezc(rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) {
+  Bltc(rs, rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) {
+  Bltzc(rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Blezc(GpuRegister rt, Label* label) {
+  Bgtzc(rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) {
+  Bgeuc(rs, rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) {
+  Bltuc(rs, rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) {
+  Bnec(rs, rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) {
+  Beqc(rs, rt, 2);
+  B(label);
+}
+
+void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) {
+  Bnezc(rs, 2);
+  B(label);
+}
+
+void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) {
+  Beqzc(rs, 2);
+  B(label);
 }
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Daddu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kLoadSignedByte:
       Lb(reg, base, offset);
@@ -489,23 +961,25 @@
       Lwu(reg, base, offset);
       break;
     case kLoadDoubleword:
-      // TODO: alignment issues ???
       Ld(reg, base, offset);
       break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
   }
 }
 
 void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
                                         int32_t offset) {
-  CHECK((offset >= -32768) && (offset <= 32766));
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Daddu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kLoadWord:
       Lwc1(reg, base, offset);
       break;
     case kLoadDoubleword:
-      // TODO: alignment issues ???
       Ldc1(reg, base, offset);
       break;
     default:
@@ -542,6 +1016,13 @@
 
 void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
                                     int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Daddu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kStoreByte:
       Sb(reg, base, offset);
@@ -553,7 +1034,6 @@
       Sw(reg, base, offset);
       break;
     case kStoreDoubleword:
-      // TODO: alignment issues ???
       Sd(reg, base, offset);
       break;
     default:
@@ -563,6 +1043,13 @@
 
 void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
                                        int32_t offset) {
+  if (!IsInt<16>(offset)) {
+    LoadConst32(AT, offset);
+    Daddu(AT, AT, base);
+    base = AT;
+    offset = 0;
+  }
+
   switch (type) {
     case kStoreWord:
       Swc1(reg, base, offset);
@@ -613,10 +1100,12 @@
       // only increment stack offset.
       offset += size;
     } else if (reg.IsFpuRegister()) {
-      StoreFpuToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsFpuRegister(), SP, offset);
+      StoreFpuToOffset((size == 4) ? kStoreWord : kStoreDoubleword,
+          reg.AsFpuRegister(), SP, offset);
       offset += size;
     } else if (reg.IsGpuRegister()) {
-      StoreToOffset((size == 4) ? kStoreWord : kStoreDoubleword, reg.AsGpuRegister(), SP, offset);
+      StoreToOffset((size == 4) ? kStoreWord : kStoreDoubleword,
+          reg.AsGpuRegister(), SP, offset);
       offset += size;
     }
   }
@@ -650,14 +1139,14 @@
 }
 
 void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant64(SP, SP, -adjust);
+  CHECK_ALIGNED(adjust, kFramePointerSize);
+  Daddiu64(SP, SP, static_cast<int32_t>(-adjust));
   cfi_.AdjustCFAOffset(adjust);
 }
 
 void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant64(SP, SP, adjust);
+  CHECK_ALIGNED(adjust, kFramePointerSize);
+  Daddiu64(SP, SP, static_cast<int32_t>(adjust));
   cfi_.AdjustCFAOffset(-adjust);
 }
 
@@ -702,7 +1191,7 @@
                                             ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
-  LoadImmediate64(scratch.AsGpuRegister(), imm);
+  LoadConst32(scratch.AsGpuRegister(), imm);
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
@@ -710,7 +1199,9 @@
                                                ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
-  LoadImmediate64(scratch.AsGpuRegister(), imm);
+  // TODO: it's unclear whether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?).
+  // Is this function even referenced anywhere else in the code?
+  LoadConst32(scratch.AsGpuRegister(), imm);
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value());
 }
 
@@ -719,7 +1210,7 @@
                                                  ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
-  AddConstant64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
+  Daddiu64(scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
@@ -757,20 +1248,24 @@
   LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
   if (kPoisonHeapReferences && poison_reference) {
-    Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
+    // TODO: review
+    // Negate the 32-bit ref
+    Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
+    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
+    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 31);
   }
 }
 
 void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base,
-                               Offset offs) {
+                                 Offset offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
-  CHECK(dest.IsGpuRegister() && dest.IsGpuRegister()) << dest;
+  CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
 }
 
 void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                         ThreadOffset<8> offs) {
+                                             ThreadOffset<8> offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
@@ -849,7 +1344,7 @@
 }
 
 void Mips64Assembler::Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset,
-                         ManagedRegister mscratch, size_t size) {
+                           ManagedRegister mscratch, size_t size) {
   GpuRegister scratch = mscratch.AsMips64().AsGpuRegister();
   CHECK(size == 4 || size == 8) << size;
   if (size == 4) {
@@ -866,7 +1361,7 @@
 }
 
 void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
-                         ManagedRegister mscratch, size_t size) {
+                           ManagedRegister mscratch, size_t size) {
   GpuRegister scratch = mscratch.AsMips64().AsGpuRegister();
   CHECK(size == 4 || size == 8) << size;
   if (size == 4) {
@@ -888,8 +1383,8 @@
 }
 
 void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
-                         ManagedRegister src, Offset src_offset,
-                         ManagedRegister mscratch, size_t size) {
+                           ManagedRegister src, Offset src_offset,
+                           ManagedRegister mscratch, size_t size) {
   GpuRegister scratch = mscratch.AsMips64().AsGpuRegister();
   CHECK(size == 4 || size == 8) << size;
   if (size == 4) {
@@ -912,12 +1407,14 @@
 }
 
 void Mips64Assembler::MemoryBarrier(ManagedRegister) {
+  // TODO: sync?
   UNIMPLEMENTED(FATAL) << "no mips64 implementation";
 }
 
 void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
-                                    FrameOffset handle_scope_offset,
-                                    ManagedRegister min_reg, bool null_allowed) {
+                                             FrameOffset handle_scope_offset,
+                                             ManagedRegister min_reg,
+                                             bool null_allowed) {
   Mips64ManagedRegister out_reg = mout_reg.AsMips64();
   Mips64ManagedRegister in_reg = min_reg.AsMips64();
   CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg;
@@ -933,20 +1430,20 @@
       in_reg = out_reg;
     }
     if (!out_reg.Equals(in_reg)) {
-      LoadImmediate64(out_reg.AsGpuRegister(), 0);
+      LoadConst32(out_reg.AsGpuRegister(), 0);
     }
-    EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true);
-    AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
-    Bind(&null_arg, false);
+    Beqzc(in_reg.AsGpuRegister(), &null_arg);
+    Daddiu64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg);
   } else {
-    AddConstant64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+    Daddiu64(out_reg.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
   }
 }
 
 void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
-                                    FrameOffset handle_scope_offset,
-                                    ManagedRegister mscratch,
-                                    bool null_allowed) {
+                                             FrameOffset handle_scope_offset,
+                                             ManagedRegister mscratch,
+                                             bool null_allowed) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   if (null_allowed) {
@@ -956,30 +1453,30 @@
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
     // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    EmitBranch(scratch.AsGpuRegister(), ZERO, &null_arg, true);
-    AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
-    Bind(&null_arg, false);
+    Beqzc(scratch.AsGpuRegister(), &null_arg);
+    Daddiu64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+    Bind(&null_arg);
   } else {
-    AddConstant64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
+    Daddiu64(scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value());
   }
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, out_off.Int32Value());
 }
 
 // Given a handle scope entry, load the associated reference.
 void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg,
-                                          ManagedRegister min_reg) {
+                                                   ManagedRegister min_reg) {
   Mips64ManagedRegister out_reg = mout_reg.AsMips64();
   Mips64ManagedRegister in_reg = min_reg.AsMips64();
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   CHECK(in_reg.IsGpuRegister()) << in_reg;
   Label null_arg;
   if (!out_reg.Equals(in_reg)) {
-    LoadImmediate64(out_reg.AsGpuRegister(), 0);
+    LoadConst32(out_reg.AsGpuRegister(), 0);
   }
-  EmitBranch(in_reg.AsGpuRegister(), ZERO, &null_arg, true);
+  Beqzc(in_reg.AsGpuRegister(), &null_arg);
   LoadFromOffset(kLoadDoubleword, out_reg.AsGpuRegister(),
                  in_reg.AsGpuRegister(), 0);
-  Bind(&null_arg, false);
+  Bind(&null_arg);
 }
 
 void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
@@ -1022,7 +1519,7 @@
 }
 
 void Mips64Assembler::GetCurrentThread(FrameOffset offset,
-                                     ManagedRegister /*mscratch*/) {
+                                       ManagedRegister /*mscratch*/) {
   StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
 }
 
@@ -1032,13 +1529,13 @@
   buffer_.EnqueueSlowPath(slow);
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  S1, Thread::ExceptionOffset<8>().Int32Value());
-  EmitBranch(scratch.AsGpuRegister(), ZERO, slow->Entry(), false);
+  Bnezc(scratch.AsGpuRegister(), slow->Entry());
 }
 
 void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
   Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
 #define __ sp_asm->
-  __ Bind(&entry_, false);
+  __ Bind(&entry_);
   if (stack_adjust_ != 0) {  // Fix up the frame.
     __ DecreaseFrameSize(stack_adjust_);
   }
@@ -1048,6 +1545,7 @@
   // Set up call to Thread::Current()->pDeliverException
   __ LoadFromOffset(kLoadDoubleword, T9, S1,
                     QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
+  // TODO: check T9 usage
   __ Jr(T9);
   // Call never returns
   __ Break();
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 38419ab..88cc4bc 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -56,13 +56,31 @@
   void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
-  void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Mult(GpuRegister rs, GpuRegister rt);
-  void Multu(GpuRegister rs, GpuRegister rt);
-  void Div(GpuRegister rs, GpuRegister rt);
-  void Divu(GpuRegister rs, GpuRegister rt);
+  void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+
+  void MultR2(GpuRegister rs, GpuRegister rt);  // R2
+  void MultuR2(GpuRegister rs, GpuRegister rt);  // R2
+  void DivR2(GpuRegister rs, GpuRegister rt);  // R2
+  void DivuR2(GpuRegister rs, GpuRegister rt);  // R2
+  void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
+  void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
+  void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
+  void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
+  void ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
+  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
+  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
+  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
+  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
+  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
+  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
 
   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -72,40 +90,72 @@
   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
 
-  void Sll(GpuRegister rd, GpuRegister rs, int shamt);
-  void Srl(GpuRegister rd, GpuRegister rs, int shamt);
-  void Sra(GpuRegister rd, GpuRegister rs, int shamt);
-  void Sllv(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Srlv(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Srav(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Seb(GpuRegister rd, GpuRegister rt);  // R2+
+  void Seh(GpuRegister rd, GpuRegister rt);  // R2+
+  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
+
+  void Sll(GpuRegister rd, GpuRegister rt, int shamt);
+  void Srl(GpuRegister rd, GpuRegister rt, int shamt);
+  void Sra(GpuRegister rd, GpuRegister rt, int shamt);
+  void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
+  void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
+  void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs);
+  void Dsll(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Dsrl(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Dsra(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Dsll32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Dsra32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
+  void Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
+  void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
+  void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
 
   void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
-  void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
-  void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
-  void Mfhi(GpuRegister rd);
-  void Mflo(GpuRegister rd);
+  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Sync(uint32_t stype);
+  void Mfhi(GpuRegister rd);  // R2
+  void Mflo(GpuRegister rd);  // R2
 
   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
-  void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
 
   void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
 
-  void Beq(GpuRegister rt, GpuRegister rs, uint16_t imm16);
-  void Bne(GpuRegister rt, GpuRegister rs, uint16_t imm16);
-  void J(uint32_t address);
-  void Jal(uint32_t address);
-  void Jr(GpuRegister rs);
+  void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void J(uint32_t addr26);
+  void Jal(uint32_t addr26);
+  void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
+  void Jr(GpuRegister rs);
+  void Auipc(GpuRegister rs, uint16_t imm16);  // R6
+  void Jic(GpuRegister rt, uint16_t imm16);  // R6
+  void Jialc(GpuRegister rt, uint16_t imm16);  // R6
+  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
+  void Bltzc(GpuRegister rt, uint16_t imm16);  // R6
+  void Bgtzc(GpuRegister rt, uint16_t imm16);  // R6
+  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
+  void Bgezc(GpuRegister rt, uint16_t imm16);  // R6
+  void Blezc(GpuRegister rt, uint16_t imm16);  // R6
+  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
+  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
+  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
+  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
+  void Beqzc(GpuRegister rs, uint32_t imm21);  // R6
+  void Bnezc(GpuRegister rs, uint32_t imm21);  // R6
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -117,9 +167,18 @@
   void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MovS(FpuRegister fd, FpuRegister fs);
   void MovD(FpuRegister fd, FpuRegister fs);
+  void NegS(FpuRegister fd, FpuRegister fs);
+  void NegD(FpuRegister fd, FpuRegister fs);
+
+  void Cvtsw(FpuRegister fd, FpuRegister fs);
+  void Cvtdw(FpuRegister fd, FpuRegister fs);
+  void Cvtsd(FpuRegister fd, FpuRegister fs);
+  void Cvtds(FpuRegister fd, FpuRegister fs);
 
   void Mfc1(GpuRegister rt, FpuRegister fs);
-  void Mtc1(FpuRegister ft, GpuRegister rs);
+  void Mtc1(GpuRegister rt, FpuRegister fs);
+  void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
+  void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
   void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
   void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
   void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
@@ -127,15 +186,33 @@
 
   void Break();
   void Nop();
-  void Move(GpuRegister rt, GpuRegister rs);
-  void Clear(GpuRegister rt);
-  void Not(GpuRegister rt, GpuRegister rs);
-  void Mul(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Div(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Rem(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Move(GpuRegister rd, GpuRegister rs);
+  void Clear(GpuRegister rd);
+  void Not(GpuRegister rd, GpuRegister rs);
 
-  void AddConstant64(GpuRegister rt, GpuRegister rs, int32_t value);
-  void LoadImmediate64(GpuRegister rt, int32_t value);
+  // Higher level composite instructions
+  void LoadConst32(GpuRegister rd, int32_t value);
+  void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64
+
+  void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
+  void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
+
+  void Bind(Label* label);  // R6
+  void B(Label* label);  // R6
+  void Jalr(Label* label, GpuRegister indirect_reg = RA);  // R6
+  // TODO: implement a common R6/non-R6 interface for conditional branches?
+  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
+  void Bltzc(GpuRegister rt, Label* label);  // R6
+  void Bgtzc(GpuRegister rt, Label* label);  // R6
+  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
+  void Bgezc(GpuRegister rt, Label* label);  // R6
+  void Blezc(GpuRegister rt, Label* label);  // R6
+  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
+  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
+  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
+  void Beqzc(GpuRegister rs, Label* label);  // R6
+  void Bnezc(GpuRegister rs, Label* label);  // R6
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -144,10 +221,7 @@
   void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
 
   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
-  void Emit(int32_t value);
-  void EmitBranch(GpuRegister rt, GpuRegister rs, Label* label, bool equal);
-  void EmitJump(Label* label, bool link);
-  void Bind(Label* label, bool is_jump);
+  void Emit(uint32_t value);
 
   //
   // Overridden common assembler high-level functionality
@@ -269,13 +343,11 @@
  private:
   void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
-  void EmitJ(int opcode, int address);
+  void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
+  void EmitJ(int opcode, uint32_t addr26);
   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
 
-  int32_t EncodeBranchOffset(int offset, int32_t inst, bool is_jump);
-  int DecodeBranchOffset(int32_t inst, bool is_jump);
-
   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
 };
 
diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h
index 8b7697c..f57498d 100644
--- a/compiler/utils/mips64/constants_mips64.h
+++ b/compiler/utils/mips64/constants_mips64.h
@@ -67,7 +67,7 @@
   static const uint32_t kBreakPointInstruction = 0x0000000D;
 
   bool IsBreakPoint() {
-    return ((*reinterpret_cast<const uint32_t*>(this)) & 0xFC0000CF) == kBreakPointInstruction;
+    return ((*reinterpret_cast<const uint32_t*>(this)) & 0xFC00003F) == kBreakPointInstruction;
   }
 
   // Instructions are read out of a code stream. The only way to get a
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index ac81737..70ca88d 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -106,6 +106,9 @@
   { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", },
   { kRTypeMask, 62, "dsrl32", "DTA", },
   { kRTypeMask, 63, "dsra32", "DTA", },
+  { kRTypeMask, (31u << kOpcodeShift) | 3, "dext", "TSAZ", },
+  { kRTypeMask | (0x1f << 21) | (0x1f << 6), (31u << 26) | (16 << 6) | 32, "seb", "DT", },
+  { kRTypeMask | (0x1f << 21) | (0x1f << 6), (31u << 26) | (24 << 6) | 32, "seh", "DT", },
 
   // SPECIAL0
   { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
@@ -150,13 +153,17 @@
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (16 << 16), "bltzal", "SB" },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (18 << 16), "bltzall", "SB" },
   { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" },
+  { kITypeMask, 6 << kOpcodeShift, "bgeuc", "STB" },
   { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" },
+  { kITypeMask, 7 << kOpcodeShift, "bltuc", "STB" },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", },
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", },
 
   { 0xffff0000, (4 << kOpcodeShift), "b", "B" },
   { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" },
 
+  { kITypeMask, 8 << kOpcodeShift, "beqc", "STB" },
+
   { kITypeMask, 8 << kOpcodeShift, "addi", "TSi", },
   { kITypeMask, 9 << kOpcodeShift, "addiu", "TSi", },
   { kITypeMask, 10 << kOpcodeShift, "slti", "TSi", },
@@ -166,6 +173,83 @@
   { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", },
   { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", },
   { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", },
+
+  { kITypeMask | (0x1f << 21), 22 << kOpcodeShift, "blezc", "TB" },
+
+  // TODO: de-dup
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (1  << 21) | (1  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (2  << 21) | (2  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (3  << 21) | (3  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (4  << 21) | (4  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (5  << 21) | (5  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (6  << 21) | (6  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (7  << 21) | (7  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (8  << 21) | (8  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (9  << 21) | (9  << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (10 << 21) | (10 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (11 << 21) | (11 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (12 << 21) | (12 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (13 << 21) | (13 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (14 << 21) | (14 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (15 << 21) | (15 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (16 << 21) | (16 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (17 << 21) | (17 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (18 << 21) | (18 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (19 << 21) | (19 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (20 << 21) | (20 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (21 << 21) | (21 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (22 << 21) | (22 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (23 << 21) | (23 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (24 << 21) | (24 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (25 << 21) | (25 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (26 << 21) | (26 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (27 << 21) | (27 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (28 << 21) | (28 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (29 << 21) | (29 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (30 << 21) | (30 << 16), "bgezc", "TB" },
+  { kITypeMask | (0x3ff << 16), (22 << kOpcodeShift) | (31 << 21) | (31 << 16), "bgezc", "TB" },
+
+  { kITypeMask, 22 << kOpcodeShift, "bgec", "STB" },
+
+  { kITypeMask | (0x1f << 21), 23 << kOpcodeShift, "bgtzc", "TB" },
+
+  // TODO: de-dup
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (1  << 21) | (1  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (2  << 21) | (2  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (3  << 21) | (3  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (4  << 21) | (4  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (5  << 21) | (5  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (6  << 21) | (6  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (7  << 21) | (7  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (8  << 21) | (8  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (9  << 21) | (9  << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (10 << 21) | (10 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (11 << 21) | (11 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (12 << 21) | (12 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (13 << 21) | (13 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (14 << 21) | (14 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (15 << 21) | (15 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (16 << 21) | (16 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (17 << 21) | (17 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (18 << 21) | (18 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (19 << 21) | (19 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (20 << 21) | (20 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (21 << 21) | (21 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (22 << 21) | (22 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (23 << 21) | (23 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (24 << 21) | (24 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (25 << 21) | (25 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (26 << 21) | (26 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (27 << 21) | (27 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (28 << 21) | (28 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (29 << 21) | (29 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (30 << 21) | (30 << 16), "bltzc", "TB" },
+  { kITypeMask | (0x3ff << 16), (23 << kOpcodeShift) | (31 << 21) | (31 << 16), "bltzc", "TB" },
+
+  { kITypeMask, 23 << kOpcodeShift, "bltc", "STB" },
+
+  { kITypeMask, 24 << kOpcodeShift, "bnec", "STB" },
+
   { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", },
   { kITypeMask, 29 << kOpcodeShift, "daui", "TSi", },
 
@@ -180,9 +264,22 @@
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
+  { kITypeMask | (0x1f << 21), 54u << kOpcodeShift, "jic", "Ti" },
+  { kITypeMask | (1 << 21), (54u << kOpcodeShift) | (1 << 21), "beqzc", "Sb" },  // TODO: de-dup?
+  { kITypeMask | (1 << 22), (54u << kOpcodeShift) | (1 << 22), "beqzc", "Sb" },
+  { kITypeMask | (1 << 23), (54u << kOpcodeShift) | (1 << 23), "beqzc", "Sb" },
+  { kITypeMask | (1 << 24), (54u << kOpcodeShift) | (1 << 24), "beqzc", "Sb" },
+  { kITypeMask | (1 << 25), (54u << kOpcodeShift) | (1 << 25), "beqzc", "Sb" },
   { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
+  { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
+  { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" },
+  { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" },  // TODO: de-dup?
+  { kITypeMask | (1 << 22), (62u << kOpcodeShift) | (1 << 22), "bnezc", "Sb" },
+  { kITypeMask | (1 << 23), (62u << kOpcodeShift) | (1 << 23), "bnezc", "Sb" },
+  { kITypeMask | (1 << 24), (62u << kOpcodeShift) | (1 << 24), "bnezc", "Sb" },
+  { kITypeMask | (1 << 25), (62u << kOpcodeShift) | (1 << 25), "bnezc", "Sb" },
   { kITypeMask, 63u << kOpcodeShift, "sd", "TO", },
 
   // Floating point.
@@ -241,7 +338,7 @@
       opcode = gMipsInstructions[i].name;
       for (const char* args_fmt = gMipsInstructions[i].args_fmt; *args_fmt; ++args_fmt) {
         switch (*args_fmt) {
-          case 'A':  // sa (shift amount).
+          case 'A':  // sa (shift amount or [d]ext position).
             args << sa;
             break;
           case 'B':  // Branch offset.
@@ -253,6 +350,15 @@
                    << StringPrintf("  ; %+d", offset);
             }
             break;
+          case 'b':  // 21-bit branch offset.
+            {
+              int32_t offset = (instruction & 0x1fffff) - ((instruction & 0x100000) << 1);
+              offset <<= 2;
+              offset += 4;  // Relative to the next instruction (no delay slot for compact branches).
+              args << FormatInstructionPointer(instr_ptr + offset)
+                   << StringPrintf("  ; %+d", offset);
+            }
+            break;
           case 'D': args << 'r' << rd; break;
           case 'd': args << 'f' << rd; break;
           case 'a': args << 'f' << sa; break;
@@ -302,6 +408,7 @@
           case 's': args << 'f' << rs; break;
           case 'T': args << 'r' << rt; break;
           case 't': args << 'f' << rt; break;
+          case 'Z': args << rd; break;   // sz ([d]ext size).
         }
         if (*(args_fmt + 1)) {
           args << ", ";
@@ -311,9 +418,36 @@
     }
   }
 
+  // Special cases for sequences of:
+  //   pc-relative +/- 2GB branch:
+  //     auipc  reg, imm
+  //     jic    reg, imm
+  //   pc-relative +/- 2GB branch and link:
+  //     auipc  reg, imm
+  //     daddiu reg, reg, imm
+  //     jialc  reg, 0
+  if (((op == 0x36 && rs == 0 && rt != 0) ||  // jic
+       (op == 0x19 && rs == rt && rt != 0)) &&  // daddiu
+      last_ptr_ && (intptr_t)instr_ptr - (intptr_t)last_ptr_ == 4 &&
+      (last_instr_ & 0xFC1F0000) == 0xEC1E0000 &&  // auipc
+      ((last_instr_ >> 21) & 0x1F) == rt) {
+    uint32_t offset = (last_instr_ << 16) | (instruction & 0xFFFF);
+    offset -= (offset & 0x8000) << 1;
+    offset -= 4;
+    if (op == 0x36) {
+      args << "  ; b ";
+    } else {
+      args << "  ; move r" << rt << ", ";
+    }
+    args << FormatInstructionPointer(instr_ptr + (int32_t)offset);
+    args << StringPrintf("  ; %+d", (int32_t)offset);
+  }
+
   os << FormatInstructionPointer(instr_ptr)
      << StringPrintf(": %08x\t%-7s ", instruction, opcode.c_str())
      << args.str() << '\n';
+  last_ptr_ = instr_ptr;
+  last_instr_ = instruction;
   return 4;
 }
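
Editor's note, not part of the patch: the offset reconstruction in the special case above is the inverse of the auipc/jic split on the assembler side; the extra -4 rebases the offset from the auipc onto the jic/daddiu that instr_ptr currently points at. The function name below is illustrative.

    #include <cstdint>

    // Sketch of the decode done in the special case above.
    int32_t ReconstructBranchOffset(uint32_t auipc_instr, uint32_t second_instr) {
      uint32_t offset = (auipc_instr << 16) | (second_instr & 0xFFFF);  // join the imm16 halves
      offset -= (offset & 0x8000) << 1;  // undo the assembler's sign-extension compensation
      offset -= 4;                       // encoded relative to the auipc; instr_ptr is 4 bytes later
      return static_cast<int32_t>(offset);
    }
    // Example: auipc imm 0x0002, jic imm 0x8000 -> 0x28000 - 0x10000 - 4 = 0x17FFC
    // relative to the jic, i.e. auipc_address + 0x18000 as originally encoded.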
 
diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h
index 67c3fcb..4f70a9b 100644
--- a/disassembler/disassembler_mips.h
+++ b/disassembler/disassembler_mips.h
@@ -27,7 +27,9 @@
 class DisassemblerMips FINAL : public Disassembler {
  public:
   explicit DisassemblerMips(DisassemblerOptions* options, bool is64bit) : Disassembler(options),
-      is64bit_(is64bit) {}
+      is64bit_(is64bit),
+      last_ptr_(nullptr),
+      last_instr_(0) {}
 
   size_t Dump(std::ostream& os, const uint8_t* begin) OVERRIDE;
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
@@ -35,6 +37,11 @@
  private:
   const bool is64bit_;
 
+  // Address and encoding of the last disassembled instruction.
+  // Needed to produce more readable disassembly of certain 2-instruction sequences.
+  const uint8_t* last_ptr_;
+  uint32_t last_instr_;
+
   DISALLOW_COPY_AND_ASSIGN(DisassemblerMips);
 };
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 44b67ca..bee379e 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1400,8 +1400,9 @@
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
-    addl LITERAL(76), %esp        // pop arguments
-    CFI_ADJUST_CFA_OFFSET(-76)
+    addl LITERAL(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE))
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 66dfe5a..5c741a5 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1334,9 +1334,8 @@
     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
     movq %rsp, %rcx                         // Pass SP.
     call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
+    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     movq %rax, %xmm0                        // Copy return value in case of float returns.
-    addq LITERAL(168 + 4*8), %rsp           // Pop arguments.
-    CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler
 
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index dd65f2c..7344d13 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -223,6 +223,7 @@
     case k22t: return true;
     case k22x: return true;
     case k23x: return true;
+    case k25x: return true;
     case k31c: return true;
     case k31i: return true;
     case k31t: return true;
@@ -252,6 +253,7 @@
     case k22t: return VRegB_22t();
     case k22x: return VRegB_22x();
     case k23x: return VRegB_23x();
+    case k25x: return VRegB_25x();
     case k31c: return VRegB_31c();
     case k31i: return VRegB_31i();
     case k31t: return VRegB_31t();
@@ -329,6 +331,12 @@
   return static_cast<uint8_t>(Fetch16(1) & 0xff);
 }
 
+// Number of additional registers in this instruction. # of var arg registers = this value + 1.
+inline uint4_t Instruction::VRegB_25x() const {
+  DCHECK_EQ(FormatOf(Opcode()), k25x);
+  return InstB(Fetch16(0));
+}
+
 inline uint32_t Instruction::VRegB_31c() const {
   DCHECK_EQ(FormatOf(Opcode()), k31c);
   return Fetch32(1);
@@ -375,6 +383,7 @@
     case k22s: return true;
     case k22t: return true;
     case k23x: return true;
+    case k25x: return true;
     case k35c: return true;
     case k3rc: return true;
     default: return false;
@@ -388,6 +397,7 @@
     case k22s: return VRegC_22s();
     case k22t: return VRegC_22t();
     case k23x: return VRegC_23x();
+    case k25x: return VRegC_25x();
     case k35c: return VRegC_35c();
     case k3rc: return VRegC_3rc();
     default:
@@ -421,6 +431,11 @@
   return static_cast<uint8_t>(Fetch16(1) >> 8);
 }
 
+inline uint4_t Instruction::VRegC_25x() const {
+  DCHECK_EQ(FormatOf(Opcode()), k25x);
+  return static_cast<uint4_t>(Fetch16(1) & 0xf);
+}
+
 inline uint4_t Instruction::VRegC_35c() const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
   return static_cast<uint4_t>(Fetch16(2) & 0x0f);
@@ -431,11 +446,78 @@
   return Fetch16(2);
 }
 
-inline bool Instruction::HasVarArgs() const {
+inline bool Instruction::HasVarArgs35c() const {
   return FormatOf(Opcode()) == k35c;
 }
 
-inline void Instruction::GetVarArgs(uint32_t arg[5], uint16_t inst_data) const {
+inline bool Instruction::HasVarArgs25x() const {
+  return FormatOf(Opcode()) == k25x;
+}
+
+// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1.
+inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const {
+  DCHECK_EQ(FormatOf(Opcode()), k25x);
+
+  /*
+   * The opcode looks like this:
+   *   op vC, {vD, vE, vF, vG}
+   *
+   *  and vB is the (implicit) register count (0-4) which denotes how many of vD..vG to read.
+   *
+   *  vC is always present, so with "op vC, {}" the register count will be 0 even though vC
+   *  is valid.
+   *
+   *  The exact semantic meanings of vC:vG are up to the instruction using the format.
+   *
+   *  Encoding drawing as a bit stream:
+   *  (Note that each uint16 is little endian, and each register takes up 4 bits)
+   *
+   *       uint16  |||   uint16
+   *   7-0     15-8    7-0   15-8
+   *  |------|-----|||-----|-----|
+   *  |opcode|vB|vG|||vD|vC|vF|vE|
+   *  |------|-----|||-----|-----|
+   */
+  uint16_t reg_list = Fetch16(1);
+  uint4_t count = VRegB_25x();
+  DCHECK_LE(count, 4U) << "Invalid arg count in 25x (" << count << ")";
+
+  /*
+   * TODO(iam): Change instruction encoding to one of:
+   *
+   * - (X) vA = args count, vB = closure register, {vC..vG} = args (25x)
+   * - (Y) vA = args count, vB = method index, {vC..vG} = args (35x)
+   *
+   * (do this in conjunction with adding verifier support for invoke-lambda)
+   */
+
+  /*
+   * Copy the registers into the arg[] array: vC always goes into arg[0], and
+   * vD..vG fill arg[1]..arg[4] according to the register count in vB.
+   * Note that all switch cases below intentionally fall through.
+   */
+  switch (count) {
+    case 4:
+      arg[4] = (Fetch16(0) >> 8) & 0x0f;  // vG
+      FALLTHROUGH_INTENDED;
+    case 3:
+      arg[3] = (reg_list >> 12) & 0x0f;  // vF
+      FALLTHROUGH_INTENDED;
+    case 2:
+      arg[2] = (reg_list >> 8) & 0x0f;  // vE
+      FALLTHROUGH_INTENDED;
+    case 1:
+      arg[1] = (reg_list >> 4) & 0x0f;  // vD
+      FALLTHROUGH_INTENDED;
+    default:  // case 0
+      arg[0] = VRegC_25x();  // vC
+      break;
+  }
+}
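A minimal standalone sketch (not part of this change) of the k25x nibble layout documented above; Decoded25x and Decode25x are hypothetical names, and the shifts mirror VRegB_25x(), VRegC_25x() and GetAllArgs25x():

    #include <cstdint>

    struct Decoded25x {
      uint8_t count;              // vB: how many of vD..vG are present (0-4)
      uint8_t vC, vD, vE, vF, vG;
    };

    // code_unit0 holds the opcode in bits 7-0, with vB and vG in the upper byte
    // (vG in bits 11-8 per GetAllArgs25x(), leaving vB in bits 15-12);
    // code_unit1 holds vC, vD, vE, vF in its four nibbles, low nibble first.
    static Decoded25x Decode25x(uint16_t code_unit0, uint16_t code_unit1) {
      Decoded25x d;
      d.count = (code_unit0 >> 12) & 0x0f;
      d.vG    = (code_unit0 >> 8) & 0x0f;
      d.vC    = code_unit1 & 0x0f;
      d.vD    = (code_unit1 >> 4) & 0x0f;
      d.vE    = (code_unit1 >> 8) & 0x0f;
      d.vF    = (code_unit1 >> 12) & 0x0f;
      return d;
    }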
+
+inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const {
   DCHECK_EQ(FormatOf(Opcode()), k35c);
 
   /*
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 69fe874..537fa15 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -63,7 +63,7 @@
 #define INSTRUCTION_SIZE(opcode, c, p, format, r, i, a, v) \
     ((opcode == NOP)                        ? -1 : \
      ((format >= k10x) && (format <= k10t)) ?  1 : \
-     ((format >= k20t) && (format <= k22c)) ?  2 : \
+     ((format >= k20t) && (format <= k25x)) ?  2 : \
      ((format >= k32x) && (format <= k3rc)) ?  3 : \
       (format == k51l)                      ?  5 : -1),
 #include "dex_instruction_list.h"
@@ -224,6 +224,14 @@
             break;
           }
           FALLTHROUGH_INTENDED;
+        case CREATE_LAMBDA:
+          if (file != nullptr) {
+            uint32_t method_idx = VRegB_21c();
+            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", "
+               << PrettyMethod(method_idx, *file, true) << " // method@" << method_idx;
+            break;
+          }
+          FALLTHROUGH_INTENDED;
         default:
           os << StringPrintf("%s v%d, thing@%d", opcode, VRegA_21c(), VRegB_21c());
           break;
@@ -304,6 +312,26 @@
       }
       break;
     }
+    case k25x: {
+      if (Opcode() == INVOKE_LAMBDA) {
+        uint32_t arg[kMaxVarArgRegs];
+        GetAllArgs25x(arg);
+        const size_t num_extra_var_args = VRegB_25x();
+        DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs);
+
+        // invoke-lambda vC, {vD, vE, vF, vG}
+        os << opcode << " v" << arg[0] << ", {";
+        for (size_t i = 0; i < num_extra_var_args; ++i) {
+          if (i != 0) {
+            os << ", ";
+          }
+          os << "v" << arg[i+1];
+        }
+        os << "}";
+        break;
+      }
+      FALLTHROUGH_INTENDED;
+    }
     case k32x:  os << StringPrintf("%s v%d, v%d", opcode, VRegA_32x(), VRegB_32x()); break;
     case k30t:  os << StringPrintf("%s %+d", opcode, VRegA_30t()); break;
     case k31t:  os << StringPrintf("%s v%d, %+d", opcode, VRegA_31t(), VRegB_31t()); break;
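As a concrete example of the k25x dump case added above: with vC = 4 and two var-arg registers vD = 5 and vE = 6, the output line reads "invoke-lambda v4, {v5, v6}".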
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index c64c21e..b043aba 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -105,6 +105,7 @@
     k22t,  // op vA, vB, +CCCC
     k22s,  // op vA, vB, #+CCCC
     k22c,  // op vA, vB, thing@CCCC
+    k25x,  // op vC, {vD, vE, vF, vG} (B: count)
     k32x,  // op vAAAA, vBBBB
     k30t,  // op +AAAAAAAA
     k31t,  // op vAA, +BBBBBBBB
@@ -116,30 +117,31 @@
   };
 
   enum Flags {
-    kBranch              = 0x000001,  // conditional or unconditional branch
-    kContinue            = 0x000002,  // flow can continue to next statement
-    kSwitch              = 0x000004,  // switch statement
-    kThrow               = 0x000008,  // could cause an exception to be thrown
-    kReturn              = 0x000010,  // returns, no additional statements
-    kInvoke              = 0x000020,  // a flavor of invoke
-    kUnconditional       = 0x000040,  // unconditional branch
-    kAdd                 = 0x000080,  // addition
-    kSubtract            = 0x000100,  // subtract
-    kMultiply            = 0x000200,  // multiply
-    kDivide              = 0x000400,  // division
-    kRemainder           = 0x000800,  // remainder
-    kAnd                 = 0x001000,  // and
-    kOr                  = 0x002000,  // or
-    kXor                 = 0x004000,  // xor
-    kShl                 = 0x008000,  // shl
-    kShr                 = 0x010000,  // shr
-    kUshr                = 0x020000,  // ushr
-    kCast                = 0x040000,  // cast
-    kStore               = 0x080000,  // store opcode
-    kLoad                = 0x100000,  // load opcode
-    kClobber             = 0x200000,  // clobbers memory in a big way (not just a write)
-    kRegCFieldOrConstant = 0x400000,  // is the third virtual register a field or literal constant (vC)
-    kRegBFieldOrConstant = 0x800000,  // is the second virtual register a field or literal constant (vB)
+    kBranch              = 0x0000001,  // conditional or unconditional branch
+    kContinue            = 0x0000002,  // flow can continue to next statement
+    kSwitch              = 0x0000004,  // switch statement
+    kThrow               = 0x0000008,  // could cause an exception to be thrown
+    kReturn              = 0x0000010,  // returns, no additional statements
+    kInvoke              = 0x0000020,  // a flavor of invoke
+    kUnconditional       = 0x0000040,  // unconditional branch
+    kAdd                 = 0x0000080,  // addition
+    kSubtract            = 0x0000100,  // subtract
+    kMultiply            = 0x0000200,  // multiply
+    kDivide              = 0x0000400,  // division
+    kRemainder           = 0x0000800,  // remainder
+    kAnd                 = 0x0001000,  // and
+    kOr                  = 0x0002000,  // or
+    kXor                 = 0x0004000,  // xor
+    kShl                 = 0x0008000,  // shl
+    kShr                 = 0x0010000,  // shr
+    kUshr                = 0x0020000,  // ushr
+    kCast                = 0x0040000,  // cast
+    kStore               = 0x0080000,  // store opcode
+    kLoad                = 0x0100000,  // load opcode
+    kClobber             = 0x0200000,  // clobbers memory in a big way (not just a write)
+    kRegCFieldOrConstant = 0x0400000,  // is the third virtual register a field or literal constant (vC)
+    kRegBFieldOrConstant = 0x0800000,  // is the second virtual register a field or literal constant (vB)
+    kExperimental        = 0x1000000,  // is an experimental opcode
   };
 
   enum VerifyFlag {
@@ -205,7 +207,7 @@
 
   // Returns a pointer to the instruction after this 2xx instruction in the stream.
   const Instruction* Next_2xx() const {
-    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k22c);
+    DCHECK(FormatOf(Opcode()) >= k20t && FormatOf(Opcode()) <= k25x);
     return RelativeAt(2);
   }
 
@@ -355,6 +357,7 @@
   }
   uint16_t VRegB_22x() const;
   uint8_t VRegB_23x() const;
+  uint4_t VRegB_25x() const;
   uint32_t VRegB_31c() const;
   int32_t VRegB_31i() const;
   int32_t VRegB_31t() const;
@@ -381,15 +384,20 @@
   int16_t VRegC_22s() const;
   int16_t VRegC_22t() const;
   uint8_t VRegC_23x() const;
+  uint4_t VRegC_25x() const;
   uint4_t VRegC_35c() const;
   uint16_t VRegC_3rc() const;
 
   // Fills the given array with the 'arg' array of the instruction.
-  bool HasVarArgs() const;
+  bool HasVarArgs35c() const;
+  bool HasVarArgs25x() const;
+
+  // TODO(iam): Make this name more consistent with GetAllArgs25x by including the opcode format.
   void GetVarArgs(uint32_t args[kMaxVarArgRegs], uint16_t inst_data) const;
   void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const {
     return GetVarArgs(args, Fetch16(0));
   }
+  void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const;
 
   // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first
   // 16 bits of instruction.
@@ -489,6 +497,11 @@
     return (kInstructionFlags[Opcode()] & kInvoke) != 0;
   }
 
+  // Determine if this instruction is experimental.
+  bool IsExperimental() const {
+    return (kInstructionFlags[Opcode()] & kExperimental) != 0;
+  }
+
   int GetVerifyTypeArgumentA() const {
     return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegA | kVerifyRegAWide));
   }
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index f8f85f9..41c2417 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -261,10 +261,11 @@
   V(0xF0, IGET_BYTE_QUICK, "iget-byte-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xF1, IGET_CHAR_QUICK, "iget-char-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
   V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kFieldRef, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \
-  V(0xF3, UNUSED_F3, "unused-f3", k10x, false, kUnknown, 0, kVerifyError) \
+  V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \
   V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kUnknown, 0, kVerifyError) \
-  V(0xF6, UNUSED_F6, "unused-f6", k10x, false, kUnknown, 0, kVerifyError) \
+  /* TODO(iam): get rid of the unused 'false' column */ \
+  V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \
   V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xF8, UNUSED_F8, "unused-f8", k10x, false, kUnknown, 0, kVerifyError) \
   V(0xF9, UNUSED_F9, "unused-f9", k10x, false, kUnknown, 0, kVerifyError) \
@@ -292,6 +293,7 @@
   V(k22t) \
   V(k22s) \
   V(k22c) \
+  V(k25x) \
   V(k32x) \
   V(k30t) \
   V(k31t) \
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 0f6f788..0bbc014 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -480,12 +480,28 @@
   Runtime::Current()->AbortTransactionAndThrowAbortError(self, abort_msg);
 }
 
+// Separate declaration is required solely for the attributes.
+template<bool is_range, bool do_assignability_check> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+static inline bool DoCallCommon(ArtMethod* called_method,
+                                Thread* self,
+                                ShadowFrame& shadow_frame,
+                                JValue* result,
+                                uint16_t number_of_inputs,
+                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t vregC) ALWAYS_INLINE;
+
 template<bool is_range, bool do_assignability_check>
-bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
-            const Instruction* inst, uint16_t inst_data, JValue* result) {
+static inline bool DoCallCommon(ArtMethod* called_method,
+                                Thread* self,
+                                ShadowFrame& shadow_frame,
+                                JValue* result,
+                                uint16_t number_of_inputs,
+                                uint32_t arg[Instruction::kMaxVarArgRegs],
+                                uint32_t vregC) {
   bool string_init = false;
   // Replace calls to String.<init> with equivalent StringFactory call.
-  if (called_method->GetDeclaringClass()->IsStringClass() && called_method->IsConstructor()) {
+  if (UNLIKELY(called_method->GetDeclaringClass()->IsStringClass()
+               && called_method->IsConstructor())) {
     ScopedObjectAccessUnchecked soa(self);
     jmethodID mid = soa.EncodeMethod(called_method);
     called_method = soa.DecodeMethod(WellKnownClasses::StringInitToStringFactoryMethodID(mid));
@@ -494,28 +510,56 @@
 
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
-  const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
+
+  // Number of registers for the callee's call frame.
   uint16_t num_regs;
   if (LIKELY(code_item != nullptr)) {
     num_regs = code_item->registers_size_;
-    DCHECK_EQ(string_init ? num_ins - 1 : num_ins, code_item->ins_size_);
+    DCHECK_EQ(string_init ? number_of_inputs - 1 : number_of_inputs, code_item->ins_size_);
   } else {
     DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
-    num_regs = num_ins;
-    if (string_init) {
-      // The new StringFactory call is static and has one fewer argument.
-      num_regs--;
-    }
+    num_regs = number_of_inputs;
   }
 
+  // Hack for String init:
+  //
+  // Rewrite invoke-x java.lang.String.<init>(this, a, b, c, ...) into:
+  //         invoke-x StringFactory(a, b, c, ...)
+  // by effectively dropping the first virtual register from the invoke.
+  //
+  // (at this point the ArtMethod has already been replaced,
+  // so we just need to fix up the arguments)
+  uint32_t string_init_vreg_this = is_range ? vregC : arg[0];
+  if (UNLIKELY(code_item == nullptr && string_init)) {
+    DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
+
+    DCHECK_GT(num_regs, 0u);  // As the method is an instance method, there should be at least 1.
+    // The new StringFactory call is static and has one fewer argument.
+    num_regs--;
+    number_of_inputs--;
+
+    // Rewrite the var-args, dropping the 0th argument ("this")
+    for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) {
+      arg[i - 1] = arg[i];
+    }
+    arg[Instruction::kMaxVarArgRegs - 1] = 0;
+
+    // Rewrite the non-var-arg case
+    vregC++;  // Skips the 0th vreg in the range ("this").
+  }
+
+  // Parameter registers go at the end of the shadow frame.
+  DCHECK_GE(num_regs, number_of_inputs);
+  size_t first_dest_reg = num_regs - number_of_inputs;
+  DCHECK_NE(first_dest_reg, (size_t)-1);
+
   // Allocate shadow frame on the stack.
-  const char* old_cause = self->StartAssertNoThreadSuspension("DoCall");
+  const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
   void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
   ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0,
                                                     memory));
 
-  // Initialize new shadow frame.
-  size_t first_dest_reg = num_regs - num_ins;
+  // Initialize the new shadow frame by copying the argument registers from the caller's frame.
   if (do_assignability_check) {
     // Slow path.
     // We might need to do class loading, which incurs a thread state change to kNative. So
@@ -530,33 +574,23 @@
     uint32_t shorty_len = 0;
     const char* shorty = new_shadow_frame->GetMethod()->GetShorty(&shorty_len);
 
-    // TODO: find a cleaner way to separate non-range and range information without duplicating
-    //       code.
-    uint32_t arg[5];  // only used in invoke-XXX.
-    uint32_t vregC;   // only used in invoke-XXX-range.
-    if (is_range) {
-      vregC = inst->VRegC_3rc();
-    } else {
-      inst->GetVarArgs(arg, inst_data);
-    }
-
     // Handle receiver apart since it's not part of the shorty.
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
+
     if (!new_shadow_frame->GetMethod()->IsStatic()) {
       size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
       ++arg_offset;
-    } else if (string_init) {
-      // Skip the referrer for the new static StringFactory call.
-      ++dest_reg;
-      ++arg_offset;
     }
+
+    // Copy the caller's invoke-* arguments into the callee's parameter registers.
     for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
       DCHECK_LT(shorty_pos + 1, shorty_len);
       const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset];
       switch (shorty[shorty_pos + 1]) {
+        // Handle Object references. 1 virtual register slot.
         case 'L': {
           Object* o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != nullptr) {
@@ -581,50 +615,40 @@
           new_shadow_frame->SetVRegReference(dest_reg, o);
           break;
         }
+        // Handle doubles and longs. 2 consecutive virtual register slots.
         case 'J': case 'D': {
-          uint64_t wide_value = (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << 32) |
-                                static_cast<uint32_t>(shadow_frame.GetVReg(src_reg));
+          uint64_t wide_value =
+              (static_cast<uint64_t>(shadow_frame.GetVReg(src_reg + 1)) << BitSizeOf<uint32_t>()) |
+               static_cast<uint32_t>(shadow_frame.GetVReg(src_reg));
           new_shadow_frame->SetVRegLong(dest_reg, wide_value);
+          // Skip the next virtual register slot since we already used it.
           ++dest_reg;
           ++arg_offset;
           break;
         }
+        // Handle all other primitives that are always 1 virtual register slot.
         default:
           new_shadow_frame->SetVReg(dest_reg, shadow_frame.GetVReg(src_reg));
           break;
       }
     }
   } else {
+    size_t arg_index = 0;
+
     // Fast path: no extra checks.
     if (is_range) {
-      uint16_t first_src_reg = inst->VRegC_3rc();
-      if (string_init) {
-        // Skip the referrer for the new static StringFactory call.
-        ++first_src_reg;
-        ++first_dest_reg;
-      }
+      // TODO: Implement the range version of invoke-lambda
+      uint16_t first_src_reg = vregC;
+
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
           ++dest_reg, ++src_reg) {
         AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
       }
     } else {
-      DCHECK_LE(num_ins, 5U);
-      uint16_t regList = inst->Fetch16(2);
-      uint16_t count = num_ins;
-      size_t arg_index = 0;
-      if (count == 5) {
-        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U,
-                       (inst_data >> 8) & 0x0f);
-        --count;
-      }
-      if (string_init) {
-        // Skip the referrer for the new static StringFactory call.
-        regList >>= 4;
-        ++first_dest_reg;
-        --count;
-      }
-      for (; arg_index < count; ++arg_index, regList >>= 4) {
-        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
+      DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs);
+
+      for (; arg_index < number_of_inputs; ++arg_index) {
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]);
       }
     }
     self->EndAssertNoThreadSuspension(old_cause);
@@ -660,8 +684,7 @@
 
   if (string_init && !self->IsExceptionPending()) {
     // Set the new string result of the StringFactory.
-    uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-    shadow_frame.SetVRegReference(vregC, result->GetL());
+    shadow_frame.SetVRegReference(string_init_vreg_this, result->GetL());
     // Overwrite all potential copies of the original result of the new-instance of string with the
     // new result of the StringFactory. Use the verifier to find this set of registers.
     ArtMethod* method = shadow_frame.GetMethod();
@@ -692,6 +715,56 @@
   return !self->IsExceptionPending();
 }
 
+template<bool is_range, bool do_assignability_check>
+bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
+                  const Instruction* inst, uint16_t inst_data, JValue* result) {
+  const uint4_t num_additional_registers = inst->VRegB_25x();
+  // Argument word count: the first input register (the closure in vC) is always
+  // present and is not encoded in the count, hence the + 1.
+  const uint16_t number_of_inputs = num_additional_registers + 1;
+
+  // TODO: find a cleaner way to separate non-range and range information without duplicating
+  //       code.
+  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t vregC = 0;   // only used in invoke-XXX-range.
+  if (is_range) {
+    vregC = inst->VRegC_3rc();
+  } else {
+    // TODO(iam): See if it's possible to remove inst_data dependency from 35x to avoid this path
+    UNUSED(inst_data);
+    inst->GetAllArgs25x(arg);
+  }
+
+  // TODO: if there's an assignability check, throw instead?
+  DCHECK(called_method->IsStatic());
+
+  return DoCallCommon<is_range, do_assignability_check>(
+      called_method, self, shadow_frame,
+      result, number_of_inputs, arg, vregC);
+}
+
+template<bool is_range, bool do_assignability_check>
+bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
+            const Instruction* inst, uint16_t inst_data, JValue* result) {
+  // Argument word count.
+  const uint16_t number_of_inputs =
+      (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
+
+  // TODO: find a cleaner way to separate non-range and range information without duplicating
+  //       code.
+  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in invoke-XXX.
+  uint32_t vregC = 0;
+  if (is_range) {
+    vregC = inst->VRegC_3rc();
+  } else {
+    vregC = inst->VRegC_35c();
+    inst->GetVarArgs(arg, inst_data);
+  }
+
+  return DoCallCommon<is_range, do_assignability_check>(
+      called_method, self, shadow_frame,
+      result, number_of_inputs, arg, vregC);
+}
+
 template <bool is_range, bool do_access_check, bool transaction_active>
 bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame,
                       Thread* self, JValue* result) {
@@ -707,50 +780,52 @@
     return false;
   }
   uint16_t type_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  Class* arrayClass = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(),
-                                             self, false, do_access_check);
-  if (UNLIKELY(arrayClass == nullptr)) {
+  Class* array_class = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(),
+                                              self, false, do_access_check);
+  if (UNLIKELY(array_class == nullptr)) {
     DCHECK(self->IsExceptionPending());
     return false;
   }
-  CHECK(arrayClass->IsArrayClass());
-  Class* componentClass = arrayClass->GetComponentType();
-  if (UNLIKELY(componentClass->IsPrimitive() && !componentClass->IsPrimitiveInt())) {
-    if (componentClass->IsPrimitiveLong() || componentClass->IsPrimitiveDouble()) {
+  CHECK(array_class->IsArrayClass());
+  Class* component_class = array_class->GetComponentType();
+  const bool is_primitive_int_component = component_class->IsPrimitiveInt();
+  if (UNLIKELY(component_class->IsPrimitive() && !is_primitive_int_component)) {
+    if (component_class->IsPrimitiveLong() || component_class->IsPrimitiveDouble()) {
       ThrowRuntimeException("Bad filled array request for type %s",
-                            PrettyDescriptor(componentClass).c_str());
+                            PrettyDescriptor(component_class).c_str());
     } else {
       self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                "Found type %s; filled-new-array not implemented for anything but 'int'",
-                               PrettyDescriptor(componentClass).c_str());
+                               PrettyDescriptor(component_class).c_str());
     }
     return false;
   }
-  Object* newArray = Array::Alloc<true>(self, arrayClass, length,
-                                        arrayClass->GetComponentSizeShift(),
-                                        Runtime::Current()->GetHeap()->GetCurrentAllocator());
-  if (UNLIKELY(newArray == nullptr)) {
-    DCHECK(self->IsExceptionPending());
+  Object* new_array = Array::Alloc<true>(self, array_class, length,
+                                         array_class->GetComponentSizeShift(),
+                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  if (UNLIKELY(new_array == nullptr)) {
+    self->AssertPendingOOMException();
     return false;
   }
-  uint32_t arg[5];  // only used in filled-new-array.
-  uint32_t vregC;   // only used in filled-new-array-range.
+  uint32_t arg[Instruction::kMaxVarArgRegs];  // only used in filled-new-array.
+  uint32_t vregC = 0;   // only used in filled-new-array-range.
   if (is_range) {
     vregC = inst->VRegC_3rc();
   } else {
     inst->GetVarArgs(arg);
   }
-  const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
   for (int32_t i = 0; i < length; ++i) {
     size_t src_reg = is_range ? vregC + i : arg[i];
     if (is_primitive_int_component) {
-      newArray->AsIntArray()->SetWithoutChecks<transaction_active>(i, shadow_frame.GetVReg(src_reg));
+      new_array->AsIntArray()->SetWithoutChecks<transaction_active>(
+          i, shadow_frame.GetVReg(src_reg));
     } else {
-      newArray->AsObjectArray<Object>()->SetWithoutChecks<transaction_active>(i, shadow_frame.GetVRegReference(src_reg));
+      new_array->AsObjectArray<Object>()->SetWithoutChecks<transaction_active>(
+          i, shadow_frame.GetVRegReference(src_reg));
     }
   }
 
-  result->SetL(newArray);
+  result->SetL(new_array);
   return true;
 }
 
@@ -815,6 +890,20 @@
 EXPLICIT_DO_CALL_TEMPLATE_DECL(true, true);
 #undef EXPLICIT_DO_CALL_TEMPLATE_DECL
 
+// Explicit DoLambdaCall template function declarations.
+#define EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(_is_range, _do_assignability_check)               \
+  template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                          \
+  bool DoLambdaCall<_is_range, _do_assignability_check>(ArtMethod* method, Thread* self,        \
+                                                        ShadowFrame& shadow_frame,              \
+                                                        const Instruction* inst,                \
+                                                        uint16_t inst_data,                     \
+                                                        JValue* result)
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, false);
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(false, true);
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, false);
+EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL(true, true);
+#undef EXPLICIT_DO_LAMBDA_CALL_TEMPLATE_DECL
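A minimal standalone sketch (not part of this change) of the String.<init> argument rewrite that DoCallCommon() performs above; DropStringInitReceiver is a hypothetical name and kMaxVarArgRegs is assumed to match Instruction::kMaxVarArgRegs (5):

    #include <cstddef>
    #include <cstdint>

    static constexpr size_t kMaxVarArgRegs = 5;  // assumed value of Instruction::kMaxVarArgRegs

    // Drop the receiver so that invoke-x String.<init>(this, a, b, ...) lines up
    // with the static StringFactory(a, b, ...) signature described above.
    static void DropStringInitReceiver(uint32_t arg[kMaxVarArgRegs], uint32_t* vregC) {
      // Non-range invokes: shift the var-arg registers down one slot.
      for (size_t i = 1; i < kMaxVarArgRegs; ++i) {
        arg[i - 1] = arg[i];
      }
      arg[kMaxVarArgRegs - 1] = 0;
      // Range invokes: start the contiguous register range one vreg later.
      ++*vregC;
    }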
+
 // Explicit DoFilledNewArray template function declarations.
 #define EXPLICIT_DO_FILLED_NEW_ARRAY_TEMPLATE_DECL(_is_range_, _check, _transaction_active)       \
   template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                            \
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 0124d90..b21103b 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -97,6 +97,127 @@
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
+// Invokes the given lambda closure. This is part of the invocation support and is used by
+// the DoInvokeLambda functions.
+// Returns true on success, otherwise throws an exception and returns false.
+template<bool is_range, bool do_assignability_check>
+bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
+                  const Instruction* inst, uint16_t inst_data, JValue* result);
+
+// Validates that the art method corresponding to a lambda method target
+// is semantically valid:
+//
+// Must be ACC_STATIC and ACC_LAMBDA. Must be a concrete managed implementation
+// (i.e. not native, not proxy, not abstract, ...).
+//
+// If the validation fails, return false and raise an exception.
+static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool success = false;
+
+  if (UNLIKELY(called_method == nullptr)) {
+    // The shadow frame should already be pushed, so we don't need to update it.
+  } else if (UNLIKELY(called_method->IsAbstract())) {
+    ThrowAbstractMethodError(called_method);
+    // TODO(iam): Also handle the case when the method is non-static, what error do we throw?
+    // TODO(iam): Also make sure that ACC_LAMBDA is set.
+  } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) {
+    // Method could be native, proxy method, etc. Lambda targets have to be concrete impls,
+    // so don't allow this.
+  } else {
+    success = true;
+  }
+
+  return success;
+}
+
+// Handles create-lambda instructions.
+// Returns true on success, otherwise throws an exception and returns false.
+// (Exceptions are thrown by creating a new exception and then being put in the thread TLS)
+//
+// As a work-in-progress implementation, this shoves the ArtMethod object corresponding
+// to the target dex method index into the target registers vA and vA + 1.
+template<bool do_access_check>
+static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame,
+                                  const Instruction* inst) {
+  /*
+   * create-lambda is opcode 0xf6 (format 21c)
+   * - vA is the target register where the closure will be stored into
+   *   (also stores into vA + 1)
+   * - vB is the method index which will be the target for a later invoke-lambda
+   */
+  const uint32_t method_idx = inst->VRegB_21c();
+  mirror::Object* receiver = nullptr;  // Always static. (see 'kStatic')
+  ArtMethod* sf_method = shadow_frame.GetMethod();
+  ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>(
+      method_idx, &receiver, &sf_method, self);
+
+  uint32_t vregA = inst->VRegA_21c();
+
+  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+    CHECK(self->IsExceptionPending());
+    shadow_frame.SetVReg(vregA, 0u);
+    shadow_frame.SetVReg(vregA + 1, 0u);
+    return false;
+  }
+
+  // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers.
+  uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(called_method));
+  uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(called_method)
+                                                    >> BitSizeOf<uint32_t>());
+  // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit.
+  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
+
+  DCHECK_NE(called_method_lo | called_method_hi, 0u);
+
+  shadow_frame.SetVReg(vregA, called_method_lo);
+  shadow_frame.SetVReg(vregA + 1, called_method_hi);
+  return true;
+}
+
+template<bool do_access_check>
+static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
+                                  uint16_t inst_data, JValue* result) {
+  /*
+   * invoke-lambda is opcode 0xf3 (format 25x)
+   *
+   * - vC is the closure register (both vC and vC + 1 will be used to store the closure).
+   * - vB is the number of additional registers up to |{vD,vE,vF,vG}| (4)
+   * - the rest of the registers are always var-args
+   *
+   * - reading var-args for 25x gets us vD,vE,vF,vG (but not vB)
+   */
+  uint32_t vC = inst->VRegC_25x();
+
+  // TODO(iam): Introduce a closure abstraction that will contain the captured variables
+  // instead of just an ArtMethod. We also should only need to use 1 register instead of 2.
+  uint32_t vc_value_lo = shadow_frame.GetVReg(vC);
+  uint32_t vc_value_hi = shadow_frame.GetVReg(vC + 1);
+
+  uint64_t vc_value_ptr = (static_cast<uint64_t>(vc_value_hi) << BitSizeOf<uint32_t>())
+                           | vc_value_lo;
+
+  // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit.
+  static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible");
+  ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr);
+
+  // Guard against the user passing a null closure, which is odd but (sadly) semantically valid.
+  if (UNLIKELY(called_method == nullptr)) {
+    ThrowNullPointerExceptionFromInterpreter();
+    result->SetJ(0);
+    return false;
+  }
+
+  if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) {
+    CHECK(self->IsExceptionPending());
+    result->SetJ(0);
+    return false;
+  } else {
+    return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
+                                                result);
+  }
+}
+
 // Handles invoke-XXX/range instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
@@ -420,6 +541,26 @@
 EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true);   // invoke-virtual-quick-range.
 #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK
 
+// Explicitly instantiate all DoCreateLambda functions.
+#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check)                                    \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                 \
+bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame,              \
+                        const Instruction* inst)
+
+EXPLICIT_DO_CREATE_LAMBDA_DECL(false);  // create-lambda
+EXPLICIT_DO_CREATE_LAMBDA_DECL(true);   // create-lambda
+#undef EXPLICIT_DO_CREATE_LAMBDA_DECL
+
+// Explicitly instantiate all DoInvokeLambda functions.
+#define EXPLICIT_DO_INVOKE_LAMBDA_DECL(_do_check)                                    \
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)                                 \
+bool DoInvokeLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, \
+                               uint16_t inst_data, JValue* result)
+
+EXPLICIT_DO_INVOKE_LAMBDA_DECL(false);  // invoke-lambda
+EXPLICIT_DO_INVOKE_LAMBDA_DECL(true);   // invoke-lambda
+#undef EXPLICIT_DO_INVOKE_LAMBDA_DECL
+
 
 }  // namespace interpreter
 }  // namespace art
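A minimal standalone sketch (not part of this change) of the two-vreg closure representation used by DoCreateLambda() and DoInvokeLambda() above; the helper names are hypothetical and an opaque void* stands in for the ArtMethod*:

    #include <cstdint>

    // Split a method pointer across two 32-bit vregs (vA = low word, vA + 1 = high
    // word) and join it back before the call. uint64_t keeps the 32-bit shift
    // well-defined even when pointers are only 32 bits wide.
    static void SplitClosurePointer(const void* method, uint32_t* lo, uint32_t* hi) {
      uint64_t value = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(method));
      *lo = static_cast<uint32_t>(value);
      *hi = static_cast<uint32_t>(value >> 32);
    }

    static const void* JoinClosurePointer(uint32_t lo, uint32_t hi) {
      uint64_t value = (static_cast<uint64_t>(hi) << 32) | lo;
      return reinterpret_cast<const void*>(static_cast<uintptr_t>(value));
    }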
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 86027c5..7bc8c15 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -75,6 +75,17 @@
 #define HANDLE_INSTRUCTION_START(opcode) op_##opcode:  // NOLINT(whitespace/labels)
 #define HANDLE_INSTRUCTION_END() UNREACHABLE_CODE_CHECK()
 
+// Use with instructions labeled with kExperimental flag:
+#define HANDLE_EXPERIMENTAL_INSTRUCTION_START(opcode)                                             \
+  HANDLE_INSTRUCTION_START(opcode);                                                               \
+  DCHECK(inst->IsExperimental());                                                                 \
+  if (Runtime::Current()->AreExperimentalLambdasEnabled()) {
+#define HANDLE_EXPERIMENTAL_INSTRUCTION_END()                                                     \
+  } else {                                                                                        \
+      UnexpectedOpcode(inst, shadow_frame);                                                       \
+  } HANDLE_INSTRUCTION_END();
+
+
 /**
  * Interpreter based on computed goto tables.
  *
@@ -1609,6 +1620,14 @@
   }
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(INVOKE_LAMBDA) {
+    bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
+                                                   &result_register);
+    UPDATE_HANDLER_TABLE();
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(NEG_INT)
     shadow_frame.SetVReg(
         inst->VRegA_12x(inst_data), -shadow_frame.GetVReg(inst->VRegB_12x(inst_data)));
@@ -2390,6 +2409,12 @@
     ADVANCE(2);
   HANDLE_INSTRUCTION_END();
 
+  HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) {
+    bool success = DoCreateLambda<true>(self, shadow_frame, inst);
+    POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
+  }
+  HANDLE_EXPERIMENTAL_INSTRUCTION_END();
+
   HANDLE_INSTRUCTION_START(UNUSED_3E)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2422,10 +2447,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F3)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_F4)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
@@ -2434,10 +2455,6 @@
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
-  HANDLE_INSTRUCTION_START(UNUSED_F6)
-    UnexpectedOpcode(inst, shadow_frame);
-  HANDLE_INSTRUCTION_END();
-
   HANDLE_INSTRUCTION_START(UNUSED_F7)
     UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
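For reference, a rough hand expansion (not standalone code) of the experimental-opcode wrapper macros defined above, using the INVOKE_LAMBDA handler as the body:

    op_INVOKE_LAMBDA:  // from HANDLE_INSTRUCTION_START(INVOKE_LAMBDA)
      DCHECK(inst->IsExperimental());
      if (Runtime::Current()->AreExperimentalLambdasEnabled()) {
        bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
                                                       &result_register);
        UPDATE_HANDLER_TABLE();
        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2);
      } else {
        UnexpectedOpcode(inst, shadow_frame);
      }
      UNREACHABLE_CODE_CHECK();  // from HANDLE_INSTRUCTION_END()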
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index fcf083c..8040197 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -53,6 +53,11 @@
     }                                                                                           \
   } while (false)
 
+static bool IsExperimentalInstructionEnabled(const Instruction* inst) {
+  DCHECK(inst->IsExperimental());
+  return Runtime::Current()->AreExperimentalLambdasEnabled();
+}
+
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register) {
@@ -2217,8 +2222,39 @@
                              (inst->VRegC_22b() & 0x1f));
         inst = inst->Next_2xx();
         break;
+      case Instruction::INVOKE_LAMBDA: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoInvokeLambda<do_access_check>(self, shadow_frame, inst, inst_data,
+                                                       &result_register);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::CREATE_LAMBDA: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        PREAMBLE();
+        bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst);
+        POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx);
+        break;
+      }
+      case Instruction::UNUSED_F4:
+      case Instruction::UNUSED_F5:
+      case Instruction::UNUSED_F7 ... Instruction::UNUSED_F9: {
+        if (!IsExperimentalInstructionEnabled(inst)) {
+          UnexpectedOpcode(inst, shadow_frame);
+        }
+
+        CHECK(false);  // TODO(iam): Implement opcodes for lambdas
+        break;
+      }
       case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-      case Instruction::UNUSED_F3 ... Instruction::UNUSED_FF:
+      case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
       case Instruction::UNUSED_7A:
         UnexpectedOpcode(inst, shadow_frame);
diff --git a/runtime/oat.h b/runtime/oat.h
index 604e161..000ae8e 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '6', '3', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '6', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 4b563b5..5e84df5 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -260,6 +260,10 @@
       .Define("--cpu-abilist=_")
           .WithType<std::string>()
           .IntoKey(M::CpuAbiList)
+      .Define({"-Xexperimental-lambdas", "-Xnoexperimental-lambdas"})
+          .WithType<bool>()
+          .WithValues({true, false})
+          .IntoKey(M::ExperimentalLambdas)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -544,6 +548,12 @@
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
+  if (args.GetOrDefault(M::ExperimentalLambdas)) {
+    LOG(WARNING) << "Experimental lambdas have been enabled. All lambda opcodes have "
+                 << "an unstable specification and are nearly guaranteed to change over time. "
+                 << "Do not attempt to write shipping code against these opcodes.";
+  }
+
   *runtime_options = std::move(args);
   return true;
 }
@@ -663,6 +673,8 @@
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
   UsageMessage(stream, "  -Xno-dex-file-fallback "
                        "(Don't fall back to dex files without oat files)\n");
+  UsageMessage(stream, "  -X[no]experimental-lambdas\n"
+                       "     (Enable new experimental dalvik opcodes, off by default)\n");
   UsageMessage(stream, "\n");
 
   UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n");
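With the definition above, the paired aliases map positionally onto WithValues, so -Xexperimental-lambdas parses to true and -Xnoexperimental-lambdas to false for M::ExperimentalLambdas; if neither is passed, the key keeps the default declared in runtime_options.def (false).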
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 66ec7cc..7a78928 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -189,7 +189,8 @@
       implicit_so_checks_(false),
       implicit_suspend_checks_(false),
       is_native_bridge_loaded_(false),
-      zygote_max_failed_boots_(0) {
+      zygote_max_failed_boots_(0),
+      experimental_lambdas_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
 }
@@ -841,6 +842,7 @@
   }
 
   zygote_max_failed_boots_ = runtime_options.GetOrDefault(Opt::ZygoteMaxFailedBoots);
+  experimental_lambdas_ = runtime_options.GetOrDefault(Opt::ExperimentalLambdas);
 
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
   ATRACE_BEGIN("CreateHeap");
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e569333..3cd7404 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -527,6 +527,10 @@
     return zygote_max_failed_boots_;
   }
 
+  bool AreExperimentalLambdasEnabled() const {
+    return experimental_lambdas_;
+  }
+
   // Create the JIT and instrumentation and code cache.
   void CreateJit();
 
@@ -727,6 +731,12 @@
   // zygote.
   uint32_t zygote_max_failed_boots_;
 
+  // Enable experimental opcodes that aren't fully specified yet. The intent is to
+  // eventually publish them as publicly usable opcodes, but they aren't ready yet.
+  //
+  // Experimental opcodes should not be used by production code.
+  bool experimental_lambdas_;
+
   MethodRefToStringInitRegMap method_ref_string_init_reg_map_;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 4a307d5..fc527b5 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -110,6 +110,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        ZygoteMaxFailedBoots,           10)
 RUNTIME_OPTIONS_KEY (Unit,                NoDexFileFallback)
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
+RUNTIME_OPTIONS_KEY (bool,                ExperimentalLambdas,            false) // -X[no]experimental-lambdas
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index b86a7ee..de51fe0 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -572,6 +572,7 @@
     case VERIFY_ERROR_ACCESS_METHOD:
     case VERIFY_ERROR_INSTANTIATION:
     case VERIFY_ERROR_CLASS_CHANGE:
+    case VERIFY_ERROR_FORCE_INTERPRETER:
       if (Runtime::Current()->IsAotCompiler() || !can_load_classes_) {
         // If we're optimistically running verification at compile time, turn NO_xxx, ACCESS_xxx,
         // class change and instantiation errors into soft verification errors so that we re-verify
@@ -857,14 +858,18 @@
     case Instruction::kVerifyVarArgNonZero:
       // Fall-through.
     case Instruction::kVerifyVarArg: {
-      if (inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgNonZero && inst->VRegA() <= 0) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << inst->VRegA() << ") in "
+      // Instructions that can actually return a negative value shouldn't have this flag.
+      uint32_t v_a = dchecked_integral_cast<uint32_t>(inst->VRegA());
+      if ((inst->GetVerifyExtraFlags() == Instruction::kVerifyVarArgNonZero && v_a == 0) ||
+          v_a > Instruction::kMaxVarArgRegs) {
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << v_a << ") in "
                                              "non-range invoke";
         return false;
       }
+
       uint32_t args[Instruction::kMaxVarArgRegs];
       inst->GetVarArgs(args);
-      result = result && CheckVarArgRegs(inst->VRegA(), args);
+      result = result && CheckVarArgRegs(v_a, args);
       break;
     }
     case Instruction::kVerifyVarArgRangeNonZero:
@@ -1175,10 +1180,6 @@
 }
 
 bool MethodVerifier::CheckVarArgRegs(uint32_t vA, uint32_t arg[]) {
-  if (vA > Instruction::kMaxVarArgRegs) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid arg count (" << vA << ") in non-range invoke)";
-    return false;
-  }
   uint16_t registers_size = code_item_->registers_size_;
   for (uint32_t idx = 0; idx < vA; idx++) {
     if (arg[idx] >= registers_size) {
@@ -1291,13 +1292,22 @@
 
 bool MethodVerifier::SetTypesFromSignature() {
   RegisterLine* reg_line = reg_table_.GetLine(0);
-  int arg_start = code_item_->registers_size_ - code_item_->ins_size_;
+
+  // Should have been verified earlier.
+  DCHECK_GE(code_item_->registers_size_, code_item_->ins_size_);
+
+  uint32_t arg_start = code_item_->registers_size_ - code_item_->ins_size_;
   size_t expected_args = code_item_->ins_size_;   /* long/double count as two */
 
-  DCHECK_GE(arg_start, 0);      /* should have been verified earlier */
   // Include the "this" pointer.
   size_t cur_arg = 0;
   if (!IsStatic()) {
+    if (expected_args == 0) {
+      // Expect at least a receiver.
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected 0 args, but method is not static";
+      return false;
+    }
+
     // If this is a constructor for a class other than java.lang.Object, mark the first ("this")
     // argument as uninitialized. This restricts field access until the superclass constructor is
     // called.
@@ -2828,10 +2838,31 @@
       }
       break;
     }
+    case Instruction::INVOKE_LAMBDA: {
+      // Don't bother verifying; instead, the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement invoke-lambda verification
+      break;
+    }
+    case Instruction::CREATE_LAMBDA: {
+      // Don't bother verifying; instead, the interpreter will take the slow path with access checks.
+      // If the code would've normally hard-failed, then the interpreter will throw the
+      // appropriate verification errors at runtime.
+      Fail(VERIFY_ERROR_FORCE_INTERPRETER);  // TODO(iam): implement create-lambda verification
+      break;
+    }
+
+    case 0xf4:
+    case 0xf5:
+    case 0xf7 ... 0xf9: {
+      DCHECK(false);  // TODO(iam): Implement opcodes for lambdas
+      FALLTHROUGH_INTENDED;  // Conservatively fail verification on release builds.
+    }
 
     /* These should never appear during verification. */
     case Instruction::UNUSED_3E ... Instruction::UNUSED_43:
-    case Instruction::UNUSED_F3 ... Instruction::UNUSED_FF:
+    case Instruction::UNUSED_FA ... Instruction::UNUSED_FF:
     case Instruction::UNUSED_79:
     case Instruction::UNUSED_7A:
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_);
@@ -3790,8 +3821,9 @@
   } else {
     const RegType& array_type = work_line_->GetRegisterType(this, inst->VRegB_23x());
     if (array_type.IsZero()) {
-      // Null array type; this code path will fail at runtime. Infer a merge-able type from the
-      // instruction type.
+      // Null array type; this code path will fail at runtime.
+      // Still check that the given value matches the instruction's type.
+      work_line_->VerifyRegisterType(this, inst->VRegA_23x(), insn_type);
     } else if (!array_type.IsArrayTypes()) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "not array type " << array_type << " with aput";
     } else {
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 873b8ab..824daf6 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -77,6 +77,16 @@
   VERIFY_ERROR_ACCESS_METHOD,   // IllegalAccessError.
   VERIFY_ERROR_CLASS_CHANGE,    // IncompatibleClassChangeError.
   VERIFY_ERROR_INSTANTIATION,   // InstantiationError.
+  // For opcodes that don't have complete verifier support (such as lambda opcodes),
+  // we need a way to continue execution at runtime without attempting to re-verify
+  // (since we know it will fail no matter what). Instead, run as the interpreter
+  // in a special "do access checks" mode which will perform verifier-like checking
+  // on the fly.
+  //
+  // TODO: Once all new opcodes have implemented full verifier support, this can be removed.
+  VERIFY_ERROR_FORCE_INTERPRETER,  // Skip the verification phase at runtime;
+                                   // force the interpreter to do access checks.
+                                   // (sets a soft fail at compile time).
 };
 std::ostream& operator<<(std::ostream& os, const VerifyError& rhs);
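In practice, a method containing create-lambda or invoke-lambda now records VERIFY_ERROR_FORCE_INTERPRETER during AOT verification (per the cases added in method_verifier.cc above); it is treated like the other soft failures there, so the method is re-verified at runtime and executed by the interpreter with access checks, as described in the comment block above.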
 
diff --git a/test/044-proxy/src/ReturnsAndArgPassing.java b/test/044-proxy/src/ReturnsAndArgPassing.java
index a173410..225cc5b 100644
--- a/test/044-proxy/src/ReturnsAndArgPassing.java
+++ b/test/044-proxy/src/ReturnsAndArgPassing.java
@@ -57,6 +57,8 @@
       check(proxy instanceof Proxy);
       check(method.getDeclaringClass() == MyInterface.class);
       String name = method.getName();
+      // Check for moving GC bugs in proxy stubs.
+      Runtime.getRuntime().gc();
       if (name.endsWith("Foo")) {
         check(args == null);
         fooInvocations++;
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index ef18f64..3c3b939 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -933,18 +933,18 @@
    * remove the second.
    */
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) last_instruction_simplifier (before)
+  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
   /// CHECK-DAG:     <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) last_instruction_simplifier (after)
+  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:                        BooleanNot [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Arg>>]
 
-  /// CHECK-START: boolean Main.NotNotBool(boolean) last_instruction_simplifier (after)
+  /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after)
   /// CHECK:                            BooleanNot
   /// CHECK-NOT:                        BooleanNot
 
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index da27bf6..ab4afdb 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -141,7 +141,7 @@
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:<<HeaderY>>
-## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<Mul9>>,<<PhiX1>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
@@ -158,7 +158,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
 ## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:none
-## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<Mul9>>,<<PhiX1>>]                   loop:none
+## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:none
 ## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
 
 .method public static testExitPredecessors(IZZ)I
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java
index f6d0b41..4f23eec 100644
--- a/test/496-checker-inlining-and-class-loader/src/Main.java
+++ b/test/496-checker-inlining-and-class-loader/src/Main.java
@@ -106,7 +106,7 @@
   }
 }
 
-class Main {
+public class Main {
   public static void main(String[] args) throws Exception {
     MyClassLoader o = new MyClassLoader();
     Class foo = o.loadClass("LoadedByMyClassLoader");
diff --git a/test/501-null-constant-dce/expected.txt b/test/501-null-constant-dce/expected.txt
new file mode 100644
index 0000000..ccaf6f8
--- /dev/null
+++ b/test/501-null-constant-dce/expected.txt
@@ -0,0 +1 @@
+Enter
diff --git a/test/501-null-constant-dce/info.txt b/test/501-null-constant-dce/info.txt
new file mode 100644
index 0000000..2c4a686
--- /dev/null
+++ b/test/501-null-constant-dce/info.txt
@@ -0,0 +1 @@
+Regression test for the optimizing compiler. See comment in smali file.
diff --git a/test/501-null-constant-dce/smali/DCE.smali b/test/501-null-constant-dce/smali/DCE.smali
new file mode 100644
index 0000000..4a1765e
--- /dev/null
+++ b/test/501-null-constant-dce/smali/DCE.smali
@@ -0,0 +1,37 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LDCE;
+
+.super Ljava/lang/Object;
+
+.method public static method([I)LDCE;
+   .registers 2
+   const/4 v0, 0
+   # Jump over the code that requires the null constant
+   # so that the compiler sees the null constant as dead code.
+   if-eq v0, v0, :end
+   invoke-static {v0}, LDCE;->method([I)LDCE;
+   :end
+   invoke-static {}, LDCE;->$inline$returnNull()LDCE;
+   move-result-object v0
+   return-object v0
+.end method
+
+.method public static $inline$returnNull()LDCE;
+   .registers 2
+   const/4 v0, 0
+   # Return null to make `method` call GetConstantNull again.
+   return-object v0
+.end method
diff --git a/test/501-null-constant-dce/src/Main.java b/test/501-null-constant-dce/src/Main.java
new file mode 100644
index 0000000..3a2d491
--- /dev/null
+++ b/test/501-null-constant-dce/src/Main.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // Workaround for b/18051191.
+    System.out.println("Enter");
+    Class<?> c = Class.forName("DCE");
+    Method m = c.getMethod("method", int[].class);
+    int[] array = new int[7];
+    Object[] arguments = { array };
+    Object result = m.invoke(null, arguments);
+    if (result != null) {
+      throw new Error("Expected null, got " + result);
+    }
+  }
+}
diff --git a/test/503-dead-instructions/expected.txt b/test/503-dead-instructions/expected.txt
new file mode 100644
index 0000000..ccaf6f8
--- /dev/null
+++ b/test/503-dead-instructions/expected.txt
@@ -0,0 +1 @@
+Enter
diff --git a/test/503-dead-instructions/info.txt b/test/503-dead-instructions/info.txt
new file mode 100644
index 0000000..7e3f1ab
--- /dev/null
+++ b/test/503-dead-instructions/info.txt
@@ -0,0 +1,2 @@
+Regression test for the building phase of the optimizing
+compiler. See comment in smali file.
diff --git a/test/503-dead-instructions/smali/DeadInstructions.smali b/test/503-dead-instructions/smali/DeadInstructions.smali
new file mode 100644
index 0000000..9f6c565
--- /dev/null
+++ b/test/503-dead-instructions/smali/DeadInstructions.smali
@@ -0,0 +1,63 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LDeadInstructions;
+
+.super Ljava/lang/Object;
+
+.method public static method1()V
+   .registers 2
+   return-void
+   # Create a label and a branch to that label to trick the
+   # optimizing compiler into thinking the invoke is live.
+   :start
+   const/4 v0, 0
+   const/4 v1, 0
+   # Provide more arguments than we should. Because this is dead
+   # code, the verifier will not check the argument count, so the
+   # compilers must tolerate the mismatch as well.
+   invoke-static {v0, v1}, LDeadInstructions;->method1()V
+   goto :start
+.end method
+
+.method public static method2(J)V
+   .registers 3
+   return-void
+   :start
+   const/4 v0, 0
+   const/4 v1, 0
+   const/4 v2, 0
+   # Give a non-sequential pair for the long argument.
+   invoke-static {v0, v2}, LDeadInstructions;->method2(J)V
+   goto :start
+.end method
+
+.method public static method3()V
+   .registers 1
+   return-void
+   :start
+   const/4 v0, 0
+   # Give one half of a pair.
+   invoke-static {v0}, LDeadInstructions;->method2(J)V
+   goto :start
+.end method
+
+.method public static method4()V
+   .registers 2
+   return-void
+   :start
+   # Provide fewer arguments than we should.
+   invoke-static {}, LDeadInstructions;->method3(J)V
+   goto :start
+.end method
diff --git a/test/503-dead-instructions/src/Main.java b/test/503-dead-instructions/src/Main.java
new file mode 100644
index 0000000..6249dc7
--- /dev/null
+++ b/test/503-dead-instructions/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // Workaround for b/18051191.
+    System.out.println("Enter");
+    Class<?> c = Class.forName("DeadInstructions");
+    Method m = c.getMethod("method1");
+    Object[] arguments1 = { };
+    m.invoke(null, arguments1);
+
+    Object[] arguments2 = { (long)4 };
+    m = c.getMethod("method2", long.class);
+    m.invoke(null, arguments2);
+
+    Object[] arguments3 = { };
+    m = c.getMethod("method3");
+    m.invoke(null, arguments3);
+
+    Object[] arguments4 = { };
+    m = c.getMethod("method4");
+    m.invoke(null, arguments4);
+  }
+}
diff --git a/test/504-regression-baseline-entry/expected.txt b/test/504-regression-baseline-entry/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/504-regression-baseline-entry/expected.txt
diff --git a/test/504-regression-baseline-entry/info.txt b/test/504-regression-baseline-entry/info.txt
new file mode 100644
index 0000000..26cc9ce
--- /dev/null
+++ b/test/504-regression-baseline-entry/info.txt
@@ -0,0 +1,2 @@
+Regression test for the baseline compiler, which used to require the entry block
+to fall through to the next block.
\ No newline at end of file
diff --git a/test/504-regression-baseline-entry/smali/Test.smali b/test/504-regression-baseline-entry/smali/Test.smali
new file mode 100644
index 0000000..06412e7
--- /dev/null
+++ b/test/504-regression-baseline-entry/smali/Test.smali
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static SingleGotoStart()I
+  .registers 1
+  goto :second
+
+  :first
+  return v0
+
+  :second
+  const/4 v0, 0x5
+  goto :first
+.end method
diff --git a/test/504-regression-baseline-entry/src/Main.java b/test/504-regression-baseline-entry/src/Main.java
new file mode 100644
index 0000000..2c9df28
--- /dev/null
+++ b/test/504-regression-baseline-entry/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.Type;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String args[]) throws Exception {
+    Class<?> c = Class.forName("Test");
+    Method m = c.getMethod("SingleGotoStart", (Class[]) null);
+    Integer result = (Integer) m.invoke(null);
+    if (result != 5) {
+      throw new Error("Expected 5, got " + result);
+    }
+  }
+}
diff --git a/test/505-simplifier-type-propagation/expected.txt b/test/505-simplifier-type-propagation/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/505-simplifier-type-propagation/expected.txt
diff --git a/test/505-simplifier-type-propagation/info.txt b/test/505-simplifier-type-propagation/info.txt
new file mode 100644
index 0000000..cd84432
--- /dev/null
+++ b/test/505-simplifier-type-propagation/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, where
+the code generators did not expect type conversion
+instructions from one type to the same type.
diff --git a/test/505-simplifier-type-propagation/src/Main.java b/test/505-simplifier-type-propagation/src/Main.java
new file mode 100644
index 0000000..780cb34
--- /dev/null
+++ b/test/505-simplifier-type-propagation/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+  public static void main(String[] args) {
+    byte result = bar((byte)2);
+    if (result != 2) {
+      throw new Error("Expected 2, got " + result);
+    }
+  }
+
+  public static byte bar(byte myByte) {
+    int a = 0;
+    // The following call will be inlined, which turns the type
+    // conversion below into a byte-to-byte conversion.
+    if ($inline$foo()) {
+      a = myByte;
+    }
+    return (byte)a;
+  }
+
+  public static boolean $inline$foo() {
+    return true;
+  }
+}
diff --git a/test/506-verify-aput/expected.txt b/test/506-verify-aput/expected.txt
new file mode 100644
index 0000000..ccaf6f8
--- /dev/null
+++ b/test/506-verify-aput/expected.txt
@@ -0,0 +1 @@
+Enter
diff --git a/test/506-verify-aput/info.txt b/test/506-verify-aput/info.txt
new file mode 100644
index 0000000..461d9d3
--- /dev/null
+++ b/test/506-verify-aput/info.txt
@@ -0,0 +1,2 @@
+Test that an aput on a null array is properly checked
+by the verifier.
diff --git a/test/506-verify-aput/smali/VerifyAPut1.smali b/test/506-verify-aput/smali/VerifyAPut1.smali
new file mode 100644
index 0000000..d50636f
--- /dev/null
+++ b/test/506-verify-aput/smali/VerifyAPut1.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LVerifyAPut1;
+
+.super Ljava/lang/Object;
+
+.method public static method()V
+   .registers 3
+   const/4 v0, 0
+   const/4 v1, 1
+   const/4 v2, 2
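+   # v0 holds the null constant, so the aput-object below is a store into a
+   # null array reference; the test in src/Main.java expects a VerifyError.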
+   aput-object v2, v0, v1
+   return-void
+.end method
diff --git a/test/506-verify-aput/smali/VerifyAPut2.smali b/test/506-verify-aput/smali/VerifyAPut2.smali
new file mode 100644
index 0000000..2eceebb
--- /dev/null
+++ b/test/506-verify-aput/smali/VerifyAPut2.smali
@@ -0,0 +1,25 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LVerifyAPut2;
+
+.super Ljava/lang/Object;
+
+.method public static method(LMain;)V
+   .registers 3
+   const/4 v0, 0
+   const/4 v1, 1
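+   # v0 holds the null constant; this aput of a reference (p0) into it must
+   # likewise fail verification.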
+   aput p0, v0, v1
+   return-void
+.end method
diff --git a/test/506-verify-aput/src/Main.java b/test/506-verify-aput/src/Main.java
new file mode 100644
index 0000000..8359f2c
--- /dev/null
+++ b/test/506-verify-aput/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // Workaround for b/18051191.
+    System.out.println("Enter");
+    try {
+      Class.forName("VerifyAPut1");
+      throw new Error("expected verification error");
+    } catch (VerifyError e) { /* ignore */ }
+
+    try {
+      Class.forName("VerifyAPut2");
+      throw new Error("expected verification error");
+    } catch (VerifyError e) { /* ignore */ }
+  }
+}
diff --git a/test/507-boolean-test/expected.txt b/test/507-boolean-test/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/507-boolean-test/expected.txt
diff --git a/test/507-boolean-test/info.txt b/test/507-boolean-test/info.txt
new file mode 100644
index 0000000..15c20c1
--- /dev/null
+++ b/test/507-boolean-test/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler that used to
+crash when compiling (a ? 1 : 0) == 2.
diff --git a/test/507-boolean-test/src/Main.java b/test/507-boolean-test/src/Main.java
new file mode 100644
index 0000000..f3ce92a
--- /dev/null
+++ b/test/507-boolean-test/src/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    if (bar(true)) {
+      throw new Error("Expected false, got true");
+    }
+  }
+
+  public static boolean bar(boolean a) {
+    return (a ? 0 : 1) == 2;
+  }
+}
diff --git a/test/507-referrer/expected.txt b/test/507-referrer/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/507-referrer/expected.txt
diff --git a/test/507-referrer/info.txt b/test/507-referrer/info.txt
new file mode 100644
index 0000000..1335994
--- /dev/null
+++ b/test/507-referrer/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler, which used
+to do incorrect access checks on static fields when inlining.
diff --git a/test/507-referrer/src/Main.java b/test/507-referrer/src/Main.java
new file mode 100644
index 0000000..6393f39
--- /dev/null
+++ b/test/507-referrer/src/Main.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import p1.InPackage;
+
+public class Main {
+  public static void main(String[] args) {
+    int result = InPackage.$inline$foo();
+    if (result != 42) {
+      throw new Error("Expected 42, got " + result);
+    }
+  }
+}
diff --git a/test/507-referrer/src/p1/InPackage.java b/test/507-referrer/src/p1/InPackage.java
new file mode 100644
index 0000000..162f055
--- /dev/null
+++ b/test/507-referrer/src/p1/InPackage.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package p1;
+
+public class InPackage {
+  public static int $inline$foo() {
+    return myField;
+  }
+
+  protected static int myField = 42;
+}
diff --git a/test/509-pre-header/expected.txt b/test/509-pre-header/expected.txt
new file mode 100644
index 0000000..ccaf6f8
--- /dev/null
+++ b/test/509-pre-header/expected.txt
@@ -0,0 +1 @@
+Enter
diff --git a/test/509-pre-header/info.txt b/test/509-pre-header/info.txt
new file mode 100644
index 0000000..e9d8b94
--- /dev/null
+++ b/test/509-pre-header/info.txt
@@ -0,0 +1,3 @@
+Regression test for the SimplifyCFG phase of optimizing.
+The invariant that the pre-header of a loop is the first
+predecessor of the loop header was not preserved.
diff --git a/test/509-pre-header/smali/PreHeader.smali b/test/509-pre-header/smali/PreHeader.smali
new file mode 100644
index 0000000..04f4e49
--- /dev/null
+++ b/test/509-pre-header/smali/PreHeader.smali
@@ -0,0 +1,39 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LPreHeader;
+
+.super Ljava/lang/Object;
+
+# Label names in this method are taken from the original apk
+# that exposed the crash. The crash was due to fixing a critical
+# edge and not preserving the invariant that the pre header of a loop
+# is the first predecessor of the loop header.
+.method public static method()V
+   .registers 2
+   const/4 v0, 0
+   const/4 v1, 0
+   goto :b31
+   :b23
+   if-eqz v0, :b25
+   goto :b23
+   :b25
+   return-void
+   :b31
+   if-eqz v0, :b23
+   if-eqz v1, :bexit
+   goto :b31
+   :bexit
+   return-void
+.end method
diff --git a/test/509-pre-header/src/Main.java b/test/509-pre-header/src/Main.java
new file mode 100644
index 0000000..1eca419
--- /dev/null
+++ b/test/509-pre-header/src/Main.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    // Workaround for b/18051191.
+    System.out.println("Enter");
+    Class<?> c = Class.forName("PreHeader");
+    Method m = c.getMethod("method");
+    Object[] arguments = { };
+    m.invoke(null, arguments);
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 8565637..c762603 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -17,4 +17,8 @@
 EmptySparseSwitch
 b/20224106
 b/17410612
+b/21863767
+b/21873167
+b/21614284
+b/21902684
 Done!
diff --git a/test/800-smali/smali/b_21614284.smali b/test/800-smali/smali/b_21614284.smali
new file mode 100644
index 0000000..3cb1bd0
--- /dev/null
+++ b/test/800-smali/smali/b_21614284.smali
@@ -0,0 +1,22 @@
+.class public LB21614284;
+.super Ljava/lang/Object;
+
+.field private a:I
+
+.method public constructor <init>()V
+    .registers 2
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    const v0, 42
+    iput v0, p0, LB21614284;->a:I
+    return-void
+.end method
+
+.method public static test(LB21614284;)I
+    .registers 2
+    # Empty if, testing p0.
+    if-nez p0, :label
+    :label
+    # p0 still needs a null check.
+    iget v0, p0, LB21614284;->a:I
+    return v0
+.end method
diff --git a/test/800-smali/smali/b_21863767.smali b/test/800-smali/smali/b_21863767.smali
new file mode 100644
index 0000000..9b33bc3
--- /dev/null
+++ b/test/800-smali/smali/b_21863767.smali
@@ -0,0 +1,29 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LB21863767;
+
+.super Ljava/lang/Object;
+
+.method public static run()V
+   .registers 2
+   return-void
+   goto :start
+   :start
+   # The following is dead code but used to crash the compiler.
+   const/4 v0, 0
+   return-wide v0
+   return v0
+   return-object v0
+.end method
diff --git a/test/800-smali/smali/b_21873167.smali b/test/800-smali/smali/b_21873167.smali
new file mode 100644
index 0000000..c0c09cb
--- /dev/null
+++ b/test/800-smali/smali/b_21873167.smali
@@ -0,0 +1,18 @@
+.class public LB21873167;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public test()V
+    .registers 1
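+    # The catch-all below covers the monitor-enter/monitor-exit pair, and its
+    # handler target is the :end label that closes the same range.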
+    :start
+    monitor-enter p0
+    monitor-exit  p0
+    :end
+    return-void
+    .catchall {:start .. :end} :end
+.end method
diff --git a/test/800-smali/smali/b_21902684.smali b/test/800-smali/smali/b_21902684.smali
new file mode 100644
index 0000000..2d906b6
--- /dev/null
+++ b/test/800-smali/smali/b_21902684.smali
@@ -0,0 +1,17 @@
+.class public LB21902684;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public test()V
+    .registers 1
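+    # The goto below jumps straight to :end, so the new-instance and its
+    # constructor call are dead code and the String is never created.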
+    goto :end
+    new-instance v0, Ljava/lang/String;
+    invoke-direct {v0}, Ljava/lang/String;-><init>()V
+    :end
+    return-void
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 33df06d..7280d45 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -83,6 +83,12 @@
                 0));
         testCases.add(new TestCase("b/17410612", "B17410612", "run", null, new VerifyError(),
                 0));
+        testCases.add(new TestCase("b/21863767", "B21863767", "run", null, null,
+                null));
+        testCases.add(new TestCase("b/21873167", "B21873167", "test", null, null, null));
+        testCases.add(new TestCase("b/21614284", "B21614284", "test", new Object[] { null },
+            new NullPointerException(), null));
+        testCases.add(new TestCase("b/21902684", "B21902684", "test", null, null, null));
     }
 
     public void runTests() {
diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt
new file mode 100644
index 0000000..ed1f875
--- /dev/null
+++ b/test/955-lambda-smali/expected.txt
@@ -0,0 +1,4 @@
+SanityCheck
+Hello world! (0-args, no closure)
+ABCD Hello world! (4-args, no closure)
+Caught NPE
diff --git a/test/955-lambda-smali/info.txt b/test/955-lambda-smali/info.txt
new file mode 100644
index 0000000..aed5e84
--- /dev/null
+++ b/test/955-lambda-smali/info.txt
@@ -0,0 +1,3 @@
+Smali-based tests for experimental lambda instructions.
+
+Obviously needs to run under ART.
diff --git a/test/955-lambda-smali/run b/test/955-lambda-smali/run
new file mode 100755
index 0000000..2aeca8c
--- /dev/null
+++ b/test/955-lambda-smali/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ensure that the experimental lambda opcodes are turned on for dalvikvm and dex2oat
+${RUN} "$@" --runtime-option -Xexperimental-lambdas -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental-lambdas
diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali
new file mode 100644
index 0000000..1851399
--- /dev/null
+++ b/test/955-lambda-smali/smali/Main.smali
@@ -0,0 +1,29 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LMain;
+
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .registers 2
+
+    invoke-static {}, LSanityCheck;->run()I
+    invoke-static {}, LTrivialHelloWorld;->run()V
+
+# TODO: add tests when verification fails
+
+    return-void
+.end method
diff --git a/test/955-lambda-smali/smali/SanityCheck.smali b/test/955-lambda-smali/smali/SanityCheck.smali
new file mode 100644
index 0000000..4c807d7
--- /dev/null
+++ b/test/955-lambda-smali/smali/SanityCheck.smali
@@ -0,0 +1,36 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LSanityCheck;
+.super Ljava/lang/Object;
+
+
+.method public constructor <init>()V
+.registers 1
+   invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+   return-void
+.end method
+
+# This test is just here to make sure that we can at least execute basic non-lambda
+# functionality such as printing (when lambdas are enabled in the runtime).
+.method public static run()I
+# Don't use too many registers here, to avoid hitting the Stack::SanityCheck frame size < 2KB assert
+.registers 3
+    const-string v0, "SanityCheck"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    const v2, 123456
+    return v2
+.end method
diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
new file mode 100644
index 0000000..38ee95a
--- /dev/null
+++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali
@@ -0,0 +1,94 @@
+#
+#  Copyright (C) 2015 The Android Open Source Project
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+.class public LTrivialHelloWorld;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static run()V
+.registers 8
+    # Trivial 0-arg hello world
+    create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    # TODO: create-lambda should not write to both v0 and v1
+    invoke-lambda v0, {}
+
+    # Slightly more interesting 4-arg hello world
+    create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    # TODO: create-lambda should not write to both v2 and v3
+    const-string v4, "A"
+    const-string v5, "B"
+    const-string v6, "C"
+    const-string v7, "D"
+    invoke-lambda v2, {v4, v5, v6, v7}
+
+    invoke-static {}, LTrivialHelloWorld;->testFailures()V
+
+    return-void
+.end method
+
+# TODO: should use a closure type instead of ArtMethod.
+.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V
+    .registers 3 # 1 parameter, 2 locals
+
+    const-string v0, "Hello world! (0-args, no closure)"
+
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+.end method
+
+# TODO: should use a closure type instead of ArtMethod.
+.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+    .registers 7 # 5 parameters, 2 locals
+
+    const-string v0, " Hello world! (4-args, no closure)"
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+    invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V
+
+    invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+.end method
+
+# Test that exceptions are thrown as expected when the opcodes are used incorrectly
+.method private static testFailures()V
+    .registers 4 # 0 parameters, 4 locals
+
+    const v0, 0  # v0 = null
+    const v1, 0  # v1 = null
+:start
+    invoke-lambda v0, {}  # invoking a null lambda shall raise an NPE
+:end
+    return-void
+
+:handler
+    const-string v2, "Caught NPE"
+    sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    return-void
+
+    .catch Ljava/lang/NullPointerException; {:start .. :end} :handler
+.end method
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 469df1f..60165d9 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -377,8 +377,7 @@
 
 # Known broken tests for the default compiler (Quick).
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  457-regs \
-  496-checker-inlining-and-class-loader
+  457-regs
 
 ifneq (,$(filter default,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -399,6 +398,37 @@
 
 TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
 
+# Known broken tests for the MIPS64 optimizing compiler backend in 64-bit mode.  b/21555893
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS := \
+  004-SignalTest \
+  018-stack-overflow \
+  107-int-math2 \
+  449-checker-bce
+
+ifeq ($(TARGET_ARCH),mips64)
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS),64)
+  endif
+endif
+
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS :=
+
+# Known broken tests for the MIPS64 optimizing compiler backend in 32-bit mode.  b/21555893
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS := \
+  496-checker-inlining-and-class-loader
+
+ifeq ($(TARGET_ARCH),mips64)
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS),32)
+  endif
+endif
+
+TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS :=
+
 # Known broken tests for the optimizing compiler.
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
 
diff --git a/test/etc/default-build b/test/etc/default-build
index fbe97f9..92954a9 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -18,6 +18,7 @@
 set -e
 
 DX_FLAGS=""
+SKIP_DX_MERGER="false"
 
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
@@ -38,22 +39,36 @@
   exit 0
 fi
 
-mkdir classes
-${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+if [ -d src ]; then
+  mkdir classes
+  ${JAVAC} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+fi
 
 if [ -d src2 ]; then
+  mkdir -p classes
   ${JAVAC} -d classes `find src2 -name '*.java'`
 fi
 
-if [ ${NEED_DEX} = "true" ]; then
+if ! [ -d src ] && ! [ -d src2 ]; then
+  # No src or src2 directory? Then forget about trying to run dx.
+  SKIP_DX_MERGER="true"
+fi
+
+if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
     --dump-width=1000 ${DX_FLAGS} classes
 fi
 
 if [ -d smali ]; then
   # Compile Smali classes
-  ${SMALI} -JXmx256m --output smali_classes.dex `find smali -name '*.smali'`
-  ${DXMERGER} classes.dex classes.dex smali_classes.dex
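+  # Note: --experimental and --api-level 23 allow smali to assemble experimental
+  # opcodes (such as the lambda instructions used by 955-lambda-smali).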
+  ${SMALI} -JXmx256m --experimental --api-level 23 --output smali_classes.dex `find smali -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${SKIP_DX_MERGER} = "false" ]; then
+    ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  else
+    mv smali_classes.dex classes.dex
+  fi
 fi
 
 if [ -d src-ex ]; then
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index e1f7581..1b8748b 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -77,7 +77,7 @@
     # classpath/resources differences when compiling the boot image.
     vogar_args="$vogar_args --vm-arg -Ximage:/non/existent"
     shift
-  elif [[ $1 == "--debug" ]]; then
+  elif [[ "$1" == "--debug" ]]; then
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
     vogar_args="$vogar_args --vm-arg -XXlib:libartd.so"