Merge "ART: Add another proxy test"
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 767ffbf..eb48cc3 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -25,6 +25,7 @@
   kInvalidRegClass,
   kCoreReg,
   kFPReg,
+  kRefReg,
   kAnyReg,
 };
 
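Note on the hunk above: kRefReg is a new register class for object references, sitting alongside the existing core and FP classes. As a sketch, the enum reads roughly as follows after the patch (member set taken from the hunk; the enum name RegisterClass is inferred from the call sites later in this change):

    enum RegisterClass {
      kInvalidRegClass,
      kCoreReg,  // plain integer values
      kFPReg,    // floating-point values
      kRefReg,   // object references; may need wider registers on 64-bit targets
      kAnyReg,   // caller has no preference
    };
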
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 3d22774..58d2ed2 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -134,39 +134,133 @@
   }
 }
 
+// Enable opcodes that mostly work, but produce assertion errors (thus breaking libartd.so).
+#define ARM64_USE_EXPERIMENTAL_OPCODES 0
+
 // TODO: Remove this when we are able to compile everything.
 int arm64_support_list[] = {
     Instruction::NOP,
     Instruction::MOVE,
     Instruction::MOVE_FROM16,
     Instruction::MOVE_16,
+    Instruction::MOVE_EXCEPTION,
+    Instruction::RETURN_VOID,
+    Instruction::RETURN,
+    Instruction::RETURN_WIDE,
+    Instruction::CONST_4,
+    Instruction::CONST_16,
+    Instruction::CONST,
+    Instruction::CONST_STRING,
+    Instruction::MONITOR_ENTER,
+    Instruction::MONITOR_EXIT,
+    Instruction::THROW,
+    Instruction::GOTO,
+    Instruction::GOTO_16,
+    Instruction::GOTO_32,
+    Instruction::IF_EQ,
+    Instruction::IF_NE,
+    Instruction::IF_LT,
+    Instruction::IF_GE,
+    Instruction::IF_GT,
+    Instruction::IF_LE,
+    Instruction::IF_EQZ,
+    Instruction::IF_NEZ,
+    Instruction::IF_LTZ,
+    Instruction::IF_GEZ,
+    Instruction::IF_GTZ,
+    Instruction::IF_LEZ,
+    Instruction::NEG_INT,
+    Instruction::NOT_INT,
+    Instruction::NEG_FLOAT,
+    Instruction::INT_TO_BYTE,
+    Instruction::INT_TO_CHAR,
+    Instruction::INT_TO_SHORT,
+    Instruction::ADD_INT,
+    Instruction::SUB_INT,
+    Instruction::MUL_INT,
+    Instruction::DIV_INT,
+    Instruction::REM_INT,
+    Instruction::AND_INT,
+    Instruction::OR_INT,
+    Instruction::XOR_INT,
+    Instruction::SHL_INT,
+    Instruction::SHR_INT,
+    Instruction::USHR_INT,
+    Instruction::ADD_FLOAT,
+    Instruction::SUB_FLOAT,
+    Instruction::MUL_FLOAT,
+    Instruction::DIV_FLOAT,
+    Instruction::ADD_INT_2ADDR,
+    Instruction::SUB_INT_2ADDR,
+    Instruction::MUL_INT_2ADDR,
+    Instruction::DIV_INT_2ADDR,
+    Instruction::REM_INT_2ADDR,
+    Instruction::AND_INT_2ADDR,
+    Instruction::OR_INT_2ADDR,
+    Instruction::XOR_INT_2ADDR,
+    Instruction::SHL_INT_2ADDR,
+    Instruction::SHR_INT_2ADDR,
+    Instruction::USHR_INT_2ADDR,
+    Instruction::ADD_FLOAT_2ADDR,
+    Instruction::SUB_FLOAT_2ADDR,
+    Instruction::MUL_FLOAT_2ADDR,
+    Instruction::DIV_FLOAT_2ADDR,
+    Instruction::ADD_INT_LIT16,
+    Instruction::RSUB_INT,
+    Instruction::MUL_INT_LIT16,
+    Instruction::DIV_INT_LIT16,
+    Instruction::REM_INT_LIT16,
+    Instruction::AND_INT_LIT16,
+    Instruction::OR_INT_LIT16,
+    Instruction::XOR_INT_LIT16,
+    Instruction::ADD_INT_LIT8,
+    Instruction::RSUB_INT_LIT8,
+    Instruction::MUL_INT_LIT8,
+    Instruction::DIV_INT_LIT8,
+    Instruction::REM_INT_LIT8,
+    Instruction::AND_INT_LIT8,
+    Instruction::OR_INT_LIT8,
+    Instruction::XOR_INT_LIT8,
+    Instruction::SHL_INT_LIT8,
+    Instruction::SHR_INT_LIT8,
+    Instruction::USHR_INT_LIT8,
+    // TODO(Arm64): Enable compiler pass
+    // ----- ExtendedMIROpcode -----
+    kMirOpPhi,
+    kMirOpCopy,
+    kMirOpFusedCmplFloat,
+    kMirOpFusedCmpgFloat,
+    kMirOpFusedCmplDouble,
+    kMirOpFusedCmpgDouble,
+    kMirOpFusedCmpLong,
+    kMirOpNop,
+    kMirOpNullCheck,
+    kMirOpRangeCheck,
+    kMirOpDivZeroCheck,
+    kMirOpCheck,
+    kMirOpCheckPart2,
+    kMirOpSelect,
+
+#if ARM64_USE_EXPERIMENTAL_OPCODES
     Instruction::MOVE_WIDE,
     Instruction::MOVE_WIDE_FROM16,
     Instruction::MOVE_WIDE_16,
     Instruction::MOVE_OBJECT,
     Instruction::MOVE_OBJECT_FROM16,
     Instruction::MOVE_OBJECT_16,
+    // Instruction::PACKED_SWITCH,
+    // Instruction::SPARSE_SWITCH,
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
-    Instruction::MOVE_EXCEPTION,
-    Instruction::RETURN_VOID,
-    Instruction::RETURN,
-    Instruction::RETURN_WIDE,
     // Instruction::RETURN_OBJECT,
-    // Instruction::CONST_4,
-    // Instruction::CONST_16,
-    // Instruction::CONST,
     // Instruction::CONST_HIGH16,
     // Instruction::CONST_WIDE_16,
     // Instruction::CONST_WIDE_32,
     // Instruction::CONST_WIDE,
     // Instruction::CONST_WIDE_HIGH16,
-    // Instruction::CONST_STRING,
     // Instruction::CONST_STRING_JUMBO,
     // Instruction::CONST_CLASS,
-    Instruction::MONITOR_ENTER,
-    Instruction::MONITOR_EXIT,
     // Instruction::CHECK_CAST,
     // Instruction::INSTANCE_OF,
     // Instruction::ARRAY_LENGTH,
@@ -175,29 +269,11 @@
     // Instruction::FILLED_NEW_ARRAY,
     // Instruction::FILLED_NEW_ARRAY_RANGE,
     // Instruction::FILL_ARRAY_DATA,
-    Instruction::THROW,
-    // Instruction::GOTO,
-    // Instruction::GOTO_16,
-    // Instruction::GOTO_32,
-    // Instruction::PACKED_SWITCH,
-    // Instruction::SPARSE_SWITCH,
     Instruction::CMPL_FLOAT,
     Instruction::CMPG_FLOAT,
     Instruction::CMPL_DOUBLE,
     Instruction::CMPG_DOUBLE,
     Instruction::CMP_LONG,
-    // Instruction::IF_EQ,
-    // Instruction::IF_NE,
-    // Instruction::IF_LT,
-    // Instruction::IF_GE,
-    // Instruction::IF_GT,
-    // Instruction::IF_LE,
-    // Instruction::IF_EQZ,
-    // Instruction::IF_NEZ,
-    // Instruction::IF_LTZ,
-    // Instruction::IF_GEZ,
-    // Instruction::IF_GTZ,
-    // Instruction::IF_LEZ,
     // Instruction::UNUSED_3E,
     // Instruction::UNUSED_3F,
     // Instruction::UNUSED_40,
@@ -259,11 +335,8 @@
     // Instruction::INVOKE_INTERFACE_RANGE,
     // Instruction::UNUSED_79,
     // Instruction::UNUSED_7A,
-    Instruction::NEG_INT,
-    Instruction::NOT_INT,
     Instruction::NEG_LONG,
     Instruction::NOT_LONG,
-    Instruction::NEG_FLOAT,
     Instruction::NEG_DOUBLE,
     Instruction::INT_TO_LONG,
     Instruction::INT_TO_FLOAT,
@@ -277,20 +350,6 @@
     Instruction::DOUBLE_TO_INT,
     Instruction::DOUBLE_TO_LONG,
     Instruction::DOUBLE_TO_FLOAT,
-    Instruction::INT_TO_BYTE,
-    Instruction::INT_TO_CHAR,
-    Instruction::INT_TO_SHORT,
-    Instruction::ADD_INT,
-    Instruction::SUB_INT,
-    Instruction::MUL_INT,
-    Instruction::DIV_INT,
-    Instruction::REM_INT,
-    Instruction::AND_INT,
-    Instruction::OR_INT,
-    Instruction::XOR_INT,
-    Instruction::SHL_INT,
-    Instruction::SHR_INT,
-    Instruction::USHR_INT,
     Instruction::ADD_LONG,
     Instruction::SUB_LONG,
     Instruction::MUL_LONG,
@@ -302,27 +361,12 @@
     Instruction::SHL_LONG,
     Instruction::SHR_LONG,
     Instruction::USHR_LONG,
-    Instruction::ADD_FLOAT,
-    Instruction::SUB_FLOAT,
-    Instruction::MUL_FLOAT,
-    Instruction::DIV_FLOAT,
     // Instruction::REM_FLOAT,
     Instruction::ADD_DOUBLE,
     Instruction::SUB_DOUBLE,
     Instruction::MUL_DOUBLE,
     Instruction::DIV_DOUBLE,
     // Instruction::REM_DOUBLE,
-    Instruction::ADD_INT_2ADDR,
-    Instruction::SUB_INT_2ADDR,
-    Instruction::MUL_INT_2ADDR,
-    Instruction::DIV_INT_2ADDR,
-    Instruction::REM_INT_2ADDR,
-    Instruction::AND_INT_2ADDR,
-    Instruction::OR_INT_2ADDR,
-    Instruction::XOR_INT_2ADDR,
-    Instruction::SHL_INT_2ADDR,
-    Instruction::SHR_INT_2ADDR,
-    Instruction::USHR_INT_2ADDR,
     Instruction::ADD_LONG_2ADDR,
     Instruction::SUB_LONG_2ADDR,
     Instruction::MUL_LONG_2ADDR,
@@ -334,35 +378,12 @@
     Instruction::SHL_LONG_2ADDR,
     Instruction::SHR_LONG_2ADDR,
     Instruction::USHR_LONG_2ADDR,
-    Instruction::ADD_FLOAT_2ADDR,
-    Instruction::SUB_FLOAT_2ADDR,
-    Instruction::MUL_FLOAT_2ADDR,
-    Instruction::DIV_FLOAT_2ADDR,
     // Instruction::REM_FLOAT_2ADDR,
     Instruction::ADD_DOUBLE_2ADDR,
     Instruction::SUB_DOUBLE_2ADDR,
     Instruction::MUL_DOUBLE_2ADDR,
     Instruction::DIV_DOUBLE_2ADDR,
     // Instruction::REM_DOUBLE_2ADDR,
-    Instruction::ADD_INT_LIT16,
-    Instruction::RSUB_INT,
-    Instruction::MUL_INT_LIT16,
-    Instruction::DIV_INT_LIT16,
-    Instruction::REM_INT_LIT16,
-    Instruction::AND_INT_LIT16,
-    Instruction::OR_INT_LIT16,
-    Instruction::XOR_INT_LIT16,
-    Instruction::ADD_INT_LIT8,
-    Instruction::RSUB_INT_LIT8,
-    Instruction::MUL_INT_LIT8,
-    Instruction::DIV_INT_LIT8,
-    Instruction::REM_INT_LIT8,
-    Instruction::AND_INT_LIT8,
-    Instruction::OR_INT_LIT8,
-    Instruction::XOR_INT_LIT8,
-    Instruction::SHL_INT_LIT8,
-    Instruction::SHR_INT_LIT8,
-    Instruction::USHR_INT_LIT8,
     // Instruction::IGET_QUICK,
     // Instruction::IGET_WIDE_QUICK,
     // Instruction::IGET_OBJECT_QUICK,
@@ -392,24 +413,7 @@
     // Instruction::UNUSED_FD,
     // Instruction::UNUSED_FE,
     // Instruction::UNUSED_FF,
-
-    // TODO(Arm64): Enable compiler pass
-    // ----- ExtendedMIROpcode -----
-    kMirOpPhi,
-    kMirOpCopy,
-    kMirOpFusedCmplFloat,
-    kMirOpFusedCmpgFloat,
-    kMirOpFusedCmplDouble,
-    kMirOpFusedCmpgDouble,
-    kMirOpFusedCmpLong,
-    kMirOpNop,
-    kMirOpNullCheck,
-    kMirOpRangeCheck,
-    kMirOpDivZeroCheck,
-    kMirOpCheck,
-    kMirOpCheckPart2,
-    kMirOpSelect,
-    // kMirOpLast,
+#endif /* ARM64_USE_EXPERIMENTAL_OPCODES */
 };
 
 // TODO: Remove this when we are able to compile everything.
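The opcodes hoisted above the #if are now unconditionally in arm64_support_list; everything left inside the block stays compiled out until ARM64_USE_EXPERIMENTAL_OPCODES is flipped to 1. A minimal sketch of how such a list can gate per-opcode compilation (the helper name is hypothetical, not part of this change):

    // Hypothetical linear scan over the support list.
    static bool Arm64CanCompileOpcode(int opcode) {
      for (size_t i = 0; i < arraysize(arm64_support_list); ++i) {
        if (arm64_support_list[i] == opcode) {
          return true;  // ARM64 backend handles this opcode.
        }
      }
      return false;  // Method falls back to another backend/interpreter.
    }
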
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index db28f3a..3ef1dbf 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -1072,19 +1072,21 @@
     }
   }
 
-  // Remove the BB information and also find the after_list
+  // Remove the BB information and also find the after_list.
   for (MIR* mir = first_list_mir; mir != last_list_mir; mir = mir->next) {
     mir->bb = NullBasicBlockId;
   }
 
   after_list = last_list_mir->next;
 
-  // If there is nothing before the list, after_list is the first_mir
+  // If there is nothing before the list, after_list is the first_mir.
   if (before_list == nullptr) {
     first_mir_insn = after_list;
+  } else {
+    before_list->next = after_list;
   }
 
-  // If there is nothing after the list, before_list is last_mir
+  // If there is nothing after the list, before_list is last_mir.
   if (after_list == nullptr) {
     last_mir_insn = before_list;
   }
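This hunk is a genuine bug fix: when the excised MIR range had instructions before it, the old code never re-linked before_list->next, leaving the removed instructions reachable from their predecessor. The added else branch completes a standard singly-linked-list excision, sketched here with hypothetical names:

    struct Node { Node* next; };

    // Excise the inclusive range [first, last]; 'before' precedes 'first',
    // or is nullptr when 'first' is the list head.
    void Excise(Node** head, Node* before, Node* first, Node* last) {
      Node* after = last->next;
      if (before == nullptr) {
        *head = after;         // Range started at the head.
      } else {
        before->next = after;  // The re-link the old code was missing.
      }
    }
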
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5d74b8d..9f9e618 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -313,11 +313,11 @@
 
 void ArmMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  RegStorage reset_reg = AllocTempRef();
+  LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rARM_SELF, ex_offset, reset_reg);
+  StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index f0a9ca4..9c801a5 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index dde8ff0..e06d814 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -51,7 +51,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -94,7 +94,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 2556788..769122d 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -206,13 +206,16 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  // Avoid using float regs here.
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ConditionCode ccode = mir->meta.ccode;
   if (mir->ssa_rep->num_uses == 1) {
     // CONST case
     int true_val = mir->dalvikInsn.vB;
     int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
     // Change kCondNe to kCondEq for the special cases below.
     if (ccode == kCondNe) {
       ccode = kCondEq;
@@ -239,8 +242,8 @@
       OpEndIT(it);  // Add a scheduling barrier to keep the IT shadow intact
     } else {
       // Unlikely case - could be tuned.
-      RegStorage t_reg1 = AllocTemp();
-      RegStorage t_reg2 = AllocTemp();
+      RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class);
+      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
       LoadConstant(t_reg1, true_val);
       LoadConstant(t_reg2, false_val);
       OpRegImm(kOpCmp, rl_src.reg, 0);
@@ -253,9 +256,9 @@
     // MOVE case
     RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
     RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-    rl_true = LoadValue(rl_true, kCoreReg);
-    rl_false = LoadValue(rl_false, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
     OpRegImm(kOpCmp, rl_src.reg, 0);
     LIR* it = nullptr;
     if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {  // Is the "true" case already in place?
@@ -814,10 +817,10 @@
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    rl_new_value = LoadValue(rl_src_new_value);
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -840,7 +843,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+    rl_expected = LoadValue(rl_src_expected);
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
@@ -1047,7 +1050,7 @@
       ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul);
       FlushAllRegs();
       CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false);
-      rl_result = GetReturnWide(false);
+      rl_result = GetReturnWide(kCoreReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     }
@@ -1126,7 +1129,7 @@
     if (reg_status != 0) {
       // We had manually allocated registers for rl_result.
       // Now construct a RegLocation.
-      rl_result = GetReturnWide(false);  // Just using as a template.
+      rl_result = GetReturnWide(kCoreReg);  // Just using as a template.
       rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi);
     }
 
@@ -1168,7 +1171,7 @@
   int data_offset;
   RegLocation rl_result;
   bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -1203,7 +1206,7 @@
       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
     } else {
       // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTemp();
+      reg_ptr = AllocTempRef();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale));
       FreeTemp(rl_index.reg);
     }
@@ -1229,7 +1232,7 @@
     }
   } else {
     // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTemp();
+    RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -1267,7 +1270,7 @@
     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -1281,7 +1284,7 @@
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
   }
 
   /* null object? */
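Two themes in this file: the select lowering now derives its register class from the ref bit of each operand (the IT-block/conditional-move path works on core condition flags, so float registers must be avoided, per the hunk's own comment), and pointer temporaries come from AllocTempRef. The class-selection idiom, as it appears above:

    RegisterClass src_reg_class    = rl_src.ref  ? kRefReg : kCoreReg;
    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
    RegStorage t_reg = AllocTypedTemp(false /* fp_hint */, result_reg_class);
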
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 309f676..a50b90a 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -66,6 +66,10 @@
   return arm_loc_c_return;
 }
 
+RegLocation ArmMir2Lir::LocCReturnRef() {
+  return arm_loc_c_return;
+}
+
 RegLocation ArmMir2Lir::LocCReturnWide() {
   return arm_loc_c_return_wide;
 }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 2e3ef86..d0f8e74 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -287,9 +287,9 @@
 
 void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<8>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg);
-  Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg);
+  StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 16bb701..6251f4f 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 882ee66..acc7d17 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -47,7 +47,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -90,7 +90,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index d9428f9..7ebb496 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -88,14 +88,16 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
 
   RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
   RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-  rl_true = LoadValue(rl_true, kCoreReg);
-  rl_false = LoadValue(rl_false, kCoreReg);
-  rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  rl_true = LoadValue(rl_true, result_reg_class);
+  rl_false = LoadValue(rl_false, result_reg_class);
+  rl_result = EvalLoc(rl_dest, result_reg_class, true);
   OpRegImm(kOpCmp, rl_src.reg, 0);
   NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(),
           rl_false.reg.GetReg(), code);
@@ -501,10 +503,10 @@
   // Release store semantics, get the barrier out of the way.  TODO: revisit
   GenMemBarrier(kStoreLoad);
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    rl_new_value = LoadValue(rl_src_new_value);
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -527,7 +529,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected, kCoreReg);
+    rl_expected = LoadValue(rl_src_expected);
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
@@ -769,7 +771,7 @@
   int data_offset;
   RegLocation rl_result;
   bool constant_index = rl_index.is_const;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -804,7 +806,7 @@
       reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
     } else {
       // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTemp();
+      reg_ptr = AllocTempRef();
       OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale));
       FreeTemp(rl_index.reg);
     }
@@ -830,7 +832,7 @@
     }
   } else {
     // Offset base, then use indexed load
-    RegStorage reg_ptr = AllocTemp();
+    RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
     rl_result = EvalLoc(rl_dest, reg_class, true);
@@ -871,7 +873,7 @@
     data_offset += mir_graph_->ConstantValue(rl_index) << scale;
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
   }
@@ -885,7 +887,7 @@
     reg_ptr = rl_array.reg;
   } else {
     allocated_reg_ptr_temp = true;
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
   }
 
   /* null object? */
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 598d05b..7539392 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -96,6 +96,10 @@
   return arm_loc_c_return;
 }
 
+RegLocation Arm64Mir2Lir::LocCReturnRef() {
+  return arm_loc_c_return;
+}
+
 RegLocation Arm64Mir2Lir::LocCReturnWide() {
   return arm_loc_c_return_wide;
 }
@@ -572,7 +576,7 @@
   if (UNLIKELY(is_volatile)) {
     // On arm64, fp register load/store is atomic only for single bytes.
     if (size != kSignedByte && size != kUnsignedByte) {
-      return kCoreReg;
+      return (size == kReference) ? kRefReg : kCoreReg;
     }
   }
   return RegClassBySize(size);
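Only single-byte FP loads/stores are atomic on arm64, so volatile accesses wider than a byte are forced out of FP registers; the fix routes volatile references to kRefReg instead of lumping them in with kCoreReg. The resulting dispatch, in outline:

    // RegClassForFieldLoadStore(size, is_volatile) after this change:
    //   volatile, size == kReference        -> kRefReg
    //   volatile, any other non-byte size   -> kCoreReg
    //   volatile byte sizes / non-volatile  -> RegClassBySize(size),
    //                                          which now maps kReference to kRefReg
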
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 4f2a876..62c81d0 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -220,6 +220,8 @@
 void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                                   RegLocation rl_src2, LIR* taken,
                                   LIR* fall_through) {
+  DCHECK(!rl_src1.fp);
+  DCHECK(!rl_src2.fp);
   ConditionCode cond;
   switch (opcode) {
     case Instruction::IF_EQ:
@@ -253,7 +255,7 @@
     cond = FlipComparisonOrder(cond);
   }
 
-  rl_src1 = LoadValue(rl_src1, kCoreReg);
+  rl_src1 = LoadValue(rl_src1);
   // Is this really an immediate comparison?
   if (rl_src2.is_const) {
     // If it's already live in a register or not easily materialized, just keep going
@@ -265,14 +267,15 @@
       return;
     }
   }
-  rl_src2 = LoadValue(rl_src2, kCoreReg);
+  rl_src2 = LoadValue(rl_src2);
   OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken);
 }
 
 void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken,
                                       LIR* fall_through) {
   ConditionCode cond;
-  rl_src = LoadValue(rl_src, kCoreReg);
+  DCHECK(!rl_src.fp);
+  rl_src = LoadValue(rl_src);
   switch (opcode) {
     case Instruction::IF_EQZ:
       cond = kCondEq;
@@ -371,7 +374,7 @@
     func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayWithAccessCheck);
     mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   }
-  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  RegLocation rl_result = mir_to_lir->GetReturn(kRefReg);
   mir_to_lir->StoreValue(rl_dest, rl_result);
 }
 
@@ -503,7 +506,7 @@
     }
   }
   if (info->result.location != kLocInvalid) {
-    StoreValue(info->result, GetReturn(false /* not fp */));
+    StoreValue(info->result, GetReturn(kRefReg));
   }
 }
 
@@ -563,7 +566,7 @@
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method = LoadCurrMethod();
-      r_base = AllocTempWord();
+      r_base = AllocTempRef();
       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
       if (IsTemp(rl_method.reg)) {
         FreeTemp(rl_method.reg);
@@ -603,6 +606,8 @@
                                                      field_info.StorageIndex(), r_base));
 
         FreeTemp(r_tmp);
+        // Ensure load of status and load of value don't re-order.
+        GenMemBarrier(kLoadLoad);
       }
       FreeTemp(r_method);
     }
@@ -658,7 +663,7 @@
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
       RegLocation rl_method  = LoadCurrMethod();
-      r_base = AllocTempWord();
+      r_base = AllocTempRef();
       LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base);
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
@@ -694,6 +699,8 @@
                                                      field_info.StorageIndex(), r_base));
 
         FreeTemp(r_tmp);
+        // Ensure load of status and load of value don't re-order.
+        GenMemBarrier(kLoadLoad);
       }
       FreeTemp(r_method);
     }
@@ -726,10 +733,10 @@
       GenSgetCall<4>(this, is_long_or_double, is_object, &field_info);
     }
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(rl_dest.fp);
+      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(rl_dest.fp);
+      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -766,7 +773,7 @@
       (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) {
     RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kCoreReg);
+    rl_obj = LoadValue(rl_obj, kRefReg);
     GenNullCheck(rl_obj.reg, opt_flags);
     RegLocation rl_result = EvalLoc(rl_dest, reg_class, true);
     int field_offset = field_info.FieldOffset().Int32Value();
@@ -793,10 +800,10 @@
       GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj);
     }
     if (is_long_or_double) {
-      RegLocation rl_result = GetReturnWide(rl_dest.fp);
+      RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest));
       StoreValueWide(rl_dest, rl_result);
     } else {
-      RegLocation rl_result = GetReturn(rl_dest.fp);
+      RegLocation rl_result = GetReturn(LocToRegClass(rl_dest));
       StoreValue(rl_dest, rl_result);
     }
   }
@@ -824,7 +831,7 @@
       (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) {
     RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile());
     DCHECK_GE(field_info.FieldOffset().Int32Value(), 0);
-    rl_obj = LoadValue(rl_obj, kCoreReg);
+    rl_obj = LoadValue(rl_obj, kRefReg);
     if (is_long_or_double) {
       rl_src = LoadValueWide(rl_src, reg_class);
     } else {
@@ -881,7 +888,7 @@
 void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
   RegLocation rl_method = LoadCurrMethod();
   RegStorage res_reg = AllocTemp();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
                                                    *cu_->dex_file,
                                                    type_idx)) {
@@ -894,15 +901,15 @@
       CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
                               type_idx, rl_method.reg, true);
     }
-    RegLocation rl_result = GetReturn(false);
+    RegLocation rl_result = GetReturn(kRefReg);
     StoreValue(rl_dest, rl_result);
   } else {
    // We don't need access checks; load the type from the dex cache.
     int32_t dex_cache_offset =
         mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
-    Load32Disp(rl_method.reg, dex_cache_offset, res_reg);
+    LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg);
     int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
-    Load32Disp(res_reg, offset_of_type, rl_result.reg);
+    LoadRefDisp(res_reg, offset_of_type, rl_result.reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
@@ -976,7 +983,7 @@
                 TargetReg(kArg0));
 
     // Might call out to helper, which will return resolved string in kRet0
-    Load32Disp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
+    LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
     LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
     LIR* cont = NewLIR0(kPseudoTargetLabel);
 
@@ -1010,13 +1017,13 @@
     }
 
     GenBarrier();
-    StoreValue(rl_dest, GetReturn(false));
+    StoreValue(rl_dest, GetReturn(kRefReg));
   } else {
     RegLocation rl_method = LoadCurrMethod();
-    RegStorage res_reg = AllocTemp();
-    RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    RegStorage res_reg = AllocTempRef();
+    RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
     LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg);
-    Load32Disp(res_reg, offset_of_string, rl_result.reg);
+    LoadRefDisp(res_reg, offset_of_string, rl_result.reg);
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -1071,7 +1078,7 @@
     func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectWithAccessCheck);
     mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  RegLocation rl_result = mir_to_lir->GetReturn(false);
+  RegLocation rl_result = mir_to_lir->GetReturn(kRefReg);
   mir_to_lir->StoreValue(rl_dest, rl_result);
 }
 
@@ -1103,7 +1110,7 @@
   // X86 has its own implementation.
   DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
 
-  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
   if (result_reg == object.reg) {
@@ -1112,8 +1119,8 @@
   LoadConstant(result_reg, 0);     // assume false
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kCoreReg);
-  RegStorage object_class = AllocTypedTemp(false, kCoreReg);
+  RegStorage check_class = AllocTypedTemp(false, kRefReg);
+  RegStorage object_class = AllocTypedTemp(false, kRefReg);
 
   LoadCurrMethodDirect(check_class);
   if (use_declaring_class) {
@@ -1206,7 +1213,7 @@
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kRefReg);
   if (cu_->instruction_set == kMips) {
     // On MIPS rArg0 != rl_result, place false in result if branch is taken.
     LoadConstant(rl_result.reg, 0);
@@ -1511,7 +1518,7 @@
   } else {
     GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift);
   }
-  RegLocation rl_result = GetReturnWide(false);
+  RegLocation rl_result = GetReturnWide(kCoreReg);
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -1653,7 +1660,7 @@
         CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */);
       }
       if (op == kOpDiv)
-        rl_result = GetReturn(false);
+        rl_result = GetReturn(kCoreReg);
       else
         rl_result = GetReturnAlt();
     }
@@ -1918,7 +1925,7 @@
                                   false);
         }
         if (is_div)
-          rl_result = GetReturn(false);
+          rl_result = GetReturn(kCoreReg);
         else
           rl_result = GetReturnAlt();
       }
@@ -2081,7 +2088,7 @@
     }
    // Adjust return regs to handle the case of rem returning kArg2/kArg3
     if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg())
-      rl_result = mir_to_lir->GetReturnWide(false);
+      rl_result = mir_to_lir->GetReturnWide(kCoreReg);
     else
       rl_result = mir_to_lir->GetReturnWideAlt();
     mir_to_lir->StoreValueWide(rl_dest, rl_result);
@@ -2119,11 +2126,11 @@
   CallRuntimeHelperRegLocation(func_offset, rl_src, false);
   if (rl_dest.wide) {
     RegLocation rl_result;
-    rl_result = GetReturnWide(rl_dest.fp);
+    rl_result = GetReturnWide(LocToRegClass(rl_dest));
     StoreValueWide(rl_dest, rl_result);
   } else {
     RegLocation rl_result;
-    rl_result = GetReturn(rl_dest.fp);
+    rl_result = GetReturn(LocToRegClass(rl_dest));
     StoreValue(rl_dest, rl_result);
   }
 }
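The added kLoadLoad barriers close an ordering hole in the SGET/SPUT slow paths: the load of the class's initialization status and the later access to the static field must not be reordered, or a thread could observe an "initialized" status yet touch a stale field. The intended ordering, in outline:

    // status = class->status;    // Load 1: is the class initialized?
    // GenMemBarrier(kLoadLoad);  // Keep load 2 from floating above load 1.
    // value  = class->field;     // Load 2: only meaningful after init.
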
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index eef3294..1817fd3 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1156,7 +1156,7 @@
 RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
-    res = GetReturn(false);
+    res = GetReturn(LocToRegClass(info->result));
   } else {
     res = info->result;
   }
@@ -1166,7 +1166,7 @@
 RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) {
   RegLocation res;
   if (info->result.location == kLocInvalid) {
-    res = GetReturnWide(false);
+    res = GetReturnWide(kCoreReg);
   } else {
     res = info->result;
   }
@@ -1189,7 +1189,7 @@
 
   RegLocation rl_obj = info->args[0];
   RegLocation rl_idx = info->args[1];
-  rl_obj = LoadValue(rl_obj, kCoreReg);
+  rl_obj = LoadValue(rl_obj, kRefReg);
   // X86 wants to avoid putting a constant index into a register.
  if (!((cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) && rl_idx.is_const)) {
     rl_idx = LoadValue(rl_idx, kCoreReg);
@@ -1202,7 +1202,7 @@
   RegStorage reg_ptr;
   if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) {
     reg_off = AllocTemp();
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
     if (range_check) {
       reg_max = AllocTemp();
       Load32Disp(rl_obj.reg, count_offset, reg_max);
@@ -1232,9 +1232,9 @@
       }
     }
     reg_off = AllocTemp();
-    reg_ptr = AllocTemp();
+    reg_ptr = AllocTempRef();
     Load32Disp(rl_obj.reg, offset_offset, reg_off);
-    Load32Disp(rl_obj.reg, value_offset, reg_ptr);
+    LoadRefDisp(rl_obj.reg, value_offset, reg_ptr);
   }
   if (rl_idx.is_const) {
     OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg));
@@ -1271,7 +1271,7 @@
   }
   // dst = src.length();
   RegLocation rl_obj = info->args[0];
-  rl_obj = LoadValue(rl_obj, kCoreReg);
+  rl_obj = LoadValue(rl_obj, kRefReg);
   RegLocation rl_dest = InlineTarget(info);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   GenNullCheck(rl_obj.reg, info->opt_flags);
@@ -1477,7 +1477,7 @@
     DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0);
     DCHECK(high_code_point_branch == nullptr);
   }
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
   StoreValue(rl_dest, rl_return);
   return true;
@@ -1523,7 +1523,7 @@
       OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo));
     }
   }
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
   StoreValue(rl_dest, rl_return);
   return true;
@@ -1575,7 +1575,7 @@
   rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
   RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
 
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_long) {
@@ -1621,7 +1621,7 @@
     // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
   }
-  RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
+  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_value;
   if (is_long) {
@@ -1635,7 +1635,7 @@
       FreeTemp(rl_temp_offset);
     }
   } else {
-    rl_value = LoadValue(rl_src_value, kCoreReg);
+    rl_value = LoadValue(rl_src_value);
     StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32);
   }
 
@@ -1658,7 +1658,7 @@
     if (info->type != kStatic &&
         ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 ||
          (info->opt_flags & MIR_IGNORE_NULL_CHECK) == 0))  {
-      RegLocation rl_obj = LoadValue(info->args[0], kCoreReg);
+      RegLocation rl_obj = LoadValue(info->args[0], kRefReg);
       GenNullCheck(rl_obj.reg);
     }
     return;
@@ -1783,10 +1783,10 @@
   if (info->result.location != kLocInvalid) {
     // We have a following MOVE_RESULT - do it now.
     if (info->result.wide) {
-      RegLocation ret_loc = GetReturnWide(info->result.fp);
+      RegLocation ret_loc = GetReturnWide(LocToRegClass(info->result));
       StoreValueWide(info->result, ret_loc);
     } else {
-      RegLocation ret_loc = GetReturn(info->result.fp);
+      RegLocation ret_loc = GetReturn(LocToRegClass(info->result));
       StoreValue(info->result, ret_loc);
     }
   }
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index f5e7e63..2c8b9b9 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -139,6 +139,7 @@
 }
 
 RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) {
+  DCHECK(!rl_src.ref || op_kind == kRefReg);
   rl_src = UpdateLoc(rl_src);
   if (rl_src.location == kLocPhysReg) {
     if (!RegClassMatches(op_kind, rl_src.reg)) {
@@ -162,6 +163,10 @@
   return rl_src;
 }
 
+RegLocation Mir2Lir::LoadValue(RegLocation rl_src) {
+  return LoadValue(rl_src, LocToRegClass(rl_src));
+}
+
 void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) {
   /*
    * Sanity checking - should never try to store to the same
@@ -366,7 +371,7 @@
 }
 
 RegLocation Mir2Lir::LoadCurrMethod() {
-  return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
+  return LoadValue(mir_graph_->GetMethodLoc(), kRefReg);
 }
 
 RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
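The new one-argument LoadValue derives the register class from the location itself via LocToRegClass, and the DCHECK in the two-argument form now refuses to load a reference into anything but kRefReg. Call-site flavor, matching the changes elsewhere in this patch:

    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);  // Known reference.
    RegLocation rl_value  = LoadValue(rl_src_value);         // Class inferred from location.
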
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 3af3715..e1bdb2e 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -261,11 +261,11 @@
 
 void MipsMir2Lir::GenMoveException(RegLocation rl_dest) {
   int ex_offset = Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  RegStorage reset_reg = AllocTemp();
-  Load32Disp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
+  RegStorage reset_reg = AllocTempRef();
+  LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg);
   LoadConstant(reset_reg, 0);
-  Store32Disp(rs_rMIPS_SELF, ex_offset, reset_reg);
+  StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg);
   FreeTemp(reset_reg);
   StoreValue(rl_dest, rl_result);
 }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index e462173..ea3c901 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 9fffb2f..4e31477 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -52,7 +52,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -95,7 +95,7 @@
       FlushAllRegs();   // Send everything to home location
       CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                               false);
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
@@ -204,7 +204,7 @@
   RegStorage r_tgt = LoadHelper(offset);
   // NOTE: not a safepoint
   OpReg(kOpBlx, r_tgt);
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kCoreReg);
   StoreValue(rl_dest, rl_result);
 }
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 55cf434..fcf5f94 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -66,6 +66,10 @@
   return mips_loc_c_return;
 }
 
+RegLocation MipsMir2Lir::LocCReturnRef() {
+  return mips_loc_c_return;
+}
+
 RegLocation MipsMir2Lir::LocCReturnWide() {
   return mips_loc_c_return_wide;
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index df56820..9621995 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -23,6 +23,36 @@
 
 namespace art {
 
+RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) {
+  RegisterClass res;
+  switch (shorty_type) {
+    case 'L':
+      res = kRefReg;
+      break;
+    case 'F':
+      // Expected fallthrough.
+    case 'D':
+      res = kFPReg;
+      break;
+    default:
+      res = kCoreReg;
+  }
+  return res;
+}
+
+RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) {
+  RegisterClass res;
+  if (loc.fp) {
+    DCHECK(!loc.ref) << "At most, one of ref/fp may be set";
+    res = kFPReg;
+  } else if (loc.ref) {
+    res = kRefReg;
+  } else {
+    res = kCoreReg;
+  }
+  return res;
+}
+
 void Mir2Lir::LockArg(int in_position, bool wide) {
   RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
   RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
@@ -149,15 +179,13 @@
     return false;
   }
 
-  // The inliner doesn't distinguish kDouble or kFloat, use shorty.
-  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
-
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.object_arg);
-  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
-  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
+  RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
+  RegLocation rl_dest = wide ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class);
   RegStorage r_result = rl_dest.reg;
   if (!RegClassMatches(reg_class, r_result)) {
     r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class)
@@ -205,7 +233,7 @@
   GenPrintLabel(mir);
   LockArg(data.object_arg);
   LockArg(data.src_arg, wide);
-  RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg);
+  RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
   RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
   RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide);
   if (data.is_volatile) {
@@ -226,13 +254,12 @@
 bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) {
   const InlineReturnArgData& data = special.d.return_data;
   bool wide = (data.is_wide != 0u);
-  // The inliner doesn't distinguish kDouble or kFloat, use shorty.
-  bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D';
 
   // Point of no return - no aborts after this
   GenPrintLabel(mir);
   LockArg(data.arg, wide);
-  RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float);
+  RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]);
+  RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class);
   LoadArgDirect(data.arg, rl_dest);
   return true;
 }
@@ -254,7 +281,7 @@
       break;
     case kInlineOpNonWideConst: {
       successful = true;
-      RegLocation rl_dest = GetReturn(cu_->shorty[0] == 'F');
+      RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0]));
       GenPrintLabel(mir);
       LoadConstant(rl_dest.reg, static_cast<int>(special.d.data));
       return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir);
@@ -377,26 +404,30 @@
       }
       break;
 
-    case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
+      DCHECK(rl_src[0].ref);
+      // Intentional fallthrough.
+    case Instruction::RETURN:
       if (!mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
-      StoreValue(GetReturn(cu_->shorty[0] == 'F'), rl_src[0]);
+      DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
+      StoreValue(GetReturn(LocToRegClass(rl_src[0])), rl_src[0]);
       break;
 
     case Instruction::RETURN_WIDE:
       if (!mir_graph_->MethodIsLeaf()) {
         GenSuspendTest(opt_flags);
       }
-      StoreValueWide(GetReturnWide(cu_->shorty[0] == 'D'), rl_src[0]);
+      DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0]));
+      StoreValueWide(GetReturnWide(LocToRegClass(rl_src[0])), rl_src[0]);
       break;
 
     case Instruction::MOVE_RESULT_WIDE:
       if ((opt_flags & MIR_INLINED) != 0) {
         break;  // Nop - combined w/ previous invoke.
       }
-      StoreValueWide(rl_dest, GetReturnWide(rl_dest.fp));
+      StoreValueWide(rl_dest, GetReturnWide(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE_RESULT:
@@ -404,7 +435,7 @@
       if ((opt_flags & MIR_INLINED) != 0) {
         break;  // Nop - combined w/ previous invoke.
       }
-      StoreValue(rl_dest, GetReturn(rl_dest.fp));
+      StoreValue(rl_dest, GetReturn(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE:
@@ -474,7 +505,7 @@
     case Instruction::ARRAY_LENGTH:
       int len_offset;
       len_offset = mirror::Array::LengthOffset().Int32Value();
-      rl_src[0] = LoadValue(rl_src[0], kCoreReg);
+      rl_src[0] = LoadValue(rl_src[0], kRefReg);
       GenNullCheck(rl_src[0].reg, opt_flags);
       rl_result = EvalLoc(rl_dest, kCoreReg, true);
       Load32Disp(rl_src[0].reg, len_offset, rl_result.reg);
@@ -782,7 +813,7 @@
 
     case Instruction::LONG_TO_INT:
       rl_src[0] = UpdateLocWide(rl_src[0]);
-      rl_src[0] = WideToNarrow(rl_src[0]);
+      rl_src[0] = NarrowRegLoc(rl_src[0]);
       StoreValue(rl_dest, rl_src[0]);
       break;
 
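The two new helpers replace the scattered shorty[0] == 'F'/'D' checks with one mapping from shorty characters, and from RegLocation bits, to register classes. Expected values, read off the switches above:

    // ShortyToRegClass: 'L' -> kRefReg; 'F', 'D' -> kFPReg; anything else -> kCoreReg.
    // LocToRegClass:    loc.fp -> kFPReg; loc.ref -> kRefReg; otherwise kCoreReg
    //                   (fp and ref may not both be set, per the DCHECK).
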
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 8d572ca..1281d45 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -456,6 +456,8 @@
       int next_sp_reg_;
       GrowableArray<RegisterInfo*> dp_regs_;    // Double precision float.
       int next_dp_reg_;
+      GrowableArray<RegisterInfo*>* ref_regs_;  // Points to core_regs_ or core64_regs_
+      int* next_ref_reg_;
 
      private:
       Mir2Lir* const m2l_;
@@ -550,8 +552,12 @@
      * just use our knowledge of type to select the most appropriate register class?
      */
     RegisterClass RegClassBySize(OpSize size) {
-      return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
-              size == kSignedByte) ? kCoreReg : kAnyReg;
+      if (size == kReference) {
+        return kRefReg;
+      } else {
+        return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte ||
+                size == kSignedByte) ? kCoreReg : kAnyReg;
+      }
     }
 
     size_t CodeBufferSizeInBytes() {
@@ -612,6 +618,8 @@
       return current_dalvik_offset_;
     }
 
+    RegisterClass ShortyToRegClass(char shorty_type);
+    RegisterClass LocToRegClass(RegLocation loc);
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -699,7 +707,7 @@
     virtual RegStorage AllocFreeTemp();
     virtual RegStorage AllocTemp();
     virtual RegStorage AllocTempWide();
-    virtual RegStorage AllocTempWord();
+    virtual RegStorage AllocTempRef();
     virtual RegStorage AllocTempSingle();
     virtual RegStorage AllocTempDouble();
     virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class);
@@ -719,7 +727,6 @@
     void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
     void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
-    virtual RegLocation WideToNarrow(RegLocation rl);
     void ResetDefLoc(RegLocation rl);
     void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
@@ -764,8 +771,8 @@
     void DoPromotion();
     int VRegOffset(int v_reg);
     int SRegOffset(int s_reg);
-    RegLocation GetReturnWide(bool is_double);
-    RegLocation GetReturn(bool is_float);
+    RegLocation GetReturnWide(RegisterClass reg_class);
+    RegLocation GetReturn(RegisterClass reg_class);
     RegisterInfo* GetRegInfo(RegStorage reg);
 
     // Shared by all targets - implemented in gen_common.cc.
@@ -973,6 +980,8 @@
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
+    // Same as above, but derive the target register class from the location record.
+    virtual RegLocation LoadValue(RegLocation rl_src);
     // Load Dalvik value with 64-bit memory storage.
     virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
@@ -1122,6 +1131,7 @@
     virtual RegLocation GetReturnAlt() = 0;
     virtual RegLocation GetReturnWideAlt() = 0;
     virtual RegLocation LocCReturn() = 0;
+    virtual RegLocation LocCReturnRef() = 0;
     virtual RegLocation LocCReturnDouble() = 0;
     virtual RegLocation LocCReturnFloat() = 0;
     virtual RegLocation LocCReturnWide() = 0;
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 058b89c..2303c62 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -128,6 +128,15 @@
   // Add an entry for InvalidReg with zero'd mask.
   RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0);
   m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg);
+
+  // Existence of core64 registers implies wide references.
+  if (core64_regs_.Size() != 0) {
+    ref_regs_ = &core64_regs_;
+    next_ref_reg_ = &next_core64_reg_;
+  } else {
+    ref_regs_ = &core_regs_;
+    next_ref_reg_ = &next_core_reg_;
+  }
 }
 
 void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) {
@@ -145,6 +154,7 @@
 
 void Mir2Lir::DumpCoreRegPool() {
   DumpRegPool(&reg_pool_->core_regs_);
+  DumpRegPool(&reg_pool_->core64_regs_);
 }
 
 void Mir2Lir::DumpFpRegPool() {
@@ -274,6 +284,7 @@
 
 /* Reserve a callee-save register.  Return InvalidReg if none available */
 RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) {
+  // TODO: 64-bit and refreg update
   RegStorage res;
   GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->core_regs_);
   for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
@@ -406,17 +417,10 @@
   return res;
 }
 
-RegStorage Mir2Lir::AllocTempWord() {
-  // FIXME: temporary workaround.  For bring-up purposes, x86_64 needs the ability
-  // to allocate wide values as a pair of core registers.  However, we can't hold
-  // a reference in a register pair.  This workaround will be removed when the
-  // reference handling code is reworked, or x86_64 backend starts using wide core
-  // registers - whichever happens first.
-  if (cu_->instruction_set == kX86_64) {
-    return AllocTemp();
-  } else {
-    return (Is64BitInstructionSet(cu_->instruction_set)) ? AllocTempWide() : AllocTemp();
-  }
+RegStorage Mir2Lir::AllocTempRef() {
+  RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true);
+  DCHECK(!res.IsPair());
+  return res;
 }
 
 RegStorage Mir2Lir::AllocTempSingle() {
@@ -432,6 +436,7 @@
 }
 
 RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) {
+  DCHECK_NE(reg_class, kRefReg);  // NOTE: the Dalvik width of a reference is always 32 bits.
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     return AllocTempDouble();
   }
@@ -441,6 +446,8 @@
 RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) {
   if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
     return AllocTempSingle();
+  } else if (reg_class == kRefReg) {
+    return AllocTempRef();
   }
   return AllocTemp();
 }
@@ -459,8 +466,10 @@
 
 RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) {
   RegStorage reg;
-  // TODO: might be worth a sanity check here to verify at most 1 live reg per s_reg.
-  if ((reg_class == kAnyReg) || (reg_class == kFPReg)) {
+  if (reg_class == kRefReg) {
+    reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg);
+  }
+  if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) {
     reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg);
   }
   if (!reg.Valid() && (reg_class != kFPReg)) {
@@ -675,39 +684,6 @@
   p->SetDefEnd(finish);
 }
 
-RegLocation Mir2Lir::WideToNarrow(RegLocation rl) {
-  DCHECK(rl.wide);
-  if (rl.location == kLocPhysReg) {
-    if (rl.reg.IsPair()) {
-      RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLow());
-      RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHigh());
-      if (info_lo->IsTemp()) {
-        info_lo->SetIsWide(false);
-        info_lo->ResetDefBody();
-      }
-      if (info_hi->IsTemp()) {
-        info_hi->SetIsWide(false);
-        info_hi->ResetDefBody();
-      }
-      rl.reg = rl.reg.GetLow();
-    } else {
-      /*
-       * TODO: If not a pair, we can't just drop the high register.  On some targets, we may be
-       * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit
-       * this code.  Will probably want to make this a virtual function.
-       */
-      // Can't narrow 64-bit register.  Clobber.
-      if (GetRegInfo(rl.reg)->IsTemp()) {
-        Clobber(rl.reg);
-        FreeTemp(rl.reg);
-      }
-      rl.location = kLocDalvikFrame;
-    }
-  }
-  rl.wide = false;
-  return rl;
-}
-
 void Mir2Lir::ResetDefLoc(RegLocation rl) {
   DCHECK(!rl.wide);
   if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) {
@@ -727,16 +703,8 @@
 }
 
 void Mir2Lir::ResetDefTracking() {
-  GrowableArray<RegisterInfo*>::Iterator core_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = core_it.Next(); info != nullptr; info = core_it.Next()) {
-    info->ResetDefBody();
-  }
-  GrowableArray<RegisterInfo*>::Iterator sp_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = sp_it.Next(); info != nullptr; info = sp_it.Next()) {
-    info->ResetDefBody();
-  }
-  GrowableArray<RegisterInfo*>::Iterator dp_it(&reg_pool_->core_regs_);
-  for (RegisterInfo* info = dp_it.Next(); info != nullptr; info = dp_it.Next()) {
+  GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_);
+  for (RegisterInfo* info = iter.Next(); info != nullptr; info = iter.Next()) {
     info->ResetDefBody();
   }
 }
@@ -811,7 +779,11 @@
 bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) {
   if (reg_class == kAnyReg) {
     return true;
-  } else if (reg_class == kCoreReg) {
+  } else if ((reg_class == kCoreReg) || (reg_class == kRefReg)) {
+    /*
+     * For this purpose, consider Core and Ref to be the same class. We aren't dealing
+     * with width here; that should be checked at a higher level, if needed.
+     */
     return !reg.IsFloat();
   } else {
     return reg.IsFloat();
@@ -1347,20 +1319,26 @@
 }
 
 /* Mark register usage state and return wide retloc */
-RegLocation Mir2Lir::GetReturnWide(bool is_double) {
-  RegLocation gpr_res = LocCReturnWide();
-  RegLocation fpr_res = LocCReturnDouble();
-  RegLocation res = is_double ? fpr_res : gpr_res;
+RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) {
+  RegLocation res;
+  switch (reg_class) {
+    case kRefReg: LOG(FATAL); break;  // References are never wide.
+    case kFPReg: res = LocCReturnDouble(); break;
+    default: res = LocCReturnWide(); break;
+  }
   Clobber(res.reg);
   LockTemp(res.reg);
   MarkWide(res.reg);
   return res;
 }
 
-RegLocation Mir2Lir::GetReturn(bool is_float) {
-  RegLocation gpr_res = LocCReturn();
-  RegLocation fpr_res = LocCReturnFloat();
-  RegLocation res = is_float ? fpr_res : gpr_res;
+RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) {
+  RegLocation res;
+  switch (reg_class) {
+    case kRefReg: res = LocCReturnRef(); break;
+    case kFPReg: res = LocCReturnFloat(); break;
+    default: res = LocCReturn(); break;
+  }
   Clobber(res.reg);
   if (cu_->instruction_set == kMips) {
     MarkInUse(res.reg);
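
GetReturn and GetReturnWide now take the register class directly, so each call site names the ABI return location it wants instead of passing a bool. A sketch of typical use after a runtime helper call (is_object and is_float are hypothetical flags for illustration):

    RegLocation rl_result = is_object ? GetReturn(kRefReg)
                          : is_float  ? GetReturn(kFPReg)
                                      : GetReturn(kCoreReg);
    StoreValue(rl_dest, rl_result);
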
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 4673cc0..f363eb3 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -169,7 +169,7 @@
   int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ?
       Thread::ExceptionOffset<8>().Int32Value() :
       Thread::ExceptionOffset<4>().Int32Value();
-  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
   NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
   NewLIR2(kX86Mov32TI, ex_offset, 0);
   StoreValue(rl_dest, rl_result);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d66790d..648c148 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -59,6 +59,7 @@
     RegLocation GetReturnAlt();
     RegLocation GetReturnWideAlt();
     RegLocation LocCReturn();
+    RegLocation LocCReturnRef();
     RegLocation LocCReturnDouble();
     RegLocation LocCReturnFloat();
     RegLocation LocCReturnWide();
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index aec39ab..0421a59 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -56,7 +56,7 @@
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
                                                 false);
       }
-      rl_result = GetReturn(true);
+      rl_result = GetReturn(kFPReg);
       StoreValue(rl_dest, rl_result);
       return;
     case Instruction::NEG_FLOAT:
@@ -118,7 +118,7 @@
         CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
                                                 false);
       }
-      rl_result = GetReturnWide(true);
+      rl_result = GetReturnWide(kFPReg);
       StoreValueWide(rl_dest, rl_result);
       return;
     case Instruction::NEG_DOUBLE:
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 48bff6e..1cc16b9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -173,7 +173,10 @@
   RegLocation rl_result;
   RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
   RegLocation rl_dest = mir_graph_->GetDest(mir);
-  rl_src = LoadValue(rl_src, kCoreReg);
+  // Avoid using float regs here.
+  RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
   ConditionCode ccode = mir->meta.ccode;
 
   // The kMirOpSelect has two variants, one for constants and one for moves.
@@ -182,7 +185,7 @@
   if (is_constant_case) {
     int true_val = mir->dalvikInsn.vB;
     int false_val = mir->dalvikInsn.vC;
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
     /*
      * For ccode == kCondEq:
@@ -203,6 +206,8 @@
      *     mov t1, $true_case
      *     cmovz result_reg, t1
      */
+    // FIXME: depending on how registers are used, this comparison of register numbers can
+    // report a false mismatch for two views of the same underlying physical register
+    // (e.g. solo32 vs. solo64).
     const bool result_reg_same_as_src =
         (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg());
     const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
@@ -224,7 +229,7 @@
     if (true_zero_case || false_zero_case || catch_all_case) {
       ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
       int immediateForTemp = true_zero_case ? false_val : true_val;
-      RegStorage temp1_reg = AllocTemp();
+      RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
       OpRegImm(kOpMov, temp1_reg, immediateForTemp);
 
       OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
@@ -234,9 +239,9 @@
   } else {
     RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
     RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
-    rl_true = LoadValue(rl_true, kCoreReg);
-    rl_false = LoadValue(rl_false, kCoreReg);
-    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
 
     /*
      * For ccode == kCondEq:
@@ -792,8 +797,8 @@
     Clobber(rs_r0);
     LockTemp(rs_r0);
 
-    RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
-    RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
+    RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
+    RegLocation rl_new_value = LoadValue(rl_src_new_value);
 
     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
       // Mark card for object assuming new value is stored.
@@ -1441,7 +1446,7 @@
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   RegLocation rl_result;
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
 
   int data_offset;
   if (size == k64 || size == kDouble) {
@@ -1497,7 +1502,7 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
-  rl_array = LoadValue(rl_array, kCoreReg);
+  rl_array = LoadValue(rl_array, kRefReg);
   bool constant_index = rl_index.is_const;
   int32_t constant_index_value = 0;
   if (!constant_index) {
@@ -1880,7 +1885,7 @@
 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                     RegLocation rl_dest, RegLocation rl_src) {
-  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation object = LoadValue(rl_src, kRefReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
@@ -1894,7 +1899,7 @@
   LoadConstant(result_reg, 0);
   LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
 
-  RegStorage check_class = AllocTypedTemp(false, kCoreReg);
+  RegStorage check_class = AllocTypedTemp(false, kRefReg);
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
@@ -1972,8 +1977,8 @@
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>)
-        * type_idx);
+        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+        (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
     LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
@@ -1992,7 +1997,7 @@
     }
   }
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
-  RegLocation rl_result = GetReturn(false);
+  RegLocation rl_result = GetReturn(kRefReg);
 
   // SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 8b34168..79081c8 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -177,6 +177,11 @@
   return x86_loc_c_return;
 }
 
+RegLocation X86Mir2Lir::LocCReturnRef() {
+  // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported.
+  return x86_loc_c_return;
+}
+
 RegLocation X86Mir2Lir::LocCReturnWide() {
   return x86_loc_c_return_wide;
 }
@@ -981,7 +986,7 @@
   }
 
   // Okay, we are committed to inlining this.
-  RegLocation rl_return = GetReturn(false);
+  RegLocation rl_return = GetReturn(kCoreReg);
   RegLocation rl_dest = InlineTarget(info);
 
   // Is the string non-NULL?
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 092e68e..618b3a5 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -572,7 +572,7 @@
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val);
         } else {
-          res = LoadConstantNoClobber(RegStorage::Solo32(low_reg_val), val_lo);
+          res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo);
         }
         if (val_hi != 0) {
           RegStorage r_dest_hi = AllocTempDouble();
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 964dfeb..ca956aa 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -173,7 +173,10 @@
   env_->ExceptionClear();
   jclass jlame = env_->FindClass("java/lang/AbstractMethodError");
   EXPECT_TRUE(env_->IsInstanceOf(exception, jlame));
-  Thread::Current()->ClearException();
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    Thread::Current()->ClearException();
+  }
 }
 
 // TODO: need check-cast test (when stub complete & we can throw/catch
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index e37f943..ca1239f 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -650,34 +650,55 @@
       copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
           (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
     } else {
-      copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
-          (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_))));
       // Use the original code if it exists. Otherwise, set the code pointer to the
       // appropriate bridge or trampoline.
+
+      // Quick entrypoint:
       const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset());
+      bool quick_is_interpreted = false;
       if (quick_code != nullptr &&
           (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
         // We have code for a non-static or initialized method, just use the code.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
       } else if (quick_code == nullptr && orig->IsNative() &&
           (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
         // Non-static or initialized native method missing compiled code, use generic JNI version.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_generic_jni_trampoline_offset_));
+        quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
       } else if (quick_code == nullptr && !orig->IsNative()) {
         // We don't have code at all for a non-native method, use the interpreter.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_to_interpreter_bridge_offset_));
+        quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+        quick_is_interpreted = true;
       } else {
         CHECK(!orig->GetDeclaringClass()->IsInitialized());
         // We have code for a static method, but need to go through the resolution stub for class
         // initialization.
-        copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_resolution_trampoline_offset_));
+        quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
       }
+      copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
+
+      // Portable entrypoint:
       const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset());
-      if (portable_code != nullptr) {
-        copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code);
+      bool portable_is_interpreted = false;
+      if (portable_code != nullptr &&
+          (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) {
+        // We have code for a non-static or initialized method, just use the code.
+      } else if (portable_code == nullptr && orig->IsNative() &&
+          (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) {
+        // Non-static or initialized native method missing compiled code, use generic JNI version.
+        // TODO: generic JNI support for LLVM.
+        portable_code = GetOatAddress(portable_resolution_trampoline_offset_);
+      } else if (portable_code == nullptr && !orig->IsNative()) {
+        // We don't have code at all for a non-native method, use the interpreter.
+        portable_code = GetOatAddress(portable_to_interpreter_bridge_offset_);
+        portable_is_interpreted = true;
       } else {
-        copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_resolution_trampoline_offset_));
+        CHECK(!orig->GetDeclaringClass()->IsInitialized());
+        // We have code for a static method, but need to go through the resolution stub for class
+        // initialization.
+        portable_code = GetOatAddress(portable_resolution_trampoline_offset_);
       }
+      copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code);
+
+      // JNI entrypoint:
       if (orig->IsNative()) {
         // The native method's pointer is set to a stub to lookup via dlsym.
         // Note this is not the code_ pointer, that is handled above.
@@ -688,6 +709,15 @@
         const byte* native_gc_map = GetOatAddress(native_gc_map_offset);
         copy->SetNativeGcMap<kVerifyNone>(reinterpret_cast<const uint8_t*>(native_gc_map));
       }
+
+      // Interpreter entrypoint:
+      // Set the interpreter entrypoint depending on whether there is compiled code or not.
+      uint32_t interpreter_code = (quick_is_interpreted && portable_is_interpreted)
+          ? interpreter_to_interpreter_bridge_offset_
+          : interpreter_to_compiled_code_bridge_offset_;
+      copy->SetEntryPointFromInterpreter<kVerifyNone>(
+          reinterpret_cast<EntryPointFromInterpreter*>(
+              const_cast<byte*>(GetOatAddress(interpreter_code))));
     }
   }
 }
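
The rewritten block computes each entrypoint once and performs a single Set call per entrypoint. The quick-entrypoint decision reduces to the following sketch, where every parameter is a stand-in for a predicate or GetOatAddress() result from the diff above (byte is the ART typedef):

    const byte* SelectQuickCode(const byte* compiled_code, bool has_code, bool usable,
                                bool is_native, const byte* generic_jni_trampoline,
                                const byte* to_interpreter_bridge,
                                const byte* resolution_trampoline,
                                bool* quick_is_interpreted) {
      *quick_is_interpreted = false;
      if (has_code && usable) {
        return compiled_code;               // Use the compiled code directly.
      } else if (!has_code && is_native && usable) {
        return generic_jni_trampoline;      // Missing native code: generic JNI stub.
      } else if (!has_code && !is_native) {
        *quick_is_interpreted = true;
        return to_interpreter_bridge;       // No code at all: interpret.
      } else {
        return resolution_trampoline;       // Class needs initialization first.
      }
    }

The portable path follows the same shape, and the interpreter bridge is then chosen based on whether both paths fell back to the interpreter.
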
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index 25c9b20..f8dca66 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -1868,6 +1868,10 @@
 
   phi->addIncoming(storage_object_addr, block_check_init);
   phi->addIncoming(loaded_storage_object_addr, block_after_load_static);
+
+  // Ensure load of status and load of value don't re-order.
+  irb_.CreateMemoryBarrier(art::kLoadLoad);
+
   return phi;
 }
 
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 53e7bbe..7a33620 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -26,6 +26,18 @@
 
 namespace art {
 
+static void DumpBitVector(BitVector* vector,
+                          std::ostream& buffer,
+                          size_t count,
+                          const char* prefix) {
+  buffer << prefix;
+  buffer << '(';
+  for (size_t i = 0; i < count; ++i) {
+    buffer << vector->IsBitSet(i);
+  }
+  buffer << ")\n";
+}
+
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
@@ -43,12 +55,13 @@
   for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     buffer << "Block " << block->GetBlockId() << std::endl;
+    size_t ssa_values = liveness.GetNumberOfSsaValues();
     BitVector* live_in = liveness.GetLiveInSet(*block);
-    live_in->Dump(buffer, "  live in: ");
+    DumpBitVector(live_in, buffer, ssa_values, "  live in: ");
     BitVector* live_out = liveness.GetLiveOutSet(*block);
-    live_out->Dump(buffer, "  live out: ");
+    DumpBitVector(live_out, buffer, ssa_values, "  live out: ");
     BitVector* kill = liveness.GetKillSet(*block);
-    kill->Dump(buffer, "  kill: ");
+    DumpBitVector(kill, buffer, ssa_values, "  kill: ");
   }
   ASSERT_STREQ(expected, buffer.str().c_str());
 }
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 28bf856..69f5957 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -338,62 +338,6 @@
     .cfi_adjust_cfa_offset -304
 .endm
 
-.macro RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME_NO_D0
-
-    ldr d1,   [sp, #24]
-    ldp d2, d3,   [sp, #32]
-    ldp d4, d5,   [sp, #48]
-    ldp d6, d7,   [sp, #64]
-    ldp d8, d9,   [sp, #80]
-    ldp d10, d11, [sp, #96]
-    ldp d12, d13, [sp, #112]
-    ldp d14, d15, [sp, #128]
-
-    // args.
-    ldp x1,  x2, [sp, #144]
-    .cfi_restore x1
-    .cfi_restore x2
-
-    ldp x3,  x4, [sp, #160]
-    .cfi_restore x3
-    .cfi_restore x4
-
-    ldp x5,  x6, [sp, #176]
-    .cfi_restore x5
-    .cfi_restore x6
-
-    ldp x7,  xSELF, [sp, #192]
-    .cfi_restore x7
-    .cfi_restore x18
-
-    ldp x19, x20, [sp, #208]
-    .cfi_restore x19
-    .cfi_restore x20
-
-    ldp x21, x22, [sp, #224]
-    .cfi_restore x21
-    .cfi_restore x22
-
-    ldp x23, x24, [sp, #240]
-    .cfi_restore x23
-    .cfi_restore x24
-
-    ldp x25, x26, [sp, #256]
-    .cfi_restore x25
-    .cfi_restore x26
-
-    ldp x27, x28, [sp, #272]
-    .cfi_restore x27
-    .cfi_restore x28
-
-    ldp xFP, xLR, [sp, #288]
-    .cfi_restore x29
-    .cfi_restore x30
-
-    add sp, sp, #304
-    .cfi_adjust_cfa_offset -304
-.endm
-
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz x0, 1f                // result non-zero branch over
     ret                        // return
@@ -1414,7 +1358,8 @@
     ldr     xSELF, [sp, #200]           // Restore self pointer.
     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // branch if exception pending; fall through on success
-    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME_NO_D0 // keep d0
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
+    fmov    d0, x0                      // Copy result to d0, in case the return type was float/double
     ret                                 // return on success
 .Lexception_in_proxy:
     RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index b311ea5..07268ea 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1196,7 +1196,7 @@
     addl  LITERAL(28), %esp       // Pop arguments up to saved Method*.
     movl 28(%esp), %edi           // Restore edi.
     movl %eax, 28(%esp)           // Place code* over edi, just under return pc.
-    movl LITERAL(PLT_SYMBOL(art_quick_instrumentation_exit)), 32(%esp)
+    movl LITERAL(SYMBOL(art_quick_instrumentation_exit)), 32(%esp)
                                   // Place instrumentation exit as return pc.
     movl (%esp), %eax             // Restore eax.
     movl 8(%esp), %ecx            // Restore ecx.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 48c33d5..0d9d388 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -791,14 +791,14 @@
 
 
 DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
-    movl ARRAY_LENGTH_OFFSET(%edi), %ebx
-//  movl ARRAY_LENGTH_OFFSET(%rdi), %ebx      // This zero-extends, so value(%rbx)=value(%ebx)
-    cmpl %ebx, %esi
+    movl ARRAY_LENGTH_OFFSET(%edi), %ecx
+//  movl ARRAY_LENGTH_OFFSET(%rdi), %ecx      // This zero-extends, so value(%rcx)=value(%ecx)
+    cmpl %ecx, %esi
     jb art_quick_aput_obj_local
     mov %esi, %edi
 //  mov %rsi, %rdi
-    mov %ebx, %esi
-//  mov %rbx, %rsi
+    mov %ecx, %esi
+//  mov %rcx, %rsi
     jmp art_quick_throw_array_bounds_local
 END_FUNCTION art_quick_aput_obj_with_bound_check
 
@@ -807,12 +807,12 @@
     testl %edx, %edx                // store of null
 //  test %rdx, %rdx
     jz .Ldo_aput_null
-    movl CLASS_OFFSET(%edi), %ebx
-//  movq CLASS_OFFSET(%rdi), %rbx
-    movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
-//  movq CLASS_COMPONENT_TYPE_OFFSET(%rbx), %rbx
-    cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
-//  cmpq CLASS_OFFSET(%rdx), %rbx
+    movl CLASS_OFFSET(%edi), %ecx
+//  movq CLASS_OFFSET(%rdi), %rcx
+    movl CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
+//  movq CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
+    cmpl CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
+//  cmpq CLASS_OFFSET(%rdx), %rcx
     jne .Lcheck_assignability
 .Ldo_aput:
     movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
@@ -836,7 +836,7 @@
 
                                   // "Uncompress" = do nothing, as already zero-extended on load.
     movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
-    movq %rbx, %rdi               // Pass arg1 = array's component type.
+    movq %rcx, %rdi               // Pass arg1 = array's component type.
 
     call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
 
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 0053389..1b9022e 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -43,8 +43,7 @@
   : allocator_(allocator),
     expandable_(expandable),
     storage_size_(storage_size),
-    storage_(storage),
-    number_of_bits_(start_bits) {
+    storage_(storage) {
   COMPILE_ASSERT(sizeof(*storage_) == kWordBytes, check_word_bytes);
   COMPILE_ASSERT(sizeof(*storage_) * 8u == kWordBits, check_word_bits);
   if (storage_ == nullptr) {
@@ -95,7 +94,6 @@
     // TODO: collect stats on space wasted because of resize.
     storage_ = new_storage;
     storage_size_ = new_size;
-    number_of_bits_ = num;
   }
 
   storage_[num >> 5] |= check_masks[num & 0x1f];
@@ -434,7 +432,7 @@
     buffer << prefix;
   }
 
-  for (size_t i = 0; i < number_of_bits_; i++) {
+  for (size_t i = 0; i < storage_size_ * kWordBits; i++) {
     if (IsBitSet(i)) {
       buffer << i << " ";
     }
@@ -448,7 +446,7 @@
   }
 
   buffer << '(';
-  for (size_t i = 0; i < number_of_bits_; i++) {
+  for (size_t i = 0; i < storage_size_ * kWordBits; i++) {
     buffer << IsBitSet(i);
   }
   buffer << ')';
diff --git a/runtime/base/bit_vector.h b/runtime/base/bit_vector.h
index 8f9afff..fb1646f 100644
--- a/runtime/base/bit_vector.h
+++ b/runtime/base/bit_vector.h
@@ -247,7 +247,6 @@
     const bool expandable_;         // expand bitmap if we run out?
     uint32_t   storage_size_;       // current size, in 32-bit words.
     uint32_t*  storage_;
-    uint32_t number_of_bits_;
 };
 
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 39b2ec2..320273d 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -148,7 +148,9 @@
   soa.Self()->AssertThreadSuspensionIsAllowable();
   jobjectArray args_jobj = NULL;
   const JValue zero;
-  if (args.size() > 0) {
+  int32_t target_sdk_version = Runtime::Current()->GetTargetSdkVersion();
+  // Do not create an empty array unless Dalvik bug compatibility (target SDK <= 21) requires it.
+  if (args.size() > 0 || (target_sdk_version > 0 && target_sdk_version <= 21)) {
     args_jobj = soa.Env()->NewObjectArray(args.size(), WellKnownClasses::java_lang_Object, NULL);
     if (args_jobj == NULL) {
       CHECK(soa.Self()->IsExceptionPending());
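
The empty-array behavior is now gated on the app's target SDK version via the GetTargetSdkVersion() accessor added to Runtime in this change. A sketch of the gating pattern (the helper name is hypothetical):

    // Apps targeting SDK <= 21 relied on Dalvik always allocating the args array,
    // even for zero-argument proxy invocations. 0 means "current" and is excluded.
    static bool NeedsEmptyProxyArgsArray() {
      int32_t version = Runtime::Current()->GetTargetSdkVersion();
      return version > 0 && version <= 21;
    }
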
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index f1284db..19ee1ff 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -669,7 +669,8 @@
   }
 
   static void ExceptionClear(JNIEnv* env) {
-    static_cast<JNIEnvExt*>(env)->self->ClearException();
+    ScopedObjectAccess soa(env);
+    soa.Self()->ClearException();
   }
 
   static void ExceptionDescribe(JNIEnv* env) {
@@ -1932,32 +1933,66 @@
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
     mirror::CharArray* chars = s->GetCharArray();
     PinPrimitiveArray(soa, chars);
-    if (is_copy != nullptr) {
-      *is_copy = JNI_TRUE;
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    if (heap->IsMovableObject(chars)) {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_TRUE;
+      }
+      int32_t char_count = s->GetLength();
+      int32_t offset = s->GetOffset();
+      jchar* bytes = new jchar[char_count];
+      for (int32_t i = 0; i < char_count; i++) {
+        bytes[i] = chars->Get(i + offset);
+      }
+      return bytes;
+    } else {
+      if (is_copy != nullptr) {
+        *is_copy = JNI_FALSE;
+      }
+      return static_cast<jchar*>(chars->GetData() + s->GetOffset());
     }
-    int32_t char_count = s->GetLength();
-    int32_t offset = s->GetOffset();
-    jchar* bytes = new jchar[char_count + 1];
-    for (int32_t i = 0; i < char_count; i++) {
-      bytes[i] = chars->Get(i + offset);
-    }
-    bytes[char_count] = '\0';
-    return bytes;
   }
 
   static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar* chars) {
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
-    delete[] chars;
     ScopedObjectAccess soa(env);
-    UnpinPrimitiveArray(soa, soa.Decode<mirror::String*>(java_string)->GetCharArray());
+    mirror::String* s = soa.Decode<mirror::String*>(java_string);
+    mirror::CharArray* s_chars = s->GetCharArray();
+    if (chars != (s_chars->GetData() + s->GetOffset())) {
+      delete[] chars;
+    }
+    UnpinPrimitiveArray(soa, s->GetCharArray());
   }
 
   static const jchar* GetStringCritical(JNIEnv* env, jstring java_string, jboolean* is_copy) {
-    return GetStringChars(env, java_string, is_copy);
+    CHECK_NON_NULL_ARGUMENT(java_string);
+    ScopedObjectAccess soa(env);
+    mirror::String* s = soa.Decode<mirror::String*>(java_string);
+    mirror::CharArray* chars = s->GetCharArray();
+    int32_t offset = s->GetOffset();
+    PinPrimitiveArray(soa, chars);
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    if (heap->IsMovableObject(chars)) {
+      StackHandleScope<1> hs(soa.Self());
+      HandleWrapper<mirror::CharArray> h(hs.NewHandleWrapper(&chars));
+      heap->IncrementDisableMovingGC(soa.Self());
+    }
+    if (is_copy != nullptr) {
+      *is_copy = JNI_FALSE;
+    }
+    return static_cast<jchar*>(chars->GetData() + offset);
   }
 
   static void ReleaseStringCritical(JNIEnv* env, jstring java_string, const jchar* chars) {
-    return ReleaseStringChars(env, java_string, chars);
+    CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
+    ScopedObjectAccess soa(env);
+    UnpinPrimitiveArray(soa, soa.Decode<mirror::String*>(java_string)->GetCharArray());
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    mirror::String* s = soa.Decode<mirror::String*>(java_string);
+    mirror::CharArray* s_chars = s->GetCharArray();
+    if (heap->IsMovableObject(s_chars)) {
+      heap->DecrementDisableMovingGC(soa.Self());
+    }
   }
 
   static const char* GetStringUTFChars(JNIEnv* env, jstring java_string, jboolean* is_copy) {
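
GetStringCritical can now hand out a direct pointer into the heap while moving GC is disabled, so callers must keep the critical section short and always pair Get with Release. A minimal caller-side sketch, assuming <jni.h> (Process is a hypothetical consumer; no JNI calls or blocking operations are allowed inside the critical section):

    static void UseStringCritical(JNIEnv* env, jstring java_string) {
      const jchar* chars = env->GetStringCritical(java_string, /* is_copy= */ nullptr);
      if (chars != nullptr) {
        Process(chars, env->GetStringLength(java_string));  // Process() is hypothetical.
        env->ReleaseStringCritical(java_string, chars);
      }
    }
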
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 3429827..f182e95 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -18,6 +18,7 @@
 
 #include "common_compiler_test.h"
 #include "mirror/art_method-inl.h"
+#include "mirror/string-inl.h"
 #include "ScopedLocalRef.h"
 
 namespace art {
@@ -1071,6 +1072,8 @@
 
 TEST_F(JniInternalTest, GetStringChars_ReleaseStringChars) {
   jstring s = env_->NewStringUTF("hello");
   ASSERT_TRUE(s != nullptr);
+  ScopedObjectAccess soa(env_);
+  mirror::String* s_m = soa.Decode<mirror::String*>(s);
 
   jchar expected[] = { 'h', 'e', 'l', 'l', 'o' };
@@ -1084,7 +1087,11 @@
 
   jboolean is_copy = JNI_FALSE;
   chars = env_->GetStringChars(s, &is_copy);
-  EXPECT_EQ(JNI_TRUE, is_copy);
+  if (Runtime::Current()->GetHeap()->IsMovableObject(s_m->GetCharArray())) {
+    EXPECT_EQ(JNI_TRUE, is_copy);
+  } else {
+    EXPECT_EQ(JNI_FALSE, is_copy);
+  }
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
@@ -1106,10 +1113,9 @@
   EXPECT_EQ(expected[4], chars[4]);
   env_->ReleaseStringCritical(s, chars);
 
-  jboolean is_copy = JNI_FALSE;
+  jboolean is_copy = JNI_TRUE;
   chars = env_->GetStringCritical(s, &is_copy);
-  // TODO: Fix GetStringCritical to use the same mechanism as GetPrimitiveArrayElementsCritical.
-  EXPECT_EQ(JNI_TRUE, is_copy);
+  EXPECT_EQ(JNI_FALSE, is_copy);
   EXPECT_EQ(expected[0], chars[0]);
   EXPECT_EQ(expected[1], chars[1]);
   EXPECT_EQ(expected[2], chars[2]);
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 4869b45..4b02c0f 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -105,11 +105,11 @@
 
     self->SetException(gc_safe_throw_location, old_exception.Get());
   }
-  CHECK(sizeof(Status) == sizeof(uint32_t)) << PrettyClass(this);
+  COMPILE_ASSERT(sizeof(Status) == sizeof(uint32_t), size_of_status_not_uint32);
   if (Runtime::Current()->IsActiveTransaction()) {
-    SetField32<true>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
+    SetField32Volatile<true>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
   } else {
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
+    SetField32Volatile<false>(OFFSET_OF_OBJECT_MEMBER(Class, status_), new_status);
   }
   // Classes that are being resolved or initialized need to notify waiters that the class status
   // changed. See ClassLinker::EnsureResolved and ClassLinker::WaitForInitializeClass.
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 40c9975..90381a7 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -125,7 +125,8 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   Status GetStatus() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     COMPILE_ASSERT(sizeof(Status) == sizeof(uint32_t), size_of_status_not_uint32);
-    return static_cast<Status>(GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, status_)));
+    return static_cast<Status>(
+        GetField32Volatile<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, status_)));
   }
 
   void SetStatus(Status new_status, Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
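
Pairing the volatile store in SetStatus with a volatile load here keeps the publication of a class status ordered with respect to dependent loads, the same property the explicit kLoadLoad barrier provides on the LLVM path above. A rough analogy in portable C++ (a sketch, not the ART field accessor API):

    #include <atomic>

    std::atomic<uint32_t> status;  // Stands in for the 32-bit status_ field.

    void Publish(uint32_t new_status) {
      status.store(new_status, std::memory_order_release);  // Like SetField32Volatile.
    }

    uint32_t Observe() {
      return status.load(std::memory_order_acquire);        // Like GetField32Volatile.
    }
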
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index e5cc671..a369365 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -171,13 +171,12 @@
   return Runtime::Current()->GetJavaVM()->check_jni ? JNI_TRUE : JNI_FALSE;
 }
 
-static void VMRuntime_setTargetSdkVersionNative(JNIEnv* env, jobject, jint targetSdkVersion) {
+static void VMRuntime_setTargetSdkVersionNative(JNIEnv*, jobject, jint target_sdk_version) {
   // This is the target SDK version of the app we're about to run. It is intended that this is
   // a place where workarounds can be enabled.
   // Note that targetSdkVersion may be CUR_DEVELOPMENT (10000).
   // Note that targetSdkVersion may be 0, meaning "current".
-  UNUSED(env);
-  UNUSED(targetSdkVersion);
+  Runtime::Current()->SetTargetSdkVersion(target_sdk_version);
 }
 
 static void VMRuntime_registerNativeAllocation(JNIEnv* env, jobject, jint bytes) {
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 9c14a4f..6ab0d1e 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '0', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '2', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index db2a61b..72a868e 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -264,7 +264,7 @@
 #ifdef HAVE_ANDROID_OS
   {
     char buf[PROP_VALUE_MAX];
-    property_get("dalvik.vm.implicit_checks", buf, "none");
+    property_get("dalvik.vm.implicit_checks", buf, "null,stack");
     std::string checks(buf);
     std::vector<std::string> checkvec;
     Split(checks, ',', checkvec);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 361070c..68b10cc 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -141,7 +141,8 @@
       null_pointer_handler_(nullptr),
       suspend_handler_(nullptr),
       stack_overflow_handler_(nullptr),
-      verify_(false) {
+      verify_(false),
+      target_sdk_version_(0) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     callee_save_methods_[i] = nullptr;
   }
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 261429e..afb5aa7 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -441,6 +441,14 @@
     return running_on_valgrind_;
   }
 
+  void SetTargetSdkVersion(int32_t version) {
+    target_sdk_version_ = version;
+  }
+
+  int32_t GetTargetSdkVersion() const {
+    return target_sdk_version_;
+  }
+
   static const char* GetDefaultInstructionSetFeatures() {
     return kDefaultInstructionSetFeatures;
   }
@@ -588,6 +596,9 @@
   // If false, verification is disabled. True by default.
   bool verify_;
 
+  // Specifies target SDK version to allow workarounds for certain API levels.
+  int32_t target_sdk_version_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 88b4b0d..6569a96 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -326,7 +326,7 @@
     tlsPtr_.throw_location = throw_location;
   }
 
-  void ClearException() {
+  void ClearException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     tlsPtr_.exception = nullptr;
     tlsPtr_.throw_location.Clear();
   }
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index d4ce71f..1573297 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -270,6 +270,20 @@
           }
         }
 
+        if (method.getDeclaringClass() == Trace.class) {
+          if (method.getName().equals("getTrace")) {
+            StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace();
+            for (int i = 0; i < stackTrace.length; i++) {
+              StackTraceElement ste = stackTrace[i];
+              if (ste.getMethodName().equals("getTrace")) {
+                System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                                   ste.getFileName() + ":" + ste.getLineNumber());
+              }
+            }
+            return null;
+          }
+        }
+
         System.out.println("Invoke " + method);
         if (args == null || args.length == 0) {
             System.out.println(" (no args)");