Merge "Rewrite use/def masks to support 128 bits."
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index d544397..8218cf1 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -147,6 +147,7 @@
     Instruction::RETURN_VOID,
     Instruction::RETURN,
     Instruction::RETURN_WIDE,
+    Instruction::RETURN_OBJECT,
     Instruction::CONST_4,
     Instruction::CONST_16,
     Instruction::CONST,
@@ -226,6 +227,39 @@
     Instruction::SHL_INT_LIT8,
     Instruction::SHR_INT_LIT8,
     Instruction::USHR_INT_LIT8,
+    Instruction::SGET,
+    Instruction::SGET_BOOLEAN,
+    Instruction::SGET_BYTE,
+    Instruction::SGET_CHAR,
+    Instruction::SGET_SHORT,
+    Instruction::SGET_OBJECT,
+    Instruction::SPUT,
+    Instruction::SPUT_OBJECT,
+    Instruction::SPUT_BOOLEAN,
+    Instruction::SPUT_BYTE,
+    Instruction::SPUT_CHAR,
+    Instruction::SPUT_SHORT,
+    Instruction::MOVE_WIDE,
+    Instruction::MOVE_WIDE_FROM16,
+    Instruction::MOVE_WIDE_16,
+    Instruction::MOVE_OBJECT,
+    Instruction::MOVE_OBJECT_FROM16,
+    Instruction::MOVE_OBJECT_16,
+    Instruction::CMPL_FLOAT,
+    Instruction::CMPG_FLOAT,
+    Instruction::IGET,
+    Instruction::IGET_OBJECT,
+    Instruction::IGET_BOOLEAN,
+    Instruction::IGET_BYTE,
+    Instruction::IGET_CHAR,
+    Instruction::IGET_SHORT,
+    Instruction::IPUT,
+    Instruction::IPUT_OBJECT,
+    Instruction::IPUT_BOOLEAN,
+    Instruction::IPUT_BYTE,
+    Instruction::IPUT_CHAR,
+    Instruction::IPUT_SHORT,
+
     // TODO(Arm64): Enable compiler pass
     // ----- ExtendedMIROpcode -----
     kMirOpPhi,
@@ -244,16 +278,9 @@
     kMirOpSelect,
 
 #if ARM64_USE_EXPERIMENTAL_OPCODES
-    Instruction::MOVE_WIDE,
-    Instruction::MOVE_WIDE_FROM16,
-    Instruction::MOVE_WIDE_16,
-    Instruction::MOVE_OBJECT,
-    Instruction::MOVE_OBJECT_FROM16,
-    Instruction::MOVE_OBJECT_16,
     // Instruction::MOVE_RESULT,
     // Instruction::MOVE_RESULT_WIDE,
     // Instruction::MOVE_RESULT_OBJECT,
-    // Instruction::RETURN_OBJECT,
     // Instruction::CONST_HIGH16,
     // Instruction::CONST_WIDE_16,
     // Instruction::CONST_WIDE_32,
@@ -269,8 +296,6 @@
     // Instruction::FILLED_NEW_ARRAY,
     // Instruction::FILLED_NEW_ARRAY_RANGE,
     // Instruction::FILL_ARRAY_DATA,
-    Instruction::CMPL_FLOAT,
-    Instruction::CMPG_FLOAT,
     Instruction::CMPL_DOUBLE,
     Instruction::CMPG_DOUBLE,
     Instruction::CMP_LONG,
@@ -294,34 +319,10 @@
     // Instruction::APUT_BYTE,
     // Instruction::APUT_CHAR,
     // Instruction::APUT_SHORT,
-    // Instruction::IGET,
-    // Instruction::IGET_WIDE,
-    // Instruction::IGET_OBJECT,
-    // Instruction::IGET_BOOLEAN,
-    // Instruction::IGET_BYTE,
-    // Instruction::IGET_CHAR,
-    // Instruction::IGET_SHORT,
-    // Instruction::IPUT,
     // Instruction::IPUT_WIDE,
-    // Instruction::IPUT_OBJECT,
-    // Instruction::IPUT_BOOLEAN,
-    // Instruction::IPUT_BYTE,
-    // Instruction::IPUT_CHAR,
-    // Instruction::IPUT_SHORT,
-    Instruction::SGET,
+    // Instruction::IGET_WIDE,
     // Instruction::SGET_WIDE,
-    Instruction::SGET_OBJECT,
-    // Instruction::SGET_BOOLEAN,
-    // Instruction::SGET_BYTE,
-    // Instruction::SGET_CHAR,
-    // Instruction::SGET_SHORT,
-    Instruction::SPUT,
     // Instruction::SPUT_WIDE,
-    // Instruction::SPUT_OBJECT,
-    // Instruction::SPUT_BOOLEAN,
-    // Instruction::SPUT_BYTE,
-    // Instruction::SPUT_CHAR,
-    // Instruction::SPUT_SHORT,
     Instruction::INVOKE_VIRTUAL,
     Instruction::INVOKE_SUPER,
     Instruction::INVOKE_DIRECT,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index c977a23..8db7d4e 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -205,6 +205,8 @@
     void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val,
                                   ConditionCode ccode);
     LIR* LoadFPConstantValue(int r_dest, int value);
+    LIR* LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement,
+                              RegStorage r_src_dest, RegStorage r_work = RegStorage::InvalidReg());
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 92781b5..61d3d56 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -819,6 +819,30 @@
   return store;
 }
 
+// Helper function for LoadBaseDispBody()/StoreBaseDispBody().
+LIR* ArmMir2Lir::LoadStoreMaxDisp1020(ArmOpcode opcode, RegStorage r_base, int displacement,
+                                      RegStorage r_src_dest, RegStorage r_work) {
+  DCHECK_EQ(displacement & 3, 0);
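+  // The Thumb2 VLDR/VSTR and LDRD/STRD forms take an 8-bit immediate scaled by 4, i.e. 0..1020.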
+  int encoded_disp = (displacement & 1020) >> 2;  // Within range of the instruction.
+  RegStorage r_ptr = r_base;
+  if ((displacement & ~1020) != 0) {
+    r_ptr = r_work.Valid() ? r_work : AllocTemp();
+    // Add displacement & ~1020 to base; this is a single instruction for up to +-256KiB.
+    OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
+  }
+  LIR* lir = nullptr;
+  if (!r_src_dest.IsPair()) {
+    lir = NewLIR3(opcode, r_src_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
+  } else {
+    lir = NewLIR4(opcode, r_src_dest.GetLowReg(), r_src_dest.GetHighReg(), r_ptr.GetReg(),
+                  encoded_disp);
+  }
+  if ((displacement & ~1020) != 0 && !r_work.Valid()) {
+    FreeTemp(r_ptr);
+  }
+  return lir;
+}
+
 /*
  * Load value from base + displacement.  Optionally perform null check
  * on base (which must have an associated s_reg and MIR).  If not
@@ -836,40 +860,26 @@
   switch (size) {
     case kDouble:
     // Intentional fall-though.
-    case k64: {
-      DCHECK_EQ(displacement & 3, 0);
-      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vldrd/kThumb2LdrdI8.
-      RegStorage r_ptr = r_base;
-      if ((displacement & ~1020) != 0) {
-        // For core register load, use the r_dest.GetLow() for the temporary pointer.
-        r_ptr = r_dest.IsFloat() ? AllocTemp() : r_dest.GetLow();
-        // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
-        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
-      }
+    case k64:
       if (r_dest.IsFloat()) {
         DCHECK(!r_dest.IsPair());
-        load = NewLIR3(kThumb2Vldrd, r_dest.GetReg(), r_ptr.GetReg(), encoded_disp);
+        load = LoadStoreMaxDisp1020(kThumb2Vldrd, r_base, displacement, r_dest);
       } else {
-        load = NewLIR4(kThumb2LdrdI8, r_dest.GetLowReg(), r_dest.GetHighReg(), r_ptr.GetReg(),
-                       encoded_disp);
-      }
-      if ((displacement & ~1020) != 0 && r_dest.IsFloat()) {
-        FreeTemp(r_ptr);
+        DCHECK(r_dest.IsPair());
+        // Use r_dest.GetLow() as the temporary pointer if needed.
+        load = LoadStoreMaxDisp1020(kThumb2LdrdI8, r_base, displacement, r_dest, r_dest.GetLow());
       }
       already_generated = true;
       break;
-    }
     case kSingle:
     // Intentional fall-though.
     case k32:
     // Intentional fall-though.
     case kReference:
       if (r_dest.IsFloat()) {
-        opcode = kThumb2Vldrs;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+        DCHECK(r_dest.IsSingle());
+        load = LoadStoreMaxDisp1020(kThumb2Vldrs, r_base, displacement, r_dest);
+        already_generated = true;
         break;
       }
       if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
@@ -934,13 +944,8 @@
     } else {
       RegStorage reg_offset = AllocTemp();
       LoadConstant(reg_offset, encoded_disp);
-      if (r_dest.IsFloat()) {
-        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-        OpRegReg(kOpAdd, reg_offset, r_base);
-        load = LoadBaseDispBody(reg_offset, 0, r_dest, size);
-      } else {
-        load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
-      }
+      DCHECK(!r_dest.IsFloat());
+      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
       FreeTemp(reg_offset);
     }
   }
@@ -993,28 +998,16 @@
   switch (size) {
     case kDouble:
     // Intentional fall-though.
-    case k64: {
-      DCHECK_EQ(displacement & 3, 0);
-      encoded_disp = (displacement & 1020) >> 2;  // Within range of kThumb2Vstrd/kThumb2StrdI8.
-      RegStorage r_ptr = r_base;
-      if ((displacement & ~1020) != 0) {
-        r_ptr = AllocTemp();
-        // Add displacement & ~1020 to base, it's a single instruction for up to +-256KiB.
-        OpRegRegImm(kOpAdd, r_ptr, r_base, displacement & ~1020);
-      }
+    case k64:
       if (r_src.IsFloat()) {
         DCHECK(!r_src.IsPair());
-        store = NewLIR3(kThumb2Vstrd, r_src.GetReg(), r_ptr.GetReg(), encoded_disp);
+        store = LoadStoreMaxDisp1020(kThumb2Vstrd, r_base, displacement, r_src);
       } else {
-        store = NewLIR4(kThumb2StrdI8, r_src.GetLowReg(), r_src.GetHighReg(), r_ptr.GetReg(),
-                        encoded_disp);
-      }
-      if ((displacement & ~1020) != 0) {
-        FreeTemp(r_ptr);
+        DCHECK(r_src.IsPair());
+        store = LoadStoreMaxDisp1020(kThumb2StrdI8, r_base, displacement, r_src);
       }
       already_generated = true;
       break;
-    }
     case kSingle:
     // Intentional fall-through.
     case k32:
@@ -1022,11 +1015,8 @@
     case kReference:
       if (r_src.IsFloat()) {
         DCHECK(r_src.IsSingle());
-        opcode = kThumb2Vstrs;
-        if (displacement <= 1020) {
-          short_form = true;
-          encoded_disp >>= 2;
-        }
+        store = LoadStoreMaxDisp1020(kThumb2Vstrs, r_base, displacement, r_src);
+        already_generated = true;
         break;
       }
       if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
@@ -1074,13 +1064,8 @@
     } else {
       RegStorage r_scratch = AllocTemp();
       LoadConstant(r_scratch, encoded_disp);
-      if (r_src.IsFloat()) {
-        // No index ops - must use a long sequence.  Turn the offset into a direct pointer.
-        OpRegReg(kOpAdd, r_scratch, r_base);
-        store = StoreBaseDispBody(r_scratch, 0, r_src, size);
-      } else {
-        store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
-      }
+      DCHECK(!r_src.IsFloat());
+      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
       FreeTemp(r_scratch);
     }
   }
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 01afc99..e15ccb4 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -209,7 +209,7 @@
 };
 
 #define ENCODE_NO_SHIFT (EncodeShift(kA64Lsl, 0))
-
+#define ENCODE_NO_EXTEND (EncodeExtend(kA64Uxtx, 0))
 /*
  * The following enum defines the list of supported A64 instructions by the
  * assembler. Their corresponding EncodingMap positions will be defined in
@@ -326,7 +326,7 @@
   kA64Stur3rXd,      // stur[1s111000000] imm_9[20-12] [00] rn[9-5] rt[4-0].
   kA64Stxr3wrX,      // stxr[11001000000] rs[20-16] [011111] rn[9-5] rt[4-0].
   kA64Sub4RRdT,      // sub [s101000100] imm_12[21-10] rn[9-5] rd[4-0].
-  kA64Sub4rrro,      // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
+  kA64Sub4rrro,      // sub [s1001011000] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
   kA64Subs3rRd,      // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
   kA64Tst3rro,       // tst alias of "ands rzr, arg1, arg2, arg3".
   kA64Ubfm4rrdd,     // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
@@ -391,9 +391,6 @@
   kFmtSkip,      // Unused field, but continue to next.
 };
 
-// TODO(Arm64): should we get rid of kFmtExtend?
-//   Note: the only instructions that use it (cmp, cmn) are not used themselves.
-
 // Struct used to define the snippet positions for each A64 opcode.
 struct ArmEncodingMap {
   uint32_t wskeleton;
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index b85f569..59eec3d 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -301,12 +301,14 @@
  * Mark garbage collection card. Skip if the value we're storing is null.
  */
 void Arm64Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
-  RegStorage reg_card_base = AllocTemp();
+  RegStorage reg_card_base = AllocTempWide();
   RegStorage reg_card_no = AllocTemp();
   LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
   LoadWordDisp(rs_rA64_SELF, Thread::CardTableOffset<8>().Int32Value(), reg_card_base);
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
-  StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
+  // TODO(Arm64): generate "strb wB, [xB, wC, uxtw]" rather than "strb wB, [xB, xC]"?
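+  // reg_card_base is a 64-bit pointer and the current codegen needs an X register index (see the
+  // TODO above); the value stored is just the low byte of the 32-bit view of the card table base.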
+  StoreBaseIndexed(reg_card_base, As64BitReg(reg_card_no), As32BitReg(reg_card_base),
+                   0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
   branch_over->target = target;
   FreeTemp(reg_card_base);
@@ -315,62 +317,133 @@
 
 void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
   /*
-   * On entry, x0, x1, x2 & x3 are live.  Let the register allocation
+   * On entry, x0 to x7 are live.  Let the register allocation
    * mechanism know so it doesn't try to use any of them when
-   * expanding the frame or flushing.  This leaves the utility
-   * code with a single temp: r12.  This should be enough.
+   * expanding the frame or flushing.
+   * Reserve x8 & x9 for temporaries.
    */
   LockTemp(rs_x0);
   LockTemp(rs_x1);
   LockTemp(rs_x2);
   LockTemp(rs_x3);
+  LockTemp(rs_x4);
+  LockTemp(rs_x5);
+  LockTemp(rs_x6);
+  LockTemp(rs_x7);
+  LockTemp(rs_x8);
+  LockTemp(rs_x9);
 
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
   bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-                            (static_cast<size_t>(frame_size_) <
-                            Thread::kStackOverflowReservedBytes));
+                              (static_cast<size_t>(frame_size_) <
+                              Thread::kStackOverflowReservedBytes));
+
   NewLIR0(kPseudoMethodEntry);
 
+  const bool large_frame =
+      (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  const int spill_count = num_core_spills_ + num_fp_spills_;
+  const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
+  const int frame_size_without_spills = frame_size_ - spill_size;
+
   if (!skip_overflow_check) {
-    LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x12);
-    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
-      /* Load stack limit */
-      // TODO(Arm64): fix the line below:
-      // GenRegRegCheck(kCondUlt, rA64_SP, r12, kThrowStackOverflow);
+      if (!large_frame) {
+        // Load stack limit
+        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x9);
+      }
     } else {
+      // TODO(Arm64) Implement implicit checks.
       // Implicit stack overflow check.
       // Generate a load from [sp, #-framesize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      // TODO(Arm64): does the following really work or do we need a reg != rA64_ZR?
-      Load32Disp(rs_rA64_SP, 0, rs_wzr);
-      MarkPossibleStackOverflowException();
+      // Load32Disp(rs_rA64_SP, -Thread::kStackOverflowReservedBytes, rs_wzr);
+      // MarkPossibleStackOverflowException();
+      LOG(FATAL) << "Implicit stack overflow checks not implemented.";
     }
-  } else if (frame_size_ > 0) {
-    OpRegImm64(kOpSub, rs_rA64_SP, frame_size_);
+  }
+
+  if (frame_size_ > 0) {
+    OpRegImm64(kOpSub, rs_rA64_SP, spill_size);
   }
 
   /* Need to spill any FP regs? */
   if (fp_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
+    int spill_offset = spill_size - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
     SpillFPRegs(rs_rA64_SP, spill_offset, fp_spill_mask_);
   }
 
   /* Spill core callee saves. */
   if (core_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
+    int spill_offset = spill_size - kArm64PointerSize*num_core_spills_;
     SpillCoreRegs(rs_rA64_SP, spill_offset, core_spill_mask_);
   }
 
+  if (!skip_overflow_check) {
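+    // Since the spills are already written, the slow path only needs to unwind the spill area
+    // (large frames) or the fully established frame (small frames).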
+    if (Runtime::Current()->ExplicitStackOverflowChecks()) {
+      class StackOverflowSlowPath: public LIRSlowPath {
+      public:
+        StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
+              LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, nullptr),
+              sp_displace_(sp_displace) {
+        }
+        void Compile() OVERRIDE {
+          m2l_->ResetRegPool();
+          m2l_->ResetDefTracking();
+          GenerateTargetLabel(kPseudoThrowTarget);
+          // Unwinds stack.
+          m2l_->OpRegImm(kOpAdd, rs_rA64_SP, sp_displace_);
+          m2l_->ClobberCallerSave();
+          ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
+          m2l_->LockTemp(rs_x8);
+          m2l_->LoadWordDisp(rs_rA64_SELF, func_offset.Int32Value(), rs_x8);
+          m2l_->NewLIR1(kA64Br1x, rs_x8.GetReg());
+          m2l_->FreeTemp(rs_x8);
+        }
+
+      private:
+        const size_t sp_displace_;
+      };
+
+      if (large_frame) {
+        // Compare Expected SP against bottom of stack.
+        // Branch to throw target if there is not enough room.
+        OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
+        LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
+        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
+        OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
+      } else {
+        /*
+         * If the frame is small enough, we are guaranteed to have enough space remaining to
+         * handle signals on the user stack.
+         * Establish the stack pointer before the check.
+         */
+        OpRegRegImm(kOpSub, rs_rA64_SP, rs_rA64_SP, frame_size_without_spills);
+        LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x9, nullptr);
+        AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, frame_size_));
+      }
+    } else {
+      OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
+    }
+  } else {
+    OpRegImm(kOpSub, rs_rA64_SP, frame_size_without_spills);
+  }
+
   FlushIns(ArgLocs, rl_method);
 
   FreeTemp(rs_x0);
   FreeTemp(rs_x1);
   FreeTemp(rs_x2);
   FreeTemp(rs_x3);
+  FreeTemp(rs_x4);
+  FreeTemp(rs_x5);
+  FreeTemp(rs_x6);
+  FreeTemp(rs_x7);
+  FreeTemp(rs_x8);
+  FreeTemp(rs_x9);
 }
 
 void Arm64Mir2Lir::GenExitSequence() {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 75e24fe..c92832e 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -202,6 +202,7 @@
     LIR* OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2,
                           int shift);
     LIR* OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift);
+    LIR* OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int extend);
     static const ArmEncodingMap EncodingMap[kA64Last];
     int EncodeShift(int code, int amount);
     int EncodeExtend(int extend_type, int amount);
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index ab5014f..954360d 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -426,8 +426,43 @@
   return NULL;
 }
 
+LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
+                                  int extend) {
+  ArmOpcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
+  ArmOpcode opcode = kA64Brk1d;
+
+  switch (op) {
+    case kOpCmn:
+      opcode = kA64Cmn3Rre;
+      break;
+    case kOpCmp:
+      opcode = kA64Cmp3Rre;
+      break;
+    default:
+      LOG(FATAL) << "Bad Opcode: " << opcode;
+      break;
+  }
+
+  DCHECK(!IsPseudoLirOp(opcode));
+  if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
+    ArmEncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
+    if (kind == kFmtExtend) {
+      return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), extend);
+    }
+  }
+
+  LOG(FATAL) << "Unexpected encoding operand count";
+  return NULL;
+}
+
 LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
-  return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
+  /* RegReg operations with SP as the first parameter need the extended register instruction form.
+   * Only CMN and CMP instructions are implemented.
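+   * Note: the shifted-register encodings treat register 31 as XZR, while the extended-register
+   * encodings treat it as SP.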
+   */
+  if (r_dest_src1 == rs_rA64_SP) {
+    return OpRegRegExtend(op, r_dest_src1, r_src2, ENCODE_NO_EXTEND);
+  } else {
+    return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
+  }
 }
 
 LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset, MoveType move_type) {
@@ -526,7 +561,6 @@
   ArmOpcode alt_opcode = kA64Brk1d;
   int32_t log_imm = -1;
   bool is_wide = r_dest.Is64Bit();
-  CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
   ArmOpcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
 
   switch (op) {
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 89cdb4d..fe5e104 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -815,6 +815,10 @@
 bool VerifyAccess(mirror::Object* obj, mirror::Class* declaring_class, uint32_t access_flags) {
   NthCallerVisitor visitor(Thread::Current(), 2);
   visitor.WalkStack();
+  if (UNLIKELY(visitor.caller == nullptr)) {
+    // The caller is an attached native thread.
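+    // With no managed caller class to check against, grant access to public members only.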
+    return (access_flags & kAccPublic) != 0;
+  }
   mirror::Class* caller_class = visitor.caller->GetDeclaringClass();
 
   if (((access_flags & kAccPublic) != 0) || (caller_class == declaring_class)) {
diff --git a/test/JniTest/JniTest.java b/test/JniTest/JniTest.java
index 3c4ed35..33418a9 100644
--- a/test/JniTest/JniTest.java
+++ b/test/JniTest/JniTest.java
@@ -21,6 +21,7 @@
         System.loadLibrary("arttest");
         testFindClassOnAttachedNativeThread();
         testFindFieldOnAttachedNativeThread();
+        testReflectFieldGetFromAttachedNativeThreadNative();
         testCallStaticVoidMethodOnSubClass();
         testGetMirandaMethod();
         testZeroLengthByteBuffers();
@@ -34,6 +35,10 @@
 
     private static boolean testFindFieldOnAttachedNativeThreadField;
 
+    private static native void testReflectFieldGetFromAttachedNativeThreadNative();
+
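+    // Read via reflection from an attached native thread; must be public so that access is
+    // granted even without a managed caller.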
+    public static boolean testReflectFieldGetFromAttachedNativeThreadField;
+
     private static void testFindFieldOnAttachedNativeThread() {
       testFindFieldOnAttachedNativeThreadNative();
       if (!testFindFieldOnAttachedNativeThreadField) {
diff --git a/test/JniTest/jni_test.cc b/test/JniTest/jni_test.cc
index 024ba53..36cad72 100644
--- a/test/JniTest/jni_test.cc
+++ b/test/JniTest/jni_test.cc
@@ -103,6 +103,66 @@
   assert(pthread_join_result == 0);
 }
 
+static void* testReflectFieldGetFromAttachedNativeThread(void*) {
+  assert(jvm != NULL);
+
+  JNIEnv* env = NULL;
+  JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL };
+  int attach_result = jvm->AttachCurrentThread(&env, &args);
+  assert(attach_result == 0);
+
+  jclass clazz = env->FindClass("JniTest");
+  assert(clazz != NULL);
+  assert(!env->ExceptionCheck());
+
+  jclass class_clazz = env->FindClass("java/lang/Class");
+  assert(class_clazz != NULL);
+  assert(!env->ExceptionCheck());
+
+  jmethodID getFieldMethodId = env->GetMethodID(class_clazz, "getField",
+                                                "(Ljava/lang/String;)Ljava/lang/reflect/Field;");
+  assert(getFieldMethodId != NULL);
+  assert(!env->ExceptionCheck());
+
+  jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField");
+  assert(field_name != NULL);
+  assert(!env->ExceptionCheck());
+
+  jobject field = env->CallObjectMethod(clazz, getFieldMethodId, field_name);
+  assert(field != NULL);
+  assert(!env->ExceptionCheck());
+
+  jclass field_clazz = env->FindClass("java/lang/reflect/Field");
+  assert(field_clazz != NULL);
+  assert(!env->ExceptionCheck());
+
+  jmethodID getBooleanMethodId = env->GetMethodID(field_clazz, "getBoolean",
+                                                  "(Ljava/lang/Object;)Z");
+  assert(getBooleanMethodId != NULL);
+  assert(!env->ExceptionCheck());
+
+  jboolean value = env->CallBooleanMethod(field, getBooleanMethodId, /* ignored */ clazz);
+  assert(value == false);
+  assert(!env->ExceptionCheck());
+
+  int detach_result = jvm->DetachCurrentThread();
+  assert(detach_result == 0);
+  return NULL;
+}
+
+// http://b/15539150
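+// Checks that reflective field access works from a native thread attached to the runtime,
+// where the stack walk finds no managed caller.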
+extern "C" JNIEXPORT void JNICALL Java_JniTest_testReflectFieldGetFromAttachedNativeThreadNative(
+    JNIEnv*, jclass) {
+  pthread_t pthread;
+  int pthread_create_result = pthread_create(&pthread,
+                                             NULL,
+                                             testReflectFieldGetFromAttachedNativeThread,
+                                             NULL);
+  assert(pthread_create_result == 0);
+  int pthread_join_result = pthread_join(pthread, NULL);
+  assert(pthread_join_result == 0);
+}
+
 
 // http://b/11243757
 extern "C" JNIEXPORT void JNICALL Java_JniTest_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,