Merge "ART: Clean up arm64 kNumberOfXRegisters usage."
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 5bf77aa..303ea3e 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -35,14 +35,15 @@
  * r16-r17: Also known as ip0-ip1, respectively. Used as scratch registers by
  *          the linker, by the trampolines and other stubs (the backend uses
  *          these as temporary registers).
- * r18    : (rxSELF) is reserved (pointer to thread-local storage).
- * r19-r29: Callee save registers (promotion targets).
+ * r18    : Caller save register (used as a temporary register).
+ * r19    : (rxSELF) is reserved (pointer to thread-local storage).
+ * r20-r29: Callee save registers (promotion targets).
  * r30    : (lr) is reserved (the link register).
  * rsp    : (sp) is reserved (the stack pointer).
  * rzr    : (zr) is reserved (the zero register).
  *
- * 18 core temps that codegen can use (r0-r17).
- * 10 core registers that can be used for promotion.
+ * 19 core temps that codegen can use (r0-r18).
+ * 9 core registers that can be used for promotion.
  *
  * Floating-point registers
  * v0-v31
@@ -145,7 +146,7 @@
   // Aliases which are not defined in "ARM Architecture Reference, register names".
   rxIP0 = rx16,
   rxIP1 = rx17,
-  rxSELF = rx18,
+  rxSELF = rx19,
   rxLR = rx30,
   /*
    * FIXME: It's a bit awkward to define both 32 and 64-bit views of these - we'll only ever use
@@ -154,7 +155,7 @@
    */
   rwIP0 = rw16,
   rwIP1 = rw17,
-  rwSELF = rw18,
+  rwSELF = rw19,
   rwLR = rw30,
 };
 
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index fc32ecd..fe15391 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -51,19 +51,17 @@
      rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15,
      rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23,
      rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31};
-// Note: we are not able to call to C function since rs_xSELF is a special register need to be
-// preserved but would be scratched by native functions follow aapcs64.
 static constexpr RegStorage reserved_regs_arr[] = {rs_wSELF, rs_wsp, rs_wLR, rs_wzr};
 static constexpr RegStorage reserved64_regs_arr[] = {rs_xSELF, rs_sp, rs_xLR, rs_xzr};
 
 static constexpr RegStorage core_temps_arr[] =
     {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7,
      rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16,
-     rs_w17};
+     rs_w17, rs_w18};
 static constexpr RegStorage core64_temps_arr[] =
     {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7,
      rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16,
-     rs_x17};
+     rs_x17, rs_x18};
 static constexpr RegStorage sp_temps_arr[] =
     {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7,
      rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23,
@@ -691,6 +689,7 @@
   Clobber(rs_x15);
   Clobber(rs_x16);
   Clobber(rs_x17);
+  Clobber(rs_x18);
   Clobber(rs_x30);
 
   Clobber(rs_f0);
diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc
index 48109d2..634fdee 100644
--- a/compiler/dex/quick/quick_cfi_test_expected.inc
+++ b/compiler/dex/quick/quick_cfi_test_expected.inc
@@ -33,15 +33,15 @@
 // 0x00000014: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF3, 0xD3, 0x02, 0xA9,
+    0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9,
     0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D,
-    0xF3, 0xD3, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
+    0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
     0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x93,
-    0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
-    0x49, 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
+    0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94,
+    0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
+    0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
     0x40,
 };
 // 0x00000000: sub sp, sp, #0x40 (64)
@@ -49,9 +49,9 @@
 // 0x00000004: stp d8, d9, [sp, #24]
 // 0x00000008: .cfi_offset_extended: r72 at cfa-40
 // 0x00000008: .cfi_offset_extended: r73 at cfa-32
-// 0x00000008: stp x19, x20, [sp, #40]
-// 0x0000000c: .cfi_offset: r19 at cfa-24
-// 0x0000000c: .cfi_offset: r20 at cfa-16
+// 0x00000008: stp x20, x21, [sp, #40]
+// 0x0000000c: .cfi_offset: r20 at cfa-24
+// 0x0000000c: .cfi_offset: r21 at cfa-16
 // 0x0000000c: str lr, [sp, #56]
 // 0x00000010: .cfi_offset: r30 at cfa-8
 // 0x00000010: str w0, [sp]
@@ -59,9 +59,9 @@
 // 0x00000014: ldp d8, d9, [sp, #24]
 // 0x00000018: .cfi_restore_extended: r72
 // 0x00000018: .cfi_restore_extended: r73
-// 0x00000018: ldp x19, x20, [sp, #40]
-// 0x0000001c: .cfi_restore: r19
+// 0x00000018: ldp x20, x21, [sp, #40]
 // 0x0000001c: .cfi_restore: r20
+// 0x0000001c: .cfi_restore: r21
 // 0x0000001c: ldr lr, [sp, #56]
 // 0x00000020: .cfi_restore: r30
 // 0x00000020: add sp, sp, #0x40 (64)
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 4945a91..f988812 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -75,8 +75,8 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
+// Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now.
+static constexpr bool kProduce64BitELFFiles = false;
 
 // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
 // given, to all compilations.
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 195949b..bf32feb 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1006,7 +1006,7 @@
   // Fixup int pointers for the field array.
   CHECK(!arr->IsObjectArray());
   const size_t num_elements = arr->GetLength();
-  if (target_ptr_size_ == 4) {
+  if (target_ptr_size_ == 4u) {
     // Will get fixed up by fixup object.
     dst->SetClass(down_cast<mirror::Class*>(
     GetImageAddress(mirror::IntArray::GetArrayClass())));
@@ -1026,10 +1026,11 @@
       CHECK(it2 != art_field_reloc_.end()) << "No relocation for field " << PrettyField(field);
       fixup_location = image_begin_ + it2->second;
     }
-    if (target_ptr_size_ == 4) {
+    if (target_ptr_size_ == 4u) {
       down_cast<mirror::IntArray*>(dest_array)->SetWithoutChecks<kVerifyNone>(
           i, static_cast<uint32_t>(reinterpret_cast<uint64_t>(fixup_location)));
     } else {
+      DCHECK_EQ(target_ptr_size_, 8u);
       down_cast<mirror::LongArray*>(dest_array)->SetWithoutChecks<kVerifyNone>(
           i, reinterpret_cast<uint64_t>(fixup_location));
     }
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index eaf7872..42fc30f 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -84,14 +84,13 @@
     0xFF, 0x03, 0x03, 0xD1, 0xF3, 0x53, 0x06, 0xA9, 0xF5, 0x5B, 0x07, 0xA9,
     0xF7, 0x63, 0x08, 0xA9, 0xF9, 0x6B, 0x09, 0xA9, 0xFB, 0x73, 0x0A, 0xA9,
     0xFD, 0x7B, 0x0B, 0xA9, 0xE8, 0x27, 0x02, 0x6D, 0xEA, 0x2F, 0x03, 0x6D,
-    0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xF5, 0x03, 0x12, 0xAA,
-    0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD,
-    0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1,
-    0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xF3, 0x53, 0x46, 0xA9,
-    0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9, 0xF9, 0x6B, 0x49, 0xA9,
-    0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9, 0xE8, 0x27, 0x42, 0x6D,
-    0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D, 0xEE, 0x3F, 0x45, 0x6D,
-    0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+    0xEC, 0x37, 0x04, 0x6D, 0xEE, 0x3F, 0x05, 0x6D, 0xE0, 0x03, 0x00, 0xB9,
+    0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, 0xE2, 0xCF, 0x00, 0xB9,
+    0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, 0xFF, 0x83, 0x00, 0x91,
+    0xF3, 0x53, 0x46, 0xA9, 0xF5, 0x5B, 0x47, 0xA9, 0xF7, 0x63, 0x48, 0xA9,
+    0xF9, 0x6B, 0x49, 0xA9, 0xFB, 0x73, 0x4A, 0xA9, 0xFD, 0x7B, 0x4B, 0xA9,
+    0xE8, 0x27, 0x42, 0x6D, 0xEA, 0x2F, 0x43, 0x6D, 0xEC, 0x37, 0x44, 0x6D,
+    0xEE, 0x3F, 0x45, 0x6D, 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
     0x44, 0x0E, 0xC0, 0x01, 0x44, 0x93, 0x18, 0x94, 0x16, 0x44, 0x95, 0x14,
@@ -99,11 +98,11 @@
     0x44, 0x9B, 0x08, 0x9C, 0x06, 0x44, 0x9D, 0x04, 0x9E, 0x02, 0x44, 0x05,
     0x48, 0x28, 0x05, 0x49, 0x26, 0x44, 0x05, 0x4A, 0x24, 0x05, 0x4B, 0x22,
     0x44, 0x05, 0x4C, 0x20, 0x05, 0x4D, 0x1E, 0x44, 0x05, 0x4E, 0x1C, 0x05,
-    0x4F, 0x1A, 0x5C, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x0A,
-    0x44, 0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA,
-    0x44, 0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44,
-    0x06, 0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E,
-    0x06, 0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
+    0x4F, 0x1A, 0x58, 0x0E, 0xE0, 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x44,
+    0xD3, 0xD4, 0x44, 0xD5, 0xD6, 0x44, 0xD7, 0xD8, 0x44, 0xD9, 0xDA, 0x44,
+    0xDB, 0xDC, 0x44, 0xDD, 0xDE, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0x06,
+    0x4A, 0x06, 0x4B, 0x44, 0x06, 0x4C, 0x06, 0x4D, 0x44, 0x06, 0x4E, 0x06,
+    0x4F, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
 };
 // 0x00000000: sub sp, sp, #0xc0 (192)
 // 0x00000004: .cfi_def_cfa_offset: 192
@@ -137,53 +136,51 @@
 // 0x00000028: stp d14, d15, [sp, #80]
 // 0x0000002c: .cfi_offset_extended: r78 at cfa-112
 // 0x0000002c: .cfi_offset_extended: r79 at cfa-104
-// 0x0000002c: mov x21, tr
-// 0x00000030: str w0, [sp]
-// 0x00000034: str w1, [sp, #196]
-// 0x00000038: str s0, [sp, #200]
-// 0x0000003c: str w2, [sp, #204]
-// 0x00000040: str w3, [sp, #208]
-// 0x00000044: sub sp, sp, #0x20 (32)
-// 0x00000048: .cfi_def_cfa_offset: 224
-// 0x00000048: add sp, sp, #0x20 (32)
-// 0x0000004c: .cfi_def_cfa_offset: 192
-// 0x0000004c: mov tr, x21
-// 0x00000050: .cfi_remember_state
-// 0x00000050: ldp x19, x20, [sp, #96]
-// 0x00000054: .cfi_restore: r19
-// 0x00000054: .cfi_restore: r20
-// 0x00000054: ldp x21, x22, [sp, #112]
-// 0x00000058: .cfi_restore: r21
-// 0x00000058: .cfi_restore: r22
-// 0x00000058: ldp x23, x24, [sp, #128]
-// 0x0000005c: .cfi_restore: r23
-// 0x0000005c: .cfi_restore: r24
-// 0x0000005c: ldp x25, x26, [sp, #144]
-// 0x00000060: .cfi_restore: r25
-// 0x00000060: .cfi_restore: r26
-// 0x00000060: ldp x27, x28, [sp, #160]
-// 0x00000064: .cfi_restore: r27
-// 0x00000064: .cfi_restore: r28
-// 0x00000064: ldp x29, lr, [sp, #176]
-// 0x00000068: .cfi_restore: r29
-// 0x00000068: .cfi_restore: r30
-// 0x00000068: ldp d8, d9, [sp, #32]
-// 0x0000006c: .cfi_restore_extended: r72
-// 0x0000006c: .cfi_restore_extended: r73
-// 0x0000006c: ldp d10, d11, [sp, #48]
-// 0x00000070: .cfi_restore_extended: r74
-// 0x00000070: .cfi_restore_extended: r75
-// 0x00000070: ldp d12, d13, [sp, #64]
-// 0x00000074: .cfi_restore_extended: r76
-// 0x00000074: .cfi_restore_extended: r77
-// 0x00000074: ldp d14, d15, [sp, #80]
-// 0x00000078: .cfi_restore_extended: r78
-// 0x00000078: .cfi_restore_extended: r79
-// 0x00000078: add sp, sp, #0xc0 (192)
-// 0x0000007c: .cfi_def_cfa_offset: 0
-// 0x0000007c: ret
-// 0x00000080: .cfi_restore_state
-// 0x00000080: .cfi_def_cfa_offset: 192
+// 0x0000002c: str w0, [sp]
+// 0x00000030: str w1, [sp, #196]
+// 0x00000034: str s0, [sp, #200]
+// 0x00000038: str w2, [sp, #204]
+// 0x0000003c: str w3, [sp, #208]
+// 0x00000040: sub sp, sp, #0x20 (32)
+// 0x00000044: .cfi_def_cfa_offset: 224
+// 0x00000044: add sp, sp, #0x20 (32)
+// 0x00000048: .cfi_def_cfa_offset: 192
+// 0x00000048: .cfi_remember_state
+// 0x00000048: ldp x19, x20, [sp, #96]
+// 0x0000004c: .cfi_restore: r19
+// 0x0000004c: .cfi_restore: r20
+// 0x0000004c: ldp x21, x22, [sp, #112]
+// 0x00000050: .cfi_restore: r21
+// 0x00000050: .cfi_restore: r22
+// 0x00000050: ldp x23, x24, [sp, #128]
+// 0x00000054: .cfi_restore: r23
+// 0x00000054: .cfi_restore: r24
+// 0x00000054: ldp x25, x26, [sp, #144]
+// 0x00000058: .cfi_restore: r25
+// 0x00000058: .cfi_restore: r26
+// 0x00000058: ldp x27, x28, [sp, #160]
+// 0x0000005c: .cfi_restore: r27
+// 0x0000005c: .cfi_restore: r28
+// 0x0000005c: ldp x29, lr, [sp, #176]
+// 0x00000060: .cfi_restore: r29
+// 0x00000060: .cfi_restore: r30
+// 0x00000060: ldp d8, d9, [sp, #32]
+// 0x00000064: .cfi_restore_extended: r72
+// 0x00000064: .cfi_restore_extended: r73
+// 0x00000064: ldp d10, d11, [sp, #48]
+// 0x00000068: .cfi_restore_extended: r74
+// 0x00000068: .cfi_restore_extended: r75
+// 0x00000068: ldp d12, d13, [sp, #64]
+// 0x0000006c: .cfi_restore_extended: r76
+// 0x0000006c: .cfi_restore_extended: r77
+// 0x0000006c: ldp d14, d15, [sp, #80]
+// 0x00000070: .cfi_restore_extended: r78
+// 0x00000070: .cfi_restore_extended: r79
+// 0x00000070: add sp, sp, #0xc0 (192)
+// 0x00000074: .cfi_def_cfa_offset: 0
+// 0x00000074: ret
+// 0x00000078: .cfi_restore_state
+// 0x00000078: .cfi_def_cfa_offset: 192
 
 static constexpr uint8_t expected_asm_kX86[] = {
     0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3,
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 4344c90..8e7fd2b 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -184,7 +184,7 @@
   // JNI function is the native function that the Java code wants to call.
   // JNI method is the method compiled by the JNI compiler.
   // Call chain: managed code (Java) --> JNI method --> JNI function.
-  // Thread register(X18, scratched by aapcs64) is not saved on stack, it is saved in ETR(X21).
+  // Thread register (X19) is callee-saved and is saved on the stack.
   return 1 << X19 | 1 << X20 | 1 << X21 | 1 << X22 | 1 << X23 | 1 << X24 |
          1 << X25 | 1 << X26 | 1 << X27 | 1 << X28 | 1 << X29 | 1 << LR;
 }
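For reference, the spill mask returned above collapses to a single constant. A quick check, assuming the XRegister enum values match the architectural numbering (X19 == 19 through LR == 30, as runtime/arch/arm64/registers_arm64.h defines them):

    // X19..X29 plus LR occupy bits 19..30 of the mask.
    uint32_t mask = 0;
    for (int reg = 19; reg <= 30; ++reg) {
      mask |= 1u << reg;
    }
    // mask == 0x7FF80000, i.e. (1u << 31) - (1u << 19).

Keeping X19 in the mask is what makes the new comment hold: the thread register is now spilled with the other callee saves instead of being stashed in ETR.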
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c4eaabf..49a0444 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -711,8 +711,8 @@
           clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
         } else {
           clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-          HLoadClass* load_class =
-              new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc);
+          HLoadClass* load_class = new (arena_) HLoadClass(
+              storage_index, *dex_compilation_unit_->GetDexFile(), is_referrer_class, dex_pc);
           current_block_->AddInstruction(load_class);
           clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
           current_block_->AddInstruction(clinit_check);
@@ -915,7 +915,8 @@
       *outer_compilation_unit_->GetDexFile(), storage_index);
   bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
 
-  HLoadClass* constant = new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc);
+  HLoadClass* constant = new (arena_) HLoadClass(
+      storage_index, *dex_compilation_unit_->GetDexFile(), is_referrer_class, dex_pc);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
@@ -1151,7 +1152,10 @@
   }
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
   HLoadClass* cls = new (arena_) HLoadClass(
-      type_index, IsOutermostCompilingClass(type_index), dex_pc);
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      IsOutermostCompilingClass(type_index),
+      dex_pc);
   current_block_->AddInstruction(cls);
   // The class needs a temporary before being used by the type check.
   Temporaries temps(graph_);
@@ -1976,7 +1980,8 @@
             ? kQuickAllocObjectWithAccessCheck
             : kQuickAllocObject;
 
-        current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
+        current_block_->AddInstruction(new (arena_) HNewInstance(
+            dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), entrypoint));
         UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       }
       break;
@@ -2161,8 +2166,11 @@
         MaybeRecordStat(MethodCompilationStat::kNotCompiledCantAccesType);
         return false;
       }
-      current_block_->AddInstruction(
-          new (arena_) HLoadClass(type_index, IsOutermostCompilingClass(type_index), dex_pc));
+      current_block_->AddInstruction(new (arena_) HLoadClass(
+          type_index,
+          *dex_compilation_unit_->GetDexFile(),
+          IsOutermostCompilingClass(type_index),
+          dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5f8023d..7d26a3c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -2878,7 +2878,7 @@
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  Label less, greater, done;
+  NearLabel less, greater, done;
   Primitive::Type type = compare->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
@@ -2974,7 +2974,7 @@
                                                           Register temp1,
                                                           Register temp2,
                                                           HInstruction* instruction) {
-  Label fail;
+  NearLabel fail;
   if (offset != 0) {
     __ LoadImmediate(temp1, offset);
     __ add(IP, addr, ShifterOperand(temp1));
@@ -3654,7 +3654,7 @@
                                   Register object,
                                   Register value,
                                   bool can_be_null) {
-  Label is_null;
+  NearLabel is_null;
   if (can_be_null) {
     __ CompareAndBranchIfZero(value, &is_null);
   }
@@ -4076,14 +4076,13 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Register out = locations->Out().AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  Label done, zero;
+  NearLabel done, zero;
   SlowPathCodeARM* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
   // Avoid the null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ cmp(obj, ShifterOperand(0));
-    __ b(&zero, EQ);
+    __ CompareAndBranchIfZero(obj, &zero);
   }
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadWord, out, obj, class_offset);
@@ -4134,16 +4133,19 @@
       instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
   codegen_->AddSlowPath(slow_path);
 
+  NearLabel done;
   // Avoid the null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ cmp(obj, ShifterOperand(0));
-    __ b(slow_path->GetExitLabel(), EQ);
+    __ CompareAndBranchIfZero(obj, &done);
   }
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
   __ cmp(temp, ShifterOperand(cls));
   __ b(slow_path->GetEntryLabel(), NE);
   __ Bind(slow_path->GetExitLabel());
+  if (instruction->MustDoNullCheck()) {
+    __ Bind(&done);
+  }
 }
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index b56ca10..ab793a5 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -44,7 +44,7 @@
 };
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
-const vixl::Register tr = vixl::x18;                        // Thread Register
+const vixl::Register tr = vixl::x19;                        // Thread Register
 static const vixl::Register kArtMethodRegister = vixl::w0;  // Method register on invoke.
 
 const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
@@ -52,10 +52,10 @@
 
 const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
 
-// Callee-saved registers defined by AAPCS64.
+// Callee-saved registers defined by AAPCS64, excluding x19 (the Thread Register).
 const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
                                                    vixl::kXRegSize,
-                                                   vixl::x19.code(),
+                                                   vixl::x20.code(),
                                                    vixl::x30.code());
 const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
                                                  vixl::kDRegSize,
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 47c6318..a72817f 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -141,7 +141,6 @@
   }
 
   if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) {
-    resolved_method->SetShouldNotInline();
     return false;
   }
 
@@ -208,6 +207,7 @@
   if (!builder.BuildGraph(*code_item)) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                    << " could not be built, so cannot be inlined";
+    resolved_method->SetShouldNotInline();
     return false;
   }
 
@@ -215,12 +215,14 @@
                                                   compiler_driver_->GetInstructionSet())) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                    << " cannot be inlined because of the register allocator";
+    resolved_method->SetShouldNotInline();
     return false;
   }
 
   if (!callee_graph->TryBuildingSsa()) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                    << " could not be transformed to SSA";
+    resolved_method->SetShouldNotInline();
     return false;
   }
 
@@ -257,6 +259,7 @@
     if (block->IsLoopHeader()) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " could not be inlined because it contains a loop";
+      resolved_method->SetShouldNotInline();
       return false;
     }
 
@@ -272,6 +275,7 @@
         VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " can throw";
+        resolved_method->SetShouldNotInline();
         return false;
       }
 
@@ -279,6 +283,7 @@
         VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " needs an environment";
+        resolved_method->SetShouldNotInline();
         return false;
       }
 
@@ -286,6 +291,8 @@
         VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " it is in a different dex file and requires access to the dex cache";
+        // Do not flag the method as not-inlineable. A caller within the same
+        // dex file could still successfully inline it.
         return false;
       }
     }
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 9e18f11..8ef13e1 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -327,9 +327,6 @@
 
 // TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod.
 void IntrinsicsRecognizer::Run() {
-  DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_);
-  DCHECK(inliner != nullptr);
-
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
@@ -338,6 +335,9 @@
       if (inst->IsInvoke()) {
         HInvoke* invoke = inst->AsInvoke();
         InlineMethod method;
+        DexFileMethodInliner* inliner =
+            driver_->GetMethodInlinerMap()->GetMethodInliner(&invoke->GetDexFile());
+        DCHECK(inliner != nullptr);
         if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
           Intrinsics intrinsic = GetIntrinsic(method);
 
@@ -345,7 +345,7 @@
             if (!CheckInvokeType(intrinsic, invoke)) {
               LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
                            << intrinsic << " for "
-                           << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_);
+                           << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile());
             } else {
               invoke->SetIntrinsic(intrinsic);
             }
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index c243ef3..741fb64 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -30,16 +30,15 @@
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
-  IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver)
+  IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver)
       : HOptimization(graph, true, kIntrinsicsRecognizerPassName),
-        dex_file_(dex_file), driver_(driver) {}
+        driver_(driver) {}
 
   void Run() OVERRIDE;
 
   static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition";
 
  private:
-  const DexFile* dex_file_;
   CompilerDriver* driver_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 91daeb7..483c09e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1291,11 +1291,16 @@
     block->RemovePhi(it.Current()->AsPhi());
   }
 
+  if (block->IsExitBlock()) {
+    exit_block_ = nullptr;
+  }
+
   reverse_post_order_.Delete(block);
   blocks_.Put(block->GetBlockId(), nullptr);
 }
 
 void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
+  DCHECK(HasExitBlock()) << "Unimplemented scenario";
   if (GetBlocks().Size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
     // Put the body block's instruction into `invoke`'s block.
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 12ace41..01870c3 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -155,6 +155,7 @@
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
   HBasicBlock* GetExitBlock() const { return exit_block_; }
+  bool HasExitBlock() const { return exit_block_ != nullptr; }
 
   void SetEntryBlock(HBasicBlock* block) { entry_block_ = block; }
   void SetExitBlock(HBasicBlock* block) { exit_block_ = block; }
@@ -2387,6 +2388,7 @@
   uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
 
   uint32_t GetDexMethodIndex() const { return dex_method_index_; }
+  const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); }
 
   InvokeType GetOriginalInvokeType() const { return original_invoke_type_; }
 
@@ -2598,14 +2600,19 @@
 
 class HNewInstance : public HExpression<0> {
  public:
-  HNewInstance(uint32_t dex_pc, uint16_t type_index, QuickEntrypointEnum entrypoint)
+  HNewInstance(uint32_t dex_pc,
+               uint16_t type_index,
+               const DexFile& dex_file,
+               QuickEntrypointEnum entrypoint)
       : HExpression(Primitive::kPrimNot, SideEffects::None()),
         dex_pc_(dex_pc),
         type_index_(type_index),
+        dex_file_(dex_file),
         entrypoint_(entrypoint) {}
 
   uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
+  const DexFile& GetDexFile() const { return dex_file_; }
 
   // Calls runtime so needs an environment.
   bool NeedsEnvironment() const OVERRIDE { return true; }
@@ -2624,6 +2631,7 @@
  private:
   const uint32_t dex_pc_;
   const uint16_t type_index_;
+  const DexFile& dex_file_;
   const QuickEntrypointEnum entrypoint_;
 
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
@@ -3428,10 +3436,12 @@
 class HLoadClass : public HExpression<0> {
  public:
   HLoadClass(uint16_t type_index,
+             const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc)
       : HExpression(Primitive::kPrimNot, SideEffects::None()),
         type_index_(type_index),
+        dex_file_(dex_file),
         is_referrers_class_(is_referrers_class),
         dex_pc_(dex_pc),
         generate_clinit_check_(false),
@@ -3487,12 +3497,15 @@
     return loaded_class_rti_.IsExact();
   }
 
+  const DexFile& GetDexFile() { return dex_file_; }
+
   bool NeedsDexCache() const OVERRIDE { return !is_referrers_class_; }
 
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
   const uint16_t type_index_;
+  const DexFile& dex_file_;
   const bool is_referrers_class_;
   const uint32_t dex_pc_;
   // Whether this instruction must generate the initialization check.
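Carrying the dex file on HLoadClass and HNewInstance (and deriving it from the environment for HInvoke) is what lets later passes resolve types correctly after inlining, when one graph mixes instructions from several dex files. The consumer-side pattern, as reference_type_propagation.cc now uses it:

    mirror::DexCache* dex_cache =
        Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
    mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());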
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 2125f6e..ecb3b0a 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,20 +32,20 @@
 // 0x00000012: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
-    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
+    0xE0, 0x0F, 0x1C, 0xB8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9,
     0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
 static constexpr uint8_t expected_cfi_kArm64[] = {
-    0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44,
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
-    0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+    0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: str w0, [sp, #-64]!
 // 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: stp x19, x20, [sp, #40]
-// 0x00000008: .cfi_offset: r19 at cfa-24
-// 0x00000008: .cfi_offset: r20 at cfa-16
+// 0x00000004: stp x20, x21, [sp, #40]
+// 0x00000008: .cfi_offset: r20 at cfa-24
+// 0x00000008: .cfi_offset: r21 at cfa-16
 // 0x00000008: str lr, [sp, #56]
 // 0x0000000c: .cfi_offset: r30 at cfa-8
 // 0x0000000c: stp d8, d9, [sp, #24]
@@ -55,9 +55,9 @@
 // 0x00000010: ldp d8, d9, [sp, #24]
 // 0x00000014: .cfi_restore_extended: r72
 // 0x00000014: .cfi_restore_extended: r73
-// 0x00000014: ldp x19, x20, [sp, #40]
-// 0x00000018: .cfi_restore: r19
+// 0x00000014: ldp x20, x21, [sp, #40]
 // 0x00000018: .cfi_restore: r20
+// 0x00000018: .cfi_restore: r21
 // 0x00000018: ldr lr, [sp, #56]
 // 0x0000001c: .cfi_restore: r30
 // 0x0000001c: add sp, sp, #0x40 (64)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b2e8ecd..fa3c310 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -223,7 +223,6 @@
   CompiledMethod* CompileOptimized(HGraph* graph,
                                    CodeGenerator* codegen,
                                    CompilerDriver* driver,
-                                   const DexFile& dex_file,
                                    const DexCompilationUnit& dex_compilation_unit,
                                    PassInfoPrinter* pass_info) const;
 
@@ -316,7 +315,6 @@
 static void RunOptimizations(HGraph* graph,
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
-                             const DexFile& dex_file,
                              const DexCompilationUnit& dex_compilation_unit,
                              PassInfoPrinter* pass_info_printer,
                              StackHandleScopeCollection* handles) {
@@ -335,10 +333,10 @@
   GVNOptimization gvn(graph, side_effects);
   LICM licm(graph, side_effects);
   BoundsCheckElimination bce(graph);
-  ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles);
+  ReferenceTypePropagation type_propagation(graph, handles);
   InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types");
 
-  IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
+  IntrinsicsRecognizer intrinsics(graph, driver);
 
   HOptimization* optimizations[] = {
     &intrinsics,
@@ -391,12 +389,11 @@
 CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
                                                      CodeGenerator* codegen,
                                                      CompilerDriver* compiler_driver,
-                                                     const DexFile& dex_file,
                                                      const DexCompilationUnit& dex_compilation_unit,
                                                      PassInfoPrinter* pass_info_printer) const {
   StackHandleScopeCollection handles(Thread::Current());
   RunOptimizations(graph, compiler_driver, compilation_stats_.get(),
-                   dex_file, dex_compilation_unit, pass_info_printer, &handles);
+                   dex_compilation_unit, pass_info_printer, &handles);
 
   AllocateRegisters(graph, codegen, pass_info_printer);
 
@@ -585,7 +582,6 @@
     return CompileOptimized(graph,
                             codegen.get(),
                             compiler_driver,
-                            dex_file,
                             dex_compilation_unit,
                             &pass_info_printer);
   } else if (shouldOptimize && can_allocate_registers) {
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 12b1c2b..601b48a 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -161,7 +161,8 @@
 
 void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) {
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_);
+  mirror::DexCache* dex_cache =
+      Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
   if (resolved_class != nullptr) {
@@ -172,7 +173,8 @@
 
 void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_);
+  mirror::DexCache* dex_cache =
+      Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
   if (resolved_class != nullptr) {
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 733e18e..b68fc67 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -30,13 +30,8 @@
  */
 class ReferenceTypePropagation : public HOptimization {
  public:
-  ReferenceTypePropagation(HGraph* graph,
-                           const DexFile& dex_file,
-                           const DexCompilationUnit& dex_compilation_unit,
-                           StackHandleScopeCollection* handles)
+  ReferenceTypePropagation(HGraph* graph, StackHandleScopeCollection* handles)
     : HOptimization(graph, true, kReferenceTypePropagationPassName),
-      dex_file_(dex_file),
-      dex_compilation_unit_(dex_compilation_unit),
       handles_(handles),
       worklist_(graph->GetArena(), kDefaultWorklistSize) {}
 
@@ -66,8 +61,6 @@
   ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const DexFile& dex_file_;
-  const DexCompilationUnit& dex_compilation_unit_;
   StackHandleScopeCollection* handles_;
 
   GrowableArray<HInstruction*> worklist_;
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index b446815..5663e39 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -144,14 +144,17 @@
 
   // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
   needed_size_ = CodeInfo::kFixedSize
-      + dex_register_location_catalog_size_
       + stack_maps_size_
+      + dex_register_location_catalog_size_
       + dex_register_maps_size_
       + inline_info_size_;
 
-  dex_register_location_catalog_start_ = CodeInfo::kFixedSize;
-  stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
-  dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_;
+  stack_maps_start_ = CodeInfo::kFixedSize;
+  // TODO: Move the catalog to the end. It is currently too expensive at runtime
+  // to compute its size (note that we do not encode that size in the CodeInfo).
+  dex_register_location_catalog_start_ = stack_maps_start_ + stack_maps_size_;
+  dex_register_maps_start_ =
+      dex_register_location_catalog_start_ + dex_register_location_catalog_size_;
   inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_;
 
   return needed_size_;
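Spelled out, the new CodeInfo layout puts the stack maps right after the fixed header (a sketch; the names are the start/size fields computed in this function):

    // [ CodeInfo fixed header ]  offset 0
    // [ stack maps            ]  stack_maps_start_ = CodeInfo::kFixedSize
    // [ dex reg. loc. catalog ]  dex_register_location_catalog_start_
    // [ dex register maps     ]  dex_register_maps_start_
    // [ inline infos          ]  inline_infos_start_

The runtime can now reach a stack map without first computing the catalog's size, which, per the TODO above, is expensive because that size is not encoded in the CodeInfo.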
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index e2c2e27..9179965 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -33,6 +33,16 @@
 class Arm32Assembler;
 class Thumb2Assembler;
 
+// This class indicates that the label and its uses
+// will fall within a range that is encodable in 16 bits on Thumb-2.
+class NearLabel : public Label {
+ public:
+  NearLabel() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(NearLabel);
+};
+
 class ShifterOperand {
  public:
   ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
@@ -519,6 +529,9 @@
 
   // Branch instructions.
   virtual void b(Label* label, Condition cond = AL) = 0;
+  virtual void b(NearLabel* label, Condition cond = AL) {
+    b(static_cast<Label*>(label), cond);
+  }
   virtual void bl(Label* label, Condition cond = AL) = 0;
   virtual void blx(Register rm, Condition cond = AL) = 0;
   virtual void bx(Register rm, Condition cond = AL) = 0;
@@ -654,6 +667,9 @@
   virtual void Bind(Label* label) = 0;
 
   virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
+  virtual void CompareAndBranchIfZero(Register r, NearLabel* label) {
+    CompareAndBranchIfZero(r, static_cast<Label*>(label));
+  }
   virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
 
   //
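A minimal usage sketch of the NearLabel contract, modeled on the test added later in this patch (the __ macro there expands to assembler->; on Thumb-2, cbz/cbnz can only branch forward by at most 126 bytes and only test low registers, which is why the caller must guarantee nearness):

    arm::NearLabel done;
    __ CompareAndBranchIfZero(arm::R0, &done);   // low register: 16-bit cbz
    __ CompareAndBranchIfZero(arm::R11, &done);  // high register: cmp + 16-bit beq
    // ... code known to stay within 16-bit branch range ...
    __ Bind(&done);

If a near branch turns out to need a 32-bit encoding after all, the debug checks added to assembler_thumb2.h below fire instead of silently widening it.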
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 82fed50..3164623 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -200,8 +200,8 @@
   void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
 
   // Branch instructions.
-  void b(Label* label, Condition cond = AL);
-  void bl(Label* label, Condition cond = AL);
+  void b(Label* label, Condition cond = AL) OVERRIDE;
+  void bl(Label* label, Condition cond = AL) OVERRIDE;
   void blx(Register rm, Condition cond = AL) OVERRIDE;
   void bx(Register rm, Condition cond = AL) OVERRIDE;
   void Lsl(Register rd, Register rm, uint32_t shift_imm, bool setcc = false,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index ab83f95..26cb6c3 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -671,11 +671,17 @@
   EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
 }
 
+
 void Thumb2Assembler::b(Label* label, Condition cond) {
   EmitBranch(cond, label, false, false);
 }
 
 
+void Thumb2Assembler::b(NearLabel* label, Condition cond) {
+  EmitBranch(cond, label, false, false, /* is_near */ true);
+}
+
+
 void Thumb2Assembler::bl(Label* label, Condition cond) {
   CheckCondition(cond);
   EmitBranch(cond, label, true, false);
@@ -1369,6 +1375,7 @@
 
 
 uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) {
+  CHECK(IsLowRegister(rn));
   uint32_t location = buffer_.Size();
 
   // This is always unresolved as it must be a forward branch.
@@ -1613,7 +1620,7 @@
 }
 
 
-void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) {
+void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near) {
   uint32_t pc = buffer_.Size();
   Branch::Type branch_type;
   if (cond == AL) {
@@ -1644,8 +1651,8 @@
     }
   } else {
     // Branch is to an unbound label.  Emit space for it.
-    uint16_t branch_id = AddBranch(branch_type, pc, cond);    // Unresolved branch.
-    if (!CanRelocateBranches() || force_32bit_) {
+    uint16_t branch_id = AddBranch(branch_type, pc, cond, is_near);    // Unresolved branch.
+    if (force_32bit_ || (!CanRelocateBranches() && !is_near)) {
       Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
       Emit16(0);                   // another 16 bits.
     } else {
@@ -2199,6 +2206,9 @@
   if (label->IsBound()) {
     LOG(FATAL) << "cbz can only be used to branch forwards";
     UNREACHABLE();
+  } else if (IsHighRegister(rn)) {
+    LOG(FATAL) << "cbz can only be used with low registers";
+    UNREACHABLE();
   } else {
     uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), false);
     label->LinkTo(branchid);
@@ -2211,6 +2221,9 @@
   if (label->IsBound()) {
     LOG(FATAL) << "cbnz can only be used to branch forwards";
     UNREACHABLE();
+  } else if (IsHighRegister(rn)) {
+    LOG(FATAL) << "cbnz can only be used with low registers";
+    UNREACHABLE();
   } else {
     uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), true);
     label->LinkTo(branchid);
@@ -2741,7 +2754,17 @@
 
 
 void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) {
-  if (CanRelocateBranches()) {
+  if (CanRelocateBranches() && IsLowRegister(r)) {
+    cbz(r, label);
+  } else {
+    cmp(r, ShifterOperand(0));
+    b(label, EQ);
+  }
+}
+
+
+void Thumb2Assembler::CompareAndBranchIfZero(Register r, NearLabel* label) {
+  if (IsLowRegister(r)) {
     cbz(r, label);
   } else {
     cmp(r, ShifterOperand(0));
@@ -2751,7 +2774,7 @@
 
 
 void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
-  if (CanRelocateBranches()) {
+  if (CanRelocateBranches() && IsLowRegister(r)) {
     cbnz(r, label);
   } else {
     cmp(r, ShifterOperand(0));
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 2da8ee2..2382b74 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -238,6 +238,7 @@
 
   // Branch instructions.
   void b(Label* label, Condition cond = AL);
+  void b(NearLabel* label, Condition cond = AL);
   void bl(Label* label, Condition cond = AL);
   void blx(Label* label);
   void blx(Register rm, Condition cond = AL) OVERRIDE;
@@ -272,6 +273,7 @@
   void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
 
   void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
+  void CompareAndBranchIfZero(Register r, NearLabel* label) OVERRIDE;
   void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
 
   // Memory barriers.
@@ -430,7 +432,7 @@
 
   void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
 
-  void EmitBranch(Condition cond, Label* label, bool link, bool x);
+  void EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near = false);
   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
   int32_t EncodeTstOffset(int offset, int32_t inst);
@@ -558,6 +560,7 @@
     // Resolve a branch when the target is known.  If this causes the
     // size of the branch to change return true.  Otherwise return false.
     bool Resolve(uint32_t target) {
+      uint32_t old_target = target_;
       target_ = target;
       if (assembler_->CanRelocateBranches()) {
         Size new_size = CalculateSize();
@@ -568,9 +571,12 @@
         return false;
       } else {
         if (kIsDebugBuild) {
-          Size new_size = CalculateSize();
-          // Check that the size has not increased.
-          DCHECK(!(new_size == k32Bit && size_ == k16Bit));
+          if (old_target == kUnresolved) {
+            // Check that the size has not increased.
+            DCHECK(!(CalculateSize() == k32Bit && size_ == k16Bit));
+          } else {
+            DCHECK(CalculateSize() == size_);
+          }
         }
         return false;
       }
@@ -650,6 +656,10 @@
         if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) {
           return k32Bit;
         }
+        if (IsCompareAndBranch()) {
+          // Compare-and-branch instructions can only be encoded in 16 bits.
+          return k16Bit;
+        }
         return assembler_->CanRelocateBranches() ? k16Bit : k32Bit;
       }
       // When the target is resolved, we know the best encoding for it.
@@ -713,8 +723,15 @@
   }
 
   // Add an unresolved branch and return its id.
-  uint16_t AddBranch(Branch::Type type, uint32_t location, Condition cond = AL) {
-    branches_.push_back(new Branch(this, type, location, cond));
+  uint16_t AddBranch(Branch::Type type,
+                     uint32_t location,
+                     Condition cond = AL,
+                     bool is_near = false) {
+    Branch* branch = new Branch(this, type, location, cond);
+    if (is_near) {
+      branch->ResetSize(Branch::k16Bit);
+    }
+    branches_.push_back(branch);
     return branches_.size() - 1;
   }
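Putting the pieces together, is_near commits a branch to its 16-bit encoding up front (a sketch of the flow introduced above):

    // AddBranch(type, pc, cond, /* is_near */ true)
    //   -> branch->ResetSize(Branch::k16Bit);  // commit to 16 bits immediately
    // Branch::Resolve(target), once the label binds:
    //   -> DCHECKs that the size never needs to grow, so a NearLabel target
    //      that lands out of range fails loudly in debug builds.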
 
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 3ee79a1..9cc0c91 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -51,11 +51,11 @@
 }
 
 void Arm64Assembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(ETR));
+  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
 }
 
 void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
-  StoreToOffset(ETR, SP, offset.Int32Value());
+  StoreToOffset(TR, SP, offset.Int32Value());
 }
 
 // See Arm64 PCS Section 5.2.2.1.
@@ -167,7 +167,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   LoadImmediate(scratch.AsXRegister(), imm);
-  StoreToOffset(scratch.AsXRegister(), ETR, offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> tr_offs,
@@ -176,14 +176,14 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::StoreStackPointerToThread64(ThreadOffset<8> tr_offs) {
   vixl::UseScratchRegisterScope temps(vixl_masm_);
   vixl::Register temp = temps.AcquireX();
   ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(ETR), tr_offs.Int32Value()));
+  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
 void Arm64Assembler::StoreSpanning(FrameOffset dest_off, ManagedRegister m_source,
@@ -284,7 +284,7 @@
 }
 
 void Arm64Assembler::LoadFromThread64(ManagedRegister m_dst, ThreadOffset<8> src, size_t size) {
-  return Load(m_dst.AsArm64(), ETR, src.Int32Value(), size);
+  return Load(m_dst.AsArm64(), TR, src.Int32Value(), size);
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, FrameOffset offs) {
@@ -319,7 +319,7 @@
 void Arm64Assembler::LoadRawPtrFromThread64(ManagedRegister m_dst, ThreadOffset<8> offs) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   CHECK(dst.IsXRegister()) << dst;
-  LoadFromOffset(dst.AsXRegister(), ETR, offs.Int32Value());
+  LoadFromOffset(dst.AsXRegister(), TR, offs.Int32Value());
 }
 
 // Copying routines.
@@ -357,7 +357,7 @@
                                           ManagedRegister m_scratch) {
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
   StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
 }
 
@@ -367,7 +367,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(scratch.IsXRegister()) << scratch;
   LoadFromOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), ETR, tr_offs.Int32Value());
+  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
 void Arm64Assembler::CopyRef(FrameOffset dest, FrameOffset src,
@@ -610,7 +610,7 @@
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
   Arm64Exception *current_exception = new Arm64Exception(scratch, stack_adjust);
   exception_blocks_.push_back(current_exception);
-  LoadFromOffset(scratch.AsXRegister(), ETR, Thread::ExceptionOffset<8>().Int32Value());
+  LoadFromOffset(scratch.AsXRegister(), TR, Thread::ExceptionOffset<8>().Int32Value());
   ___ Cbnz(reg_x(scratch.AsXRegister()), current_exception->Entry());
 }
 
@@ -627,12 +627,7 @@
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
   ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp, MEM_OP(reg_x(ETR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
-
-  // Move ETR(Callee saved) back to TR(Caller saved) reg. We use ETR on calls
-  // to external functions that might trash TR. We do not need the original
-  // ETR(X21) saved in BuildFrame().
-  ___ Mov(reg_x(TR), reg_x(ETR));
+  ___ Ldr(temp, MEM_OP(reg_x(TR), QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()));
 
   ___ Blr(temp);
   // Call should never return.
@@ -713,12 +708,7 @@
   SpillRegisters(core_reg_list, frame_size - core_reg_size);
   SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
 
-  // Note: This is specific to JNI method frame.
-  // We will need to move TR(Caller saved in AAPCS) to ETR(Callee saved in AAPCS). The original
-  // (ETR)X21 has been saved on stack. In this way, we can restore TR later.
-  DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR)));
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR)));
-  ___ Mov(reg_x(ETR), reg_x(TR));
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
 
   // Write StackReference<Method>.
   DCHECK(X0 == method_reg.AsArm64().AsXRegister());
@@ -771,11 +761,7 @@
   DCHECK_GE(frame_size, core_reg_size + fp_reg_size + sizeof(StackReference<mirror::ArtMethod>));
   DCHECK_ALIGNED(frame_size, kStackAlignment);
 
-  // Note: This is specific to JNI method frame.
-  // Restore TR(Caller saved in AAPCS) from ETR(Callee saved in AAPCS).
-  DCHECK(!core_reg_list.IncludesAliasOf(reg_x(TR)));
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(ETR)));
-  ___ Mov(reg_x(TR), reg_x(ETR));
+  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
 
   cfi_.RememberState();
 
diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc
index 32c2e62..e27115d 100644
--- a/compiler/utils/arm64/managed_register_arm64_test.cc
+++ b/compiler/utils/arm64/managed_register_arm64_test.cc
@@ -623,7 +623,7 @@
   EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(X29)));
   EXPECT_TRUE(vixl::x30.Is(Arm64Assembler::reg_x(X30)));
 
-  EXPECT_TRUE(vixl::x18.Is(Arm64Assembler::reg_x(TR)));
+  EXPECT_TRUE(vixl::x19.Is(Arm64Assembler::reg_x(TR)));
   EXPECT_TRUE(vixl::ip0.Is(Arm64Assembler::reg_x(IP0)));
   EXPECT_TRUE(vixl::ip1.Is(Arm64Assembler::reg_x(IP1)));
   EXPECT_TRUE(vixl::x29.Is(Arm64Assembler::reg_x(FP)));
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 574051a..c8b3fe5 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -216,9 +216,9 @@
 
     bool success = Exec(args, error_msg);
     if (!success) {
-      LOG(INFO) << "Assembler command line:";
+      LOG(ERROR) << "Assembler command line:";
       for (std::string arg : args) {
-        LOG(INFO) << arg;
+        LOG(ERROR) << arg;
       }
     }
     return success;
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 7738627..1a2c9a9 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1338,6 +1338,24 @@
   delete assembler;
 }
 
+TEST(Thumb2AssemblerTest, CompareAndBranch) {
+  arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2));
+
+  arm::NearLabel label;
+  __ CompareAndBranchIfZero(arm::R0, &label);
+  __ CompareAndBranchIfZero(arm::R11, &label);
+  __ CompareAndBranchIfNonZero(arm::R0, &label);
+  __ CompareAndBranchIfNonZero(arm::R11, &label);
+  __ Bind(&label);
+
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  dump(managed_code, "CompareAndBranch");
+  delete assembler;
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 3d03234..841d6a0 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -4822,6 +4822,16 @@
   "  30:   f8a4 0040       strh.w  r0, [r4, #64]   ; 0x40\n",
   nullptr
 };
+const char* CompareAndBranchResults[] = {
+  "  0: b130        cbz r0, 10 <CompareAndBranch+0x10>\n",
+  "  2: f1bb 0f00   cmp.w fp, #0\n",
+  "  6: d003        beq.n 10 <CompareAndBranch+0x10>\n",
+  "  8: b910        cbnz r0, 10 <CompareAndBranch+0x10>\n",
+  "  a: f1bb 0f00   cmp.w fp, #0\n",
+  "  e: d1ff        bne.n 10 <CompareAndBranch+0x10>\n",
+  nullptr
+};
+
 std::map<std::string, const char**> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -4869,4 +4879,5 @@
     test_results["LoadStoreRegOffset"] = LoadStoreRegOffsetResults;
     test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
     test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
+    test_results["CompareAndBranch"] = CompareAndBranchResults;
 }
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index d195efc..348b2a5 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -31,8 +31,7 @@
 // runtime/arch/arm64/registers_arm64.h. We do not include that file to
 // avoid a dependency on libart.
 enum {
-  TR  = 18,
-  ETR = 21,
+  TR  = 19,
   IP0 = 16,
   IP1 = 17,
   FP  = 29,
diff --git a/runtime/arch/arm64/asm_support_arm64.S b/runtime/arch/arm64/asm_support_arm64.S
index 39a8aa5..051f40b 100644
--- a/runtime/arch/arm64/asm_support_arm64.S
+++ b/runtime/arch/arm64/asm_support_arm64.S
@@ -22,9 +22,7 @@
 // Define special registers.
 
 // Register holding Thread::Current().
-#define xSELF x18
-// x18 is not preserved by aapcs64, save it on xETR(External Thread reg) for restore and later use.
-#define xETR x21
+#define xSELF x19
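+// x19 is callee-saved by AAPCS64, so xSELF survives native calls and xETR is no longer needed.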
 // Frame Pointer
 #define xFP   x29
 // Link Register
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 998f567..989ecc6 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -20,7 +20,7 @@
 #include "asm_support.h"
 
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 112
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 96
 #define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 224
 
 #endif  // ART_RUNTIME_ARCH_ARM64_ASM_SUPPORT_ARM64_H_
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index 9c7bb55..af2f661 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -90,6 +90,7 @@
   gprs_[X13] = nullptr;
   gprs_[X14] = nullptr;
   gprs_[X15] = nullptr;
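+  // x18 is a caller-saved temporary now that the thread register lives in x19.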
+  gprs_[X18] = nullptr;
 
   // d0-d7, d16-d31 are caller-saved; d8-d15 are callee-saved.
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 8c8f8d5..2ce2a29 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -27,16 +27,9 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
+extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
-// Single-precision FP arithmetics.
-extern "C" float art_quick_fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
-
-// Double-precision FP arithmetics.
-extern "C" double art_quick_fmod(double a, double b);        // REM_DOUBLE[_2ADDR]
-
-
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      QuickEntryPoints* qpoints) {
   // Interpreter
@@ -50,7 +43,7 @@
   ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
+  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
   qpoints->pCheckCast = art_quick_check_cast;
 
   // DexCache
@@ -110,9 +103,9 @@
   qpoints->pCmpgFloat = nullptr;
   qpoints->pCmplDouble = nullptr;
   qpoints->pCmplFloat = nullptr;
-  qpoints->pFmod = art_quick_fmod;
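+  // xSELF now lives in callee-saved x19, so C library functions can be called directly.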
+  qpoints->pFmod = fmod;
   qpoints->pL2d = nullptr;
-  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pFmodf = fmodf;
   qpoints->pL2f = nullptr;
   qpoints->pD2iz = nullptr;
   qpoints->pF2iz = nullptr;
@@ -129,7 +122,7 @@
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
   qpoints->pStringCompareTo = art_quick_string_compareto;
-  qpoints->pMemcpy = art_quick_memcpy;
+  qpoints->pMemcpy = memcpy;
 
   // Invocation
   qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6b16a2e5..991d29f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -32,6 +32,8 @@
 
     // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
+
+    // Load the appropriate callee-save method.
     ldr wIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET ]
 
     sub sp, sp, #176
@@ -42,43 +44,40 @@
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
-    // FP callee-saves
-    stp d8, d9,   [sp, #8]
-    stp d10, d11, [sp, #24]
-    stp d12, d13, [sp, #40]
-    stp d14, d15, [sp, #56]
+    // Stack alignment filler [sp, #8].
+    // FP callee-saves.
+    stp d8, d9,   [sp, #16]
+    stp d10, d11, [sp, #32]
+    stp d12, d13, [sp, #48]
+    stp d14, d15, [sp, #64]
 
-    // Thread register and x19 (callee-save)
-    stp xSELF, x19, [sp, #72]
-    .cfi_rel_offset x18, 72
+    // GP callee-saves.
+    stp x19, x20, [sp, #80]
     .cfi_rel_offset x19, 80
-
-    // callee-saves
-    stp x20, x21, [sp, #88]
     .cfi_rel_offset x20, 88
+
+    stp x21, x22, [sp, #96]
     .cfi_rel_offset x21, 96
-
-    stp x22, x23, [sp, #104]
     .cfi_rel_offset x22, 104
+
+    stp x23, x24, [sp, #112]
     .cfi_rel_offset x23, 112
-
-    stp x24, x25, [sp, #120]
     .cfi_rel_offset x24, 120
+
+    stp x25, x26, [sp, #128]
     .cfi_rel_offset x25, 128
-
-    stp x26, x27, [sp, #136]
     .cfi_rel_offset x26, 136
+
+    stp x27, x28, [sp, #144]
     .cfi_rel_offset x27, 144
-
-    stp x28, x29, [sp, #152]
     .cfi_rel_offset x28, 152
-    .cfi_rel_offset x29, 160
 
-    str xLR, [sp, #168]
+    stp x29, xLR, [sp, #160]
+    .cfi_rel_offset x29, 160
     .cfi_rel_offset x30, 168
 
-    // Loads appropriate callee-save-method
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs]
+    // Store ArtMethod* Runtime::callee_save_methods_[kRefsAndArgs].
+    str xIP0, [sp]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -95,49 +94,46 @@
     // Our registers aren't intermixed - just spill in order.
     ldr xIP0, [xIP0]  // xIP0 = & (art::Runtime * art::Runtime.instance_) .
 
-    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefAndArgs]  .
+    // xIP0 = (ArtMethod*) Runtime.instance_.callee_save_methods[kRefsOnly]  .
     THIS_LOAD_REQUIRES_READ_BARRIER
+
+    // Load the appropriate callee-save method.
     ldr wIP0, [xIP0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET ]
 
-    sub sp, sp, #112
-    .cfi_adjust_cfa_offset 112
+    sub sp, sp, #96
+    .cfi_adjust_cfa_offset 96
 
     // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 112)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 96)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
-    // Callee-saves
-    stp x19, x20,  [sp, #16]
-    .cfi_rel_offset x19, 16
-    .cfi_rel_offset x20, 24
+    // GP callee-saves.
+    // x20 paired with ArtMethod* - see below.
+    stp x21, x22, [sp, #16]
+    .cfi_rel_offset x21, 16
+    .cfi_rel_offset x22, 24
 
-    stp x21, x22, [sp, #32]
-    .cfi_rel_offset x21, 32
-    .cfi_rel_offset x22, 40
+    stp x23, x24, [sp, #32]
+    .cfi_rel_offset x23, 32
+    .cfi_rel_offset x24, 40
 
-    stp x23, x24, [sp, #48]
-    .cfi_rel_offset x23, 48
-    .cfi_rel_offset x24, 56
+    stp x25, x26, [sp, #48]
+    .cfi_rel_offset x25, 48
+    .cfi_rel_offset x26, 56
 
-    stp x25, x26, [sp, #64]
-    .cfi_rel_offset x25, 64
-    .cfi_rel_offset x26, 72
+    stp x27, x28, [sp, #64]
+    .cfi_rel_offset x27, 64
+    .cfi_rel_offset x28, 72
 
-    stp x27, x28, [sp, #80]
-    .cfi_rel_offset x27, 80
-    .cfi_rel_offset x28, 88
+    stp x29, xLR, [sp, #80]
+    .cfi_rel_offset x29, 80
+    .cfi_rel_offset x30, 88
 
-    // x29(callee-save) and LR
-    stp x29, xLR, [sp, #96]
-    .cfi_rel_offset x29, 96
-    .cfi_rel_offset x30, 104
+    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly].
+    stp xIP0, x20, [sp]
+    .cfi_rel_offset x20, 8
 
-    // Save xSELF to xETR.
-    mov xETR, xSELF
-
-    // Loads appropriate callee-save-method
-    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kRefsOnly]
     // Place sp in Thread::Current()->top_quick_frame.
     mov xIP0, sp
     str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
@@ -145,48 +141,37 @@
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    // Restore xSELF.
-    mov xSELF, xETR
-
-    // Callee-saves
-    ldp x19, x20,  [sp, #16]
-    .cfi_restore x19
+    // Callee-saves.
+    ldr x20, [sp, #8]
     .cfi_restore x20
 
-    ldp x21, x22, [sp, #32]
+    ldp x21, x22, [sp, #16]
     .cfi_restore x21
     .cfi_restore x22
 
-    ldp x23, x24, [sp, #48]
+    ldp x23, x24, [sp, #32]
     .cfi_restore x23
     .cfi_restore x24
 
-    ldp x25, x26, [sp, #64]
+    ldp x25, x26, [sp, #48]
     .cfi_restore x25
     .cfi_restore x26
 
-    ldp x27, x28, [sp, #80]
+    ldp x27, x28, [sp, #64]
     .cfi_restore x27
     .cfi_restore x28
 
-    // x29(callee-save) and LR
-    ldp x29, xLR, [sp, #96]
+    ldp x29, xLR, [sp, #80]
     .cfi_restore x29
     .cfi_restore x30
 
-    add sp, sp, #112
-    .cfi_adjust_cfa_offset -112
+    add sp, sp, #96
+    .cfi_adjust_cfa_offset -96
 .endm
 
 .macro POP_REFS_ONLY_CALLEE_SAVE_FRAME
-    // Restore xSELF as it might be scratched.
-    mov xSELF, xETR
-    // ETR
-    ldr xETR, [sp, #32]
-    .cfi_restore x21
-
-    add sp, sp, #112
-    .cfi_adjust_cfa_offset -112
+    add sp, sp, #96
+    .cfi_adjust_cfa_offset -96
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
@@ -204,31 +189,29 @@
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
 #endif
 
+    // Stack alignment filler [sp, #8].
     // FP args.
-    stp d0, d1, [sp, #8]
-    stp d2, d3, [sp, #24]
-    stp d4, d5, [sp, #40]
-    stp d6, d7, [sp, #56]
+    stp d0, d1, [sp, #16]
+    stp d2, d3, [sp, #32]
+    stp d4, d5, [sp, #48]
+    stp d6, d7, [sp, #64]
 
     // Core args.
-    str x1, [sp, 72]
-    .cfi_rel_offset x1, 72
+    stp x1, x2, [sp, #80]
+    .cfi_rel_offset x1, 80
+    .cfi_rel_offset x2, 88
 
-    stp x2,  x3, [sp, #80]
-    .cfi_rel_offset x2, 80
-    .cfi_rel_offset x3, 88
+    stp x3, x4, [sp, #96]
+    .cfi_rel_offset x3, 96
+    .cfi_rel_offset x4, 104
 
-    stp x4,  x5, [sp, #96]
-    .cfi_rel_offset x4, 96
-    .cfi_rel_offset x5, 104
+    stp x5, x6, [sp, #112]
+    .cfi_rel_offset x5, 112
+    .cfi_rel_offset x6, 120
 
-    stp x6,  x7, [sp, #112]
-    .cfi_rel_offset x6, 112
-    .cfi_rel_offset x7, 120
-
-    // Callee-saves.
-    stp x19, x20, [sp, #128]
-    .cfi_rel_offset x19, 128
+    // x7 and callee-save x20.
+    stp x7, x20, [sp, #128]
+    .cfi_rel_offset x7, 128
     .cfi_rel_offset x20, 136
 
     stp x21, x22, [sp, #144]
@@ -247,13 +230,11 @@
     .cfi_rel_offset x27, 192
     .cfi_rel_offset x28, 200
 
-    // x29(callee-save) and LR
+    // x29(callee-save) and LR.
     stp x29, xLR, [sp, #208]
     .cfi_rel_offset x29, 208
     .cfi_rel_offset x30, 216
 
-    // Save xSELF to xETR.
-    mov xETR, xSELF
 .endm
 
     /*
@@ -291,34 +272,28 @@
 
 // TODO: Probably no need to restore registers preserved by aapcs64.
 .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
-    // Restore xSELF.
-    mov xSELF, xETR
-
     // FP args.
-    ldp d0, d1, [sp, #8]
-    ldp d2, d3, [sp, #24]
-    ldp d4, d5, [sp, #40]
-    ldp d6, d7, [sp, #56]
+    ldp d0, d1, [sp, #16]
+    ldp d2, d3, [sp, #32]
+    ldp d4, d5, [sp, #48]
+    ldp d6, d7, [sp, #64]
 
     // Core args.
-    ldr x1, [sp, 72]
+    ldp x1, x2, [sp, #80]
     .cfi_restore x1
-
-    ldp x2,  x3, [sp, #80]
     .cfi_restore x2
+
+    ldp x3, x4, [sp, #96]
     .cfi_restore x3
-
-    ldp x4,  x5, [sp, #96]
     .cfi_restore x4
+
+    ldp x5, x6, [sp, #112]
     .cfi_restore x5
-
-    ldp x6,  x7, [sp, #112]
     .cfi_restore x6
-    .cfi_restore x7
 
-    // Callee-saves.
-    ldp x19, x20, [sp, #128]
-    .cfi_restore x19
+    // x7 and callee-save x20.
+    ldp x7, x20, [sp, #128]
+    .cfi_restore x7
     .cfi_restore x20
 
     ldp x21, x22, [sp, #144]
@@ -337,7 +312,7 @@
     .cfi_restore x27
     .cfi_restore x28
 
-    // x29(callee-save) and LR
+    // x29(callee-save) and LR.
     ldp x29, xLR, [sp, #208]
     .cfi_restore x29
     .cfi_restore x30
@@ -1106,13 +1081,12 @@
     .extern artThrowClassCastException
 ENTRY art_quick_check_cast
     // Store arguments and link register
-    sub sp, sp, #32                     // Stack needs to be 16b aligned on calls
+    // Stack needs to be 16B aligned on calls.
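+    // xSELF no longer needs to be spilled: x19 is callee-saved under AAPCS64.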
+    stp x0, x1, [sp, #-32]!
     .cfi_adjust_cfa_offset 32
-    stp x0, x1, [sp]
     .cfi_rel_offset x0, 0
     .cfi_rel_offset x1, 8
-    stp xSELF, xLR, [sp, #16]
-    .cfi_rel_offset x18, 16
+    str xLR, [sp, #24]
     .cfi_rel_offset x30, 24
 
     // Call runtime code
@@ -1122,25 +1096,21 @@
     cbz x0, .Lthrow_class_cast_exception
 
     // Restore and return
-    ldp x0, x1, [sp]
+    ldr xLR, [sp, #24]
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp xSELF, xLR, [sp, #16]
-    .cfi_restore x18
-    .cfi_restore x30
-    add sp, sp, #32
     .cfi_adjust_cfa_offset -32
     ret
 
 .Lthrow_class_cast_exception:
     // Restore
-    ldp x0, x1, [sp]
+    ldr xLR, [sp, #24]
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp xSELF, xLR, [sp, #16]
-    .cfi_restore x18
-    .cfi_restore x30
-    add sp, sp, #32
     .cfi_adjust_cfa_offset -32
 
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
@@ -1201,16 +1171,13 @@
     ret
 .Lcheck_assignability:
     // Store arguments and link register
-    sub sp, sp, #48                     // Stack needs to be 16b aligned on calls
-    .cfi_adjust_cfa_offset 48
-    stp x0, x1, [sp]
+    stp x0, x1, [sp, #-32]!
+    .cfi_adjust_cfa_offset 32
     .cfi_rel_offset x0, 0
     .cfi_rel_offset x1, 8
-    stp x2, xSELF, [sp, #16]
+    stp x2, xLR, [sp, #16]
     .cfi_rel_offset x2, 16
-    .cfi_rel_offset x18, 24
-    str xLR, [sp, #32]
-    .cfi_rel_offset x30, 32
+    .cfi_rel_offset x30, 24
 
     // Call runtime code
     mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
@@ -1221,16 +1188,13 @@
     cbz x0, .Lthrow_array_store_exception
 
     // Restore
-    ldp x0, x1, [sp]
+    ldp x2, x30, [sp, #16]
+    .cfi_restore x2
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp x2, xSELF, [sp, #16]
-    .cfi_restore x2
-    .cfi_restore x18
-    ldr xLR, [sp, #32]
-    .cfi_restore x30
-    add sp, sp, #48
-    .cfi_adjust_cfa_offset -48
+    .cfi_adjust_cfa_offset -32
 
     add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                           // "Compress" = do nothing
@@ -1240,16 +1204,13 @@
     strb w3, [x3, x0]
     ret
 .Lthrow_array_store_exception:
-    ldp x0, x1, [sp]
+    ldp x2, x30, [sp, #16]
+    .cfi_restore x2
+    .cfi_restore x30
+    ldp x0, x1, [sp], #32
     .cfi_restore x0
     .cfi_restore x1
-    ldp x2, xSELF, [sp, #16]
-    .cfi_restore x2
-    .cfi_restore x18
-    ldr xLR, [sp, #32]
-    .cfi_restore x30
-    add sp, sp, #48
-    .cfi_adjust_cfa_offset -48
+    .cfi_adjust_cfa_offset -32
 
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     mov x1, x2                    // Pass value.
@@ -1450,8 +1411,7 @@
     mov     x2, xSELF                   // pass Thread::Current
     mov     x3, sp                      // pass SP
     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
-    // Use xETR as xSELF might be scratched by native function above.
-    ldr     x2, [xETR, THREAD_EXCEPTION_OFFSET]
+    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME // Restore frame
     fmov    d0, x0                      // Store result in d0 in case it was float or double
@@ -1601,15 +1561,14 @@
     // prepare for artQuickGenericJniEndTrampoline call
     // (Thread*, result, result_f)
     //    x0       x1       x2        <= C calling convention
-    mov x1, x0      // Result (from saved)
-    mov x0, xETR    // Thread register, original xSELF might be scratched by native code.
+    mov x1, x0      // Result (from saved).
+    mov x0, xSELF   // Thread register.
     fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
 
     bl artQuickGenericJniEndTrampoline
 
     // Pending exceptions possible.
-    // Use xETR as xSELF might be scratched by native code
-    ldr x2, [xETR, THREAD_EXCEPTION_OFFSET]
+    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
     cbnz x2, .Lexception_in_native
 
     // Tear down the alloca.
@@ -1624,8 +1583,6 @@
     ret
 
 .Lexception_in_native:
-    // Restore xSELF. It might have been scratched by native code.
-    mov xSELF, xETR
     // Move to x1 then sp to please assembler.
     ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
     mov sp, x1
@@ -1921,21 +1878,3 @@
     csel x0, x0, x14, ne         // x0 := x0 != 0 ? x14(prev x0=length diff) : x1.
     ret
 END art_quick_string_compareto
-
-// Macro to facilitate adding new entrypoints which call to native function directly.
-// Currently, xSELF is the only thing we need to take care of between managed code and AAPCS.
-// But we might introduce more differences.
-.macro NATIVE_DOWNCALL name, entrypoint
-    .extern \entrypoint
-ENTRY \name
-    stp    xSELF, xLR, [sp, #-16]!
-    bl     \entrypoint
-    ldp    xSELF, xLR, [sp], #16
-    ret
-END \name
-.endm
-
-NATIVE_DOWNCALL art_quick_fmod fmod
-NATIVE_DOWNCALL art_quick_fmodf fmodf
-NATIVE_DOWNCALL art_quick_memcpy memcpy
-NATIVE_DOWNCALL art_quick_assignable_from_code artIsAssignableFromCode
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index dfb3f99..b525309 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -33,18 +33,17 @@
     (1 << art::arm64::LR);
 // Callee saved registers
 static constexpr uint32_t kArm64CalleeSaveRefSpills =
-    (1 << art::arm64::X19) | (1 << art::arm64::X20) | (1 << art::arm64::X21) |
-    (1 << art::arm64::X22) | (1 << art::arm64::X23) | (1 << art::arm64::X24) |
-    (1 << art::arm64::X25) | (1 << art::arm64::X26) | (1 << art::arm64::X27) |
-    (1 << art::arm64::X28) | (1 << art::arm64::X29);
+    (1 << art::arm64::X20) | (1 << art::arm64::X21) | (1 << art::arm64::X22) |
+    (1 << art::arm64::X23) | (1 << art::arm64::X24) | (1 << art::arm64::X25) |
+    (1 << art::arm64::X26) | (1 << art::arm64::X27) | (1 << art::arm64::X28) |
+    (1 << art::arm64::X29);
 // X0 is the method pointer. Not saved.
 static constexpr uint32_t kArm64CalleeSaveArgSpills =
     (1 << art::arm64::X1) | (1 << art::arm64::X2) | (1 << art::arm64::X3) |
     (1 << art::arm64::X4) | (1 << art::arm64::X5) | (1 << art::arm64::X6) |
     (1 << art::arm64::X7);
 static constexpr uint32_t kArm64CalleeSaveAllSpills =
-    // Thread register.
-    (1 << art::arm64::X18);
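+    // Thread register.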
+    (1 << art::arm64::X19);
 
 static constexpr uint32_t kArm64CalleeSaveFpAlwaysSpills = 0;
 static constexpr uint32_t kArm64CalleeSaveFpRefSpills = 0;
diff --git a/runtime/arch/arm64/registers_arm64.h b/runtime/arch/arm64/registers_arm64.h
index 51ae184..4683fc3 100644
--- a/runtime/arch/arm64/registers_arm64.h
+++ b/runtime/arch/arm64/registers_arm64.h
@@ -60,8 +60,7 @@
                  // different enum value to distinguish between the two.
   kNumberOfXRegisters = 33,
   // Aliases.
-  TR  = X18,     // ART Thread Register - Managed Runtime (Caller Saved Reg)
-  ETR = X21,     // ART Thread Register - External Calls  (Callee Saved Reg)
+  TR  = X19,     // ART Thread Register - Managed Runtime (Callee Saved Reg)
   IP0 = X16,     // Used as scratch by VIXL.
   IP1 = X17,     // Used as scratch by ART JNI Assembler.
   FP  = X29,
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index a7d24b8..23b7cfa 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -173,7 +173,7 @@
         // Load call params into the right registers.
         "ldp x0, x1, [sp]\n\t"
         "ldp x2, x3, [sp, #16]\n\t"
-        "ldr x18, [sp, #32]\n\t"
+        "ldr x19, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset -48\n\t"
 
@@ -526,7 +526,7 @@
         // Load call params into the right registers.
         "ldp x0, x1, [sp]\n\t"
         "ldp x2, x3, [sp, #16]\n\t"
-        "ldp x18, x17, [sp, #32]\n\t"
+        "ldp x19, x17, [sp, #32]\n\t"
         "add sp, sp, #48\n\t"
         ".cfi_adjust_cfa_offset -48\n\t"
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index ab5f176..9ad987a 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1408,9 +1408,10 @@
     return nullptr;
   }
   Handle<mirror::Array> fields;
-  if (image_pointer_size_ == 8) {
+  if (image_pointer_size_ == 8u) {
     fields = hs.NewHandle<mirror::Array>(mirror::LongArray::Alloc(self, dex_file.NumFieldIds()));
   } else {
+    DCHECK_EQ(image_pointer_size_, 4u);
     fields = hs.NewHandle<mirror::Array>(mirror::IntArray::Alloc(self, dex_file.NumFieldIds()));
   }
   if (fields.Get() == nullptr) {
@@ -1816,8 +1817,8 @@
   // TODO: Use fast jobjects?
   auto interfaces = hs.NewHandle<mirror::ObjectArray<mirror::Class>>(nullptr);
 
-  mirror::Class* new_class = nullptr;
-  if (!LinkClass(self, descriptor, klass, interfaces, &new_class)) {
+  MutableHandle<mirror::Class> h_new_class = hs.NewHandle<mirror::Class>(nullptr);
+  if (!LinkClass(self, descriptor, klass, interfaces, &h_new_class)) {
     // Linking failed.
     if (!klass->IsErroneous()) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
@@ -1825,10 +1826,8 @@
     return nullptr;
   }
   self->AssertNoPendingException();
-  CHECK(new_class != nullptr) << descriptor;
-  CHECK(new_class->IsResolved()) << descriptor;
-
-  Handle<mirror::Class> new_class_h(hs.NewHandle(new_class));
+  CHECK(h_new_class.Get() != nullptr) << descriptor;
+  CHECK(h_new_class->IsResolved()) << descriptor;
 
   // Instrumentation may have updated entrypoints for all methods of all
   // classes. However it could not update methods of this class while we
@@ -1839,7 +1838,7 @@
     // suspending all threads to update entrypoints while we are doing it
     // for this class.
     DCHECK_EQ(self->GetState(), kRunnable);
-    Runtime::Current()->GetInstrumentation()->InstallStubsForClass(new_class_h.Get());
+    Runtime::Current()->GetInstrumentation()->InstallStubsForClass(h_new_class.Get());
   }
 
   /*
@@ -1853,9 +1852,9 @@
    * The class has been prepared and resolved but possibly not yet verified
    * at this point.
    */
-  Dbg::PostClassPrepare(new_class_h.Get());
+  Dbg::PostClassPrepare(h_new_class.Get());
 
-  return new_class_h.Get();
+  return h_new_class.Get();
 }
 
 uint32_t ClassLinker::SizeOfClassWithoutEmbeddedTables(const DexFile& dex_file,
@@ -2739,11 +2738,7 @@
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   auto existing_it = class_table_.FindWithHash(std::make_pair(descriptor, klass->GetClassLoader()),
                                                hash);
-  if (existing_it == class_table_.end()) {
-    CHECK(klass->IsProxyClass());
-    return nullptr;
-  }
-
+  CHECK(existing_it != class_table_.end());
   mirror::Class* existing = existing_it->Read();
   CHECK_NE(existing, klass) << descriptor;
   CHECK(!existing->IsResolved()) << descriptor;
@@ -3215,7 +3210,7 @@
                                              jobjectArray interfaces, jobject loader,
                                              jobjectArray methods, jobjectArray throws) {
   Thread* self = soa.Self();
-  StackHandleScope<9> hs(self);
+  StackHandleScope<10> hs(self);
   MutableHandle<mirror::Class> klass(hs.NewHandle(
       AllocClass(self, GetClassRoot(kJavaLangClass), sizeof(mirror::Class))));
   if (klass.Get() == nullptr) {
@@ -3229,9 +3224,17 @@
   klass->SetClassLoader(soa.Decode<mirror::ClassLoader*>(loader));
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
   klass->SetName(soa.Decode<mirror::String*>(name));
-  mirror::Class* proxy_class = GetClassRoot(kJavaLangReflectProxy);
-  klass->SetDexCache(proxy_class->GetDexCache());
+  klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
+  std::string descriptor(GetDescriptorForProxy(klass.Get()));
+  size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
+
+  // Insert the class before loading the fields as the field roots
+  // (ArtField::declaring_class_) are only visited from the class
+  // table. There can't be any suspend points between inserting the
+  // class and setting the field arrays below.
+  mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), hash);
+  CHECK(existing == nullptr);
 
   // Instance fields are inherited, but we add a couple of static fields...
   const size_t num_fields = 2;
@@ -3254,18 +3257,21 @@
 
   // Proxies have 1 direct method, the constructor
   {
-    mirror::ObjectArray<mirror::ArtMethod>* directs = AllocArtMethodArray(self, 1);
-    if (UNLIKELY(directs == nullptr)) {
+    StackHandleScope<2> hs2(self);
+    Handle<mirror::ObjectArray<mirror::ArtMethod>> directs =
+        hs2.NewHandle(AllocArtMethodArray(self, 1));
+    if (UNLIKELY(directs.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return nullptr;
     }
-    klass->SetDirectMethods(directs);
-    mirror::ArtMethod* constructor = CreateProxyConstructor(self, klass, proxy_class);
-    if (UNLIKELY(constructor == nullptr)) {
+    klass->SetDirectMethods(directs.Get());
+    Handle<mirror::ArtMethod> constructor =
+        hs2.NewHandle(CreateProxyConstructor(self, klass));
+    if (UNLIKELY(constructor.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return nullptr;
     }
-    klass->SetDirectMethod(0, constructor);
+    klass->SetDirectMethod(0, constructor.Get());
   }
 
   // Create virtual method using specified prototypes.
@@ -3274,35 +3280,38 @@
     << PrettyClass(h_methods->GetClass());
   const size_t num_virtual_methods = h_methods->GetLength();
   {
-    mirror::ObjectArray<mirror::ArtMethod>* virtuals = AllocArtMethodArray(self,
-                                                                           num_virtual_methods);
-    if (UNLIKELY(virtuals == nullptr)) {
+    StackHandleScope<1> hs2(self);
+    Handle<mirror::ObjectArray<mirror::ArtMethod>> virtuals =
+        hs2.NewHandle(AllocArtMethodArray(self, num_virtual_methods));
+    if (UNLIKELY(virtuals.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return nullptr;
     }
-    klass->SetVirtualMethods(virtuals);
+    klass->SetVirtualMethods(virtuals.Get());
   }
   for (size_t i = 0; i < num_virtual_methods; ++i) {
-    StackHandleScope<1> hs2(self);
+    StackHandleScope<2> hs2(self);
     Handle<mirror::ArtMethod> prototype(hs2.NewHandle(h_methods->Get(i)->GetArtMethod()));
-    mirror::ArtMethod* clone = CreateProxyMethod(self, klass, prototype);
-    if (UNLIKELY(clone == nullptr)) {
+    Handle<mirror::ArtMethod> clone(hs2.NewHandle(CreateProxyMethod(self, klass, prototype)));
+    if (UNLIKELY(clone.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());  // OOME.
       return nullptr;
     }
-    klass->SetVirtualMethod(i, clone);
+    klass->SetVirtualMethod(i, clone.Get());
   }
 
-  klass->SetSuperClass(proxy_class);  // The super class is java.lang.reflect.Proxy
-  mirror::Class::SetStatus(klass, mirror::Class::kStatusLoaded, self);  // Now effectively in the loaded state.
+  // The super class is java.lang.reflect.Proxy
+  klass->SetSuperClass(GetClassRoot(kJavaLangReflectProxy));
+  // Now effectively in the loaded state.
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusLoaded, self);
   self->AssertNoPendingException();
 
-  std::string descriptor(GetDescriptorForProxy(klass.Get()));
-  mirror::Class* new_class = nullptr;
+  MutableHandle<mirror::Class> new_class = hs.NewHandle<mirror::Class>(nullptr);
   {
     // Must hold lock on object when resolved.
     ObjectLock<mirror::Class> resolution_lock(self, klass);
-    // Link the fields and virtual methods, creating vtable and iftables
+    // Link the fields and virtual methods, creating vtable and iftables.
+    // The new class will replace the old one in the class table.
     Handle<mirror::ObjectArray<mirror::Class> > h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces)));
     if (!LinkClass(self, descriptor.c_str(), klass, h_interfaces, &new_class)) {
@@ -3310,15 +3319,14 @@
       return nullptr;
     }
   }
-
   CHECK(klass->IsRetired());
-  CHECK_NE(klass.Get(), new_class);
-  klass.Assign(new_class);
+  CHECK_NE(klass.Get(), new_class.Get());
+  klass.Assign(new_class.Get());
 
-  CHECK_EQ(interfaces_sfield->GetDeclaringClass(), new_class);
+  CHECK_EQ(interfaces_sfield->GetDeclaringClass(), klass.Get());
   interfaces_sfield->SetObject<false>(klass.Get(),
                                       soa.Decode<mirror::ObjectArray<mirror::Class>*>(interfaces));
-  CHECK_EQ(throws_sfield->GetDeclaringClass(), new_class);
+  CHECK_EQ(throws_sfield->GetDeclaringClass(), klass.Get());
   throws_sfield->SetObject<false>(klass.Get(),
       soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class> >*>(throws));
 
@@ -3339,7 +3347,8 @@
       CheckProxyMethod(virtual_method, prototype);
     }
 
-    mirror::String* decoded_name = soa.Decode<mirror::String*>(name);
+    StackHandleScope<1> hs2(self);
+    Handle<mirror::String> decoded_name = hs2.NewHandle(soa.Decode<mirror::String*>(name));
     std::string interfaces_field_name(StringPrintf("java.lang.Class[] %s.interfaces",
                                                    decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(PrettyField(klass->GetStaticField(0)), interfaces_field_name);
@@ -3353,9 +3362,6 @@
     CHECK_EQ(klass.Get()->GetThrows(),
              soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>*>(throws));
   }
-  mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(),
-                                        ComputeModifiedUtf8Hash(descriptor.c_str()));
-  CHECK(existing == nullptr);
   return klass.Get();
 }
 
@@ -3390,17 +3396,16 @@
 
 
 mirror::ArtMethod* ClassLinker::CreateProxyConstructor(Thread* self,
-                                                       Handle<mirror::Class> klass,
-                                                       mirror::Class* proxy_class) {
+                                                       Handle<mirror::Class> klass) {
   // Create constructor for Proxy that must initialize h
   mirror::ObjectArray<mirror::ArtMethod>* proxy_direct_methods =
-      proxy_class->GetDirectMethods();
+      GetClassRoot(kJavaLangReflectProxy)->GetDirectMethods();
   CHECK_EQ(proxy_direct_methods->GetLength(), 16);
   mirror::ArtMethod* proxy_constructor = proxy_direct_methods->Get(2);
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
   // constructor method.
-  proxy_class->GetDexCache()->SetResolvedMethod(proxy_constructor->GetDexMethodIndex(),
-                                                proxy_constructor);
+  GetClassRoot(kJavaLangReflectProxy)->GetDexCache()->SetResolvedMethod(
+      proxy_constructor->GetDexMethodIndex(), proxy_constructor);
   // Clone the existing constructor of Proxy (our constructor would just invoke it so steal its
   // code_ too)
   mirror::ArtMethod* constructor = down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self));
@@ -3924,7 +3929,7 @@
 
 bool ClassLinker::LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
                             Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                            mirror::Class** new_class) {
+                            MutableHandle<mirror::Class>* h_new_class_out) {
   CHECK_EQ(mirror::Class::kStatusLoaded, klass->GetStatus());
 
   if (!LinkSuperClass(klass)) {
@@ -3957,25 +3962,23 @@
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
-    *new_class = klass.Get();
+    h_new_class_out->Assign(klass.Get());
   } else {
     CHECK(!klass->IsResolved());
     // Retire the temporary class and create the correctly sized resolved class.
-    *new_class = klass->CopyOf(self, class_size, &imt_handle_scope);
-    if (UNLIKELY(*new_class == nullptr)) {
+    StackHandleScope<1> hs(self);
+    auto h_new_class = hs.NewHandle<mirror::Class>(
+        klass->CopyOf(self, class_size, &imt_handle_scope));
+    if (UNLIKELY(h_new_class.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());  // Expect an OOME.
       mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
       return false;
     }
 
-    CHECK_EQ((*new_class)->GetClassSize(), class_size);
-    StackHandleScope<1> hs(self);
-    auto new_class_h = hs.NewHandleWrapper<mirror::Class>(new_class);
-    ObjectLock<mirror::Class> lock(self, new_class_h);
-
-    FixupTemporaryDeclaringClass(klass.Get(), new_class_h.Get());
-
-    mirror::Class* existing = UpdateClass(descriptor, new_class_h.Get(),
+    CHECK_EQ(h_new_class->GetClassSize(), class_size);
+    ObjectLock<mirror::Class> lock(self, h_new_class);
+    FixupTemporaryDeclaringClass(klass.Get(), h_new_class.Get());
+    mirror::Class* existing = UpdateClass(descriptor, h_new_class.Get(),
                                           ComputeModifiedUtf8Hash(descriptor));
     CHECK(existing == nullptr || existing == klass.Get());
 
@@ -3983,10 +3986,12 @@
     // class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusRetired, self);
 
-    CHECK_EQ(new_class_h->GetStatus(), mirror::Class::kStatusResolving);
+    CHECK_EQ(h_new_class->GetStatus(), mirror::Class::kStatusResolving);
     // This will notify waiters on new_class that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
-    mirror::Class::SetStatus(new_class_h, mirror::Class::kStatusResolved, self);
+    mirror::Class::SetStatus(h_new_class, mirror::Class::kStatusResolved, self);
+    // Return the new class.
+    h_new_class_out->Assign(h_new_class.Get());
   }
   return true;
 }
@@ -5667,7 +5672,7 @@
   ArtField* const parent_field =
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
                                "Ljava/lang/ClassLoader;");
-  DCHECK(parent_field!= nullptr);
+  DCHECK(parent_field != nullptr);
   mirror::Object* boot_cl =
       soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
   parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 95c8aa0..947e152 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -49,6 +49,7 @@
 }  // namespace mirror
 
 template<class T> class Handle;
+template<class T> class MutableHandle;
 class InternTable;
 template<class T> class ObjectLock;
 class Runtime;
@@ -572,7 +573,7 @@
 
   bool LinkClass(Thread* self, const char* descriptor, Handle<mirror::Class> klass,
                  Handle<mirror::ObjectArray<mirror::Class>> interfaces,
-                 mirror::Class** new_class)
+                 MutableHandle<mirror::Class>* h_new_class_out)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkSuperClass(Handle<mirror::Class> klass)
@@ -622,8 +623,7 @@
   // Returns the boot image oat file.
   const OatFile* GetBootOatFile() SHARED_LOCKS_REQUIRED(dex_lock_);
 
-  mirror::ArtMethod* CreateProxyConstructor(Thread* self, Handle<mirror::Class> klass,
-                                            mirror::Class* proxy_class)
+  mirror::ArtMethod* CreateProxyConstructor(Thread* self, Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::ArtMethod* CreateProxyMethod(Thread* self, Handle<mirror::Class> klass,
                                        Handle<mirror::ArtMethod> prototype)
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index c3c0395..0752c59 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2807,7 +2807,27 @@
   JDWP::EventLocation location;
   SetEventLocation(&location, m, dex_pc);
 
+  // We need to be sure no exception is pending when calling JdwpState::PostLocationEvent.
+  // This is required to be able to call JNI functions to create JDWP ids. To achieve this,
+  // we temporarily clear the current thread's exception (if any) and will restore it after
+  // the call.
+  // Note: the only way to get a pending exception here is to suspend on a move-exception
+  // instruction.
+  Thread* const self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Throwable> pending_exception(hs.NewHandle(self->GetException()));
+  self->ClearException();
+  if (kIsDebugBuild && pending_exception.Get() != nullptr) {
+    const DexFile::CodeItem* code_item = location.method->GetCodeItem();
+    const Instruction* instr = Instruction::At(&code_item->insns_[location.dex_pc]);
+    CHECK_EQ(Instruction::MOVE_EXCEPTION, instr->Opcode());
+  }
+
   gJdwpState->PostLocationEvent(&location, this_object, event_flags, return_value);
+
+  if (pending_exception.Get() != nullptr) {
+    self->SetException(pending_exception.Get());
+  }
 }
 
 void Dbg::PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc,
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 345b0ad..838427f 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -89,7 +89,7 @@
   // | LR         |
   // | X29        |
   // |  :         |
-  // | X19        |
+  // | X20        |
   // | X7         |
   // | :          |
   // | X1         |
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 2f0ef26..2a9c03d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -390,6 +390,9 @@
         ArtField* field = holder_->FindFieldByOffset(offset_);
         LOG(INTERNAL_FATAL) << "Field info: "
                             << " holder=" << holder_
+                            << " holder is "
+                            << (mark_sweep_->GetHeap()->IsLiveObjectLocked(holder_)
+                                ? "alive" : "dead")
                             << " holder_size=" << holder_size
                             << " holder_type=" << PrettyTypeOf(holder_)
                             << " offset=" << offset_.Uint32Value()
@@ -405,6 +408,12 @@
                                 ? holder_->AsClass()->NumReferenceStaticFields()
                                 : holder_->GetClass()->NumReferenceInstanceFields())
                             << "\n";
+        // Print the memory content of the holder.
+        uint32_t* p = reinterpret_cast<uint32_t*>(holder_);
+        for (size_t i = 0; i < holder_size / sizeof(uint32_t); ++i) {
+          LOG(INTERNAL_FATAL) << &p[i] << ": holder+" << (i * sizeof(uint32_t)) << " = "
+                              << std::hex << p[i];
+        }
       }
       PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
       MemMap::DumpMaps(LOG(INTERNAL_FATAL), true);
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 9518c9d..079a231 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -201,29 +201,32 @@
   uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(entry_point);
   if (IsOptimized(sizeof(void*))) {
     CodeInfo code_info = GetOptimizedCodeInfo();
-    return code_info.GetStackMapForNativePcOffset(sought_offset).GetDexPc(code_info);
-  }
-
-  MappingTable table(entry_point != nullptr ?
-      GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
-  if (table.TotalSize() == 0) {
-    // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
-    // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
-    DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
-    return DexFile::kDexNoIndex;   // Special no mapping case
-  }
-  // Assume the caller wants a pc-to-dex mapping so check here first.
-  typedef MappingTable::PcToDexIterator It;
-  for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
-    if (cur.NativePcOffset() == sought_offset) {
-      return cur.DexPc();
+    StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset);
+    if (stack_map.IsValid()) {
+      return stack_map.GetDexPc(code_info);
     }
-  }
-  // Now check dex-to-pc mappings.
-  typedef MappingTable::DexToPcIterator It2;
-  for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
-    if (cur.NativePcOffset() == sought_offset) {
-      return cur.DexPc();
+  } else {
+    MappingTable table(entry_point != nullptr ?
+        GetMappingTable(EntryPointToCodePointer(entry_point), sizeof(void*)) : nullptr);
+    if (table.TotalSize() == 0) {
+      // NOTE: Special methods (see Mir2Lir::GenSpecialCase()) have an empty mapping
+      // but they have no suspend checks and, consequently, we never call ToDexPc() for them.
+      DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
+      return DexFile::kDexNoIndex;   // Special no mapping case
+    }
+    // Assume the caller wants a pc-to-dex mapping so check here first.
+    typedef MappingTable::PcToDexIterator It;
+    for (It cur = table.PcToDexBegin(), end = table.PcToDexEnd(); cur != end; ++cur) {
+      if (cur.NativePcOffset() == sought_offset) {
+        return cur.DexPc();
+      }
+    }
+    // Now check dex-to-pc mappings.
+    typedef MappingTable::DexToPcIterator It2;
+    for (It2 cur = table.DexToPcBegin(), end = table.DexToPcEnd(); cur != end; ++cur) {
+      if (cur.NativePcOffset() == sought_offset) {
+        return cur.DexPc();
+      }
     }
   }
   if (abort_on_failure) {
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 1cb437e..bfb9eb5 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -51,7 +51,7 @@
 
 inline ArtField* DexCache::GetResolvedField(uint32_t idx, size_t ptr_size) {
   ArtField* field = nullptr;
-  if (ptr_size == 8) {
+  if (ptr_size == 8u) {
     field = reinterpret_cast<ArtField*>(
         static_cast<uintptr_t>(GetResolvedFields()->AsLongArray()->GetWithoutChecks(idx)));
   } else {
@@ -66,7 +66,7 @@
 }
 
 inline void DexCache::SetResolvedField(uint32_t idx, ArtField* field, size_t ptr_size) {
-  if (ptr_size == 8) {
+  if (ptr_size == 8u) {
     GetResolvedFields()->AsLongArray()->Set(
         idx, static_cast<uint64_t>(reinterpret_cast<uintptr_t>(field)));
   } else {
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index 933784e..ac56129 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -69,6 +69,7 @@
   mirror::DexCache* const dex_cache = declaring_class->GetDexCache();
   ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), sizeof(void*));
   CHECK(art_field != nullptr);
+  CHECK_EQ(declaring_class, art_field->GetDeclaringClass());
   return art_field;
 }
 
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index d65e18e..f0b3c4e 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -340,6 +340,9 @@
                       << "Thread: " << tid << " \"" << thread_name << "\"\n"
                       << "Registers:\n" << Dumpable<UContext>(thread_context) << "\n"
                       << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace);
+  if (kIsDebugBuild && signal_number == SIGSEGV) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
+  }
   Runtime* runtime = Runtime::Current();
   if (runtime != nullptr) {
     if (IsTimeoutSignal(signal_number)) {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 6795516..f7b96ea 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -109,6 +109,7 @@
   uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
   CodeInfo code_info = outer_method->GetOptimizedCodeInfo();
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+  DCHECK(stack_map.IsValid());
   return code_info.GetInlineInfoOf(stack_map);
 }
 
@@ -269,6 +270,7 @@
 
   uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_);
   StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+  DCHECK(stack_map.IsValid());
   size_t depth_in_stack_map = current_inlining_depth_ - 1;
 
   DexRegisterMap dex_register_map = IsInInlinedFrame()
@@ -749,7 +751,7 @@
           CodeInfo code_info = method->GetOptimizedCodeInfo();
           uint32_t native_pc_offset = method->NativeQuickPcOffset(cur_quick_frame_pc_);
           StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
-          if (stack_map.HasInlineInfo(code_info)) {
+          if (stack_map.IsValid() && stack_map.HasInlineInfo(code_info)) {
             InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map);
             DCHECK_EQ(current_inlining_depth_, 0u);
             for (current_inlining_depth_ = inline_info.GetDepth();
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index c61894c..69e57ff 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -641,6 +641,9 @@
 class StackMap {
  public:
   explicit StackMap(MemoryRegion region) : region_(region) {}
+  StackMap() {}
+
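+  // A default-constructed StackMap has a null region; IsValid() distinguishes it
+  // from a real entry.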
+  bool IsValid() const { return region_.pointer() != nullptr; }
 
   uint32_t GetDexPc(const CodeInfo& info) const;
 
@@ -920,10 +923,6 @@
         + (NumberOfBytesForDexRegisterMap() * sizeof(uint8_t));
   }
 
-  uint32_t GetDexRegisterLocationCatalogOffset() const {
-    return kFixedSize;
-  }
-
   DexRegisterLocationCatalog GetDexRegisterLocationCatalog() const {
     return DexRegisterLocationCatalog(region_.Subregion(
         GetDexRegisterLocationCatalogOffset(),
@@ -988,14 +987,18 @@
     return StackMapSize() * GetNumberOfStackMaps();
   }
 
-  size_t GetDexRegisterMapsOffset() const {
+  uint32_t GetDexRegisterLocationCatalogOffset() const {
     return GetStackMapsOffset() + GetStackMapsSize();
   }
 
-  uint32_t GetStackMapsOffset() const {
+  size_t GetDexRegisterMapsOffset() const {
     return GetDexRegisterLocationCatalogOffset() + GetDexRegisterLocationCatalogSize();
   }
 
+  uint32_t GetStackMapsOffset() const {
+    return kFixedSize;
+  }
+
   DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, uint32_t number_of_dex_registers) const {
     DCHECK(stack_map.HasDexRegisterMap(*this));
     uint32_t offset = GetDexRegisterMapsOffset() + stack_map.GetDexRegisterMapOffset(*this);
@@ -1029,8 +1032,7 @@
         return stack_map;
       }
     }
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
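+    // No stack map for this dex pc: return an invalid one and let callers check IsValid().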
+    return StackMap();
   }
 
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset) const {
@@ -1041,8 +1043,7 @@
         return stack_map;
       }
     }
-    LOG(FATAL) << "Unreachable";
-    UNREACHABLE();
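+    // No stack map for this native pc offset: return an invalid one.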
+    return StackMap();
   }
 
   void Dump(std::ostream& os, uint16_t number_of_dex_registers) const;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b3b55c4..6f734dd 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2249,6 +2249,7 @@
         uintptr_t native_pc_offset = m->NativeQuickPcOffset(GetCurrentQuickFramePc(), entry_point);
         CodeInfo code_info = m->GetOptimizedCodeInfo();
         StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
+        DCHECK(map.IsValid());
         MemoryRegion mask = map.GetStackMask(code_info);
         // Visit stack entries that hold pointers.
         for (size_t i = 0; i < mask.size_in_bits(); ++i) {
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d401bd3..b08883e 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -289,6 +289,10 @@
 }
 
 static bool IsLargeMethod(const DexFile::CodeItem* const code_item) {
+  if (code_item == nullptr) {
+    return false;
+  }
+
   uint16_t registers_size = code_item->registers_size_;
   uint32_t insns_size = code_item->insns_size_in_code_units_;