ARM/ARM64: Use trampolines for slow-path entrypoint calls.

This reduces the size of the generated code. We do this only
for AOT compilation where we get the most benefit.

Sizes of aosp_taimen-userdebug prebuilts:
 - before:
   arm/boot*.oat: 19624804
   arm64/boot*.oat: 23265752
   oat/arm64/services.odex: 22417968
 - after:
   arm/boot*.oat: 19460500 (-160KiB)
   arm64/boot*.oat: 22957928 (-301KiB)
   oat/arm64/services.odex: 21957864 (-449KiB)

Test: m test-art-host-gtest
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 12607709
Change-Id: Ie9dbd1ba256173e4e439e8bbb8832a791965cbe6
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index 31062fb..03c906b 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -216,6 +216,9 @@
   uint32_t custom_value1 = 0u;
   uint32_t custom_value2 = 0u;
   switch (linker_patch.GetType()) {
+    case linker::LinkerPatch::Type::kCallEntrypoint:
+      custom_value1 = linker_patch.EntrypointOffset();
+      break;
     case linker::LinkerPatch::Type::kBakerReadBarrierBranch:
       custom_value1 = linker_patch.GetBakerCustomValue1();
       custom_value2 = linker_patch.GetBakerCustomValue2();
diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h
index f9e3930..1c523de 100644
--- a/compiler/linker/linker_patch.h
+++ b/compiler/linker/linker_patch.h
@@ -52,6 +52,7 @@
     kTypeBssEntry,
     kStringRelative,
     kStringBssEntry,
+    kCallEntrypoint,
     kBakerReadBarrierBranch,
   };
 
@@ -141,6 +142,15 @@
     return patch;
   }
 
+  static LinkerPatch CallEntrypointPatch(size_t literal_offset,
+                                         uint32_t entrypoint_offset) {
+    LinkerPatch patch(literal_offset,
+                      Type::kCallEntrypoint,
+                      /* target_dex_file= */ nullptr);  // Not tied to any dex file.
+    patch.entrypoint_offset_ = entrypoint_offset;  // Entrypoint offset in the Thread object.
+    return patch;
+  }
+
   static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset,
                                                  uint32_t custom_value1 = 0u,
                                                  uint32_t custom_value2 = 0u) {
@@ -216,6 +226,11 @@
     return pc_insn_offset_;
   }
 
+  uint32_t EntrypointOffset() const {
+    DCHECK(patch_type_ == Type::kCallEntrypoint);  // Set by CallEntrypointPatch().
+    return entrypoint_offset_;  // Offset of the entrypoint in the Thread object.
+  }
+
   uint32_t GetBakerCustomValue1() const {
     DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
     return baker_custom_value1_;
@@ -249,6 +264,7 @@
     uint32_t type_idx_;           // Type index for Type patches.
     uint32_t string_idx_;         // String index for String patches.
     uint32_t intrinsic_data_;     // Data for IntrinsicObjects.
+    uint32_t entrypoint_offset_;  // Entrypoint offset in the Thread object.
     uint32_t baker_custom_value1_;
     static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 177d982..651a3f7 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -887,10 +887,6 @@
       move_resolver_(graph->GetAllocator(), this),
       assembler_(graph->GetAllocator(),
                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
-      uint32_literals_(std::less<uint32_t>(),
-                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-      uint64_literals_(std::less<uint64_t>(),
-                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -898,7 +894,12 @@
       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      uint64_literals_(std::less<uint64_t>(),
+                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -1687,14 +1688,25 @@
                                        SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
 
-  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArm64PointerSize>(entrypoint).Int32Value()));
-  {
+  ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
+  // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
+  // entire oat file. This adds an extra branch and we do not want to slow down the main path.
+  // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
+  if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) {
+    __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
     // Ensure the pc position is recorded immediately after the `blr` instruction.
     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
     __ blr(lr);
     if (EntrypointRequiresStackMap(entrypoint)) {
       RecordPcInfo(instruction, dex_pc, slow_path);
     }
+  } else {
+    // Ensure the pc position is recorded immediately after the `bl` instruction.
+    ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
+    EmitEntrypointThunkCall(entrypoint_offset);
+    if (EntrypointRequiresStackMap(entrypoint)) {
+      RecordPcInfo(instruction, dex_pc, slow_path);
+    }
   }
 }
 
@@ -4250,6 +4262,15 @@
   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
 }
 
+void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
+  DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
+  DCHECK(!Runtime::Current()->UseJitCompilation());  // Thunk calls are AOT-only.
+  call_entrypoint_patches_.emplace_back(/* dex_file= */ nullptr, entrypoint_offset.Uint32Value());
+  vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
+  __ bind(bl_label);  // The label location is the literal offset of the linker patch.
+  __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+}
+
 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
   if (Runtime::Current()->UseJitCompilation()) {
@@ -4406,6 +4427,7 @@
       boot_image_string_patches_.size() +
       string_bss_entry_patches_.size() +
       boot_image_intrinsic_patches_.size() +
+      call_entrypoint_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
@@ -4430,6 +4452,11 @@
       type_bss_entry_patches_, linker_patches);
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
       string_bss_entry_patches_, linker_patches);
+  for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
+    DCHECK(info.target_dex_file == nullptr);
+    linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
+        info.label.GetLocation(), info.offset_or_index));
+  }
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
         info.label.GetLocation(), info.custom_data));
@@ -4438,7 +4465,8 @@
 }
 
 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
-  return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+  return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
+         patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
 }
 
@@ -4458,6 +4486,14 @@
       }
       break;
     }
+    case linker::LinkerPatch::Type::kCallEntrypoint: {
+      Offset offset(patch.EntrypointOffset());
+      assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
+      if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+        *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
+      }
+      break;
+    }
     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index ada5742..2680bd0 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -629,6 +629,9 @@
                                                dex::StringIndex string_index,
                                                vixl::aarch64::Label* adrp_label = nullptr);
 
+  // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT.
+  void EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset);
+
   // Emit the CBNZ instruction for baker read barrier and record
   // the associated patch for AOT or slow path for JIT.
   void EmitBakerReadBarrierCbnz(uint32_t custom_data);
@@ -887,10 +890,6 @@
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
 
-  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
-  Uint32ToLiteralMap uint32_literals_;
-  // Deduplication map for 64-bit literals, used for non-patchable method address or method code.
-  Uint64ToLiteralMap uint64_literals_;
   // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo.
   // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
   ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
@@ -906,9 +905,15 @@
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
   // PC-relative patch info for IntrinsicObjects.
   ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
+  // Patch info for calls to entrypoint dispatch thunks. Used for slow paths.
+  ArenaDeque<PatchInfo<vixl::aarch64::Label>> call_entrypoint_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
+  // Deduplication map for 32-bit literals, used for JIT for boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
+  // Deduplication map for 64-bit literals, used for JIT for method address or method code.
+  Uint64ToLiteralMap uint64_literals_;
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class literals in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 19d04c9..ac09183 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1856,8 +1856,6 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
       assembler_(graph->GetAllocator()),
-      uint32_literals_(std::less<uint32_t>(),
-                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -1865,7 +1863,10 @@
       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      uint32_literals_(std::less<uint32_t>(),
+                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -2383,15 +2384,31 @@
                                          uint32_t dex_pc,
                                          SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
-  __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()));
-  // Ensure the pc position is recorded immediately after the `blx` instruction.
-  // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
-  ExactAssemblyScope aas(GetVIXLAssembler(),
-                         vixl32::k16BitT32InstructionSizeInBytes,
-                         CodeBufferCheckScope::kExactSize);
-  __ blx(lr);
-  if (EntrypointRequiresStackMap(entrypoint)) {
-    RecordPcInfo(instruction, dex_pc, slow_path);
+
+  ThreadOffset32 entrypoint_offset = GetThreadOffset<kArmPointerSize>(entrypoint);
+  // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
+  // entire oat file. This adds an extra branch and we do not want to slow down the main path.
+  // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
+  if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) {
+    __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
+    // Ensure the pc position is recorded immediately after the `blx` instruction.
+    // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
+    ExactAssemblyScope aas(GetVIXLAssembler(),
+                           vixl32::k16BitT32InstructionSizeInBytes,
+                           CodeBufferCheckScope::kExactSize);
+    __ blx(lr);
+    if (EntrypointRequiresStackMap(entrypoint)) {
+      RecordPcInfo(instruction, dex_pc, slow_path);
+    }
+  } else {
+    // Ensure the pc position is recorded immediately after the `bl` instruction.
+    ExactAssemblyScope aas(GetVIXLAssembler(),
+                           vixl32::k32BitT32InstructionSizeInBytes,
+                           CodeBufferCheckScope::kExactSize);
+    EmitEntrypointThunkCall(entrypoint_offset);
+    if (EntrypointRequiresStackMap(entrypoint)) {
+      RecordPcInfo(instruction, dex_pc, slow_path);
+    }
   }
 }
 
@@ -8858,6 +8875,17 @@
   return &patches->back();
 }
 
+void CodeGeneratorARMVIXL::EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset) {
+  DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
+  DCHECK(!Runtime::Current()->UseJitCompilation());  // Thunk calls are AOT-only.
+  call_entrypoint_patches_.emplace_back(/* dex_file= */ nullptr, entrypoint_offset.Uint32Value());
+  vixl::aarch32::Label* bl_label = &call_entrypoint_patches_.back().label;
+  __ bind(bl_label);  // The label location is the literal offset of the linker patch.
+  vixl32::Label placeholder_label;
+  __ bl(&placeholder_label);  // Placeholder, patched at link-time.
+  __ bind(&placeholder_label);  // Dummy target right after the BL until it is patched.
+}
+
 void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
   if (Runtime::Current()->UseJitCompilation()) {
@@ -8980,6 +9008,7 @@
       /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * boot_image_intrinsic_patches_.size() +
+      call_entrypoint_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
@@ -9004,6 +9033,11 @@
       type_bss_entry_patches_, linker_patches);
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
       string_bss_entry_patches_, linker_patches);
+  for (const PatchInfo<vixl32::Label>& info : call_entrypoint_patches_) {
+    DCHECK(info.target_dex_file == nullptr);
+    linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
+        info.label.GetLocation(), info.offset_or_index));
+  }
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
         info.label.GetLocation(), info.custom_data));
@@ -9012,7 +9046,8 @@
 }
 
 bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
-  return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+  return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
+         patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
 }
 
@@ -9021,23 +9056,30 @@
                                          /*out*/ std::string* debug_name) {
   arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
   switch (patch.GetType()) {
-    case linker::LinkerPatch::Type::kCallRelative:
+    case linker::LinkerPatch::Type::kCallRelative: {
       // The thunk just uses the entry point in the ArtMethod. This works even for calls
       // to the generic JNI and interpreter trampolines.
-      assembler.LoadFromOffset(
-          arm::kLoadWord,
-          vixl32::pc,
-          vixl32::r0,
-          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+      MemberOffset offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
+      assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, vixl32::r0, offset.Int32Value());
       assembler.GetVIXLAssembler()->Bkpt(0);
       if (GetCompilerOptions().GenerateAnyDebugInfo()) {
         *debug_name = "MethodCallThunk";
       }
       break;
-    case linker::LinkerPatch::Type::kBakerReadBarrierBranch:
+    }
+    case linker::LinkerPatch::Type::kCallEntrypoint: {
+      assembler.LoadFromOffset(arm::kLoadWord, vixl32::pc, tr, patch.EntrypointOffset());
+      assembler.GetVIXLAssembler()->Bkpt(0);
+      if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+        *debug_name = "EntrypointCallThunk_" + std::to_string(patch.EntrypointOffset());
+      }
+      break;
+    }
+    case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
       break;
+    }
     default:
       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
       UNREACHABLE();
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 5edca87..4742f78 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -589,6 +589,9 @@
   PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
                                               dex::StringIndex string_index);
 
+  // Emit the BL instruction for entrypoint thunk call and record the associated patch for AOT.
+  void EmitEntrypointThunkCall(ThreadOffset32 entrypoint_offset);
+
   // Emit the BNE instruction for baker read barrier and record
   // the associated patch for AOT or slow path for JIT.
   void EmitBakerReadBarrierBne(uint32_t custom_data);
@@ -869,8 +872,6 @@
 
   ArmVIXLAssembler assembler_;
 
-  // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
-  Uint32ToLiteralMap uint32_literals_;
   // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
   // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
   ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
@@ -886,9 +887,13 @@
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
   // PC-relative patch info for IntrinsicObjects.
   ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
+  // Patch info for calls to entrypoint dispatch thunks. Used for slow paths.
+  ArenaDeque<PatchInfo<vixl::aarch32::Label>> call_entrypoint_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
+  // Deduplication map for 32-bit literals, used for JIT for boot image addresses.
+  Uint32ToLiteralMap uint32_literals_;
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class literals in JIT compiled code.
diff --git a/dex2oat/linker/arm/relative_patcher_arm_base.cc b/dex2oat/linker/arm/relative_patcher_arm_base.cc
index 828dc5d..35e799a 100644
--- a/dex2oat/linker/arm/relative_patcher_arm_base.cc
+++ b/dex2oat/linker/arm/relative_patcher_arm_base.cc
@@ -386,6 +386,12 @@
   return ThunkKey(ThunkType::kMethodCall);
 }
 
+ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetEntrypointCallKey(
+    const LinkerPatch& patch) {  // The key embeds the patch's entrypoint offset.
+  DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kCallEntrypoint);
+  return ThunkKey(ThunkType::kEntrypointCall, patch.EntrypointOffset());
+}
+
 ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey(
     const LinkerPatch& patch) {
   DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
@@ -399,6 +405,7 @@
   for (const LinkerPatch& patch : compiled_method->GetPatches()) {
     uint32_t patch_offset = code_offset + patch.LiteralOffset();
     ThunkKey key(static_cast<ThunkType>(-1));
+    bool simple_thunk_patch = false;
     ThunkData* old_data = nullptr;
     if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
       key = GetMethodCallKey();
@@ -411,8 +418,14 @@
       } else {
         old_data = method_call_thunk_;
       }
+    } else if (patch.GetType() == LinkerPatch::Type::kCallEntrypoint) {
+      key = GetEntrypointCallKey(patch);
+      simple_thunk_patch = true;
     } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
       key = GetBakerThunkKey(patch);
+      simple_thunk_patch = true;
+    }
+    if (simple_thunk_patch) {
       auto lb = thunks_.lower_bound(key);
       if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) {
         uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key);
diff --git a/dex2oat/linker/arm/relative_patcher_arm_base.h b/dex2oat/linker/arm/relative_patcher_arm_base.h
index 0eb4417..bf3e81f 100644
--- a/dex2oat/linker/arm/relative_patcher_arm_base.h
+++ b/dex2oat/linker/arm/relative_patcher_arm_base.h
@@ -44,6 +44,7 @@
 
   enum class ThunkType {
     kMethodCall,              // Method call thunk.
+    kEntrypointCall,          // Entrypoint call.
     kBakerReadBarrier,        // Baker read barrier.
   };
 
@@ -84,6 +85,7 @@
   };
 
   static ThunkKey GetMethodCallKey();
+  static ThunkKey GetEntrypointCallKey(const LinkerPatch& patch);
   static ThunkKey GetBakerThunkKey(const LinkerPatch& patch);
 
   uint32_t ReserveSpaceInternal(uint32_t offset,
diff --git a/dex2oat/linker/arm/relative_patcher_thumb2.cc b/dex2oat/linker/arm/relative_patcher_thumb2.cc
index 697fb09..72b93ec 100644
--- a/dex2oat/linker/arm/relative_patcher_thumb2.cc
+++ b/dex2oat/linker/arm/relative_patcher_thumb2.cc
@@ -58,28 +58,10 @@
                                       uint32_t literal_offset,
                                       uint32_t patch_offset,
                                       uint32_t target_offset) {
-  DCHECK_LE(literal_offset + 4u, code->size());
-  DCHECK_EQ(literal_offset & 1u, 0u);
-  DCHECK_EQ(patch_offset & 1u, 0u);
+  DCHECK_ALIGNED(patch_offset, 2u);
   DCHECK_EQ(target_offset & 1u, 1u);  // Thumb2 mode bit.
   uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u);
-  displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
-  DCHECK_EQ(displacement & 1u, 0u);
-  DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u);  // 25-bit signed.
-  uint32_t signbit = (displacement >> 31) & 0x1;
-  uint32_t i1 = (displacement >> 23) & 0x1;
-  uint32_t i2 = (displacement >> 22) & 0x1;
-  uint32_t imm10 = (displacement >> 12) & 0x03ff;
-  uint32_t imm11 = (displacement >> 1) & 0x07ff;
-  uint32_t j1 = i1 ^ (signbit ^ 1);
-  uint32_t j2 = i2 ^ (signbit ^ 1);
-  uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
-  value |= 0xf000d000;  // BL
-
-  // Check that we're just overwriting an existing BL.
-  DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000);
-  // Write the new BL.
-  SetInsn32(code, literal_offset, value);
+  PatchBl(code, literal_offset, displacement);
 }
 
 void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
@@ -102,6 +84,17 @@
   SetInsn32(code, literal_offset, insn);
 }
 
+void Thumb2RelativePatcher::PatchEntrypointCall(std::vector<uint8_t>* code,
+                                                const LinkerPatch& patch,
+                                                uint32_t patch_offset) {
+  DCHECK_ALIGNED(patch_offset, 2u);
+  ThunkKey key = GetEntrypointCallKey(patch);
+  uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
+  DCHECK_ALIGNED(target_offset, 4u);
+  uint32_t displacement = target_offset - patch_offset;  // Wraps if the thunk is behind.
+  PatchBl(code, patch.LiteralOffset(), displacement);  // PatchBl() subtracts kPcDisplacement.
+}
+
 void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                                         const LinkerPatch& patch,
                                                         uint32_t patch_offset) {
@@ -127,6 +120,7 @@
 uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
   switch (key.GetType()) {
     case ThunkType::kMethodCall:
+    case ThunkType::kEntrypointCall:
       return kMaxMethodCallPositiveDisplacement;
     case ThunkType::kBakerReadBarrier:
       return kMaxBcondPositiveDisplacement;
@@ -136,12 +130,35 @@
 uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
   switch (key.GetType()) {
     case ThunkType::kMethodCall:
+    case ThunkType::kEntrypointCall:
       return kMaxMethodCallNegativeDisplacement;
     case ThunkType::kBakerReadBarrier:
       return kMaxBcondNegativeDisplacement;
   }
 }
 
+void Thumb2RelativePatcher::PatchBl(std::vector<uint8_t>* code,
+                                    uint32_t literal_offset,
+                                    uint32_t displacement) {
+  displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
+  DCHECK_EQ(displacement & 1u, 0u);
+  DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u);  // 25-bit signed.
+  uint32_t signbit = (displacement >> 31) & 0x1;  // S: sign of the displacement.
+  uint32_t i1 = (displacement >> 23) & 0x1;  // Displacement bit 23.
+  uint32_t i2 = (displacement >> 22) & 0x1;  // Displacement bit 22.
+  uint32_t imm10 = (displacement >> 12) & 0x03ff;  // Displacement bits 12-21.
+  uint32_t imm11 = (displacement >> 1) & 0x07ff;  // Displacement bits 1-11.
+  uint32_t j1 = i1 ^ (signbit ^ 1);  // J1 = NOT(I1 EOR S).
+  uint32_t j2 = i2 ^ (signbit ^ 1);  // J2 = NOT(I2 EOR S).
+  uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
+  value |= 0xf000d000;  // BL
+
+  // Check that we're just overwriting an existing BL.
+  DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000);
+  // Write the new BL.
+  SetInsn32(code, literal_offset, value);
+}
+
 void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
   DCHECK_LE(offset + 4u, code->size());
   DCHECK_ALIGNED(offset, 2u);
diff --git a/dex2oat/linker/arm/relative_patcher_thumb2.h b/dex2oat/linker/arm/relative_patcher_thumb2.h
index dbf64a1..d360482 100644
--- a/dex2oat/linker/arm/relative_patcher_thumb2.h
+++ b/dex2oat/linker/arm/relative_patcher_thumb2.h
@@ -42,6 +42,9 @@
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
                                 uint32_t target_offset) override;
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) override;
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) override;
@@ -51,7 +54,9 @@
   uint32_t MaxNegativeDisplacement(const ThunkKey& key) override;
 
  private:
-  void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
+  static void PatchBl(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t displacement);
+
+  static void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
   static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
 
   template <typename Vector>
diff --git a/dex2oat/linker/arm/relative_patcher_thumb2_test.cc b/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
index 04a897e..296bf61 100644
--- a/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
+++ b/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
@@ -225,7 +225,8 @@
 
     // Make sure the ThunkProvider has all the necessary thunks.
     for (const LinkerPatch& patch : patches) {
-      if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch ||
+      if (patch.GetType() == LinkerPatch::Type::kCallEntrypoint ||
+          patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch ||
           patch.GetType() == LinkerPatch::Type::kCallRelative) {
         std::string debug_name;
         std::vector<uint8_t> thunk_code = CompileThunk(patch, &debug_name);
@@ -662,6 +663,35 @@
   ASSERT_LT(GetMethodOffset(1u), 0xfcu);
 }
 
+TEST_F(Thumb2RelativePatcherTest, EntrypointCall) {
+  constexpr uint32_t kEntrypointOffset = 512;  // Used as the imm12 of the expected LDR below.
+  const LinkerPatch patches[] = {
+      LinkerPatch::CallEntrypointPatch(0u, kEntrypointOffset),  // Patch at literal offset 0.
+  };
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  uint32_t method_offset = GetMethodOffset(1u);
+  uint32_t thunk_offset = CompiledCode::AlignCode(method_offset + kCallCode.size(),
+                                                  InstructionSet::kThumb2);
+  uint32_t diff = thunk_offset - method_offset - kPcAdjustment;  // Expected BL displacement.
+  ASSERT_TRUE(IsAligned<2u>(diff));
+  ASSERT_LT(diff >> 1, 1u << 8);  // Simple encoding, (diff >> 1) fits into 8 bits.
+  auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 1) & 0xffu));
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+
+  // Verify the thunk.
+  uint32_t ldr_pc_tr_offset =
+      0xf8d00000 |                        // LDR Rt, [Rn, #<imm12>]
+      (/* tr */ 9 << 16) |                // Rn = TR
+      (/* pc */ 15 << 12) |               // Rt = PC
+      kEntrypointOffset;                  // imm12
+  uint16_t bkpt = 0xbe00;
+  ASSERT_LE(6u, output_.size() - thunk_offset);  // 32-bit LDR followed by 16-bit BKPT.
+  EXPECT_EQ(ldr_pc_tr_offset, GetOutputInsn32(thunk_offset));
+  EXPECT_EQ(bkpt, GetOutputInsn16(thunk_offset + 4u));
+}
+
 const uint32_t kBakerValidRegs[] = {
     0,  1,  2,  3,  4,  5,  6,  7,
     9, 10, 11,                      // r8 (rMR), IP, SP, LR and PC are reserved.
diff --git a/dex2oat/linker/arm64/relative_patcher_arm64.cc b/dex2oat/linker/arm64/relative_patcher_arm64.cc
index ee8d4d1..2260f66 100644
--- a/dex2oat/linker/arm64/relative_patcher_arm64.cc
+++ b/dex2oat/linker/arm64/relative_patcher_arm64.cc
@@ -58,6 +58,7 @@
 inline bool IsAdrpPatch(const LinkerPatch& patch) {
   switch (patch.GetType()) {
     case LinkerPatch::Type::kCallRelative:
+    case LinkerPatch::Type::kCallEntrypoint:
     case LinkerPatch::Type::kBakerReadBarrierBranch:
       return false;
     case LinkerPatch::Type::kIntrinsicReference:
@@ -189,30 +190,21 @@
 
 void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code,
                                      uint32_t literal_offset,
-                                     uint32_t patch_offset, uint32_t
-                                     target_offset) {
-  DCHECK_LE(literal_offset + 4u, code->size());
-  DCHECK_EQ(literal_offset & 3u, 0u);
-  DCHECK_EQ(patch_offset & 3u, 0u);
-  DCHECK_EQ(target_offset & 3u, 0u);
+                                     uint32_t patch_offset,
+                                     uint32_t target_offset) {
+  DCHECK_ALIGNED(literal_offset, 4u);
+  DCHECK_ALIGNED(patch_offset, 4u);
+  DCHECK_ALIGNED(target_offset, 4u);
   uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u);
-  DCHECK_EQ(displacement & 3u, 0u);
-  DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u);  // 28-bit signed.
-  uint32_t insn = (displacement & 0x0fffffffu) >> 2;
-  insn |= 0x94000000;  // BL
-
-  // Check that we're just overwriting an existing BL.
-  DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u);
-  // Write the new BL.
-  SetInsn(code, literal_offset, insn);
+  PatchBl(code, literal_offset, displacement);
 }
 
 void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
                                                     const LinkerPatch& patch,
                                                     uint32_t patch_offset,
                                                     uint32_t target_offset) {
-  DCHECK_EQ(patch_offset & 3u, 0u);
-  DCHECK_EQ(target_offset & 3u, 0u);
+  DCHECK_ALIGNED(patch_offset, 4u);
+  DCHECK_ALIGNED(target_offset, 4u);
   uint32_t literal_offset = patch.LiteralOffset();
   uint32_t insn = GetInsn(code, literal_offset);
   uint32_t pc_insn_offset = patch.PcInsnOffset();
@@ -307,13 +299,21 @@
   }
 }
 
+void Arm64RelativePatcher::PatchEntrypointCall(std::vector<uint8_t>* code,
+                                               const LinkerPatch& patch,
+                                               uint32_t patch_offset) {
+  DCHECK_ALIGNED(patch_offset, 4u);
+  ThunkKey thunk_key = GetEntrypointCallKey(patch);
+  uint32_t thunk_offset = GetThunkTargetOffset(thunk_key, patch_offset);
+  // The BL displacement is relative to the patched instruction; PatchBl() DCHECKs its range.
+  PatchBl(code, patch.LiteralOffset(), thunk_offset - patch_offset);
+}
+
 void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                                        const LinkerPatch& patch,
                                                        uint32_t patch_offset) {
   DCHECK_ALIGNED(patch_offset, 4u);
   uint32_t literal_offset = patch.LiteralOffset();
-  DCHECK_ALIGNED(literal_offset, 4u);
-  DCHECK_LT(literal_offset, code->size());
   uint32_t insn = GetInsn(code, literal_offset);
   DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000);  // CBNZ Xt, +0 (unpatched)
   ThunkKey key = GetBakerThunkKey(patch);
@@ -328,6 +328,7 @@
 uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) {
   switch (key.GetType()) {
     case ThunkType::kMethodCall:
+    case ThunkType::kEntrypointCall:
       return kMaxMethodCallPositiveDisplacement;
     case ThunkType::kBakerReadBarrier:
       return kMaxBcondPositiveDisplacement;
@@ -337,6 +338,7 @@
 uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) {
   switch (key.GetType()) {
     case ThunkType::kMethodCall:
+    case ThunkType::kEntrypointCall:
       return kMaxMethodCallNegativeDisplacement;
     case ThunkType::kBakerReadBarrier:
       return kMaxBcondNegativeDisplacement;
@@ -357,6 +359,20 @@
       ((disp & 0x80000000u) >> (31 - 23));
 }
 
+void Arm64RelativePatcher::PatchBl(std::vector<uint8_t>* code,
+                                   uint32_t literal_offset,
+                                   uint32_t displacement) {
+  // Rewrites the BL at `literal_offset` in `code` to branch by `displacement` bytes.
+  DCHECK_ALIGNED(displacement, 4u);
+  // The displacement must fit in 28 bits as a signed value.
+  DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u);
+  // Only an existing BL may be overwritten.
+  DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u);
+  // Encode the word displacement in the low 26 bits under the BL opcode and store it.
+  const uint32_t imm26 = (displacement & 0x0fffffffu) >> 2;
+  SetInsn(code, literal_offset, 0x94000000u | imm26);
+}
+
 bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,
                                                    uint32_t literal_offset,
                                                    uint32_t patch_offset) {
@@ -409,7 +425,7 @@
 
 void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
   DCHECK_LE(offset + 4u, code->size());
-  DCHECK_EQ(offset & 3u, 0u);
+  DCHECK_ALIGNED(offset, 4u);
   uint8_t* addr = &(*code)[offset];
   addr[0] = (value >> 0) & 0xff;
   addr[1] = (value >> 8) & 0xff;
@@ -419,7 +435,7 @@
 
 uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) {
   DCHECK_LE(offset + 4u, code.size());
-  DCHECK_EQ(offset & 3u, 0u);
+  DCHECK_ALIGNED(offset, 4u);
   const uint8_t* addr = &code[offset];
   return
       (static_cast<uint32_t>(addr[0]) << 0) +
diff --git a/dex2oat/linker/arm64/relative_patcher_arm64.h b/dex2oat/linker/arm64/relative_patcher_arm64.h
index e95d0fe..9ad2c96 100644
--- a/dex2oat/linker/arm64/relative_patcher_arm64.h
+++ b/dex2oat/linker/arm64/relative_patcher_arm64.h
@@ -47,6 +47,9 @@
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
                                 uint32_t target_offset) override;
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) override;
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) override;
@@ -57,10 +60,11 @@
 
  private:
   static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp);
+  static void PatchBl(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t displacement);
 
   static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset,
                                       uint32_t patch_offset);
-  void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
+  static void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
   static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset);
 
   template <typename Alloc>
diff --git a/dex2oat/linker/arm64/relative_patcher_arm64_test.cc b/dex2oat/linker/arm64/relative_patcher_arm64_test.cc
index 9e54bbf..8bae5d4 100644
--- a/dex2oat/linker/arm64/relative_patcher_arm64_test.cc
+++ b/dex2oat/linker/arm64/relative_patcher_arm64_test.cc
@@ -198,7 +198,8 @@
 
     // Make sure the ThunkProvider has all the necessary thunks.
     for (const LinkerPatch& patch : patches) {
-      if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch ||
+      if (patch.GetType() == LinkerPatch::Type::kCallEntrypoint ||
+          patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch ||
           patch.GetType() == LinkerPatch::Type::kCallRelative) {
         std::string debug_name;
         std::vector<uint8_t> thunk_code = CompileThunk(patch, &debug_name);
@@ -1005,6 +1006,36 @@
       { 0u, 8u });
 }
 
+TEST_F(Arm64RelativePatcherTestDefault, EntrypointCall) {
+  constexpr uint32_t kEntrypointOffset = 512;
+  const LinkerPatch patches[] = {
+      LinkerPatch::CallEntrypointPatch(0u, kEntrypointOffset),
+  };
+  AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+  Link();
+  // The BL in the method must target the thunk expected right after the method's code.
+  uint32_t method_offset = GetMethodOffset(1u);
+  uint32_t thunk_offset = CompiledCode::AlignCode(method_offset + kCallCode.size(),
+                                                  InstructionSet::kArm64);
+  uint32_t diff = thunk_offset - method_offset;
+  ASSERT_TRUE(IsAligned<4u>(diff));
+  ASSERT_LT(diff, 128 * MB);
+  auto expected_code = RawCode({kBlPlus0 | (diff >> 2)});
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+
+  // Verify the thunk: LDR ip0, [TR, #kEntrypointOffset] followed by BR ip0.
+  uint32_t ldr_ip0_tr_offset =
+      0xf9400000 |                        // LDR Xt, [Xn, #<simm>]
+      ((kEntrypointOffset >> 3) << 10) |  // imm12 = (simm >> scale), scale = 3
+      (/* tr */ 19 << 5) |                // Xn = TR
+      /* ip0 */ 16;                       // Xt = ip0
+  uint32_t br_ip0 = 0xd61f0000 | (/* ip0 */ 16 << 5);
+  // Bounds check in additive form: a subtraction would wrap if the output were too short.
+  ASSERT_LE(thunk_offset + 8u, output_.size());
+  EXPECT_EQ(ldr_ip0_tr_offset, GetOutputInsn(thunk_offset));
+  EXPECT_EQ(br_ip0, GetOutputInsn(thunk_offset + 4u));
+}
+
 void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
diff --git a/dex2oat/linker/mips/relative_patcher_mips.cc b/dex2oat/linker/mips/relative_patcher_mips.cc
index 69e0846..4f4dc48 100644
--- a/dex2oat/linker/mips/relative_patcher_mips.cc
+++ b/dex2oat/linker/mips/relative_patcher_mips.cc
@@ -86,6 +86,12 @@
   }
 }
 
+void MipsRelativePatcher::PatchEntrypointCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                              const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                              uint32_t patch_offset ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "UNIMPLEMENTED";  // Entrypoint call patches are not implemented for MIPS.
+}
+
 void MipsRelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
                                                       const LinkerPatch& patch ATTRIBUTE_UNUSED,
                                                       uint32_t patch_offset ATTRIBUTE_UNUSED) {
diff --git a/dex2oat/linker/mips/relative_patcher_mips.h b/dex2oat/linker/mips/relative_patcher_mips.h
index 4c385a3..7cdac45 100644
--- a/dex2oat/linker/mips/relative_patcher_mips.h
+++ b/dex2oat/linker/mips/relative_patcher_mips.h
@@ -41,6 +41,9 @@
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
                                 uint32_t target_offset) override;
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) override;
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) override;
diff --git a/dex2oat/linker/mips64/relative_patcher_mips64.cc b/dex2oat/linker/mips64/relative_patcher_mips64.cc
index aae5746..2992487 100644
--- a/dex2oat/linker/mips64/relative_patcher_mips64.cc
+++ b/dex2oat/linker/mips64/relative_patcher_mips64.cc
@@ -84,6 +84,12 @@
   }
 }
 
+void Mips64RelativePatcher::PatchEntrypointCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                                const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                                uint32_t patch_offset ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "UNIMPLEMENTED";  // Entrypoint call patches are not implemented for MIPS64.
+}
+
 void Mips64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
                                                         const LinkerPatch& patch ATTRIBUTE_UNUSED,
                                                         uint32_t patch_offset ATTRIBUTE_UNUSED) {
diff --git a/dex2oat/linker/mips64/relative_patcher_mips64.h b/dex2oat/linker/mips64/relative_patcher_mips64.h
index 7b7c2cc..9d27b87 100644
--- a/dex2oat/linker/mips64/relative_patcher_mips64.h
+++ b/dex2oat/linker/mips64/relative_patcher_mips64.h
@@ -39,6 +39,9 @@
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
                                 uint32_t target_offset) override;
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) override;
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) override;
diff --git a/dex2oat/linker/multi_oat_relative_patcher.h b/dex2oat/linker/multi_oat_relative_patcher.h
index 9b47a0d..2daada4 100644
--- a/dex2oat/linker/multi_oat_relative_patcher.h
+++ b/dex2oat/linker/multi_oat_relative_patcher.h
@@ -114,6 +114,13 @@
     relative_patcher_->PatchPcRelativeReference(code, patch, patch_offset, target_offset);
   }
 
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) {
+    patch_offset += adjustment_;  // Shift the offset by `adjustment_` before forwarding.
+    relative_patcher_->PatchEntrypointCall(code, patch, patch_offset);
+  }
+
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) {
diff --git a/dex2oat/linker/multi_oat_relative_patcher_test.cc b/dex2oat/linker/multi_oat_relative_patcher_test.cc
index 274084f..2a05816 100644
--- a/dex2oat/linker/multi_oat_relative_patcher_test.cc
+++ b/dex2oat/linker/multi_oat_relative_patcher_test.cc
@@ -94,6 +94,12 @@
       last_target_offset_ = target_offset;
     }
 
+    void PatchEntrypointCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                             const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                             uint32_t patch_offset ATTRIBUTE_UNUSED) override {
+      LOG(FATAL) << "UNIMPLEMENTED";  // Not implemented by this test patcher.
+    }
+
     void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
                                      const LinkerPatch& patch ATTRIBUTE_UNUSED,
                                      uint32_t patch_offset ATTRIBUTE_UNUSED) override {
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 3c49634..7368ae4 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -1812,6 +1812,12 @@
                                                                    target_offset);
               break;
             }
+            case LinkerPatch::Type::kCallEntrypoint: {
+              writer_->relative_patcher_->PatchEntrypointCall(&patched_code_,
+                                                              patch,
+                                                              offset_ + literal_offset);
+              break;
+            }
             case LinkerPatch::Type::kBakerReadBarrierBranch: {
               writer_->relative_patcher_->PatchBakerReadBarrierBranch(&patched_code_,
                                                                       patch,
diff --git a/dex2oat/linker/relative_patcher.cc b/dex2oat/linker/relative_patcher.cc
index 4db0e8a..f746cfb 100644
--- a/dex2oat/linker/relative_patcher.cc
+++ b/dex2oat/linker/relative_patcher.cc
@@ -77,6 +77,12 @@
       LOG(FATAL) << "Unexpected relative dex cache array patch.";
     }
 
+    void PatchEntrypointCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                             const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                             uint32_t patch_offset ATTRIBUTE_UNUSED) override {
+      LOG(FATAL) << "Unexpected entrypoint call patch.";  // Must not be reached.
+    }
+
     void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
                                      const LinkerPatch& patch ATTRIBUTE_UNUSED,
                                      uint32_t patch_offset ATTRIBUTE_UNUSED) override {
diff --git a/dex2oat/linker/relative_patcher.h b/dex2oat/linker/relative_patcher.h
index e8e15c9..c05445c 100644
--- a/dex2oat/linker/relative_patcher.h
+++ b/dex2oat/linker/relative_patcher.h
@@ -137,6 +137,11 @@
                                         uint32_t patch_offset,
                                         uint32_t target_offset) = 0;
 
+  // Patch a call to an entrypoint trampoline.
+  virtual void PatchEntrypointCall(std::vector<uint8_t>* code,
+                                   const LinkerPatch& patch,
+                                   uint32_t patch_offset) = 0;
+
   // Patch a branch to a Baker read barrier thunk.
   virtual void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                            const LinkerPatch& patch,
diff --git a/dex2oat/linker/relative_patcher_test.h b/dex2oat/linker/relative_patcher_test.h
index dead38d..dc53ac4 100644
--- a/dex2oat/linker/relative_patcher_test.h
+++ b/dex2oat/linker/relative_patcher_test.h
@@ -174,8 +174,10 @@
             auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod());
             uint32_t target_offset =
                 result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta();
-            patcher_->PatchCall(&patched_code_, patch.LiteralOffset(),
-                                offset + patch.LiteralOffset(), target_offset);
+            patcher_->PatchCall(&patched_code_,
+                                patch.LiteralOffset(),
+                                offset + patch.LiteralOffset(),
+                                target_offset);
           } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
             uint32_t target_offset =
                 bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_);
@@ -190,6 +192,10 @@
                                                patch,
                                                offset + patch.LiteralOffset(),
                                                target_offset);
+          } else if (patch.GetType() == LinkerPatch::Type::kCallEntrypoint) {
+            patcher_->PatchEntrypointCall(&patched_code_,
+                                          patch,
+                                          offset + patch.LiteralOffset());
           } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
             patcher_->PatchBakerReadBarrierBranch(&patched_code_,
                                                   patch,
@@ -300,11 +306,10 @@
      public:
       explicit ThunkKey(const LinkerPatch& patch)
           : type_(patch.GetType()),
-            custom_value1_(patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch
-                               ? patch.GetBakerCustomValue1() : 0u),
-            custom_value2_(patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch
-                               ? patch.GetBakerCustomValue2() : 0u) {
-        CHECK(patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch ||
+            custom_value1_(CustomValue1(patch)),
+            custom_value2_(CustomValue2(patch)) {
+        CHECK(patch.GetType() == LinkerPatch::Type::kCallEntrypoint ||
+              patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch ||
               patch.GetType() == LinkerPatch::Type::kCallRelative);
       }
 
@@ -319,6 +324,26 @@
       }
 
      private:
+      static uint32_t CustomValue1(const LinkerPatch& patch) {
+        // Entrypoint offset or first Baker custom value; zero for other patch types.
+        if (patch.GetType() == LinkerPatch::Type::kCallEntrypoint) {
+          return patch.EntrypointOffset();
+        }
+        if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
+          return patch.GetBakerCustomValue1();
+        }
+        return 0;
+      }
+
+      static uint32_t CustomValue2(const LinkerPatch& patch) {
+        // Only Baker read barrier branch patches carry a second custom value.
+        if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
+          return patch.GetBakerCustomValue2();
+        }
+        // Every other patch type yields zero.
+        return 0;
+      }
+
       const LinkerPatch::Type type_;
       const uint32_t custom_value1_;
       const uint32_t custom_value2_;
diff --git a/dex2oat/linker/x86/relative_patcher_x86.cc b/dex2oat/linker/x86/relative_patcher_x86.cc
index cdd2cef..3323506 100644
--- a/dex2oat/linker/x86/relative_patcher_x86.cc
+++ b/dex2oat/linker/x86/relative_patcher_x86.cc
@@ -57,6 +57,12 @@
   (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24);
 }
 
+void X86RelativePatcher::PatchEntrypointCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                             const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                             uint32_t patch_offset ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "UNIMPLEMENTED";  // Entrypoint call patches are not implemented for x86.
+}
+
 void X86RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
                                                      const LinkerPatch& patch ATTRIBUTE_UNUSED,
                                                      uint32_t patch_offset ATTRIBUTE_UNUSED) {
diff --git a/dex2oat/linker/x86/relative_patcher_x86.h b/dex2oat/linker/x86/relative_patcher_x86.h
index 3da62fb..589a498 100644
--- a/dex2oat/linker/x86/relative_patcher_x86.h
+++ b/dex2oat/linker/x86/relative_patcher_x86.h
@@ -30,6 +30,9 @@
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
                                 uint32_t target_offset) override;
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) override;
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) override;
diff --git a/dex2oat/linker/x86_64/relative_patcher_x86_64.cc b/dex2oat/linker/x86_64/relative_patcher_x86_64.cc
index c80f6a9..0b9d07e 100644
--- a/dex2oat/linker/x86_64/relative_patcher_x86_64.cc
+++ b/dex2oat/linker/x86_64/relative_patcher_x86_64.cc
@@ -35,6 +35,12 @@
   reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement;
 }
 
+void X86_64RelativePatcher::PatchEntrypointCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                                const LinkerPatch& patch ATTRIBUTE_UNUSED,
+                                                uint32_t patch_offset ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "UNIMPLEMENTED";  // Entrypoint call patches are not implemented for x86-64.
+}
+
 void X86_64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
                                                         const LinkerPatch& patch ATTRIBUTE_UNUSED,
                                                         uint32_t patch_offset ATTRIBUTE_UNUSED) {
diff --git a/dex2oat/linker/x86_64/relative_patcher_x86_64.h b/dex2oat/linker/x86_64/relative_patcher_x86_64.h
index a82fef3..7b99bd8 100644
--- a/dex2oat/linker/x86_64/relative_patcher_x86_64.h
+++ b/dex2oat/linker/x86_64/relative_patcher_x86_64.h
@@ -30,6 +30,9 @@
                                 const LinkerPatch& patch,
                                 uint32_t patch_offset,
                                 uint32_t target_offset) override;
+  void PatchEntrypointCall(std::vector<uint8_t>* code,
+                           const LinkerPatch& patch,
+                           uint32_t patch_offset) override;
   void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
                                    const LinkerPatch& patch,
                                    uint32_t patch_offset) override;
diff --git a/test/521-checker-array-set-null/src/Main.java b/test/521-checker-array-set-null/src/Main.java
index 74bb73f..f166b92 100644
--- a/test/521-checker-array-set-null/src/Main.java
+++ b/test/521-checker-array-set-null/src/Main.java
@@ -22,19 +22,19 @@
   }
 
   /// CHECK-START: void Main.testWithNull(java.lang.Object[]) disassembly (after)
-  /// CHECK-NOT:      pAputObject
+  /// CHECK:          ArraySet needs_type_check:false
   public static void testWithNull(Object[] o) {
     o[0] = null;
   }
 
   /// CHECK-START: void Main.testWithUnknown(java.lang.Object[], java.lang.Object) disassembly (after)
-  /// CHECK:          pAputObject
+  /// CHECK:          ArraySet needs_type_check:true
   public static void testWithUnknown(Object[] o, Object obj) {
     o[0] = obj;
   }
 
   /// CHECK-START: void Main.testWithSame(java.lang.Object[]) disassembly (after)
-  /// CHECK-NOT:      pAputObject
+  /// CHECK:          ArraySet needs_type_check:false
   public static void testWithSame(Object[] o) {
     o[0] = o[1];
   }