Optimizing: Better invoke-static/-direct dispatch.

Add a framework for the different ways of loading the ArtMethod*
and retrieving the code pointer. Implement invoke-static and
invoke-direct calls the same way as Quick. Document the
dispatch kinds in HInvokeStaticOrDirect's new enumerations
MethodLoadKind and CodePtrLocation.

PC-relative loads from dex cache arrays are used only for
x86-64 and arm64. The implementation for other architectures
will be done in separate CLs.
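
For reference, a rough sketch of the new dispatch kinds and the
DispatchInfo bundle used throughout this CL (names taken from the
code below; the authoritative declarations live in
HInvokeStaticOrDirect in nodes.h and may differ in ordering and
documentation):

  enum class MethodLoadKind {
    kStringInit,              // Load the StringFactory entrypoint from the Thread
                              // (method_load_data = string_init_offset).
    kRecursive,               // Callee is the current method; reuse the incoming ArtMethod*.
    kDirectAddress,           // Method pointer known at compile time (method_load_data).
    kDirectAddressWithFixup,  // Method pointer known at link time (literal pool fixup).
    kDexCachePcRelative,      // PC-relative load from the dex cache arrays
                              // (method_load_data = element offset).
    kDexCacheViaMethod,       // Load via the current ArtMethod's dex_cache_resolved_methods_.
  };

  enum class CodePtrLocation {
    kCallSelf,                // Recursive call; branch to the frame entry label.
    kCallPCRelative,          // Relative call within the oat file, patched at link time.
    kCallDirect,              // Code pointer known at compile time (direct_code_ptr).
    kCallDirectWithFixup,     // Code pointer known at link time (literal pool fixup).
    kCallArtMethod,           // Call entry_point_from_quick_compiled_code_ of the loaded ArtMethod*.
  };

  struct DispatchInfo {
    MethodLoadKind method_load_kind;
    CodePtrLocation code_ptr_location;
    uint64_t method_load_data;   // Meaning depends on method_load_kind (see above).
    uint64_t direct_code_ptr;    // Used only for kCallDirect.
  };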

Change-Id: I468ca4d422dbd14748e1ba6b45289f0d31734d94
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 8629f39..72754ae 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1152,7 +1152,7 @@
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
   }
 
-  // Sort patches by literal offset for better deduplication.
+  // Sort patches by literal offset. Required for .oat_patches encoding.
   std::sort(patches_.begin(), patches_.end(), [](const LinkerPatch& lhs, const LinkerPatch& rhs) {
     return lhs.LiteralOffset() < rhs.LiteralOffset();
   });
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 633bc1b..80387f2 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -267,10 +267,9 @@
     ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change) {
-  DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
   ArtMethod* resolved_method = mUnit->GetClassLinker()->ResolveMethod(
-      *mUnit->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
+      *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
   DCHECK_EQ(resolved_method == nullptr, soa.Self()->IsExceptionPending());
   if (UNLIKELY(resolved_method == nullptr)) {
     // Clean up any exception left by type resolution.
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
index 315585d..24b1481 100644
--- a/compiler/linker/x86/relative_patcher_x86.cc
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -39,7 +39,8 @@
   DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u);
 
   // Check that the patched data contains kDummy32BitOffset.
-  constexpr int kDummy32BitOffset = 256;  // Must match X86Mir2Lir::kDummy32BitOffset.
+  // Must match X86Mir2Lir::kDummy32BitOffset and CodeGeneratorX86_64::kDummy32BitOffset.
+  constexpr int kDummy32BitOffset = 256;
   DCHECK_LE(literal_offset, code->size());
   DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0));
   DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8));
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 8841498..cae3da3 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -31,6 +31,7 @@
 #include "primitive.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
@@ -786,6 +787,77 @@
   }
 }
 
+HInvokeStaticOrDirect::DispatchInfo HGraphBuilder::ComputeDispatchInfo(
+    bool is_string_init,
+    int32_t string_init_offset,
+    MethodReference target_method,
+    uintptr_t direct_method,
+    uintptr_t direct_code) {
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
+  uint64_t method_load_data = 0u;
+  uint64_t direct_code_ptr = 0u;
+
+  if (is_string_init) {
+    // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kStringInit;
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    method_load_data = string_init_offset;
+  } else if (target_method.dex_file == outer_compilation_unit_->GetDexFile() &&
+      target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex()) {
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
+  } else {
+    if (direct_method != 0u) {  // Should we use a direct pointer to the method?
+      if (direct_method != static_cast<uintptr_t>(-1)) {  // Is the method pointer known now?
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
+        method_load_data = direct_method;
+      } else {  // The direct pointer will be known at link time.
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup;
+      }
+    } else {  // Use dex cache.
+      DCHECK(target_method.dex_file == dex_compilation_unit_->GetDexFile());
+      DexCacheArraysLayout layout =
+          compiler_driver_->GetDexCacheArraysLayout(target_method.dex_file);
+      if (layout.Valid()) {  // Can we use PC-relative access to the dex cache arrays?
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
+        method_load_data = layout.MethodOffset(target_method.dex_method_index);
+      } else {  // We must go through the ArtMethod's pointer to resolved methods.
+        method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+      }
+    }
+    if (direct_code != 0u) {  // Should we use a direct pointer to the code?
+      if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
+        direct_code_ptr = direct_code;
+      } else if (compiler_driver_->IsImage() ||
+          target_method.dex_file == dex_compilation_unit_->GetDexFile()) {
+        // Use PC-relative calls for invokes within a multi-dex oat file.
+        // TODO: Recognize when the target dex file is within the current oat file for
+        // app compilation. At the moment we recognize only the boot image as multi-dex.
+        // NOTE: This will require changing the ARM backend which currently falls
+        // through from kCallPCRelative to kCallDirectWithFixup for different dex files.
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
+      } else {  // The direct pointer will be known at link time.
+        // NOTE: This is used for app->boot calls when compiling an app against
+        // a relocatable but not yet relocated image.
+        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup;
+      }
+    } else {  // We must use the code pointer from the ArtMethod.
+      code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    }
+  }
+
+  if (graph_->IsDebuggable()) {
+    // For debuggable apps always use the code pointer from ArtMethod
+    // so that we don't circumvent instrumentation stubs if installed.
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+  }
+
+  return HInvokeStaticOrDirect::DispatchInfo {
+    method_load_kind, code_ptr_location, method_load_data, direct_code_ptr };
+}
+
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
                                 uint32_t dex_pc,
                                 uint32_t method_idx,
@@ -879,12 +951,6 @@
         arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
   } else {
     DCHECK(optimized_invoke_type == kDirect || optimized_invoke_type == kStatic);
-    // Sharpening to kDirect only works if we compile PIC.
-    DCHECK((optimized_invoke_type == invoke_type) || (optimized_invoke_type != kDirect)
-           || compiler_driver_->GetCompilerOptions().GetCompilePic());
-    bool is_recursive =
-        (target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex())
-        && (target_method.dex_file == outer_compilation_unit_->GetDexFile());
 
     if (optimized_invoke_type == kStatic && !is_string_init) {
       ScopedObjectAccess soa(Thread::Current());
@@ -958,13 +1024,18 @@
       }
     }
 
+    HInvokeStaticOrDirect::DispatchInfo dispatch_info = ComputeDispatchInfo(is_string_init,
+                                                                            string_init_offset,
+                                                                            target_method,
+                                                                            direct_method,
+                                                                            direct_code);
     invoke = new (arena_) HInvokeStaticOrDirect(arena_,
                                                 number_of_arguments,
                                                 return_type,
                                                 dex_pc,
-                                                target_method.dex_method_index,
-                                                is_recursive,
-                                                string_init_offset,
+                                                method_idx,
+                                                target_method,
+                                                dispatch_info,
                                                 invoke_type,
                                                 optimized_invoke_type,
                                                 clinit_check_requirement);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index ad5d923..08600c7 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -266,6 +266,12 @@
                                      uint32_t dex_pc,
                                      HInvoke* invoke);
 
+  HInvokeStaticOrDirect::DispatchInfo ComputeDispatchInfo(bool is_string_init,
+                                                          int32_t string_init_offset,
+                                                          MethodReference target_method,
+                                                          uintptr_t direct_method,
+                                                          uintptr_t direct_code);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d0b5ffd..8fe3170 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -249,6 +249,10 @@
   GetAssembler()->FinalizeInstructions(code);
 }
 
+void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) {
+  // No linker patches by default.
+}
+
 size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
   for (size_t i = 0; i < length; ++i) {
     if (!array[i]) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 2582444..938369b 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -48,6 +48,7 @@
 class Assembler;
 class CodeGenerator;
 class DexCompilationUnit;
+class LinkerPatch;
 class ParallelMoveResolver;
 class SrcMapElem;
 template <class Alloc>
@@ -160,6 +161,7 @@
 
   virtual void Initialize() = 0;
   virtual void Finalize(CodeAllocator* allocator);
+  virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(HBasicBlock* block) = 0;
@@ -356,6 +358,17 @@
   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
 
  protected:
+  // Method patch info used for recording locations of required linker patches and
+  // target methods. The target method can be used for various purposes, whether for
+  // patching the address of the method or the code pointer or a PC-relative call.
+  template <typename LabelType>
+  struct MethodPatchInfo {
+    explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { }
+
+    MethodReference target_method;
+    LabelType label;
+  };
+
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
@@ -427,8 +440,8 @@
 
   // Arm64 has its own type for a label, so we need to templatize this method
   // to share the logic.
-  template <typename T>
-  T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
+  template <typename LabelType>
+  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
     block = FirstNonEmptyBlock(block);
     return raw_pointer_to_labels_array + block->GetBlockId();
   }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1bd4216..f5079ef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,7 @@
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
+#include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
@@ -411,7 +412,10 @@
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
+      call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
+      relative_call_patches_(graph->GetArena()->Adapter()) {
   // Save the PC register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(PC));
 }
@@ -452,6 +456,10 @@
       it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
     }
   }
+  // Adjust pc offsets for relative call patches.
+  for (MethodPatchInfo<Label>& info : relative_call_patches_) {
+    __ AdjustLabelPosition(&info.label);
+  }
 
   CodeGenerator::Finalize(allocator);
 }
@@ -4507,53 +4515,156 @@
 }
 
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+  // For better instruction scheduling we load the direct code pointer before the method pointer.
+  bool direct_code_loaded = false;
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      if (IsSameDexFile(*invoke->GetTargetMethod().dex_file, GetGraph()->GetDexFile())) {
+        break;
+      }
+      // Calls across dex files are more likely to exceed the available BL range,
+      // so use absolute patch by falling through to kDirectCodeFixup.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // LR = code address from literal pool with link-time patch.
+      __ LoadLiteral(LR, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
+      direct_code_loaded = true;
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // LR = invoke->GetDirectCodePtr();
+      __ LoadImmediate(LR, invoke->GetDirectCodePtr());
+      direct_code_loaded = true;
+      break;
+    default:
+      break;
+  }
 
-  if (invoke->IsStringInit()) {
-    Register reg = temp.AsRegister<Register>();
-    // temp = thread->string_init_entrypoint
-    __ LoadFromOffset(kLoadWord, reg, TR, invoke->GetStringInitOffset());
-    // LR = temp[offset_of_quick_compiled_code]
-    __ LoadFromOffset(kLoadWord, LR, reg,
-                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kArmWordSize).Int32Value());
-    // LR()
-    __ blx(LR);
-  } else if (invoke->IsRecursive()) {
-    __ bl(GetFrameEntryLabel());
-  } else {
-    Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
-    Register method_reg;
-    Register reg = temp.AsRegister<Register>();
-    if (current_method.IsRegister()) {
-      method_reg = current_method.AsRegister<Register>();
-    } else {
-      DCHECK(invoke->GetLocations()->Intrinsified());
-      DCHECK(!current_method.IsValid());
-      method_reg = reg;
-      __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      // temp = thread->string_init_entrypoint
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      // Nothing to do.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      __ LoadLiteral(temp.AsRegister<Register>(),
+                     DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Register method_reg;
+      Register reg = temp.AsRegister<Register>();
+      if (current_method.IsRegister()) {
+        method_reg = current_method.AsRegister<Register>();
+      } else {
+        DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = reg;
+        __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
+      }
+      // temp = current_method->dex_cache_resolved_methods_;
+      __ LoadFromOffset(
+          kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+      // temp = temp[index_in_cache]
+      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
+      break;
     }
-    // reg = current_method->dex_cache_resolved_methods_;
-    __ LoadFromOffset(
-        kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-    // reg = reg[index_in_cache]
-    __ LoadFromOffset(
-        kLoadWord, reg, reg, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
-    // LR = reg[offset_of_quick_compiled_code]
-    __ LoadFromOffset(kLoadWord, LR, reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-        kArmWordSize).Int32Value());
-    // LR()
-    __ blx(LR);
+  }
+
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ bl(GetFrameEntryLabel());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      if (!direct_code_loaded) {
+        relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+        __ Bind(&relative_call_patches_.back().label);
+        Label label;
+        __ bl(&label);  // Arbitrarily branch to the instruction after BL, override at link time.
+        __ Bind(&label);
+        break;
+      }
+      // If we loaded the direct code above, fall through.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // LR prepared above for better instruction scheduling.
+      DCHECK(direct_code_loaded);
+      // LR()
+      __ blx(LR);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // LR = temp->entry_point_from_quick_compiled_code_
+      __ LoadFromOffset(
+          kLoadWord, LR, temp.AsRegister<Register>(),
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value());
+      // LR()
+      __ blx(LR);
+      break;
   }
 
   DCHECK(!IsLeafMethod());
 }
 
+void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
+  }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = literal->GetLabel()->Position();
+    linker_patches->push_back(LinkerPatch::CodePatch(literal_offset,
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
+  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+    uint32_t literal_offset = info.label.Position();
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
+                                                             info.target_method.dex_file,
+                                                             info.target_method.dex_method_index));
+  }
+}
+
+Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
+                                                    MethodToLiteralMap* map) {
+  // Look up the literal for target_method.
+  auto lb = map->lower_bound(target_method);
+  if (lb != map->end() && !map->key_comp()(target_method, lb->first)) {
+    return lb->second;
+  }
+  // We don't have a literal for this method yet, insert a new one.
+  Literal* literal = __ NewLiteral<uint32_t>(0u);
+  map->PutBefore(lb, target_method, literal);
+  return literal;
+}
+
+Literal* CodeGeneratorARM::DeduplicateMethodAddressLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+Literal* CodeGeneratorARM::DeduplicateMethodCodeLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
+}
+
 void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) {
   // Nothing to do, this should be removed during prepare for register allocator.
   UNUSED(instruction);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 53bd766..9528cca 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -328,7 +328,15 @@
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
 
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
  private:
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+
+  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
+  Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
   Label frame_entry_label_;
@@ -338,6 +346,13 @@
   Thumb2Assembler assembler_;
   const ArmInstructionSetFeatures& isa_features_;
 
+  // Method patch info, map MethodReference to a literal for method address and method code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
+  // Relative call patch info.
+  // Using ArenaDeque<> which retains element addresses on push/emplace_back().
+  ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b8ac421..d6169b0 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -20,6 +20,7 @@
 #include "art_method.h"
 #include "code_generator_utils.h"
 #include "common_arm64.h"
+#include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
@@ -521,7 +522,12 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter()),
+      method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
+      call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
+      relative_call_patches_(graph->GetArena()->Adapter()),
+      pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -532,6 +538,7 @@
 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
   // Ensure we emit the literal pool.
   __ FinalizeCode();
+
   CodeGenerator::Finalize(allocator);
 }
 
@@ -2370,55 +2377,186 @@
 }
 
 void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  // For better instruction scheduling we load the direct code pointer before the method pointer.
+  bool direct_code_loaded = false;
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // LR = code address from literal pool with link-time patch.
+      __ Ldr(lr, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
+      direct_code_loaded = true;
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // LR = invoke->GetDirectCodePtr();
+      __ Ldr(lr, DeduplicateUint64Literal(invoke->GetDirectCodePtr()));
+      direct_code_loaded = true;
+      break;
+    default:
+      break;
+  }
+
   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
-  size_t index_in_cache = GetCachePointerOffset(invoke->GetDexMethodIndex());
-
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
-
-  if (invoke->IsStringInit()) {
-    Register reg = XRegisterFrom(temp);
-    // temp = thread->string_init_entrypoint
-    __ Ldr(reg.X(), MemOperand(tr, invoke->GetStringInitOffset()));
-    // LR = temp->entry_point_from_quick_compiled_code_;
-    __ Ldr(lr, MemOperand(
-        reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
-    // lr()
-    __ Blr(lr);
-  } else if (invoke->IsRecursive()) {
-    __ Bl(&frame_entry_label_);
-  } else {
-    Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
-    Register reg = XRegisterFrom(temp);
-    Register method_reg;
-    if (current_method.IsRegister()) {
-      method_reg = XRegisterFrom(current_method);
-    } else {
-      DCHECK(invoke->GetLocations()->Intrinsified());
-      DCHECK(!current_method.IsValid());
-      method_reg = reg;
-      __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      // temp = thread->string_init_entrypoint
+      __ Ldr(XRegisterFrom(temp).X(), MemOperand(tr, invoke->GetStringInitOffset()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      // Nothing to do.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      // Load method address from literal pool.
+      __ Ldr(XRegisterFrom(temp).X(), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      // Load method address from literal pool with a link-time patch.
+      __ Ldr(XRegisterFrom(temp).X(),
+             DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      // Add ADRP with its PC-relative DexCache access patch.
+      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                             invoke->GetDexCacheArrayOffset());
+      vixl::Label* pc_insn_label = &pc_rel_dex_cache_patches_.back().label;
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ adrp(XRegisterFrom(temp).X(), 0);
+      }
+      __ Bind(pc_insn_label);  // Bind after ADRP.
+      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      // Add LDR with its PC-relative DexCache access patch.
+      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                             invoke->GetDexCacheArrayOffset());
+      __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0));
+      __ Bind(&pc_rel_dex_cache_patches_.back().label);  // Bind after LDR.
+      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      break;
     }
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Register reg = XRegisterFrom(temp);
+      Register method_reg;
+      if (current_method.IsRegister()) {
+        method_reg = XRegisterFrom(current_method);
+      } else {
+        DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = reg;
+        __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
+      }
 
-    // temp = current_method->dex_cache_resolved_methods_;
-    __ Ldr(reg.W(), MemOperand(method_reg.X(),
-                               ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
-    // temp = temp[index_in_cache];
-    __ Ldr(reg.X(), MemOperand(reg, index_in_cache));
-    // lr = temp->entry_point_from_quick_compiled_code_;
-    __ Ldr(lr, MemOperand(reg.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-        kArm64WordSize).Int32Value()));
-    // lr();
-    __ Blr(lr);
+      // temp = current_method->dex_cache_resolved_methods_;
+      __ Ldr(reg.W(), MemOperand(method_reg.X(),
+                                 ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+      // temp = temp[index_in_cache];
+      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
+      break;
+    }
+  }
+
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ Bl(&frame_entry_label_);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
+      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+      vixl::Label* label = &relative_call_patches_.back().label;
+      __ Bl(label);  // Arbitrarily branch to the instruction after BL, override at link time.
+      __ Bind(label);  // Bind after BL.
+      break;
+    }
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // LR prepared above for better instruction scheduling.
+      DCHECK(direct_code_loaded);
+      // lr()
+      __ Blr(lr);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // LR = temp->entry_point_from_quick_compiled_code_;
+      __ Ldr(lr, MemOperand(
+          XRegisterFrom(temp).X(),
+          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
+      // lr()
+      __ Blr(lr);
+      break;
   }
 
   DCHECK(!IsLeafMethod());
 }
 
+void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      pc_rel_dex_cache_patches_.size();
+  linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    vixl::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal->offset(),
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
+  }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    vixl::Literal<uint64_t>* literal = entry.second;
+    linker_patches->push_back(LinkerPatch::CodePatch(literal->offset(),
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
+  for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location() - 4u,
+                                                             info.target_method.dex_file,
+                                                             info.target_method.dex_method_index));
+  }
+  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u,
+                                                              &info.target_dex_file,
+                                                              info.pc_insn_label->location() - 4u,
+                                                              info.element_offset));
+  }
+}
+
+vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
+  // Look up the literal for value.
+  auto lb = uint64_literals_.lower_bound(value);
+  if (lb != uint64_literals_.end() && !uint64_literals_.key_comp()(value, lb->first)) {
+    return lb->second;
+  }
+  // We don't have a literal for this value, insert a new one.
+  vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(value);
+  uint64_literals_.PutBefore(lb, value, literal);
+  return literal;
+}
+
+vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral(
+    MethodReference target_method,
+    MethodToLiteralMap* map) {
+  // Look up the literal for target_method.
+  auto lb = map->lower_bound(target_method);
+  if (lb != map->end() && !map->key_comp()(target_method, lb->first)) {
+    return lb->second;
+  }
+  // We don't have a literal for this method yet, insert a new one.
+  vixl::Literal<uint64_t>* literal = __ CreateLiteralDestroyedWithPool<uint64_t>(0u);
+  map->PutBefore(lb, target_method, literal);
+  return literal;
+}
+
+vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral(
+    MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
+    MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
+}
+
+
 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   // When we do not run baseline, explicit clinit checks triggered by static
   // invokes must have been pruned by art::PrepareForRegisterAllocation.
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index ac7ee10..18070fc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -360,7 +360,32 @@
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
 
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
  private:
+  using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference,
+                                          vixl::Literal<uint64_t>*,
+                                          MethodReferenceComparator>;
+
+  vixl::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
+  vixl::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method,
+                                                    MethodToLiteralMap* map);
+  vixl::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  vixl::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method);
+
+  struct PcRelativeDexCacheAccessInfo {
+    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
+        : target_dex_file(dex_file), element_offset(element_off), label(), pc_insn_label() { }
+
+    const DexFile& target_dex_file;
+    uint32_t element_offset;
+    // NOTE: Labels are bound to the end of the patched instruction because
+    // we don't know if there will be a veneer or how big it will be.
+    vixl::Label label;
+    vixl::Label* pc_insn_label;
+  };
+
   // Labels for each block that will be compiled.
   vixl::Label* block_labels_;
   vixl::Label frame_entry_label_;
@@ -371,6 +396,17 @@
   Arm64Assembler assembler_;
   const Arm64InstructionSetFeatures& isa_features_;
 
+  // Deduplication map for 64-bit literals, used for non-patchable method address and method code.
+  Uint64ToLiteralMap uint64_literals_;
+  // Method patch info, map MethodReference to a literal for method address and method code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
+  // Relative call patch info.
+  // Using ArenaDeque<> which retains element addresses on push/emplace_back().
+  ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
+  // PC-relative DexCache access info.
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 167e025..48323c7 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2407,64 +2407,84 @@
 void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
 
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      // temp = thread->string_init_entrypoint
+      __ LoadFromOffset(kLoadDoubleword,
+                        temp.AsRegister<GpuRegister>(),
+                        TR,
+                        invoke->GetStringInitOffset());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      // Nothing to do.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      // TODO: Implement this type. (Needs literal support.) At the moment, the
+      // CompilerDriver will not direct the backend to use this type for MIPS.
+      LOG(FATAL) << "Unsupported!";
+      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      GpuRegister reg = temp.AsRegister<GpuRegister>();
+      GpuRegister method_reg;
+      if (current_method.IsRegister()) {
+        method_reg = current_method.AsRegister<GpuRegister>();
+      } else {
+        // TODO: use the appropriate DCHECK() here if possible.
+        // DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = reg;
+        __ Ld(reg, SP, kCurrentMethodStackOffset);
+      }
 
-  if (invoke->IsStringInit()) {
-    GpuRegister reg = temp.AsRegister<GpuRegister>();
-    // temp = thread->string_init_entrypoint
-    __ LoadFromOffset(kLoadDoubleword,
-                      reg,
-                      TR,
-                      invoke->GetStringInitOffset());
-    // T9 = temp->entry_point_from_quick_compiled_code_;
-    __ LoadFromOffset(kLoadDoubleword,
-                      T9,
-                      reg,
-                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kMips64WordSize).Int32Value());
-    // T9()
-    __ Jalr(T9);
-  } else if (invoke->IsRecursive()) {
-    __ Jalr(&frame_entry_label_, T9);
-  } else {
-    Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
-    GpuRegister reg = temp.AsRegister<GpuRegister>();
-    GpuRegister method_reg;
-    if (current_method.IsRegister()) {
-      method_reg = current_method.AsRegister<GpuRegister>();
-    } else {
-      // TODO: use the appropriate DCHECK() here if possible.
-      // DCHECK(invoke->GetLocations()->Intrinsified());
-      DCHECK(!current_method.IsValid());
-      method_reg = reg;
-      __ Ld(reg, SP, kCurrentMethodStackOffset);
+      // temp = temp->dex_cache_resolved_methods_;
+      __ LoadFromOffset(kLoadUnsignedWord,
+                        reg,
+                        method_reg,
+                        ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+      // temp = temp[index_in_cache]
+      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      __ LoadFromOffset(kLoadDoubleword,
+                        reg,
+                        reg,
+                        CodeGenerator::GetCachePointerOffset(index_in_cache));
+      break;
     }
-
-    // temp = temp->dex_cache_resolved_methods_;
-    __ LoadFromOffset(kLoadUnsignedWord,
-                      reg,
-                      method_reg,
-                      ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-    // temp = temp[index_in_cache]
-    __ LoadFromOffset(kLoadDoubleword,
-                      reg,
-                      reg,
-                      CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()));
-    // T9 = temp[offset_of_quick_compiled_code]
-    __ LoadFromOffset(kLoadDoubleword,
-                      T9,
-                      reg,
-                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                          kMips64WordSize).Int32Value());
-    // T9()
-    __ Jalr(T9);
   }
 
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ Jalr(&frame_entry_label_, T9);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // T9 = invoke->GetDirectCodePtr();
+      __ LoadConst64(T9, invoke->GetDirectCodePtr());
+      // T9()
+      __ Jalr(T9);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
+      // TODO: Implement kCallPCRelative. For the moment, we fall back to kCallArtMethod.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // TODO: Implement kCallDirectWithFixup. For the moment, we fall back to kCallArtMethod.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // T9 = temp->entry_point_from_quick_compiled_code_;
+      __ LoadFromOffset(kLoadDoubleword,
+                        T9,
+                        temp.AsRegister<GpuRegister>(),
+                        ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                            kMips64WordSize).Int32Value());
+      // T9()
+      __ Jalr(T9);
+      break;
+  }
   DCHECK(!IsLeafMethod());
 }
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 091a3e5..96ef863 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -18,6 +18,7 @@
 
 #include "art_method.h"
 #include "code_generator_utils.h"
+#include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
@@ -450,7 +451,9 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      method_patches_(graph->GetArena()->Adapter()),
+      relative_call_patches_(graph->GetArena()->Adapter()) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -3521,50 +3524,94 @@
 }
 
 
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                  Location temp) {
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
-
-  if (invoke->IsStringInit()) {
-    // temp = thread->string_init_entrypoint
-    Register reg = temp.AsRegister<Register>();
-    __ fs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset()));
-    // (temp + offset_of_quick_compiled_code)()
-    __ call(Address(
-        reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
-  } else if (invoke->IsRecursive()) {
-    __ call(GetFrameEntryLabel());
-  } else {
-    Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
-
-    Register method_reg;
-    Register reg = temp.AsRegister<Register>();
-    if (current_method.IsRegister()) {
-      method_reg = current_method.AsRegister<Register>();
-    } else {
-      DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
-      DCHECK(!current_method.IsValid());
-      method_reg = reg;
-      __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      // temp = thread->string_init_entrypoint
+      __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      // Nothing to do.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      __ movl(temp.AsRegister<Register>(), Immediate(0));  // Placeholder.
+      method_patches_.emplace_back(invoke->GetTargetMethod());
+      __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Register method_reg;
+      Register reg = temp.AsRegister<Register>();
+      if (current_method.IsRegister()) {
+        method_reg = current_method.AsRegister<Register>();
+      } else {
+        DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = reg;
+        __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
+      }
+      // temp = temp->dex_cache_resolved_methods_;
+      __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+      // temp = temp[index_in_cache]
+      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
+      break;
     }
-    // temp = temp->dex_cache_resolved_methods_;
-    __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
-    // temp = temp[index_in_cache]
-    __ movl(reg, Address(reg,
-                         CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex())));
-    // (temp + offset_of_quick_compiled_code)()
-    __ call(Address(reg,
-        ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+  }
+
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ call(GetFrameEntryLabel());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
+      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+      Label* label = &relative_call_patches_.back().label;
+      __ call(label);  // Bind to the patch label, override at link time.
+      __ Bind(label);  // Bind the label at the end of the "call" insn.
+      break;
+    }
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
+      // (Though the direct CALL ptr16:32 is available for consideration).
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // (temp + offset_of_quick_compiled_code)()
+      __ call(Address(temp.AsRegister<Register>(), ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+          kX86WordSize).Int32Value()));
+      break;
   }
 
   DCHECK(!IsLeafMethod());
 }
 
+void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  linker_patches->reserve(method_patches_.size() + relative_call_patches_.size());
+  for (const MethodPatchInfo<Label>& info : method_patches_) {
+    // The label points to the end of the "movl" insn but the literal offset for method
+    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
+    uint32_t literal_offset = info.label.Position() - 4;
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       info.target_method.dex_file,
+                                                       info.target_method.dex_method_index));
+  }
+  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+    // The label points to the end of the "call" insn but the literal offset for method
+    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
+    uint32_t literal_offset = info.label.Position() - 4;
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
+                                                             info.target_method.dex_file,
+                                                             info.target_method.dex_method_index));
+  }
+}
+
 void CodeGeneratorX86::MarkGCCard(Register temp,
                                   Register card,
                                   Register object,
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2e3d4d4..17787a8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -295,6 +295,9 @@
   // Generate a call to a static or direct method.
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
 
+  // Emit linker patches.
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   // Emit a write barrier.
   void MarkGCCard(Register temp,
                   Register card,
@@ -332,6 +335,10 @@
   X86Assembler assembler_;
   const X86InstructionSetFeatures& isa_features_;
 
+  // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
+  ArenaDeque<MethodPatchInfo<Label>> method_patches_;
+  ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
 };
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2c5cef3..42f3d82 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,7 @@
 
 #include "art_method.h"
 #include "code_generator_utils.h"
+#include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
@@ -410,48 +411,111 @@
                                                      Location temp) {
   // All registers are assumed to be correctly set up.
 
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
-
-  if (invoke->IsStringInit()) {
-    CpuRegister reg = temp.AsRegister<CpuRegister>();
-    // temp = thread->string_init_entrypoint
-    __ gs()->movq(reg, Address::Absolute(invoke->GetStringInitOffset(), true));
-    // (temp + offset_of_quick_compiled_code)()
-    __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-        kX86_64WordSize).SizeValue()));
-  } else if (invoke->IsRecursive()) {
-    __ call(&frame_entry_label_);
-  } else {
-    CpuRegister reg = temp.AsRegister<CpuRegister>();
-    Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
-    Register method_reg;
-    if (current_method.IsRegister()) {
-      method_reg = current_method.AsRegister<Register>();
-    } else {
-      DCHECK(invoke->GetLocations()->Intrinsified());
-      DCHECK(!current_method.IsValid());
-      method_reg = reg.AsRegister();
-      __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      // temp = thread->string_init_entrypoint
+      __ gs()->movl(temp.AsRegister<CpuRegister>(),
+                    Address::Absolute(invoke->GetStringInitOffset(), true));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      // Nothing to do.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
+      method_patches_.emplace_back(invoke->GetTargetMethod());
+      __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                             invoke->GetDexCacheArrayOffset());
+      __ movq(temp.AsRegister<CpuRegister>(),
+              Address::Absolute(kDummy32BitOffset, false /* no_rip */));
+      // Bind the label at the end of the "movl" insn.
+      __ Bind(&pc_rel_dex_cache_patches_.back().label);
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Register method_reg;
+      CpuRegister reg = temp.AsRegister<CpuRegister>();
+      if (current_method.IsRegister()) {
+        method_reg = current_method.AsRegister<Register>();
+      } else {
+        DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = reg.AsRegister();
+        __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+      }
+      // temp = temp->dex_cache_resolved_methods_;
+      __ movl(reg, Address(CpuRegister(method_reg),
+                           ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+      // temp = temp[index_in_cache]
+      uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
+      __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
+      break;
     }
-    // temp = temp->dex_cache_resolved_methods_;
-    __ movl(reg, Address(CpuRegister(method_reg),
-                         ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-    // temp = temp[index_in_cache]
-    __ movq(reg, Address(
-        reg, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex())));
-    // (temp + offset_of_quick_compiled_code)()
-    __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-        kX86_64WordSize).SizeValue()));
+  }
+
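+  // Then emit the call itself according to where the code pointer is located.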
+  switch (invoke->GetCodePtrLocation()) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
+      __ call(&frame_entry_label_);
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
+      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+      Label* label = &relative_call_patches_.back().label;
+      __ call(label);  // Call the patch label; the displacement is overridden at link time.
+      __ Bind(label);  // Bind the label at the end of the "call" insn.
+      break;
+    }
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
+      FALLTHROUGH_INTENDED;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
+      // (temp + offset_of_quick_compiled_code)()
+      __ call(Address(temp.AsRegister<CpuRegister>(),
+                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                          kX86_64WordSize).SizeValue()));
+      break;
   }
 
   DCHECK(!IsLeafMethod());
 }
 
+void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      method_patches_.size() + relative_call_patches_.size() + pc_rel_dex_cache_patches_.size();
+  linker_patches->reserve(size);
+  for (const MethodPatchInfo<Label>& info : method_patches_) {
+    // The label points to the end of the "movl" instruction, but the literal offset for
+    // the method patch needs to point to the embedded constant, which occupies the last
+    // 4 bytes.
+    uint32_t literal_offset = info.label.Position() - 4;
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       info.target_method.dex_file,
+                                                       info.target_method.dex_method_index));
+  }
+  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+    // The label points to the end of the "call" instruction, but the literal offset for
+    // the relative call patch needs to point to the embedded constant, which occupies
+    // the last 4 bytes.
+    uint32_t literal_offset = info.label.Position() - 4;
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
+                                                             info.target_method.dex_file,
+                                                             info.target_method.dex_method_index));
+  }
+  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
+    // The label points to the end of the "movq" instruction, but the literal offset for
+    // the dex cache array patch needs to point to the embedded constant, which occupies
+    // the last 4 bytes.
+    uint32_t literal_offset = info.label.Position() - 4;
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
+                                                              &info.target_dex_file,
+                                                              info.label.Position(),
+                                                              info.element_offset));
+  }
+}
+
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
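Both patch-recording loops above rely on the same convention: a Label is bound immediately
after the patched instruction, and the 32-bit constant to be patched occupies the last
4 bytes of that instruction, so its literal offset is the label position minus 4. A minimal
stand-alone sketch of that bookkeeping (FakeLabel and FakePatch are simplified placeholders,
not the ART Label and LinkerPatch classes):

#include <cstdint>
#include <vector>

struct FakeLabel { uint32_t position; };  // Bound at the end of the patched instruction.
struct FakePatch { FakeLabel label; uint32_t method_index; };

// The imm32/rel32 operand occupies the last 4 bytes of the instruction, so the
// literal offset reported to the linker is the bound label position minus 4.
std::vector<uint32_t> LiteralOffsets(const std::vector<FakePatch>& patches) {
  std::vector<uint32_t> offsets;
  offsets.reserve(patches.size());
  for (const FakePatch& p : patches) {
    offsets.push_back(p.label.position - 4u);
  }
  return offsets;
}

Patches recorded this way can then be handed to the linker, which overwrites exactly those
4 bytes with the final value.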
@@ -510,7 +574,10 @@
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
         isa_features_(isa_features),
-        constant_area_start_(0) {
+        constant_area_start_(0),
+        method_patches_(graph->GetArena()->Adapter()),
+        relative_call_patches_(graph->GetArena()->Adapter()),
+        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 41bebac..21357be 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -306,6 +306,8 @@
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
 
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
     return isa_features_;
   }
@@ -326,6 +328,15 @@
   void Store64BitValueToStack(Location dest, int64_t value);
 
  private:
+  struct PcRelativeDexCacheAccessInfo {
+    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
+        : target_dex_file(dex_file), element_offset(element_off), label() { }
+
+    const DexFile& target_dex_file;
+    uint32_t element_offset;
+    Label label;
+  };
+
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
   Label frame_entry_label_;
@@ -339,6 +350,16 @@
   // Used for fixups to the constant area.
   int constant_area_start_;
 
+  // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
+  ArenaDeque<MethodPatchInfo<Label>> method_patches_;
+  ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+  // PC-relative DexCache access info.
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+
+  // When the proper offset for a value is not yet known, we emit kDummy32BitOffset as a
+  // placeholder; the linker later fixes it up to the right value.
+  static constexpr int32_t kDummy32BitOffset = 256;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
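The "retains element addresses on push/emplace_back()" note on the patch containers is what
makes it safe for GenerateStaticOrDirectCall to bind assembler labels that live inside the
container while more patches are still being appended. A small sketch of the property being
relied on, using std::deque in place of ArenaDeque (ArenaDeque is assumed here to give the
same guarantee, being a deque over an arena allocator):

#include <cassert>
#include <deque>

struct Record { int label = 0; };

int main() {
  std::deque<Record> records;
  records.emplace_back();
  Record* first = &records.front();   // Address taken, much like Bind(&patches.back().label).
  for (int i = 0; i != 1000; ++i) {
    records.emplace_back();           // Unlike a vector, this never moves existing elements.
  }
  assert(first == &records.front());  // The stored address is still valid.
  return 0;
}

A vector-based container would not work here, because reallocation on growth would move the
Labels after the assembler has taken their addresses.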
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 4c74679..202f3f0 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -63,7 +63,7 @@
       if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
         // We use the original invoke type to ensure the resolution of the called method
         // works properly.
-        if (!TryInline(call, call->GetDexMethodIndex())) {
+        if (!TryInline(call)) {
           if (kIsDebugBuild && IsCompilingWithCoreImage()) {
             std::string callee_name =
                 PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
@@ -169,15 +169,23 @@
   }
 }
 
-bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const {
+bool HInliner::TryInline(HInvoke* invoke_instruction) const {
+  uint32_t method_index = invoke_instruction->GetDexMethodIndex();
   ScopedObjectAccess soa(Thread::Current());
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
   VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file);
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   // We can query the dex cache directly. The verifier has populated it already.
-  ArtMethod* resolved_method = class_linker->FindDexCache(caller_dex_file)->GetResolvedMethod(
-      method_index, class_linker->GetImagePointerSize());
+  ArtMethod* resolved_method;
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
+    resolved_method = class_linker->FindDexCache(*ref.dex_file)->GetResolvedMethod(
+        ref.dex_method_index, class_linker->GetImagePointerSize());
+  } else {
+    resolved_method = class_linker->FindDexCache(caller_dex_file)->GetResolvedMethod(
+        method_index, class_linker->GetImagePointerSize());
+  }
 
   if (resolved_method == nullptr) {
     // Method cannot be resolved if it is in another dex file we do not have access to.
@@ -204,11 +212,8 @@
     }
   }
 
-  bool same_dex_file = true;
-  const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile();
-  if (resolved_method->GetDexFile()->GetLocation().compare(outer_dex_file.GetLocation()) != 0) {
-    same_dex_file = false;
-  }
+  bool same_dex_file =
+      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
 
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index ffd7569..9062e1a 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -49,7 +49,7 @@
   static constexpr const char* kInlinerPassName = "inliner";
 
  private:
-  bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const;
+  bool TryInline(HInvoke* invoke_instruction) const;
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
                          bool same_dex_file) const;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0df5d6d..fd5b86e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -27,6 +27,7 @@
 #include "handle_scope.h"
 #include "invoke_type.h"
 #include "locations.h"
+#include "method_reference.h"
 #include "mirror/class.h"
 #include "offsets.h"
 #include "primitive.h"
@@ -2998,13 +2999,81 @@
     kImplicit,  // Static call implicitly requiring a clinit check.
   };
 
+  // Determines how to load the target ArtMethod*.
+  enum class MethodLoadKind {
+    // Use a String init ArtMethod* loaded from Thread entrypoints.
+    kStringInit,
+
+    // Use the method's own ArtMethod* loaded by the register allocator.
+    kRecursive,
+
+    // Use ArtMethod* at a known address, embed the direct address in the code.
+    // Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
+    kDirectAddress,
+
+    // Use ArtMethod* at an address that will be known at link time, embed the direct
+    // address in the code. If the image is relocatable, emit a .patch_oat entry.
+    // Used for app->boot calls with relocatable image and for boot->boot calls,
+    // whether or not the image is relocatable.
+    kDirectAddressWithFixup,
+
+    // Load from the resolved methods array in the dex cache using a PC-relative load.
+    // Used when we need to go through the dex cache, for example for an invoke-static
+    // that may cause class initialization (the entry may point to a resolution method),
+    // and we know that the dex cache arrays can be accessed with a PC-relative load.
+    kDexCachePcRelative,
+
+    // Use ArtMethod* from the resolved methods of the compiled method's own ArtMethod*.
+    // Used for JIT when we need to use the dex cache. This is also the last-resort kind,
+    // used when other kinds are unavailable (say, the dex cache arrays are not accessible
+    // with a PC-relative load) or unimplemented or impractical (i.e. slow) on a particular
+    // architecture.
+    kDexCacheViaMethod,
+  };
+
+  // Determines the location of the code pointer.
+  enum class CodePtrLocation {
+    // Recursive call, use local PC-relative call instruction.
+    kCallSelf,
+
+    // Use PC-relative call instruction patched at link time.
+    // Used for calls within an oat file, boot->boot or app->app.
+    kCallPCRelative,
+
+    // Call to a known target address, embed the direct address in code.
+    // Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
+    kCallDirect,
+
+    // Call to a target address that will be known at link time, embed the direct
+    // address in code. If the image is relocatable, emit a .patch_oat entry.
+    // Used for app->boot calls with relocatable image and for boot->boot calls,
+    // whether or not the image is relocatable.
+    kCallDirectWithFixup,
+
+    // Use the code pointer from the ArtMethod*.
+    // Used when we don't know the target code. This is also the last-resort kind, used when
+    // other kinds are unimplemented or impractical (i.e. slow) on a particular architecture.
+    kCallArtMethod,
+  };
+
+  struct DispatchInfo {
+    const MethodLoadKind method_load_kind;
+    const CodePtrLocation code_ptr_location;
+    // The method load data holds
+    //   - the thread entrypoint offset for kStringInit if this is a string init invoke;
+    //     note that there are multiple string init methods, each having its own offset,
+    //   - the method address for kDirectAddress,
+    //   - the dex cache arrays offset for kDexCachePcRelative.
+    const uint64_t method_load_data;
+    const uint64_t direct_code_ptr;
+  };
+
   HInvokeStaticOrDirect(ArenaAllocator* arena,
                         uint32_t number_of_arguments,
                         Primitive::Type return_type,
                         uint32_t dex_pc,
-                        uint32_t dex_method_index,
-                        bool is_recursive,
-                        int32_t string_init_offset,
+                        uint32_t method_index,
+                        MethodReference target_method,
+                        DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
                         InvokeType invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
@@ -3014,15 +3083,15 @@
                 // potentially one other if the clinit check is explicit, and one other
                 // if the method is a string factory.
                 1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u)
-                   + (string_init_offset ? 1u : 0u),
+                   + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
                 return_type,
                 dex_pc,
-                dex_method_index,
+                method_index,
                 original_invoke_type),
         invoke_type_(invoke_type),
-        is_recursive_(is_recursive),
         clinit_check_requirement_(clinit_check_requirement),
-        string_init_offset_(string_init_offset) {}
+        target_method_(target_method),
+        dispatch_info_(dispatch_info) {}
 
   bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
     UNUSED(obj);
@@ -3036,11 +3105,36 @@
   }
 
   InvokeType GetInvokeType() const { return invoke_type_; }
-  bool IsRecursive() const { return is_recursive_; }
-  bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); }
-  bool IsStringInit() const { return string_init_offset_ != 0; }
-  int32_t GetStringInitOffset() const { return string_init_offset_; }
+  MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
+  CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
+  bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
+  bool NeedsDexCache() const OVERRIDE { return !IsRecursive() && !IsStringInit(); }
+  bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
   uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
+  bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
+  bool HasPcRelDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; }
+  bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
+  MethodReference GetTargetMethod() const { return target_method_; }
+
+  int32_t GetStringInitOffset() const {
+    DCHECK(IsStringInit());
+    return dispatch_info_.method_load_data;
+  }
+
+  uint64_t GetMethodAddress() const {
+    DCHECK(HasMethodAddress());
+    return dispatch_info_.method_load_data;
+  }
+
+  uint32_t GetDexCacheArrayOffset() const {
+    DCHECK(HasPcRelDexCache());
+    return dispatch_info_.method_load_data;
+  }
+
+  uint64_t GetDirectCodePtr() const {
+    DCHECK(HasDirectCodePtr());
+    return dispatch_info_.direct_code_ptr;
+  }
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
@@ -3111,11 +3205,12 @@
 
  private:
   const InvokeType invoke_type_;
-  const bool is_recursive_;
   ClinitCheckRequirement clinit_check_requirement_;
-  // Thread entrypoint offset for string init method if this is a string init invoke.
-  // Note that there are multiple string init methods, each having its own offset.
-  int32_t string_init_offset_;
+  // The target method may refer to a different dex file or method index than the original
+  // invoke. This happens for sharpened calls and for calls where a method was redeclared
+  // in a derived class to increase its visibility.
+  MethodReference target_method_;
+  DispatchInfo dispatch_info_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
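All of the DispatchInfo getters follow one pattern: method_load_data is a single untyped
64-bit field whose meaning depends on the chosen MethodLoadKind, and each getter checks that
it is read under the matching kind. A compact sketch of that pattern with simplified
stand-in types (not the ART classes):

#include <cassert>
#include <cstdint>

enum class LoadKind { kStringInit, kDirectAddress, kDexCachePcRelative, kDexCacheViaMethod };

struct Dispatch {
  LoadKind kind;
  uint64_t method_load_data;  // Entrypoint offset, method address, or dex cache array offset.
};

uint64_t MethodAddress(const Dispatch& d) {
  assert(d.kind == LoadKind::kDirectAddress);  // Only meaningful for direct-address dispatch.
  return d.method_load_data;
}

uint32_t DexCacheArrayOffset(const Dispatch& d) {
  assert(d.kind == LoadKind::kDexCachePcRelative);
  return static_cast<uint32_t>(d.method_load_data);
}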
@@ -5019,6 +5114,16 @@
                                    : constant->AsLongConstant()->GetValue();
 }
 
+inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
+  // For the purposes of the compiler, the dex files must actually be the same object
+  // if we want to safely treat them as the same. This is especially important for JIT
+  // as custom class loaders can open the same underlying file (or memory) multiple
+  // times and provide different class resolution, but no two class loaders should ever
+  // use the same DexFile object; doing so is an unsupported hack that can lead to
+  // all sorts of weird failures.
+  return &lhs == &rhs;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6a50b7d..b18c921 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -488,6 +488,19 @@
   }
 }
 
+static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
+  ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
+  codegen->EmitLinkerPatches(&linker_patches);
+
+  // Sort patches by literal offset. Required for .oat_patches encoding.
+  std::sort(linker_patches.begin(), linker_patches.end(),
+            [](const LinkerPatch& lhs, const LinkerPatch& rhs) {
+    return lhs.LiteralOffset() < rhs.LiteralOffset();
+  });
+
+  return linker_patches;
+}
+
 CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
                                                      CodeGenerator* codegen,
                                                      CompilerDriver* compiler_driver,
@@ -502,6 +515,8 @@
   CodeVectorAllocator allocator;
   codegen->CompileOptimized(&allocator);
 
+  ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
+
   DefaultSrcMap src_mapping_table;
   if (compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()) {
     codegen->BuildSourceMap(&src_mapping_table);
@@ -527,7 +542,7 @@
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(),  // native_gc_map.
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-      ArrayRef<const LinkerPatch>());
+      ArrayRef<const LinkerPatch>(linker_patches));
   pass_observer->DumpDisassembly();
   return compiled_method;
 }
@@ -540,6 +555,8 @@
   CodeVectorAllocator allocator;
   codegen->CompileBaseline(&allocator);
 
+  ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
+
   std::vector<uint8_t> mapping_table;
   codegen->BuildMappingTable(&mapping_table);
   DefaultSrcMap src_mapping_table;
@@ -567,7 +584,7 @@
       AlignVectorSize(vmap_table),
       AlignVectorSize(gc_map),
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-      ArrayRef<const LinkerPatch>());
+      ArrayRef<const LinkerPatch>(linker_patches));
   pass_observer->DumpDisassembly();
   return compiled_method;
 }
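EmitAndSortLinkerPatches collects whatever patches the code generator recorded and sorts
them by literal offset before attaching them to the CompiledMethod, since the .oat_patches
encoding requires the patches in that order. A self-contained sketch of that sort, with
FakePatch standing in for LinkerPatch:

#include <algorithm>
#include <cstdint>
#include <vector>

struct FakePatch {
  uint32_t literal_offset;  // Offset of the patched constant within the method's code.
  uint32_t target;          // Simplified stand-in for the patch target data.
};

void SortByLiteralOffset(std::vector<FakePatch>* patches) {
  std::sort(patches->begin(), patches->end(),
            [](const FakePatch& lhs, const FakePatch& rhs) {
    return lhs.literal_offset < rhs.literal_offset;
  });
}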
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 5653612..99736e9 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -748,10 +748,6 @@
       // to compile the boot image with Quick, and the rest with Optimizing.
       compiler_kind_ = image_ ? Compiler::kQuick : Compiler::kOptimizing;
     }
-    if (compiler_kind_ == Compiler::kOptimizing) {
-      // Optimizing only supports PIC mode.
-      parser_options->compile_pic = true;
-    }
 
     if (oat_filename_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");