Record profiling information before Jitting.

- Add a new instrumentation kind to record dynamic invokes.
- Use the JNI entry point field to store the profiling data (see the sketch
  after this summary).
- Record seen receivers for every dynamic invoke.

Change-Id: I2c1738ab2a72052d45964d055dc16b44b906e54c
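
The space trick behind the second bullet: non-native methods never use their
JNI entry point, so that pointer slot can hold a ProfilingInfo* instead. A
minimal standalone sketch of the slot reuse, with illustrative names (the real
accessors are GetProfilingInfo/GetEntryPointFromJni in art_method.h below):

    #include <cassert>
    #include <cstdio>

    struct ProfilingInfo { int data; };

    // One pointer slot whose meaning depends on the method kind: native
    // methods keep their JNI entry point there; non-native methods, which
    // never use a JNI entry point, keep their profiling data instead.
    struct Method {
      bool is_native;
      void* entry_point_from_jni;

      ProfilingInfo* GetProfilingInfo() {
        assert(!is_native);  // for native methods the slot holds code, not data
        return static_cast<ProfilingInfo*>(entry_point_from_jni);
      }
    };

    int main() {
      static ProfilingInfo info{42};
      Method m{false, &info};
      std::printf("%d\n", m.GetProfilingInfo()->data);  // prints 42
    }
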
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 963eecb..995a1d5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -99,6 +99,7 @@
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
   lambda/closure.cc \
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index cfd7fcd..a84c20a 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -26,6 +26,7 @@
 #include "dex_file.h"
 #include "dex_file-inl.h"
 #include "gc_root-inl.h"
+#include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
@@ -545,6 +546,10 @@
   }
 
   visitor.VisitRootIfNonNull(declaring_class_.AddressWithoutBarrier());
+  ProfilingInfo* profiling_info = GetProfilingInfo();
+  if (hotness_count_ != 0 && !IsNative() && profiling_info != nullptr) {
+    profiling_info->VisitRoots(visitor);
+  }
 }
 
 inline void ArtMethod::CopyFrom(const ArtMethod* src, size_t image_pointer_size) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 64416d2..5dbea52 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -30,6 +30,7 @@
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jit/profiling_info.h"
 #include "jni_internal.h"
 #include "mapping_table.h"
 #include "mirror/abstract_method.h"
@@ -579,4 +580,16 @@
   return oat_method.GetVmapTable();
 }
 
+ProfilingInfo* ArtMethod::CreateProfilingInfo() {
+  ProfilingInfo* info = ProfilingInfo::Create(this);
+  MemberOffset offset = ArtMethod::EntryPointFromJniOffset(sizeof(void*));
+  uintptr_t pointer = reinterpret_cast<uintptr_t>(this) + offset.Uint32Value();
+  if (!reinterpret_cast<Atomic<ProfilingInfo*>*>(pointer)->
+          CompareExchangeStrongSequentiallyConsistent(nullptr, info)) {
+    return GetProfilingInfo();
+  } else {
+    return info;
+  }
+}
+
 }  // namespace art
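
CreateProfilingInfo above publishes the freshly created info with a strong
compare-and-swap, so when two threads warm up the same method at once, exactly
one allocation wins and both callers end up using the same object (the loser's
allocation simply stays unused in the data cache). A standalone sketch of that
publication pattern, using std::atomic instead of ART's Atomic<> wrapper
(illustrative, not part of this change):

    #include <atomic>
    #include <cstdio>

    struct ProfilingInfo { int id; };

    std::atomic<ProfilingInfo*> slot{nullptr};

    ProfilingInfo* GetOrCreate(ProfilingInfo* candidate) {
      ProfilingInfo* expected = nullptr;
      if (slot.compare_exchange_strong(expected, candidate)) {
        return candidate;  // we won the race; our object is now published
      }
      return expected;     // somebody else won; use their object
    }

    int main() {
      static ProfilingInfo a{1}, b{2};
      std::printf("%d\n", GetOrCreate(&a)->id);  // 1
      std::printf("%d\n", GetOrCreate(&b)->id);  // still 1: the first CAS won
    }
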
diff --git a/runtime/art_method.h b/runtime/art_method.h
index e0b11d0..3f2161f 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -33,6 +33,7 @@
 namespace art {
 
 union JValue;
+class ProfilingInfo;
 class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
 class ShadowFrame;
@@ -389,16 +390,25 @@
         PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
+  ProfilingInfo* CreateProfilingInfo() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ProfilingInfo* GetProfilingInfo() {
+    return reinterpret_cast<ProfilingInfo*>(GetEntryPointFromJni());
+  }
+
   void* GetEntryPointFromJni() {
     return GetEntryPointFromJniPtrSize(sizeof(void*));
   }
+
   ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size) {
     return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
   }
 
   void SetEntryPointFromJni(const void* entrypoint) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsNative());
     SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*));
   }
+
   ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size) {
     SetNativePointer(EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
   }
@@ -523,6 +533,10 @@
   ALWAYS_INLINE GcRoot<mirror::Class>* GetDexCacheResolvedTypes(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  uint16_t IncrementCounter() {
+    return ++hotness_count_;
+  }
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
@@ -544,7 +558,11 @@
   // Entry within a dispatch table for this method. For static/direct methods the index is into
   // the declaringClass.directMethods, for virtual methods the vtable and for interface methods the
   // ifTable.
-  uint32_t method_index_;
+  uint16_t method_index_;
+
+  // The hotness we measure for this method. Incremented by the interpreter. Not atomic, as we allow
+  // missing increments: if the method is hot, we will see it eventually.
+  uint16_t hotness_count_;
 
   // Fake padding field gets inserted here.
 
@@ -558,7 +576,8 @@
     // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
     GcRoot<mirror::Class>* dex_cache_resolved_types_;
 
-    // Pointer to JNI function registered to this method, or a function to resolve the JNI function.
+    // Pointer to JNI function registered to this method, or a function to resolve the JNI function,
+    // or the profiling data for non-native methods.
     void* entry_point_from_jni_;
 
     // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
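
Note how the hunk above pays for the new counter: method_index_ shrinks from
32 to 16 bits (the change assumes dispatch-table indices fit in 16 bits) and
hotness_count_ takes the freed half, so ArtMethod does not grow. A sketch of
the repacking invariant, with illustrative stand-in structs:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the relevant slice of ArtMethod.
    struct Before { uint32_t method_index_; };
    struct After  { uint16_t method_index_; uint16_t hotness_count_; };

    static_assert(sizeof(Before) == sizeof(After),
                  "the split must not grow the struct");

    int main() {
      std::printf("before=%zu after=%zu bytes\n", sizeof(Before), sizeof(After));
    }
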
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index e1aca2f..c3bd575 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -218,6 +218,17 @@
                << " " << dex_pc_offset;
   }
 
+  // We only care about invokes in the Jit.
+  void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
+                                mirror::Object*,
+                                ArtMethod* method,
+                                uint32_t dex_pc,
+                                ArtMethod*)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    LOG(ERROR) << "Unexpected invoke event in debugger " << PrettyMethod(method)
+               << " " << dex_pc;
+  }
+
  private:
   static bool IsReturn(ArtMethod* method, uint32_t dex_pc)
       SHARED_REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 63c02ed..973cd7d 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -407,6 +407,10 @@
     backward_branch_listeners_.push_back(listener);
     have_backward_branch_listeners_ = true;
   }
+  if (HasEvent(kInvokeVirtualOrInterface, events)) {
+    invoke_virtual_or_interface_listeners_.push_back(listener);
+    have_invoke_virtual_or_interface_listeners_ = true;
+  }
   if (HasEvent(kDexPcMoved, events)) {
     std::list<InstrumentationListener*>* modified;
     if (have_dex_pc_listeners_) {
@@ -466,13 +470,17 @@
     have_method_exit_listeners_ = !method_exit_listeners_.empty();
   }
   if (HasEvent(kMethodUnwind, events) && have_method_unwind_listeners_) {
-      method_unwind_listeners_.remove(listener);
-      have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
+    method_unwind_listeners_.remove(listener);
+    have_method_unwind_listeners_ = !method_unwind_listeners_.empty();
   }
   if (HasEvent(kBackwardBranch, events) && have_backward_branch_listeners_) {
-      backward_branch_listeners_.remove(listener);
-      have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
-    }
+    backward_branch_listeners_.remove(listener);
+    have_backward_branch_listeners_ = !backward_branch_listeners_.empty();
+  }
+  if (HasEvent(kInvokeVirtualOrInterface, events) && have_invoke_virtual_or_interface_listeners_) {
+    invoke_virtual_or_interface_listeners_.remove(listener);
+    have_invoke_virtual_or_interface_listeners_ = !invoke_virtual_or_interface_listeners_.empty();
+  }
   if (HasEvent(kDexPcMoved, events) && have_dex_pc_listeners_) {
     std::list<InstrumentationListener*>* modified =
         new std::list<InstrumentationListener*>(*dex_pc_listeners_.get());
@@ -908,6 +916,16 @@
   }
 }
 
+void Instrumentation::InvokeVirtualOrInterfaceImpl(Thread* thread,
+                                                   mirror::Object* this_object,
+                                                   ArtMethod* caller,
+                                                   uint32_t dex_pc,
+                                                   ArtMethod* callee) const {
+  for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
+    listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+  }
+}
+
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          ArtMethod* method, uint32_t dex_pc,
                                          ArtField* field) const {
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 93ff567..6711ac3 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -97,6 +97,14 @@
   // Call-back for when we get a backward branch.
   virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+
+  // Call-back for when we get an invokevirtual or an invokeinterface.
+  virtual void InvokeVirtualOrInterface(Thread* thread,
+                                        mirror::Object* this_object,
+                                        ArtMethod* caller,
+                                        uint32_t dex_pc,
+                                        ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 };
 
 // Instrumentation is a catch-all for when extra information is required from the runtime. The
@@ -114,6 +122,7 @@
     kFieldWritten = 0x20,
     kExceptionCaught = 0x40,
     kBackwardBranch = 0x80,
+    kInvokeVirtualOrInterface = 0x100,
   };
 
   enum class InstrumentationLevel {
@@ -257,6 +266,10 @@
     return have_backward_branch_listeners_;
   }
 
+  bool HasInvokeVirtualOrInterfaceListeners() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return have_invoke_virtual_or_interface_listeners_;
+  }
+
   bool IsActive() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return have_dex_pc_listeners_ || have_method_entry_listeners_ || have_method_exit_listeners_ ||
         have_field_read_listeners_ || have_field_write_listeners_ ||
@@ -325,6 +338,17 @@
     }
   }
 
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee) const
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (UNLIKELY(HasInvokeVirtualOrInterfaceListeners())) {
+      InvokeVirtualOrInterfaceImpl(thread, this_object, caller, dex_pc, callee);
+    }
+  }
+
   // Inform listeners that an exception was caught.
   void ExceptionCaughtEvent(Thread* thread, mirror::Throwable* exception_object) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -385,6 +409,12 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void BackwardBranchImpl(Thread* thread, ArtMethod* method, int32_t offset) const
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void InvokeVirtualOrInterfaceImpl(Thread* thread,
+                                    mirror::Object* this_object,
+                                    ArtMethod* caller,
+                                    uint32_t dex_pc,
+                                    ArtMethod* callee) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                            ArtMethod* method, uint32_t dex_pc,
                            ArtField* field) const
@@ -451,6 +481,9 @@
   // Do we have any backward branch listeners? Short-cut to avoid taking the instrumentation_lock_.
   bool have_backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
 
+  // Do we have any invoke listeners? Short-cut to avoid taking the instrumentation_lock_.
+  bool have_invoke_virtual_or_interface_listeners_ GUARDED_BY(Locks::mutator_lock_);
+
   // Contains the instrumentation level required by each client of the instrumentation identified
   // by a string key.
   typedef SafeMap<const char*, InstrumentationLevel> InstrumentationLevelTable;
@@ -461,6 +494,8 @@
   std::list<InstrumentationListener*> method_exit_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> method_unwind_listeners_ GUARDED_BY(Locks::mutator_lock_);
   std::list<InstrumentationListener*> backward_branch_listeners_ GUARDED_BY(Locks::mutator_lock_);
+  std::list<InstrumentationListener*> invoke_virtual_or_interface_listeners_
+      GUARDED_BY(Locks::mutator_lock_);
   std::shared_ptr<std::list<InstrumentationListener*>> dex_pc_listeners_
       GUARDED_BY(Locks::mutator_lock_);
   std::shared_ptr<std::list<InstrumentationListener*>> field_read_listeners_
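
kInvokeVirtualOrInterface takes the next free bit in InstrumentationEvent, so
a client can subscribe to it together with other events in a single
AddListener call, as jit.cc does below. A small sketch of the flag arithmetic
(kBackwardBranch and kInvokeVirtualOrInterface values are from this diff;
kMethodEntered's value is assumed from the rest of the enum):

    #include <cstdint>
    #include <cstdio>

    // Event kinds combine as a bitmask.
    enum InstrumentationEvent : uint32_t {
      kMethodEntered = 0x1,  // assumed value
      kBackwardBranch = 0x80,
      kInvokeVirtualOrInterface = 0x100,
    };

    int main() {
      // The JIT listener subscribes to all three at once, as in jit.cc.
      uint32_t events =
          kMethodEntered | kBackwardBranch | kInvokeVirtualOrInterface;
      std::printf("invoke events requested: %s\n",
                  (events & kInvokeVirtualOrInterface) != 0 ? "yes" : "no");
    }
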
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index 56fe9ef..c7cc68a 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -36,7 +36,8 @@
     : received_method_enter_event(false), received_method_exit_event(false),
       received_method_unwind_event(false), received_dex_pc_moved_event(false),
       received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_backward_branch_event(false) {}
+      received_exception_caught_event(false), received_backward_branch_event(false),
+      received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
 
@@ -105,6 +106,15 @@
     received_backward_branch_event = true;
   }
 
+  void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
+                                mirror::Object* this_object ATTRIBUTE_UNUSED,
+                                ArtMethod* caller ATTRIBUTE_UNUSED,
+                                uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                ArtMethod* callee ATTRIBUTE_UNUSED)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
+    received_invoke_virtual_or_interface_event = true;
+  }
+
   void Reset() {
     received_method_enter_event = false;
     received_method_exit_event = false;
@@ -114,6 +124,7 @@
     received_field_written_event = false;
     received_exception_caught_event = false;
     received_backward_branch_event = false;
+    received_invoke_virtual_or_interface_event = false;
   }
 
   bool received_method_enter_event;
@@ -124,6 +135,7 @@
   bool received_field_written_event;
   bool received_exception_caught_event;
   bool received_backward_branch_event;
+  bool received_invoke_virtual_or_interface_event;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(TestInstrumentationListener);
@@ -287,6 +299,8 @@
         return instr->HasExceptionCaughtListeners();
       case instrumentation::Instrumentation::kBackwardBranch:
         return instr->HasBackwardBranchListeners();
+      case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
+        return instr->HasInvokeVirtualOrInterfaceListeners();
       default:
         LOG(FATAL) << "Unknown instrumentation event " << event_type;
         UNREACHABLE();
@@ -330,6 +344,9 @@
       case instrumentation::Instrumentation::kBackwardBranch:
         instr->BackwardBranch(self, method, dex_pc);
         break;
+      case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
+        instr->InvokeVirtualOrInterface(self, obj, method, dex_pc, method);
+        break;
       default:
         LOG(FATAL) << "Unknown instrumentation event " << event_type;
         UNREACHABLE();
@@ -355,6 +372,8 @@
         return listener.received_exception_caught_event;
       case instrumentation::Instrumentation::kBackwardBranch:
         return listener.received_backward_branch_event;
+      case instrumentation::Instrumentation::kInvokeVirtualOrInterface:
+        return listener.received_invoke_virtual_or_interface_event;
       default:
         LOG(FATAL) << "Unknown instrumentation event " << event_type;
         UNREACHABLE();
@@ -418,6 +437,10 @@
   TestEvent(instrumentation::Instrumentation::kBackwardBranch);
 }
 
+TEST_F(InstrumentationTest, InvokeVirtualOrInterfaceEvent) {
+  TestEvent(instrumentation::Instrumentation::kInvokeVirtualOrInterface);
+}
+
 TEST_F(InstrumentationTest, DeoptimizeDirectMethod) {
   ScopedObjectAccess soa(Thread::Current());
   jobject class_loader = LoadDex("Instrumentation");
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index fdefb9f..7398778 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -265,6 +265,13 @@
     result->SetJ(0);
     return false;
   } else {
+    if (type == kVirtual || type == kInterface) {
+      instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+      if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
+        instrumentation->InvokeVirtualOrInterface(
+            self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+    }
     return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
                                              result);
   }
@@ -297,6 +304,11 @@
     result->SetJ(0);
     return false;
   } else {
+    instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
+      instrumentation->InvokeVirtualOrInterface(
+          self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
+    }
     // No need to check since we've been quickened.
     return DoCall<is_range, false>(called_method, self, shadow_frame, inst, inst_data, result);
   }
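
Both interpreter call sites above share the same shape: test a cached boolean
under UNLIKELY, and only then pay for the virtual dispatch over the listener
list, so the common no-listener case costs one predictable branch per invoke.
A reduced sketch of that guard pattern (illustrative, not the real
Instrumentation class):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Listener {
      virtual ~Listener() {}
      virtual void InvokeVirtualOrInterface(uint32_t dex_pc) = 0;
    };

    struct Instrumentation {
      std::vector<Listener*> invoke_listeners;
      bool have_invoke_listeners = false;  // the cached short-cut flag

      void NotifyInvoke(uint32_t dex_pc) {
        // Equivalent of `if (UNLIKELY(HasInvokeVirtualOrInterfaceListeners()))`.
        if (__builtin_expect(have_invoke_listeners, false)) {
          for (Listener* listener : invoke_listeners) {
            listener->InvokeVirtualOrInterface(dex_pc);
          }
        }
      }
    };

    struct PrintingListener : Listener {
      void InvokeVirtualOrInterface(uint32_t dex_pc) override {
        std::printf("invoke at dex pc %u\n", dex_pc);
      }
    };

    int main() {
      Instrumentation instr;
      instr.NotifyInvoke(1);  // no listeners: just one predicted-false branch
      PrintingListener listener;
      instr.invoke_listeners.push_back(&listener);
      instr.have_invoke_listeners = true;
      instr.NotifyInvoke(2);  // now dispatches to the listener
    }
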
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 26a4fe4..683b2cf 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -39,6 +39,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
   jit_options->compile_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+  jit_options->warmup_threshold_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
   return jit_options;
@@ -160,17 +162,19 @@
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
   CHECK_GT(compile_threshold, 0U);
   Runtime* const runtime = Runtime::Current();
   runtime->GetThreadList()->SuspendAll(__FUNCTION__);
   // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
   // something.
-  instrumentation_cache_.reset(new jit::JitInstrumentationCache(compile_threshold));
+  instrumentation_cache_.reset(
+      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
   runtime->GetInstrumentation()->AddListener(
       new jit::JitInstrumentationListener(instrumentation_cache_.get()),
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch);
+      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kInvokeVirtualOrInterface);
   runtime->GetThreadList()->ResumeAll();
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index ca6e7ea..643bc23 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,13 +43,14 @@
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 1 : 1000;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
+  static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
   bool CompileMethod(ArtMethod* method, Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold);
+  void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
   void CreateThreadPool();
   CompilerCallbacks* GetCompilerCallbacks() {
     return compiler_callbacks_;
@@ -95,6 +96,9 @@
   size_t GetCompileThreshold() const {
     return compile_threshold_;
   }
+  size_t GetWarmupThreshold() const {
+    return warmup_threshold_;
+  }
   size_t GetCodeCacheCapacity() const {
     return code_cache_capacity_;
   }
@@ -112,6 +116,7 @@
   bool use_jit_;
   size_t code_cache_capacity_;
   size_t compile_threshold_;
+  size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
 
   JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index cd5f4cb..4c53162 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -82,9 +82,19 @@
   return code_cache_ptr_ - size;
 }
 
+uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
+  MutexLock mu(self, lock_);
+  size = RoundUp(size, sizeof(void*));
+  if (size > DataCacheRemain()) {
+    return nullptr;
+  }
+  data_cache_ptr_ += size;
+  return data_cache_ptr_ - size;
+}
+
 uint8_t* JitCodeCache::AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end) {
   MutexLock mu(self, lock_);
-  const size_t size = end - begin;
+  const size_t size = RoundUp(end - begin, sizeof(void*));
   if (size > DataCacheRemain()) {
     return nullptr;  // Out of space in the data cache.
   }
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 9707f6f..f485e4a 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -86,6 +86,9 @@
   // Reserve a region of code of size at least "size". Returns null if there is no more room.
   uint8_t* ReserveCode(Thread* self, size_t size) REQUIRES(!lock_);
 
+  // Reserve a region of data of size at least "size". Returns null if there is no more room.
+  uint8_t* ReserveData(Thread* self, size_t size) REQUIRES(!lock_);
+
   // Add a data array of size (end - begin) with the associated contents, returns null if there
   // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
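
ReserveData is a bump-pointer allocation out of the cache's data region, and
the RoundUp to sizeof(void*) keeps every returned block pointer-aligned, which
a ProfilingInfo placed there requires. A simplified standalone sketch of the
allocator, assuming a plain byte region instead of the real mapped cache:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <mutex>

    // n must be a power of two, as sizeof(void*) is.
    constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) & ~(n - 1); }

    class DataCache {
     public:
      DataCache(uint8_t* begin, size_t capacity)
          : ptr_(begin), end_(begin + capacity) {}

      // Mirrors ReserveData: mutex-guarded bump-pointer allocation.
      uint8_t* ReserveData(size_t size) {
        std::lock_guard<std::mutex> mu(lock_);
        size = RoundUp(size, sizeof(void*));
        if (size > static_cast<size_t>(end_ - ptr_)) {
          return nullptr;  // out of space in the data cache
        }
        ptr_ += size;
        return ptr_ - size;
      }

     private:
      std::mutex lock_;
      uint8_t* ptr_;
      uint8_t* end_;
    };

    int main() {
      static uint8_t region[64];
      DataCache cache(region, sizeof(region));
      uint8_t* first = cache.ReserveData(10);   // consumes 16 aligned bytes
      uint8_t* second = cache.ReserveData(10);
      std::printf("gap=%td\n", second - first);  // 16 on LP64
    }
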
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 258c29d..f485682 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -26,16 +26,12 @@
 
 class JitCompileTask : public Task {
  public:
-  JitCompileTask(ArtMethod* method, JitInstrumentationCache* cache)
-      : method_(method), cache_(cache) {
-  }
+  explicit JitCompileTask(ArtMethod* method) : method_(method) {}
 
   virtual void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
     VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      cache_->SignalCompiled(self, method_);
-    } else {
+    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
       VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
     }
   }
@@ -46,13 +42,14 @@
 
  private:
   ArtMethod* const method_;
-  JitInstrumentationCache* const cache_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
 };
 
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold)
-    : lock_("jit instrumentation lock"), hot_method_threshold_(hot_method_threshold) {
+JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
+                                                 size_t warm_method_threshold)
+    : hot_method_threshold_(hot_method_threshold),
+      warm_method_threshold_(warm_method_threshold) {
 }
 
 void JitInstrumentationCache::CreateThreadPool() {
@@ -60,20 +57,11 @@
 }
 
 void JitInstrumentationCache::DeleteThreadPool() {
+  DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
   thread_pool_.reset();
 }
 
-void JitInstrumentationCache::SignalCompiled(Thread* self, ArtMethod* method) {
-  ScopedObjectAccessUnchecked soa(self);
-  jmethodID method_id = soa.EncodeMethod(method);
-  MutexLock mu(self, lock_);
-  auto it = samples_.find(method_id);
-  if (it != samples_.end()) {
-    samples_.erase(it);
-  }
-}
-
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t count) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
   ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
@@ -81,34 +69,21 @@
       Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
     return;
   }
-  jmethodID method_id = soa.EncodeMethod(method);
-  bool is_hot = false;
-  {
-    MutexLock mu(self, lock_);
-    size_t sample_count = 0;
-    auto it = samples_.find(method_id);
-    if (it != samples_.end()) {
-      it->second += count;
-      sample_count = it->second;
-    } else {
-      sample_count = count;
-      samples_.insert(std::make_pair(method_id, count));
-    }
-    // If we have enough samples, mark as hot and request Jit compilation.
-    if (sample_count >= hot_method_threshold_ && sample_count - count < hot_method_threshold_) {
-      is_hot = true;
+  if (thread_pool_.get() == nullptr) {
+    DCHECK(Runtime::Current()->IsShuttingDown(self));
+    return;
+  }
+  uint16_t sample_count = method->IncrementCounter();
+  if (sample_count == warm_method_threshold_) {
+    ProfilingInfo* info = method->CreateProfilingInfo();
+    if (info != nullptr) {
+      VLOG(jit) << "Start profiling " << PrettyMethod(method);
     }
   }
-  if (is_hot) {
-    if (thread_pool_.get() != nullptr) {
-      thread_pool_->AddTask(self, new JitCompileTask(
-          method->GetInterfaceMethodIfProxy(sizeof(void*)), this));
-      thread_pool_->StartWorkers(self);
-    } else {
-      VLOG(jit) << "Compiling hot method " << PrettyMethod(method);
-      Runtime::Current()->GetJit()->CompileMethod(
-          method->GetInterfaceMethodIfProxy(sizeof(void*)), self);
-    }
+  if (sample_count == hot_method_threshold_) {
+    thread_pool_->AddTask(self, new JitCompileTask(
+        method->GetInterfaceMethodIfProxy(sizeof(void*))));
+    thread_pool_->StartWorkers(self);
   }
 }
 
@@ -117,5 +92,17 @@
   CHECK(instrumentation_cache_ != nullptr);
 }
 
+void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
+                                                          mirror::Object* this_object,
+                                                          ArtMethod* caller,
+                                                          uint32_t dex_pc,
+                                                          ArtMethod* callee ATTRIBUTE_UNUSED) {
+  DCHECK(this_object != nullptr);
+  ProfilingInfo* info = caller->GetProfilingInfo();
+  if (info != nullptr) {
+    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
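
AddSamples now drives a two-stage pipeline: the counter is bumped on method
entry and backward branches, the warm transition allocates the ProfilingInfo,
and the hot transition hands the method to the compile thread pool. A
condensed sketch of the control flow, with assumed threshold values (the real
defaults are kDefaultCompileThreshold and kDefaultWarmupThreshold in jit.h):

    #include <cstdint>
    #include <cstdio>

    // Assumed values; overridable via -Xjitthreshold / -Xjitwarmupthreshold.
    constexpr uint16_t kWarmThreshold = 500;
    constexpr uint16_t kHotThreshold = 1000;

    void OnSample(uint16_t sample_count) {
      // Equality-keyed transitions: each normally fires once per method.
      if (sample_count == kWarmThreshold) {
        std::puts("warm: create ProfilingInfo, start recording receiver types");
      }
      if (sample_count == kHotThreshold) {
        std::puts("hot: enqueue a JitCompileTask on the thread pool");
      }
    }

    int main() {
      for (uint16_t count = 1; count <= kHotThreshold; ++count) {
        OnSample(count);  // stands in for IncrementCounter() in AddSamples
      }
    }
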
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 0deaf8a..6fdef65 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -45,18 +45,15 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
  public:
-  explicit JitInstrumentationCache(size_t hot_method_threshold);
+  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
   void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
-  void SignalCompiled(Thread* self, ArtMethod* method)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool();
 
  private:
-  Mutex lock_;
-  std::unordered_map<jmethodID, size_t> samples_;
   size_t hot_method_threshold_;
+  size_t warm_method_threshold_;
   std::unique_ptr<ThreadPool> thread_pool_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
@@ -66,37 +63,43 @@
  public:
   explicit JitInstrumentationListener(JitInstrumentationCache* cache);
 
-  virtual void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
-                             ArtMethod* method, uint32_t /*dex_pc*/)
+  void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
+                     ArtMethod* method, uint32_t /*dex_pc*/)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     instrumentation_cache_->AddSamples(thread, method, 1);
   }
-  virtual void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            const JValue& /*return_value*/)
+  void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                    const JValue& /*return_value*/)
       OVERRIDE { }
-  virtual void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
-  virtual void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                         ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                         ArtField* /*field*/) OVERRIDE { }
-  virtual void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            ArtField* /*field*/, const JValue& /*field_value*/)
+  void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
+  void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                 ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                 ArtField* /*field*/) OVERRIDE { }
+  void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                    ArtField* /*field*/, const JValue& /*field_value*/)
       OVERRIDE { }
-  virtual void ExceptionCaught(Thread* /*thread*/,
-                               mirror::Throwable* /*exception_object*/) OVERRIDE { }
+  void ExceptionCaught(Thread* /*thread*/,
+                       mirror::Throwable* /*exception_object*/) OVERRIDE { }
 
-  virtual void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
-                          ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
+  void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
+                  ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
-  // We only care about how many dex instructions were executed in the Jit.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     CHECK_LE(dex_pc_offset, 0);
     instrumentation_cache_->AddSamples(thread, method, 1);
   }
 
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   JitInstrumentationCache* const instrumentation_cache_;
 
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
new file mode 100644
index 0000000..0c039f2
--- /dev/null
+++ b/runtime/jit/profiling_info.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiling_info.h"
+
+#include "art_method-inl.h"
+#include "dex_instruction.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+
+namespace art {
+
+ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+  // Walk over the dex instructions of the method and keep track of
+  // instructions we are interested in profiling.
+  const uint16_t* code_ptr = nullptr;
+  const uint16_t* code_end = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(!method->IsNative());
+    const DexFile::CodeItem& code_item = *method->GetCodeItem();
+    code_ptr = code_item.insns_;
+    code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+  }
+
+  uint32_t dex_pc = 0;
+  std::vector<uint32_t> entries;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    switch (instruction.Opcode()) {
+      case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_VIRTUAL_RANGE:
+      case Instruction::INVOKE_VIRTUAL_QUICK:
+      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      case Instruction::INVOKE_INTERFACE:
+      case Instruction::INVOKE_INTERFACE_RANGE:
+        entries.push_back(dex_pc);
+        break;
+
+      default:
+        break;
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+
+  // If there are no instructions we are interested in, there is no need to
+  // create a `ProfilingInfo` object: it would never be filled.
+  if (entries.empty()) {
+    return nullptr;
+  }
+
+  // Allocate the `ProfilingInfo` object in the JIT's data space.
+  jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
+  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
+  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
+
+  if (data == nullptr) {
+    VLOG(jit) << "Cannot allocate profiling info anymore";
+    return nullptr;
+  }
+
+  return new (data) ProfilingInfo(entries);
+}
+
+void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = nullptr;
+  // TODO: binary search if array is too long.
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    if (cache_[i].dex_pc == dex_pc) {
+      cache = &cache_[i];
+      break;
+    }
+  }
+  DCHECK(cache != nullptr);
+
+  ScopedObjectAccess soa(self);
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+    if (existing == cls) {
+      // Receiver type is already in the cache, nothing else to do.
+      return;
+    } else if (existing == nullptr) {
+      // Cache entry is empty, try to put `cls` in it.
+      GcRoot<mirror::Class> expected_root(nullptr);
+      GcRoot<mirror::Class> desired_root(cls);
+      if (!reinterpret_cast<Atomic<GcRoot<mirror::Class>>*>(&cache->classes_[i])->
+              CompareExchangeStrongSequentiallyConsistent(expected_root, desired_root)) {
+        // Some other thread put a class in this entry; re-examine it, in
+        // case the class it stored is `cls`.
+        --i;
+      } else {
+        // We successfully set `cls`, just return.
+        return;
+      }
+    }
+  }
+  // Unsuccessful: the cache is full, which makes the call site megamorphic.
+  DCHECK(cache->IsMegamorphic());
+}
+
+}  // namespace art
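
AddInvokeInfo fills the per-site cache without taking a lock: claim the first
empty slot with a CAS, re-examine the slot on a lost race (the winner may have
stored the same class), and treat a full cache as megamorphic. A standalone
sketch using std::atomic<Klass*> in place of ART's Atomic<GcRoot<mirror::Class>>
(illustrative, not part of this change):

    #include <atomic>
    #include <cstddef>
    #include <cstdio>

    struct Klass {};  // stand-in for mirror::Class

    constexpr size_t kIndividualCacheSize = 5;
    std::atomic<Klass*> cache[kIndividualCacheSize];  // zero-initialized

    void AddReceiver(Klass* cls) {
      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
        Klass* existing = cache[i].load();
        if (existing == cls) {
          return;  // receiver type already recorded
        }
        if (existing == nullptr) {
          Klass* expected = nullptr;
          if (cache[i].compare_exchange_strong(expected, cls)) {
            return;  // we claimed the empty slot
          }
          --i;  // lost the race: re-examine this slot, it may now hold cls
        }
      }
      // Every slot holds some other class: the call site is megamorphic.
    }

    int main() {
      Klass a, b;
      AddReceiver(&a);
      AddReceiver(&b);
      AddReceiver(&a);  // hits the existing entry, uses no new slot
      std::printf("distinct receivers: %s and %s\n",
                  cache[0].load() == &a ? "a" : "?",
                  cache[1].load() == &b ? "b" : "?");
    }
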
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
new file mode 100644
index 0000000..73ca41a
--- /dev/null
+++ b/runtime/jit/profiling_info.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_PROFILING_INFO_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "gc_root.h"
+
+namespace art {
+
+class ArtMethod;
+
+namespace mirror {
+class Class;
+}
+
+/**
+ * Profiling info for a method, created and filled by the interpreter once the
+ * method is warm, and used by the compiler to drive optimizations.
+ */
+class ProfilingInfo {
+ public:
+  static ProfilingInfo* Create(ArtMethod* method);
+
+  // Add information from an executed INVOKE instruction to the profile.
+  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+
+  // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
+  template<typename RootVisitorType>
+  void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      InlineCache* cache = &cache_[i];
+      for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
+        visitor.VisitRootIfNonNull(cache->classes_[j].AddressWithoutBarrier());
+      }
+    }
+  }
+
+ private:
+  // Structure to store the classes seen at runtime for a specific instruction.
+  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+  struct InlineCache {
+    bool IsMonomorphic() const {
+      DCHECK_GE(kIndividualCacheSize, 2);
+      return !classes_[0].IsNull() && classes_[1].IsNull();
+    }
+
+    bool IsMegamorphic() const {
+      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+        if (classes_[i].IsNull()) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    bool IsUninitialized() const {
+      return classes_[0].IsNull();
+    }
+
+    bool IsPolymorphic() const {
+      DCHECK_GE(kIndividualCacheSize, 3);
+      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+    }
+
+    static constexpr uint16_t kIndividualCacheSize = 5;
+    uint32_t dex_pc;
+    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+  };
+
+  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()) {
+    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      cache_[i].dex_pc = entries[i];
+    }
+  }
+
+  // Number of instructions we are profiling in the ArtMethod.
+  const uint32_t number_of_inline_caches_;
+
+  // Dynamically allocated array of size `number_of_inline_caches_`.
+  InlineCache cache_[0];
+
+  DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILING_INFO_H_
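
The cache_[0] member plus the placement new in Create() is the classic
trailing-array layout: one contiguous block holds the header followed by
number_of_inline_caches_ InlineCache slots. A standalone sketch of the trick
(zero-length arrays are a GNU extension, which the ART build permits; names
here are illustrative):

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <new>

    struct Entry { uint32_t dex_pc; };

    struct Info {
      explicit Info(uint32_t n) : count(n) {
        std::memset(entries, 0, n * sizeof(Entry));  // zero the trailing slots
      }
      const uint32_t count;
      Entry entries[0];  // trailing storage; GNU zero-length-array extension
    };

    int main() {
      const uint32_t n = 3;
      // One block: header followed by n entries; placement-new the header.
      void* block = std::malloc(sizeof(Info) + n * sizeof(Entry));
      Info* info = new (block) Info(n);
      info->entries[2].dex_pc = 42;
      std::printf("caches=%u last_dex_pc=%u\n", info->count, info->entries[2].dex_pc);
      std::free(block);
    }
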
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 25b5e49..50e2053 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -158,6 +158,9 @@
       .Define("-Xjitthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITCompileThreshold)
+      .Define("-Xjitwarmupthreshold:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITWarmupThreshold)
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
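
This makes the warmup threshold tunable independently of the compile
threshold; for example, an invocation along the lines of
`dalvikvm -Xusejit:true -Xjitthreshold:1000 -Xjitwarmupthreshold:500 ...`
would start profiling at 500 samples and compile at 1000 (the -Xusejit flag is
assumed from the existing UseJIT option and is not part of this diff).
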
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 4797564..7c71e13 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1749,7 +1749,8 @@
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() != nullptr) {
     compiler_callbacks_ = jit_->GetCompilerCallbacks();
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold());
+    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
+                                     jit_options_->GetWarmupThreshold());
     jit_->CreateThreadPool();
   } else {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 02ed3a2..d88e84b 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -68,6 +68,7 @@
 RUNTIME_OPTIONS_KEY (bool,                EnableHSpaceCompactForOOM,      true)
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity,           jit::JitCodeCache::kDefaultCapacity)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 4ab5c0e..d629ce6 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -806,6 +806,15 @@
   LOG(ERROR) << "Unexpected backward branch event in tracing" << PrettyMethod(method);
 }
 
+void Trace::InvokeVirtualOrInterface(Thread*,
+                                     mirror::Object*,
+                                     ArtMethod* method,
+                                     uint32_t dex_pc,
+                                     ArtMethod*) {
+  LOG(ERROR) << "Unexpected invoke event in tracing " << PrettyMethod(method)
+             << " " << dex_pc;
+}
+
 void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff) {
   if (UseThreadCpuClock()) {
     uint64_t clock_base = thread->GetTraceClockBase();
diff --git a/runtime/trace.h b/runtime/trace.h
index 04be3dd..87a691d 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -166,6 +166,12 @@
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   // Reuse an old stack trace if it exists, otherwise allocate a new one.
   static std::vector<ArtMethod*>* AllocStackTrace();
   // Clear and store an old stack trace for later use.