Add sampling profiler

This adds a sampling profiler thread that runs periodically, gathering
profiling data and writing the results to a file in /data/data (specific to
the running app).  The intention is to use these files as input to the
compiler so that it can determine the best methods to compile.
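
For illustration (package name and counts below are hypothetical), a profile
file might look like the following, where the first line is
<samples>/<null methods>/<boot methods> and each subsequent line is
<method>/<count>/<code size>:

  /data/data/com.example.app/art-profile-com.example.app
  1500/120/5000
  void com.example.app.MainActivity.onCreate(android.os.Bundle)/42/310
  int com.example.app.Worker.compute(int)/17/96

(When the runtime has a classpath, its first entry, with '/' replaced by '@',
is appended to the file name.)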

Bug: 11539952
Change-Id: I0bfbb4146fb7966673c792f017ffac8107b6272d
diff --git a/runtime/Android.mk b/runtime/Android.mk
index a602c83..576ed1b 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -131,6 +131,7 @@
 	thread_pool.cc \
 	throw_location.cc \
 	trace.cc \
+	profiler.cc \
 	utf.cc \
 	utils.cc \
 	verifier/dex_gc_map.cc \
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index a644998..5f43bec 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -44,11 +44,27 @@
 void Barrier::Increment(Thread* self, int delta) {
   MutexLock mu(self, lock_);
   SetCountLocked(self, count_ + delta);
+
+  // Increment the count.  If it becomes zero after the increment
+  // then all the threads have already passed the barrier.  If
+  // it is non-zero then there are still one or more threads
+  // that have not yet called the Pass function.  When the
+  // Pass function is called by the last thread, the count will
+  // be decremented to zero and a Broadcast will be made on the
+  // condition variable, thus waking up this thread.
   if (count_ != 0) {
     condition_.Wait(self);
   }
 }
 
+void Barrier::Increment(Thread* self, int delta, uint32_t timeout_ms) {
+  MutexLock mu(self, lock_);
+  SetCountLocked(self, count_ + delta);
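+  // Increment the count.  If it becomes zero after the increment then all the
+  // threads have already passed the barrier.  Otherwise wait for the remaining
+  // threads to pass, but unlike the version above give up after timeout_ms
+  // rather than waiting indefinitely.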
+  if (count_ != 0) {
+    condition_.TimedWait(self, timeout_ms, 0);
+  }
+}
+
 void Barrier::SetCountLocked(Thread* self, int count) {
   count_ = count;
   if (count_ == 0) {
diff --git a/runtime/barrier.h b/runtime/barrier.h
index 22f08e1..e335c32 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -41,6 +41,9 @@
   // Increment the count by delta, wait on condition if count is non zero.
   void Increment(Thread* self, int delta);
 
+  // Increment the count by delta, wait on condition if count is non zero, with a timeout.
+  void Increment(Thread* self, int delta, uint32_t timeout_ms) LOCKS_EXCLUDED(lock_);
+
  private:
   void SetCountLocked(Thread* self, int count) EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
diff --git a/runtime/locks.cc b/runtime/locks.cc
index 51a40c3..5b462a1 100644
--- a/runtime/locks.cc
+++ b/runtime/locks.cc
@@ -30,6 +30,7 @@
 Mutex* Locks::thread_list_lock_ = NULL;
 Mutex* Locks::thread_suspend_count_lock_ = NULL;
 Mutex* Locks::trace_lock_ = NULL;
+Mutex* Locks::profiler_lock_ = NULL;
 Mutex* Locks::unexpected_signal_lock_ = NULL;
 
 void Locks::Init() {
@@ -44,6 +45,7 @@
     DCHECK(thread_list_lock_ != NULL);
     DCHECK(thread_suspend_count_lock_ != NULL);
     DCHECK(trace_lock_ != NULL);
+    DCHECK(profiler_lock_ != NULL);
     DCHECK(unexpected_signal_lock_ != NULL);
   } else {
     logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
@@ -66,6 +68,8 @@
     thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
     DCHECK(trace_lock_ == NULL);
     trace_lock_ = new Mutex("trace lock", kTraceLock);
+    DCHECK(profiler_lock_ == NULL);
+    profiler_lock_ = new Mutex("profiler lock", kProfilerLock);
     DCHECK(unexpected_signal_lock_ == NULL);
     unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
   }
diff --git a/runtime/locks.h b/runtime/locks.h
index 72d4f65..341319c 100644
--- a/runtime/locks.h
+++ b/runtime/locks.h
@@ -54,6 +54,7 @@
   kThreadListLock,
   kBreakpointInvokeLock,
   kTraceLock,
+  kProfilerLock,
   kJdwpEventListLock,
   kJdwpAttachLock,
   kJdwpStartLock,
@@ -148,8 +149,11 @@
   // Guards trace requests.
   static Mutex* trace_lock_ ACQUIRED_AFTER(breakpoint_lock_);
 
+  // Guards profile objects.
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(trace_lock_);
+
   // Guards lists of classes within the class linker.
-  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(trace_lock_);
+  static ReaderWriterMutex* classlinker_classes_lock_ ACQUIRED_AFTER(profiler_lock_);
 
   // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
   // doesn't try to hold a higher level Mutex.
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 726a8f1..7890071 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -485,6 +485,21 @@
   }
 }
 
+
+/*
+ * This is called by the framework when it knows the application directory and
+ * process name.  We use this information to start up the sampling profiler
+ * for ART.
+ */
+static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring appDir, jstring procName) {
+  const char *appDirChars = env->GetStringUTFChars(appDir, NULL);
+  const char *procNameChars = env->GetStringUTFChars(procName, NULL);
+  std::string profileFile = std::string(appDirChars) + "/art-profile-" + std::string(procNameChars);
+  Runtime::Current()->StartProfiler(profileFile.c_str());
+  env->ReleaseStringUTFChars(appDir, appDirChars);
+  env->ReleaseStringUTFChars(procName, procNameChars);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(VMRuntime, addressOf, "!(Ljava/lang/Object;)J"),
   NATIVE_METHOD(VMRuntime, bootClassPath, "()Ljava/lang/String;"),
@@ -506,6 +521,7 @@
   NATIVE_METHOD(VMRuntime, vmVersion, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, vmLibrary, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
+  NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;Ljava/lang/String;)V"),
 };
 
 void register_dalvik_system_VMRuntime(JNIEnv* env) {
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
new file mode 100644
index 0000000..0e73812
--- /dev/null
+++ b/runtime/profiler.cc
@@ -0,0 +1,448 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiler.h"
+
+#include <sys/uio.h>
+
+#include "base/stl_util.h"
+#include "base/unix_file/fd_file.h"
+#include "class_linker.h"
+#include "common_throws.h"
+#include "debugger.h"
+#include "dex_file-inl.h"
+#include "instrumentation.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
+#include "mirror/object_array-inl.h"
+#include "mirror/object-inl.h"
+#include "object_utils.h"
+#include "os.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedLocalRef.h"
+#include "thread.h"
+#include "thread_list.h"
+#if !defined(ART_USE_PORTABLE_COMPILER)
+#include "entrypoints/quick/quick_entrypoints.h"
+#endif
+
+namespace art {
+
+BackgroundMethodSamplingProfiler* BackgroundMethodSamplingProfiler::profiler_ = nullptr;
+pthread_t BackgroundMethodSamplingProfiler::profiler_pthread_ = 0U;
+volatile bool BackgroundMethodSamplingProfiler::shutting_down_ = false;
+
+
+// TODO: this profiler runs regardless of the state of the machine.  Maybe we should use the
+// wakelock or something to modify the run characteristics.  This can be done when we
+// have some performance data after it's been used for a while.
+
+
+// This is called from either a thread list traversal or from a checkpoint.  Regardless
+// of which caller, the mutator lock must be held.
+static void GetSample(Thread* thread, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  BackgroundMethodSamplingProfiler* profiler =
+      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
+  mirror::ArtMethod* method = thread->GetCurrentMethod(nullptr);
+  if (false && method == nullptr) {
+    LOG(INFO) << "No current method available";
+    std::ostringstream os;
+    thread->Dump(os);
+    std::string data(os.str());
+    LOG(INFO) << data;
+  }
+  profiler->RecordMethod(method);
+}
+
+
+
+// A closure that is called by the thread checkpoint code.
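+// Each thread that runs the checkpoint samples its own current method and then
+// passes the profiler barrier; the profiler thread waits on that barrier (with a
+// timeout) until every runnable thread has taken its sample.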
+class SampleCheckpoint : public Closure {
+ public:
+  explicit SampleCheckpoint(BackgroundMethodSamplingProfiler* const profiler) :
+    profiler_(profiler) {}
+
+  virtual void Run(Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
+    Thread* self = Thread::Current();
+    if (thread == nullptr) {
+      LOG(ERROR) << "Checkpoint with nullptr thread";
+      return;
+    }
+
+    // Grab the mutator lock (shared access).
+    ScopedObjectAccess soa(self);
+
+    // Grab a sample.
+    GetSample(thread, this->profiler_);
+
+    // And finally tell the barrier that we're done.
+    this->profiler_->GetBarrier().Pass(self);
+  }
+
+ private:
+  BackgroundMethodSamplingProfiler* const profiler_;
+};
+
+bool BackgroundMethodSamplingProfiler::ShuttingDown(Thread* self) {
+  MutexLock mu(self, *Locks::profiler_lock_);
+  return shutting_down_;
+}
+
+void* BackgroundMethodSamplingProfiler::RunProfilerThread(void* arg) {
+  Runtime* runtime = Runtime::Current();
+  BackgroundMethodSamplingProfiler* profiler =
+      reinterpret_cast<BackgroundMethodSamplingProfiler*>(arg);
+
+  // Add a random delay for the first run so that we don't hammer the CPU
+  // with all profiles running at the same time.
+  const int kRandomDelayMaxSecs = 30;
+  const double kMaxBackoffSecs = 24*60*60;   // Max backoff time.
+
+  srand(MicroTime() * getpid());
+  int startup_delay = rand() % kRandomDelayMaxSecs;   // random delay for startup.
+
+
+  CHECK(runtime->AttachCurrentThread("Profiler", true, runtime->GetSystemThreadGroup(),
+                                      !runtime->IsCompiler()));
+
+  Thread* self = Thread::Current();
+
+  while (true) {
+    if (ShuttingDown(self)) {
+      break;
+    }
+
+    {
+      // Wait until we need to run another profile.
+      uint64_t delay_secs = profiler->period_s_ * profiler->backoff_factor_;
+
+      // Add a startup delay to prevent all the profiles running at once.
+      delay_secs += startup_delay;
+
+      // Immediate startup for benchmarking?
+      if (profiler->start_immediately_ && startup_delay > 0) {
+        delay_secs = 0;
+      }
+
+      startup_delay = 0;
+
+      LOG(DEBUG) << "Delaying profile start for " << delay_secs << " secs";
+      MutexLock mu(self, profiler->wait_lock_);
+      profiler->period_condition_.TimedWait(self, delay_secs * 1000, 0);
+
+      // Expand the backoff by its coefficient, but don't go beyond the max.
+      double new_backoff = profiler->backoff_factor_ * profiler->backoff_coefficient_;
+      if (new_backoff < kMaxBackoffSecs) {
+        profiler->backoff_factor_ = new_backoff;
+      }
+    }
+
+    if (ShuttingDown(self)) {
+      break;
+    }
+
+
+    uint64_t start_us = MicroTime();
+    uint64_t end_us = start_us + profiler->duration_s_ * 1000000LL;
+    uint64_t now_us = start_us;
+
+    LOG(DEBUG) << "Starting profiling run now for " << PrettyDuration((end_us - start_us) * 1000);
+
+
+    SampleCheckpoint check_point(profiler);
+
+    while (now_us < end_us) {
+      if (ShuttingDown(self)) {
+        break;
+      }
+
+      usleep(profiler->interval_us_);    // Non-interruptible sleep.
+
+      ThreadList* thread_list = runtime->GetThreadList();
+
+      profiler->profiler_barrier_->Init(self, 0);
+      size_t barrier_count = thread_list->RunCheckpoint(&check_point);
+
+      ThreadState old_state = self->SetState(kWaitingForCheckPointsToRun);
+
+      // Wait for the barrier to be crossed by all runnable threads.  This wait
+      // is done with a timeout so that we can detect problems with the checkpoint
+      // running code.  We should never see this.
+      const uint32_t kWaitTimeoutMs = 10000;
+      const uint32_t kWaitTimeoutUs = kWaitTimeoutMs * 1000;
+
+      uint64_t waitstart_us = MicroTime();
+      // Wait for all threads to pass the barrier.
+      profiler->profiler_barrier_->Increment(self, barrier_count, kWaitTimeoutMs);
+      uint64_t waitend_us = MicroTime();
+      uint64_t waitdiff_us = waitend_us - waitstart_us;
+
+      // We should never get a timeout.  If we do, it suggests a problem with the checkpoint
+      // code.  Crash the process in this case.
+      CHECK_LT(waitdiff_us, kWaitTimeoutUs);
+
+      self->SetState(old_state);
+
+      // Update the current time.
+      now_us = MicroTime();
+    }
+
+    if (!ShuttingDown(self)) {
+      // After the profile has been taken, write it out.
+      ScopedObjectAccess soa(self);   // Acquire the mutator lock.
+      uint32_t size = profiler->WriteProfile();
+      LOG(DEBUG) << "Profile size: " << size;
+    }
+  }
+
+  LOG(INFO) << "Profiler shutdown";
+  runtime->DetachCurrentThread();
+  return nullptr;
+}
+
+// Write out the profile file if we are generating a profile.
+uint32_t BackgroundMethodSamplingProfiler::WriteProfile() {
+  UniquePtr<File> profile_file;
+  Runtime* runtime = Runtime::Current();
+  std::string classpath = runtime->GetClassPathString();
+  size_t colon = classpath.find(':');
+  if (colon != std::string::npos) {
+    // More than one file in the classpath.  Possible?
+    classpath = classpath.substr(0, colon);
+  }
+
+  std::replace(classpath.begin(), classpath.end(), '/', '@');
+  std::string full_name = profile_file_name_;
+  if (classpath != "") {
+    full_name = StringPrintf("%s-%s", profile_file_name_.c_str(), classpath.c_str());
+  }
+  LOG(DEBUG) << "Saving profile to " << full_name;
+
+  profile_file.reset(OS::CreateEmptyFile(full_name.c_str()));
+  if (profile_file.get() == nullptr) {
+    // Failed to open the profile file, ignore.
+    LOG(INFO) << "Failed to op file";
+    return 0;
+  }
+  std::ostringstream os;
+  uint32_t num_methods = DumpProfile(os);
+  std::string data(os.str());
+  profile_file->WriteFully(data.c_str(), data.length());
+  profile_file->Close();
+  return num_methods;
+}
+
+// Start a profile thread with the user-supplied arguments.
+void BackgroundMethodSamplingProfiler::Start(int period, int duration,
+                  std::string profile_file_name, int interval_us,
+                  double backoff_coefficient, bool startImmediately) {
+  Thread* self = Thread::Current();
+  {
+    MutexLock mu(self, *Locks::profiler_lock_);
+    // Don't start two profiler threads.
+    if (profiler_ != nullptr) {
+      return;
+    }
+  }
+
+  LOG(INFO) << "Starting profile with period " << period << "s, duration " << duration <<
+      "s, interval " << interval_us << "us.  Profile file " << profile_file_name;
+
+  {
+    MutexLock mu(self, *Locks::profiler_lock_);
+    profiler_ = new BackgroundMethodSamplingProfiler(period, duration, profile_file_name,
+                                      backoff_coefficient,
+                                      interval_us, startImmediately);
+
+    CHECK_PTHREAD_CALL(pthread_create, (&profiler_pthread_, nullptr, &RunProfilerThread,
+        reinterpret_cast<void*>(profiler_)),
+                       "Profiler thread");
+  }
+}
+
+
+
+void BackgroundMethodSamplingProfiler::Stop() {
+  BackgroundMethodSamplingProfiler* profiler = nullptr;
+  pthread_t profiler_pthread = 0U;
+  {
+    MutexLock trace_mu(Thread::Current(), *Locks::profiler_lock_);
+    profiler = profiler_;
+    shutting_down_ = true;
+    profiler_pthread = profiler_pthread_;
+  }
+
+  // Now wake up the sampler thread if it is sleeping.
+  {
+    MutexLock profile_mu(Thread::Current(), profiler->wait_lock_);
+    profiler->period_condition_.Signal(Thread::Current());
+  }
+  // Wait for the sample thread to stop.
+  CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profiler thread shutdown");
+
+  {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    profiler_ = nullptr;
+  }
+  delete profiler;
+}
+
+
+void BackgroundMethodSamplingProfiler::Shutdown() {
+  Stop();
+}
+
+BackgroundMethodSamplingProfiler::BackgroundMethodSamplingProfiler(int period, int duration,
+                   std::string profile_file_name,
+                   double backoff_coefficient, int interval_us, bool startImmediately)
+    : profile_file_name_(profile_file_name),
+      period_s_(period), start_immediately_(startImmediately),
+      interval_us_(interval_us), backoff_factor_(1.0),
+      backoff_coefficient_(backoff_coefficient), duration_s_(duration),
+      wait_lock_("Profile wait lock"),
+      period_condition_("Profile condition", wait_lock_),
+      profile_table_(wait_lock_),
+      profiler_barrier_(new Barrier(0)) {
+  // Populate the filtered_methods set.
+  // This is empty right now, but to add a method, do this:
+  //
+  // filtered_methods_.insert("void java.lang.Object.wait(long, int)");
+}
+
+// A method has been hit, record its invocation in the method map.
+// The mutator_lock must be held (shared) when this is called.
+void BackgroundMethodSamplingProfiler::RecordMethod(mirror::ArtMethod* method) {
+  if (method == nullptr) {
+    profile_table_.NullMethod();
+    // Don't record a nullptr method.
+    return;
+  }
+
+  mirror::Class* cls = method->GetDeclaringClass();
+  if (cls != nullptr) {
+    if (cls->GetClassLoader() == nullptr) {
+      // Don't include methods in the boot class path.
+      profile_table_.BootMethod();
+      return;
+    }
+  }
+
+  bool is_filtered = false;
+
+  MethodHelper mh(method);
+  if (strcmp(mh.GetName(), "<clinit>") == 0) {
+    // Always filter out class initializers.
+    is_filtered = true;
+  }
+
+  // Filter out methods by name if there are any.
+  if (!is_filtered && filtered_methods_.size() > 0) {
+    std::string method_full_name = PrettyMethod(method);
+
+    // Don't include specific filtered methods.
+    is_filtered = filtered_methods_.count(method_full_name) != 0;
+  }
+
+  // Add to the profile table unless it is filtered out.
+  if (!is_filtered) {
+    profile_table_.Put(method);
+  }
+}
+
+// Clean out any recordings for the method traces.
+void BackgroundMethodSamplingProfiler::CleanProfile() {
+  profile_table_.Clear();
+}
+
+uint32_t BackgroundMethodSamplingProfiler::DumpProfile(std::ostream& os) {
+  return profile_table_.Write(os);
+}
+
+// Profile Table.
+// This holds a mapping of mirror::ArtMethod* to a count of how many times a sample
+// hit it at the top of the stack.
+ProfileSampleResults::ProfileSampleResults(Mutex& lock) : lock_(lock), num_samples_(0),
+    num_null_methods_(0),
+    num_boot_methods_(0) {
+  for (int i = 0; i < kHashSize; i++) {
+    table[i] = nullptr;
+  }
+}
+
+ProfileSampleResults::~ProfileSampleResults() {
+  for (int i = 0; i < kHashSize; i++) {
+     delete table[i];
+  }
+}
+
+// Add a method to the profile table.  If it is the first time the method
+// has been seen, add it with count=1, otherwise increment the count.
+void ProfileSampleResults::Put(mirror::ArtMethod* method) {
+  lock_.Lock(Thread::Current());
+  uint32_t index = Hash(method);
+  if (table[index] == nullptr) {
+    table[index] = new Map();
+  }
+  Map::iterator i = table[index]->find(method);
+  if (i == table[index]->end()) {
+    (*table[index])[method] = 1;
+  } else {
+    i->second++;
+  }
+  num_samples_++;
+  lock_.Unlock(Thread::Current());
+}
+
+// Write the profile table to the output stream.
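+// The output format is a header line of the form
+//   <num_samples>/<num_null_methods>/<num_boot_methods>
+// followed by one line per method:
+//   <pretty method name>/<sample count>/<method code size>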
+uint32_t ProfileSampleResults::Write(std::ostream &os) {
+  ScopedObjectAccess soa(Thread::Current());
+  LOG(DEBUG) << "Profile: " << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_;
+  os << num_samples_ << "/" << num_null_methods_ << "/" << num_boot_methods_ << "\n";
+  uint32_t num_methods = 0;
+  for (int i = 0 ; i < kHashSize; i++) {
+    Map *map = table[i];
+    if (map != nullptr) {
+      for (const auto &meth_iter : *map) {
+         mirror::ArtMethod *method = meth_iter.first;
+         std::string method_name = PrettyMethod(method);
+         uint32_t method_size = method->GetCodeSize();
+         os << StringPrintf("%s/%u/%u\n",  method_name.c_str(), meth_iter.second, method_size);
+         ++num_methods;
+       }
+    }
+  }
+  return num_methods;
+}
+
+void ProfileSampleResults::Clear() {
+  num_samples_ = 0;
+  num_null_methods_ = 0;
+  num_boot_methods_ = 0;
+  for (int i = 0; i < kHashSize; i++) {
+     delete table[i];
+     table[i] = nullptr;
+  }
+}
+
+uint32_t ProfileSampleResults::Hash(mirror::ArtMethod* method) {
+  uint32_t value = reinterpret_cast<uint32_t>(method);
+  value >>= 2;
+  return value % kHashSize;
+}
+
+}  // namespace art
+
diff --git a/runtime/profiler.h b/runtime/profiler.h
new file mode 100644
index 0000000..e3af47c
--- /dev/null
+++ b/runtime/profiler.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_PROFILER_H_
+#define ART_RUNTIME_PROFILER_H_
+
+#include <ostream>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "globals.h"
+#include "instrumentation.h"
+#include "os.h"
+#include "safe_map.h"
+#include "base/mutex.h"
+#include "locks.h"
+#include "UniquePtr.h"
+#include "barrier.h"
+
+namespace art {
+
+namespace mirror {
+  class ArtMethod;
+  class Class;
+}  // namespace mirror
+class Thread;
+
+
+//
+// This class holds all the results for all runs of the profiler.  It also
+// counts the number of null methods (where we can't determine the method) and
+// the number of methods in the boot path (where we have already compiled the method).
+//
+// This object is an internal profiler object and uses the same locking as the profiler
+// itself.
+class ProfileSampleResults {
+ public:
+  explicit ProfileSampleResults(Mutex& lock);
+  ~ProfileSampleResults();
+
+  void Put(mirror::ArtMethod* method);
+  uint32_t Write(std::ostream &os);
+  void Clear();
+  uint32_t GetNumSamples() { return num_samples_; }
+  void NullMethod() { ++num_null_methods_; }
+  void BootMethod() { ++num_boot_methods_; }
+ private:
+  uint32_t Hash(mirror::ArtMethod* method);
+  static constexpr int kHashSize = 17;
+  Mutex& lock_;         // Reference to the main profiler lock - we don't need two of them.
+  uint32_t num_samples_;          // Total number of samples taken.
+  uint32_t num_null_methods_;     // Number of samples where we don't know the method.
+  uint32_t num_boot_methods_;     // Number of samples in the boot path.
+
+  typedef std::map<mirror::ArtMethod*, uint32_t> Map;   // Map of method vs its count.
+  Map *table[kHashSize];
+};
+
+//
+// The BackgroundMethodSamplingProfiler runs in a thread.  Most of the time it is sleeping but
+// occasionally wakes up and samples the methods that are currently executing.  Each time
+// it ticks, it looks at the current method of each thread and records it in the
+// ProfileSampleResults table.
+//
+// The timing is controlled by a number of variables:
+// 1.  Period: the time between sampling runs.
+// 2.  Interval: the time between each sample in a run.
+// 3.  Duration: the duration of a run.
+//
+// So the profiler thread is sleeping for the 'period' time.  It wakes up and runs for the
+// 'duration'.  The run consists of a series of samples, each of which is 'interval' microseconds
+// apart.  At the end of a run, it writes the results table to a file and goes back to sleep.
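+//
+// For example, with the defaults set in runtime.cc (period 10s, duration 20s,
+// interval 500us, backoff coefficient 2.0), the thread sleeps roughly 10s (plus a
+// random startup delay of up to 30s), samples every 500us for 20s, writes the file,
+// then sleeps roughly 20s, 40s, and so on as the backoff factor grows.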
+
+class BackgroundMethodSamplingProfiler {
+ public:
+  static void Start(int period, int duration, std::string profile_filename, int interval_us,
+                    double backoff_coefficient, bool startImmediately)
+  LOCKS_EXCLUDED(Locks::mutator_lock_,
+                 Locks::thread_list_lock_,
+                 Locks::thread_suspend_count_lock_,
+                 Locks::profiler_lock_);
+
+  static void Stop() LOCKS_EXCLUDED(Locks::profiler_lock_, wait_lock_);
+  static void Shutdown() LOCKS_EXCLUDED(Locks::profiler_lock_);
+
+  void RecordMethod(mirror::ArtMethod *method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  Barrier& GetBarrier() {
+    return *profiler_barrier_;
+  }
+
+ private:
+  explicit BackgroundMethodSamplingProfiler(int period, int duration, std::string profile_filename,
+                 double backoff_coefficient, int interval_us, bool startImmediately);
+
+  // The profiler thread entry point.  The BackgroundMethodSamplingProfiler* is passed as 'arg'.
+  static void* RunProfilerThread(void* arg) LOCKS_EXCLUDED(Locks::profiler_lock_);
+
+  uint32_t WriteProfile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void CleanProfile();
+  uint32_t DumpProfile(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static bool ShuttingDown(Thread* self) LOCKS_EXCLUDED(Locks::profiler_lock_);
+
+  static BackgroundMethodSamplingProfiler* profiler_ GUARDED_BY(Locks::profiler_lock_);
+
+  // We need to shut the sample thread down at exit.  Setting this to true will do that.
+  static volatile bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
+
+  // Sampling thread, non-zero when sampling.
+  static pthread_t profiler_pthread_;
+
+  // Some measure of the number of samples that are significant.
+  static constexpr uint32_t kSignificantSamples = 10;
+
+  // File to write profile data out to.  Cannot be empty if we are profiling.
+  std::string profile_file_name_;
+
+  // Number of seconds between profile runs.
+  uint32_t period_s_;
+
+  // Most of the time we want to delay the profiler startup to prevent everything
+  // running at the same time (all processes).  This is the default, but if we
+  // want to override this, set 'start_immediately_' to true.  This is done
+  // if the -Xprofile option is given on the command line.
+  bool start_immediately_;
+
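+  // Time between samples within a run, in microseconds.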
+  uint32_t interval_us_;
+
+  // A backoff coefficient to adjust the profile period based on time.
+  double backoff_factor_;
+
+  // How much to increase the backoff by on each profile iteration.
+  double backoff_coefficient_;
+
+  // Duration of each profile run.  The profile file will be written at the end
+  // of each run.
+  uint32_t duration_s_;
+
+  // Profile condition support.
+  Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable period_condition_ GUARDED_BY(wait_lock_);
+
+  ProfileSampleResults profile_table_;
+
+  UniquePtr<Barrier> profiler_barrier_;
+
+  // Set of methods to be filtered out.  This will probably be rare because
+  // most of the methods we want to be filtered reside in the boot path and
+  // are automatically filtered.
+  typedef std::set<std::string> FilteredMethods;
+  FilteredMethods filtered_methods_;
+
+  DISALLOW_COPY_AND_ASSIGN(BackgroundMethodSamplingProfiler);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_PROFILER_H_
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 25623a1..5a28b2d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -59,6 +59,7 @@
 #include "thread.h"
 #include "thread_list.h"
 #include "trace.h"
+#include "profiler.h"
 #include "UniquePtr.h"
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
@@ -331,6 +332,24 @@
   return result;
 }
 
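+// Parse a double from the text following |prefix| in |option|.  If the value is
+// malformed or outside [min, max], return |defval| when ignore_unrecognized is set,
+// otherwise log a fatal error.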
+double ParseDoubleOrDie(const std::string& option, const char* prefix,
+                        double min, double max, bool ignore_unrecognized,
+                        double defval) {
+  std::istringstream iss(option.substr(strlen(prefix)));
+  double value;
+  iss >> value;
+  // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
+  const bool sane_val = iss.eof() && (value >= min) && (value <= max);
+  if (!sane_val) {
+    if (ignore_unrecognized) {
+      return defval;
+    }
+    LOG(FATAL) << "Invalid option '" << option << "'";
+    return defval;
+  }
+  return value;
+}
+
 void Runtime::SweepSystemWeaks(RootVisitor* visitor, void* arg) {
   GetInternTable()->SweepInternTableWeaks(visitor, arg);
   GetMonitorList()->SweepMonitorList(visitor, arg);
@@ -408,6 +427,12 @@
   parsed->method_trace_file_ = "/data/method-trace-file.bin";
   parsed->method_trace_file_size_ = 10 * MB;
 
+  parsed->profile_ = false;
+  parsed->profile_period_s_ = 10;           // Seconds.
+  parsed->profile_duration_s_ = 20;          // Seconds.
+  parsed->profile_interval_us_ = 500;       // Microseconds.
+  parsed->profile_backoff_coefficient_ = 2.0;
+
   for (size_t i = 0; i < options.size(); ++i) {
     const std::string option(options[i].first);
     if (true && options[0].first == "-Xzygote") {
@@ -495,19 +520,9 @@
       }
       parsed->heap_max_free_ = size;
     } else if (StartsWith(option, "-XX:HeapTargetUtilization=")) {
-      std::istringstream iss(option.substr(strlen("-XX:HeapTargetUtilization=")));
-      double value;
-      iss >> value;
-      // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
-      const bool sane_val = iss.eof() && (value >= 0.1) && (value <= 0.9);
-      if (!sane_val) {
-        if (ignore_unrecognized) {
-          continue;
-        }
-        LOG(FATAL) << "Invalid option '" << option << "'";
-        return NULL;
-      }
-      parsed->heap_target_utilization_ = value;
+      parsed->heap_target_utilization_ = ParseDoubleOrDie(option, "-XX:HeapTargetUtilization=",
+          0.1, 0.9, ignore_unrecognized,
+          parsed->heap_target_utilization_);
     } else if (StartsWith(option, "-XX:ParallelGCThreads=")) {
       parsed->parallel_gc_threads_ =
           ParseMemoryOption(option.substr(strlen("-XX:ParallelGCThreads=")).c_str(), 1024);
@@ -631,6 +646,19 @@
       Trace::SetDefaultClockSource(kProfilerClockSourceWall);
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
+    } else if (StartsWith(option, "-Xprofile:")) {
+      parsed->profile_output_filename_ = option.substr(strlen("-Xprofile:"));
+      parsed->profile_ = true;
+    } else if (StartsWith(option, "-Xprofile-period:")) {
+      parsed->profile_period_s_ = ParseIntegerOrDie(option);
+    } else if (StartsWith(option, "-Xprofile-duration:")) {
+      parsed->profile_duration_s_ = ParseIntegerOrDie(option);
+    } else if (StartsWith(option, "-Xprofile-interval:")) {
+      parsed->profile_interval_us_ = ParseIntegerOrDie(option);
+    } else if (StartsWith(option, "-Xprofile-backoff:")) {
+      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(option, "-Xprofile-backoff:",
+          1.0, 10.0, ignore_unrecognized,
+          parsed->profile_backoff_coefficient_);
     } else if (option == "-compiler-filter:interpret-only") {
       parsed->compiler_filter_ = kInterpretOnly;
     } else if (option == "-compiler-filter:space") {
@@ -779,6 +807,11 @@
 
   finished_starting_ = true;
 
+  if (profile_) {
+    // User has asked for a profile using -Xprofile
+    StartProfiler(profile_output_filename_.c_str(), true);
+  }
+
   return true;
 }
 
@@ -970,6 +1003,14 @@
   method_trace_file_ = options->method_trace_file_;
   method_trace_file_size_ = options->method_trace_file_size_;
 
+  // Extract the profile options.
+  profile_period_s_ = options->profile_period_s_;
+  profile_duration_s_ = options->profile_duration_s_;
+  profile_interval_us_ = options->profile_interval_us_;
+  profile_backoff_coefficient_ = options->profile_backoff_coefficient_;
+  profile_ = options->profile_;
+  profile_output_filename_ = options->profile_output_filename_;
+
   if (options->method_trace_) {
     Trace::Start(options->method_trace_file_.c_str(), -1, options->method_trace_file_size_, 0,
                  false, false, 0);
@@ -1401,4 +1442,8 @@
   method_verifiers_.erase(it);
 }
 
+void Runtime::StartProfiler(const char *appDir, bool startImmediately) {
+  BackgroundMethodSamplingProfiler::Start(profile_period_s_, profile_duration_s_, appDir, profile_interval_us_,
+      profile_backoff_coefficient_, startImmediately);
+}
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 7b57dda..50da0dc 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -139,6 +139,12 @@
     size_t tiny_method_threshold_;
     size_t num_dex_methods_threshold_;
     bool sea_ir_mode_;
+    bool profile_;
+    std::string profile_output_filename_;
+    int profile_period_s_;
+    int profile_duration_s_;
+    int profile_interval_us_;
+    double profile_backoff_coefficient_;
 
    private:
     ParsedOptions() {}
@@ -455,6 +461,8 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
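+  // Start the sampling profiler using the parsed profile options.  When
+  // startImmediately is true the initial random startup delay is skipped (this is
+  // used when -Xprofile is given on the command line).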
+  void StartProfiler(const char *appDir, bool startImmediately = false);
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -566,6 +574,14 @@
   bool stats_enabled_;
   RuntimeStats stats_;
 
+  // Runtime profile support.
+  bool profile_;
+  std::string profile_output_filename_;
+  uint32_t profile_period_s_;                  // Generate profile every n seconds.
+  uint32_t profile_duration_s_;                // Run profile for n seconds.
+  uint32_t profile_interval_us_;                // Microseconds between samples.
+  double profile_backoff_coefficient_;  // Coefficient for exponential backoff.
+
   bool method_trace_;
   std::string method_trace_file_;
   size_t method_trace_file_size_;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9faa60d..e2d51b7 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -73,7 +73,7 @@
 
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
-ConditionVariable* Thread::resume_cond_ = NULL;
+ConditionVariable* Thread::resume_cond_ = nullptr;
 
 static const char* kThreadNameDuringStartup = "<native thread without managed peer>";
 
@@ -124,7 +124,7 @@
 
 ShadowFrame* Thread::GetAndClearDeoptimizationShadowFrame(JValue* ret_val) {
   ShadowFrame* sf = deoptimization_shadow_frame_;
-  deoptimization_shadow_frame_ = NULL;
+  deoptimization_shadow_frame_ = nullptr;
   ret_val->SetJ(deoptimization_return_value_.GetJ());
   return sf;
 }
@@ -142,14 +142,14 @@
 void* Thread::CreateCallback(void* arg) {
   Thread* self = reinterpret_cast<Thread*>(arg);
   Runtime* runtime = Runtime::Current();
-  if (runtime == NULL) {
+  if (runtime == nullptr) {
     LOG(ERROR) << "Thread attaching to non-existent runtime: " << *self;
-    return NULL;
+    return nullptr;
   }
   {
     // TODO: pass self to MutexLock - requires self to equal Thread::Current(), which is only true
     //       after self->Init().
-    MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
+    MutexLock mu(nullptr, *Locks::runtime_shutdown_lock_);
     // Check that if we got here we cannot be shutting down (as shutdown should never have started
     // while threads are being born).
     CHECK(!runtime->IsShuttingDownLocked());
@@ -160,10 +160,10 @@
     ScopedObjectAccess soa(self);
 
     // Copy peer into self, deleting global reference when done.
-    CHECK(self->jpeer_ != NULL);
+    CHECK(self->jpeer_ != nullptr);
     self->opeer_ = soa.Decode<mirror::Object*>(self->jpeer_);
     self->GetJniEnv()->DeleteGlobalRef(self->jpeer_);
-    self->jpeer_ = NULL;
+    self->jpeer_ = nullptr;
 
     {
       SirtRef<mirror::String> thread_name(self, self->GetThreadName(soa));
@@ -177,14 +177,14 @@
     mirror::ArtMethod* m =
         receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     JValue result;
-    ArgArray arg_array(NULL, 0);
+    ArgArray arg_array(nullptr, 0);
     arg_array.Append(reinterpret_cast<uint32_t>(receiver));
     m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
   }
   // Detach and delete self.
   Runtime::Current()->GetThreadList()->Unregister(self);
 
-  return NULL;
+  return nullptr;
 }
 
 Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa,
@@ -195,7 +195,7 @@
   // to stop it from going away.
   if (kIsDebugBuild) {
     MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
-    if (result != NULL && !result->IsSuspended()) {
+    if (result != nullptr && !result->IsSuspended()) {
       Locks::thread_list_lock_->AssertHeld(soa.Self());
     }
   }
@@ -233,7 +233,7 @@
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
-  CHECK(java_peer != NULL);
+  CHECK(java_peer != nullptr);
   Thread* self = static_cast<JNIEnvExt*>(env)->self;
   Runtime* runtime = Runtime::Current();
 
@@ -279,9 +279,9 @@
     }
     // Manually delete the global reference since Thread::Init will not have been run.
     env->DeleteGlobalRef(child_thread->jpeer_);
-    child_thread->jpeer_ = NULL;
+    child_thread->jpeer_ = nullptr;
     delete child_thread;
-    child_thread = NULL;
+    child_thread = nullptr;
     // TODO: remove from thread group?
     env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
     {
@@ -298,7 +298,7 @@
   // (When we create a new thread from managed code, we allocate the Thread* in Thread::Create so
   // we can handshake with the corresponding native thread when it's ready.) Check this native
   // thread hasn't been through here already...
-  CHECK(Thread::Current() == NULL);
+  CHECK(Thread::Current() == nullptr);
   SetUpAlternateSignalStack();
   InitCpu();
   InitTlsEntryPoints();
@@ -322,15 +322,15 @@
                        bool create_peer) {
   Thread* self;
   Runtime* runtime = Runtime::Current();
-  if (runtime == NULL) {
+  if (runtime == nullptr) {
     LOG(ERROR) << "Thread attaching to non-existent runtime: " << thread_name;
-    return NULL;
+    return nullptr;
   }
   {
-    MutexLock mu(NULL, *Locks::runtime_shutdown_lock_);
+    MutexLock mu(nullptr, *Locks::runtime_shutdown_lock_);
     if (runtime->IsShuttingDownLocked()) {
       LOG(ERROR) << "Thread attaching while runtime is shutting down: " << thread_name;
-      return NULL;
+      return nullptr;
     } else {
       Runtime::Current()->StartThreadBirth();
       self = new Thread(as_daemon);
@@ -350,7 +350,7 @@
     self->CreatePeer(thread_name, as_daemon, thread_group);
   } else {
     // These aren't necessary, but they improve diagnostics for unit tests & command-line tools.
-    if (thread_name != NULL) {
+    if (thread_name != nullptr) {
       self->name_->assign(thread_name);
       ::art::SetThreadName(thread_name);
     }
@@ -364,7 +364,7 @@
   CHECK(runtime->IsStarted());
   JNIEnv* env = jni_env_;
 
-  if (thread_group == NULL) {
+  if (thread_group == nullptr) {
     thread_group = runtime->GetMainThreadGroup();
   }
   ScopedLocalRef<jobject> thread_name(env, env->NewStringUTF(name));
@@ -372,7 +372,7 @@
   jboolean thread_is_daemon = as_daemon;
 
   ScopedLocalRef<jobject> peer(env, env->AllocObject(WellKnownClasses::java_lang_Thread));
-  if (peer.get() == NULL) {
+  if (peer.get() == nullptr) {
     CHECK(IsExceptionPending());
     return;
   }
@@ -393,7 +393,7 @@
 
   ScopedObjectAccess soa(self);
   SirtRef<mirror::String> peer_thread_name(soa.Self(), GetThreadName(soa));
-  if (peer_thread_name.get() == NULL) {
+  if (peer_thread_name.get() == nullptr) {
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
     // available (in the compiler, in tests), we manually assign the
@@ -409,7 +409,7 @@
     peer_thread_name.reset(GetThreadName(soa));
   }
   // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
-  if (peer_thread_name.get() != NULL) {
+  if (peer_thread_name.get() != nullptr) {
     SetThreadName(peer_thread_name->ToModifiedUtf8().c_str());
   }
 }
@@ -495,7 +495,7 @@
 
 mirror::String* Thread::GetThreadName(const ScopedObjectAccessUnchecked& soa) const {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
-  return (opeer_ != NULL) ? reinterpret_cast<mirror::String*>(f->GetObject(opeer_)) : NULL;
+  return (opeer_ != nullptr) ? reinterpret_cast<mirror::String*>(f->GetObject(opeer_)) : nullptr;
 }
 
 void Thread::GetThreadName(std::string& name) const {
@@ -570,12 +570,33 @@
 }
 
 void Thread::RunCheckpointFunction() {
-  CHECK(checkpoint_function_ != NULL);
-  ATRACE_BEGIN("Checkpoint function");
-  checkpoint_function_->Run(this);
-  ATRACE_END();
-  checkpoint_function_ = NULL;
-  AtomicClearFlag(kCheckpointRequest);
+  Closure *checkpoints[kMaxCheckpoints];
+
+  // Grab the suspend_count lock and copy the current set of
+  // checkpoints.  Then clear the list and the flag.  The RequestCheckpoint
+  // function will also grab this lock so we prevent a race between setting
+  // the kCheckpointRequest flag and clearing it.
+  {
+    MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+    for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
+      checkpoints[i] = checkpoint_functions_[i];
+      checkpoint_functions_[i] = nullptr;
+    }
+    AtomicClearFlag(kCheckpointRequest);
+  }
+
+  // Outside the lock, run all the checkpoint functions that
+  // we collected.
+  bool found_checkpoint = false;
+  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
+    if (checkpoints[i] != nullptr) {
+      ATRACE_BEGIN("Checkpoint function");
+      checkpoints[i]->Run(this);
+      ATRACE_END();
+      found_checkpoint = true;
+    }
+  }
+  CHECK(found_checkpoint);
 }
 
 bool Thread::RequestCheckpoint(Closure* function) {
@@ -584,23 +605,34 @@
   if (old_state_and_flags.as_struct.state != kRunnable) {
     return false;  // Fail, thread is suspended and so can't run a checkpoint.
   }
-  if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
-    return false;  // Fail, already a checkpoint pending.
+
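+  // Find a free slot in the checkpoint function table.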
+  uint32_t available_checkpoint = kMaxCheckpoints;
+  for (uint32_t i = 0 ; i < kMaxCheckpoints; ++i) {
+    if (checkpoint_functions_[i] == nullptr) {
+      available_checkpoint = i;
+      break;
+    }
   }
-  CHECK(checkpoint_function_ == nullptr);
-  checkpoint_function_ = function;
+  if (available_checkpoint == kMaxCheckpoints) {
+    // No checkpoint function slots are available; we can't run a checkpoint.
+    return false;
+  }
+  checkpoint_functions_[available_checkpoint] = function;
+
   // Checkpoint function installed now install flag bit.
   // We must be runnable to request a checkpoint.
   DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
   union StateAndFlags new_state_and_flags;
   new_state_and_flags.as_int = old_state_and_flags.as_int;
   new_state_and_flags.as_struct.flags |= kCheckpointRequest;
-  int succeeded = android_atomic_cmpxchg(old_state_and_flags.as_int, new_state_and_flags.as_int,
+  int succeeded = android_atomic_acquire_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
                                          &state_and_flags_.as_int);
   if (UNLIKELY(succeeded != 0)) {
     // The thread changed state before the checkpoint was installed.
-    CHECK(checkpoint_function_ == function);
-    checkpoint_function_ = NULL;
+    CHECK_EQ(checkpoint_functions_[available_checkpoint], function);
+    checkpoint_functions_[available_checkpoint] = nullptr;
+  } else {
+    CHECK_EQ(ReadFlag(kCheckpointRequest), true);
   }
   return succeeded == 0;
 }
@@ -622,7 +654,7 @@
   bool is_daemon = false;
   Thread* self = Thread::Current();
 
-  if (self != NULL && thread != NULL && thread->opeer_ != NULL) {
+  if (self != nullptr && thread != nullptr && thread->opeer_ != nullptr) {
     ScopedObjectAccessUnchecked soa(self);
     priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->GetInt(thread->opeer_);
     is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->GetBoolean(thread->opeer_);
@@ -630,12 +662,12 @@
     mirror::Object* thread_group =
         soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(thread->opeer_);
 
-    if (thread_group != NULL) {
+    if (thread_group != nullptr) {
       mirror::ArtField* group_name_field =
           soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
       mirror::String* group_name_string =
           reinterpret_cast<mirror::String*>(group_name_field->GetObject(thread_group));
-      group_name = (group_name_string != NULL) ? group_name_string->ToModifiedUtf8() : "<null>";
+      group_name = (group_name_string != nullptr) ? group_name_string->ToModifiedUtf8() : "<null>";
     }
   } else {
     priority = GetNativePriority();
@@ -646,7 +678,7 @@
     scheduler_group_name = "default";
   }
 
-  if (thread != NULL) {
+  if (thread != nullptr) {
     os << '"' << *thread->name_ << '"';
     if (is_daemon) {
       os << " daemon";
@@ -664,7 +696,7 @@
        << " (not attached)\n";
   }
 
-  if (thread != NULL) {
+  if (thread != nullptr) {
     MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     os << "  | group=\"" << group_name << "\""
        << " sCount=" << thread->suspend_count_
@@ -676,7 +708,7 @@
   os << "  | sysTid=" << tid
      << " nice=" << getpriority(PRIO_PROCESS, tid)
      << " cgrp=" << scheduler_group_name;
-  if (thread != NULL) {
+  if (thread != nullptr) {
     int policy;
     sched_param sp;
     CHECK_PTHREAD_CALL(pthread_getschedparam, (thread->pthread_self_, &policy, &sp), __FUNCTION__);
@@ -705,7 +737,7 @@
      << " stm=" << stime
      << " core=" << task_cpu
      << " HZ=" << sysconf(_SC_CLK_TCK) << "\n";
-  if (thread != NULL) {
+  if (thread != nullptr) {
     os << "  | stack=" << reinterpret_cast<void*>(thread->stack_begin_) << "-" << reinterpret_cast<void*>(thread->stack_end_)
        << " stackSize=" << PrettySize(thread->stack_size_) << "\n";
   }
@@ -719,7 +751,7 @@
   StackDumpVisitor(std::ostream& os, Thread* thread, Context* context, bool can_allocate)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(thread, context), os(os), thread(thread), can_allocate(can_allocate),
-        last_method(NULL), last_line_number(0), repetition_count(0), frame_count(0) {
+        last_method(nullptr), last_line_number(0), repetition_count(0), frame_count(0) {
   }
 
   virtual ~StackDumpVisitor() {
@@ -737,12 +769,12 @@
     mirror::Class* c = m->GetDeclaringClass();
     const mirror::DexCache* dex_cache = c->GetDexCache();
     int line_number = -1;
-    if (dex_cache != NULL) {  // be tolerant of bad input
+    if (dex_cache != nullptr) {  // be tolerant of bad input
       const DexFile& dex_file = *dex_cache->GetDexFile();
       line_number = dex_file.GetLineNumFromPC(m, GetDexPc());
     }
     if (line_number == last_line_number && last_method == m) {
-      repetition_count++;
+      ++repetition_count;
     } else {
       if (repetition_count >= kMaxRepetition) {
         os << "  ... repeated " << (repetition_count - kMaxRepetition) << " times\n";
@@ -758,7 +790,7 @@
       } else {
         mh.ChangeMethod(m);
         const char* source_file(mh.GetDeclaringClassSourceFile());
-        os << "(" << (source_file != NULL ? source_file : "unavailable")
+        os << "(" << (source_file != nullptr ? source_file : "unavailable")
            << ":" << line_number << ")";
       }
       os << "\n";
@@ -808,8 +840,8 @@
   // We don't just check kNative because native methods will be in state kSuspended if they're
   // calling back into the VM, or kBlocked if they're blocked on a monitor, or one of the
   // thread-startup states if it's early enough in their life cycle (http://b/7432159).
-  mirror::ArtMethod* current_method = thread->GetCurrentMethod(NULL);
-  return current_method != NULL && current_method->IsNative();
+  mirror::ArtMethod* current_method = thread->GetCurrentMethod(nullptr);
+  return current_method != nullptr && current_method->IsNative();
 }
 
 void Thread::DumpStack(std::ostream& os) const {
@@ -850,11 +882,11 @@
   {
     // MutexLock to keep annotalysis happy.
     //
-    // Note we use NULL for the thread because Thread::Current can
+    // Note we use nullptr for the thread because Thread::Current can
     // return garbage since (is_started_ == true) and
     // Thread::pthread_key_self_ is not yet initialized.
     // This was seen on glibc.
-    MutexLock mu(NULL, *Locks::thread_suspend_count_lock_);
+    MutexLock mu(nullptr, *Locks::thread_suspend_count_lock_);
     resume_cond_ = new ConditionVariable("Thread resumption condition variable",
                                          *Locks::thread_suspend_count_lock_);
   }
@@ -863,8 +895,8 @@
   CHECK_PTHREAD_CALL(pthread_key_create, (&Thread::pthread_key_self_, Thread::ThreadExitCallback), "self key");
 
   // Double-check the TLS slot allocation.
-  if (pthread_getspecific(pthread_key_self_) != NULL) {
-    LOG(FATAL) << "Newly-created pthread TLS slot is not NULL";
+  if (pthread_getspecific(pthread_key_self_) != nullptr) {
+    LOG(FATAL) << "Newly-created pthread TLS slot is not nullptr";
   }
 }
 
@@ -884,50 +916,49 @@
   is_started_ = false;
   CHECK_PTHREAD_CALL(pthread_key_delete, (Thread::pthread_key_self_), "self key");
   MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_);
-  if (resume_cond_ != NULL) {
+  if (resume_cond_ != nullptr) {
     delete resume_cond_;
-    resume_cond_ = NULL;
+    resume_cond_ = nullptr;
   }
 }
 
 Thread::Thread(bool daemon)
     : suspend_count_(0),
-      card_table_(NULL),
-      exception_(NULL),
-      stack_end_(NULL),
+      card_table_(nullptr),
+      exception_(nullptr),
+      stack_end_(nullptr),
       managed_stack_(),
-      jni_env_(NULL),
-      self_(NULL),
-      opeer_(NULL),
-      jpeer_(NULL),
-      stack_begin_(NULL),
+      jni_env_(nullptr),
+      self_(nullptr),
+      opeer_(nullptr),
+      jpeer_(nullptr),
+      stack_begin_(nullptr),
       stack_size_(0),
       thin_lock_thread_id_(0),
-      stack_trace_sample_(NULL),
+      stack_trace_sample_(nullptr),
       trace_clock_base_(0),
       tid_(0),
       wait_mutex_(new Mutex("a thread wait mutex")),
       wait_cond_(new ConditionVariable("a thread wait condition variable", *wait_mutex_)),
-      wait_monitor_(NULL),
+      wait_monitor_(nullptr),
       interrupted_(false),
-      wait_next_(NULL),
-      monitor_enter_object_(NULL),
-      top_sirt_(NULL),
-      runtime_(NULL),
-      class_loader_override_(NULL),
-      long_jump_context_(NULL),
+      wait_next_(nullptr),
+      monitor_enter_object_(nullptr),
+      top_sirt_(nullptr),
+      runtime_(nullptr),
+      class_loader_override_(nullptr),
+      long_jump_context_(nullptr),
       throwing_OutOfMemoryError_(false),
       debug_suspend_count_(0),
       debug_invoke_req_(new DebugInvokeReq),
       single_step_control_(new SingleStepControl),
-      deoptimization_shadow_frame_(NULL),
+      deoptimization_shadow_frame_(nullptr),
       instrumentation_stack_(new std::deque<instrumentation::InstrumentationStackFrame>),
       name_(new std::string(kThreadNameDuringStartup)),
       daemon_(daemon),
       pthread_self_(0),
       no_thread_suspension_(0),
-      last_no_thread_suspension_cause_(NULL),
-      checkpoint_function_(0),
+      last_no_thread_suspension_cause_(nullptr),
       thread_exit_check_count_(0),
       thread_local_start_(nullptr),
       thread_local_pos_(nullptr),
@@ -938,22 +969,25 @@
   state_and_flags_.as_struct.state = kNative;
   memset(&held_mutexes_[0], 0, sizeof(held_mutexes_));
   memset(rosalloc_runs_, 0, sizeof(rosalloc_runs_));
+  for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
+    checkpoint_functions_[i] = nullptr;
+  }
 }
 
 bool Thread::IsStillStarting() const {
   // You might think you can check whether the state is kStarting, but for much of thread startup,
   // the thread is in kNative; it might also be in kVmWait.
-  // You might think you can check whether the peer is NULL, but the peer is actually created and
+  // You might think you can check whether the peer is nullptr, but the peer is actually created and
   // assigned fairly early on, and needs to be.
   // It turns out that the last thing to change is the thread name; that's a good proxy for "has
   // this thread _ever_ entered kRunnable".
-  return (jpeer_ == NULL && opeer_ == NULL) || (*name_ == kThreadNameDuringStartup);
+  return (jpeer_ == nullptr && opeer_ == nullptr) || (*name_ == kThreadNameDuringStartup);
 }
 
 void Thread::AssertNoPendingException() const {
   if (UNLIKELY(IsExceptionPending())) {
     ScopedObjectAccess soa(Thread::Current());
-    mirror::Throwable* exception = GetException(NULL);
+    mirror::Throwable* exception = GetException(nullptr);
     LOG(FATAL) << "No pending exception expected: " << exception->Dump();
   }
 }
@@ -976,7 +1010,7 @@
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
 
-  if (opeer_ != NULL) {
+  if (opeer_ != nullptr) {
     ScopedObjectAccess soa(self);
     // We may need to call user-supplied managed code, do this before final clean-up.
     HandleUncaughtExceptions(soa);
@@ -999,30 +1033,35 @@
   }
 
   // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
-  if (jni_env_ != NULL) {
+  if (jni_env_ != nullptr) {
     jni_env_->monitors.VisitRoots(MonitorExitVisitor, self);
   }
 }
 
 Thread::~Thread() {
-  if (jni_env_ != NULL && jpeer_ != NULL) {
+  if (jni_env_ != nullptr && jpeer_ != nullptr) {
     // If pthread_create fails we don't have a jni env here.
     jni_env_->DeleteGlobalRef(jpeer_);
-    jpeer_ = NULL;
+    jpeer_ = nullptr;
   }
-  opeer_ = NULL;
+  opeer_ = nullptr;
 
   delete jni_env_;
-  jni_env_ = NULL;
+  jni_env_ = nullptr;
 
   CHECK_NE(GetState(), kRunnable);
+  CHECK_NE(ReadFlag(kCheckpointRequest), true);
+  CHECK(checkpoint_functions_[0] == nullptr);
+  CHECK(checkpoint_functions_[1] == nullptr);
+  CHECK(checkpoint_functions_[2] == nullptr);
+
   // We may be deleting a still born thread.
   SetStateUnsafe(kTerminated);
 
   delete wait_cond_;
   delete wait_mutex_;
 
-  if (long_jump_context_ != NULL) {
+  if (long_jump_context_ != nullptr) {
     delete long_jump_context_;
   }
 
@@ -1052,7 +1091,7 @@
   ScopedLocalRef<jobject> handler(jni_env_,
                                   jni_env_->GetObjectField(peer.get(),
                                                            WellKnownClasses::java_lang_Thread_uncaughtHandler));
-  if (handler.get() == NULL) {
+  if (handler.get() == nullptr) {
     // Otherwise use the thread group's default handler.
     handler.reset(jni_env_->GetObjectField(peer.get(), WellKnownClasses::java_lang_Thread_group));
   }
@@ -1070,7 +1109,7 @@
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
   mirror::Object* ogroup = soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(opeer_);
-  if (ogroup != NULL) {
+  if (ogroup != nullptr) {
     ScopedLocalRef<jobject> group(soa.Env(), soa.AddLocalReference<jobject>(ogroup));
     ScopedLocalRef<jobject> peer(soa.Env(), soa.AddLocalReference<jobject>(opeer_));
     ScopedThreadStateChange tsc(soa.Self(), kNative);
@@ -1101,7 +1140,7 @@
 void Thread::SirtVisitRoots(RootVisitor* visitor, void* arg) {
   for (StackIndirectReferenceTable* cur = top_sirt_; cur; cur = cur->GetLink()) {
     size_t num_refs = cur->NumberOfReferences();
-    for (size_t j = 0; j < num_refs; j++) {
+    for (size_t j = 0; j < num_refs; ++j) {
       mirror::Object* object = cur->GetReference(j);
       if (object != nullptr) {
         const mirror::Object* new_obj = visitor(object, arg);
@@ -1116,8 +1155,8 @@
 
 mirror::Object* Thread::DecodeJObject(jobject obj) const {
   Locks::mutator_lock_->AssertSharedHeld(this);
-  if (obj == NULL) {
-    return NULL;
+  if (obj == nullptr) {
+    return nullptr;
   }
   IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
   IndirectRefKind kind = GetIndirectRefKind(ref);
@@ -1146,13 +1185,13 @@
     DCHECK_EQ(kind, kWeakGlobal);
     result = Runtime::Current()->GetJavaVM()->DecodeWeakGlobal(const_cast<Thread*>(this), ref);
     if (result == kClearedJniWeakGlobal) {
-      // This is a special case where it's okay to return NULL.
+      // This is a special case where it's okay to return nullptr.
       return nullptr;
     }
   }
 
-  if (UNLIKELY(result == NULL)) {
-    JniAbortF(NULL, "use of deleted %s %p", ToStr<IndirectRefKind>(kind).c_str(), obj);
+  if (UNLIKELY(result == nullptr)) {
+    JniAbortF(nullptr, "use of deleted %s %p", ToStr<IndirectRefKind>(kind).c_str(), obj);
   } else {
     if (kIsDebugBuild && (result != kInvalidIndirectRefObject)) {
       Runtime::Current()->GetHeap()->VerifyObject(result);
@@ -1192,7 +1231,7 @@
 }
 
 void Thread::NotifyLocked(Thread* self) {
-  if (wait_monitor_ != NULL) {
+  if (wait_monitor_ != nullptr) {
     wait_cond_->Signal(self);
   }
 }
@@ -1201,7 +1240,7 @@
  public:
   explicit CountStackDepthVisitor(Thread* thread)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, NULL),
+      : StackVisitor(thread, nullptr),
         depth_(0), skip_depth_(0), skipping_(true) {}
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1240,8 +1279,8 @@
 class BuildInternalStackTraceVisitor : public StackVisitor {
  public:
   explicit BuildInternalStackTraceVisitor(Thread* self, Thread* thread, int skip_depth)
-      : StackVisitor(thread, NULL), self_(self),
-        skip_depth_(skip_depth), count_(0), dex_pc_trace_(NULL), method_trace_(NULL) {}
+      : StackVisitor(thread, nullptr), self_(self),
+        skip_depth_(skip_depth), count_(0), dex_pc_trace_(nullptr), method_trace_(nullptr) {}
 
   bool Init(int depth)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -1250,11 +1289,11 @@
         method_trace(self_,
                      Runtime::Current()->GetClassLinker()->AllocObjectArray<mirror::Object>(self_,
                                                                                             depth + 1));
-    if (method_trace.get() == NULL) {
+    if (method_trace.get() == nullptr) {
       return false;
     }
     mirror::IntArray* dex_pc_trace = mirror::IntArray::Alloc(self_, depth);
-    if (dex_pc_trace == NULL) {
+    if (dex_pc_trace == nullptr) {
       return false;
     }
     // Save PC trace in last element of method trace, also places it into the
@@ -1263,20 +1302,20 @@
     // Set the Object*s and assert that no thread suspension is now possible.
     const char* last_no_suspend_cause =
         self_->StartAssertNoThreadSuspension("Building internal stack trace");
-    CHECK(last_no_suspend_cause == NULL) << last_no_suspend_cause;
+    CHECK(last_no_suspend_cause == nullptr) << last_no_suspend_cause;
     method_trace_ = method_trace.get();
     dex_pc_trace_ = dex_pc_trace;
     return true;
   }
 
   virtual ~BuildInternalStackTraceVisitor() {
-    if (method_trace_ != NULL) {
-      self_->EndAssertNoThreadSuspension(NULL);
+    if (method_trace_ != nullptr) {
+      self_->EndAssertNoThreadSuspension(nullptr);
     }
   }
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (method_trace_ == NULL || dex_pc_trace_ == NULL) {
+    if (method_trace_ == nullptr || dex_pc_trace_ == nullptr) {
       return true;  // We're probably trying to fillInStackTrace for an OutOfMemoryError.
     }
     if (skip_depth_ > 0) {
@@ -1320,13 +1359,13 @@
   BuildInternalStackTraceVisitor build_trace_visitor(soa.Self(), const_cast<Thread*>(this),
                                                      skip_depth);
   if (!build_trace_visitor.Init(depth)) {
-    return NULL;  // Allocation failed.
+    return nullptr;  // Allocation failed.
   }
   build_trace_visitor.WalkStack();
   mirror::ObjectArray<mirror::Object>* trace = build_trace_visitor.GetInternalStackTrace();
   if (kIsDebugBuild) {
     for (int32_t i = 0; i < trace->GetLength(); ++i) {
-      CHECK(trace->Get(i) != NULL);
+      CHECK(trace->Get(i) != nullptr);
     }
   }
   return soa.AddLocalReference<jobjectArray>(trace);
@@ -1343,7 +1382,7 @@
 
   jobjectArray result;
 
-  if (output_array != NULL) {
+  if (output_array != nullptr) {
     // Reuse the array we were given.
     result = output_array;
     // ...adjusting the number of frames we'll write to not exceed the array length.
@@ -1354,13 +1393,13 @@
     // Create java_trace array and place in local reference table
     mirror::ObjectArray<mirror::StackTraceElement>* java_traces =
         class_linker->AllocStackTraceElementArray(soa.Self(), depth);
-    if (java_traces == NULL) {
-      return NULL;
+    if (java_traces == nullptr) {
+      return nullptr;
     }
     result = soa.AddLocalReference<jobjectArray>(java_traces);
   }
 
-  if (stack_depth != NULL) {
+  if (stack_depth != nullptr) {
     *stack_depth = depth;
   }
 
@@ -1397,17 +1436,17 @@
       }
     }
     const char* method_name = mh.GetName();
-    CHECK(method_name != NULL);
+    CHECK(method_name != nullptr);
     SirtRef<mirror::String> method_name_object(soa.Self(),
                                                mirror::String::AllocFromModifiedUtf8(soa.Self(),
                                                                                      method_name));
-    if (method_name_object.get() == NULL) {
-      return NULL;
+    if (method_name_object.get() == nullptr) {
+      return nullptr;
     }
     mirror::StackTraceElement* obj = mirror::StackTraceElement::Alloc(
         soa.Self(), class_name_object, method_name_object, source_name_object, line_number);
-    if (obj == NULL) {
-      return NULL;
+    if (obj == nullptr) {
+      return nullptr;
     }
     soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(result)->Set(i, obj);
   }
@@ -1445,7 +1484,7 @@
   SirtRef<mirror::Object> saved_throw_this(this, throw_location.GetThis());
   SirtRef<mirror::ArtMethod> saved_throw_method(this, throw_location.GetMethod());
   // Ignore the cause throw location. TODO: should we report this as a re-throw?
-  SirtRef<mirror::Throwable> cause(this, GetException(NULL));
+  SirtRef<mirror::Throwable> cause(this, GetException(nullptr));
   ClearException();
   Runtime* runtime = Runtime::Current();
 
@@ -1457,7 +1496,7 @@
   SirtRef<mirror::Class>
       exception_class(this, runtime->GetClassLinker()->FindClass(exception_class_descriptor,
                                                                  class_loader));
-  if (UNLIKELY(exception_class.get() == NULL)) {
+  if (UNLIKELY(exception_class.get() == nullptr)) {
     CHECK(IsExceptionPending());
     LOG(ERROR) << "No exception class " << PrettyDescriptor(exception_class_descriptor);
     return;
@@ -1481,21 +1520,21 @@
 
   // Choose an appropriate constructor and set up the arguments.
   const char* signature;
-  SirtRef<mirror::String> msg_string(this, NULL);
-  if (msg != NULL) {
+  SirtRef<mirror::String> msg_string(this, nullptr);
+  if (msg != nullptr) {
     // Ensure we remember this and the method over the String allocation.
     msg_string.reset(mirror::String::AllocFromModifiedUtf8(this, msg));
-    if (UNLIKELY(msg_string.get() == NULL)) {
+    if (UNLIKELY(msg_string.get() == nullptr)) {
       CHECK(IsExceptionPending());  // OOME.
       return;
     }
-    if (cause.get() == NULL) {
+    if (cause.get() == nullptr) {
       signature = "(Ljava/lang/String;)V";
     } else {
       signature = "(Ljava/lang/String;Ljava/lang/Throwable;)V";
     }
   } else {
-    if (cause.get() == NULL) {
+    if (cause.get() == nullptr) {
       signature = "()V";
     } else {
       signature = "(Ljava/lang/Throwable;)V";
@@ -1504,17 +1543,17 @@
   mirror::ArtMethod* exception_init_method =
       exception_class->FindDeclaredDirectMethod("<init>", signature);
 
-  CHECK(exception_init_method != NULL) << "No <init>" << signature << " in "
+  CHECK(exception_init_method != nullptr) << "No <init>" << signature << " in "
       << PrettyDescriptor(exception_class_descriptor);
 
   if (UNLIKELY(!runtime->IsStarted())) {
     // Something is trying to throw an exception without a started runtime, which is the common
     // case in the compiler. We won't be able to invoke the constructor of the exception, so set
     // the exception fields directly.
-    if (msg != NULL) {
+    if (msg != nullptr) {
       exception->SetDetailMessage(msg_string.get());
     }
-    if (cause.get() != NULL) {
+    if (cause.get() != nullptr) {
       exception->SetCause(cause.get());
     }
     ThrowLocation gc_safe_throw_location(saved_throw_this.get(), saved_throw_method.get(),
@@ -1523,10 +1562,10 @@
   } else {
     ArgArray args("VLL", 3);
     args.Append(reinterpret_cast<uint32_t>(exception.get()));
-    if (msg != NULL) {
+    if (msg != nullptr) {
       args.Append(reinterpret_cast<uint32_t>(msg_string.get()));
     }
-    if (cause.get() != NULL) {
+    if (cause.get() != nullptr) {
       args.Append(reinterpret_cast<uint32_t>(cause.get()));
     }
     JValue result;
@@ -1709,12 +1748,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(self, self->GetLongJumpContext()),
         self_(self), exception_(exception), is_deoptimization_(is_deoptimization),
-        to_find_(is_deoptimization ? NULL : exception->GetClass()), throw_location_(throw_location),
-        handler_quick_frame_(NULL), handler_quick_frame_pc_(0), handler_dex_pc_(0),
+        to_find_(is_deoptimization ? nullptr : exception->GetClass()), throw_location_(throw_location),
+        handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_dex_pc_(0),
         native_method_count_(0), clear_exception_(false),
         method_tracing_active_(is_deoptimization ||
                                Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()),
-        instrumentation_frames_to_pop_(0), top_shadow_frame_(NULL), prev_shadow_frame_(NULL) {
+        instrumentation_frames_to_pop_(0), top_shadow_frame_(nullptr), prev_shadow_frame_(nullptr) {
     // Exception not in root sets, can't allow GC.
     last_no_assert_suspension_cause_ = self->StartAssertNoThreadSuspension("Finding catch block");
   }
@@ -1725,7 +1764,7 @@
 
   bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* method = GetMethod();
-    if (method == NULL) {
+    if (method == nullptr) {
       // This is the upcall, we remember the frame and last pc so that we may long jump to them.
       handler_quick_frame_pc_ = GetCurrentQuickFramePc();
       handler_quick_frame_ = GetCurrentQuickFrame();
@@ -1734,7 +1773,7 @@
       if (UNLIKELY(method_tracing_active_ &&
                    GetQuickInstrumentationExitPc() == GetReturnPc())) {
         // Keep count of the number of unwinds during instrumentation.
-        instrumentation_frames_to_pop_++;
+        ++instrumentation_frames_to_pop_;
       }
       if (method->IsRuntimeMethod()) {
         // Ignore callee save method.
@@ -1751,7 +1790,7 @@
   bool HandleTryItems(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t dex_pc = DexFile::kDexNoIndex;
     if (method->IsNative()) {
-      native_method_count_++;
+      ++native_method_count_;
     } else {
       dex_pc = GetDexPc();
     }
@@ -1771,12 +1810,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     MethodHelper mh(m);
     const DexFile::CodeItem* code_item = mh.GetCodeItem();
-    CHECK(code_item != NULL);
+    CHECK(code_item != nullptr);
     uint16_t num_regs = code_item->registers_size_;
     uint32_t dex_pc = GetDexPc();
     const Instruction* inst = Instruction::At(code_item->insns_ + dex_pc);
     uint32_t new_dex_pc = dex_pc + inst->SizeInCodeUnits();
-    ShadowFrame* new_frame = ShadowFrame::Create(num_regs, NULL, m, new_dex_pc);
+    ShadowFrame* new_frame = ShadowFrame::Create(num_regs, nullptr, m, new_dex_pc);
     SirtRef<mirror::DexCache> dex_cache(self_, mh.GetDexCache());
     SirtRef<mirror::ClassLoader> class_loader(self_, mh.GetClassLoader());
     verifier::MethodVerifier verifier(&mh.GetDexFile(), &dex_cache, &class_loader,
@@ -1784,7 +1823,7 @@
                                       m->GetAccessFlags(), false, true);
     verifier.Verify();
     std::vector<int32_t> kinds = verifier.DescribeVRegs(dex_pc);
-    for (uint16_t reg = 0; reg < num_regs; reg++) {
+    for (uint16_t reg = 0; reg < num_regs; ++reg) {
       VRegKind kind = static_cast<VRegKind>(kinds.at(reg * 2));
       switch (kind) {
         case kUndefined:
@@ -1802,7 +1841,7 @@
           break;
       }
     }
-    if (prev_shadow_frame_ != NULL) {
+    if (prev_shadow_frame_ != nullptr) {
       prev_shadow_frame_->SetLink(new_frame);
     } else {
       top_shadow_frame_ = new_frame;
@@ -1813,7 +1852,7 @@
 
   void DoLongJump() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* catch_method = *handler_quick_frame_;
-    if (catch_method == NULL) {
+    if (catch_method == nullptr) {
       if (kDebugExceptionDelivery) {
         LOG(INFO) << "Handler is upcall";
       }
@@ -1893,7 +1932,7 @@
   // Get exception from thread.
   ThrowLocation throw_location;
   mirror::Throwable* exception = GetException(&throw_location);
-  CHECK(exception != NULL);
+  CHECK(exception != nullptr);
   // Don't leave exception visible while we try to find the handler, which may cause class
   // resolution.
   ClearException();
@@ -1901,7 +1940,7 @@
   if (kDebugExceptionDelivery) {
     if (!is_deoptimization) {
       mirror::String* msg = exception->GetDetailMessage();
-      std::string str_msg(msg != NULL ? msg->ToModifiedUtf8() : "");
+      std::string str_msg(msg != nullptr ? msg->ToModifiedUtf8() : "");
       DumpStack(LOG(INFO) << "Delivering exception: " << PrettyTypeOf(exception)
                 << ": " << str_msg << "\n");
     } else {
@@ -1916,10 +1955,10 @@
 
 Context* Thread::GetLongJumpContext() {
   Context* result = long_jump_context_;
-  if (result == NULL) {
+  if (result == nullptr) {
     result = Context::Create();
   } else {
-    long_jump_context_ = NULL;  // Avoid context being shared.
+    long_jump_context_ = nullptr;  // Avoid context being shared.
     result->Reset();
   }
   return result;
@@ -1928,14 +1967,14 @@
 struct CurrentMethodVisitor : public StackVisitor {
   CurrentMethodVisitor(Thread* thread, Context* context)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : StackVisitor(thread, context), this_object_(NULL), method_(NULL), dex_pc_(0) {}
+      : StackVisitor(thread, context), this_object_(nullptr), method_(nullptr), dex_pc_(0) {}
   virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
     if (m->IsRuntimeMethod()) {
       // Continue if this is a runtime method.
       return true;
     }
-    if (context_ != NULL) {
+    if (context_ != nullptr) {
       this_object_ = GetThisObject();
     }
     method_ = m;
@@ -1948,9 +1987,9 @@
 };
 
 mirror::ArtMethod* Thread::GetCurrentMethod(uint32_t* dex_pc) const {
-  CurrentMethodVisitor visitor(const_cast<Thread*>(this), NULL);
+  CurrentMethodVisitor visitor(const_cast<Thread*>(this), nullptr);
   visitor.WalkStack(false);
-  if (dex_pc != NULL) {
+  if (dex_pc != nullptr) {
     *dex_pc = visitor.dex_pc_;
   }
   return visitor.method_;
@@ -1965,7 +2004,7 @@
 }
 
 bool Thread::HoldsLock(mirror::Object* object) {
-  if (object == NULL) {
+  if (object == nullptr) {
     return false;
   }
   return object->GetLockOwnerThreadId() == thin_lock_thread_id_;
@@ -1985,7 +2024,7 @@
           << StringPrintf("@ PC:%04x", GetDexPc());
     }
     ShadowFrame* shadow_frame = GetCurrentShadowFrame();
-    if (shadow_frame != NULL) {
+    if (shadow_frame != nullptr) {
       mirror::ArtMethod* m = shadow_frame->GetMethod();
       size_t num_regs = shadow_frame->NumberOfVRegs();
       if (m->IsNative() || shadow_frame->HasReferenceArray()) {
@@ -2007,7 +2046,7 @@
         verifier::DexPcToReferenceMap dex_gc_map(gc_map);
         uint32_t dex_pc = GetDexPc();
         const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
-        DCHECK(reg_bitmap != NULL);
+        DCHECK(reg_bitmap != nullptr);
         num_regs = std::min(dex_gc_map.RegWidth() * 8, num_regs);
         for (size_t reg = 0; reg < num_regs; ++reg) {
           if (TestBitmap(reg, reg_bitmap)) {
@@ -2026,23 +2065,23 @@
       // Process register map (which native and runtime methods don't have)
       if (!m->IsNative() && !m->IsRuntimeMethod() && !m->IsProxyMethod()) {
         const uint8_t* native_gc_map = m->GetNativeGcMap();
-        CHECK(native_gc_map != NULL) << PrettyMethod(m);
+        CHECK(native_gc_map != nullptr) << PrettyMethod(m);
         mh_.ChangeMethod(m);
         const DexFile::CodeItem* code_item = mh_.GetCodeItem();
-        DCHECK(code_item != NULL) << PrettyMethod(m);  // Can't be NULL or how would we compile its instructions?
+        DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be nullptr or how would we compile its instructions?
         NativePcOffsetToReferenceMap map(native_gc_map);
         size_t num_regs = std::min(map.RegWidth() * 8,
                                    static_cast<size_t>(code_item->registers_size_));
         if (num_regs > 0) {
           const uint8_t* reg_bitmap = map.FindBitMap(GetNativePcOffset());
-          DCHECK(reg_bitmap != NULL);
+          DCHECK(reg_bitmap != nullptr);
           const VmapTable vmap_table(m->GetVmapTable());
           uint32_t core_spills = m->GetCoreSpillMask();
           uint32_t fp_spills = m->GetFpSpillMask();
           size_t frame_size = m->GetFrameSizeInBytes();
           // For all dex registers in the bitmap
           mirror::ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
-          DCHECK(cur_quick_frame != NULL);
+          DCHECK(cur_quick_frame != nullptr);
           for (size_t reg = 0; reg < num_regs; ++reg) {
             // Does this register hold a reference?
             if (TestBitmap(reg, reg_bitmap)) {
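
The thread.cc changes above are mostly NULL -> nullptr cleanups, but the GetCurrentMethod()/CurrentMethodVisitor path they touch is also what a sampling checkpoint ends up calling: walk the stack, skip runtime methods, and report the topmost managed method. A minimal sketch of such a checkpoint closure, assuming the Closure::Run(Thread*) interface that ThreadList::RunCheckpoint() hands out to threads (the SampleCheckpoint name below is illustrative only, not part of this change):

    // Sketch only: records the method the target thread is executing when it
    // reaches its next suspend point and runs its pending checkpoints.
    class SampleCheckpoint : public Closure {
     public:
      virtual void Run(Thread* thread) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
        uint32_t dex_pc = 0;
        mirror::ArtMethod* method = thread->GetCurrentMethod(&dex_pc);
        if (method != nullptr) {
          // The real profiler aggregates samples into a table; logging stands in here.
          LOG(INFO) << "sample: " << PrettyMethod(method) << " @" << dex_pc;
        }
      }
    };
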
diff --git a/runtime/thread.h b/runtime/thread.h
index b01ec94..30c7e8f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -133,9 +133,19 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ThreadState GetState() const {
+    DCHECK(state_and_flags_.as_struct.state >= kTerminated && state_and_flags_.as_struct.state <= kSuspended);
     return static_cast<ThreadState>(state_and_flags_.as_struct.state);
   }
 
+  // Checks that this thread's state field holds a valid ThreadState; 'id' identifies the call site in the failure log.
+  void CheckState(int id) const {
+    if (state_and_flags_.as_struct.state >= kTerminated && state_and_flags_.as_struct.state <= kSuspended) {
+      return;
+    }
+    LOG(INFO) << "Thread " << this << " state is invalid: " << state_and_flags_.as_struct.state << " id=" << id;
+    CHECK(false);
+  }
+
   ThreadState SetState(ThreadState new_state);
 
   int GetSuspendCount() const EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
@@ -780,9 +790,12 @@
   // Cause for last suspension.
   const char* last_no_thread_suspension_cause_;
 
+  // Maximum number of checkpoint functions.
+  static constexpr uint32_t kMaxCheckpoints = 3;
+
-  // Pending checkpoint function or NULL if non-pending. Installation guarding by
-  // Locks::thread_suspend_count_lock_.
-  Closure* checkpoint_function_;
+  // Pending checkpoint functions, or nullptr if none pending. Installation is guarded
+  // by Locks::thread_suspend_count_lock_.
+  Closure* checkpoint_functions_[kMaxCheckpoints];
 
  public:
   // Entrypoint function pointers
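
Replacing the single checkpoint_function_ with an array of kMaxCheckpoints slots means several requesters (for example the GC and the sampling profiler) can each have a checkpoint pending on the same thread; the new destructor CHECKs in thread.cc assert that every slot has drained before the Thread object is destroyed. The installation side is not in this hunk; roughly, a requester holding Locks::thread_suspend_count_lock_ claims the first free slot and raises kCheckpointRequest. A sketch under those assumptions (AtomicSetFlag is assumed to be the existing flag setter; this is not the actual RequestCheckpoint() body):

    // Sketch only: install 'function' into the first free checkpoint slot.
    // Caller holds Locks::thread_suspend_count_lock_, per the member comment.
    bool Thread::InstallCheckpointSketch(Closure* function) {
      for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
        if (checkpoint_functions_[i] == nullptr) {
          checkpoint_functions_[i] = function;
          AtomicSetFlag(kCheckpointRequest);  // Target sees this at its next suspend point.
          return true;
        }
      }
      return false;  // All kMaxCheckpoints slots busy; the requester retries (see thread_list.cc below).
    }
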
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index aed8c77..8bf099b 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -211,7 +211,7 @@
       if (thread != self) {
         while (true) {
           if (thread->RequestCheckpoint(checkpoint_function)) {
-            // This thread will run it's checkpoint some time in the near future.
+            // This thread will run its checkpoint some time in the near future.
             count++;
             break;
           } else {
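
The loop above only counts how many threads will run the closure asynchronously; the requester still has to wait for them. The usual pattern is to wrap the per-thread work in a closure that signals a Barrier, then have the requester wait for that many Pass() calls (a sketch; the CheckpointBarrierClosure name is illustrative and the profiler's real driver code is elsewhere in this change):

    // Sketch only: do the per-thread work, then tell the requester this thread is done.
    class CheckpointBarrierClosure : public Closure {
     public:
      CheckpointBarrierClosure(Closure* wrapped, Barrier* barrier)
          : wrapped_(wrapped), barrier_(barrier) {}
      virtual void Run(Thread* thread) {
        wrapped_->Run(thread);   // e.g. take a sample on this thread.
        barrier_->Pass(thread);  // One fewer thread for the requester to wait on.
      }
     private:
      Closure* const wrapped_;
      Barrier* const barrier_;
    };

The requester then bumps the barrier by the count RunCheckpoint() accumulates and waits on it, preferably with a timed wait so a wedged thread cannot hang the requester indefinitely.
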
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index 4d4bfb7..7615c41 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -21,7 +21,7 @@
 
 enum ThreadState {
   //                                   Thread.State   JDWP state
-  kTerminated,                      // TERMINATED     TS_ZOMBIE    Thread.run has returned, but Thread* still around
+  kTerminated = 66,                 // TERMINATED     TS_ZOMBIE    Thread.run has returned, but Thread* still around
   kRunnable,                        // RUNNABLE       TS_RUNNING   runnable
   kTimedWaiting,                    // TIMED_WAITING  TS_WAIT      in Object.wait() with a timeout
   kSleeping,                        // TIMED_WAITING  TS_SLEEPING  in Thread.sleep()
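
Giving kTerminated the value 66 (instead of the implicit 0) is what makes the new validity checks in thread.h meaningful: a zero-initialized or stomped state word no longer decodes as a plausible kTerminated; it falls outside the [kTerminated, kSuspended] range and trips the DCHECK in GetState() or the CHECK in CheckState(). Illustration only, using the same range test as those checks:

    // The check GetState()/CheckState() perform on the raw state field.
    inline bool IsValidThreadStateValue(uint16_t raw) {
      return raw >= kTerminated && raw <= kSuspended;
    }
    // IsValidThreadStateValue(0)            -> false (uninitialized / corrupted memory)
    // IsValidThreadStateValue(kTerminated)  -> true  (== 66)
    // IsValidThreadStateValue(kRunnable)    -> true  (== 67)
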