Java Heap Profiler for Perfetto.

This adds a new ART plugin that waits for a signal (number 38). When the
signal is received, it forks the process (so that the app stays responsive
to the user), then connects to Perfetto and writes out a proto describing
the graph of currently live ART objects.

This has several advantages over `am dumpheap`:
* Because it forks, it has less impact on the user.
* It only writes out the ownership graph, rather than also dumping the
  contents of the heap.
* As this is streamed into Perfetto, it can be correlated to other data
  sources in the same trace file.

Test: Flash; start data source; send signal 38; get heap dump
Bug: 136210868

Change-Id: I1f75bc532fd0a6b8d5f7f474ac7154180677dedd
diff --git a/Android.mk b/Android.mk
index aeeca54..21dc33c 100644
--- a/Android.mk
+++ b/Android.mk
@@ -426,6 +426,7 @@
     libopenjdkjvmti \
     profman \
     libadbconnection \
+    libperfetto_hprof \
 
 # Potentially add in debug variants:
 #
@@ -448,6 +449,7 @@
     libopenjdkjvmtid \
     profmand \
     libadbconnectiond \
+    libperfetto_hprofd \
 
 endif
 endif
diff --git a/build/apex/Android.bp b/build/apex/Android.bp
index 7a17b3a..8c93349 100644
--- a/build/apex/Android.bp
+++ b/build/apex/Android.bp
@@ -40,6 +40,11 @@
     "libopenjdkjvm",
     "libopenjdkjvmti",
 ]
+
+art_runtime_base_native_device_only_shared_libs = [
+    "libperfetto_hprof",
+]
+
 bionic_native_shared_libs = [
     // External API (having APEX stubs).
     "libc",
@@ -81,6 +86,10 @@
     "libopenjdkjvmtid",
 ]
 
+art_runtime_base_native_device_only_debug_shared_libs = [
+  "libperfetto_hprofd",
+]
+
 // Tools common to both device APEX and host APEX. Derived from art-tools in art/Android.mk.
 art_tools_common_binaries = [
     "dexdump",
@@ -248,6 +257,7 @@
     manifest: "manifest-art.json",
     java_libs: libcore_java_libs,
     native_shared_libs: art_runtime_base_native_shared_libs +
+        art_runtime_base_native_device_only_shared_libs +
         libcore_native_device_only_shared_libs +
         libcore_native_shared_libs,
     multilib: {
@@ -281,7 +291,8 @@
 apex_defaults {
     name: "com.android.art-dev-defaults",
     defaults: ["com.android.art-defaults"],
-    native_shared_libs: art_runtime_debug_native_shared_libs +
+    native_shared_libs: art_runtime_base_native_device_only_debug_shared_libs +
+        art_runtime_debug_native_shared_libs +
         libcore_debug_native_shared_libs,
     multilib: {
         both: {
diff --git a/build/apex/art_apex_test.py b/build/apex/art_apex_test.py
index 746bf7a..31553b0 100755
--- a/build/apex/art_apex_test.py
+++ b/build/apex/art_apex_test.py
@@ -528,6 +528,7 @@
 
     # Check internal libraries for ART.
     self._checker.check_prefer64_library('libart-disassembler')
+    self._checker.check_native_library('libperfetto_hprof')
 
     # Check exported native libraries for Managed Core Library.
     self._checker.check_native_library('libandroidicu')
@@ -621,6 +622,7 @@
     # Check ART internal libraries.
     self._checker.check_native_library('libdexfiled_external')
     self._checker.check_prefer64_library('libartd-disassembler')
+    self._checker.check_native_library('libperfetto_hprofd')
 
     # Check internal native library dependencies.
     #
diff --git a/perfetto_hprof/Android.bp b/perfetto_hprof/Android.bp
new file mode 100644
index 0000000..07024e2
--- /dev/null
+++ b/perfetto_hprof/Android.bp
@@ -0,0 +1,77 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Build variants {target} x {debug,ndebug} x {32,64}
+
+// This depends on the Perfetto client API. This uses the ProducerPort to
+// communicate to the system trace. This is an API whose ABI is maintained
+// to be backwards compatible, see
+// https://android.googlesource.com/platform/external/perfetto/+/refs/heads/master/protos/perfetto/ipc/producer_port.proto.
+
+cc_defaults {
+    name: "perfetto_hprof-defaults",
+    host_supported: false,
+    srcs: ["perfetto_hprof.cc"],
+    defaults: ["art_defaults"],
+    include_dirs: [
+        "external/perfetto/include",
+    ],
+
+    // Note that this plugin needs to be built for both 32-bit and 64-bit, since it must have
+    // the same ISA as the process it is loaded into.
+    compile_multilib: "both",
+
+    shared_libs: [
+        "libbase",
+        "liblog",
+    ],
+    static_libs: [
+        "libperfetto_client_experimental",
+        "perfetto_src_tracing_ipc",
+        "perfetto_trace_protos",
+        // TODO(132880619): Remove this as soon as the Perfetto client API no
+        // longer depends on this.
+        "libprotobuf-cpp-lite",
+    ],
+    target: {
+        darwin: {
+            enabled: false,
+        },
+    },
+    header_libs: [
+        "libnativehelper_header_only",
+    ],
+}
+
+art_cc_library {
+    name: "libperfetto_hprof",
+    defaults: ["perfetto_hprof-defaults"],
+    shared_libs: [
+        "libart",
+        "libartbase",
+    ],
+}
+
+art_cc_library {
+    name: "libperfetto_hprofd",
+    defaults: [
+        "art_debug_defaults",
+        "perfetto_hprof-defaults",
+    ],
+    shared_libs: [
+        "libartd",
+        "libartbased",
+    ],
+}
diff --git a/perfetto_hprof/perfetto_hprof.cc b/perfetto_hprof/perfetto_hprof.cc
new file mode 100644
index 0000000..1a78a16
--- /dev/null
+++ b/perfetto_hprof/perfetto_hprof.cc
@@ -0,0 +1,415 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "perfetto_hprof"
+
+#include <android-base/logging.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <thread>
+
+#include "gc/heap-visit-objects-inl.h"
+#include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
+#include "mirror/object-refvisitor-inl.h"
+#include "nativehelper/scoped_local_ref.h"
+#include "perfetto/trace/interned_data/interned_data.pbzero.h"
+#include "perfetto/trace/profiling/heap_graph.pbzero.h"
+#include "perfetto/trace/profiling/profile_common.pbzero.h"
+#include "perfetto/tracing.h"
+#include "runtime-inl.h"
+#include "runtime_callbacks.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread_list.h"
+#include "well_known_classes.h"
+
+// There are three threads involved in this:
+// * listener thread: this is idle in the background when this plugin gets loaded, and waits
+//   for data on g_signal_pipe_fds.
+// * signal thread: an arbitrary thread that handles the signal and writes data to
+//   g_signal_pipe_fds.
+// * perfetto producer thread: once the signal is received, the app forks. In the newly forked
+//   child, the Perfetto Client API spawns a thread to communicate with traced.
+
+namespace perfetto_hprof {
+
+constexpr int kJavaHeapprofdSignal = __SIGRTMIN + 6;
+constexpr time_t kWatchdogTimeoutSec = 120;
+constexpr size_t kObjectsPerPacket = 100;
+constexpr char kByte[1] = {'x'};
+
+enum class State {
+  kWaitForStart,
+  kStart,
+  kEnd,
+};
+
+static art::Mutex& GetStateMutex() {
+  static art::Mutex state_mutex("perfetto_hprof_state_mutex", art::LockLevel::kGenericBottomLock);
+  return state_mutex;
+}
+
+static art::ConditionVariable& GetStateCV() {
+  static art::ConditionVariable state_cv("perfetto_hprof_state_cv", GetStateMutex());
+  return state_cv;
+}
+
+static State g_state = State::kWaitForStart;
+
+// Pipe to signal from the signal handler into a worker thread that handles the
+// dump requests.
+int g_signal_pipe_fds[2];
+static struct sigaction g_orig_act = {};
+
+// Returns the id interned for |s| in |m|; a new string gets the current map size as its id.
+uint64_t FindOrAppend(std::map<std::string, uint64_t>* m, const std::string& s) {
+  const auto found = m->find(s);
+  if (found != m->end()) {
+    return found->second;
+  }
+  return m->emplace(s, m->size()).first->second;
+}
+
+// Arms a one-shot timer that sends SIGKILL to this process after kWatchdogTimeoutSec seconds.
+// Failures are fatal; this only gets called in the child, so that does not impact the app.
+void ArmWatchdogOrDie() {
+  // Deliver SIGKILL when the timer expires.
+  struct sigevent kill_event {};
+  kill_event.sigev_notify = SIGEV_SIGNAL;
+  kill_event.sigev_signo = SIGKILL;
+
+  timer_t watchdog{};
+  const int create_res = timer_create(CLOCK_MONOTONIC, &kill_event, &watchdog);
+  if (create_res == -1) {
+    PLOG(FATAL) << "failed to create watchdog timer";
+  }
+
+  // Only it_value is set; it_interval stays zero.
+  struct itimerspec timeout {};
+  timeout.it_value.tv_sec = kWatchdogTimeoutSec;
+  if (timer_settime(watchdog, /*flags=*/ 0, &timeout, nullptr) == -1) {
+    PLOG(FATAL) << "failed to arm watchdog timer";
+  }
+}
+
+class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
+ public:
+  // TODO(fmayer): Change Client API and reject configs that do not target
+  // this process.
+  void OnSetup(const SetupArgs&) override {}
+
+  void OnStart(const StartArgs&) override {
+    art::MutexLock lk(art_thread(), GetStateMutex());
+    if (g_state == State::kWaitForStart) {
+      g_state = State::kStart;
+      GetStateCV().Broadcast(art_thread());
+    }
+  }
+
+  void OnStop(const StopArgs&) override {}
+
+  static art::Thread* art_thread() {
+    // TODO(fmayer): Attach the Perfetto producer thread to ART and give it a name. This is
+    // not trivial, we cannot just attach the first time this method is called, because
+    // AttachCurrentThread deadlocks with the ConditionVariable::Wait in WaitForDataSource.
+    //
+    // We should attach the thread as soon as the Client API spawns it, but that needs more
+    // complicated plumbing.
+    return nullptr;
+  }
+
+ private:
+  static art::Thread* self_;
+};
+
+art::Thread* JavaHprofDataSource::self_ = nullptr;
+
+
+void WaitForDataSource(art::Thread* self) {
+  perfetto::TracingInitArgs args;
+  args.backends = perfetto::BackendType::kSystemBackend;
+  perfetto::Tracing::Initialize(args);
+
+  perfetto::DataSourceDescriptor dsd;
+  dsd.set_name("android.java_hprof");
+  JavaHprofDataSource::Register(dsd);
+
+  LOG(INFO) << "waiting for data source";
+
+  art::MutexLock lk(self, GetStateMutex());
+  while (g_state != State::kStart) {
+    GetStateCV().Wait(self);
+  }
+}
+
+class Writer {
+ public:
+  Writer(pid_t parent_pid, JavaHprofDataSource::TraceContext* ctx)
+      : parent_pid_(parent_pid), ctx_(ctx) {}
+
+  perfetto::protos::pbzero::HeapGraph* GetHeapGraph() {
+    if (!heap_graph_ || ++objects_written_ % kObjectsPerPacket == 0) {
+      if (heap_graph_) {
+        heap_graph_->set_continued(true);
+      }
+      Finalize();
+
+      trace_packet_ = ctx_->NewTracePacket();
+      heap_graph_ = trace_packet_->set_heap_graph();
+      heap_graph_->set_pid(parent_pid_);
+      heap_graph_->set_index(index_++);
+    }
+    return heap_graph_;
+  }
+
+  void Finalize() {
+    if (trace_packet_) {
+      trace_packet_->Finalize();
+    }
+    heap_graph_ = nullptr;
+  }
+
+  ~Writer() { Finalize(); }
+
+ private:
+  const pid_t parent_pid_;
+  JavaHprofDataSource::TraceContext* const ctx_;
+
+  perfetto::DataSource<JavaHprofDataSource>::TraceContext::TracePacketHandle
+      trace_packet_;
+  perfetto::protos::pbzero::HeapGraph* heap_graph_ = nullptr;
+
+  uint64_t index_ = 0;
+  size_t objects_written_ = 0;
+};
+
+class ReferredObjectsFinder {
+ public:
+  explicit ReferredObjectsFinder(
+      std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      : referred_objects_(referred_objects) {}
+
+  // For art::mirror::Object::VisitReferences.
+  void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset,
+                  bool is_static) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    art::mirror::Object* ref = obj->GetFieldObject<art::mirror::Object>(offset);
+    art::ArtField* field;
+    if (is_static) {
+      field = art::ArtField::FindStaticFieldWithOffset(obj->AsClass(), offset.Uint32Value());
+    } else {
+      field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value());
+    }
+    std::string field_name = "";
+    if (field != nullptr) {
+      field_name = field->PrettyField(/*with_type=*/false);
+    }
+    referred_objects_->emplace_back(std::move(field_name), ref);
+  }
+
+  void VisitRootIfNonNull(art::mirror::CompressedReference<art::mirror::Object>* root
+                              ATTRIBUTE_UNUSED) const {}
+  void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root
+                     ATTRIBUTE_UNUSED) const {}
+
+ private:
+  // We can use a raw Object* pointer here, because there are no concurrent GC threads after the
+  // fork.
+  std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_;
+};
+
+// Forks the process and, in the child, writes the graph of live objects to Perfetto.
+// The parent returns right after fork() (logging an error if it failed); the child never
+// returns from this function and terminates through _exit().
+void DumpPerfetto(art::Thread* self) {
+  pid_t parent_pid = getpid();
+  LOG(INFO) << "preparing to dump heap for " << parent_pid;
+
+  // Need to take a heap dump while GC isn't running. See the comment in Heap::VisitObjects().
+  // Also we need the critical section to avoid visiting the same object twice. See b/34967844.
+  //
+  // We need to do this before the fork, because otherwise it can deadlock waiting for the GC,
+  // as all other threads get terminated by the clone, but their locks are not released.
+  art::gc::ScopedGCCriticalSection gcs(self, art::gc::kGcCauseHprof,
+                                       art::gc::kCollectorTypeHprof);
+
+  art::ScopedSuspendAll ssa(__FUNCTION__, /* long_suspend=*/ true);
+
+  pid_t pid = fork();
+  if (pid == -1) {
+    // No child exists to take the dump; report it instead of silently returning.
+    PLOG(ERROR) << "failed to fork, not dumping heap";
+    return;
+  }
+  if (pid != 0) {
+    return;  // Parent: the child takes the dump.
+  }
+
+  // Make sure that this is the first thing we do after forking, so if anything
+  // below hangs, the watchdog kills the forked child.
+  ArmWatchdogOrDie();
+
+  WaitForDataSource(self);
+
+  JavaHprofDataSource::Trace(
+      [parent_pid](JavaHprofDataSource::TraceContext ctx)
+          NO_THREAD_SAFETY_ANALYSIS {
+            LOG(INFO) << "dumping heap for " << parent_pid;
+            Writer writer(parent_pid, &ctx);
+            // Make sure that intern ID 0 (default proto value for a uint64_t) always maps to ""
+            // (default proto value for a string).
+            std::map<std::string, uint64_t> interned_fields{{"", 0}};
+            std::map<std::string, uint64_t> interned_types{{"", 0}};
+
+            art::Runtime::Current()->GetHeap()->VisitObjectsPaused(
+                [&writer, &interned_types, &interned_fields](
+                    art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+                  perfetto::protos::pbzero::HeapGraphObject* object_proto =
+                    writer.GetHeapGraph()->add_objects();
+                  object_proto->set_id(reinterpret_cast<uintptr_t>(obj));
+                  object_proto->set_type_id(FindOrAppend(&interned_types, obj->PrettyTypeOf()));
+                  object_proto->set_self_size(obj->SizeOf());
+
+                  std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects;
+                  ReferredObjectsFinder objf(&referred_objects);
+                  obj->VisitReferences(objf, art::VoidFunctor());
+                  for (const auto& p : referred_objects) {
+                    object_proto->add_reference_field_id(FindOrAppend(&interned_fields, p.first));
+                    object_proto->add_reference_object_id(reinterpret_cast<uintptr_t>(p.second));
+                  }
+                });
+
+            for (const auto& p : interned_fields) {
+              const std::string& str = p.first;
+              uint64_t id = p.second;
+
+              perfetto::protos::pbzero::InternedString* field_proto =
+                writer.GetHeapGraph()->add_field_names();
+              field_proto->set_iid(id);
+              field_proto->set_str(
+                  reinterpret_cast<const uint8_t*>(str.c_str()), str.size());
+            }
+            for (const auto& p : interned_types) {
+              const std::string& str = p.first;
+              uint64_t id = p.second;
+
+              perfetto::protos::pbzero::InternedString* type_proto =
+                writer.GetHeapGraph()->add_type_names();
+              type_proto->set_iid(id);
+              type_proto->set_str(reinterpret_cast<const uint8_t*>(str.c_str()),
+                                  str.size());
+            }
+
+            writer.Finalize();
+
+            ctx.Flush([] {
+              art::MutexLock lk(JavaHprofDataSource::art_thread(), GetStateMutex());
+              g_state = State::kEnd;
+              GetStateCV().Broadcast(JavaHprofDataSource::art_thread());
+            });
+          });
+
+  art::MutexLock lk(self, GetStateMutex());
+  while (g_state != State::kEnd) {
+    GetStateCV().Wait(self);
+  }
+  LOG(INFO) << "finished dumping heap for " << parent_pid;
+  // Prevent the atexit handlers from running. We do not want to call cleanup
+  // functions the parent process has registered.
+  _exit(0);
+}
+
+// The plugin initialization function. Installs the handler for kJavaHeapprofdSignal and starts
+// the listener thread, which takes a heap dump for every byte the handler writes to the pipe.
+extern "C" bool ArtPlugin_Initialize() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  if (art::Runtime::Current() == nullptr) {
+    return false;
+  }
+
+  if (pipe2(g_signal_pipe_fds, O_CLOEXEC) == -1) {  // Do not leak the fds across exec().
+    PLOG(ERROR) << "Failed to pipe";
+    return false;
+  }
+
+  struct sigaction act = {};
+  // SA_SIGINFO selects the three-argument sa_sigaction; SA_RESTART avoids EINTR in the app.
+  act.sa_flags = SA_SIGINFO | SA_RESTART;
+  act.sa_sigaction = [](int, siginfo_t*, void*) {
+    if (write(g_signal_pipe_fds[1], kByte, sizeof(kByte)) == -1) {
+      PLOG(ERROR) << "Failed to trigger heap dump";
+    }
+  };
+
+  // TODO(fmayer): We can probably use the SignalCatcher thread here to not have an idle thread.
+  if (sigaction(kJavaHeapprofdSignal, &act, &g_orig_act) != 0) {
+    // Log before close(), which may overwrite the errno that PLOG reports.
+    PLOG(ERROR) << "Failed to sigaction";
+    close(g_signal_pipe_fds[0]);
+    close(g_signal_pipe_fds[1]);
+    return false;
+  }
+
+  std::thread th([] {
+    art::Runtime* runtime = art::Runtime::Current();
+    if (!runtime->AttachCurrentThread("hprof_listener", /*as_daemon=*/ true,
+                                      runtime->GetSystemThreadGroup(), /*create_peer=*/ false)) {
+      LOG(ERROR) << "failed to attach thread.";
+      return;
+    }
+    art::Thread* self = art::Thread::Current();
+    char buf[1];
+    for (;;) {
+      // Block until the signal handler writes a byte; retry reads interrupted by a signal.
+      const ssize_t res = TEMP_FAILURE_RETRY(read(g_signal_pipe_fds[0], buf, sizeof(buf)));
+
+      if (res <= 0) {
+        if (res == -1) {
+          PLOG(ERROR) << "failed to read";
+        }
+        close(g_signal_pipe_fds[0]);
+        return;
+      }
+
+      perfetto_hprof::DumpPerfetto(self);
+    }
+  });
+  th.detach();
+  return true;
+}
+
+extern "C" bool ArtPlugin_Deinitialize() {
+  if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
+    PLOG(ERROR) << "failed to reset signal handler";
+    // We cannot close the pipe if the signal handler wasn't unregistered,
+    // to avoid receiving SIGPIPE.
+    return false;
+  }
+  close(g_signal_pipe_fds[1]);
+  return true;
+}
+
+}  // namespace perfetto_hprof
+
+namespace perfetto {
+
+PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(perfetto_hprof::JavaHprofDataSource);
+
+}
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index ca331df..156895d 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -149,6 +149,7 @@
   HIDDEN_API_ENFORCEMENT_POLICY_MASK = (1 << 12)
                                      | (1 << 13),
   PROFILE_SYSTEM_SERVER              = 1 << 14,
+  PROFILE_FROM_SHELL                 = 1 << 15,
   USE_APP_IMAGE_STARTUP_CACHE        = 1 << 16,
   DEBUG_IGNORE_APP_SIGNAL_HANDLER    = 1 << 17,
 
@@ -241,6 +242,9 @@
     runtime_flags &= ~DEBUG_IGNORE_APP_SIGNAL_HANDLER;
   }
 
+  runtime->SetProfileableFromShell((runtime_flags & PROFILE_FROM_SHELL) != 0);
+  runtime_flags &= ~PROFILE_FROM_SHELL;
+
   return runtime_flags;
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 766782d..c186770 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1025,6 +1025,13 @@
   // this to come last.
   ScopedObjectAccess soa(Thread::Current());
   GetRuntimeCallbacks()->StartDebugger();
+
+  if (Dbg::IsJdwpAllowed() || IsProfileableFromShell() || IsJavaDebuggable()) {
+    std::string err;
+    if (!EnsurePerfettoPlugin(&err)) {
+      LOG(WARNING) << "Failed to load perfetto_hprof: " << err;
+    }
+  }
 }
 
 void Runtime::StartSignalCatcher() {
@@ -1763,18 +1770,30 @@
   return true;
 }
 
-static bool EnsureJvmtiPlugin(Runtime* runtime,
-                              std::vector<Plugin>* plugins,
-                              std::string* error_msg) {
-  constexpr const char* plugin_name = kIsDebugBuild ? "libopenjdkjvmtid.so" : "libopenjdkjvmti.so";
-
+bool Runtime::EnsurePluginLoaded(const char* plugin_name, std::string* error_msg) {
   // Is the plugin already loaded?
-  for (const Plugin& p : *plugins) {
+  for (const Plugin& p : plugins_) {
     if (p.GetLibrary() == plugin_name) {
       return true;
     }
   }
+  Plugin new_plugin = Plugin::Create(plugin_name);
 
+  if (!new_plugin.Load(error_msg)) {
+    return false;
+  }
+  plugins_.push_back(std::move(new_plugin));
+  return true;
+}
+
+bool Runtime::EnsurePerfettoPlugin(std::string* error_msg) {
+  constexpr const char* plugin_name = kIsDebugBuild ?
+    "libperfetto_hprofd.so" : "libperfetto_hprof.so";
+  return EnsurePluginLoaded(plugin_name, error_msg);
+}
+
+static bool EnsureJvmtiPlugin(Runtime* runtime,
+                              std::string* error_msg) {
   // TODO Rename Dbg::IsJdwpAllowed is IsDebuggingAllowed.
   DCHECK(Dbg::IsJdwpAllowed() || !runtime->IsJavaDebuggable())
       << "Being debuggable requires that jdwp (i.e. debugging) is allowed.";
@@ -1785,14 +1804,8 @@
     return false;
   }
 
-  Plugin new_plugin = Plugin::Create(plugin_name);
-
-  if (!new_plugin.Load(error_msg)) {
-    return false;
-  }
-
-  plugins->push_back(std::move(new_plugin));
-  return true;
+  constexpr const char* plugin_name = kIsDebugBuild ? "libopenjdkjvmtid.so" : "libopenjdkjvmti.so";
+  return runtime->EnsurePluginLoaded(plugin_name, error_msg);
 }
 
 // Attach a new agent and add it to the list of runtime agents
@@ -1803,7 +1816,7 @@
 //
 void Runtime::AttachAgent(JNIEnv* env, const std::string& agent_arg, jobject class_loader) {
   std::string error_msg;
-  if (!EnsureJvmtiPlugin(this, &plugins_, &error_msg)) {
+  if (!EnsureJvmtiPlugin(this, &error_msg)) {
     LOG(WARNING) << "Could not load plugin: " << error_msg;
     ScopedObjectAccess soa(Thread::Current());
     ThrowIOException("%s", error_msg.c_str());
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6735216..120ca66 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -125,6 +125,9 @@
   static bool Create(const RuntimeOptions& raw_options, bool ignore_unrecognized)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
 
+  bool EnsurePluginLoaded(const char* plugin_name, std::string* error_msg);
+  bool EnsurePerfettoPlugin(std::string* error_msg);
+
   // IsAotCompiler for compilers that don't have a running runtime. Only dex2oat currently.
   bool IsAotCompiler() const {
     return !UseJitCompilation() && IsCompiler();
@@ -691,6 +694,14 @@
     return is_java_debuggable_;
   }
 
+  void SetProfileableFromShell(bool value) {
+    is_profileable_from_shell_ = value;
+  }
+
+  bool IsProfileableFromShell() const {
+    return is_profileable_from_shell_;
+  }
+
   void SetJavaDebuggable(bool value);
 
   // Deoptimize the boot image, called for Java debuggable apps.
@@ -1157,6 +1168,8 @@
   // Whether Java code needs to be debuggable.
   bool is_java_debuggable_;
 
+  bool is_profileable_from_shell_ = false;
+
   // The maximum number of failed boots we allow before pruning the dalvik cache
   // and trying again. This option is only inspected when we're running as a
   // zygote.