ART: Add GetStackTrace fast-path

Add a fast-path closure that fills stack frames directly into the
provided buffer, avoiding an intermediate vector and its allocations,
as well as a memcpy.

This only works when collecting stack frames from the top (start >= 0),
which is a common case.

Bug: 62065987
Test: m test-art-host
Change-Id: I8f0d34b3d18f70f4699ed8bdec2c88ca4c96cd4f
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 184fd80..550b972 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -178,6 +178,33 @@
   return ERR(NONE);
 }
 
+struct GetStackTraceDirectClosure : public art::Closure {
+ public:
+  GetStackTraceDirectClosure(jvmtiFrameInfo* frame_buffer_, size_t start, size_t stop)
+      : frame_buffer(frame_buffer_),
+        start_input(start),
+        stop_input(stop),
+        index(0) {
+    DCHECK_GE(start_input, 0u);
+  }
+
+  void Run(art::Thread* self) OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    auto frames_fn = [&](jvmtiFrameInfo info) {
+      frame_buffer[index] = info;
+      ++index;
+    };
+    auto visitor = MakeStackTraceVisitor(self, start_input, stop_input, frames_fn);
+    visitor.WalkStack(/* include_transitions */ false);
+  }
+
+  jvmtiFrameInfo* frame_buffer;
+
+  const size_t start_input;
+  const size_t stop_input;
+
+  size_t index = 0;
+};
+
 static jvmtiError GetThread(JNIEnv* env, jthread java_thread, art::Thread** thread) {
   if (java_thread == nullptr) {
     *thread = art::Thread::Current();
@@ -235,8 +262,20 @@
     return ERR(NONE);
   }
 
-  GetStackTraceVectorClosure closure(start_depth >= 0 ? static_cast<size_t>(start_depth) : 0,
-                                     start_depth >= 0 ? static_cast<size_t>(max_frame_count) : 0);
+  if (start_depth >= 0) {
+    // Fast path: Regular order of stack trace. Fill into the frame_buffer directly.
+    GetStackTraceDirectClosure closure(frame_buffer,
+                                       static_cast<size_t>(start_depth),
+                                       static_cast<size_t>(max_frame_count));
+    thread->RequestSynchronousCheckpoint(&closure);
+    *count_ptr = static_cast<jint>(closure.index);
+    if (closure.index < static_cast<size_t>(start_depth)) {
+      return ERR(ILLEGAL_ARGUMENT);
+    }
+    return ERR(NONE);
+  }
+
+  GetStackTraceVectorClosure closure(0, 0);
   thread->RequestSynchronousCheckpoint(&closure);
 
   return TranslateFrameVector(closure.frames,