8269240: java/foreign/stackwalk/TestAsyncStackWalk.java test failed with concurrent GC

Reviewed-by: vlivanov, dholmes
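
The patch below replaces the hand-rolled native-to-Java transition in the optimized upcall stub (safepoint poll, stack reguard, JavaFrameAnchor save/restore, thread attach/detach emitted as assembly in universalUpcallHandler_x86_64.cpp) with two runtime calls, ProgrammableUpcallHandler::on_entry and on_exit, modelled after JavaCallWrapper. The per-upcall state moves from the old AuxiliarySaves struct into OptimizedEntryBlob::FrameData, stored in the stub frame at frame_data_offset, and frame::oops_do now dispatches to OptimizedEntryBlob::oops_do so a GC walking the stack can visit the saved handle block. TestAsyncStackWalk and TestStackWalk gain ZGC and Shenandoah run configurations to exercise the concurrent-GC case.

What follows is a minimal standalone sketch of the save/restore pairing that FrameData supports. MockThread and the simplified JavaFrameAnchor/JNIHandleBlock types are stand-ins invented for illustration; only the FrameData field names and the rough save/restore order mirror the real code, which additionally handles thread attach/detach, pending exceptions, and the actual thread-state transitions shown in universalUpcallHandler.cpp below.

    #include <cassert>
    #include <cstdio>

    struct JavaFrameAnchor { void* last_sp = nullptr; };   // stand-in for HotSpot's anchor
    struct JNIHandleBlock  { int unused = 0; };             // stand-in for a JNI handle block

    struct MockThread {                                     // stand-in for JavaThread
      JavaFrameAnchor anchor;
      JNIHandleBlock* active_handles = nullptr;
      bool in_java = false;                                 // stands in for the thread state
    };

    // Mirrors OptimizedEntryBlob::FrameData: per-upcall state kept in the stub frame.
    struct FrameData {
      JavaFrameAnchor jfa;
      MockThread*     thread;
      JNIHandleBlock* old_handles;
      JNIHandleBlock* new_handles;
      bool            should_detach;
    };

    // Rough shape of ProgrammableUpcallHandler::on_entry: install a fresh handle block,
    // move the thread into "Java" state, and stash the previous anchor/handles in the frame.
    MockThread* on_entry(FrameData* context, MockThread* t) {
      context->thread      = t;
      context->new_handles = new JNIHandleBlock();          // real code: JNIHandleBlock::allocate_block
      t->in_java           = true;                          // real code: transition_from_native(_thread_in_Java)
      context->old_handles = t->active_handles;
      context->jfa         = t->anchor;                     // save the caller's frame anchor
      t->anchor            = JavaFrameAnchor();             // clear it while Java code runs
      t->active_handles    = context->new_handles;
      return t;
    }

    // Rough shape of ProgrammableUpcallHandler::on_exit: undo everything in reverse.
    void on_exit(FrameData* context) {
      MockThread* t     = context->thread;
      t->active_handles = context->old_handles;             // restore the previous handle block
      t->in_java        = false;                            // real code: transition_from_java(_thread_in_native)
      t->anchor         = context->jfa;                     // restore the saved frame anchor
      delete context->new_handles;                          // real code: JNIHandleBlock::release_block
    }

    int main() {
      MockThread thread;
      JNIHandleBlock caller_handles;
      thread.active_handles = &caller_handles;

      FrameData fd{};
      on_entry(&fd, &thread);
      assert(thread.in_java && thread.active_handles == fd.new_handles);
      // ... the Java upcall runs here; a GC walking this frame reaches the saved
      //     handles through fd.old_handles, as OptimizedEntryBlob::oops_do does ...
      on_exit(&fd);
      assert(!thread.in_java && thread.active_handles == &caller_handles);
      std::puts("upcall frame state restored");
      return 0;
    }
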
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
index dd12c17..760eb24 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
@@ -362,7 +362,7 @@
   return fr;
 }
 
-JavaFrameAnchor* OptimizedEntryBlob::jfa_for_frame(const frame& frame) const {
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
   ShouldNotCallThis();
   return nullptr;
 }
diff --git a/src/hotspot/cpu/arm/frame_arm.cpp b/src/hotspot/cpu/arm/frame_arm.cpp
index b3fd82b..6b8f77b 100644
--- a/src/hotspot/cpu/arm/frame_arm.cpp
+++ b/src/hotspot/cpu/arm/frame_arm.cpp
@@ -313,6 +313,11 @@
   return fr;
 }
 
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+  ShouldNotCallThis();
+  return nullptr;
+}
+
 bool frame::optimized_entry_frame_is_first() const {
   ShouldNotCallThis();
   return false;
diff --git a/src/hotspot/cpu/ppc/frame_ppc.cpp b/src/hotspot/cpu/ppc/frame_ppc.cpp
index 3d0f4ab..3658518 100644
--- a/src/hotspot/cpu/ppc/frame_ppc.cpp
+++ b/src/hotspot/cpu/ppc/frame_ppc.cpp
@@ -197,6 +197,11 @@
   return fr;
 }
 
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+  ShouldNotCallThis();
+  return nullptr;
+}
+
 bool frame::optimized_entry_frame_is_first() const {
   ShouldNotCallThis();
   return false;
diff --git a/src/hotspot/cpu/s390/frame_s390.cpp b/src/hotspot/cpu/s390/frame_s390.cpp
index 11d0b4a..2486c6c 100644
--- a/src/hotspot/cpu/s390/frame_s390.cpp
+++ b/src/hotspot/cpu/s390/frame_s390.cpp
@@ -208,6 +208,11 @@
   return fr;
 }
 
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+  ShouldNotCallThis();
+  return nullptr;
+}
+
 bool frame::optimized_entry_frame_is_first() const {
   ShouldNotCallThis();
   return false;
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index b36ef80..cc4e33f 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -353,9 +353,11 @@
   return fr;
 }
 
-JavaFrameAnchor* OptimizedEntryBlob::jfa_for_frame(const frame& frame) const {
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+  assert(frame.is_optimized_entry_frame(), "wrong frame");
   // need unextended_sp here, since normal sp is wrong for interpreter callees
-  return reinterpret_cast<JavaFrameAnchor*>(reinterpret_cast<char*>(frame.unextended_sp()) + in_bytes(jfa_sp_offset()));
+  return reinterpret_cast<OptimizedEntryBlob::FrameData*>(
+    reinterpret_cast<char*>(frame.unextended_sp()) + in_bytes(_frame_data_offset));
 }
 
 bool frame::optimized_entry_frame_is_first() const {
diff --git a/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp b/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp
index 30072e9..34771e9 100644
--- a/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp
+++ b/src/hotspot/cpu/x86/universalUpcallHandler_x86_64.cpp
@@ -316,47 +316,6 @@
 }
 #endif
 
-void save_java_frame_anchor(MacroAssembler* _masm, ByteSize store_offset, Register thread) {
-  __ block_comment("{ save_java_frame_anchor ");
-  // upcall->jfa._last_Java_fp = _thread->_anchor._last_Java_fp;
-  __ movptr(rscratch1, Address(thread, JavaThread::last_Java_fp_offset()));
-  __ movptr(Address(rsp, store_offset + JavaFrameAnchor::last_Java_fp_offset()), rscratch1);
-
-  // upcall->jfa._last_Java_pc = _thread->_anchor._last_Java_pc;
-  __ movptr(rscratch1, Address(thread, JavaThread::last_Java_pc_offset()));
-  __ movptr(Address(rsp, store_offset + JavaFrameAnchor::last_Java_pc_offset()), rscratch1);
-
-  // upcall->jfa._last_Java_sp = _thread->_anchor._last_Java_sp;
-  __ movptr(rscratch1, Address(thread, JavaThread::last_Java_sp_offset()));
-  __ movptr(Address(rsp, store_offset + JavaFrameAnchor::last_Java_sp_offset()), rscratch1);
-  __ block_comment("} save_java_frame_anchor ");
-}
-
-void restore_java_frame_anchor(MacroAssembler* _masm, ByteSize load_offset, Register thread) {
-  __ block_comment("{ restore_java_frame_anchor ");
-  // thread->_last_Java_sp = NULL
-  __ movptr(Address(thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
-
-  // ThreadStateTransition::transition_from_java(_thread, _thread_in_vm);
-  // __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
-  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
-
-  //_thread->frame_anchor()->copy(&_anchor);
-//  _thread->_last_Java_fp = upcall->_last_Java_fp;
-//  _thread->_last_Java_pc = upcall->_last_Java_pc;
-//  _thread->_last_Java_sp = upcall->_last_Java_sp;
-
-  __ movptr(rscratch1, Address(rsp, load_offset + JavaFrameAnchor::last_Java_fp_offset()));
-  __ movptr(Address(thread, JavaThread::last_Java_fp_offset()), rscratch1);
-
-  __ movptr(rscratch1, Address(rsp, load_offset + JavaFrameAnchor::last_Java_pc_offset()));
-  __ movptr(Address(thread, JavaThread::last_Java_pc_offset()), rscratch1);
-
-  __ movptr(rscratch1, Address(rsp, load_offset + JavaFrameAnchor::last_Java_sp_offset()));
-  __ movptr(Address(thread, JavaThread::last_Java_sp_offset()), rscratch1);
-  __ block_comment("} restore_java_frame_anchor ");
-}
-
 static void save_native_arguments(MacroAssembler* _masm, const CallRegs& conv, int arg_save_area_offset) {
   __ block_comment("{ save_native_args ");
   int store_offset = arg_save_area_offset;
@@ -442,6 +401,60 @@
   return result_size;
 }
 
+static int compute_res_save_area_size(const CallRegs& conv) {
+  int result_size = 0;
+  for (int i = 0; i < conv._rets_length; i++) {
+    VMReg reg = conv._ret_regs[i];
+    if (reg->is_Register()) {
+      result_size += 8;
+    } else if (reg->is_XMMRegister()) {
+      // Java API doesn't support vector args
+      result_size += 16;
+    } else {
+      ShouldNotReachHere(); // unhandled type
+    }
+  }
+  return result_size;
+}
+
+static void save_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) {
+  int offset = res_save_area_offset;
+  __ block_comment("{ save java result ");
+  for (int i = 0; i < conv._rets_length; i++) {
+    VMReg reg = conv._ret_regs[i];
+    if (reg->is_Register()) {
+      __ movptr(Address(rsp, offset), reg->as_Register());
+      offset += 8;
+    } else if (reg->is_XMMRegister()) {
+      // Java API doesn't support vector args
+      __ movdqu(Address(rsp, offset), reg->as_XMMRegister());
+      offset += 16;
+    } else {
+      ShouldNotReachHere(); // unhandled type
+    }
+  }
+  __ block_comment("} save java result ");
+}
+
+static void restore_java_result(MacroAssembler* _masm, const CallRegs& conv, int res_save_area_offset) {
+  int offset = res_save_area_offset;
+  __ block_comment("{ restore java result ");
+  for (int i = 0; i < conv._rets_length; i++) {
+    VMReg reg = conv._ret_regs[i];
+    if (reg->is_Register()) {
+      __ movptr(reg->as_Register(), Address(rsp, offset));
+      offset += 8;
+    } else if (reg->is_XMMRegister()) {
+      // Java API doesn't support vector args
+      __ movdqu(reg->as_XMMRegister(), Address(rsp, offset));
+      offset += 16;
+    } else {
+      ShouldNotReachHere(); // unhandled type
+    }
+  }
+  __ block_comment("} restore java result ");
+}
+
 constexpr int MXCSR_MASK = 0xFFC0;  // Mask out any pending exceptions
 
 static void preserve_callee_saved_registers(MacroAssembler* _masm, const ABIDescriptor& abi, int reg_save_area_offset) {
@@ -574,12 +587,6 @@
   }
 }
 
-struct AuxiliarySaves {
-  JavaFrameAnchor jfa;
-  uintptr_t thread;
-  bool should_detach;
-};
-
 address ProgrammableUpcallHandler::generate_optimized_upcall_stub(jobject receiver, Method* entry, jobject jabi, jobject jconv) {
   ResourceMark rm;
   const ABIDescriptor abi = ForeignGlobals::parse_abi_descriptor(jabi);
@@ -604,19 +611,17 @@
 
   int reg_save_area_size = compute_reg_save_area_size(abi);
   int arg_save_area_size = compute_arg_save_area_size(conv);
+  int res_save_area_size = compute_res_save_area_size(conv);
   // To spill receiver during deopt
   int deopt_spill_size = 1 * BytesPerWord;
 
   int shuffle_area_offset    = 0;
   int deopt_spill_offset     = shuffle_area_offset    + out_arg_area;
-  int arg_save_area_offset   = deopt_spill_offset     + deopt_spill_size;
+  int res_save_area_offset   = deopt_spill_offset     + deopt_spill_size;
+  int arg_save_area_offset   = res_save_area_offset   + res_save_area_size;
   int reg_save_area_offset   = arg_save_area_offset   + arg_save_area_size;
-  int auxiliary_saves_offset = reg_save_area_offset   + reg_save_area_size;
-  int frame_bottom_offset    = auxiliary_saves_offset + sizeof(AuxiliarySaves);
-
-  ByteSize jfa_offset           = in_ByteSize(auxiliary_saves_offset) + byte_offset_of(AuxiliarySaves, jfa);
-  ByteSize thread_offset        = in_ByteSize(auxiliary_saves_offset) + byte_offset_of(AuxiliarySaves, thread);
-  ByteSize should_detach_offset = in_ByteSize(auxiliary_saves_offset) + byte_offset_of(AuxiliarySaves, should_detach);
+  int frame_data_offset      = reg_save_area_offset   + reg_save_area_size;
+  int frame_bottom_offset    = frame_data_offset      + sizeof(OptimizedEntryBlob::FrameData);
 
   int frame_size = frame_bottom_offset;
   frame_size = align_up(frame_size, StackAlignmentInBytes);
@@ -627,8 +632,8 @@
   // FP-> |                     |
   //      |---------------------| = frame_bottom_offset = frame_size
   //      |                     |
-  //      | AuxiliarySaves      |
-  //      |---------------------| = auxiliary_saves_offset
+  //      | FrameData           |
+  //      |---------------------| = frame_data_offset
   //      |                     |
   //      | reg_save_area       |
   //      |---------------------| = reg_save_are_offset
@@ -636,6 +641,9 @@
   //      | arg_save_area       |
   //      |---------------------| = arg_save_are_offset
   //      |                     |
+  //      | res_save_area       |
+  //      |---------------------| = res_save_are_offset
+  //      |                     |
   //      | deopt_spill         |
   //      |---------------------| = deopt_spill_offset
   //      |                     |
@@ -646,7 +654,6 @@
   //////////////////////////////////////////////////////////////////////////////
 
   MacroAssembler* _masm = new MacroAssembler(&buffer);
-  Label call_return;
   address start = __ pc();
   __ enter(); // set up frame
   if ((abi._stack_alignment_bytes % 16) != 0) {
@@ -662,53 +669,14 @@
 
   preserve_callee_saved_registers(_masm, abi, reg_save_area_offset);
 
-  __ block_comment("{ get_thread");
+  __ block_comment("{ on_entry");
   __ vzeroupper();
-  __ lea(c_rarg0, Address(rsp, should_detach_offset));
+  __ lea(c_rarg0, Address(rsp, frame_data_offset));
   // stack already aligned
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::maybe_attach_and_get_thread)));
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_entry)));
   __ movptr(r15_thread, rax);
   __ reinit_heapbase();
-  __ movptr(Address(rsp, thread_offset), r15_thread);
-  __ block_comment("} get_thread");
-
-  // TODO:
-  // We expect not to be coming from JNI code, but we might be.
-  // We should figure out what our stance is on supporting that and then maybe add
-  // some more handling here for:
-  //   - handle blocks
-  //   - check for active exceptions (and emit an error)
-
-  __ block_comment("{ safepoint poll");
-  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
-
-  if (os::is_MP()) {
-    __ membar(Assembler::Membar_mask_bits(
-                Assembler::LoadLoad  | Assembler::StoreLoad |
-                Assembler::LoadStore | Assembler::StoreStore));
-   }
-
-  // check for safepoint operation in progress and/or pending suspend requests
-  Label L_after_safepoint_poll;
-  Label L_safepoint_poll_slow_path;
-
-  __ safepoint_poll(L_safepoint_poll_slow_path, r15_thread, false /* at_return */, false /* in_nmethod */);
-
-  __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
-  __ jcc(Assembler::notEqual, L_safepoint_poll_slow_path);
-
-  __ bind(L_after_safepoint_poll);
-  __ block_comment("} safepoint poll");
-  // change thread state
-  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
-
-  __ block_comment("{ reguard stack check");
-  Label L_reguard;
-  Label L_after_reguard;
-  __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled);
-  __ jcc(Assembler::equal, L_reguard);
-  __ bind(L_after_reguard);
-  __ block_comment("} reguard stack check");
+  __ block_comment("} on_entry");
 
   __ block_comment("{ argument shuffle");
   // TODO merge these somehow
@@ -724,13 +692,24 @@
 
   __ mov_metadata(rbx, entry);
   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // just in case callee is deoptimized
-  __ reinit_heapbase();
-
-  save_java_frame_anchor(_masm, jfa_offset, r15_thread);
-  __ reset_last_Java_frame(r15_thread, true);
 
   __ call(Address(rbx, Method::from_compiled_offset()));
 
+  save_java_result(_masm, conv, res_save_area_offset);
+
+  __ block_comment("{ on_exit");
+  __ vzeroupper();
+  __ lea(c_rarg0, Address(rsp, frame_data_offset));
+  // stack already aligned
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::on_exit)));
+  __ reinit_heapbase();
+  __ block_comment("} on_exit");
+
+  restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
+
+  restore_java_result(_masm, conv, res_save_area_offset);
+
+  // return value shuffle
 #ifdef ASSERT
   if (conv._rets_length == 1) { // 0 or 1
     VMReg j_expected_result_reg;
@@ -757,55 +736,11 @@
   }
 #endif
 
-  __ bind(call_return);
-
-  // also sets last Java frame
-  __ movptr(r15_thread, Address(rsp, thread_offset));
-  // TODO corrupted thread pointer causes havoc. Can we verify it here?
-  restore_java_frame_anchor(_masm, jfa_offset, r15_thread); // also transitions to native state
-
-  __ block_comment("{ maybe_detach_thread");
-  Label L_after_detach;
-  __ cmpb(Address(rsp, should_detach_offset), 0);
-  __ jcc(Assembler::equal, L_after_detach);
-  __ vzeroupper();
-  __ mov(c_rarg0, r15_thread);
-  // stack already aligned
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ProgrammableUpcallHandler::detach_thread)));
-  __ reinit_heapbase();
-  __ bind(L_after_detach);
-  __ block_comment("} maybe_detach_thread");
-
-  restore_callee_saved_registers(_masm, abi, reg_save_area_offset);
-
   __ leave();
   __ ret(0);
 
   //////////////////////////////////////////////////////////////////////////////
 
-  __ block_comment("{ L_safepoint_poll_slow_path");
-  __ bind(L_safepoint_poll_slow_path);
-  __ vzeroupper();
-  __ mov(c_rarg0, r15_thread);
-  // stack already aligned
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
-  __ reinit_heapbase();
-  __ jmp(L_after_safepoint_poll);
-  __ block_comment("} L_safepoint_poll_slow_path");
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  __ block_comment("{ L_reguard");
-  __ bind(L_reguard);
-  __ vzeroupper();
-  // stack already aligned
-  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
-  __ reinit_heapbase();
-  __ jmp(L_after_reguard);
-  __ block_comment("} L_reguard");
-
-  //////////////////////////////////////////////////////////////////////////////
-
   __ block_comment("{ exception handler");
 
   intptr_t exception_handler_offset = __ pc() - start;
@@ -835,7 +770,7 @@
   const char* name = "optimized_upcall_stub";
 #endif // PRODUCT
 
-  OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, jfa_offset);
+  OptimizedEntryBlob* blob = OptimizedEntryBlob::create(name, &buffer, exception_handler_offset, receiver, in_ByteSize(frame_data_offset));
 
   if (TraceOptimizedUpcallStubs) {
     blob->print_on(tty);
diff --git a/src/hotspot/cpu/zero/frame_zero.cpp b/src/hotspot/cpu/zero/frame_zero.cpp
index 6791e60..972b835 100644
--- a/src/hotspot/cpu/zero/frame_zero.cpp
+++ b/src/hotspot/cpu/zero/frame_zero.cpp
@@ -61,6 +61,11 @@
   return frame(zeroframe()->next(), sender_sp());
 }
 
+OptimizedEntryBlob::FrameData* OptimizedEntryBlob::frame_data_for_frame(const frame& frame) const {
+  ShouldNotCallThis();
+  return nullptr;
+}
+
 bool frame::optimized_entry_frame_is_first() const {
   ShouldNotCallThis();
   return false;
diff --git a/src/hotspot/share/code/codeBlob.cpp b/src/hotspot/share/code/codeBlob.cpp
index da225bc..94e2658 100644
--- a/src/hotspot/share/code/codeBlob.cpp
+++ b/src/hotspot/share/code/codeBlob.cpp
@@ -41,6 +41,8 @@
 #include "prims/jvmtiExport.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/javaFrameAnchor.hpp"
+#include "runtime/jniHandles.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -713,26 +715,34 @@
 // Implementation of OptimizedEntryBlob
 
 OptimizedEntryBlob::OptimizedEntryBlob(const char* name, int size, CodeBuffer* cb, intptr_t exception_handler_offset,
-                     jobject receiver, ByteSize jfa_sp_offset) :
+                                       jobject receiver, ByteSize frame_data_offset) :
   BufferBlob(name, size, cb),
   _exception_handler_offset(exception_handler_offset),
   _receiver(receiver),
-  _jfa_sp_offset(jfa_sp_offset) {
+  _frame_data_offset(frame_data_offset) {
   CodeCache::commit(this);
 }
 
 OptimizedEntryBlob* OptimizedEntryBlob::create(const char* name, CodeBuffer* cb, intptr_t exception_handler_offset,
-                             jobject receiver, ByteSize jfa_sp_offset) {
+                                               jobject receiver, ByteSize frame_data_offset) {
   ThreadInVMfromUnknown __tiv;  // get to VM state in case we block on CodeCache_lock
 
   OptimizedEntryBlob* blob = nullptr;
   unsigned int size = CodeBlob::allocation_size(cb, sizeof(OptimizedEntryBlob));
   {
     MutexLocker mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    blob = new (size) OptimizedEntryBlob(name, size, cb, exception_handler_offset, receiver, jfa_sp_offset);
+    blob = new (size) OptimizedEntryBlob(name, size, cb, exception_handler_offset, receiver, frame_data_offset);
   }
   // Track memory usage statistic after releasing CodeCache_lock
   MemoryService::track_code_cache_memory_usage();
 
   return blob;
 }
+
+void OptimizedEntryBlob::oops_do(OopClosure* f, const frame& frame) {
+  frame_data_for_frame(frame)->old_handles->oops_do(f);
+}
+
+JavaFrameAnchor* OptimizedEntryBlob::jfa_for_frame(const frame& frame) const {
+  return &frame_data_for_frame(frame)->jfa;
+}
diff --git a/src/hotspot/share/code/codeBlob.hpp b/src/hotspot/share/code/codeBlob.hpp
index 61d3346..104e04f 100644
--- a/src/hotspot/share/code/codeBlob.hpp
+++ b/src/hotspot/share/code/codeBlob.hpp
@@ -27,6 +27,7 @@
 
 #include "asm/codeBuffer.hpp"
 #include "compiler/compilerDefinitions.hpp"
+#include "runtime/javaFrameAnchor.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/handles.hpp"
 #include "utilities/align.hpp"
@@ -34,6 +35,7 @@
 
 class ImmutableOopMap;
 class ImmutableOopMapSet;
+class JNIHandleBlock;
 class OopMapSet;
 
 // CodeBlob Types
@@ -77,7 +79,7 @@
 
 class CodeBlobLayout;
 class OptimizedEntryBlob; // for as_optimized_entry_blob()
-class JavaFrameAnchor; // for EntryBlob::jfa_for_frame
+class JavaFrameAnchor; // for OptimizedEntryBlob::jfa_for_frame
 
 class CodeBlob {
   friend class VMStructs;
@@ -726,29 +728,41 @@
 
 //----------------------------------------------------------------------------------------------------
 
-// For optimized upcall stubs
+class ProgrammableUpcallHandler;
+
 class OptimizedEntryBlob: public BufferBlob {
+  friend class ProgrammableUpcallHandler;
  private:
   intptr_t _exception_handler_offset;
   jobject _receiver;
-  ByteSize _jfa_sp_offset;
+  ByteSize _frame_data_offset;
 
   OptimizedEntryBlob(const char* name, int size, CodeBuffer* cb, intptr_t exception_handler_offset,
-            jobject receiver, ByteSize jfa_sp_offset);
+                     jobject receiver, ByteSize frame_data_offset);
 
+  struct FrameData {
+    JavaFrameAnchor jfa;
+    JavaThread* thread;
+    JNIHandleBlock* old_handles;
+    JNIHandleBlock* new_handles;
+    bool should_detach;
+  };
+
+  // defined in frame_ARCH.cpp
+  FrameData* frame_data_for_frame(const frame& frame) const;
  public:
   // Creation
   static OptimizedEntryBlob* create(const char* name, CodeBuffer* cb,
-                           intptr_t exception_handler_offset, jobject receiver,
-                           ByteSize jfa_sp_offset);
+                                    intptr_t exception_handler_offset, jobject receiver,
+                                    ByteSize frame_data_offset);
 
   address exception_handler() { return code_begin() + _exception_handler_offset; }
   jobject receiver() { return _receiver; }
-  ByteSize jfa_sp_offset() const { return _jfa_sp_offset; }
 
-  // defined in frame_ARCH.cpp
   JavaFrameAnchor* jfa_for_frame(const frame& frame) const;
 
+  void oops_do(OopClosure* f, const frame& frame);
+
   // Typing
   virtual bool is_optimized_entry_blob() const override { return true; }
 };
diff --git a/src/hotspot/share/prims/universalUpcallHandler.cpp b/src/hotspot/share/prims/universalUpcallHandler.cpp
index 852c4c1..44b6450 100644
--- a/src/hotspot/share/prims/universalUpcallHandler.cpp
+++ b/src/hotspot/share/prims/universalUpcallHandler.cpp
@@ -51,37 +51,117 @@
   JavaCalls::call_static(&result, upcall_method.klass, upcall_method.name, upcall_method.sig, &args, CATCH);
 }
 
-Thread* ProgrammableUpcallHandler::maybe_attach_and_get_thread(bool* should_detach) {
-  Thread* thread = Thread::current_or_null();
+JavaThread* ProgrammableUpcallHandler::maybe_attach_and_get_thread(bool* should_detach) {
+  JavaThread* thread = JavaThread::current_or_null();
   if (thread == nullptr) {
     JavaVM_ *vm = (JavaVM *)(&main_vm);
     JNIEnv* p_env = nullptr; // unused
     jint result = vm->functions->AttachCurrentThread(vm, (void**) &p_env, nullptr);
     guarantee(result == JNI_OK, "Could not attach thread for upcall. JNI error code: %d", result);
     *should_detach = true;
-    thread = Thread::current();
+    thread = JavaThread::current();
+    assert(!thread->has_last_Java_frame(), "newly-attached thread not expected to have last Java frame");
   } else {
     *should_detach = false;
   }
   return thread;
 }
 
-void ProgrammableUpcallHandler::detach_thread(Thread* thread) {
+void ProgrammableUpcallHandler::detach_current_thread() {
   JavaVM_ *vm = (JavaVM *)(&main_vm);
   vm->functions->DetachCurrentThread(vm);
 }
 
+// modelled after JavaCallWrapper::JavaCallWrapper
+JavaThread* ProgrammableUpcallHandler::on_entry(OptimizedEntryBlob::FrameData* context) {
+  JavaThread* thread = maybe_attach_and_get_thread(&context->should_detach);
+  context->thread = thread;
+
+  assert(thread->can_call_java(), "must be able to call Java");
+
+  // Allocate handle block for Java code. This must be done before we change thread_state to _thread_in_Java,
+  // since it can potentially block.
+  context->new_handles = JNIHandleBlock::allocate_block(thread);
+
+  // After this, we are officially in Java Code. This needs to be done before we change any of the thread local
+  // info, since we cannot find oops before the new information is set up completely.
+  ThreadStateTransition::transition_from_native(thread, _thread_in_Java);
+
+  // Make sure that we handle asynchronous stops and suspends _before_ we clear all thread state
+  // in OptimizedEntryBlob::FrameData. This way, we can decide if we need to do any pd actions
+  // to prepare for stop/suspend (cache sp, or other state).
+  bool clear_pending_exception = true;
+  if (thread->has_special_runtime_exit_condition()) {
+    thread->handle_special_runtime_exit_condition();
+    if (thread->has_pending_exception()) {
+      clear_pending_exception = false;
+    }
+  }
+
+  context->old_handles = thread->active_handles();
+
+  // For the profiler, the last_Java_frame information in thread must always be in
+  // legal state. We have no last Java frame if last_Java_sp == NULL so
+  // the valid transition is to clear _last_Java_sp and then reset the rest of
+  // the (platform specific) state.
+
+  context->jfa.copy(thread->frame_anchor());
+  thread->frame_anchor()->clear();
+
+  debug_only(thread->inc_java_call_counter());
+  thread->set_active_handles(context->new_handles);     // install new handle block and reset Java frame linkage
+
+  // clear any pending exception in thread (native calls start with no exception pending)
+  if (clear_pending_exception) {
+    thread->clear_pending_exception();
+  }
+
+  MACOS_AARCH64_ONLY(thread->enable_wx(WXExec));
+
+  return thread;
+}
+
+// modelled after JavaCallWrapper::~JavaCallWrapper
+void ProgrammableUpcallHandler::on_exit(OptimizedEntryBlob::FrameData* context) {
+  JavaThread* thread = context->thread;
+  assert(thread == JavaThread::current(), "must still be the same thread");
+
+  MACOS_AARCH64_ONLY(thread->enable_wx(WXWrite));
+
+  // restore previous handle block
+  thread->set_active_handles(context->old_handles);
+
+  thread->frame_anchor()->zap();
+
+  debug_only(thread->dec_java_call_counter());
+
+  // Old thread-local info. has been restored. We are now back in native code.
+  ThreadStateTransition::transition_from_java(thread, _thread_in_native);
+
+  thread->frame_anchor()->copy(&context->jfa);
+
+  // Release handles after we are marked as being in native code again, since this
+  // operation might block
+  JNIHandleBlock::release_block(context->new_handles, thread);
+
+  assert(!thread->has_pending_exception(), "Upcall can not throw an exception");
+
+  if (context->should_detach) {
+    detach_current_thread();
+  }
+}
+
 void ProgrammableUpcallHandler::attach_thread_and_do_upcall(jobject rec, address buff) {
   bool should_detach = false;
-  Thread* thread = maybe_attach_and_get_thread(&should_detach);
+  JavaThread* thread = maybe_attach_and_get_thread(&should_detach);
 
   {
     MACOS_AARCH64_ONLY(ThreadWXEnable wx(WXWrite, thread));
-    upcall_helper(thread->as_Java_thread(), rec, buff);
+    upcall_helper(thread, rec, buff);
   }
 
   if (should_detach) {
-    detach_thread(thread);
+    detach_current_thread();
   }
 }
 
diff --git a/src/hotspot/share/prims/universalUpcallHandler.hpp b/src/hotspot/share/prims/universalUpcallHandler.hpp
index feb39fc..3005762 100644
--- a/src/hotspot/share/prims/universalUpcallHandler.hpp
+++ b/src/hotspot/share/prims/universalUpcallHandler.hpp
@@ -25,6 +25,7 @@
 #define SHARE_VM_PRIMS_UNIVERSALUPCALLHANDLER_HPP
 
 #include "asm/codeBuffer.hpp"
+#include "code/codeBlob.hpp"
 #include "prims/foreign_globals.hpp"
 
 class JavaThread;
@@ -47,8 +48,11 @@
   static void attach_thread_and_do_upcall(jobject rec, address buff);
 
   static void handle_uncaught_exception(oop exception);
-  static Thread* maybe_attach_and_get_thread(bool* should_detach);
-  static void detach_thread(Thread* thread);
+  static JavaThread* maybe_attach_and_get_thread(bool* should_detach);
+  static void detach_current_thread();
+
+  static JavaThread* on_entry(OptimizedEntryBlob::FrameData* context);
+  static void on_exit(OptimizedEntryBlob::FrameData* context);
 public:
   static address generate_optimized_upcall_stub(jobject mh, Method* entry, jobject jabi, jobject jconv);
   static address generate_upcall_stub(jobject rec, jobject abi, jobject buffer_layout);
diff --git a/src/hotspot/share/runtime/frame.cpp b/src/hotspot/share/runtime/frame.cpp
index 89dbc98..528fb90 100644
--- a/src/hotspot/share/runtime/frame.cpp
+++ b/src/hotspot/share/runtime/frame.cpp
@@ -1068,9 +1068,7 @@
   } else if (is_entry_frame()) {
     oops_entry_do(f, map);
   } else if (is_optimized_entry_frame()) {
-   // Nothing to do
-   // receiver is a global ref
-   // handle block is for JNI
+    _cb->as_optimized_entry_blob()->oops_do(f, *this);
   } else if (CodeCache::contains(pc())) {
     oops_code_blob_do(f, cf, map, derived_mode);
   } else {
diff --git a/src/hotspot/share/runtime/javaFrameAnchor.hpp b/src/hotspot/share/runtime/javaFrameAnchor.hpp
index d73372d..732eff1 100644
--- a/src/hotspot/share/runtime/javaFrameAnchor.hpp
+++ b/src/hotspot/share/runtime/javaFrameAnchor.hpp
@@ -34,6 +34,7 @@
 //
 class JavaThread;
 class MacroAssembler;
+class ProgrammableUpcallHandler;
 
 class JavaFrameAnchor {
 // Too many friends...
@@ -52,6 +53,7 @@
 friend class JVMCIVMStructs;
 friend class BytecodeInterpreter;
 friend class JavaCallWrapper;
+friend class ProgrammableUpcallHandler;
 
  private:
   //
diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp
index a57ee93..1ffeefe 100644
--- a/src/hotspot/share/runtime/thread.cpp
+++ b/src/hotspot/share/runtime/thread.cpp
@@ -1964,6 +1964,15 @@
   }
 }
 
+#ifdef ASSERT
+void JavaThread::verify_frame_info() {
+  assert((!has_last_Java_frame() && java_call_counter() == 0) ||
+         (has_last_Java_frame() && java_call_counter() > 0),
+         "unexpected frame info: has_last_frame=%s, java_call_counter=%d",
+         has_last_Java_frame() ? "true" : "false", java_call_counter());
+}
+#endif
+
 void JavaThread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) {
   // Verify that the deferred card marks have been flushed.
   assert(deferred_card_mark().is_empty(), "Should be empty during GC");
@@ -1971,8 +1980,7 @@
   // Traverse the GCHandles
   Thread::oops_do_no_frames(f, cf);
 
-  assert((!has_last_Java_frame() && java_call_counter() == 0) ||
-         (has_last_Java_frame() && java_call_counter() > 0), "wrong java_sp info!");
+  DEBUG_ONLY(verify_frame_info();)
 
   if (has_last_Java_frame()) {
     // Traverse the monitor chunks
@@ -2020,18 +2028,12 @@
 #ifdef ASSERT
 void JavaThread::verify_states_for_handshake() {
   // This checks that the thread has a correct frame state during a handshake.
-  assert((!has_last_Java_frame() && java_call_counter() == 0) ||
-         (has_last_Java_frame() && java_call_counter() > 0),
-         "unexpected frame info: has_last_frame=%d, java_call_counter=%d",
-         has_last_Java_frame(), java_call_counter());
+  verify_frame_info();
 }
 #endif
 
 void JavaThread::nmethods_do(CodeBlobClosure* cf) {
-  assert((!has_last_Java_frame() && java_call_counter() == 0) ||
-         (has_last_Java_frame() && java_call_counter() > 0),
-         "unexpected frame info: has_last_frame=%d, java_call_counter=%d",
-         has_last_Java_frame(), java_call_counter());
+  DEBUG_ONLY(verify_frame_info();)
 
   if (has_last_Java_frame()) {
     // Traverse the execution stack
diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp
index aafa41e..e6aaa18 100644
--- a/src/hotspot/share/runtime/thread.hpp
+++ b/src/hotspot/share/runtime/thread.hpp
@@ -1125,6 +1125,8 @@
   void set_requires_cross_modify_fence(bool val) PRODUCT_RETURN NOT_PRODUCT({ _requires_cross_modify_fence = val; })
 
  private:
+  DEBUG_ONLY(void verify_frame_info();)
+
   // Support for thread handshake operations
   HandshakeState _handshake;
  public:
@@ -1421,6 +1423,8 @@
  public:
   // Returns the running thread as a JavaThread
   static inline JavaThread* current();
+  // Returns the current thread as a JavaThread, or NULL if not attached
+  static inline JavaThread* current_or_null();
 
   // Returns the active Java thread.  Do not use this if you know you are calling
   // from a JavaThread, as it's slower than JavaThread::current.  If called from
@@ -1591,6 +1595,11 @@
   return Thread::current()->as_Java_thread();
 }
 
+inline JavaThread* JavaThread::current_or_null() {
+  Thread* current = Thread::current_or_null();
+  return current != nullptr ? current->as_Java_thread() : nullptr;
+}
+
 inline JavaThread* Thread::as_Java_thread() {
   assert(is_Java_thread(), "incorrect cast to JavaThread");
   return static_cast<JavaThread*>(this);
diff --git a/test/jdk/java/foreign/stackwalk/TestAsyncStackWalk.java b/test/jdk/java/foreign/stackwalk/TestAsyncStackWalk.java
index b04c20b..b562312 100644
--- a/test/jdk/java/foreign/stackwalk/TestAsyncStackWalk.java
+++ b/test/jdk/java/foreign/stackwalk/TestAsyncStackWalk.java
@@ -22,7 +22,7 @@
  */
 
 /*
- * @test
+ * @test id=default_gc
  * @requires ((os.arch == "amd64" | os.arch == "x86_64") & sun.arch.data.model == "64") | os.arch == "aarch64"
  * @library /test/lib
  * @build sun.hotspot.WhiteBox
@@ -47,6 +47,63 @@
  *   TestAsyncStackWalk
  */
 
+/*
+ * @test id=zgc
+ * @requires (((os.arch == "amd64" | os.arch == "x86_64") & sun.arch.data.model == "64") | os.arch == "aarch64")
+ * @requires vm.gc.Z
+ * @library /test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=true
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseZGC
+ *   TestAsyncStackWalk
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=false
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseZGC
+ *   TestAsyncStackWalk
+ */
+/*
+ * @test id=shenandoah
+ * @requires (((os.arch == "amd64" | os.arch == "x86_64") & sun.arch.data.model == "64") | os.arch == "aarch64")
+ * @requires vm.gc.Shenandoah
+ * @library /test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=true
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseShenandoahGC
+ *   TestAsyncStackWalk
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=false
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseShenandoahGC
+ *   TestAsyncStackWalk
+ */
+
 import jdk.incubator.foreign.CLinker;
 import jdk.incubator.foreign.FunctionDescriptor;
 import jdk.incubator.foreign.SymbolLookup;
diff --git a/test/jdk/java/foreign/stackwalk/TestStackWalk.java b/test/jdk/java/foreign/stackwalk/TestStackWalk.java
index 800dc16..5d220cc 100644
--- a/test/jdk/java/foreign/stackwalk/TestStackWalk.java
+++ b/test/jdk/java/foreign/stackwalk/TestStackWalk.java
@@ -22,7 +22,7 @@
  */
 
 /*
- * @test
+ * @test id=default_gc
  * @requires ((os.arch == "amd64" | os.arch == "x86_64") & sun.arch.data.model == "64") | os.arch == "aarch64"
  * @library /test/lib
  * @build sun.hotspot.WhiteBox
@@ -47,6 +47,63 @@
  *   TestStackWalk
  */
 
+/*
+ * @test id=zgc
+ * @requires (((os.arch == "amd64" | os.arch == "x86_64") & sun.arch.data.model == "64") | os.arch == "aarch64")
+ * @requires vm.gc.Z
+ * @library /test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=true
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseZGC
+ *   TestStackWalk
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=false
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseZGC
+ *   TestStackWalk
+ */
+/*
+ * @test id=shenandoah
+ * @requires (((os.arch == "amd64" | os.arch == "x86_64") & sun.arch.data.model == "64") | os.arch == "aarch64")
+ * @requires vm.gc.Shenandoah
+ * @library /test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run driver jdk.test.lib.helpers.ClassFileInstaller sun.hotspot.WhiteBox
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=true
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseShenandoahGC
+ *   TestStackWalk
+ *
+ * @run main/othervm
+ *   -Xbootclasspath/a:.
+ *   -XX:+UnlockDiagnosticVMOptions
+ *   -XX:+WhiteBoxAPI
+ *   -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=false
+ *   --enable-native-access=ALL-UNNAMED
+ *   -Xbatch
+ *   -XX:+UseShenandoahGC
+ *   TestStackWalk
+ */
+
 import jdk.incubator.foreign.CLinker;
 import jdk.incubator.foreign.FunctionDescriptor;
 import jdk.incubator.foreign.SymbolLookup;