8234059: Stress test fails with "Unexpected Exception in thread JFR Event Stream"

Reviewed-by: egahlin
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp b/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp
index 1b03aa1..472e673 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.cpp
@@ -373,7 +373,7 @@
 
 typedef DiscardOp<DefaultDiscarder<JfrBuffer> > DiscardOperation;
 size_t JfrCheckpointManager::clear() {
-  JfrTypeSet::clear();
+  clear_type_set();
   DiscardOperation discarder(mutexed); // mutexed discard mode
   process_free_list(discarder, _free_list_mspace);
   process_free_list(discarder, _epoch_transition_mspace);
@@ -428,6 +428,15 @@
   notify_threads();
 }
 
+void JfrCheckpointManager::clear_type_set() { // wraps JfrTypeSet::clear() with the preconditions and locks below
+  assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
+  assert(!JfrRecorder::is_recording(), "invariant");
+  // can safepoint here; both locks below are taken before the type set is cleared
+  MutexLocker cld_lock(ClassLoaderDataGraph_lock);
+  MutexLocker module_lock(Module_lock);
+  JfrTypeSet::clear();
+}
+
 void JfrCheckpointManager::write_type_set() {
   assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
   if (LeakProfiler::is_running()) {
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.hpp b/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.hpp
index 5fe06b0..09eb333 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.hpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/jfrCheckpointManager.hpp
@@ -85,6 +85,7 @@
   size_t write_threads();
   size_t write_static_type_set_and_threads();
   bool is_type_set_required();
+  void clear_type_set();
   void write_type_set();
   static void write_type_set_for_unloaded_classes();
 
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp
index 9cd33cc..d665d9e 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSet.cpp
@@ -168,6 +168,7 @@
   assert(ptr != NULL, "invariant");
   SET_SERIALIZED(ptr);
   assert(IS_SERIALIZED(ptr), "invariant");
+  CLEAR_THIS_EPOCH_CLEARED_BIT(ptr);
 }
 
 /*
@@ -332,6 +333,26 @@
   assert(IS_NOT_SERIALIZED(value), "invariant");
 }
 
+typedef JfrArtifactCallbackHost<KlassPtr, KlassArtifactRegistrator> RegistrationCallback;
+
+static void register_klass(Klass* klass) { // per-klass callback handed to ClassLoaderDataGraph::classes_do()
+  assert(klass != NULL, "invariant");
+  assert(_subsystem_callback != NULL, "invariant"); // a callback must already be installed (register_klasses() does this)
+  do_previous_epoch_artifact(_subsystem_callback, klass);
+}
+
+static void do_register_klasses() { // runs register_klass via ClassLoaderDataGraph::classes_do()
+  ClassLoaderDataGraph::classes_do(&register_klass);
+}
+
+static void register_klasses() { // installs a KlassArtifactRegistrator-backed callback, then runs do_register_klasses()
+  assert(!_artifacts->has_klass_entries(), "invariant"); // expects _artifacts to hold no klass entries on entry
+  KlassArtifactRegistrator reg(_artifacts);
+  RegistrationCallback callback(&reg);
+  _subsystem_callback = &callback; // NOTE(review): address of a local; not reset in this function before it returns
+  do_register_klasses();
+}
+
 static int write_package(JfrCheckpointWriter* writer, PkgPtr pkg, bool leakp) {
   assert(writer != NULL, "invariant");
   assert(_artifacts != NULL, "invariant");
@@ -422,6 +443,15 @@
   _artifacts->tally(pw);
 }
 
+typedef JfrArtifactCallbackHost<PkgPtr, ClearArtifact<PkgPtr> > ClearPackageCallback;
+
+static void clear_packages() { // installs a ClearArtifact<PkgPtr> callback, then runs do_packages()
+  ClearArtifact<PkgPtr> clear;
+  ClearPackageCallback callback(&clear);
+  _subsystem_callback = &callback; // presumably what do_packages() dispatches through -- TODO confirm
+  do_packages();
+}
+
 static int write_module(JfrCheckpointWriter* writer, ModPtr mod, bool leakp) {
   assert(mod != NULL, "invariant");
   assert(_artifacts != NULL, "invariant");
@@ -512,6 +542,15 @@
   _artifacts->tally(mw);
 }
 
+typedef JfrArtifactCallbackHost<ModPtr, ClearArtifact<ModPtr> > ClearModuleCallback;
+
+static void clear_modules() { // installs a ClearArtifact<ModPtr> callback, then runs do_modules()
+  ClearArtifact<ModPtr> clear;
+  ClearModuleCallback callback(&clear);
+  _subsystem_callback = &callback; // presumably what do_modules() dispatches through -- TODO confirm
+  do_modules();
+}
+
 static int write_classloader(JfrCheckpointWriter* writer, CldPtr cld, bool leakp) {
   assert(cld != NULL, "invariant");
   assert(!cld->is_unsafe_anonymous(), "invariant");
@@ -639,6 +678,15 @@
   _artifacts->tally(cldw);
 }
 
+typedef JfrArtifactCallbackHost<CldPtr, ClearArtifact<CldPtr> > ClearCLDCallback;
+
+static void clear_classloaders() { // installs a ClearArtifact<CldPtr> callback, then runs do_class_loaders()
+  ClearArtifact<CldPtr> clear;
+  ClearCLDCallback callback(&clear);
+  _subsystem_callback = &callback; // presumably what do_class_loaders() dispatches through -- TODO confirm
+  do_class_loaders();
+}
+
 static u1 get_visibility(MethodPtr method) {
   assert(method != NULL, "invariant");
   return const_cast<Method*>(method)->is_hidden() ? (u1)1 : (u1)0;
@@ -649,6 +697,7 @@
   assert(method != NULL, "invariant");
   SET_METHOD_SERIALIZED(method);
   assert(IS_METHOD_SERIALIZED(method), "invariant");
+  CLEAR_THIS_EPOCH_METHOD_CLEARED_BIT(method);
 }
 
 static int write_method(JfrCheckpointWriter* writer, MethodPtr method, bool leakp) {
@@ -888,24 +937,23 @@
   _artifacts->tally(sw);
 }
 
-static bool clear_artifacts = false;
-
-void JfrTypeSet::clear() {
-  clear_artifacts = true;
-}
-
 typedef Wrapper<KlassPtr, ClearArtifact> ClearKlassBits;
 typedef Wrapper<MethodPtr, ClearArtifact> ClearMethodFlag;
 typedef MethodIteratorHost<ClearMethodFlag, ClearKlassBits, AlwaysTrue, false> ClearKlassAndMethods;
 
+static bool clear_artifacts = false;
+
+static void clear_klasses_and_methods() { // iterates the klasses held by _artifacts with a ClearKlassAndMethods functor
+  ClearKlassAndMethods clear(_writer); // NOTE(review): JfrTypeSet::clear() calls setup(NULL, ...) first, so _writer is presumably NULL on that path -- confirm that is safe
+  _artifacts->iterate_klasses(clear);
+}
+
 static size_t teardown() {
   assert(_artifacts != NULL, "invariant");
   const size_t total_count = _artifacts->total_count();
   if (previous_epoch()) {
-    assert(_writer != NULL, "invariant");
-    ClearKlassAndMethods clear(_writer);
-    _artifacts->iterate_klasses(clear);
-    JfrTypeSet::clear();
+    clear_klasses_and_methods();
+    clear_artifacts = true;
     ++checkpoint_id;
   }
   return total_count;
@@ -945,3 +993,16 @@
   write_symbols();
   return teardown();
 }
+
+/**
+ * Clear all tags from the previous epoch: packages, modules, class loaders, then klasses and methods.
+ */
+void JfrTypeSet::clear() {
+  clear_artifacts = true;
+  setup(NULL, NULL, false, false); // no writers are supplied on this path
+  register_klasses(); // presumably fills _artifacts with the klasses that the last step iterates -- TODO confirm
+  clear_packages();
+  clear_modules();
+  clear_classloaders();
+  clear_klasses_and_methods();
+}
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.hpp b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.hpp
index f75c0ef..ede4c93 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.hpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/jfrTypeSetUtils.hpp
@@ -79,9 +79,10 @@
 class ClearArtifact {
  public:
   bool operator()(T const& value) {
-    CLEAR_METHOD_AND_CLASS_PREV_EPOCH(value);
     CLEAR_SERIALIZED(value);
     assert(IS_NOT_SERIALIZED(value), "invariant");
+    SET_PREV_EPOCH_CLEARED_BIT(value);
+    CLEAR_METHOD_AND_CLASS_PREV_EPOCH(value);
     return true;
   }
 };
@@ -91,9 +92,10 @@
  public:
   bool operator()(const Method* method) {
     assert(METHOD_FLAG_USED_PREV_EPOCH(method), "invariant");
-    CLEAR_METHOD_FLAG_USED_PREV_EPOCH(method);
     CLEAR_METHOD_SERIALIZED(method);
     assert(METHOD_NOT_SERIALIZED(method), "invariant");
+    SET_PREV_EPOCH_METHOD_CLEARED_BIT(method);
+    CLEAR_METHOD_FLAG_USED_PREV_EPOCH(method);
     return true;
   }
 };
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceId.inline.hpp b/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceId.inline.hpp
index a2391aa..065e3c1 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceId.inline.hpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceId.inline.hpp
@@ -39,10 +39,27 @@
 #include "runtime/thread.inline.hpp"
 #include "utilities/debug.hpp"
 
+inline bool is_not_tagged(traceid value) { // false only when the this-epoch bit is set and its META_SHIFT-ed counterpart is clear
+  const traceid this_epoch_bit = JfrTraceIdEpoch::in_use_this_epoch_bit();
+  return (value & ((this_epoch_bit << META_SHIFT) | this_epoch_bit)) != this_epoch_bit; // examines just those two bits of value
+}
+
+template <typename T>
+inline bool should_tag(const T* t) { // generic form: applies is_not_tagged() to TRACE_ID_RAW(t)
+  assert(t != NULL, "invariant");
+  return is_not_tagged(TRACE_ID_RAW(t));
+}
+
+template <>
+inline bool should_tag<Method>(const Method* method) { // Method specialization: applies is_not_tagged() to method->trace_flags()
+  assert(method != NULL, "invariant");
+  return is_not_tagged((traceid)method->trace_flags());
+}
+
 template <typename T>
 inline traceid set_used_and_get(const T* type) {
   assert(type != NULL, "invariant");
-  if (SHOULD_TAG(type)) {
+  if (should_tag(type)) {
     SET_USED_THIS_EPOCH(type);
     JfrTraceIdEpoch::set_changed_tag_state();
   }
@@ -62,7 +79,12 @@
 
 inline traceid JfrTraceId::use(const Klass* klass) {
   assert(klass != NULL, "invariant");
-  return set_used_and_get(klass);
+  if (should_tag(klass)) { // tag only when should_tag() reports the klass as not tagged
+    SET_USED_THIS_EPOCH(klass);
+    JfrTraceIdEpoch::set_changed_tag_state(); // record that tag state changed
+  }
+  assert(USED_THIS_EPOCH(klass), "invariant"); // holds on both paths: either already used or just set
+  return get(klass);
 }
 
 inline traceid JfrTraceId::use(const Method* method) {
diff --git a/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdMacros.hpp b/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdMacros.hpp
index 177055a..e1d0f0f 100644
--- a/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdMacros.hpp
+++ b/src/hotspot/share/jfr/recorder/checkpoint/types/traceid/jfrTraceIdMacros.hpp
@@ -44,15 +44,19 @@
 
 // static bits
 #define META_SHIFT                                8
-#define LEAKP_META_BIT                            USED_BIT
+#define EPOCH_1_CLEARED_META_BIT                  USED_BIT  // lowest meta bit
+#define EPOCH_1_CLEARED_BIT                       (EPOCH_1_CLEARED_META_BIT << META_SHIFT)
+#define EPOCH_2_CLEARED_META_BIT                  (USED_BIT << 1)
+#define EPOCH_2_CLEARED_BIT                       (EPOCH_2_CLEARED_META_BIT << META_SHIFT)
+#define LEAKP_META_BIT                            (USED_BIT << 2)  // first meta bit after the two EPOCH_*_CLEARED meta bits
 #define LEAKP_BIT                                 (LEAKP_META_BIT << META_SHIFT)
-#define TRANSIENT_META_BIT                        (USED_BIT << 1)
+#define TRANSIENT_META_BIT                        (USED_BIT << 3)
 #define TRANSIENT_BIT                             (TRANSIENT_META_BIT << META_SHIFT)
-#define SERIALIZED_META_BIT                       (USED_BIT << 2)
+#define SERIALIZED_META_BIT                       (USED_BIT << 4)
 #define SERIALIZED_BIT                            (SERIALIZED_META_BIT << META_SHIFT)
 #define TRACE_ID_SHIFT                            16
 #define METHOD_ID_NUM_MASK                        ((1 << TRACE_ID_SHIFT) - 1)
-#define META_BITS                                 (SERIALIZED_BIT | TRANSIENT_BIT | LEAKP_BIT)
+#define META_BITS                                 (SERIALIZED_BIT | TRANSIENT_BIT | LEAKP_BIT | EPOCH_2_CLEARED_BIT | EPOCH_1_CLEARED_BIT)  // includes both EPOCH_*_CLEARED bits
 #define EVENT_BITS                                (EVENT_HOST_KLASS | JDK_JFR_EVENT_KLASS | JDK_JFR_EVENT_SUBKLASS)
 #define USED_BITS                                 (METHOD_USED_EPOCH_2_BIT | METHOD_USED_EPOCH_1_BIT | USED_EPOCH_2_BIT | USED_EPOCH_1_BIT)
 #define ALL_BITS                                  (META_BITS | EVENT_BITS | USED_BITS)
@@ -130,12 +134,16 @@
 #define SHOULD_TAG_KLASS_METHOD(ptr)              (METHOD_NOT_USED_THIS_EPOCH(ptr))
 #define SET_SERIALIZED(ptr)                       (TRACE_ID_META_TAG(ptr, SERIALIZED_META_BIT))
 #define CLEAR_SERIALIZED(ptr)                     (TRACE_ID_META_CLEAR(ptr, META_MASK))
+#define SET_PREV_EPOCH_CLEARED_BIT(ptr)           (TRACE_ID_META_TAG(ptr, IN_USE_PREV_EPOCH_BIT))  // meta-tags with the previous-epoch in-use bit; presumably lines up with EPOCH_n_CLEARED_META_BIT -- TODO confirm
 #define IS_METHOD_SERIALIZED(method)              (METHOD_FLAG_PREDICATE(method, SERIALIZED_BIT))
 #define IS_METHOD_LEAKP_USED(method)              (METHOD_FLAG_PREDICATE(method, LEAKP_BIT))
 #define METHOD_NOT_SERIALIZED(method)             (!(IS_METHOD_SERIALIZED(method)))
 #define SET_METHOD_LEAKP(method)                  (METHOD_META_TAG(method, LEAKP_META_BIT))
 #define SET_METHOD_SERIALIZED(method)             (METHOD_META_TAG(method, SERIALIZED_META_BIT))
 #define CLEAR_METHOD_SERIALIZED(method)           (METHOD_META_CLEAR(method, META_MASK))
+#define SET_PREV_EPOCH_METHOD_CLEARED_BIT(ptr)    (METHOD_META_TAG(ptr, IN_USE_PREV_EPOCH_BIT))  // Method counterpart of SET_PREV_EPOCH_CLEARED_BIT
 #define CLEAR_LEAKP(ptr)                          (TRACE_ID_META_CLEAR(ptr, (~(LEAKP_META_BIT))))
+#define CLEAR_THIS_EPOCH_CLEARED_BIT(ptr)         (TRACE_ID_META_CLEAR(ptr,(~(IN_USE_THIS_EPOCH_BIT))))  // passes a complemented mask, same convention as CLEAR_LEAKP
+#define CLEAR_THIS_EPOCH_METHOD_CLEARED_BIT(ptr)  (METHOD_META_CLEAR(ptr,(~(IN_USE_THIS_EPOCH_BIT))))  // Method counterpart of CLEAR_THIS_EPOCH_CLEARED_BIT
 
 #endif // SHARE_JFR_RECORDER_CHECKPOINT_TYPES_TRACEID_JFRTRACEIDMACROS_HPP
diff --git a/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp b/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp
index 375a285..ccfc614 100644
--- a/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp
+++ b/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp
@@ -347,26 +347,48 @@
   _storage(JfrStorage::instance()),
   _string_pool(JfrStringPool::instance()) {}
 
-static bool recording = false;
+enum RecorderState { // the two states stored in recorder_state
+  STOPPED,
+  RUNNING
+};
 
-static void set_recording_state(bool is_recording) {
+static RecorderState recorder_state = STOPPED;
+
+static void set_recorder_state(RecorderState from, RecorderState to) { // transition from -> to; `from` must equal the current state
+  assert(from == recorder_state, "invariant");
   OrderAccess::storestore();
-  recording = is_recording;
+  recorder_state = to; // written after the storestore barrier above
+}
+
+static void start_recorder() { // STOPPED -> RUNNING, then logs the transition
+  set_recorder_state(STOPPED, RUNNING);
+  log_debug(jfr, system)("Recording service STARTED");
+}
+
+static void stop_recorder() { // RUNNING -> STOPPED, then logs the transition
+  set_recorder_state(RUNNING, STOPPED);
+  log_debug(jfr, system)("Recording service STOPPED");
 }
 
 bool JfrRecorderService::is_recording() {
-  return recording;
+  const bool is_running = recorder_state == RUNNING; // sample the state once
+  OrderAccess::loadload(); // load barrier issued after the state has been read
+  return is_running;
 }
 
 void JfrRecorderService::start() {
   MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
-  log_debug(jfr, system)("Request to START recording");
   assert(!is_recording(), "invariant");
   clear();
-  set_recording_state(true);
-  assert(is_recording(), "invariant");
   open_new_chunk();
-  log_debug(jfr, system)("Recording STARTED");
+  start_recorder(); // state flips to RUNNING only after clear() and open_new_chunk() have run
+  assert(is_recording(), "invariant");
+}
+
+static void stop() { // file-local helper: requires a running recorder and leaves it stopped
+  assert(JfrRecorderService::is_recording(), "invariant");
+  stop_recorder();
+  assert(!JfrRecorderService::is_recording(), "invariant");
 }
 
 void JfrRecorderService::clear() {
@@ -392,9 +414,9 @@
   assert(SafepointSynchronize::is_at_safepoint(), "invariant");
   _checkpoint_manager.begin_epoch_shift();
   _string_pool.clear();
-  _stack_trace_repository.clear();
   _storage.clear();
   _chunkwriter.set_time_stamp();
+  _stack_trace_repository.clear();
   _checkpoint_manager.end_epoch_shift();
 }
 
@@ -412,14 +434,6 @@
   }
 }
 
-static void stop() {
-  assert(JfrStream_lock->owned_by_self(), "invariant");
-  assert(JfrRecorderService::is_recording(), "invariant");
-  log_debug(jfr, system)("Recording STOPPED");
-  set_recording_state(false);
-  assert(!JfrRecorderService::is_recording(), "invariant");
-}
-
 // 'rotation_safepoint_pending' is currently only relevant in the unusual case of an emergency dump.
 // Since the JfrStream_lock must be acquired using _no_safepoint_check,
 // if the thread running the emergency dump is a JavaThread, a pending safepoint, induced by rotation,
@@ -565,9 +579,9 @@
     write_stringpool_safepoint(_string_pool, _chunkwriter);
   }
   _checkpoint_manager.on_rotation();
-  write_stacktrace(_stack_trace_repository, _chunkwriter, true);
   _storage.write_at_safepoint();
   _chunkwriter.set_time_stamp();
+  write_stacktrace(_stack_trace_repository, _chunkwriter, true);
   _checkpoint_manager.end_epoch_shift();
 }
 
diff --git a/src/jdk.jfr/share/classes/jdk/jfr/internal/consumer/ConstantMap.java b/src/jdk.jfr/share/classes/jdk/jfr/internal/consumer/ConstantMap.java
index c45b805..cd5415f 100644
--- a/src/jdk.jfr/share/classes/jdk/jfr/internal/consumer/ConstantMap.java
+++ b/src/jdk.jfr/share/classes/jdk/jfr/internal/consumer/ConstantMap.java
@@ -25,6 +25,10 @@
 
 package jdk.jfr.internal.consumer;
 
+import jdk.jfr.internal.LogLevel;
+import jdk.jfr.internal.LogTag;
+import jdk.jfr.internal.Logger;
+
 import jdk.jfr.internal.LongMap;
 
 /**
@@ -90,14 +94,14 @@
             return new Reference(this, id);
         }
 
-        // should always have a value
+        // should ideally always have a value
         Object value = objects.get(id);
         if (value == null) {
-            // unless is 0 which is used to represent null
-            if (id == 0) {
-                return null;
+            // unless id is 0 which is used to represent null
+            if (id != 0) {
+                Logger.log(LogTag.JFR_SYSTEM_PARSER, LogLevel.INFO, "Missing object id=" + id + " in pool " + name + ". All ids should reference an object");
             }
-            throw new InternalError("Missing object id=" + id + " in pool " + name + ". All ids should reference object");
+            return null;
         }
 
         // id is resolved (but not the whole pool)