Merge 7a5b758 for LLVM update to 349610

Change-Id: I1ba0bbe7b606a2539855c4eb67804d1001bd8b0b
diff --git a/cmake/HandleOpenMPOptions.cmake b/cmake/HandleOpenMPOptions.cmake
index 5e5215d..97b616e 100644
--- a/cmake/HandleOpenMPOptions.cmake
+++ b/cmake/HandleOpenMPOptions.cmake
@@ -13,4 +13,7 @@
   append_if(OPENMP_HAVE_WERROR_FLAG "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
 endif()
 
-append_if(OPENMP_HAVE_STD_CPP11_FLAG "-std=c++11" CMAKE_CXX_FLAGS)
\ No newline at end of file
+append_if(OPENMP_HAVE_STD_GNUPP11_FLAG "-std=gnu++11" CMAKE_CXX_FLAGS)
+if (NOT OPENMP_HAVE_STD_GNUPP11_FLAG)
+  append_if(OPENMP_HAVE_STD_CPP11_FLAG "-std=c++11" CMAKE_CXX_FLAGS)
+endif()
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 912cbd0..13eace9 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -3,4 +3,5 @@
 
 check_c_compiler_flag(-Werror OPENMP_HAVE_WERROR_FLAG)
 
-check_cxx_compiler_flag(-std=c++11 OPENMP_HAVE_STD_CPP11_FLAG)
\ No newline at end of file
+check_cxx_compiler_flag(-std=gnu++11 OPENMP_HAVE_STD_GNUPP11_FLAG)
+check_cxx_compiler_flag(-std=c++11 OPENMP_HAVE_STD_CPP11_FLAG)
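
The two CMake hunks above prefer -std=gnu++11 and fall back to -std=c++11 only when the GNU dialect flag is unavailable. A plausible motivation (an assumption, not stated in the patch) is that the device runtime relies on GNU extensions such as the named variadic macro arguments (`_args...`) used in debug.h, which a strict -std=c++11 -pedantic build flags as non-standard. A minimal sketch of such a macro:

    // Sketch only: named variadic macro arguments are a GNU extension that
    // -std=gnu++11 accepts silently, while -std=c++11 -pedantic diagnoses.
    #include <cstdio>

    #define LOG(_str, _args...) std::printf("runtime: " _str "\n", _args)

    int main() {
      LOG("value %d", 42);  // expands to printf("runtime: value %d\n", 42)
      return 0;
    }
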
diff --git a/libomptarget/deviceRTLs/nvptx/src/cancel.cu b/libomptarget/deviceRTLs/nvptx/src/cancel.cu
index 77033db..9f92e2d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/cancel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/cancel.cu
@@ -13,16 +13,16 @@
 
 #include "omptarget-nvptx.h"
 
-EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
                                         int32_t cancelVal) {
-  PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", cancelVal);
+  PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", (int)cancelVal);
   // disabled
   return FALSE;
 }
 
-EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
                              int32_t cancelVal) {
-  PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", cancelVal);
+  PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", (int)cancelVal);
   // disabled
   return FALSE;
 }
diff --git a/libomptarget/deviceRTLs/nvptx/src/critical.cu b/libomptarget/deviceRTLs/nvptx/src/critical.cu
index fef8101..9bf2a30 100644
--- a/libomptarget/deviceRTLs/nvptx/src/critical.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/critical.cu
@@ -16,17 +16,15 @@
 #include "omptarget-nvptx.h"
 
 EXTERN
-void __kmpc_critical(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
                      kmp_CriticalName *lck) {
   PRINT0(LD_IO, "call to kmpc_critical()\n");
-  omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
-  omp_set_lock(teamDescr.CriticalLock());
+  omp_set_lock((omp_lock_t *)lck);
 }
 
 EXTERN
-void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
                          kmp_CriticalName *lck) {
   PRINT0(LD_IO, "call to kmpc_end_critical()\n");
-  omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
-  omp_unset_lock(teamDescr.CriticalLock());
+  omp_unset_lock((omp_lock_t *)lck);
 }
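
Rather than funneling every critical region through the single per-team lock returned by CriticalLock(), the new code treats the compiler-emitted critical name itself as the lock word, so differently named critical constructs no longer serialize against each other. A simplified sketch of the idea (the typedefs here are assumptions for illustration, not the runtime's actual definitions):

    // Sketch: each critical name carries its own lock word.
    typedef int omp_lock_t;            // assumed simplified definition
    typedef int kmp_CriticalName[8];   // assumed layout; only word 0 is used

    static omp_lock_t *critical_lock(kmp_CriticalName *crit) {
      // The patch reinterprets the start of the critical name as an
      // omp_lock_t, so omp_set_lock/omp_unset_lock act on per-name state.
      return (omp_lock_t *)crit;
    }
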
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index bfb8208..f69daa1 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -84,7 +84,7 @@
            "Entering __kmpc_initialize_data_sharing_environment\n");
 
   unsigned WID = getWarpId();
-  DSPRINT(DSFLAG_INIT, "Warp ID: %d\n", WID);
+  DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
 
   omptarget_nvptx_TeamDescr *teamDescr =
       &omptarget_nvptx_threadPrivateContext->TeamContext();
@@ -95,15 +95,16 @@
 
   // We don't need to initialize the frame and active threads.
 
-  DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", InitialDataSize);
-  DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (long long)RootS);
+  DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", (unsigned)InitialDataSize);
+  DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (unsigned long long)RootS);
   DSPRINT(DSFLAG_INIT, "Root slot data-end at: %016llx \n",
-          (long long)RootS->DataEnd);
-  DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n", (long long)RootS->Next);
+          (unsigned long long)RootS->DataEnd);
+  DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n",
+          (unsigned long long)RootS->Next);
   DSPRINT(DSFLAG_INIT, "Shared slot ptr at: %016llx \n",
-          (long long)DataSharingState.SlotPtr[WID]);
+          (unsigned long long)DataSharingState.SlotPtr[WID]);
   DSPRINT(DSFLAG_INIT, "Shared stack ptr at: %016llx \n",
-          (long long)DataSharingState.StackPtr[WID]);
+          (unsigned long long)DataSharingState.StackPtr[WID]);
 
   DSPRINT0(DSFLAG_INIT, "Exiting __kmpc_initialize_data_sharing_environment\n");
 }
@@ -121,8 +122,9 @@
   if (!IsOMPRuntimeInitialized)
     return (void *)&DataSharingState;
 
-  DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize);
-  DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize);
+  DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize);
+  DSPRINT(DSFLAG, "Default Data Size %016llx\n",
+          (unsigned long long)SharingDefaultDataSize);
 
   unsigned WID = getWarpId();
   unsigned CurActiveThreads = getActiveThreadsMask();
@@ -139,11 +141,11 @@
   *SavedSharedFrame = FrameP;
   *SavedActiveThreads = ActiveT;
 
-  DSPRINT(DSFLAG, "Warp ID: %d\n", WID);
-  DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (long long)SlotP);
-  DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (long long)StackP);
+  DSPRINT(DSFLAG, "Warp ID: %u\n", WID);
+  DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (unsigned long long)SlotP);
+  DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (unsigned long long)StackP);
   DSPRINT(DSFLAG, "Saved frame ptr at: %016llx \n", (long long)FrameP);
-  DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT);
+  DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
 
   // Only the warp active master needs to grow the stack.
   if (IsWarpMasterActiveThread()) {
@@ -161,12 +163,16 @@
     const uintptr_t RequiredEndAddress =
         CurrentStartAddress + (uintptr_t)SharingDataSize;
 
-    DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize);
-    DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize);
-    DSPRINT(DSFLAG, "Current Start Address %016llx\n", CurrentStartAddress);
-    DSPRINT(DSFLAG, "Current End Address %016llx\n", CurrentEndAddress);
-    DSPRINT(DSFLAG, "Required End Address %016llx\n", RequiredEndAddress);
-    DSPRINT(DSFLAG, "Active Threads %08x\n", ActiveT);
+    DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize);
+    DSPRINT(DSFLAG, "Default Data Size %016llx\n",
+            (unsigned long long)SharingDefaultDataSize);
+    DSPRINT(DSFLAG, "Current Start Address %016llx\n",
+            (unsigned long long)CurrentStartAddress);
+    DSPRINT(DSFLAG, "Current End Address %016llx\n",
+            (unsigned long long)CurrentEndAddress);
+    DSPRINT(DSFLAG, "Required End Address %016llx\n",
+            (unsigned long long)RequiredEndAddress);
+    DSPRINT(DSFLAG, "Active Threads %08x\n", (unsigned)ActiveT);
 
     // If we require a new slot, allocate it and initialize it (or attempt to
     // reuse one). Also, set the shared stack and slot pointers to the new
@@ -184,11 +190,11 @@
                                      (uintptr_t)(&ExistingSlot->Data[0]);
         if (ExistingSlotSize >= NewSize) {
           DSPRINT(DSFLAG, "Reusing stack slot %016llx\n",
-                  (long long)ExistingSlot);
+                  (unsigned long long)ExistingSlot);
           NewSlot = ExistingSlot;
         } else {
           DSPRINT(DSFLAG, "Cleaning up -failed reuse - %016llx\n",
-                  (long long)SlotP->Next);
+                  (unsigned long long)SlotP->Next);
           free(ExistingSlot);
         }
       }
@@ -197,7 +203,7 @@
         NewSlot = (__kmpc_data_sharing_slot *)malloc(
             sizeof(__kmpc_data_sharing_slot) + NewSize);
         DSPRINT(DSFLAG, "New slot allocated %016llx (data size=%016llx)\n",
-                (long long)NewSlot, NewSize);
+                (unsigned long long)NewSlot, NewSize);
       }
 
       NewSlot->Next = 0;
@@ -213,7 +219,7 @@
       // not eliminate them because that may be used to return data.
       if (SlotP->Next) {
         DSPRINT(DSFLAG, "Cleaning up - old not required - %016llx\n",
-                (long long)SlotP->Next);
+                (unsigned long long)SlotP->Next);
         free(SlotP->Next);
         SlotP->Next = 0;
       }
@@ -275,8 +281,8 @@
     // have other threads that will return after the current ones.
     ActiveT &= ~CurActive;
 
-    DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n", CurActive,
-            ActiveT);
+    DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n",
+            (unsigned)CurActive, (unsigned)ActiveT);
 
     if (!ActiveT) {
       // No other active threads? Great, lets restore the stack.
@@ -290,10 +296,13 @@
       FrameP = *SavedSharedFrame;
       ActiveT = *SavedActiveThreads;
 
-      DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n", (long long)SlotP);
-      DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n", (long long)StackP);
-      DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n", (long long)FrameP);
-      DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT);
+      DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n",
+              (unsigned long long)SlotP);
+      DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n",
+              (unsigned long long)StackP);
+      DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n",
+              (unsigned long long)FrameP);
+      DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
     }
   }
 
@@ -319,7 +328,7 @@
 
   unsigned SourceWID = SourceThreadID / WARPSIZE;
 
-  DSPRINT(DSFLAG, "Source  warp: %d\n", SourceWID);
+  DSPRINT(DSFLAG, "Source  warp: %u\n", SourceWID);
 
   void * volatile P = DataSharingState.FramePtr[SourceWID];
   DSPRINT0(DSFLAG, "Exiting __kmpc_get_data_sharing_environment_frame\n");
diff --git a/libomptarget/deviceRTLs/nvptx/src/debug.h b/libomptarget/deviceRTLs/nvptx/src/debug.h
index 9f59d66..8577c8f 100644
--- a/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ b/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -127,6 +127,14 @@
 
 #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
 #include <stdio.h>
+#include "option.h"
+
+template <typename... Arguments>
+static NOINLINE void log(const char *fmt, Arguments... parameters) {
+  printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
+         (int)(threadIdx.x & 0x1F), parameters...);
+}
+
 #endif
 #if OMPTARGET_NVPTX_TEST
 #include <assert.h>
@@ -164,16 +172,14 @@
 #define PRINT0(_flag, _str)                                                    \
   {                                                                            \
     if (omptarget_device_environment.debug_level && DON(_flag)) {              \
-      printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x,   \
-             threadIdx.x / WARPSIZE, threadIdx.x & 0x1F);                      \
+      log("<b %2d, t %4d, w %2d, l %2d>: " _str);                              \
     }                                                                          \
   }
 
 #define PRINT(_flag, _str, _args...)                                           \
   {                                                                            \
     if (omptarget_device_environment.debug_level && DON(_flag)) {              \
-      printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x,   \
-             threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args);               \
+      log("<b %2d, t %4d, w %2d, l %2d>: " _str, _args);                       \
     }                                                                          \
   }
 #else
@@ -217,16 +223,14 @@
 #define ASSERT0(_flag, _cond, _str)                                            \
   {                                                                            \
     if (TON(_flag) && !(_cond)) {                                              \
-      printf("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n", blockIdx.x,    \
-             threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F);         \
+      log("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n");                  \
       assert(_cond);                                                           \
     }                                                                          \
   }
 #define ASSERT(_flag, _cond, _str, _args...)                                   \
   {                                                                            \
     if (TON(_flag) && !(_cond)) {                                              \
-      printf("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", blockIdx.x,    \
-             threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args);  \
+      log("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", _args);           \
       assert(_cond);                                                           \
     }                                                                          \
   }
@@ -253,15 +257,13 @@
 #define WARNING0(_flag, _str)                                                  \
   {                                                                            \
     if (WON(_flag)) {                                                          \
-      printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x,        \
-             threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F);         \
+      log("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str);                      \
     }                                                                          \
   }
 #define WARNING(_flag, _str, _args...)                                         \
   {                                                                            \
     if (WON(_flag)) {                                                          \
-      printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x,        \
-             threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args);  \
+      log("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, _args);               \
     }                                                                          \
   }
 
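
The new variadic-template log() helper centralizes the "<b, t, w, l>" prefix: it always supplies blockIdx.x, threadIdx.x, the warp id, and the lane id as the first four printf arguments and forwards the caller's arguments after them, so the PRINT/ASSERT/WARNING macros shrink to a single call. A minimal sketch of the same forwarding pattern (WARPSIZE is assumed to be 32 here):

    // Sketch of the forwarding pattern used by log() above.
    #include <cstdio>
    #define WARPSIZE 32

    template <typename... Arguments>
    __device__ static void log_sketch(const char *fmt, Arguments... parameters) {
      // fmt begins with "<b %2d, t %4d, w %2d, l %2d>: ", which consumes the
      // four geometry values before the caller's arguments.
      printf(fmt, (int)blockIdx.x, (int)threadIdx.x,
             (int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F),
             parameters...);
    }

    __global__ void demo(int cancelVal) {
      log_sketch("<b %2d, t %4d, w %2d, l %2d>: cancel val %d\n", (int)cancelVal);
    }
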
diff --git a/libomptarget/deviceRTLs/nvptx/src/interface.h b/libomptarget/deviceRTLs/nvptx/src/interface.h
index 7a37c04..2c2beae 100644
--- a/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -160,8 +160,36 @@
 
 } kmp_sched_t;
 
+/*!
+ * Enum for accessing the reserved_2 field of the ident_t struct below.
+ */
+enum {
+  /*! Bit set to 1 when in SPMD mode. */
+  KMP_IDENT_SPMD_MODE = 0x01,
+  /*! Bit set to 1 when a simplified runtime is used. */
+  KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+};
+
+/*!
+ * The ident structure that describes a source location.
+ * The struct is identical to the one in the kmp.h file.
+ * We maintain the same data structure for compatibility.
+ */
+typedef int kmp_int32;
+typedef struct ident {
+  kmp_int32 reserved_1; /**<  might be used in Fortran; see above  */
+  kmp_int32 flags; /**<  also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
+                      identifies this union member  */
+  kmp_int32 reserved_2; /**<  not really used in Fortran any more; see above */
+  kmp_int32 reserved_3; /**<  source[4] in Fortran, do not use for C++  */
+  char const *psource; /**<  String describing the source location.
+                       The string is composed of semi-colon separated fields
+                       which describe the source file, the function and a pair
+                       of line numbers that delimit the construct. */
+} ident_t;
+
 // parallel defs
-typedef void kmp_Indent;
+typedef ident_t kmp_Ident;
 typedef void (*kmp_ParFctPtr)(int32_t *global_tid, int32_t *bound_tid, ...);
 typedef void (*kmp_ReductFctPtr)(void *lhsData, void *rhsData);
 typedef void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num);
@@ -223,28 +251,28 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 // query
-EXTERN int32_t __kmpc_global_num_threads(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_bound_thread_num(kmp_Indent *loc);   // missing
-EXTERN int32_t __kmpc_bound_num_threads(kmp_Indent *loc);  // missing
-EXTERN int32_t __kmpc_in_parallel(kmp_Indent *loc);        // missing
+EXTERN int32_t __kmpc_global_num_threads(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_bound_thread_num(kmp_Ident *loc);   // missing
+EXTERN int32_t __kmpc_bound_num_threads(kmp_Ident *loc);  // missing
+EXTERN int32_t __kmpc_in_parallel(kmp_Ident *loc);        // missing
 
 // parallel
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc);
-EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc);
+EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t global_tid,
                                     int32_t num_threads);
 // simd
-EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t global_tid,
                                    int32_t simd_limit);
 // aee ... not supported
-// EXTERN void __kmpc_fork_call(kmp_Indent *loc, int32_t argc, kmp_ParFctPtr
+// EXTERN void __kmpc_fork_call(kmp_Ident *loc, int32_t argc, kmp_ParFctPtr
 // microtask, ...);
-EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
+EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
                                            uint32_t global_tid);
-EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid);
+EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid);
 
 // proc bind
-EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t global_tid,
                                   int proc_bind);
 EXTERN int omp_get_num_places(void);
 EXTERN int omp_get_place_num_procs(int place_num);
@@ -254,52 +282,52 @@
 EXTERN void omp_get_partition_place_nums(int *place_nums);
 
 // for static (no chunk or chunk)
-EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
                                      int32_t sched, int32_t *plastiter,
                                      int32_t *plower, int32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk);
-EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
                                       int32_t sched, int32_t *plastiter,
                                       uint32_t *plower, uint32_t *pupper,
                                       int32_t *pstride, int32_t incr,
                                       int32_t chunk);
-EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
                                      int32_t sched, int32_t *plastiter,
                                      int64_t *plower, int64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk);
-EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
                                       int32_t sched, int32_t *plastiter1,
                                       uint64_t *plower, uint64_t *pupper,
                                       int64_t *pstride, int64_t incr,
                                       int64_t chunk);
 EXTERN
-void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                           int32_t sched, int32_t *plastiter,
                                           int32_t *plower, int32_t *pupper,
                                           int32_t *pstride, int32_t incr,
                                           int32_t chunk);
 EXTERN
-void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                            int32_t sched, int32_t *plastiter,
                                            uint32_t *plower, uint32_t *pupper,
                                            int32_t *pstride, int32_t incr,
                                            int32_t chunk);
 EXTERN
-void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                           int32_t sched, int32_t *plastiter,
                                           int64_t *plower, int64_t *pupper,
                                           int64_t *pstride, int64_t incr,
                                           int64_t chunk);
 EXTERN
-void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                            int32_t sched, int32_t *plastiter1,
                                            uint64_t *plower, uint64_t *pupper,
                                            int64_t *pstride, int64_t incr,
                                            int64_t chunk);
 EXTERN
-void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc,
+void __kmpc_for_static_init_4_simple_generic(kmp_Ident *loc,
                                              int32_t global_tid, int32_t sched,
                                              int32_t *plastiter,
                                              int32_t *plower, int32_t *pupper,
@@ -307,11 +335,11 @@
                                              int32_t chunk);
 EXTERN
 void __kmpc_for_static_init_4u_simple_generic(
-    kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
+    kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
     uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
     int32_t chunk);
 EXTERN
-void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc,
+void __kmpc_for_static_init_8_simple_generic(kmp_Ident *loc,
                                              int32_t global_tid, int32_t sched,
                                              int32_t *plastiter,
                                              int64_t *plower, int64_t *pupper,
@@ -319,48 +347,48 @@
                                              int64_t chunk);
 EXTERN
 void __kmpc_for_static_init_8u_simple_generic(
-    kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
+    kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
     uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
     int64_t chunk);
 
-EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid);
 
 // for dynamic
-EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t global_tid,
                                    int32_t sched, int32_t lower, int32_t upper,
                                    int32_t incr, int32_t chunk);
-EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t global_tid,
                                     int32_t sched, uint32_t lower,
                                     uint32_t upper, int32_t incr,
                                     int32_t chunk);
-EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t global_tid,
                                    int32_t sched, int64_t lower, int64_t upper,
                                    int64_t incr, int64_t chunk);
-EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t global_tid,
                                     int32_t sched, uint64_t lower,
                                     uint64_t upper, int64_t incr,
                                     int64_t chunk);
 
-EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t global_tid,
                                   int32_t *plastiter, int32_t *plower,
                                   int32_t *pupper, int32_t *pstride);
-EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t global_tid,
                                    int32_t *plastiter, uint32_t *plower,
                                    uint32_t *pupper, int32_t *pstride);
-EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t global_tid,
                                   int32_t *plastiter, int64_t *plower,
                                   int64_t *pupper, int64_t *pstride);
-EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t global_tid,
                                    int32_t *plastiter, uint64_t *plower,
                                    uint64_t *pupper, int64_t *pstride);
 
-EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t global_tid);
 
 // Support for reducing conditional lastprivate variables
-EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc,
+EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc,
                                                   int32_t global_tid,
                                                   int32_t varNum, void *array);
 
@@ -391,67 +419,73 @@
     int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
     kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
     kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
+EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
+                                                       int32_t global_tid,
+                                                       kmp_CriticalName *crit);
+EXTERN void __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc,
+                                                        int32_t global_tid,
+                                                        kmp_CriticalName *crit);
 EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
 EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
 
 // sync barrier
-EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid);
-EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid);
-EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid);
-EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid);
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid);
 
 // single
-EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid);
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid);
 
 // sync
-EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_critical(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
                             kmp_CriticalName *crit);
-EXTERN void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
                                 kmp_CriticalName *crit);
-EXTERN void __kmpc_flush(kmp_Indent *loc);
+EXTERN void __kmpc_flush(kmp_Ident *loc);
 
 // vote
 EXTERN int32_t __kmpc_warp_active_thread_mask();
 
 // tasks
-EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Indent *loc,
+EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
                                             uint32_t global_tid, int32_t flag,
                                             size_t sizeOfTaskInclPrivate,
                                             size_t sizeOfSharedTable,
                                             kmp_TaskFctPtr sub);
-EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
                                kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
                                          kmp_TaskDescr *newLegacyTaskDescr,
                                          int32_t depNum, void *depList,
                                          int32_t noAliasDepNum,
                                          void *noAliasDepList);
-EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
                                       kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
                                          kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
                                  int32_t depNum, void *depList,
                                  int32_t noAliasDepNum, void *noAliasDepList);
-EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid);
-EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid);
+EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
                                     int end_part);
-EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
                             kmp_TaskDescr *newKmpTaskDescr, int if_val,
                             uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
                             int32_t sched, uint64_t grainsize, void *task_dup);
 
 // cancel
-EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
                                         int32_t cancelVal);
-EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
                              int32_t cancelVal);
 
 // non standard
@@ -460,7 +494,8 @@
 EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
 EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
                                     int16_t RequiresDataSharing);
-EXTERN void __kmpc_spmd_kernel_deinit();
+EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit();
+EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
                                            int16_t IsOMPRuntimeInitialized);
 EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
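
With kmp_Ident now a real ident_t instead of an opaque void typedef, runtime entry points can inspect the location's reserved_2 bits to learn whether the kernel was compiled in SPMD mode and whether the simplified runtime is in use. A sketch of how those bits could be queried (an illustration only; the runtime's own checkSPMDMode/checkRuntimeInitialized helpers are not reproduced here):

    // Sketch: reading the KMP_IDENT_* bits documented in interface.h.
    #include <cstdint>

    enum {
      KMP_IDENT_SPMD_MODE = 0x01,
      KMP_IDENT_SIMPLE_RT_MODE = 0x02,
    };

    typedef struct ident {
      int32_t reserved_1;
      int32_t flags;
      int32_t reserved_2;   // carries the KMP_IDENT_* bits on the device
      int32_t reserved_3;
      const char *psource;
    } ident_t;

    static inline bool locIsSPMDMode(const ident_t *loc) {
      return loc && (loc->reserved_2 & KMP_IDENT_SPMD_MODE);
    }

    static inline bool locUsesSimpleRuntime(const ident_t *loc) {
      return loc && (loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE);
    }
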
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index ea9225d..91b270c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -222,9 +222,11 @@
                 " chunk %" PRIu64 "; tid %d, tnum %d, nthreads %d\n",
                 "ancestor", steps,
                 (currTaskDescr->IsParallelConstruct() ? "par" : "task"),
-                currTaskDescr->InParallelRegion(), sched,
-                currTaskDescr->RuntimeChunkSize(), currTaskDescr->ThreadId(),
-                currTaskDescr->ThreadsInTeam(), currTaskDescr->NThreads());
+                (int)currTaskDescr->InParallelRegion(), (int)sched,
+                currTaskDescr->RuntimeChunkSize(),
+                (int)currTaskDescr->ThreadId(),
+                (int)currTaskDescr->ThreadsInTeam(),
+                (int)currTaskDescr->NThreads());
         }
 
         if (currTaskDescr->IsParallelConstruct()) {
@@ -404,23 +406,21 @@
 #define SET 1
 
 EXTERN void omp_init_lock(omp_lock_t *lock) {
-  *lock = UNSET;
+  omp_unset_lock(lock);
   PRINT0(LD_IO, "call omp_init_lock()\n");
 }
 
 EXTERN void omp_destroy_lock(omp_lock_t *lock) {
+  omp_unset_lock(lock);
   PRINT0(LD_IO, "call omp_destroy_lock()\n");
 }
 
 EXTERN void omp_set_lock(omp_lock_t *lock) {
   // int atomicCAS(int* address, int compare, int val);
   // (old == compare ? val : old)
-  int compare = UNSET;
-  int val = SET;
 
   // TODO: not sure spinning is a good idea here..
-  while (atomicCAS(lock, compare, val) != UNSET) {
-
+  while (atomicCAS(lock, UNSET, SET) != UNSET) {
     clock_t start = clock();
     clock_t now;
     for (;;) {
@@ -436,9 +436,7 @@
 }
 
 EXTERN void omp_unset_lock(omp_lock_t *lock) {
-  int compare = SET;
-  int val = UNSET;
-  int old = atomicCAS(lock, compare, val);
+  (void)atomicExch(lock, UNSET);
 
   PRINT0(LD_IO, "call omp_unset_lock()\n");
 }
@@ -446,10 +444,7 @@
 EXTERN int omp_test_lock(omp_lock_t *lock) {
   // int atomicCAS(int* address, int compare, int val);
   // (old == compare ? val : old)
-  int compare = UNSET;
-  int val = SET;
-
-  int ret = atomicCAS(lock, compare, val);
+  int ret = atomicAdd(lock, 0);
 
   PRINT(LD_IO, "call omp_test_lock() return %d\n", ret);
 
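
The lock rework keeps the spin-acquire in omp_set_lock, releases with a plain atomic exchange in omp_unset_lock (also reused by omp_init_lock and omp_destroy_lock), and reads the current lock value for omp_test_lock via atomicAdd(lock, 0). The acquire loop works because atomicCAS returns the previous value: the calling thread owns the lock only on the iteration where that previous value was UNSET. A stripped-down sketch of the acquire/release pair (backoff elided):

    // Sketch of the device spin lock behind omp_set_lock/omp_unset_lock.
    #define UNSET 0
    #define SET 1

    __device__ void lock_acquire(int *lock) {
      // atomicCAS(ptr, compare, val) returns the old value; we hold the lock
      // only when that old value was UNSET.
      while (atomicCAS(lock, UNSET, SET) != UNSET) {
        // spin; the runtime inserts a clock()-based backoff here
      }
    }

    __device__ void lock_release(int *lock) {
      (void)atomicExch(lock, UNSET);  // publish the release
    }
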
diff --git a/libomptarget/deviceRTLs/nvptx/src/loop.cu b/libomptarget/deviceRTLs/nvptx/src/loop.cu
index bd84f0f..c100be5 100644
--- a/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -93,9 +93,10 @@
   ////////////////////////////////////////////////////////////////////////////////
   // Support for Static Init
 
-  INLINE static void for_static_init(int32_t schedtype, int32_t *plastiter,
-                                     T *plower, T *pupper, ST *pstride,
-                                     ST chunk, bool IsSPMDExecutionMode,
+  INLINE static void for_static_init(int32_t gtid, int32_t schedtype,
+                                     int32_t *plastiter, T *plower, T *pupper,
+                                     ST *pstride, ST chunk,
+                                     bool IsSPMDExecutionMode,
                                      bool IsRuntimeUninitialized) {
     // When IsRuntimeUninitialized is true, we assume that the caller is
     // in an L0 parallel region and that all worker threads participate.
@@ -112,108 +113,73 @@
     PRINT(LD_LOOP,
           "OMP Thread %d: schedule type %d, chunk size = %lld, mytid "
           "%d, num tids %d\n",
-          GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
-          schedtype, P64(chunk),
-          GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
-          GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                IsRuntimeUninitialized));
-    ASSERT0(
-        LT_FUSSY,
-        (GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized)) <
-            (GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                   IsRuntimeUninitialized)),
-        "current thread is not needed here; error");
+          (int)gtid, (int)schedtype, (long long)chunk, (int)gtid,
+          (int)numberOfActiveOMPThreads);
+    ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
+            "current thread is not needed here; error");
 
     // copy
     int lastiter = 0;
     T lb = *plower;
     T ub = *pupper;
     ST stride = *pstride;
-    T entityId, numberOfEntities;
     // init
     switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
     case kmp_sched_static_chunk: {
       if (chunk > 0) {
-        entityId =
-            GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
-        numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                                 IsRuntimeUninitialized);
-        ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
-                       numberOfEntities);
+        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
+                       numberOfActiveOMPThreads);
         break;
       }
     } // note: if chunk <=0, use nochunk
     case kmp_sched_static_balanced_chunk: {
       if (chunk > 0) {
-        entityId =
-            GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
-        numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                                 IsRuntimeUninitialized);
-
         // round up to make sure the chunk is enough to cover all iterations
         T tripCount = ub - lb + 1; // +1 because ub is inclusive
-        T span = (tripCount + numberOfEntities - 1) / numberOfEntities;
+        T span = (tripCount + numberOfActiveOMPThreads - 1) /
+                 numberOfActiveOMPThreads;
         // perform chunk adjustment
         chunk = (span + chunk - 1) & ~(chunk - 1);
 
         ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
         T oldUb = ub;
-        ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
-                       numberOfEntities);
+        ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
+                       numberOfActiveOMPThreads);
         if (ub > oldUb)
           ub = oldUb;
         break;
       }
     } // note: if chunk <=0, use nochunk
     case kmp_sched_static_nochunk: {
-      entityId =
-          GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
-      numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                               IsRuntimeUninitialized);
-      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId,
-                       numberOfEntities);
+      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
+                       numberOfActiveOMPThreads);
       break;
     }
     case kmp_sched_distr_static_chunk: {
       if (chunk > 0) {
-        entityId = GetOmpTeamId();
-        numberOfEntities = GetNumberOfOmpTeams();
-        ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
-                       numberOfEntities);
+        ForStaticChunk(lastiter, lb, ub, stride, chunk, GetOmpTeamId(),
+                       GetNumberOfOmpTeams());
         break;
       } // note: if chunk <=0, use nochunk
     }
     case kmp_sched_distr_static_nochunk: {
-      entityId = GetOmpTeamId();
-      numberOfEntities = GetNumberOfOmpTeams();
-
-      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId,
-                       numberOfEntities);
+      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, GetOmpTeamId(),
+                       GetNumberOfOmpTeams());
       break;
     }
     case kmp_sched_distr_static_chunk_sched_static_chunkone: {
-      entityId =
-          GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                IsRuntimeUninitialized) *
-              GetOmpTeamId() +
-          GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
-      numberOfEntities = GetNumberOfOmpTeams() *
-                         GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                               IsRuntimeUninitialized);
-      ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
-                     numberOfEntities);
+      ForStaticChunk(lastiter, lb, ub, stride, chunk,
+                     numberOfActiveOMPThreads * GetOmpTeamId() + gtid,
+                     GetNumberOfOmpTeams() * numberOfActiveOMPThreads);
       break;
     }
     default: {
-      ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", schedtype);
+      ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", (int)schedtype);
       PRINT(LD_LOOP, "unknown schedtype %d, revert back to static chunk\n",
-            schedtype);
-      entityId =
-          GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
-      numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
-                                               IsRuntimeUninitialized);
-      ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
-                     numberOfEntities);
+            (int)schedtype);
+      ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
+                     numberOfActiveOMPThreads);
+      break;
     }
     }
     // copy back
@@ -221,13 +187,12 @@
     *plower = lb;
     *pupper = ub;
     *pstride = stride;
-    PRINT(
-        LD_LOOP,
-        "Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last "
-        "%d\n",
-        GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
-        GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride),
-        lastiter);
+    PRINT(LD_LOOP,
+          "Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last "
+          "%d\n",
+          (int)numberOfActiveOMPThreads, (int)GetNumberOfWorkersInTeam(),
+          (long long)(*plower), (long long)(*pupper), (long long)(*pstride),
+          (int)lastiter);
   }
 
   ////////////////////////////////////////////////////////////////////////////////
@@ -238,20 +203,17 @@
            schedule <= kmp_sched_ordered_last;
   }
 
-  INLINE static void dispatch_init(kmp_Indent *loc, int32_t threadId,
+  INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId,
                                    kmp_sched_t schedule, T lb, T ub, ST st,
                                    ST chunk) {
-    ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+    ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
             "Expected non-SPMD mode + initialized runtime.");
     int tid = GetLogicalThreadIdInBlock();
     omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
     T tnum = currTaskDescr->ThreadsInTeam();
     T tripCount = ub - lb + 1; // +1 because ub is inclusive
-    ASSERT0(
-        LT_FUSSY,
-        GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) <
-            GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
-        "current thread is not needed here; error");
+    ASSERT0(LT_FUSSY, threadId < tnum,
+            "current thread is not needed here; error");
 
     /* Currently just ignore the monotonic and non-monotonic modifiers
      * (the compiler isn't producing them * yet anyway).
@@ -269,7 +231,7 @@
         __kmpc_barrier(loc, threadId);
       PRINT(LD_LOOP,
             "go sequential as tnum=%ld, trip count %lld, ordered sched=%d\n",
-            (long)tnum, P64(tripCount), schedule);
+            (long)tnum, (long long)tripCount, (int)schedule);
       schedule = kmp_sched_static_chunk;
       chunk = tripCount; // one thread gets the whole loop
     } else if (schedule == kmp_sched_runtime) {
@@ -295,18 +257,20 @@
         break;
       }
       }
-      PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", schedule,
-            P64(chunk));
+      PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", (int)schedule,
+            (long long)chunk);
     } else if (schedule == kmp_sched_auto) {
       schedule = kmp_sched_static_chunk;
       chunk = 1;
-      PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", schedule,
-            P64(chunk));
+      PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", (int)schedule,
+            (long long)chunk);
     } else {
-      PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", schedule, P64(chunk));
+      PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", (int)schedule,
+            (long long)chunk);
       ASSERT(LT_FUSSY,
              schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
-             "unknown schedule %d & chunk %lld\n", schedule, P64(chunk));
+             "unknown schedule %d & chunk %lld\n", (int)schedule,
+             (long long)chunk);
     }
 
     // init schedules
@@ -319,9 +283,7 @@
       // compute static chunk
       ST stride;
       int lastiter = 0;
-      ForStaticChunk(
-          lastiter, lb, ub, stride, chunk,
-          GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
       // save computed params
       omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
       omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -329,10 +291,12 @@
       PRINT(LD_LOOP,
             "dispatch init (static chunk) : num threads = %d, ub =  %" PRId64
             ", next lower bound = %llu, stride = %llu\n",
-            GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+            (int)tnum,
             omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
-            omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
-            omptarget_nvptx_threadPrivateContext->Stride(tid));
+            (unsigned long long)
+                omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+            (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+                tid));
     } else if (schedule == kmp_sched_static_balanced_chunk) {
       ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
       // save sched state
@@ -348,9 +312,7 @@
       chunk = (span + chunk - 1) & ~(chunk - 1);
 
       T oldUb = ub;
-      ForStaticChunk(
-          lastiter, lb, ub, stride, chunk,
-          GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+      ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
       ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
       if (ub > oldUb)
         ub = oldUb;
@@ -361,10 +323,12 @@
       PRINT(LD_LOOP,
             "dispatch init (static chunk) : num threads = %d, ub =  %" PRId64
             ", next lower bound = %llu, stride = %llu\n",
-            GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+            (int)tnum,
             omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
-            omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
-            omptarget_nvptx_threadPrivateContext->Stride(tid));
+            (unsigned long long)
+                omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+            (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+                tid));
     } else if (schedule == kmp_sched_static_nochunk) {
       ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
       // save sched state
@@ -374,9 +338,7 @@
       // compute static chunk
       ST stride;
       int lastiter = 0;
-      ForStaticNoChunk(
-          lastiter, lb, ub, stride, chunk,
-          GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+      ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
       // save computed params
       omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
       omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -384,10 +346,12 @@
       PRINT(LD_LOOP,
             "dispatch init (static nochunk) : num threads = %d, ub = %" PRId64
             ", next lower bound = %llu, stride = %llu\n",
-            GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+            (int)tnum,
             omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
-            omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
-            omptarget_nvptx_threadPrivateContext->Stride(tid));
+            (unsigned long long)
+                omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+            (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+                tid));
 
     } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
       __kmpc_barrier(loc, threadId);
@@ -405,8 +369,9 @@
       PRINT(LD_LOOP,
             "dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64
             ", chunk %" PRIu64 "\n",
-            GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
-            omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
+            (int)tnum,
+            (unsigned long long)
+                omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
             omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId),
             omptarget_nvptx_threadPrivateContext->Chunk(teamId));
     }
@@ -430,41 +395,40 @@
     //  c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
     // a.
     if (lb <= loopUpperBound && ub < loopUpperBound) {
-      PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n", P64(lb),
-            P64(ub), P64(loopUpperBound));
+      PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n",
+            (long long)lb, (long long)ub, (long long)loopUpperBound);
       return NOT_FINISHED;
     }
     // b.
     if (lb <= loopUpperBound) {
       PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; clip to loop ub\n",
-            P64(lb), P64(ub), P64(loopUpperBound));
+            (long long)lb, (long long)ub, (long long)loopUpperBound);
       ub = loopUpperBound;
       return LAST_CHUNK;
     }
     // c. if we are here, we are in case 'c'
     lb = loopUpperBound + 2;
     ub = loopUpperBound + 1;
-    PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", P64(lb),
-          P64(ub), P64(loopUpperBound));
+    PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", (long long)lb,
+          (long long)ub, (long long)loopUpperBound);
     return FINISHED;
   }
 
   // On Pascal, with inlining of the runtime into the user application,
   // this code deadlocks.  This is probably because different threads
   // in a warp cannot make independent progress.
-  NOINLINE static int dispatch_next(int32_t *plast, T *plower, T *pupper,
-                                    ST *pstride) {
+  NOINLINE static int dispatch_next(int32_t gtid, int32_t *plast, T *plower,
+                                    T *pupper, ST *pstride) {
     ASSERT0(LT_FUSSY, isRuntimeInitialized(),
             "Expected non-SPMD mode + initialized runtime.");
     // ID of a thread in its own warp
 
     // automatically selects thread or warp ID based on selected implementation
     int tid = GetLogicalThreadIdInBlock();
-    ASSERT0(
-        LT_FUSSY,
-        GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) <
-            GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
-        "current thread is not needed here; error");
+    ASSERT0(LT_FUSSY,
+            gtid < GetNumberOfOmpThreads(tid, isSPMDMode(),
+                                         isRuntimeUninitialized()),
+            "current thread is not needed here; error");
     // retrieve schedule
     kmp_sched_t schedule =
         omptarget_nvptx_threadPrivateContext->ScheduleType(tid);
@@ -477,7 +441,7 @@
       // finished?
       if (myLb > ub) {
         PRINT(LD_LOOP, "static loop finished with myLb %lld, ub %lld\n",
-              P64(myLb), P64(ub));
+              (long long)myLb, (long long)ub);
         return DISPATCH_FINISHED;
       }
       // not finished, save current bounds
@@ -493,7 +457,7 @@
       ST stride = omptarget_nvptx_threadPrivateContext->Stride(tid);
       omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = myLb + stride;
       PRINT(LD_LOOP, "static loop continues with myLb %lld, myUb %lld\n",
-            P64(*plower), P64(*pupper));
+            (long long)*plower, (long long)*pupper);
       return DISPATCH_NOTFINISHED;
     }
     ASSERT0(LT_FUSSY,
@@ -515,12 +479,13 @@
     *pupper = myUb;
     *pstride = 1;
 
-    PRINT(LD_LOOP,
-          "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
-          "last %d\n",
-          GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
-          GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride),
-          *plast);
+    PRINT(
+        LD_LOOP,
+        "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
+        "last %d\n",
+        (int)GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+        (int)GetNumberOfWorkersInTeam(), (long long)*plower, (long long)*pupper,
+        (long long)*pstride, (int)*plast);
     return DISPATCH_NOTFINISHED;
   }
 
@@ -538,7 +503,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 
 // init
-EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t tid,
                                    int32_t schedule, int32_t lb, int32_t ub,
                                    int32_t st, int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_dispatch_init_4\n");
@@ -546,7 +511,7 @@
       loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
 }
 
-EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t tid,
                                     int32_t schedule, uint32_t lb, uint32_t ub,
                                     int32_t st, int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_dispatch_init_4u\n");
@@ -554,7 +519,7 @@
       loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
 }
 
-EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t tid,
                                    int32_t schedule, int64_t lb, int64_t ub,
                                    int64_t st, int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_dispatch_init_8\n");
@@ -562,7 +527,7 @@
       loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
 }
 
-EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t tid,
                                     int32_t schedule, uint64_t lb, uint64_t ub,
                                     int64_t st, int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_dispatch_init_8u\n");
@@ -571,53 +536,53 @@
 }
 
 // next
-EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t tid, int32_t *p_last,
+EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t tid, int32_t *p_last,
                                   int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
   PRINT0(LD_IO, "call kmpc_dispatch_next_4\n");
   return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
-      p_last, p_lb, p_ub, p_st);
+      tid, p_last, p_lb, p_ub, p_st);
 }
 
-EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t tid,
+EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t tid,
                                    int32_t *p_last, uint32_t *p_lb,
                                    uint32_t *p_ub, int32_t *p_st) {
   PRINT0(LD_IO, "call kmpc_dispatch_next_4u\n");
   return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
-      p_last, p_lb, p_ub, p_st);
+      tid, p_last, p_lb, p_ub, p_st);
 }
 
-EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t tid, int32_t *p_last,
+EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t tid, int32_t *p_last,
                                   int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
   PRINT0(LD_IO, "call kmpc_dispatch_next_8\n");
   return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
-      p_last, p_lb, p_ub, p_st);
+      tid, p_last, p_lb, p_ub, p_st);
 }
 
-EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t tid,
+EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t tid,
                                    int32_t *p_last, uint64_t *p_lb,
                                    uint64_t *p_ub, int64_t *p_st) {
   PRINT0(LD_IO, "call kmpc_dispatch_next_8u\n");
   return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
-      p_last, p_lb, p_ub, p_st);
+      tid, p_last, p_lb, p_ub, p_st);
 }
 
 // fini
-EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_dispatch_fini_4\n");
   omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
 }
 
-EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_dispatch_fini_4u\n");
   omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
 }
 
-EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_dispatch_fini_8\n");
   omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
 }
 
-EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_dispatch_fini_8u\n");
   omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
 }
@@ -626,151 +591,143 @@
 // KMP interface implementation (static loops)
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      int32_t *plower, int32_t *pupper,
                                      int32_t *pstride, int32_t incr,
                                      int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
   omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
-      isRuntimeUninitialized());
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      checkSPMDMode(loc), checkRuntimeUninitialized(loc));
 }
 
-EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
                                       int32_t schedtype, int32_t *plastiter,
                                       uint32_t *plower, uint32_t *pupper,
                                       int32_t *pstride, int32_t incr,
                                       int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
   omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
-      isRuntimeUninitialized());
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      checkSPMDMode(loc), checkRuntimeUninitialized(loc));
 }
 
-EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
                                      int32_t schedtype, int32_t *plastiter,
                                      int64_t *plower, int64_t *pupper,
                                      int64_t *pstride, int64_t incr,
                                      int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
   omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
-      isRuntimeUninitialized());
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      checkSPMDMode(loc), checkRuntimeUninitialized(loc));
 }
 
-EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
                                       int32_t schedtype, int32_t *plastiter,
                                       uint64_t *plower, uint64_t *pupper,
                                       int64_t *pstride, int64_t incr,
                                       int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
   omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
-      isRuntimeUninitialized());
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      checkSPMDMode(loc), checkRuntimeUninitialized(loc));
 }
 
 EXTERN
-void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                           int32_t schedtype, int32_t *plastiter,
                                           int32_t *plower, int32_t *pupper,
                                           int32_t *pstride, int32_t incr,
                                           int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_spmd\n");
   omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/true,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
-void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                            int32_t schedtype,
                                            int32_t *plastiter, uint32_t *plower,
                                            uint32_t *pupper, int32_t *pstride,
                                            int32_t incr, int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_spmd\n");
   omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/true,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
-void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                           int32_t schedtype, int32_t *plastiter,
                                           int64_t *plower, int64_t *pupper,
                                           int64_t *pstride, int64_t incr,
                                           int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_spmd\n");
   omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/true,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
-void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
                                            int32_t schedtype,
                                            int32_t *plastiter, uint64_t *plower,
                                            uint64_t *pupper, int64_t *pstride,
                                            int64_t incr, int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_spmd\n");
   omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/true,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
 void __kmpc_for_static_init_4_simple_generic(
-    kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+    kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
     int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr,
     int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_generic\n");
   omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/false,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
 void __kmpc_for_static_init_4u_simple_generic(
-    kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+    kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
     uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
     int32_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_generic\n");
   omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/false,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
 void __kmpc_for_static_init_8_simple_generic(
-    kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+    kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
     int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr,
     int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_generic\n");
   omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/false,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
 }
 
 EXTERN
 void __kmpc_for_static_init_8u_simple_generic(
-    kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+    kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
     uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
     int64_t chunk) {
   PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_generic\n");
   omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
-      schedtype, plastiter, plower, pupper, pstride, chunk,
-      /*IsSPMDExecutionMode=*/false,
-      /*IsRuntimeUninitialized=*/true);
+      global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+      /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
 }
 
-EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid) {
   PRINT0(LD_IO, "call kmpc_for_static_fini\n");
 }
 
@@ -792,21 +749,20 @@
 }
 }; // namespace
 
-EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc, int32_t gtid,
+EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid,
                                                   int32_t varNum, void *array) {
   PRINT0(LD_IO, "call to __kmpc_reduce_conditional_lastprivate(...)\n");
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
           "Expected non-SPMD mode + initialized runtime.");
 
   omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
-  int tid = GetOmpThreadId(GetLogicalThreadIdInBlock(), isSPMDMode(),
-                           isRuntimeUninitialized());
-  uint32_t NumThreads = GetNumberOfOmpThreads(
-      GetLogicalThreadIdInBlock(), isSPMDMode(), isRuntimeUninitialized());
+  int tid = GetLogicalThreadIdInBlock();
+  uint32_t NumThreads = GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+                                              checkRuntimeUninitialized(loc));
   uint64_t *Buffer = teamDescr.getLastprivateIterBuffer();
   for (unsigned i = 0; i < varNum; i++) {
     // Reset buffer.
-    if (tid == 0)
+    if (gtid == 0)
       *Buffer = 0; // Reset to minimum loop iteration value.
 
     // Barrier.
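
The static-loop code above hands each thread a chunk, advances its lower bound by a fixed stride on the next call, and reports DISPATCH_FINISHED once the bound passes the upper limit. A host-side sketch of the usual schedule(static, chunk) bound computation with unit increment; the names and layout are illustrative, not the runtime's exact code:

#include <cstdint>
#include <cstdio>
#include <utility>

// Bounds of the k-th chunk owned by thread `tid` for schedule(static, chunk)
// over [lb, ub] with increment +1. Returns an empty range (lo > hi) when done.
static std::pair<int64_t, int64_t> StaticChunk(int64_t lb, int64_t ub,
                                               int64_t chunk, int tid,
                                               int nthreads, int64_t k) {
  int64_t myLb = lb + (tid + k * (int64_t)nthreads) * chunk;
  int64_t myUb = myLb + chunk - 1;
  if (myLb > ub) return {1, 0};   // finished
  if (myUb > ub) myUb = ub;       // clamp the final chunk
  // The thread's next chunk starts nthreads * chunk iterations further on.
  return {myLb, myUb};
}

int main() {
  for (int64_t k = 0;; ++k) {
    auto [lo, hi] = StaticChunk(0, 10, 3, /*tid=*/1, /*nthreads=*/2, k);
    if (lo > hi) break;
    std::printf("thread 1, chunk %lld: [%lld, %lld]\n", (long long)k,
                (long long)lo, (long long)hi);
  }
  return 0;
}
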
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 5d95eb1..b0b1290 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -150,7 +150,7 @@
   PRINT(LD_PAR,
         "thread will execute parallel region with id %d in a team of "
         "%d threads\n",
-        newTaskDescr->ThreadId(), newTaskDescr->ThreadsInTeam());
+        (int)newTaskDescr->ThreadId(), (int)newTaskDescr->ThreadsInTeam());
 
   if (RequiresDataSharing && threadId % WARPSIZE == 0) {
     // Warp master initializes data sharing environment.
@@ -162,12 +162,16 @@
   }
 }
 
-EXTERN void __kmpc_spmd_kernel_deinit() {
+EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit() {
+  __kmpc_spmd_kernel_deinit_v2(isRuntimeInitialized());
+}
+
+EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
   // We're not going to pop the task descr stack of each thread since
   // there are no more parallel regions in SPMD mode.
   __syncthreads();
   int threadId = GetThreadIdInBlock();
-  if (isRuntimeUninitialized()) {
+  if (!RequiresOMPRuntime) {
     if (threadId == 0) {
       // Enqueue omp state object for use by another team.
       int slot = usedSlotIdx;
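
The __kmpc_spmd_kernel_deinit change above keeps the old entry point alive as a deprecated thin wrapper while the new _v2 variant takes the runtime requirement from the caller instead of re-deriving it. A small sketch of that ABI-compatibility pattern, with hypothetical names and a stand-in for the global-state query:

#include <cstdint>
#include <cstdio>

// New interface: the caller states explicitly whether the full runtime is needed.
extern "C" void example_deinit_v2(int16_t RequiresRuntime) {
  std::printf("deinit, full runtime %s\n", RequiresRuntime ? "yes" : "no");
}

// Old interface: preserved for already-compiled code, marked deprecated and
// forwarded to the new entry point with a value derived at run time.
extern "C" __attribute__((deprecated)) void example_deinit() {
  bool runtimeInitialized = true; // stand-in for querying global state
  example_deinit_v2(runtimeInitialized ? 1 : 0);
}

int main() {
  example_deinit_v2(0); // new callers pass the flag directly
  return 0;
}
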
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 2a6de28..b63feae 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -251,7 +251,6 @@
   INLINE omptarget_nvptx_WorkDescr &WorkDescr() {
     return workDescrForActiveParallel;
   }
-  INLINE omp_lock_t *CriticalLock() { return &criticalLock; }
   INLINE uint64_t *getLastprivateIterBuffer() { return &lastprivateIterBuffer; }
 
   // init
@@ -303,7 +302,6 @@
       levelZeroTaskDescr; // icv for team master initial thread
   omptarget_nvptx_WorkDescr
       workDescrForActiveParallel; // one, ONLY for the active par
-  omp_lock_t criticalLock;
   uint64_t lastprivateIterBuffer;
 
   __align__(16)
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 13e64e4..fbcbeab 100644
--- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -76,7 +76,7 @@
   else
     *NumLanes = ConvergentSize;
   ASSERT(LT_FUSSY, *NumLanes > 0, "bad thread request of %d threads",
-         *NumLanes);
+         (int)*NumLanes);
 
   // Set to true for lanes participating in the simd region.
   bool isActive = false;
@@ -152,7 +152,7 @@
   else
     NumThreads = ConvergentSize;
   ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
-         NumThreads);
+         (int)NumThreads);
 
   // Set to true for workers participating in the parallel region.
   bool isActive = false;
@@ -260,7 +260,7 @@
   }
 
   ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
-         NumThreads);
+         (int)NumThreads);
   ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
           "only team master can create parallel");
 
@@ -307,7 +307,7 @@
     PRINT(LD_PAR,
           "thread will execute parallel region with id %d in a team of "
           "%d threads\n",
-          newTaskDescr->ThreadId(), newTaskDescr->NThreads());
+          (int)newTaskDescr->ThreadId(), (int)newTaskDescr->NThreads());
 
     isActive = true;
   }
@@ -332,11 +332,11 @@
 // support for parallel that goes sequential
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {
   PRINT0(LD_IO, "call to __kmpc_serialized_parallel\n");
 
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
+  if (checkRuntimeUninitialized(loc)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
     omptarget_nvptx_simpleThreadPrivateContext->IncParLevel();
     return;
@@ -370,12 +370,12 @@
                                                              newTaskDescr);
 }
 
-EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
+EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
                                            uint32_t global_tid) {
   PRINT0(LD_IO, "call to __kmpc_end_serialized_parallel\n");
 
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
+  if (checkRuntimeUninitialized(loc)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
     omptarget_nvptx_simpleThreadPrivateContext->DecParLevel();
     return;
@@ -393,11 +393,11 @@
   currTaskDescr->RestoreLoopData();
 }
 
-EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) {
   PRINT0(LD_IO, "call to __kmpc_parallel_level\n");
 
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
+  if (checkRuntimeUninitialized(loc)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
     return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
   }
@@ -417,27 +417,29 @@
 // cached by the compiler and used when calling the runtime. On nvptx
 // it's cheap to recalculate this value so we never use the result
 // of this call.
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc) {
-  return GetLogicalThreadIdInBlock();
+EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
+  int tid = GetLogicalThreadIdInBlock();
+  return GetOmpThreadId(tid, checkSPMDMode(loc),
+                        checkRuntimeUninitialized(loc));
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // push params
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
                                     int32_t num_threads) {
   PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads);
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
   tid = GetLogicalThreadIdInBlock();
   omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) =
       num_threads;
 }
 
-EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t tid,
                                    int32_t simd_limit) {
-  PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", simd_limit);
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+  PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", (int)simd_limit);
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
   tid = GetLogicalThreadIdInBlock();
   omptarget_nvptx_threadPrivateContext->SimdLimitForNextSimd(tid) = simd_limit;
 }
@@ -445,14 +447,14 @@
 // Do nothing. The host guarantees we started the requested number of
 // teams and we only need inspection of gridDim.
 
-EXTERN void __kmpc_push_num_teams(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid,
                                   int32_t num_teams, int32_t thread_limit) {
-  PRINT(LD_IO, "call kmpc_push_num_teams %d\n", num_teams);
+  PRINT(LD_IO, "call kmpc_push_num_teams %d\n", (int)num_teams);
   ASSERT0(LT_FUSSY, FALSE,
           "should never have anything with new teams on device");
 }
 
-EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t tid,
+EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t tid,
                                   int proc_bind) {
-  PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", proc_bind);
+  PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", (int)proc_bind);
 }
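
Under the simple (uninitialized) runtime, the serialized-parallel entry points above only bump and drop a per-team nesting counter (IncParLevel/DecParLevel), and __kmpc_parallel_level reports it. A minimal sketch of that counter, assuming single-threaded access as in a serialized region:

#include <cstdint>
#include <cassert>
#include <cstdio>

// Stand-in for the per-team context used by the simple runtime.
struct SimpleContext {
  uint16_t ParLevel = 0;
  void IncParLevel() { ++ParLevel; }
  void DecParLevel() { assert(ParLevel > 0); --ParLevel; }
  uint16_t GetParallelLevel() const { return ParLevel; }
};

int main() {
  SimpleContext ctx;
  ctx.IncParLevel();   // enter a serialized parallel region
  ctx.IncParLevel();   // enter a nested serialized region
  std::printf("level %u\n", (unsigned)ctx.GetParallelLevel()); // prints 2
  ctx.DecParLevel();
  ctx.DecParLevel();   // back to the sequential part
  return 0;
}
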
diff --git a/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index 21a419c..c0d22df 100644
--- a/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -31,7 +31,7 @@
 }
 
 EXTERN
-int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
+int32_t __kmpc_reduce_gpu(kmp_Ident *loc, int32_t global_tid, int32_t num_vars,
                           size_t reduce_size, void *reduce_data,
                           void *reduce_array_size, kmp_ReductFctPtr *reductFct,
                           kmp_CriticalName *lck) {
@@ -40,7 +40,8 @@
   int numthread;
   if (currTaskDescr->IsParallelConstruct()) {
     numthread =
-        GetNumberOfOmpThreads(threadId, isSPMDMode(), isRuntimeUninitialized());
+        GetNumberOfOmpThreads(threadId, checkSPMDMode(loc),
+                              checkRuntimeUninitialized(loc));
   } else {
     numthread = GetNumberOfOmpTeams();
   }
@@ -55,12 +56,12 @@
 }
 
 EXTERN
-int32_t __kmpc_reduce_combined(kmp_Indent *loc) {
+int32_t __kmpc_reduce_combined(kmp_Ident *loc) {
   return threadIdx.x == 0 ? 2 : 0;
 }
 
 EXTERN
-int32_t __kmpc_reduce_simd(kmp_Indent *loc) {
+int32_t __kmpc_reduce_simd(kmp_Ident *loc) {
   return (threadIdx.x % 32 == 0) ? 1 : 0;
 }
 
@@ -75,12 +76,12 @@
 }
 
 EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
-  int lo, hi;
-  asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
-  hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
-  lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
-  asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
-  return val;
+   int lo, hi;
+   asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
+   hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
+   lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
+   asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
+   return val;
 }
 
 static INLINE void gpu_regular_warp_reduce(void *reduce_data,
@@ -231,8 +232,7 @@
 
   // Get the OMP thread Id. This is different from BlockThreadId in the case of
   // an L2 parallel region.
-  return GetOmpThreadId(BlockThreadId, isSPMDExecutionMode,
-                        isRuntimeUninitialized) == 0;
+  return global_tid == 0;
 #endif // __CUDA_ARCH__ >= 700
 }
 
@@ -429,3 +429,22 @@
                                    /*isSPMDExecutionMode=*/false,
                                    /*isRuntimeUninitialized=*/true);
 }
+
+EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
+                                                       int32_t global_tid,
+                                                       kmp_CriticalName *crit) {
+  if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0)
+    return 0;
+  // The master thread of the team actually does the reduction.
+  while (atomicCAS((uint32_t *)crit, 0, 1))
+    ;
+  return 1;
+}
+
+EXTERN void
+__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
+                                            kmp_CriticalName *crit) {
+  __threadfence_system();
+  (void)atomicExch((uint32_t *)crit, 0);
+}
+
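
The new simple teams-reduction entry points above take a cross-team spin lock: one thread per team loops on an atomic compare-and-swap of the kmp_CriticalName word from 0 to 1, and the end routine fences and swaps it back to 0. A host-side C++ analogue of that acquire/release pair, sketched with std::atomic rather than the CUDA atomics and __threadfence_system the device code uses:

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

std::atomic<uint32_t> crit{0}; // plays the role of the critical-name word
long total = 0;

void acquire() {
  uint32_t expected = 0;
  // Spin until we swing the word from 0 to 1.
  while (!crit.compare_exchange_weak(expected, 1, std::memory_order_acquire))
    expected = 0;
}

void release() {
  // The device code issues a system-wide fence before the exchange; the
  // release store plays that role here.
  crit.store(0, std::memory_order_release);
}

int main() {
  std::vector<std::thread> teams;
  for (int t = 0; t < 4; ++t)
    teams.emplace_back([] { acquire(); total += 1; release(); });
  for (auto &th : teams) th.join();
  std::printf("total %ld\n", total); // 4
  return 0;
}
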
diff --git a/libomptarget/deviceRTLs/nvptx/src/supporti.h b/libomptarget/deviceRTLs/nvptx/src/supporti.h
index c93657e..e2ea2d1 100644
--- a/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -33,6 +33,59 @@
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
+INLINE bool checkSPMDMode(kmp_Ident *loc) {
+  if (!loc)
+    return isSPMDMode();
+
+  // If SPMD is true then we are not in the UNDEFINED state so
+  // we can return immediately.
+  if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+    return true;
+
+  // Not in SPMD mode, and the full runtime is required: this is a
+  // valid combination of flags, so we can return immediately.
+  if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+    return false;
+
+  // We are in the UNDEFINED state; fall back to the global mode.
+  return isSPMDMode();
+}
+
+INLINE bool checkGenericMode(kmp_Ident *loc) {
+  return !checkSPMDMode(loc);
+}
+
+INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
+  if (!loc)
+    return isRuntimeUninitialized();
+
+  // If the full runtime is required then we know we cannot be
+  // in the UNDEFINED state. We can return immediately.
+  if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+    return false;
+
+  // The simple runtime is in use, so check whether we are in SPMD
+  // mode or not. SPMD mode with the simple runtime means the
+  // runtime is uninitialized; non-SPMD would put us in the
+  // UNDEFINED state that marks orphaned functions.
+  if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+    return true;
+
+  // We are in the UNDEFINED state. Undefined is denoted by
+  // non-SPMD + noRuntimeRequired, a combination that cannot
+  // actually happen; the UNDEFINED state is used to mark orphaned
+  // functions. Fall back to the global flag.
+  return isRuntimeUninitialized();
+}
+
+INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
+  return !checkRuntimeUninitialized(loc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
 // support: get info from machine
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -78,8 +131,6 @@
 // id is GetMasterThreadID()) calls this routine, we return 0 because
 // it is a shadow for the first worker.
 INLINE int GetLogicalThreadIdInBlock() {
-  //  return GetThreadIdInBlock() % GetMasterThreadID();
-
   // Implemented using control flow (predication) instead of with a modulo
   // operation.
   int tid = GetThreadIdInBlock();
@@ -180,19 +231,20 @@
 {
   // compute the necessary padding to satisfy alignment constraint
   ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0,
-         "alignment %ld is not a power of 2\n", alignment);
+         "alignment %lu is not a power of 2\n", alignment);
   return (~(unsigned long)size + 1) & (alignment - 1);
 }
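
The padding computation above rounds a size up to a power-of-two alignment: for such alignments, (~size + 1) & (alignment - 1) is exactly (-size) mod alignment. A small check of the identity (a sketch, not the runtime's code):

#include <cassert>
#include <cstdio>

static unsigned long PadBytesExample(unsigned long size, unsigned long alignment) {
  assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of 2");
  return (~size + 1) & (alignment - 1); // == (alignment - size % alignment) % alignment
}

int main() {
  std::printf("%lu\n", PadBytesExample(13, 8)); // 3: 13 + 3 == 16
  std::printf("%lu\n", PadBytesExample(16, 8)); // 0: already a multiple of 8
  return 0;
}
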
 
 INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
 {
   void *ptr = malloc(size);
-  PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg, P64(ptr));
+  PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg,
+        (unsigned long long)ptr);
   return ptr;
 }
 
 INLINE void *SafeFree(void *ptr, const char *msg) {
-  PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", P64(ptr), msg);
+  PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
   free(ptr);
   return NULL;
 }
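
The checkSPMDMode/checkRuntimeUninitialized helpers added to supporti.h above read two bits of loc->reserved_2: one for SPMD mode and one for the simple (uninitialized) runtime, with non-SPMD + simple runtime reserved as the UNDEFINED marker for orphaned functions. A small decoder of the four combinations; the bit values below are assumptions for illustration, not the actual KMP_IDENT_* constants:

#include <cstdint>
#include <cstdio>

// Illustrative bit positions only; the real KMP_IDENT_* values live in the
// runtime headers.
constexpr uint32_t EX_SPMD_MODE      = 0x01;
constexpr uint32_t EX_SIMPLE_RT_MODE = 0x02;

const char *Decode(uint32_t reserved_2) {
  bool spmd   = reserved_2 & EX_SPMD_MODE;
  bool simple = reserved_2 & EX_SIMPLE_RT_MODE;
  if (spmd && simple)   return "SPMD, runtime uninitialized";
  if (spmd && !simple)  return "SPMD, full runtime";
  if (!spmd && !simple) return "generic, full runtime";
  return "UNDEFINED (orphaned function): fall back to the global mode";
}

int main() {
  for (uint32_t bits = 0; bits < 4; ++bits)
    std::printf("%u -> %s\n", bits, Decode(bits));
  return 0;
}
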
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 0a99405..7cdb7ff 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -17,11 +17,11 @@
 // KMP Ordered calls
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_ordered\n");
 }
 
-EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_end_ordered\n");
 }
 
@@ -33,16 +33,16 @@
 // FIXME: what if not all threads (warps) participate in the barrier?
 // We may need to implement it differently
 
-EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
   PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
   __kmpc_barrier(loc_ref, tid);
   PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
   return 0;
 }
 
-EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, isSPMDMode(),
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
+  if (checkRuntimeUninitialized(loc_ref)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
             "Expected SPMD mode with uninitialized runtime.");
     __kmpc_barrier_simple_spmd(loc_ref, tid);
   } else {
@@ -50,9 +50,9 @@
     omptarget_nvptx_TaskDescr *currTaskDescr =
         omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
     int numberOfActiveOMPThreads = GetNumberOfOmpThreads(
-        tid, isSPMDMode(), /*isRuntimeUninitialized=*/false);
+        tid, checkSPMDMode(loc_ref), /*isRuntimeUninitialized=*/false);
     if (numberOfActiveOMPThreads > 1) {
-      if (isSPMDMode()) {
+      if (checkSPMDMode(loc_ref)) {
         __kmpc_barrier_simple_spmd(loc_ref, tid);
       } else {
         // The #threads parameter must be rounded up to the WARPSIZE.
@@ -61,7 +61,7 @@
 
         PRINT(LD_SYNC,
               "call kmpc_barrier with %d omp threads, sync parameter %d\n",
-              numberOfActiveOMPThreads, threads);
+              (int)numberOfActiveOMPThreads, (int)threads);
         // Barrier #1 is for synchronization among active threads.
         named_sync(L1_BARRIER, threads);
       }
@@ -72,7 +72,7 @@
 
 // Emit a simple barrier call in SPMD mode.  Assumes the caller is in an L0
 // parallel region and that all worker threads participate.
-EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
   PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
   __syncthreads();
   PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
@@ -80,7 +80,7 @@
 
 // Emit a simple barrier call in Generic mode.  Assumes the caller is in an L0
 // parallel region and that all worker threads participate.
-EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
   int numberOfActiveOMPThreads = GetNumberOfThreadsInBlock() - WARPSIZE;
   // The #threads parameter must be rounded up to the WARPSIZE.
   int threads =
@@ -89,7 +89,7 @@
   PRINT(LD_SYNC,
         "call kmpc_barrier_simple_generic with %d omp threads, sync parameter "
         "%d\n",
-        numberOfActiveOMPThreads, threads);
+        (int)numberOfActiveOMPThreads, (int)threads);
   // Barrier #1 is for synchronization among active threads.
   named_sync(L1_BARRIER, threads);
   PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
@@ -99,37 +99,30 @@
 // KMP MASTER
 ////////////////////////////////////////////////////////////////////////////////
 
-INLINE int32_t IsMaster() {
-  // only the team master updates the state
-  int tid = GetLogicalThreadIdInBlock();
-  int ompThreadId = GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized());
-  return IsTeamMaster(ompThreadId);
-}
-
-EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid) {
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
   PRINT0(LD_IO, "call kmpc_master\n");
-  return IsMaster();
+  return IsTeamMaster(global_tid);
 }
 
-EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
   PRINT0(LD_IO, "call kmpc_end_master\n");
-  ASSERT0(LT_FUSSY, IsMaster(), "expected only master here");
+  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // KMP SINGLE
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid) {
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
   PRINT0(LD_IO, "call kmpc_single\n");
   // decide to implement single with master; master get the single
-  return IsMaster();
+  return IsTeamMaster(global_tid);
 }
 
-EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
   PRINT0(LD_IO, "call kmpc_end_single\n");
   // decide to implement single with master: master get the single
-  ASSERT0(LT_FUSSY, IsMaster(), "expected only master here");
+  ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
   // sync barrier is explicitly called... so that is not a problem
 }
 
@@ -137,9 +130,9 @@
 // Flush
 ////////////////////////////////////////////////////////////////////////////////
 
-EXTERN void __kmpc_flush(kmp_Indent *loc) {
+EXTERN void __kmpc_flush(kmp_Ident *loc) {
   PRINT0(LD_IO, "call kmpc_flush\n");
-  __threadfence_block();
+  __threadfence_system();
 }
 
 ////////////////////////////////////////////////////////////////////////////////
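
Both barrier paths above round the number of participating OpenMP threads up to a multiple of WARPSIZE before issuing the named synchronization. A one-line round-up helper, sketched on the host with an assumed warp size of 32; the device code uses its own WARPSIZE constant:

#include <cstdio>

constexpr int kWarpSize = 32; // assumption for this sketch

// Smallest multiple of kWarpSize that is >= nthreads.
static int RoundToWarps(int nthreads) {
  return (nthreads + kWarpSize - 1) / kWarpSize * kWarpSize;
}

int main() {
  std::printf("%d %d %d\n", RoundToWarps(1), RoundToWarps(32), RoundToWarps(33));
  // prints: 32 32 64
  return 0;
}
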
diff --git a/libomptarget/deviceRTLs/nvptx/src/task.cu b/libomptarget/deviceRTLs/nvptx/src/task.cu
index f0431ab..2f47d4b 100644
--- a/libomptarget/deviceRTLs/nvptx/src/task.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/task.cu
@@ -31,7 +31,7 @@
 #include "omptarget-nvptx.h"
 
 EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
-    kmp_Indent *loc,     // unused
+    kmp_Ident *loc,     // unused
     uint32_t global_tid, // unused
     int32_t flag, // unused (because in our impl, all are immediately exec
     size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable,
@@ -39,14 +39,15 @@
   PRINT(LD_IO,
         "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
         "fct 0x%llx)\n",
-        P64(sizeOfTaskInclPrivate), P64(sizeOfSharedTable), P64(taskSub));
+        (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
+        (unsigned long long)taskSub);
   // want task+priv to be a multiple of 8 bytes
   size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *));
   sizeOfTaskInclPrivate += padForTaskInclPriv;
   size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable;
   ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
          "need task descr of size %d to be a multiple of %d\n",
-         sizeof(omptarget_nvptx_TaskDescr), sizeof(void *));
+         (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));
   size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize;
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
       (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
@@ -63,25 +64,27 @@
   newKmpTaskDescr->sub = taskSub;
   newKmpTaskDescr->destructors = NULL;
   PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
-        P64(newKmpTaskDescr), P64(newExplicitTaskDescr));
+        (unsigned long long)newKmpTaskDescr,
+        (unsigned long long)newExplicitTaskDescr);
 
   return newKmpTaskDescr;
 }
 
-EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
                                kmp_TaskDescr *newKmpTaskDescr) {
   return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0,
                                    0);
 }
 
-EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
                                          kmp_TaskDescr *newKmpTaskDescr,
                                          int32_t depNum, void *depList,
                                          int32_t noAliasDepNum,
                                          void *noAliasDepList) {
   PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
         P64(newKmpTaskDescr));
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
+          "Runtime must be initialized.");
   // 1. get explicit task descr from kmp task descr
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
       (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -101,10 +104,11 @@
 
   // 3. call sub
   PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
-        P64(newKmpTaskDescr->sub), P64(newKmpTaskDescr));
+        (unsigned long long)newKmpTaskDescr->sub,
+        (unsigned long long)newKmpTaskDescr);
   newKmpTaskDescr->sub(0, newKmpTaskDescr);
   PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
-        P64(newKmpTaskDescr->sub));
+        (unsigned long long)newKmpTaskDescr->sub);
 
   // 4. pop context
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
@@ -114,11 +118,12 @@
   return 0;
 }
 
-EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
                                       kmp_TaskDescr *newKmpTaskDescr) {
   PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
-        P64(newKmpTaskDescr));
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+        (unsigned long long)newKmpTaskDescr);
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
+          "Runtime must be initialized.");
   // 1. get explicit task descr from kmp task descr
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
       (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -139,11 +144,12 @@
   // 4 & 5 ... done in complete
 }
 
-EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
                                          kmp_TaskDescr *newKmpTaskDescr) {
   PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
-        P64(newKmpTaskDescr));
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+        (unsigned long long)newKmpTaskDescr);
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
+          "Runtime must be initialized.");
   // 1. get explicit task descr from kmp task descr
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
       (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -164,37 +170,37 @@
   SafeFree(newExplicitTaskDescr, "explicit task descriptor");
 }
 
-EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
                                  int32_t depNum, void *depList,
                                  int32_t noAliasDepNum, void *noAliasDepList) {
   PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
   // nothing to do as all our tasks are executed as final
 }
 
-EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
   PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
   // nothing to do as all our tasks are executed as final
 }
 
-EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
   PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
   // nothing to do as all our tasks are executed as final
 }
 
-EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
                                     int end_part) {
   PRINT0(LD_IO, "call to __kmpc_taskyield()\n");
   // do nothing: tasks are executed immediately, no yielding allowed
   return 0;
 }
 
-EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
   PRINT0(LD_IO, "call to __kmpc_taskwait()\n");
   // nothing to do as all our tasks are executed as final
   return 0;
 }
 
-EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
                             kmp_TaskDescr *newKmpTaskDescr, int if_val,
                             uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
                             int32_t sched, uint64_t grainsize, void *task_dup) {
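
Several task entry points above recover the enclosing omptarget_nvptx_ExplicitTaskDescr from the kmp_TaskDescr pointer handed back by the compiler by subtracting a fixed byte offset (SUB_BYTES). A generic sketch of that container-from-member computation with hypothetical types; the real descriptor layout is defined in the runtime headers:

#include <cassert>
#include <cstddef>

struct InnerDescr { int payload; };
struct OuterDescr {          // hypothetical wrapper; the member offset is what matters
  int bookkeeping;
  InnerDescr kmpTaskDescr;   // the pointer the compiler sees points here
};

// Recover the wrapper from a pointer to its embedded member.
static OuterDescr *FromInner(InnerDescr *inner) {
  return reinterpret_cast<OuterDescr *>(
      reinterpret_cast<char *>(inner) - offsetof(OuterDescr, kmpTaskDescr));
}

int main() {
  OuterDescr outer{42, {7}};
  assert(FromInner(&outer.kmpTaskDescr) == &outer);
  return 0;
}
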
diff --git a/libomptarget/src/omptarget.cpp b/libomptarget/src/omptarget.cpp
index a1ffd04..a23d82b 100644
--- a/libomptarget/src/omptarget.cpp
+++ b/libomptarget/src/omptarget.cpp
@@ -638,19 +638,20 @@
         assert(tgtIdx != -1 && "Base address must be translated already.");
         // The parent lambda must be processed already and it must be the last
         // in tgt_args and tgt_offsets arrays.
-        void *HstPtrBegin = args[i];
-        void *HstPtrBase = args_base[i];
+        void *HstPtrVal = args[i];
+        void *HstPtrBegin = args_base[i];
+        void *HstPtrBase = args[idx];
         bool IsLast; // unused.
         void *TgtPtrBase =
             (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
         DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
         uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
         void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
-        void *Pointer_TgtPtrBegin = Device.getTgtPtrBegin(
-            *(void **)HstPtrBegin, arg_sizes[i], IsLast, false);
+        void *Pointer_TgtPtrBegin =
+            Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false);
         if (!Pointer_TgtPtrBegin) {
           DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
-             DPxPTR(*(void **)HstPtrBegin));
+             DPxPTR(HstPtrVal));
           continue;
         }
         DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
diff --git a/runtime/cmake/LibompHandleFlags.cmake b/runtime/cmake/LibompHandleFlags.cmake
index efe2099..0b829a5 100644
--- a/runtime/cmake/LibompHandleFlags.cmake
+++ b/runtime/cmake/LibompHandleFlags.cmake
@@ -50,6 +50,7 @@
   libomp_append(flags_local /GS LIBOMP_HAVE_GS_FLAG)
   libomp_append(flags_local /EHsc LIBOMP_HAVE_EHSC_FLAG)
   libomp_append(flags_local /Oy- LIBOMP_HAVE_OY__FLAG)
+  libomp_append(flags_local -mrtm LIBOMP_HAVE_MRTM_FLAG)
   # Intel(R) C Compiler flags
   libomp_append(flags_local /Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG)
   libomp_append(flags_local -Qoption,cpp,--extended_float_types LIBOMP_HAVE_EXTENDED_FLOAT_TYPES_FLAG)
@@ -158,6 +159,11 @@
   if(${IA32})
     libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY)
   endif()
+  IF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly")
+    libomp_append(libflags_local "-Wl,--no-as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG)
+    libomp_append(libflags_local "-lm")
+    libomp_append(libflags_local "-Wl,--as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG)
+  ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly")
   IF(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD")
     libomp_append(libflags_local -lm)
   ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD")
diff --git a/runtime/cmake/LibompMicroTests.cmake b/runtime/cmake/LibompMicroTests.cmake
index 0918fdd..bdecf7f 100644
--- a/runtime/cmake/LibompMicroTests.cmake
+++ b/runtime/cmake/LibompMicroTests.cmake
@@ -176,6 +176,9 @@
 elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
   set(libomp_expected_library_deps libc.so.12 libpthread.so.1 libm.so.0)
   libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
+elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly")
+  set(libomp_expected_library_deps libc.so.8 libpthread.so.0 libm.so.4)
+  libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
 elseif(APPLE)
   set(libomp_expected_library_deps /usr/lib/libSystem.B.dylib)
 elseif(WIN32)
diff --git a/runtime/cmake/config-ix.cmake b/runtime/cmake/config-ix.cmake
index 5415e57..019c83c 100644
--- a/runtime/cmake/config-ix.cmake
+++ b/runtime/cmake/config-ix.cmake
@@ -73,13 +73,16 @@
 libomp_check_architecture_flag(-mmic LIBOMP_HAVE_MMIC_FLAG)
 libomp_check_architecture_flag(-m32 LIBOMP_HAVE_M32_FLAG)
 if(WIN32)
-  # Check Windows MSVC style flags.
-  check_c_compiler_flag(/TP LIBOMP_HAVE_TP_FLAG)
-  check_cxx_compiler_flag(/EHsc LIBOMP_HAVE_EHSC_FLAG)
-  check_cxx_compiler_flag(/GS LIBOMP_HAVE_GS_FLAG)
-  check_cxx_compiler_flag(/Oy- LIBOMP_HAVE_Oy__FLAG)
-  check_cxx_compiler_flag(/arch:SSE2 LIBOMP_HAVE_ARCH_SSE2_FLAG)
-  check_cxx_compiler_flag(/Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG)
+  if(MSVC)
+    # Check Windows MSVC style flags.
+    check_c_compiler_flag(/TP LIBOMP_HAVE_TP_FLAG)
+    check_cxx_compiler_flag(/EHsc LIBOMP_HAVE_EHSC_FLAG)
+    check_cxx_compiler_flag(/GS LIBOMP_HAVE_GS_FLAG)
+    check_cxx_compiler_flag(/Oy- LIBOMP_HAVE_Oy__FLAG)
+    check_cxx_compiler_flag(/arch:SSE2 LIBOMP_HAVE_ARCH_SSE2_FLAG)
+    check_cxx_compiler_flag(/Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG)
+  endif()
+  check_c_compiler_flag(-mrtm LIBOMP_HAVE_MRTM_FLAG)
   # It is difficult to create a dummy masm assembly file
   # and then check the MASM assembler to see if these flags exist and work,
   # so we assume they do for Windows.
diff --git a/runtime/src/dllexports b/runtime/src/dllexports
index 1108930..963ac61 100644
--- a/runtime/src/dllexports
+++ b/runtime/src/dllexports
@@ -405,6 +405,7 @@
         __kmpc_task_reduction_get_th_data   269
 # USED FOR 4.5 __kmpc_critical_with_hint    270
         __kmpc_get_target_offload           271
+        __kmpc_omp_reg_task_with_affinity   272
     %endif
 %endif
 
@@ -546,6 +547,14 @@
     omp_get_default_allocator               893
     omp_alloc                               894
     omp_free                                895
+    omp_set_affinity_format                 748
+    omp_get_affinity_format                 749
+    omp_display_affinity                    750
+    omp_capture_affinity                    751
+    ompc_set_affinity_format                752
+    ompc_get_affinity_format                753
+    ompc_display_affinity                   754
+    ompc_capture_affinity                   755
 
     OMP_NULL_ALLOCATOR                     DATA
     omp_default_mem_alloc                  DATA
diff --git a/runtime/src/i18n/en_US.txt b/runtime/src/i18n/en_US.txt
index 067cb94..3e5283e 100644
--- a/runtime/src/i18n/en_US.txt
+++ b/runtime/src/i18n/en_US.txt
@@ -425,6 +425,7 @@
 AffHWSubsetManyTiles         "KMP_HW_SUBSET ignored: too many L2 Caches requested."
 AffHWSubsetManyProcs         "KMP_HW_SUBSET ignored: too many Procs requested."
 HierSchedInvalid             "Hierarchy ignored: unsupported level: %1$s."
+AffFormatDefault             "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}"
 
 
 # --------------------------------------------------------------------------------------------------
diff --git a/runtime/src/include/50/omp.h.var b/runtime/src/include/50/omp.h.var
index 7a626bd..81b6c85 100644
--- a/runtime/src/include/50/omp.h.var
+++ b/runtime/src/include/50/omp.h.var
@@ -25,6 +25,11 @@
     extern "C" {
 #   endif
 
+#   define omp_set_affinity_format   ompc_set_affinity_format
+#   define omp_get_affinity_format   ompc_get_affinity_format
+#   define omp_display_affinity      ompc_display_affinity
+#   define omp_capture_affinity      ompc_capture_affinity
+
 #   if defined(_WIN32)
 #       define __KAI_KMPC_CONVENTION __cdecl
 #       ifndef __KMP_IMP
@@ -235,6 +240,12 @@
     extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator);
 #endif
 
+    /* OpenMP 5.0 Affinity Format */
+    extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);
+    extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);
+    extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *);
+    extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *);
+
 #   undef __KAI_KMPC_CONVENTION
 #   undef __KMP_IMP
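
The omp.h additions above declare the OpenMP 5.0 affinity-format entry points and map the omp_* names onto the ompc_* implementations. A minimal usage sketch, assuming an OpenMP 5.0 runtime and the spec's short-name field specifiers (%P process id, %i native thread id, %n thread number, %A affinity set):

#include <omp.h>
#include <cstdio>

int main() {
  // Describe how each thread should report its binding.
  omp_set_affinity_format("pid %P tid %i thread %n bound to {%A}");

  #pragma omp parallel num_threads(2)
  {
    char buf[256];
    // A null format falls back to the format set above.
    size_t n = omp_capture_affinity(buf, sizeof(buf), /*format=*/nullptr);
    if (n < sizeof(buf))
      std::printf("%s\n", buf);   // or: omp_display_affinity(nullptr);
  }
  return 0;
}
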
 
diff --git a/runtime/src/include/50/omp_lib.f.var b/runtime/src/include/50/omp_lib.f.var
index 8a02b62..d5a8057 100644
--- a/runtime/src/include/50/omp_lib.f.var
+++ b/runtime/src/include/50/omp_lib.f.var
@@ -375,6 +375,27 @@
             integer (kind=omp_allocator_kind) omp_get_default_allocator
           end function omp_get_default_allocator
 
+          subroutine omp_set_affinity_format(format)
+            character (len=*) format
+          end subroutine omp_set_affinity_format
+
+          function omp_get_affinity_format(buffer)
+            use omp_lib_kinds
+            character (len=*) buffer
+            integer (kind=kmp_size_t_kind) omp_get_affinity_format
+          end function omp_get_affinity_format
+
+          subroutine omp_display_affinity(format)
+            character (len=*) format
+          end subroutine omp_display_affinity
+
+          function omp_capture_affinity(buffer, format)
+            use omp_lib_kinds
+            character (len=*) format
+            character (len=*) buffer
+            integer (kind=kmp_size_t_kind) omp_capture_affinity
+          end function omp_capture_affinity
+
 !         ***
 !         *** kmp_* entry points
 !         ***
@@ -594,6 +615,10 @@
 !dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
 !dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority
 !dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool
+!dec$ attributes alias:'OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format
+!dec$ attributes alias:'OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format
+!dec$ attributes alias:'OMP_DISPLAY_AFFINITY' :: omp_display_affinity
+!dec$ attributes alias:'OMP_CAPTURE_AFFINITY' :: omp_capture_affinity
 
 !dec$ attributes alias:'omp_init_lock' :: omp_init_lock
 !dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint
@@ -675,6 +700,10 @@
 !dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
 !dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority
 !dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool
+!dec$ attributes alias:'_OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format
+!dec$ attributes alias:'_OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format
+!dec$ attributes alias:'_OMP_DISPLAY_AFFINITY' :: omp_display_affinity
+!dec$ attributes alias:'_OMP_CAPTURE_AFFINITY' :: omp_capture_affinity
 
 !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
 !dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
@@ -758,6 +787,10 @@
 !dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
 !dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device
 !dec$ attributes alias:'omp_get_max_task_priority_'::omp_get_max_task_priority
+!dec$ attributes alias:'omp_set_affinity_format_' :: omp_set_affinity_format
+!dec$ attributes alias:'omp_get_affinity_format_' :: omp_get_affinity_format
+!dec$ attributes alias:'omp_display_affinity_' :: omp_display_affinity
+!dec$ attributes alias:'omp_capture_affinity_' :: omp_capture_affinity
 
 !dec$ attributes alias:'omp_init_lock_'::omp_init_lock
 !dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint
@@ -852,6 +885,10 @@
 !dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
 !dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
 !dec$ attributes alias:'_omp_control_tool_'::omp_control_tool
+!dec$ attributes alias:'_omp_set_affinity_format_' :: omp_set_affinity_format
+!dec$ attributes alias:'_omp_get_affinity_format_' :: omp_get_affinity_format
+!dec$ attributes alias:'_omp_display_affinity_' :: omp_display_affinity
+!dec$ attributes alias:'_omp_capture_affinity_' :: omp_capture_affinity
 
 !dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
 !dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
diff --git a/runtime/src/include/50/omp_lib.f90.var b/runtime/src/include/50/omp_lib.f90.var
index 624774d..afc6d67 100644
--- a/runtime/src/include/50/omp_lib.f90.var
+++ b/runtime/src/include/50/omp_lib.f90.var
@@ -391,6 +391,27 @@
             integer (kind=omp_allocator_kind) omp_get_default_allocator
           end function omp_get_default_allocator
 
+          subroutine omp_set_affinity_format(format)
+            character (len=*) :: format
+          end subroutine omp_set_affinity_format
+
+          function omp_get_affinity_format(buffer)
+            use omp_lib_kinds
+            character (len=*) :: buffer
+            integer (kind=kmp_size_t_kind) :: omp_get_affinity_format
+          end function omp_get_affinity_format
+
+          subroutine omp_display_affinity(format)
+            character (len=*) :: format
+          end subroutine omp_display_affinity
+
+          function omp_capture_affinity(buffer, format)
+            use omp_lib_kinds
+            character (len=*) :: format
+            character (len=*) :: buffer
+            integer (kind=kmp_size_t_kind) :: omp_capture_affinity
+          end function omp_capture_affinity
+
 !         ***
 !         *** kmp_* entry points
 !         ***
diff --git a/runtime/src/include/50/omp_lib.h.var b/runtime/src/include/50/omp_lib.h.var
index 0e4c2c6..11dbc0a 100644
--- a/runtime/src/include/50/omp_lib.h.var
+++ b/runtime/src/include/50/omp_lib.h.var
@@ -424,6 +424,27 @@
           integer (kind=omp_allocator_kind) omp_get_default_allocator
         end function omp_get_default_allocator
 
+        subroutine omp_set_affinity_format(format)
+          character (len=*) :: format
+        end subroutine omp_set_affinity_format
+
+        function omp_get_affinity_format(buffer)
+          import
+          character (len=*) :: buffer
+          integer (kind=kmp_size_t_kind) :: omp_get_affinity_format
+        end function omp_get_affinity_format
+
+        subroutine omp_display_affinity(format)
+          character (len=*) :: format
+        end subroutine omp_display_affinity
+
+        function omp_capture_affinity(buffer, format)
+          import
+          character (len=*) :: format
+          character (len=*) :: buffer
+          integer (kind=kmp_size_t_kind) :: omp_capture_affinity
+        end function omp_capture_affinity
+
 !       ***
 !       *** kmp_* entry points
 !       ***
@@ -637,6 +658,10 @@
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_task_priority
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_affinity_format
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_affinity_format
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_display_affinity
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_capture_affinity
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s
 !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime
@@ -710,6 +735,10 @@
 !$omp declare target(omp_unset_nest_lock )
 !$omp declare target(omp_test_nest_lock )
 !$omp declare target(omp_get_max_task_priority )
+!$omp declare target(omp_set_affinity_format )
+!$omp declare target(omp_get_affinity_format )
+!$omp declare target(omp_display_affinity )
+!$omp declare target(omp_capture_affinity )
 !$omp declare target(kmp_set_stacksize )
 !$omp declare target(kmp_set_stacksize_s )
 !$omp declare target(kmp_set_blocktime )
diff --git a/runtime/src/include/50/ompt.h.var b/runtime/src/include/50/ompt.h.var
index 24fc90b..478c6cc 100644
--- a/runtime/src/include/50/ompt.h.var
+++ b/runtime/src/include/50/ompt.h.var
@@ -53,50 +53,50 @@
     macro(ompt_get_target_info)             \
     macro(ompt_get_num_devices)
 
-#define FOREACH_OMP_STATE(macro)                                                                \
+#define FOREACH_OMPT_STATE(macro)                                                                \
                                                                                                 \
     /* first available state */                                                                 \
-    macro (omp_state_undefined, 0x102)      /* undefined thread state */                        \
+    macro (ompt_state_undefined, 0x102)      /* undefined thread state */                        \
                                                                                                 \
     /* work states (0..15) */                                                                   \
-    macro (omp_state_work_serial, 0x000)    /* working outside parallel */                      \
-    macro (omp_state_work_parallel, 0x001)  /* working within parallel */                       \
-    macro (omp_state_work_reduction, 0x002) /* performing a reduction */                        \
+    macro (ompt_state_work_serial, 0x000)    /* working outside parallel */                      \
+    macro (ompt_state_work_parallel, 0x001)  /* working within parallel */                       \
+    macro (ompt_state_work_reduction, 0x002) /* performing a reduction */                        \
                                                                                                 \
     /* barrier wait states (16..31) */                                                          \
-    macro (omp_state_wait_barrier, 0x010)   /* waiting at a barrier */                          \
-    macro (omp_state_wait_barrier_implicit_parallel, 0x011)                                     \
+    macro (ompt_state_wait_barrier, 0x010)   /* waiting at a barrier */                          \
+    macro (ompt_state_wait_barrier_implicit_parallel, 0x011)                                     \
                                             /* implicit barrier at the end of parallel region */\
-    macro (omp_state_wait_barrier_implicit_workshare, 0x012)                                    \
+    macro (ompt_state_wait_barrier_implicit_workshare, 0x012)                                    \
                                             /* implicit barrier at the end of worksharing */    \
-    macro (omp_state_wait_barrier_implicit, 0x013)  /* implicit barrier */                      \
-    macro (omp_state_wait_barrier_explicit, 0x014)  /* explicit barrier */                      \
+    macro (ompt_state_wait_barrier_implicit, 0x013)  /* implicit barrier */                      \
+    macro (ompt_state_wait_barrier_explicit, 0x014)  /* explicit barrier */                      \
                                                                                                 \
     /* task wait states (32..63) */                                                             \
-    macro (omp_state_wait_taskwait, 0x020)  /* waiting at a taskwait */                         \
-    macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */                        \
+    macro (ompt_state_wait_taskwait, 0x020)  /* waiting at a taskwait */                         \
+    macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */                        \
                                                                                                 \
     /* mutex wait states (64..127) */                                                           \
-    macro (omp_state_wait_mutex, 0x040)                                                         \
-    macro (omp_state_wait_lock, 0x041)      /* waiting for lock */                              \
-    macro (omp_state_wait_critical, 0x042)  /* waiting for critical */                          \
-    macro (omp_state_wait_atomic, 0x043)    /* waiting for atomic */                            \
-    macro (omp_state_wait_ordered, 0x044)   /* waiting for ordered */                           \
+    macro (ompt_state_wait_mutex, 0x040)                                                         \
+    macro (ompt_state_wait_lock, 0x041)      /* waiting for lock */                              \
+    macro (ompt_state_wait_critical, 0x042)  /* waiting for critical */                          \
+    macro (ompt_state_wait_atomic, 0x043)    /* waiting for atomic */                            \
+    macro (ompt_state_wait_ordered, 0x044)   /* waiting for ordered */                           \
                                                                                                 \
     /* target wait states (128..255) */                                                         \
-    macro (omp_state_wait_target, 0x080)        /* waiting for target region */                 \
-    macro (omp_state_wait_target_map, 0x081)    /* waiting for target data mapping operation */ \
-    macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */       \
+    macro (ompt_state_wait_target, 0x080)        /* waiting for target region */                 \
+    macro (ompt_state_wait_target_map, 0x081)    /* waiting for target data mapping operation */ \
+    macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */       \
                                                                                                 \
     /* misc (256..511) */                                                                       \
-    macro (omp_state_idle, 0x100)           /* waiting for work */                              \
-    macro (omp_state_overhead, 0x101)       /* overhead excluding wait states */                \
+    macro (ompt_state_idle, 0x100)           /* waiting for work */                              \
+    macro (ompt_state_overhead, 0x101)       /* overhead excluding wait states */                \
                                                                                                 \
     /* implementation-specific states (512..) */
 
 
 #define FOREACH_KMP_MUTEX_IMPL(macro)                                                \
-    macro (ompt_mutex_impl_unknown, 0)     /* unknown implementation */              \
+    macro (ompt_mutex_impl_none, 0)        /* unknown implementation */              \
     macro (kmp_mutex_impl_spin, 1)         /* based on spin */                       \
     macro (kmp_mutex_impl_queuing, 2)      /* based on some fair policy */           \
     macro (kmp_mutex_impl_speculative, 3)  /* based on HW-supported speculation */
@@ -178,20 +178,11 @@
 
 static const ompt_data_t ompt_data_none = {0};
 
-typedef uint64_t omp_wait_id_t;
-static const omp_wait_id_t omp_wait_id_none = 0;
+typedef uint64_t ompt_wait_id_t;
+static const ompt_wait_id_t omp_wait_id_none = 0;
 
 typedef void ompt_device_t;
 
-/*---------------------
- * omp_frame_t
- *---------------------*/
-
-typedef struct omp_frame_t {
-    void *exit_frame;    /* next frame is user code     */
-    void *enter_frame;   /* previous frame is user code */
-} omp_frame_t;
-
 
 /*---------------------
  * dependences types
@@ -220,10 +211,18 @@
  *---------------------*/
 
 typedef enum {
-#define omp_state_macro(state, code) state = code,
-    FOREACH_OMP_STATE(omp_state_macro)
-#undef omp_state_macro
-} omp_state_t;
+#define ompt_state_macro(state, code) state = code,
+    FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
+} ompt_state_t;
+
+typedef enum ompt_frame_flag_t {
+  ompt_frame_runtime        = 0x00,
+  ompt_frame_application    = 0x01,
+  ompt_frame_cfa            = 0x10,
+  ompt_frame_framepointer   = 0x20,
+  ompt_frame_stackaddress   = 0x30
+} ompt_frame_flag_t;
 
 
 /*---------------------
@@ -278,6 +277,12 @@
     ompt_thread_unknown = 4
 } ompt_thread_t;
 
+typedef struct ompt_frame_t {
+  ompt_data_t exit_frame;
+  ompt_data_t enter_frame;
+  int exit_frame_flags;
+  int enter_frame_flags;
+} ompt_frame_t;
 typedef enum ompt_parallel_flag_t {
     ompt_parallel_invoker_program = 0x00000001,   /* program invokes master task */
     ompt_parallel_invoker_runtime = 0x00000002,   /* runtime invokes master task */
@@ -295,7 +300,7 @@
 );
 
 typedef void (*ompt_wait_callback_t) (
-    omp_wait_id_t wait_id                /* wait data                           */
+    ompt_wait_id_t wait_id                /* wait data                           */
 );
 
 /* parallel and workshares */
@@ -316,7 +321,7 @@
 
 typedef void (*ompt_callback_parallel_begin_t) (
     ompt_data_t *encountering_task_data,         /* data of encountering task           */
-    const omp_frame_t *encountering_task_frame,  /* frame data of encountering task     */
+    const ompt_frame_t *encountering_task_frame,  /* frame data of encountering task     */
     ompt_data_t *parallel_data,                  /* data of parallel region             */
     unsigned int requested_team_size,            /* requested number of threads in team */
     int flag,                                    /* flag for additional information     */
@@ -358,7 +363,7 @@
 
 typedef void (*ompt_callback_task_create_t) (
     ompt_data_t *encountering_task_data,         /* data of parent task                 */
-    const omp_frame_t *encountering_task_frame,  /* frame data for parent task          */
+    const ompt_frame_t *encountering_task_frame,  /* frame data for parent task          */
     ompt_data_t *new_task_data,                  /* data of created task                */
     int flag,                                    /* type of created task                */
     int has_dependences,                         /* created task has dependences        */
@@ -479,19 +484,19 @@
     ompt_mutex_t kind,                    /* mutex kind                          */
     unsigned int hint,                    /* mutex hint                          */
     unsigned int impl,                    /* mutex implementation                */
-    omp_wait_id_t wait_id,                /* id of object being awaited          */
+    ompt_wait_id_t wait_id,                /* id of object being awaited          */
     const void *codeptr_ra                /* return address of runtime call      */
 );
 
 typedef void (*ompt_callback_mutex_t) (
     ompt_mutex_t kind,                    /* mutex kind                          */
-    omp_wait_id_t wait_id,                /* id of object being awaited          */
+    ompt_wait_id_t wait_id,                /* id of object being awaited          */
     const void *codeptr_ra                /* return address of runtime call      */
 );
 
 typedef void (*ompt_callback_nest_lock_t) (
     ompt_scope_endpoint_t endpoint,       /* endpoint of nested lock             */
-    omp_wait_id_t wait_id,                /* id of object being awaited          */
+    ompt_wait_id_t wait_id,                /* id of object being awaited          */
     const void *codeptr_ra                /* return address of runtime call      */
 );
 
@@ -592,8 +597,8 @@
  ***************************************************************************/
 
 /* state */
-OMPT_API_FUNCTION(omp_state_t, ompt_get_state, (
-    omp_wait_id_t *wait_id
+OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
+    ompt_wait_id_t *wait_id
 ));
 
 /* thread */
@@ -611,7 +616,7 @@
     int ancestor_level,
     int *type,
     ompt_data_t **task_data,
-    omp_frame_t **task_frame,
+    ompt_frame_t **task_frame,
     ompt_data_t **parallel_data,
     int *thread_num
 ));
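
Side note on the ompt.h.var hunks above: the OMPT types are renamed (omp_frame_t -> ompt_frame_t, omp_wait_id_t -> ompt_wait_id_t, omp_state_t -> ompt_state_t) and the frame members become ompt_data_t, so tools now read the frame address from enter_frame.ptr / exit_frame.ptr rather than a raw pointer. A minimal tool-side sketch against the callback signature declared above (registration via ompt_start_tool and the include path are omitted and assumed; this is not part of the patch):

  #include <stdio.h>
  #include <ompt.h> /* generated from ompt.h.var; adjust include path as needed */

  /* Matches ompt_callback_parallel_begin_t as declared in the hunk above. */
  static void on_parallel_begin(ompt_data_t *encountering_task_data,
                                const ompt_frame_t *encountering_task_frame,
                                ompt_data_t *parallel_data,
                                unsigned int requested_team_size,
                                int flag, const void *codeptr_ra) {
    /* enter_frame is now an ompt_data_t, so the runtime frame address
       lives in the .ptr member instead of being the value itself. */
    printf("parallel begin: team size %u, enter frame %p\n",
           requested_team_size,
           encountering_task_frame ? encountering_task_frame->enter_frame.ptr
                                   : (void *)0);
    (void)encountering_task_data; (void)parallel_data;
    (void)flag; (void)codeptr_ra;
  }
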
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index ef9e0a9..23bbeb3 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -129,6 +129,11 @@
 #include "ompt-internal.h"
 #endif
 
+#if OMP_50_ENABLED
+// Affinity format function
+#include "kmp_str.h"
+#endif
+
 // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
 // 3 - fast allocation using sync, non-sync free lists of any size, non-self
 // free lists of limited size.
@@ -544,11 +549,15 @@
 
 #if KMP_OS_WINDOWS
 #define USE_CBLKDATA
+#if KMP_MSVC_COMPAT
 #pragma warning(push)
 #pragma warning(disable : 271 310)
+#endif
 #include <windows.h>
+#if KMP_MSVC_COMPAT
 #pragma warning(pop)
 #endif
+#endif
 
 #if KMP_OS_UNIX
 #include <dlfcn.h>
@@ -560,7 +569,7 @@
 
 // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
 #if KMP_OS_WINDOWS
-#if _MSC_VER < 1600
+#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
 typedef struct GROUP_AFFINITY {
   KAFFINITY Mask;
   WORD Group;
@@ -793,6 +802,12 @@
 
 #endif /* OMP_40_ENABLED */
 
+#if OMP_50_ENABLED
+extern int __kmp_display_affinity;
+extern char *__kmp_affinity_format;
+static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
+#endif // OMP_50_ENABLED
+
 #if KMP_AFFINITY_SUPPORTED
 #define KMP_PLACE_ALL (-1)
 #define KMP_PLACE_UNDEFINED (-2)
@@ -1042,6 +1057,10 @@
 /* TODO: tune for KMP_OS_DARWIN */
 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
 #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#elif KMP_OS_DRAGONFLY
+/* TODO: tune for KMP_OS_DRAGONFLY */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
 #elif KMP_OS_FREEBSD
 /* TODO: tune for KMP_OS_FREEBSD */
 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
@@ -1054,6 +1073,10 @@
 /* TODO: tune for KMP_OS_HURD */
 #define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
 #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#elif KMP_OS_OPENBSD
+/* TODO: tune for KMP_OS_OPENBSD */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests   */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
 #endif
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -2222,6 +2245,18 @@
 #endif
 } kmp_dephash_t;
 
+#if OMP_50_ENABLED
+typedef struct kmp_task_affinity_info {
+  kmp_intptr_t base_addr;
+  size_t len;
+  struct {
+    bool flag1 : 1;
+    bool flag2 : 1;
+    kmp_int32 reserved : 30;
+  } flags;
+} kmp_task_affinity_info_t;
+#endif
+
 #endif
 
 #ifdef BUILD_TIED_TASK_STACK
@@ -2483,6 +2518,10 @@
   int th_last_place; /* last place in partition */
 #endif
 #endif
+#if OMP_50_ENABLED
+  int th_prev_level; /* previous level for affinity format */
+  int th_prev_num_threads; /* previous num_threads for affinity format */
+#endif
 #if USE_ITT_BUILD
   kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
   kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
@@ -2676,6 +2715,9 @@
   int t_first_place; // first & last place in parent thread's partition.
   int t_last_place; // Restore these values to master after par region.
 #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if OMP_50_ENABLED
+  int t_display_affinity;
+#endif
   int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
 // omp_set_num_threads() call
 #if OMP_50_ENABLED
@@ -3359,6 +3401,8 @@
 #if KMP_AFFINITY_SUPPORTED
 extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                        kmp_affin_mask_t *mask);
+extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
+                                                  kmp_affin_mask_t *mask);
 extern void __kmp_affinity_initialize(void);
 extern void __kmp_affinity_uninitialize(void);
 extern void __kmp_affinity_set_init_mask(
@@ -3378,6 +3422,14 @@
 extern int kmp_set_thread_affinity_mask_initial(void);
 #endif
 #endif /* KMP_AFFINITY_SUPPORTED */
+#if OMP_50_ENABLED
+// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
+// format string is for affinity, so platforms that do not support
+// affinity can still use the other fields, e.g., %n for num_threads
+extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
+                                         kmp_str_buf_t *buffer);
+extern void __kmp_aux_display_affinity(int gtid, const char *format);
+#endif
 
 extern void __kmp_cleanup_hierarchy();
 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
@@ -3530,6 +3582,8 @@
 #if OMP_40_ENABLED
 extern int __kmp_invoke_teams_master(int gtid);
 extern void __kmp_teams_master(int gtid);
+extern int __kmp_aux_get_team_num();
+extern int __kmp_aux_get_num_teams();
 #endif
 extern void __kmp_save_internal_controls(kmp_info_t *thread);
 extern void __kmp_user_set_library(enum library_type arg);
@@ -3783,6 +3837,9 @@
 #if OMP_50_ENABLED
 KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
 KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
+KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
+    ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
+    kmp_task_affinity_info_t *affin_list);
 #endif
 
 #endif
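
For context on the kmp.h additions above: kmp_task_affinity_info_t and __kmpc_omp_reg_task_with_affinity are the compiler-facing pieces for the OpenMP 5.0 task affinity clause; the compiler is expected to lower each locator list item into one kmp_task_affinity_info_t entry and pass the list to the registration entry point (a hint to the scheduler, not a binding requirement). A hedged source-level illustration of the construct this serves, not code from the patch:

  // Illustrative only: an affinity clause on a task, which a 5.0-aware
  // compiler presumably lowers into a __kmpc_omp_reg_task_with_affinity call
  // carrying one kmp_task_affinity_info_t per list item.
  void scale(double *a, int n) {
  #pragma omp task affinity(a[0:n])
    {
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0;
    }
  }
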
diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp
index a9a21cf..775862e 100644
--- a/runtime/src/kmp_affinity.cpp
+++ b/runtime/src/kmp_affinity.cpp
@@ -83,55 +83,135 @@
   }
 }
 
+#define KMP_ADVANCE_SCAN(scan)                                                 \
+  while (*scan != '\0') {                                                      \
+    scan++;                                                                    \
+  }
+
 // Print the affinity mask to the character array in a pretty format.
+// The format is a comma separated list of non-negative integers or integer
+// ranges: e.g., 1,2,3-5,7,9-15
+// The format can also be the string "{<empty>}" if no bits are set in mask
 char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                 kmp_affin_mask_t *mask) {
+  int start = 0, finish = 0, previous = 0;
+  bool first_range;
+  KMP_ASSERT(buf);
   KMP_ASSERT(buf_len >= 40);
+  KMP_ASSERT(mask);
   char *scan = buf;
   char *end = buf + buf_len - 1;
 
-  // Find first element / check for empty set.
-  int i;
-  i = mask->begin();
-  if (i == mask->end()) {
+  // Check for empty set.
+  if (mask->begin() == mask->end()) {
     KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
-    while (*scan != '\0')
-      scan++;
+    KMP_ADVANCE_SCAN(scan);
     KMP_ASSERT(scan <= end);
     return buf;
   }
 
-  KMP_SNPRINTF(scan, end - scan + 1, "{%d", i);
-  while (*scan != '\0')
-    scan++;
-  i++;
-  for (; i != mask->end(); i = mask->next(i)) {
-    if (!KMP_CPU_ISSET(i, mask)) {
-      continue;
+  first_range = true;
+  start = mask->begin();
+  while (1) {
+    // Find next range
+    // [start, previous] is inclusive range of contiguous bits in mask
+    for (finish = mask->next(start), previous = start;
+         finish == previous + 1 && finish != mask->end();
+         finish = mask->next(finish)) {
+      previous = finish;
     }
 
-    // Check for buffer overflow.  A string of the form ",<n>" will have at most
-    // 10 characters, plus we want to leave room to print ",...}" if the set is
-    // too large to print for a total of 15 characters. We already left room for
-    // '\0' in setting end.
-    if (end - scan < 15) {
-      break;
+    // The first range does not need a comma printed before it, but the rest
+    // of the ranges do need a comma beforehand
+    if (!first_range) {
+      KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
+      KMP_ADVANCE_SCAN(scan);
+    } else {
+      first_range = false;
     }
-    KMP_SNPRINTF(scan, end - scan + 1, ",%-d", i);
-    while (*scan != '\0')
-      scan++;
+    // Range with three or more contiguous bits in the affinity mask
+    if (previous - start > 1) {
+      KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
+                   static_cast<int>(previous));
+    } else {
+      // Range with one or two contiguous bits in the affinity mask
+      KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
+      KMP_ADVANCE_SCAN(scan);
+      if (previous - start > 0) {
+        KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
+      }
+    }
+    KMP_ADVANCE_SCAN(scan);
+    // Start over with new start point
+    start = finish;
+    if (start == mask->end())
+      break;
+    // Check for overflow
+    if (end - scan < 2)
+      break;
   }
-  if (i != mask->end()) {
-    KMP_SNPRINTF(scan, end - scan + 1, ",...");
-    while (*scan != '\0')
-      scan++;
-  }
-  KMP_SNPRINTF(scan, end - scan + 1, "}");
-  while (*scan != '\0')
-    scan++;
+
+  // Check for overflow
   KMP_ASSERT(scan <= end);
   return buf;
 }
+#undef KMP_ADVANCE_SCAN
+
+// Print the affinity mask to the string buffer object in a pretty format
+// The format is a comma separated list of non-negative integers or integer
+// ranges: e.g., 1,2,3-5,7,9-15
+// The format can also be the string "{<empty>}" if no bits are set in mask
+kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
+                                           kmp_affin_mask_t *mask) {
+  int start = 0, finish = 0, previous = 0;
+  bool first_range;
+  KMP_ASSERT(buf);
+  KMP_ASSERT(mask);
+
+  __kmp_str_buf_clear(buf);
+
+  // Check for empty set.
+  if (mask->begin() == mask->end()) {
+    __kmp_str_buf_print(buf, "%s", "{<empty>}");
+    return buf;
+  }
+
+  first_range = true;
+  start = mask->begin();
+  while (1) {
+    // Find next range
+    // [start, previous] is inclusive range of contiguous bits in mask
+    for (finish = mask->next(start), previous = start;
+         finish == previous + 1 && finish != mask->end();
+         finish = mask->next(finish)) {
+      previous = finish;
+    }
+
+    // The first range does not need a comma printed before it, but the rest
+    // of the ranges do need a comma beforehand
+    if (!first_range) {
+      __kmp_str_buf_print(buf, "%s", ",");
+    } else {
+      first_range = false;
+    }
+    // Range with three or more contiguous bits in the affinity mask
+    if (previous - start > 1) {
+      __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
+                          static_cast<int>(previous));
+    } else {
+      // Range with one or two contiguous bits in the affinity mask
+      __kmp_str_buf_print(buf, "%d", static_cast<int>(start));
+      if (previous - start > 0) {
+        __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
+      }
+    }
+    // Start over with new start point
+    start = finish;
+    if (start == mask->end())
+      break;
+  }
+  return buf;
+}
 
 void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
   KMP_CPU_ZERO(mask);
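
The two printers added above (the character-buffer variant and the new kmp_str_buf_t variant) implement the same range-compression rule: runs of three or more contiguous bits print as "lo-hi", runs of one or two bits print the members individually, ranges are comma-separated, and an empty mask prints "{<empty>}". A standalone sketch of that rule outside the runtime, for illustration only (the helper name format_mask is made up for this example):

  #include <cstdio>
  #include <string>
  #include <vector>

  // bits must be sorted and unique, mirroring iteration over a kmp_affin_mask_t.
  static std::string format_mask(const std::vector<int> &bits) {
    std::string out;
    for (size_t i = 0; i < bits.size();) {
      size_t j = i;
      while (j + 1 < bits.size() && bits[j + 1] == bits[j] + 1)
        ++j; // extend the contiguous run [i, j]
      char buf[32];
      if (bits[j] - bits[i] > 1)        // three or more bits: "lo-hi"
        std::snprintf(buf, sizeof(buf), "%d-%d", bits[i], bits[j]);
      else if (bits[j] - bits[i] == 1)  // exactly two bits: "lo,hi"
        std::snprintf(buf, sizeof(buf), "%d,%d", bits[i], bits[j]);
      else                              // single bit
        std::snprintf(buf, sizeof(buf), "%d", bits[i]);
      if (!out.empty()) out += ",";
      out += buf;
      i = j + 1;
    }
    return out.empty() ? "{<empty>}" : out;
  }

  // format_mask({0,1,2,5,7,8}) yields "0-2,5,7,8"
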
diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h
index cb1a7e3..e62508a 100644
--- a/runtime/src/kmp_affinity.h
+++ b/runtime/src/kmp_affinity.h
@@ -376,26 +376,26 @@
       mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
     }
     void zero() override {
-      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+      for (int i = 0; i < __kmp_num_proc_groups; ++i)
         mask[i] = 0;
     }
     void copy(const KMPAffinity::Mask *src) override {
       const Mask *convert = static_cast<const Mask *>(src);
-      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+      for (int i = 0; i < __kmp_num_proc_groups; ++i)
         mask[i] = convert->mask[i];
     }
     void bitwise_and(const KMPAffinity::Mask *rhs) override {
       const Mask *convert = static_cast<const Mask *>(rhs);
-      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+      for (int i = 0; i < __kmp_num_proc_groups; ++i)
         mask[i] &= convert->mask[i];
     }
     void bitwise_or(const KMPAffinity::Mask *rhs) override {
       const Mask *convert = static_cast<const Mask *>(rhs);
-      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+      for (int i = 0; i < __kmp_num_proc_groups; ++i)
         mask[i] |= convert->mask[i];
     }
     void bitwise_not() override {
-      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+      for (int i = 0; i < __kmp_num_proc_groups; ++i)
         mask[i] = ~(mask[i]);
     }
     int begin() const override {
diff --git a/runtime/src/kmp_atomic.h b/runtime/src/kmp_atomic.h
index 3b75a6b..288916c 100644
--- a/runtime/src/kmp_atomic.h
+++ b/runtime/src/kmp_atomic.h
@@ -364,7 +364,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
-        ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (omp_wait_id_t)lck,
+        ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (ompt_wait_id_t)lck,
         OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
@@ -374,7 +374,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_atomic, (omp_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
+        ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 }
@@ -390,7 +390,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_atomic, (omp_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
+        ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
   }
 #endif
 }
diff --git a/runtime/src/kmp_barrier.cpp b/runtime/src/kmp_barrier.cpp
index 2b78b54..79b6bf3 100644
--- a/runtime/src/kmp_barrier.cpp
+++ b/runtime/src/kmp_barrier.cpp
@@ -1253,7 +1253,7 @@
     // It is OK to report the barrier state after the barrier begin callback.
     // According to the OMPT specification, a compliant implementation may
     // even delay reporting this state until the barrier begins to wait.
-    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
+    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
   }
 #endif
 
@@ -1502,7 +1502,7 @@
           my_task_data, return_address);
     }
 #endif
-    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
   }
 #endif
   ANNOTATE_BARRIER_END(&team->t.t_bar);
@@ -1624,7 +1624,7 @@
     if (!KMP_MASTER_TID(ds_tid))
       this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
 #endif
-    this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
+    this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
   }
 #endif
 
@@ -1698,6 +1698,11 @@
     if (__kmp_tasking_mode != tskm_immediate_exec) {
       __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
     }
+#if OMP_50_ENABLED
+    if (__kmp_display_affinity) {
+      KMP_CHECK_UPDATE(team->t.t_display_affinity, 0);
+    }
+#endif
 #if KMP_STATS_ENABLED
     // Have master thread flag the workers to indicate they are now waiting for
     // next parallel region, Also wake them up so they switch their timers to
@@ -1882,12 +1887,12 @@
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
     int ds_tid = this_thr->th.th_info.ds.ds_tid;
     ompt_data_t *task_data = (team)
                                  ? OMPT_CUR_TASK_DATA(this_thr)
                                  : &(this_thr->th.ompt_thread_info.task_data);
-    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
 #if OMPT_OPTIONAL
     void *codeptr = NULL;
     if (KMP_MASTER_TID(ds_tid) &&
@@ -1985,6 +1990,19 @@
   }
 #endif
 #if OMP_50_ENABLED
+  // Perform the display affinity functionality
+  if (__kmp_display_affinity) {
+    if (team->t.t_display_affinity
+#if KMP_AFFINITY_SUPPORTED
+        || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed)
+#endif
+            ) {
+      // NULL means use the affinity-format-var ICV
+      __kmp_aux_display_affinity(gtid, NULL);
+      this_thr->th.th_prev_num_threads = team->t.t_nproc;
+      this_thr->th.th_prev_level = team->t.t_level;
+    }
+  }
   if (!KMP_MASTER_TID(tid))
     KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);
 #endif
diff --git a/runtime/src/kmp_config.h.cmake b/runtime/src/kmp_config.h.cmake
index 6b778ea..c9ebbc0 100644
--- a/runtime/src/kmp_config.h.cmake
+++ b/runtime/src/kmp_config.h.cmake
@@ -74,6 +74,8 @@
 #if LIBOMP_TSAN_SUPPORT
 #define TSAN_SUPPORT
 #endif
+#cmakedefine01 MSVC
+#define KMP_MSVC_COMPAT MSVC
 
 // Configured cache line based on architecture
 #if KMP_ARCH_PPC64
diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp
index ac76794..4c62720 100644
--- a/runtime/src/kmp_csupport.cpp
+++ b/runtime/src/kmp_csupport.cpp
@@ -293,7 +293,7 @@
     va_start(ap, microtask);
 
 #if OMPT_SUPPORT
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       kmp_info_t *master_th = __kmp_threads[gtid];
       kmp_team_t *parent_team = master_th->th.th_team;
@@ -305,7 +305,7 @@
         ompt_frame = &(
             parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
       }
-      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(gtid);
     }
 #endif
@@ -395,7 +395,7 @@
   int tid = __kmp_tid_from_gtid(gtid);
   if (ompt_enabled.enabled) {
     parent_team->t.t_implicit_task_taskdata[tid]
-        .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
@@ -506,8 +506,8 @@
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
-    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
+      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
+    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
     if (ompt_enabled.ompt_callback_implicit_task) {
       ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
           ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
@@ -524,7 +524,7 @@
           ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
     }
     __ompt_lw_taskteam_unlink(this_thr);
-    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
   }
 #endif
 
@@ -606,8 +606,8 @@
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled)
     this_thr->th.ompt_thread_info.state =
-        ((this_thr->th.th_team_serialized) ? omp_state_work_serial
-                                           : omp_state_work_parallel);
+        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
+                                           : ompt_state_work_parallel);
 #endif
 }
 
@@ -705,11 +705,11 @@
   }
 
 #if OMPT_SUPPORT
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame == NULL)
-      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(global_tid);
   }
 #endif
@@ -724,7 +724,7 @@
   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 }
@@ -843,22 +843,22 @@
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   kmp_team_t *team;
-  omp_wait_id_t lck;
+  ompt_wait_id_t lck;
   void *codeptr_ra;
   if (ompt_enabled.enabled) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
     team = __kmp_team_from_gtid(gtid);
-    lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
+    lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
     /* OMPT state update */
     th->th.ompt_thread_info.wait_id = lck;
-    th->th.ompt_thread_info.state = omp_state_wait_ordered;
+    th->th.ompt_thread_info.state = ompt_state_wait_ordered;
 
     /* OMPT event callback */
     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
-          (omp_wait_id_t)lck, codeptr_ra);
+          (ompt_wait_id_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -871,13 +871,13 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
     /* OMPT state update */
-    th->th.ompt_thread_info.state = omp_state_work_parallel;
+    th->th.ompt_thread_info.state = ompt_state_work_parallel;
     th->th.ompt_thread_info.wait_id = 0;
 
     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
+          ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
     }
   }
 #endif
@@ -917,7 +917,7 @@
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
         ompt_mutex_ordered,
-        (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
+        (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
         OMPT_LOAD_RETURN_ADDRESS(gtid));
   }
 #endif
@@ -1144,7 +1144,7 @@
 #else
   KMP_COUNT_BLOCK(OMP_CRITICAL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  omp_state_t prev_state = omp_state_undefined;
+  ompt_state_t prev_state = ompt_state_undefined;
   ompt_thread_info_t ti;
 #endif
   kmp_user_lock_p lck;
@@ -1188,15 +1188,15 @@
     ti = __kmp_threads[global_tid]->th.ompt_thread_info;
     /* OMPT state update */
     prev_state = ti.state;
-    ti.wait_id = (omp_wait_id_t)lck;
-    ti.state = omp_state_wait_critical;
+    ti.wait_id = (ompt_wait_id_t)lck;
+    ti.state = ompt_state_wait_critical;
 
     /* OMPT event callback */
     codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-          (omp_wait_id_t)crit, codeptr_ra);
+          (ompt_wait_id_t)crit, codeptr_ra);
     }
   }
 #endif
@@ -1216,7 +1216,7 @@
     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
+          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
     }
   }
 #endif
@@ -1292,7 +1292,7 @@
       return kmp_mutex_impl_speculative;
 #endif
     default:
-      return ompt_mutex_impl_unknown;
+      return ompt_mutex_impl_none;
     }
     ilock = KMP_LOOKUP_I_LOCK(user_lock);
   }
@@ -1316,7 +1316,7 @@
   case locktag_nested_drdpa:
     return kmp_mutex_impl_queuing;
   default:
-    return ompt_mutex_impl_unknown;
+    return ompt_mutex_impl_none;
   }
 }
 #else
@@ -1339,7 +1339,7 @@
     return kmp_mutex_impl_speculative;
 #endif
   default:
-    return ompt_mutex_impl_unknown;
+    return ompt_mutex_impl_none;
   }
 }
 #endif // KMP_USE_DYNAMIC_LOCK
@@ -1363,7 +1363,7 @@
   KMP_COUNT_BLOCK(OMP_CRITICAL);
   kmp_user_lock_p lck;
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  omp_state_t prev_state = omp_state_undefined;
+  ompt_state_t prev_state = ompt_state_undefined;
   ompt_thread_info_t ti;
   // This is the case, if called from __kmpc_critical:
   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
@@ -1402,14 +1402,14 @@
       ti = __kmp_threads[global_tid]->th.ompt_thread_info;
       /* OMPT state update */
       prev_state = ti.state;
-      ti.wait_id = (omp_wait_id_t)lck;
-      ti.state = omp_state_wait_critical;
+      ti.wait_id = (ompt_wait_id_t)lck;
+      ti.state = ompt_state_wait_critical;
 
       /* OMPT event callback */
       if (ompt_enabled.ompt_callback_mutex_acquire) {
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
             ompt_mutex_critical, (unsigned int)hint,
-            __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
+            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
       }
     }
 #endif
@@ -1440,14 +1440,14 @@
       ti = __kmp_threads[global_tid]->th.ompt_thread_info;
       /* OMPT state update */
       prev_state = ti.state;
-      ti.wait_id = (omp_wait_id_t)lck;
-      ti.state = omp_state_wait_critical;
+      ti.wait_id = (ompt_wait_id_t)lck;
+      ti.state = ompt_state_wait_critical;
 
       /* OMPT event callback */
       if (ompt_enabled.ompt_callback_mutex_acquire) {
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
             ompt_mutex_critical, (unsigned int)hint,
-            __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
+            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
       }
     }
 #endif
@@ -1467,7 +1467,7 @@
     /* OMPT event callback */
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
+          ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
     }
   }
 #endif
@@ -1565,7 +1565,7 @@
   OMPT_STORE_RETURN_ADDRESS(global_tid);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
+        ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
   }
 #endif
 
@@ -1594,11 +1594,11 @@
     __kmp_check_barrier(global_tid, ct_barrier, loc);
 
 #if OMPT_SUPPORT
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame == NULL)
-      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(global_tid);
   }
 #endif
@@ -1608,7 +1608,7 @@
   status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 
@@ -1656,11 +1656,11 @@
   }
 
 #if OMPT_SUPPORT
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame == NULL)
-      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(global_tid);
   }
 #endif
@@ -1670,7 +1670,7 @@
   __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 
@@ -1867,6 +1867,59 @@
   return __kmp_get_team_size(__kmp_entry_gtid(), level);
 }
 
+#if OMP_50_ENABLED
+/* OpenMP 5.0 Affinity Format API */
+
+void ompc_set_affinity_format(char const *format) {
+  if (!__kmp_init_serial) {
+    __kmp_serial_initialize();
+  }
+  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
+                         format, KMP_STRLEN(format) + 1);
+}
+
+size_t ompc_get_affinity_format(char *buffer, size_t size) {
+  size_t format_size;
+  if (!__kmp_init_serial) {
+    __kmp_serial_initialize();
+  }
+  format_size = KMP_STRLEN(__kmp_affinity_format);
+  if (buffer && size) {
+    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
+                           format_size + 1);
+  }
+  return format_size;
+}
+
+void ompc_display_affinity(char const *format) {
+  int gtid;
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
+  gtid = __kmp_get_gtid();
+  __kmp_aux_display_affinity(gtid, format);
+}
+
+size_t ompc_capture_affinity(char *buffer, size_t buf_size,
+                             char const *format) {
+  int gtid;
+  size_t num_required;
+  kmp_str_buf_t capture_buf;
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
+  gtid = __kmp_get_gtid();
+  __kmp_str_buf_init(&capture_buf);
+  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
+  if (buffer && buf_size) {
+    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
+                           capture_buf.used + 1);
+  }
+  __kmp_str_buf_free(&capture_buf);
+  return num_required;
+}
+#endif /* OMP_50_ENABLED */
+
 void kmpc_set_stacksize(int arg) {
   // __kmp_aux_set_stacksize initializes the library if needed
   __kmp_aux_set_stacksize(arg);
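
The ompc_* entry points added in the hunk above back the user-facing OpenMP 5.0 affinity-format routines. A hedged usage sketch of those routines (names per the 5.0 spec; the %{...} field names are illustrative and may need adjusting, and passing NULL as the format is assumed to select the affinity-format-var ICV, as the runtime comment above indicates):

  #include <omp.h>
  #include <stdio.h>

  int main() {
    omp_set_affinity_format("thread %{thread_num} bound to %{thread_affinity}");
  #pragma omp parallel
    {
      char buf[256];
      /* Returns the number of characters required, excluding the trailing NUL. */
      size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
      if (needed < sizeof(buf))
        printf("%s\n", buf);
      omp_display_affinity(NULL); /* print using the affinity-format-var ICV */
    }
    return 0;
  }
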
@@ -2006,11 +2059,11 @@
     *data_ptr = cpy_data;
 
 #if OMPT_SUPPORT
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    if (ompt_frame->enter_frame == NULL)
-      ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (ompt_frame->enter_frame.ptr == NULL)
+      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
@@ -2038,7 +2091,7 @@
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 }
@@ -2136,7 +2189,7 @@
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_lock, (omp_lock_hint_t)hint,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -2160,7 +2213,7 @@
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -2186,7 +2239,7 @@
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -2229,7 +2282,7 @@
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (omp_wait_id_t)user_lock, codeptr);
+        (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2258,7 +2311,7 @@
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_nest_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -2304,7 +2357,7 @@
   if (ompt_enabled.ompt_callback_lock_init) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
         ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (omp_wait_id_t)user_lock, codeptr);
+        (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2340,7 +2393,7 @@
       lck = (kmp_user_lock_p)user_lock;
     }
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
@@ -2368,7 +2421,7 @@
     codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_lock_destroy) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2408,7 +2461,7 @@
     codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_lock_destroy) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
   KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
@@ -2440,7 +2493,7 @@
     codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_lock_destroy) {
     ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
-        ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+        ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2485,7 +2538,7 @@
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -2507,7 +2560,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2540,7 +2593,7 @@
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (omp_wait_id_t)lck, codeptr);
+        (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -2553,7 +2606,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -2575,7 +2628,7 @@
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+          __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
           codeptr);
     }
   }
@@ -2593,13 +2646,13 @@
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
+            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
       }
     }
   }
@@ -2637,7 +2690,7 @@
     if (ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
+          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
     }
   }
 #endif
@@ -2654,13 +2707,13 @@
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
+            ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
       }
     }
   }
@@ -2696,7 +2749,7 @@
     codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
   }
 #endif
 
@@ -2725,7 +2778,7 @@
       codeptr = OMPT_GET_RETURN_ADDRESS(0);
     if (ompt_enabled.ompt_callback_mutex_released) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-          ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+          ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
     }
 #endif
 
@@ -2757,7 +2810,7 @@
     codeptr = OMPT_GET_RETURN_ADDRESS(0);
   if (ompt_enabled.ompt_callback_mutex_released) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -2785,12 +2838,12 @@
       if (ompt_enabled.ompt_callback_mutex_released) {
         // release_lock_last
         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
       }
     } else if (ompt_enabled.ompt_callback_nest_lock) {
       // release_lock_prev
       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-          ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
+          ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
     }
   }
 #endif
@@ -2834,12 +2887,12 @@
         if (ompt_enabled.ompt_callback_mutex_released) {
           // release_lock_last
           ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-              ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+              ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
         }
       } else if (ompt_enabled.ompt_callback_nest_lock) {
         // release_lock_previous
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
+            ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
       }
     }
 #endif
@@ -2876,12 +2929,12 @@
       if (ompt_enabled.ompt_callback_mutex_released) {
         // release_lock_last
         ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
-            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
       }
     } else if (ompt_enabled.ompt_callback_nest_lock) {
       // release_lock_previous
       ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-          ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
+          ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
     }
   }
 #endif
@@ -2907,7 +2960,7 @@
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -2930,7 +2983,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.ompt_callback_mutex_acquired) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-          ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+          ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
     }
 #endif
     return FTN_TRUE;
@@ -2971,7 +3024,7 @@
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
-        (omp_wait_id_t)lck, codeptr);
+        (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -2986,7 +3039,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-        ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+        ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
   }
 #endif
 
@@ -3012,7 +3065,7 @@
   if (ompt_enabled.ompt_callback_mutex_acquire) {
     ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
         ompt_mutex_nest_lock, omp_lock_hint_none,
-        __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+        __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
         codeptr);
   }
 #endif
@@ -3030,13 +3083,13 @@
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
+            ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
       }
     }
   }
@@ -3077,7 +3130,7 @@
         ompt_enabled.ompt_callback_mutex_acquire) {
       ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
           ompt_mutex_nest_lock, omp_lock_hint_none,
-          __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
+          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
     }
 #endif
 
@@ -3095,13 +3148,13 @@
       if (ompt_enabled.ompt_callback_mutex_acquired) {
         // lock_first
         ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
-            ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+            ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
       }
     } else {
       if (ompt_enabled.ompt_callback_nest_lock) {
         // lock_next
         ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
-            ompt_mutex_scope_begin, (omp_wait_id_t)lck, codeptr);
+            ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
       }
     }
   }
@@ -3392,11 +3445,11 @@
     // JP: as long as there is a barrier in the implementation, OMPT should and
     // will provide the barrier events
     //         so we set-up the necessary frame/return addresses.
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-      if (ompt_frame->enter_frame == NULL)
-        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      if (ompt_frame->enter_frame.ptr == NULL)
+        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(global_tid);
     }
 #endif
@@ -3409,7 +3462,7 @@
     retval = (retval != 0) ? (0) : (1);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.enabled) {
-      ompt_frame->enter_frame = NULL;
+      ompt_frame->enter_frame = ompt_data_none;
     }
 #endif
 
@@ -3573,11 +3626,11 @@
 // this barrier should be visible to a customer and to the threading profile
 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
 #if OMPT_SUPPORT
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-      if (ompt_frame->enter_frame == NULL)
-        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      if (ompt_frame->enter_frame.ptr == NULL)
+        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(global_tid);
     }
 #endif
@@ -3591,7 +3644,7 @@
     retval = (retval != 0) ? (0) : (1);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.enabled) {
-      ompt_frame->enter_frame = NULL;
+      ompt_frame->enter_frame = ompt_data_none;
     }
 #endif
 
@@ -3659,11 +3712,11 @@
 
 // TODO: implicit barrier: should be exposed
 #if OMPT_SUPPORT
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-      if (ompt_frame->enter_frame == NULL)
-        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      if (ompt_frame->enter_frame.ptr == NULL)
+        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(global_tid);
     }
 #endif
@@ -3673,7 +3726,7 @@
     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.enabled) {
-      ompt_frame->enter_frame = NULL;
+      ompt_frame->enter_frame = ompt_data_none;
     }
 #endif
 
@@ -3683,11 +3736,11 @@
 
 // TODO: implicit barrier: should be exposed
 #if OMPT_SUPPORT
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-      if (ompt_frame->enter_frame == NULL)
-        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      if (ompt_frame->enter_frame.ptr == NULL)
+        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(global_tid);
     }
 #endif
@@ -3697,18 +3750,18 @@
     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.enabled) {
-      ompt_frame->enter_frame = NULL;
+      ompt_frame->enter_frame = ompt_data_none;
     }
 #endif
 
   } else if (packed_reduction_method == atomic_reduce_block) {
 
 #if OMPT_SUPPORT
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     if (ompt_enabled.enabled) {
       __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-      if (ompt_frame->enter_frame == NULL)
-        ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      if (ompt_frame->enter_frame.ptr == NULL)
+        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(global_tid);
     }
 #endif
@@ -3719,7 +3772,7 @@
     __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
     if (ompt_enabled.enabled) {
-      ompt_frame->enter_frame = NULL;
+      ompt_frame->enter_frame = ompt_data_none;
     }
 #endif
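The renames in the hunks above are mechanical but follow one interface change: the tools frame record now carries its addresses in an ompt_data_t union instead of a raw pointer, so assignments become `enter_frame.ptr = ...` on entry and `enter_frame = ompt_data_none` on exit, and OMPT_GET_FRAME_ADDRESS(1) becomes OMPT_GET_FRAME_ADDRESS(0) (the entry point's own frame rather than its caller's). As a rough sketch only, not the generated omp-tools.h, which may carry additional fields, the shapes assumed are:

  #include <cstdint>

  // Sketch only: tool-visible data cell, either an integer or a pointer.
  typedef union ompt_data_t {
    uint64_t value;
    void *ptr;
  } ompt_data_t;

  // Sketch only: per-task frame record behind enter_frame/exit_frame above.
  typedef struct ompt_frame_t {
    ompt_data_t exit_frame;  // frame at which the task resumes user code
    ompt_data_t enter_frame; // frame at which the task entered the runtime
  } ompt_frame_t;

  static const ompt_data_t ompt_data_none = {0};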
 
diff --git a/runtime/src/kmp_dispatch.cpp b/runtime/src/kmp_dispatch.cpp
index b4192df..1090e9d 100644
--- a/runtime/src/kmp_dispatch.cpp
+++ b/runtime/src/kmp_dispatch.cpp
@@ -24,7 +24,7 @@
 #include "kmp_itt.h"
 #include "kmp_stats.h"
 #include "kmp_str.h"
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
 #include <float.h>
 #endif
 #include "kmp_lock.h"
@@ -478,7 +478,7 @@
         /* commonly used term: (2 nproc - 1)/(2 nproc) */
         DBL x;
 
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
         /* Linux* OS already has 64-bit computation by default for long double,
            and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
            Windows* OS on IA-32 architecture, we need to set precision to 64-bit
@@ -573,7 +573,7 @@
         pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
                                  tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
                         cross * chunk;
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
         // restore FPCW
         _control87(oldFpcw, _MCW_PC);
 #endif
@@ -1625,7 +1625,7 @@
   case kmp_sch_guided_analytical_chunked: {
     T chunkspec = pr->u.p.parm1;
     UT chunkIdx;
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
     /* for storing original FPCW value for Windows* OS on
        IA-32 architecture 8-byte version */
     unsigned int oldFpcw;
@@ -1662,7 +1662,7 @@
    Windows* OS.
    This check works around the possible effect that init != 0 for chunkIdx == 0.
  */
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
         /* If we haven't already done so, save original
            FPCW and set precision to 64-bit, as Windows* OS
            on IA-32 architecture defaults to 53-bit */
@@ -1690,7 +1690,7 @@
         } // if
       } // if
     } // while (1)
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
     /* restore FPCW if necessary
        AC: check fpcwSet flag first because oldFpcw can be uninitialized here
     */
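KMP_USE_X87CONTROL, defined later in this patch in kmp_os.h for x87-capable Windows builds (KMP_ARCH_X86 under KMP_MSVC_COMPAT), replaces the open-coded KMP_OS_WINDOWS && KMP_ARCH_X86 test. The code it guards follows the usual save/raise/restore pattern around the guided-schedule long-double math; a condensed illustration (the save half sits outside the hunks shown here, and the real code additionally tracks an fpcwSet flag):

  #include <float.h> // _control87, _MCW_PC, _PC_64 (MSVC CRT)

  static void guided_chunk_math_with_64bit_fpcw(void) {
  #if KMP_USE_X87CONTROL
    unsigned int oldFpcw = _control87(0, 0); // read the current control word
    _control87(_PC_64, _MCW_PC);             // force 64-bit significand precision
  #endif
    /* ... long-double computation for kmp_sch_guided_analytical_chunked ... */
  #if KMP_USE_X87CONTROL
    _control87(oldFpcw, _MCW_PC);            // restore the caller's precision
  #endif
  }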
diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h
index 6910c37..abf1892 100644
--- a/runtime/src/kmp_ftn_entry.h
+++ b/runtime/src/kmp_ftn_entry.h
@@ -21,6 +21,12 @@
 
 #include "kmp_i18n.h"
 
+#if OMP_50_ENABLED
+// For affinity format functions
+#include "kmp_io.h"
+#include "kmp_str.h"
+#endif
+
 #if OMPT_SUPPORT
 #include "ompt-specific.h"
 #endif
@@ -355,9 +361,9 @@
   }
   kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()];
   ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
-  parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+  parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   int ret = __kmp_control_tool(command, modifier, arg);
-  parent_task_info->frame.enter_frame = 0;
+  parent_task_info->frame.enter_frame.ptr = 0;
   return ret;
 #endif
 }
@@ -389,6 +395,137 @@
   __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
 #endif
 }
+
+/* OpenMP 5.0 affinity format support */
+
+#ifndef KMP_STUB
+static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size,
+                                           char const *csrc, size_t csrc_size) {
+  size_t capped_src_size = csrc_size;
+  if (csrc_size >= buf_size) {
+    capped_src_size = buf_size - 1;
+  }
+  KMP_STRNCPY_S(buffer, buf_size, csrc, capped_src_size);
+  if (csrc_size >= buf_size) {
+    KMP_DEBUG_ASSERT(buffer[buf_size - 1] == '\0');
+    buffer[buf_size - 1] = csrc[buf_size - 1];
+  } else {
+    for (size_t i = csrc_size; i < buf_size; ++i)
+      buffer[i] = ' ';
+  }
+}
+
+// Convert a Fortran string to a C string by adding null byte
+class ConvertedString {
+  char *buf;
+  kmp_info_t *th;
+
+public:
+  ConvertedString(char const *fortran_str, size_t size) {
+    th = __kmp_get_thread();
+    buf = (char *)__kmp_thread_malloc(th, size + 1);
+    KMP_STRNCPY_S(buf, size + 1, fortran_str, size);
+    buf[size] = '\0';
+  }
+  ~ConvertedString() { __kmp_thread_free(th, buf); }
+  const char *get() const { return buf; }
+};
+#endif // KMP_STUB
+
+/*
+ * Set the value of the affinity-format-var ICV on the current device to the
+ * format specified in the argument.
+*/
+void FTN_STDCALL FTN_SET_AFFINITY_FORMAT(char const *format, size_t size) {
+#ifdef KMP_STUB
+  return;
+#else
+  if (!__kmp_init_serial) {
+    __kmp_serial_initialize();
+  }
+  ConvertedString cformat(format, size);
+  // Since the __kmp_affinity_format variable is a C string, do not
+  // use the fortran strncpy function
+  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
+                         cformat.get(), KMP_STRLEN(cformat.get()));
+#endif
+}
+
+/*
+ * Returns the number of characters required to hold the entire affinity format
+ * specification (not including null byte character) and writes the value of the
+ * affinity-format-var ICV on the current device to buffer. If the return value
+ * is larger than size, the affinity format specification is truncated.
+*/
+size_t FTN_STDCALL FTN_GET_AFFINITY_FORMAT(char *buffer, size_t size) {
+#ifdef KMP_STUB
+  return 0;
+#else
+  size_t format_size;
+  if (!__kmp_init_serial) {
+    __kmp_serial_initialize();
+  }
+  format_size = KMP_STRLEN(__kmp_affinity_format);
+  if (buffer && size) {
+    __kmp_fortran_strncpy_truncate(buffer, size, __kmp_affinity_format,
+                                   format_size);
+  }
+  return format_size;
+#endif
+}
+
+/*
+ * Prints the thread affinity information of the current thread in the format
+ * specified by the format argument. If the format is NULL or a zero-length
+ * string, the value of the affinity-format-var ICV is used.
+*/
+void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) {
+#ifdef KMP_STUB
+  return;
+#else
+  int gtid;
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
+  gtid = __kmp_get_gtid();
+  ConvertedString cformat(format, size);
+  __kmp_aux_display_affinity(gtid, cformat.get());
+#endif
+}
+
+/*
+ * Returns the number of characters required to hold the entire affinity format
+ * specification (not including null byte) and prints the thread affinity
+ * information of the current thread into the character string buffer of size
+ * buf_size, in the format specified by the format argument. If the format is
+ * NULL or a zero-length string, the value of the affinity-format-var ICV is
+ * used. The buffer must be allocated prior to calling the routine. If the
+ * return value is larger than size, the affinity format specification is
+ * truncated.
+*/
+size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format,
+                                        size_t buf_size, size_t for_size) {
+#if defined(KMP_STUB)
+  return 0;
+#else
+  int gtid;
+  size_t num_required;
+  kmp_str_buf_t capture_buf;
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
+  gtid = __kmp_get_gtid();
+  __kmp_str_buf_init(&capture_buf);
+  ConvertedString cformat(format, for_size);
+  num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf);
+  if (buffer && buf_size) {
+    __kmp_fortran_strncpy_truncate(buffer, buf_size, capture_buf.str,
+                                   capture_buf.used);
+  }
+  __kmp_str_buf_free(&capture_buf);
+  return num_required;
+#endif
+}
 #endif /* OMP_50_ENABLED */
 
 int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) {
@@ -397,7 +534,8 @@
 #else
   int gtid;
 
-#if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||    \
+        KMP_OS_HURD
   gtid = __kmp_entry_gtid();
 #elif KMP_OS_WINDOWS
   if (!__kmp_init_parallel ||
@@ -777,34 +915,7 @@
 #ifdef KMP_STUB
   return 1;
 #else
-  kmp_info_t *thr = __kmp_entry_thread();
-  if (thr->th.th_teams_microtask) {
-    kmp_team_t *team = thr->th.th_team;
-    int tlevel = thr->th.th_teams_level;
-    int ii = team->t.t_level; // the level of the teams construct
-    int dd = team->t.t_serialized;
-    int level = tlevel + 1;
-    KMP_DEBUG_ASSERT(ii >= tlevel);
-    while (ii > level) {
-      for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
-      }
-      if (team->t.t_serialized && (!dd)) {
-        team = team->t.t_parent;
-        continue;
-      }
-      if (ii > level) {
-        team = team->t.t_parent;
-        ii--;
-      }
-    }
-    if (dd > 1) {
-      return 1; // teams region is serialized ( 1 team of 1 thread ).
-    } else {
-      return team->t.t_parent->t.t_nproc;
-    }
-  } else {
-    return 1;
-  }
+  return __kmp_aux_get_num_teams();
 #endif
 }
 
@@ -812,34 +923,7 @@
 #ifdef KMP_STUB
   return 0;
 #else
-  kmp_info_t *thr = __kmp_entry_thread();
-  if (thr->th.th_teams_microtask) {
-    kmp_team_t *team = thr->th.th_team;
-    int tlevel = thr->th.th_teams_level; // the level of the teams construct
-    int ii = team->t.t_level;
-    int dd = team->t.t_serialized;
-    int level = tlevel + 1;
-    KMP_DEBUG_ASSERT(ii >= tlevel);
-    while (ii > level) {
-      for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
-      }
-      if (team->t.t_serialized && (!dd)) {
-        team = team->t.t_parent;
-        continue;
-      }
-      if (ii > level) {
-        team = team->t.t_parent;
-        ii--;
-      }
-    }
-    if (dd > 1) {
-      return 0; // teams region is serialized ( 1 team of 1 thread ).
-    } else {
-      return team->t.t_master_tid;
-    }
-  } else {
-    return 0;
-  }
+  return __kmp_aux_get_team_num();
 #endif
 }
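For orientation, a minimal user-side sketch of the four OpenMP 5.0 routines these wrappers implement, assuming the standard C prototypes from omp.h (the Fortran-callable entry points above receive the string lengths as explicit size parameters instead). The %-specifiers come from the affinity-format grammar documented in the kmp_runtime.cpp hunk further down:

  #include <omp.h>
  #include <stdio.h>

  int main() {
    // %h = host, %P = process id, %0.3n = zero-padded thread number, %a = mask
    omp_set_affinity_format("host=%h pid=%P thread=%0.3n bound to %a");

    char fmt[128];
    size_t needed = omp_get_affinity_format(fmt, sizeof(fmt));
    printf("affinity-format-var needs %zu chars: %s\n", needed, fmt);

  #pragma omp parallel
    {
      char line[256];
      // A NULL format means "use the affinity-format-var ICV".
      omp_capture_affinity(line, sizeof(line), NULL);
      printf("%s\n", line);
      omp_display_affinity(NULL); // same data, printed by the runtime itself
    }
    return 0;
  }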
 
diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h
index 5d0aaa2..47188fc 100644
--- a/runtime/src/kmp_ftn_os.h
+++ b/runtime/src/kmp_ftn_os.h
@@ -139,6 +139,10 @@
 #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator
 #define FTN_ALLOC omp_alloc
 #define FTN_FREE omp_free
+#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format
+#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format
+#define FTN_DISPLAY_AFFINITY omp_display_affinity
+#define FTN_CAPTURE_AFFINITY omp_capture_affinity
 #endif
 
 #endif /* KMP_FTN_PLAIN */
@@ -265,6 +269,10 @@
 #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_
 #define FTN_ALLOC omp_alloc_
 #define FTN_FREE omp_free_
+#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_
+#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_
+#define FTN_DISPLAY_AFFINITY omp_display_affinity_
+#define FTN_CAPTURE_AFFINITY omp_capture_affinity_
 #endif
 
 #endif /* KMP_FTN_APPEND */
@@ -391,6 +399,10 @@
 #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR
 #define FTN_ALLOC OMP_ALLOC
 #define FTN_FREE OMP_FREE
+#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT
+#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT
+#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY
+#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY
 #endif
 
 #endif /* KMP_FTN_UPPER */
@@ -517,6 +529,10 @@
 #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_
 #define FTN_ALLOC OMP_ALLOC_
 #define FTN_FREE OMP_FREE_
+#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_
+#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_
+#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_
+#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY_
 #endif
 
 #endif /* KMP_FTN_UAPPEND */
diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp
index 34465de..ef8a116 100644
--- a/runtime/src/kmp_global.cpp
+++ b/runtime/src/kmp_global.cpp
@@ -282,6 +282,11 @@
 int __kmp_affinity_num_places = 0;
 #endif
 
+#if OMP_50_ENABLED
+int __kmp_display_affinity = FALSE;
+char *__kmp_affinity_format = NULL;
+#endif // OMP_50_ENABLED
+
 kmp_hws_item_t __kmp_hws_socket = {0, 0};
 kmp_hws_item_t __kmp_hws_node = {0, 0};
 kmp_hws_item_t __kmp_hws_tile = {0, 0};
diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp
index e218018..646d75d 100644
--- a/runtime/src/kmp_gsupport.cpp
+++ b/runtime/src/kmp_gsupport.cpp
@@ -32,17 +32,17 @@
   MKLOC(loc, "GOMP_barrier");
   KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
   __kmpc_barrier(&loc, gtid);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 }
@@ -178,10 +178,10 @@
 // and for all other threads to reach this point.
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
@@ -198,7 +198,7 @@
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
   return retval;
@@ -214,10 +214,10 @@
   // propagated to all threads before trying to reuse the t_copypriv_data field.
   __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
@@ -230,7 +230,7 @@
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 }
@@ -284,8 +284,8 @@
                                  void *data) {
 #if OMPT_SUPPORT
   kmp_info_t *thr;
-  omp_frame_t *ompt_frame;
-  omp_state_t enclosing_state;
+  ompt_frame_t *ompt_frame;
+  ompt_state_t enclosing_state;
 
   if (ompt_enabled.enabled) {
     // get pointer to thread data structure
@@ -293,11 +293,11 @@
 
     // save enclosing task state; set current state for task
     enclosing_state = thr->th.ompt_thread_info.state;
-    thr->th.ompt_thread_info.state = omp_state_work_parallel;
+    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
 
     // set task frame
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
@@ -306,7 +306,7 @@
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     // clear task frame
-    ompt_frame->exit_frame = NULL;
+    ompt_frame->exit_frame = ompt_data_none;
 
     // restore enclosing state
     thr->th.ompt_thread_info.state = enclosing_state;
@@ -331,18 +331,18 @@
 
 #if OMPT_SUPPORT
   kmp_info_t *thr;
-  omp_frame_t *ompt_frame;
-  omp_state_t enclosing_state;
+  ompt_frame_t *ompt_frame;
+  ompt_state_t enclosing_state;
 
   if (ompt_enabled.enabled) {
     thr = __kmp_threads[*gtid];
     // save enclosing task state; set current state for task
     enclosing_state = thr->th.ompt_thread_info.state;
-    thr->th.ompt_thread_info.state = omp_state_work_parallel;
+    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
 
     // set task frame
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+    ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
@@ -352,7 +352,7 @@
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     // clear task frame
-    ompt_frame->exit_frame = NULL;
+    ompt_frame->exit_frame = ompt_data_none;
 
     // reset enclosing state
     thr->th.ompt_thread_info.state = enclosing_state;
@@ -403,7 +403,7 @@
           &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid));
       task_info->thread_num = __kmp_tid_from_gtid(gtid);
     }
-    thr->th.ompt_thread_info.state = omp_state_work_parallel;
+    thr->th.ompt_thread_info.state = ompt_state_work_parallel;
   }
 #endif
 }
@@ -422,11 +422,11 @@
   int gtid = __kmp_entry_gtid();
 
 #if OMPT_SUPPORT
-  omp_frame_t *parent_frame, *frame;
+  ompt_frame_t *parent_frame, *frame;
 
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
-    parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
@@ -448,7 +448,7 @@
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
-    frame->exit_frame = OMPT_GET_FRAME_ADDRESS(1);
+    frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 }
@@ -471,7 +471,7 @@
       // Implicit task is finished here, in the barrier we might schedule
       // deferred tasks,
       // these don't see the implicit task on the stack
-      OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = NULL;
+      OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
     }
 #endif
 
@@ -764,17 +764,17 @@
   KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
 
 #if OMPT_SUPPORT && OMPT_OPTIONAL
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 
@@ -1075,16 +1075,16 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
 
 #define OMPT_LOOP_PRE()                                                        \
-  omp_frame_t *parent_frame;                                                   \
+  ompt_frame_t *parent_frame;                                                  \
   if (ompt_enabled.enabled) {                                                  \
     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);   \
-    parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);                     \
+    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);                 \
     OMPT_STORE_RETURN_ADDRESS(gtid);                                           \
   }
 
 #define OMPT_LOOP_POST()                                                       \
   if (ompt_enabled.enabled) {                                                  \
-    parent_frame->enter_frame = NULL;                                          \
+    parent_frame->enter_frame = ompt_data_none;                                \
   }
 
 #else
@@ -1164,7 +1164,7 @@
   if (ompt_enabled.enabled) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
     current_task = __kmp_threads[gtid]->th.th_current_task;
-    current_task->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
@@ -1198,8 +1198,8 @@
       taskdata = KMP_TASK_TO_TASKDATA(task);
       oldInfo = thread->th.ompt_thread_info;
       thread->th.ompt_thread_info.wait_id = 0;
-      thread->th.ompt_thread_info.state = omp_state_work_parallel;
-      taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+      thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+      taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       OMPT_STORE_RETURN_ADDRESS(gtid);
     }
 #endif
@@ -1211,13 +1211,13 @@
 #if OMPT_SUPPORT
     if (ompt_enabled.enabled) {
       thread->th.ompt_thread_info = oldInfo;
-      taskdata->ompt_task_info.frame.exit_frame = NULL;
+      taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
     }
 #endif
   }
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    current_task->ompt_task_info.frame.enter_frame = NULL;
+    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
   }
 #endif
 
@@ -1302,11 +1302,11 @@
   int gtid = __kmp_entry_gtid();
 
 #if OMPT_SUPPORT
-  omp_frame_t *parent_frame;
+  ompt_frame_t *parent_frame;
 
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
-    parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
@@ -1328,7 +1328,7 @@
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    parent_frame->enter_frame = NULL;
+    parent_frame->enter_frame = ompt_data_none;
   }
 #endif
 
@@ -1342,17 +1342,17 @@
   KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
 
 #if OMPT_SUPPORT
-  omp_frame_t *ompt_frame;
+  ompt_frame_t *ompt_frame;
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
   }
 #endif
 
@@ -1383,7 +1383,7 @@
   ompt_task_info_t *parent_task_info, *task_info;
   if (ompt_enabled.enabled) {
     parent_task_info = __ompt_get_task_info_object(0);
-    parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
@@ -1403,7 +1403,7 @@
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     task_info = __ompt_get_task_info_object(0);
-    task_info->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+    task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
   task(data);
@@ -1415,8 +1415,8 @@
   KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    task_info->frame.exit_frame = NULL;
-    parent_task_info->frame.enter_frame = NULL;
+    task_info->frame.exit_frame = ompt_data_none;
+    parent_task_info->frame.enter_frame = ompt_data_none;
   }
 #endif
 }
diff --git a/runtime/src/kmp_io.cpp b/runtime/src/kmp_io.cpp
index 4f58ea0..24c6e72 100644
--- a/runtime/src/kmp_io.cpp
+++ b/runtime/src/kmp_io.cpp
@@ -27,11 +27,15 @@
 #include "kmp_str.h"
 
 #if KMP_OS_WINDOWS
+#if KMP_MSVC_COMPAT
 #pragma warning(push)
 #pragma warning(disable : 271 310)
+#endif
 #include <windows.h>
+#if KMP_MSVC_COMPAT
 #pragma warning(pop)
 #endif
+#endif
 
 /* ------------------------------------------------------------------------ */
 
@@ -42,10 +46,7 @@
 
 #if KMP_OS_WINDOWS
 
-#ifdef KMP_DEBUG
-/* __kmp_stdout is used only for dev build */
 static HANDLE __kmp_stdout = NULL;
-#endif
 static HANDLE __kmp_stderr = NULL;
 static int __kmp_console_exists = FALSE;
 static kmp_str_buf_t __kmp_console_buf;
@@ -72,10 +73,7 @@
   /* wait until user presses return before closing window */
   /* TODO only close if a window was opened */
   if (__kmp_console_exists) {
-#ifdef KMP_DEBUG
-    /* standard out is used only in dev build */
     __kmp_stdout = NULL;
-#endif
     __kmp_stderr = NULL;
     __kmp_str_buf_free(&__kmp_console_buf);
     __kmp_console_exists = FALSE;
@@ -88,21 +86,17 @@
   __kmp_acquire_bootstrap_lock(&__kmp_console_lock);
 
   if (!__kmp_console_exists) {
-#ifdef KMP_DEBUG
-    /* standard out is used only in dev build */
     HANDLE ho;
-#endif
     HANDLE he;
 
     __kmp_str_buf_init(&__kmp_console_buf);
 
     AllocConsole();
-// We do not check the result of AllocConsole because
-//  1. the call is harmless
-//  2. it is not clear how to communicate failue
-//  3. we will detect failure later when we get handle(s)
+    // We do not check the result of AllocConsole because
+    //  1. the call is harmless
+    //  2. it is not clear how to communicate failure
+    //  3. we will detect failure later when we get handle(s)
 
-#ifdef KMP_DEBUG
     ho = GetStdHandle(STD_OUTPUT_HANDLE);
     if (ho == INVALID_HANDLE_VALUE || ho == NULL) {
 
@@ -114,7 +108,6 @@
 
       __kmp_stdout = ho; // temporary code, need new global for ho
     }
-#endif
     he = GetStdHandle(STD_ERROR_HANDLE);
     if (he == INVALID_HANDLE_VALUE || he == NULL) {
 
@@ -133,22 +126,22 @@
 
 #else
 #define __kmp_stderr (stderr)
+#define __kmp_stdout (stdout)
 #endif /* KMP_OS_WINDOWS */
 
-void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) {
+void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap) {
 #if KMP_OS_WINDOWS
   if (!__kmp_console_exists) {
     __kmp_redirect_output();
   }
-  if (!__kmp_stderr && __kmp_io == kmp_err) {
+  if (!__kmp_stderr && out_stream == kmp_err) {
     return;
   }
-#ifdef KMP_DEBUG
-  if (!__kmp_stdout && __kmp_io == kmp_out) {
+  if (!__kmp_stdout && out_stream == kmp_out) {
     return;
   }
-#endif
 #endif /* KMP_OS_WINDOWS */
+  auto stream = ((out_stream == kmp_out) ? __kmp_stdout : __kmp_stderr);
 
   if (__kmp_debug_buf && __kmp_debug_buffer != NULL) {
 
@@ -170,14 +163,14 @@
                                                 "overflow; increase "
                                                 "KMP_DEBUG_BUF_CHARS to %d\n",
                             chars + 1);
-        WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used,
-                  &count, NULL);
+        WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count,
+                  NULL);
         __kmp_str_buf_clear(&__kmp_console_buf);
 #else
-        fprintf(__kmp_stderr, "OMP warning: Debugging buffer overflow; "
-                              "increase KMP_DEBUG_BUF_CHARS to %d\n",
+        fprintf(stream, "OMP warning: Debugging buffer overflow; "
+                        "increase KMP_DEBUG_BUF_CHARS to %d\n",
                 chars + 1);
-        fflush(__kmp_stderr);
+        fflush(stream);
 #endif
         __kmp_debug_buf_warn_chars = chars + 1;
       }
@@ -192,15 +185,15 @@
     __kmp_str_buf_print(&__kmp_console_buf, "pid=%d: ", (kmp_int32)getpid());
 #endif
     __kmp_str_buf_vprint(&__kmp_console_buf, format, ap);
-    WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used,
-              &count, NULL);
+    WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count,
+              NULL);
     __kmp_str_buf_clear(&__kmp_console_buf);
 #else
 #ifdef KMP_DEBUG_PIDS
-    fprintf(__kmp_stderr, "pid=%d: ", (kmp_int32)getpid());
+    fprintf(stream, "pid=%d: ", (kmp_int32)getpid());
 #endif
-    vfprintf(__kmp_stderr, format, ap);
-    fflush(__kmp_stderr);
+    vfprintf(stream, format, ap);
+    fflush(stream);
 #endif
   }
 }
@@ -224,3 +217,14 @@
 
   va_end(ap);
 }
+
+void __kmp_fprintf(enum kmp_io stream, char const *format, ...) {
+  va_list ap;
+  va_start(ap, format);
+
+  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
+  __kmp_vprintf(stream, format, ap);
+  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
+
+  va_end(ap);
+}
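With __kmp_stdout now defined for all build types and __kmp_vprintf selecting a stream from its out_stream argument, the new __kmp_fprintf gives callers a stream-selectable printf that still serializes on __kmp_stdio_lock. A hypothetical call site (pid and mask_str are placeholder identifiers, not taken from this patch; the runtime-internal headers are assumed):

  // Assumes kmp.h / kmp_io.h internals; illustration only.
  static void example_report(int pid, const char *mask_str) {
    __kmp_fprintf(kmp_out, "OMP: pid %d bound to OS procs %s%s", pid, mask_str,
                  KMP_END_OF_LINE);
    __kmp_fprintf(kmp_err, "OMP: warning: affinity not supported%s",
                  KMP_END_OF_LINE);
  }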
diff --git a/runtime/src/kmp_io.h b/runtime/src/kmp_io.h
index 84ac67b..dac7a42 100644
--- a/runtime/src/kmp_io.h
+++ b/runtime/src/kmp_io.h
@@ -26,9 +26,10 @@
 extern kmp_bootstrap_lock_t
     __kmp_console_lock; /* Control console initialization */
 
-extern void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap);
+extern void __kmp_vprintf(enum kmp_io stream, char const *format, va_list ap);
 extern void __kmp_printf(char const *format, ...);
 extern void __kmp_printf_no_lock(char const *format, ...);
+extern void __kmp_fprintf(enum kmp_io stream, char const *format, ...);
 extern void __kmp_close_console(void);
 
 #ifdef __cplusplus
diff --git a/runtime/src/kmp_lock.cpp b/runtime/src/kmp_lock.cpp
index 16834c6..5c2eeed 100644
--- a/runtime/src/kmp_lock.cpp
+++ b/runtime/src/kmp_lock.cpp
@@ -1108,7 +1108,7 @@
   kmp_int32 need_mf = 1;
 
 #if OMPT_SUPPORT
-  omp_state_t prev_state = omp_state_undefined;
+  ompt_state_t prev_state = ompt_state_undefined;
 #endif
 
   KA_TRACE(1000,
@@ -1216,7 +1216,7 @@
 #endif
 
 #if OMPT_SUPPORT
-        if (ompt_enabled.enabled && prev_state != omp_state_undefined) {
+        if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
           /* change the state before clearing wait_id */
           this_thr->th.ompt_thread_info.state = prev_state;
           this_thr->th.ompt_thread_info.wait_id = 0;
@@ -1231,11 +1231,11 @@
     }
 
 #if OMPT_SUPPORT
-    if (ompt_enabled.enabled && prev_state == omp_state_undefined) {
+    if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
       /* this thread will spin; set wait_id before entering wait state */
       prev_state = this_thr->th.ompt_thread_info.state;
       this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
-      this_thr->th.ompt_thread_info.state = omp_state_wait_lock;
+      this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
     }
 #endif
 
@@ -1716,7 +1716,9 @@
 
 /* RTM Adaptive locks */
 
-#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
+#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) ||                          \
+    (KMP_COMPILER_MSVC && _MSC_VER >= 1700) ||                                 \
+    (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
 
 #include <immintrin.h>
 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
@@ -3357,7 +3359,7 @@
 #endif
 
 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
-  return lck == lck->lk.initialized;
+  return lck == lck->lk.self;
 }
 
 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h
index 220236d..6a88d7b 100644
--- a/runtime/src/kmp_lock.h
+++ b/runtime/src/kmp_lock.h
@@ -649,7 +649,7 @@
       }                                                                        \
     }                                                                          \
     if (lck->tas.lk.poll != 0 ||                                               \
-        !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {        \
+        !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {     \
       kmp_uint32 spins;                                                        \
       KMP_FSYNC_PREPARE(lck);                                                  \
       KMP_INIT_YIELD(spins);                                                   \
@@ -659,8 +659,8 @@
       } else {                                                                 \
         KMP_YIELD_SPIN(spins);                                                 \
       }                                                                        \
-      while (lck->tas.lk.poll != 0 ||                                          \
-             !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {   \
+      while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq(         \
+                                          &lck->tas.lk.poll, 0, gtid + 1)) {   \
         if (TCR_4(__kmp_nth) >                                                 \
             (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {             \
           KMP_YIELD(TRUE);                                                     \
@@ -702,7 +702,7 @@
       }
     }
     return ((lck->tas.lk.poll == 0) &&
-            __kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
+            __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
   } else {
     KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
     return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
@@ -767,7 +767,7 @@
       *depth = KMP_LOCK_ACQUIRED_NEXT;                                         \
     } else {                                                                   \
       if ((lck->tas.lk.poll != 0) ||                                           \
-          !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {      \
+          !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) {   \
         kmp_uint32 spins;                                                      \
         KMP_FSYNC_PREPARE(lck);                                                \
         KMP_INIT_YIELD(spins);                                                 \
@@ -777,8 +777,9 @@
         } else {                                                               \
           KMP_YIELD_SPIN(spins);                                               \
         }                                                                      \
-        while ((lck->tas.lk.poll != 0) ||                                      \
-               !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
+        while (                                                                \
+            (lck->tas.lk.poll != 0) ||                                         \
+            !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
           if (TCR_4(__kmp_nth) >                                               \
               (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {           \
             KMP_YIELD(TRUE);                                                   \
@@ -826,7 +827,7 @@
       return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
     }
     retval = ((lck->tas.lk.poll == 0) &&
-              __kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
+              __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
     if (retval) {
       KMP_MB();
       lck->tas.lk.depth_locked = 1;
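The test-and-set fast-path macros now call __kmp_atomic_compare_store_acq instead of __kmp_compare_and_store_acq, consistent with the lock's poll word being manipulated through C++11 atomics in this update. A sketch of the assumed semantics (the real helper lives in kmp_os.h; the name below is suffixed to make clear it is an illustration):

  #include <atomic>

  // Boolean compare-and-swap with acquire ordering on success: returns true
  // and publishes `desired` only if *p still held `expected`.
  template <typename T>
  static inline bool kmp_atomic_compare_store_acq_sketch(std::atomic<T> *p,
                                                         T expected, T desired) {
    return p->compare_exchange_strong(expected, desired,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed);
  }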
diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h
index 93743ad..3c2426b 100644
--- a/runtime/src/kmp_os.h
+++ b/runtime/src/kmp_os.h
@@ -86,9 +86,12 @@
    128-bit extended precision type yet */
 typedef long double _Quad;
 #elif KMP_COMPILER_GCC
+/* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad */
+#if !KMP_OS_NETBSD
 typedef __float128 _Quad;
 #undef KMP_HAVE_QUAD
 #define KMP_HAVE_QUAD 1
+#endif
 #elif KMP_COMPILER_MSVC
 typedef long double _Quad;
 #endif
@@ -100,7 +103,9 @@
 #endif
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
+#define KMP_USE_X87CONTROL 0
 #if KMP_OS_WINDOWS
+#define KMP_END_OF_LINE "\r\n"
 typedef char kmp_int8;
 typedef unsigned char kmp_uint8;
 typedef short kmp_int16;
@@ -122,6 +127,10 @@
 typedef struct kmp_struct64 kmp_uint64;
 /* Not sure what to use for KMP_[U]INT64_SPEC here */
 #endif
+#if KMP_ARCH_X86 && KMP_MSVC_COMPAT
+#undef KMP_USE_X87CONTROL
+#define KMP_USE_X87CONTROL 1
+#endif
 #if KMP_ARCH_X86_64
 #define KMP_INTPTR 1
 typedef __int64 kmp_intptr_t;
@@ -132,6 +141,7 @@
 #endif /* KMP_OS_WINDOWS */
 
 #if KMP_OS_UNIX
+#define KMP_END_OF_LINE "\n"
 typedef char kmp_int8;
 typedef unsigned char kmp_uint8;
 typedef short kmp_int16;
@@ -246,7 +256,7 @@
 
 #define KMP_EXPORT extern /* export declaration in guide libraries */
 
-#if __GNUC__ >= 4
+#if __GNUC__ >= 4 && !defined(__MINGW32__)
 #define __forceinline __inline
 #endif
 
@@ -296,7 +306,7 @@
 #define KMP_NORETURN __attribute__((noreturn))
 #endif
 
-#if KMP_OS_WINDOWS
+#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
 #define KMP_ALIGN(bytes) __declspec(align(bytes))
 #define KMP_THREAD_LOCAL __declspec(thread)
 #define KMP_ALIAS /* Nothing */
@@ -356,10 +366,12 @@
 
 #if KMP_ASM_INTRINS && KMP_OS_WINDOWS
 
+#if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG
 #pragma intrinsic(InterlockedExchangeAdd)
 #pragma intrinsic(InterlockedCompareExchange)
 #pragma intrinsic(InterlockedExchange)
 #pragma intrinsic(InterlockedExchange64)
+#endif
 
 // Using InterlockedIncrement / InterlockedDecrement causes a library loading
 // ordering problem, so we use InterlockedExchangeAdd instead.
diff --git a/runtime/src/kmp_platform.h b/runtime/src/kmp_platform.h
index 7610484..bb23de0 100644
--- a/runtime/src/kmp_platform.h
+++ b/runtime/src/kmp_platform.h
@@ -17,8 +17,10 @@
 /* ---------------------- Operating system recognition ------------------- */
 
 #define KMP_OS_LINUX 0
+#define KMP_OS_DRAGONFLY 0
 #define KMP_OS_FREEBSD 0
 #define KMP_OS_NETBSD 0
+#define KMP_OS_OPENBSD 0
 #define KMP_OS_DARWIN 0
 #define KMP_OS_WINDOWS 0
 #define KMP_OS_CNK 0
@@ -45,6 +47,11 @@
 #else
 #endif
 
+#if (defined __DragonFly__)
+#undef KMP_OS_DRAGONFLY
+#define KMP_OS_DRAGONFLY 1
+#endif
+
 #if (defined __FreeBSD__)
 #undef KMP_OS_FREEBSD
 #define KMP_OS_FREEBSD 1
@@ -55,6 +62,11 @@
 #define KMP_OS_NETBSD 1
 #endif
 
+#if (defined __OpenBSD__)
+#undef KMP_OS_OPENBSD
+#define KMP_OS_OPENBSD 1
+#endif
+
 #if (defined __bgq__)
 #undef KMP_OS_CNK
 #define KMP_OS_CNK 1
@@ -66,12 +78,13 @@
 #endif
 
 #if (1 !=                                                                      \
-     KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN +           \
-         KMP_OS_WINDOWS + KMP_OS_HURD)
+     KMP_OS_LINUX + KMP_OS_DRAGONFLY + KMP_OS_FREEBSD + KMP_OS_NETBSD +        \
+         KMP_OS_OPENBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS + KMP_OS_HURD)
 #error Unknown OS
 #endif
 
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
+        KMP_OS_OPENBSD || KMP_OS_DARWIN || KMP_OS_HURD
 #undef KMP_OS_UNIX
 #define KMP_OS_UNIX 1
 #endif
@@ -88,7 +101,7 @@
 #define KMP_ARCH_MIPS64 0
 
 #if KMP_OS_WINDOWS
-#if defined _M_AMD64
+#if defined(_M_AMD64) || defined(__x86_64)
 #undef KMP_ARCH_X86_64
 #define KMP_ARCH_X86_64 1
 #else
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index b861c06..3dd9ab6 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -1092,6 +1092,19 @@
 #endif
   }
 
+#if OMP_50_ENABLED
+  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
+    for (i = 0; i < team->t.t_nproc; i++) {
+      kmp_info_t *thr = team->t.t_threads[i];
+      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
+          thr->th.th_prev_level != team->t.t_level) {
+        team->t.t_display_affinity = 1;
+        break;
+      }
+    }
+  }
+#endif
+
   KMP_MB();
 }
 
@@ -1213,12 +1226,12 @@
   ompt_data_t *implicit_task_data;
   void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
 
     ompt_task_info_t *parent_task_info;
     parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
 
-    parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     if (ompt_enabled.ompt_callback_parallel_begin) {
       int team_size = 1;
 
@@ -1382,13 +1395,27 @@
   KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
 #endif
 
+#if OMP_50_ENABLED
+  // Perform the display affinity functionality for
+  // serialized parallel regions
+  if (__kmp_display_affinity) {
+    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
+        this_thr->th.th_prev_num_threads != 1) {
+      // NULL means use the affinity-format-var ICV
+      __kmp_aux_display_affinity(global_tid, NULL);
+      this_thr->th.th_prev_level = serial_team->t.t_level;
+      this_thr->th.th_prev_num_threads = 1;
+    }
+  }
+#endif
+
   if (__kmp_env_consistency_check)
     __kmp_push_parallel(global_tid, NULL);
 #if OMPT_SUPPORT
   serial_team->t.ompt_team_info.master_return_address = codeptr;
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
-    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
+      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
+    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
 
     ompt_lw_taskteam_t lw_taskteam;
     __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
@@ -1408,8 +1435,8 @@
     }
 
     /* OMPT state */
-    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
-    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
+    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 }
@@ -1478,7 +1505,7 @@
 #if OMPT_SUPPORT
     ompt_data_t ompt_parallel_data = ompt_data_none;
     ompt_data_t *parent_task_data;
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     ompt_data_t *implicit_task_data;
     void *return_address = NULL;
 
@@ -1518,7 +1545,7 @@
             parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
             OMPT_INVOKER(call_context), return_address);
       }
-      master_th->th.ompt_thread_info.state = omp_state_overhead;
+      master_th->th.ompt_thread_info.state = ompt_state_overhead;
     }
 #endif
 
@@ -1558,7 +1585,7 @@
         if (ompt_enabled.enabled) {
           __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                   &ompt_parallel_data, return_address);
-          exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);
+          exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
 
           __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
           // don't use lw_taskteam after linking. content was swaped
@@ -1574,7 +1601,7 @@
           }
 
           /* OMPT state */
-          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
         } else {
           exit_runtime_p = &dummy;
         }
@@ -1594,7 +1621,7 @@
 #if OMPT_SUPPORT
         *exit_runtime_p = NULL;
         if (ompt_enabled.enabled) {
-          OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
+          OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
           if (ompt_enabled.ompt_callback_implicit_task) {
             ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                 ompt_scope_end, NULL, implicit_task_data, 1,
@@ -1607,7 +1634,7 @@
                 OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
                 OMPT_INVOKER(call_context), return_address);
           }
-          master_th->th.ompt_thread_info.state = omp_state_overhead;
+          master_th->th.ompt_thread_info.state = ompt_state_overhead;
         }
 #endif
         return TRUE;
@@ -1776,7 +1803,7 @@
             // don't use lw_taskteam after linking. content was swaped
 
             task_info = OMPT_CUR_TASK_INFO(master_th);
-            exit_runtime_p = &(task_info->frame.exit_frame);
+            exit_runtime_p = &(task_info->frame.exit_frame.ptr);
             if (ompt_enabled.ompt_callback_implicit_task) {
               ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                   ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
@@ -1786,7 +1813,7 @@
             }
 
             /* OMPT state */
-            master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
           } else {
             exit_runtime_p = &dummy;
           }
@@ -1819,7 +1846,7 @@
                   OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                   OMPT_INVOKER(call_context), return_address);
             }
-            master_th->th.ompt_thread_info.state = omp_state_overhead;
+            master_th->th.ompt_thread_info.state = ompt_state_overhead;
           }
 #endif
         } else if (microtask == (microtask_t)__kmp_teams_master) {
@@ -1874,7 +1901,7 @@
             __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
             // don't use lw_taskteam after linking. content was swaped
             task_info = OMPT_CUR_TASK_INFO(master_th);
-            exit_runtime_p = &(task_info->frame.exit_frame);
+            exit_runtime_p = &(task_info->frame.exit_frame.ptr);
 
             /* OMPT implicit task begin */
             implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
@@ -1887,7 +1914,7 @@
             }
 
             /* OMPT state */
-            master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+            master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
           } else {
             exit_runtime_p = &dummy;
           }
@@ -1920,7 +1947,7 @@
                   &ompt_parallel_data, parent_task_data,
                   OMPT_INVOKER(call_context), return_address);
             }
-            master_th->th.ompt_thread_info.state = omp_state_overhead;
+            master_th->th.ompt_thread_info.state = ompt_state_overhead;
           }
 #endif
 #if OMP_40_ENABLED
@@ -1932,7 +1959,7 @@
         __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                                 return_address);
 
-        lwt.ompt_task_info.frame.exit_frame = NULL;
+        lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
         __ompt_lw_taskteam_link(&lwt, master_th, 1);
 // don't use lw_taskteam after linking. content was swaped
 #endif
@@ -1948,7 +1975,7 @@
       KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
       KMP_MB();
       return FALSE;
-    }
+    } // if (nthreads == 1)
 
     // GEH: only modify the executing flag in the case when not serialized
     //      serialized case is handled in kmpc_serialized_parallel
@@ -2132,6 +2159,7 @@
         master_th->th.th_task_state_top++;
 #if KMP_NESTED_HOT_TEAMS
         if (master_th->th.th_hot_teams &&
+            active_level < __kmp_hot_teams_max_level &&
             team == master_th->th.th_hot_teams[active_level].hot_team) {
           // Restore master's nested state if nested hot team
           master_th->th.th_task_state =
@@ -2195,7 +2223,7 @@
                          &master_th->th.th_current_task->td_icvs, loc);
 
 #if OMPT_SUPPORT
-    master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+    master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
 #endif
 
     __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -2276,7 +2304,7 @@
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    master_th->th.ompt_thread_info.state = omp_state_overhead;
+    master_th->th.ompt_thread_info.state = ompt_state_overhead;
   }
 #endif
 
@@ -2288,8 +2316,8 @@
                                             kmp_team_t *team) {
   // restore state outside the region
   thread->th.ompt_thread_info.state =
-      ((team->t.t_serialized) ? omp_state_work_serial
-                              : omp_state_work_parallel);
+      ((team->t.t_serialized) ? ompt_state_work_serial
+                              : ompt_state_work_parallel);
 }
 
 static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
@@ -2302,7 +2330,7 @@
         codeptr);
   }
 
-  task_info->frame.enter_frame = NULL;
+  task_info->frame.enter_frame = ompt_data_none;
   __kmp_join_restore_state(thread, team);
 }
 #endif
@@ -2337,7 +2365,7 @@
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    master_th->th.ompt_thread_info.state = omp_state_overhead;
+    master_th->th.ompt_thread_info.state = ompt_state_overhead;
   }
 #endif
 
@@ -2516,7 +2544,7 @@
           OMPT_CUR_TASK_INFO(master_th)->thread_num);
     }
 
-    task_info->frame.exit_frame = NULL;
+    task_info->frame.exit_frame = ompt_data_none;
     task_info->task_data = ompt_data_none;
   }
 #endif
@@ -2649,6 +2677,8 @@
 
   KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
   thread = __kmp_threads[gtid];
+  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
+    return; // nothing to do
 
   __kmp_save_internal_controls(thread);
 
@@ -3816,6 +3846,8 @@
 #endif /* KMP_AFFINITY_SUPPORTED */
 #if OMP_50_ENABLED
   root_thread->th.th_def_allocator = __kmp_def_allocator;
+  root_thread->th.th_prev_level = 0;
+  root_thread->th.th_prev_num_threads = 1;
 #endif
 
   __kmp_root_counter++;
@@ -3825,7 +3857,7 @@
 
     kmp_info_t *root_thread = ompt_get_thread();
 
-    ompt_set_thread_state(root_thread, omp_state_overhead);
+    ompt_set_thread_state(root_thread, ompt_state_overhead);
 
     if (ompt_enabled.ompt_callback_thread_begin) {
       ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
@@ -3839,7 +3871,7 @@
       // initial task has nothing to return to
     }
 
-    ompt_set_thread_state(root_thread, omp_state_work_serial);
+    ompt_set_thread_state(root_thread, ompt_state_work_serial);
   }
 #endif
 
@@ -3978,7 +4010,7 @@
   if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
 #if OMPT_SUPPORT
     // the runtime is shutting down so we won't report any events
-    thread->th.ompt_thread_info.state = omp_state_undefined;
+    thread->th.ompt_thread_info.state = ompt_state_undefined;
 #endif
     __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
   }
@@ -4357,6 +4389,8 @@
 #endif
 #if OMP_50_ENABLED
   new_thr->th.th_def_allocator = __kmp_def_allocator;
+  new_thr->th.th_prev_level = 0;
+  new_thr->th.th_prev_num_threads = 1;
 #endif
 
   TCW_4(new_thr->th.th_in_pool, FALSE);
@@ -4545,6 +4579,12 @@
       th->th.th_first_place = first_place;
       th->th.th_last_place = last_place;
       th->th.th_new_place = masters_place;
+#if OMP_50_ENABLED
+      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
+          team->t.t_display_affinity != 1) {
+        team->t.t_display_affinity = 1;
+      }
+#endif
 
       KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
                      "partition = [%d,%d]\n",
@@ -4578,6 +4618,12 @@
         th->th.th_first_place = first_place;
         th->th.th_last_place = last_place;
         th->th.th_new_place = place;
+#if OMP_50_ENABLED
+        if (__kmp_display_affinity && place != th->th.th_current_place &&
+            team->t.t_display_affinity != 1) {
+          team->t.t_display_affinity = 1;
+        }
+#endif
 
         KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                        "partition = [%d,%d]\n",
@@ -4599,6 +4645,12 @@
         th->th.th_first_place = first_place;
         th->th.th_last_place = last_place;
         th->th.th_new_place = place;
+#if OMP_50_ENABLED
+        if (__kmp_display_affinity && place != th->th.th_current_place &&
+            team->t.t_display_affinity != 1) {
+          team->t.t_display_affinity = 1;
+        }
+#endif
         s_count++;
 
         if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4667,6 +4719,12 @@
 
           th->th.th_first_place = place;
           th->th.th_new_place = place;
+#if OMP_50_ENABLED
+          if (__kmp_display_affinity && place != th->th.th_current_place &&
+              team->t.t_display_affinity != 1) {
+            team->t.t_display_affinity = 1;
+          }
+#endif
           s_count = 1;
           while (s_count < S) {
             if (place == last_place) {
@@ -4758,7 +4816,12 @@
             th->th.th_first_place = first;
             th->th.th_new_place = place;
             th->th.th_last_place = last;
-
+#if OMP_50_ENABLED
+            if (__kmp_display_affinity && place != th->th.th_current_place &&
+                team->t.t_display_affinity != 1) {
+              team->t.t_display_affinity = 1;
+            }
+#endif
             KA_TRACE(100,
                      ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                       "partition = [%d,%d], spacing = %.4f\n",
@@ -4787,6 +4850,12 @@
         th->th.th_first_place = place;
         th->th.th_last_place = place;
         th->th.th_new_place = place;
+#if OMP_50_ENABLED
+        if (__kmp_display_affinity && place != th->th.th_current_place &&
+            team->t.t_display_affinity != 1) {
+          team->t.t_display_affinity = 1;
+        }
+#endif
         s_count++;
 
         if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -5601,7 +5670,7 @@
     thread_data = &(this_thr->th.ompt_thread_info.thread_data);
     *thread_data = ompt_data_none;
 
-    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     this_thr->th.ompt_thread_info.wait_id = 0;
     this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
     if (ompt_enabled.ompt_callback_thread_begin) {
@@ -5613,7 +5682,7 @@
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    this_thr->th.ompt_thread_info.state = omp_state_idle;
+    this_thr->th.ompt_thread_info.state = ompt_state_idle;
   }
 #endif
   /* This is the place where threads wait for work */
@@ -5629,7 +5698,7 @@
 
 #if OMPT_SUPPORT
     if (ompt_enabled.enabled) {
-      this_thr->th.ompt_thread_info.state = omp_state_overhead;
+      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     }
 #endif
 
@@ -5649,7 +5718,7 @@
 
 #if OMPT_SUPPORT
         if (ompt_enabled.enabled) {
-          this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+          this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
         }
 #endif
 
@@ -5664,9 +5733,9 @@
 #if OMPT_SUPPORT
       if (ompt_enabled.enabled) {
         /* no frame set while outside task */
-        __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
+        __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
 
-        this_thr->th.ompt_thread_info.state = omp_state_overhead;
+        this_thr->th.ompt_thread_info.state = ompt_state_overhead;
       }
 #endif
       /* join barrier after parallel region */
@@ -6960,7 +7029,7 @@
 
   if (ompt_enabled.enabled) {
     exit_runtime_p = &(
-        team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
+        team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
   } else {
     exit_runtime_p = &dummy;
   }
@@ -7200,10 +7269,10 @@
   __kmp_join_barrier(gtid); /* wait for everyone */
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled &&
-      this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
     int ds_tid = this_thr->th.th_info.ds.ds_tid;
     ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
-    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
 #if OMPT_OPTIONAL
     void *codeptr = NULL;
     if (KMP_MASTER_TID(ds_tid) &&
@@ -7407,6 +7476,12 @@
   __kmp_nested_proc_bind.bind_types = NULL;
   __kmp_nested_proc_bind.size = 0;
   __kmp_nested_proc_bind.used = 0;
+#if OMP_50_ENABLED
+  if (__kmp_affinity_format) {
+    KMP_INTERNAL_FREE(__kmp_affinity_format);
+    __kmp_affinity_format = NULL;
+  }
+#endif
 
   __kmp_i18n_catclose();
 
@@ -7563,6 +7638,339 @@
   }
 }
 
+/* Get team information common to all teams-construct API routines */
+// Returns NULL if the calling thread is not in a teams construct
+static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
+  kmp_info_t *thr = __kmp_entry_thread();
+  teams_serialized = 0;
+  if (thr->th.th_teams_microtask) {
+    kmp_team_t *team = thr->th.th_team;
+    int tlevel = thr->th.th_teams_level; // the level of the teams construct
+    int ii = team->t.t_level;
+    teams_serialized = team->t.t_serialized;
+    int level = tlevel + 1;
+    KMP_DEBUG_ASSERT(ii >= tlevel);
+    while (ii > level) {
+      for (teams_serialized = team->t.t_serialized;
+           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
+      }
+      if (team->t.t_serialized && (!teams_serialized)) {
+        team = team->t.t_parent;
+        continue;
+      }
+      if (ii > level) {
+        team = team->t.t_parent;
+        ii--;
+      }
+    }
+    return team;
+  }
+  return NULL;
+}
+
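+// The two helpers below back the 't' (team_num) and 'T' (num_teams) affinity
+// format fields resolved in __kmp_aux_capture_affinity_field() further down.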
+int __kmp_aux_get_team_num() {
+  int serialized;
+  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
+  if (team) {
+    if (serialized > 1) {
+      return 0; // teams region is serialized (1 team of 1 thread).
+    } else {
+      return team->t.t_master_tid;
+    }
+  }
+  return 0;
+}
+
+int __kmp_aux_get_num_teams() {
+  int serialized;
+  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
+  if (team) {
+    if (serialized > 1) {
+      return 1;
+    } else {
+      return team->t.t_parent->t.t_nproc;
+    }
+  }
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#if OMP_50_ENABLED
+/*
+ * Affinity Format Parser
+ *
+ * Field is in form of: %[[[0].]size]type
+ * % and type are required (%% means print a literal '%')
+ * type is either single char or long name surrounded by {},
+ * e.g., N or {num_threads}
+ * 0 => leading zeros
+ * . => right justified when size is specified
+ * by default output is left justified
+ * size is the *minimum* field length
+ * All other characters are printed as is
+ *
+ * Available field types:
+ * t {team_num}          - omp_get_team_num()
+ * T {num_teams}         - omp_get_num_teams()
+ * L {nesting_level}     - omp_get_level()
+ * n {thread_num}        - omp_get_thread_num()
+ * N {num_threads}       - omp_get_num_threads()
+ * a {ancestor_tnum}     - omp_get_ancestor_thread_num(omp_get_level()-1)
+ * H {host}              - name of host machine
+ * P {process_id}        - process id (integer)
+ * i {native_thread_id}  - native thread identifier (integer)
+ * A {thread_affinity}   - comma separated list of integers or integer ranges
+ *                         (values of affinity mask)
+ *
+ * Implementation-specific field types can be added
+ * If a type is unknown, print "undefined"
+*/
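+//
+// For example (values are illustrative, not fixed): under the rules above a
+// format string such as "host=%H tid=%0.4n level=%{nesting_level}" could
+// expand to "host=node7 tid=0002 level=1" for thread 2 at nesting level 1 on
+// a hypothetical host "node7"; '%0.4n' right-justifies the thread number in a
+// zero-padded field of minimum width 4.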
+
+// Structure holding the short name, long name, and corresponding data type
+// for snprintf. A table of these entries describes all valid field types.
+typedef struct kmp_affinity_format_field_t {
+  char short_name; // from spec e.g., L -> thread level
+  const char *long_name; // from spec thread_level -> thread level
+  char field_format; // data type for snprintf (typically 'd' or 's'
+  // for integer or string)
+} kmp_affinity_format_field_t;
+
+static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
+#if KMP_AFFINITY_SUPPORTED
+    {'A', "thread_affinity", 's'},
+#endif
+    {'t', "team_num", 'd'},
+    {'T', "num_teams", 'd'},
+    {'L', "nesting_level", 'd'},
+    {'n', "thread_num", 'd'},
+    {'N', "num_threads", 'd'},
+    {'a', "ancestor_tnum", 'd'},
+    {'H', "host", 's'},
+    {'P', "process_id", 'd'},
+    {'i', "native_thread_id", 'd'}};
+
+// Returns the number of characters needed to hold the field
+static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
+                                            const char **ptr,
+                                            kmp_str_buf_t *field_buffer) {
+  int rc, format_index, field_value;
+  const char *width_left, *width_right;
+  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
+  static const int FORMAT_SIZE = 20;
+  char format[FORMAT_SIZE] = {0};
+  char absolute_short_name = 0;
+
+  KMP_DEBUG_ASSERT(gtid >= 0);
+  KMP_DEBUG_ASSERT(th);
+  KMP_DEBUG_ASSERT(**ptr == '%');
+  KMP_DEBUG_ASSERT(field_buffer);
+
+  __kmp_str_buf_clear(field_buffer);
+
+  // Skip the initial %
+  (*ptr)++;
+
+  // Check for %% first
+  if (**ptr == '%') {
+    __kmp_str_buf_cat(field_buffer, "%", 1);
+    (*ptr)++; // skip over the second %
+    return 1;
+  }
+
+  // Parse field modifiers if they are present
+  pad_zeros = false;
+  if (**ptr == '0') {
+    pad_zeros = true;
+    (*ptr)++; // skip over 0
+  }
+  right_justify = false;
+  if (**ptr == '.') {
+    right_justify = true;
+    (*ptr)++; // skip over .
+  }
+  // Parse width of field: [width_left, width_right)
+  width_left = width_right = NULL;
+  if (**ptr >= '0' && **ptr <= '9') {
+    width_left = *ptr;
+    SKIP_DIGITS(*ptr);
+    width_right = *ptr;
+  }
+
+  // Create the format for KMP_SNPRINTF based on flags parsed above
+  format_index = 0;
+  format[format_index++] = '%';
+  if (!right_justify)
+    format[format_index++] = '-';
+  if (pad_zeros)
+    format[format_index++] = '0';
+  if (width_left && width_right) {
+    int i = 0;
+    // Only allow 8 digit number widths.
+    // This also prevents overflowing format variable
+    while (i < 8 && width_left < width_right) {
+      format[format_index++] = *width_left;
+      width_left++;
+      i++;
+    }
+  }
+
+  // Parse a name (long or short)
+  // Canonicalize the name into absolute_short_name
+  found_valid_name = false;
+  parse_long_name = (**ptr == '{');
+  if (parse_long_name)
+    (*ptr)++; // skip initial left brace
+  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
+                             sizeof(__kmp_affinity_format_table[0]);
+       ++i) {
+    char short_name = __kmp_affinity_format_table[i].short_name;
+    const char *long_name = __kmp_affinity_format_table[i].long_name;
+    char field_format = __kmp_affinity_format_table[i].field_format;
+    if (parse_long_name) {
+      int length = KMP_STRLEN(long_name);
+      if (strncmp(*ptr, long_name, length) == 0) {
+        found_valid_name = true;
+        (*ptr) += length; // skip the long name
+      }
+    } else if (**ptr == short_name) {
+      found_valid_name = true;
+      (*ptr)++; // skip the short name
+    }
+    if (found_valid_name) {
+      format[format_index++] = field_format;
+      format[format_index++] = '\0';
+      absolute_short_name = short_name;
+      break;
+    }
+  }
+  if (parse_long_name) {
+    if (**ptr != '}') {
+      absolute_short_name = 0;
+    } else {
+      (*ptr)++; // skip over the right brace
+    }
+  }
+
+  // Attempt to fill the buffer with the requested
+  // value using snprintf within __kmp_str_buf_print()
+  switch (absolute_short_name) {
+  case 't':
+    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
+    break;
+  case 'T':
+    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
+    break;
+  case 'L':
+    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
+    break;
+  case 'n':
+    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
+    break;
+  case 'H': {
+    static const int BUFFER_SIZE = 256;
+    char buf[BUFFER_SIZE];
+    __kmp_expand_host_name(buf, BUFFER_SIZE);
+    rc = __kmp_str_buf_print(field_buffer, format, buf);
+  } break;
+  case 'P':
+    rc = __kmp_str_buf_print(field_buffer, format, getpid());
+    break;
+  case 'i':
+    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
+    break;
+  case 'N':
+    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
+    break;
+  case 'a':
+    field_value =
+        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
+    rc = __kmp_str_buf_print(field_buffer, format, field_value);
+    break;
+#if KMP_AFFINITY_SUPPORTED
+  case 'A': {
+    kmp_str_buf_t buf;
+    __kmp_str_buf_init(&buf);
+    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
+    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
+    __kmp_str_buf_free(&buf);
+  } break;
+#endif
+  default:
+    // According to the spec, if an implementation does not have info for a
+    // field type, then "undefined" is printed
+    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
+    // Skip the field
+    if (parse_long_name) {
+      SKIP_TOKEN(*ptr);
+      if (**ptr == '}')
+        (*ptr)++;
+    } else {
+      (*ptr)++;
+    }
+  }
+
+  KMP_ASSERT(format_index <= FORMAT_SIZE);
+  return rc;
+}
+
+/*
+ * Returns the number of characters needed to hold the affinity string
+ * (not including the terminating null byte).
+ * The resulting string is written to buffer, which the caller can then
+ * process as needed.
+ */
+size_t __kmp_aux_capture_affinity(int gtid, const char *format,
+                                  kmp_str_buf_t *buffer) {
+  const char *parse_ptr;
+  size_t retval;
+  const kmp_info_t *th;
+  kmp_str_buf_t field;
+
+  KMP_DEBUG_ASSERT(buffer);
+  KMP_DEBUG_ASSERT(gtid >= 0);
+
+  __kmp_str_buf_init(&field);
+  __kmp_str_buf_clear(buffer);
+
+  th = __kmp_threads[gtid];
+  retval = 0;
+
+  // If format is NULL or zero-length string, then we use
+  // affinity-format-var ICV
+  parse_ptr = format;
+  if (parse_ptr == NULL || *parse_ptr == '\0') {
+    parse_ptr = __kmp_affinity_format;
+  }
+  KMP_DEBUG_ASSERT(parse_ptr);
+
+  while (*parse_ptr != '\0') {
+    // Parse a field
+    if (*parse_ptr == '%') {
+      // Put field in the buffer
+      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
+      __kmp_str_buf_catbuf(buffer, &field);
+      retval += rc;
+    } else {
+      // Put literal character in buffer
+      __kmp_str_buf_cat(buffer, parse_ptr, 1);
+      retval++;
+      parse_ptr++;
+    }
+  }
+  __kmp_str_buf_free(&field);
+  return retval;
+}
+
+// Displays the affinity string to stdout
+void __kmp_aux_display_affinity(int gtid, const char *format) {
+  kmp_str_buf_t buf;
+  __kmp_str_buf_init(&buf);
+  __kmp_aux_capture_affinity(gtid, format, &buf);
+  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
+  __kmp_str_buf_free(&buf);
+}
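+
+// Usage sketch (assumed callers, not defined in this file): the OpenMP 5.0
+// entry points are expected to be thin wrappers, e.g. ompc_display_affinity()
+// calling __kmp_aux_display_affinity(gtid, format), and ompc_capture_affinity()
+// calling __kmp_aux_capture_affinity(gtid, format, &buf) and then copying
+// buf.str (truncated as needed) into the user-supplied buffer.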
+#endif // OMP_50_ENABLED
+
 /* ------------------------------------------------------------------------ */
 
 void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
@@ -7667,8 +8075,8 @@
 
 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
 
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||       \
-    KMP_OS_DARWIN || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
+    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
 
     int teamsize_cutoff = 4;
 
@@ -7691,8 +8099,8 @@
     }
 #else
 #error "Unknown or unsupported OS"
-#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
-// KMP_OS_DARWIN
+#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
+       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
 
 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
 
diff --git a/runtime/src/kmp_safe_c_api.h b/runtime/src/kmp_safe_c_api.h
index 9d0da0b..d894fe3 100644
--- a/runtime/src/kmp_safe_c_api.h
+++ b/runtime/src/kmp_safe_c_api.h
@@ -11,11 +11,14 @@
 #ifndef KMP_SAFE_C_API_H
 #define KMP_SAFE_C_API_H
 
+#include "kmp_platform.h"
+#include <string.h>
+
 // Replacement for banned C API
 
 // Not every unsafe call listed here is handled now, but keeping everything
 // in one place should be handy for future maintenance.
-#if KMP_OS_WINDOWS
+#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
 
 #define RSIZE_MAX_STR (4UL << 10) // 4KB
 
@@ -57,4 +60,16 @@
 
 #endif // KMP_OS_WINDOWS
 
+// strncpy variant that truncates src to fit buf_size and always
+// null-terminates buffer
+static inline void __kmp_strncpy_truncate(char *buffer, size_t buf_size,
+                                          char const *src, size_t src_size) {
+  if (src_size >= buf_size) {
+    src_size = buf_size - 1;
+  }
+  KMP_STRNCPY_S(buffer, buf_size, src, src_size);
+  buffer[src_size] = '\0';
+}
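+
+// Usage sketch: with char dst[8], __kmp_strncpy_truncate(dst, sizeof(dst),
+// "0123456789", 10) leaves dst holding "0123456" followed by '\0'.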
+
 #endif // KMP_SAFE_C_API_H
diff --git a/runtime/src/kmp_settings.cpp b/runtime/src/kmp_settings.cpp
index d855de8..6d049e4 100644
--- a/runtime/src/kmp_settings.cpp
+++ b/runtime/src/kmp_settings.cpp
@@ -410,7 +410,7 @@
                                       int *out_range, char *out_routine,
                                       char *out_file, int *out_lb,
                                       int *out_ub) {
-  size_t len = KMP_STRLEN(value + 1);
+  size_t len = KMP_STRLEN(value) + 1;
   par_range_to_print = (char *)KMP_INTERNAL_MALLOC(len + 1);
   KMP_STRNCPY_S(par_range_to_print, len + 1, value, len + 1);
   __kmp_par_range = +1;
@@ -418,7 +418,7 @@
   __kmp_par_range_ub = INT_MAX;
   for (;;) {
     unsigned int len;
-    if ((value == NULL) || (*value == '\0')) {
+    if (*value == '\0') {
       break;
     }
     if (!__kmp_strcasecmp_with_sentinel("routine", value, '=')) {
@@ -3252,7 +3252,29 @@
 #endif /* OMP_40_ENABLED */
 
 #if OMP_50_ENABLED
-
+static void __kmp_stg_parse_display_affinity(char const *name,
+                                             char const *value, void *data) {
+  __kmp_stg_parse_bool(name, value, &__kmp_display_affinity);
+}
+static void __kmp_stg_print_display_affinity(kmp_str_buf_t *buffer,
+                                             char const *name, void *data) {
+  __kmp_stg_print_bool(buffer, name, __kmp_display_affinity);
+}
+static void __kmp_stg_parse_affinity_format(char const *name, char const *value,
+                                            void *data) {
+  size_t length = KMP_STRLEN(value);
+  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, value,
+                         length);
+}
+static void __kmp_stg_print_affinity_format(kmp_str_buf_t *buffer,
+                                            char const *name, void *data) {
+  if (__kmp_env_format) {
+    KMP_STR_BUF_PRINT_NAME_EX(name);
+  } else {
+    __kmp_str_buf_print(buffer, "   %s='", name);
+  }
+  __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format);
+}
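+// For example, an environment along the lines of
+//   OMP_DISPLAY_AFFINITY=TRUE OMP_AFFINITY_FORMAT="host=%H pid=%P tid=%n"
+// would be handled by the parse routines above: the first enables the
+// per-thread affinity output, the second overrides the default format string
+// (field types as in __kmp_affinity_format_table).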
 // OMP_ALLOCATOR sets default allocator
 static void __kmp_stg_parse_allocator(char const *name, char const *value,
                                       void *data) {
@@ -4879,7 +4901,12 @@
 #endif
 
 #endif // KMP_AFFINITY_SUPPORTED
-
+#if OMP_50_ENABLED
+    {"OMP_DISPLAY_AFFINITY", __kmp_stg_parse_display_affinity,
+     __kmp_stg_print_display_affinity, NULL, 0, 0},
+    {"OMP_AFFINITY_FORMAT", __kmp_stg_parse_affinity_format,
+     __kmp_stg_print_affinity_format, NULL, 0, 0},
+#endif
     {"KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork,
      __kmp_stg_print_init_at_fork, NULL, 0, 0},
     {"KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL,
@@ -5409,6 +5436,21 @@
   }
 #endif /* OMP_40_ENABLED */
 
+#if OMP_50_ENABLED
+  // Set up the affinity format ICV
+  // Grab the default affinity format string from the message catalog
+  kmp_msg_t m =
+      __kmp_msg_format(kmp_i18n_msg_AffFormatDefault, "%P", "%i", "%n", "%A");
+  KMP_DEBUG_ASSERT(KMP_STRLEN(m.str) < KMP_AFFINITY_FORMAT_SIZE);
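+  // With the stock message catalog this is expected to produce a default of
+  // the form "OMP: pid %P tid %i thread %n bound to OS proc set {%A}"
+  // (compare the affinity format tests below), which is then installed as the
+  // affinity-format-var ICV value.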
+
+  if (__kmp_affinity_format == NULL) {
+    __kmp_affinity_format =
+        (char *)KMP_INTERNAL_MALLOC(sizeof(char) * KMP_AFFINITY_FORMAT_SIZE);
+  }
+  KMP_STRCPY_S(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, m.str);
+  __kmp_str_free(&m.str);
+#endif
+
   // Now process all of the settings.
   for (i = 0; i < block.count; ++i) {
     __kmp_stg_parse(block.vars[i].name, block.vars[i].value);
@@ -5513,7 +5555,7 @@
         // then determine if it is equal to that single group.
         if (within_one_group) {
           DWORD num_bits_in_group = __kmp_GetActiveProcessorCount(group);
-          int num_bits_in_mask = 0;
+          DWORD num_bits_in_mask = 0;
           for (int bit = init_mask->begin(); bit != init_mask->end();
                bit = init_mask->next(bit))
             num_bits_in_mask++;
diff --git a/runtime/src/kmp_str.cpp b/runtime/src/kmp_str.cpp
index 04c4056..5338edf 100644
--- a/runtime/src/kmp_str.cpp
+++ b/runtime/src/kmp_str.cpp
@@ -143,13 +143,28 @@
   KMP_STR_BUF_INVARIANT(buffer);
 } // __kmp_str_buf_cat
 
-void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
-                          va_list args) {
+void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) {
+  KMP_DEBUG_ASSERT(dest);
+  KMP_DEBUG_ASSERT(src);
+  KMP_STR_BUF_INVARIANT(dest);
+  KMP_STR_BUF_INVARIANT(src);
+  if (!src->str || !src->used)
+    return;
+  __kmp_str_buf_reserve(dest, dest->used + src->used + 1);
+  KMP_MEMCPY(dest->str + dest->used, src->str, src->used);
+  dest->str[dest->used + src->used] = 0;
+  dest->used += src->used;
+  KMP_STR_BUF_INVARIANT(dest);
+} // __kmp_str_buf_catbuf
+
+// Return the number of characters written
+int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+                         va_list args) {
+  int rc;
   KMP_STR_BUF_INVARIANT(buffer);
 
   for (;;) {
     int const free = buffer->size - buffer->used;
-    int rc;
     int size;
 
     // Try to format string.
@@ -198,13 +213,17 @@
 
   KMP_DEBUG_ASSERT(buffer->size > 0);
   KMP_STR_BUF_INVARIANT(buffer);
+  return rc;
 } // __kmp_str_buf_vprint
 
-void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
+// Return the number of characters written
+int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
+  int rc;
   va_list args;
   va_start(args, format);
-  __kmp_str_buf_vprint(buffer, format, args);
+  rc = __kmp_str_buf_vprint(buffer, format, args);
   va_end(args);
+  return rc;
 } // __kmp_str_buf_print
 
 /* The function prints specified size to buffer. Size is expressed using biggest
diff --git a/runtime/src/kmp_str.h b/runtime/src/kmp_str.h
index 02a2032..c30255d 100644
--- a/runtime/src/kmp_str.h
+++ b/runtime/src/kmp_str.h
@@ -51,9 +51,10 @@
 void __kmp_str_buf_detach(kmp_str_buf_t *buffer);
 void __kmp_str_buf_free(kmp_str_buf_t *buffer);
 void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len);
-void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
-                          va_list args);
-void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
+void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src);
+int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+                         va_list args);
+int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
 void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size);
 
 /* File name parser.
diff --git a/runtime/src/kmp_stub.cpp b/runtime/src/kmp_stub.cpp
index e26e084..c1f3bf3 100644
--- a/runtime/src/kmp_stub.cpp
+++ b/runtime/src/kmp_stub.cpp
@@ -35,6 +35,10 @@
 #define omp_set_num_threads ompc_set_num_threads
 #define omp_set_dynamic ompc_set_dynamic
 #define omp_set_nested ompc_set_nested
+#define omp_set_affinity_format ompc_set_affinity_format
+#define omp_get_affinity_format ompc_get_affinity_format
+#define omp_display_affinity ompc_display_affinity
+#define omp_capture_affinity ompc_capture_affinity
 #define kmp_set_stacksize kmpc_set_stacksize
 #define kmp_set_stacksize_s kmpc_set_stacksize_s
 #define kmp_set_blocktime kmpc_set_blocktime
@@ -350,6 +354,17 @@
 const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6;
 const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7;
 const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8;
+/* OpenMP 5.0 Affinity Format */
+void omp_set_affinity_format(char const *format) { i; }
+size_t omp_get_affinity_format(char *buffer, size_t size) {
+  i;
+  return 0;
+}
+void omp_display_affinity(char const *format) { i; }
+size_t omp_capture_affinity(char *buffer, size_t buf_size, char const *format) {
+  i;
+  return 0;
+}
 #endif /* OMP_50_ENABLED */
 
 // end of file //
diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp
index b48c5b6..6c810dd 100644
--- a/runtime/src/kmp_taskdeps.cpp
+++ b/runtime/src/kmp_taskdeps.cpp
@@ -466,9 +466,9 @@
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
-    if (!current_task->ompt_task_info.frame.enter_frame)
-      current_task->ompt_task_info.frame.enter_frame =
-          OMPT_GET_FRAME_ADDRESS(1);
+    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
+      current_task->ompt_task_info.frame.enter_frame.ptr =
+          OMPT_GET_FRAME_ADDRESS(0);
     if (ompt_enabled.ompt_callback_task_create) {
       ompt_data_t task_data = ompt_data_none;
       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
@@ -479,7 +479,7 @@
           OMPT_LOAD_RETURN_ADDRESS(gtid));
     }
 
-    new_taskdata->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(0);
+    new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 
 #if OMPT_OPTIONAL
@@ -566,7 +566,7 @@
                     gtid, loc_ref, new_taskdata));
 #if OMPT_SUPPORT
       if (ompt_enabled.enabled) {
-        current_task->ompt_task_info.frame.enter_frame = NULL;
+        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
       }
 #endif
       return TASK_CURRENT_NOT_QUEUED;
@@ -586,7 +586,7 @@
   kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
-    current_task->ompt_task_info.frame.enter_frame = NULL;
+    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
   }
 #endif
   return ret;
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 2d74686..9c61a12 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -547,8 +547,10 @@
 static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
   // The calls to __ompt_task_init already have the ompt_enabled condition.
   task->ompt_task_info.task_data.value = 0;
-  task->ompt_task_info.frame.exit_frame = NULL;
-  task->ompt_task_info.frame.enter_frame = NULL;
+  task->ompt_task_info.frame.exit_frame = ompt_data_none;
+  task->ompt_task_info.frame.enter_frame = ompt_data_none;
+  task->ompt_task_info.frame.exit_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
+  task->ompt_task_info.frame.enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
 #if OMP_40_ENABLED
   task->ompt_task_info.ndeps = 0;
   task->ompt_task_info.deps = NULL;
@@ -627,9 +629,11 @@
 
 #if OMPT_SUPPORT
   if (ompt) {
-    if (current_task->ompt_task_info.frame.enter_frame == NULL) {
-      current_task->ompt_task_info.frame.enter_frame =
-          taskdata->ompt_task_info.frame.exit_frame = frame_address;
+    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
+      current_task->ompt_task_info.frame.enter_frame.ptr =
+          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
+      current_task->ompt_task_info.frame.enter_frame_flags =
+          taskdata->ompt_task_info.frame.exit_frame_flags = ompt_frame_application | ompt_frame_framepointer;
     }
     if (ompt_enabled.ompt_callback_task_create) {
       ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
@@ -811,8 +815,10 @@
                               kmp_taskdata_t *resumed_task) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
   kmp_info_t *thread = __kmp_threads[gtid];
+#if OMP_45_ENABLED
   kmp_task_team_t *task_team =
       thread->th.th_task_team; // might be NULL for serial teams...
+#endif // OMP_45_ENABLED
   kmp_int32 children = 0;
 
   KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
@@ -964,9 +970,10 @@
 
 #if OMPT_SUPPORT
   if (ompt) {
-    omp_frame_t *ompt_frame;
+    ompt_frame_t *ompt_frame;
     __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
-    ompt_frame->enter_frame = NULL;
+    ompt_frame->enter_frame = ompt_data_none;
+    ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
   }
 #endif
 
@@ -1392,6 +1399,28 @@
   return retval;
 }
 
+#if OMP_50_ENABLED
+/*!
+@ingroup TASKING
+@param loc_ref location of the original task directive
+@param gtid Global Thread ID of encountering thread
+@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
+task''
+@param naffins Number of affinity items
+@param affin_list List of affinity items
+@return Returns non-zero if registering affinity information was not successful.
+ Returns 0 if registration was successful
+This entry registers the affinity information attached to a task with the task
+thunk structure kmp_taskdata_t.
+*/
+kmp_int32
+__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_task_t *new_task, kmp_int32 naffins,
+                                  kmp_task_affinity_info_t *affin_list) {
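+  // Currently a stub: the affinity hints in affin_list are not recorded, and
+  // returning 0 reports the registration as successful.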
+  return 0;
+}
+#endif
+
 //  __kmp_invoke_task: invoke the specified task
 //
 // gtid: global thread ID of caller
@@ -1438,9 +1467,9 @@
     oldInfo = thread->th.ompt_thread_info;
     thread->th.ompt_thread_info.wait_id = 0;
     thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
-                                            ? omp_state_work_serial
-                                            : omp_state_work_parallel;
-    taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+                                            ? ompt_state_work_serial
+                                            : ompt_state_work_parallel;
+    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
 
@@ -1566,7 +1595,7 @@
     if (UNLIKELY(ompt_enabled.enabled)) {
       thread->th.ompt_thread_info = oldInfo;
       if (taskdata->td_flags.tiedness == TASK_TIED) {
-        taskdata->ompt_task_info.frame.exit_frame = NULL;
+        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
       }
       __kmp_task_finish<true>(gtid, task, current_task);
     } else
@@ -1634,7 +1663,7 @@
   ANNOTATE_HAPPENS_BEFORE(new_task);
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled)) {
-    parent->ompt_task_info.frame.enter_frame = NULL;
+    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
   }
 #endif
   return TASK_CURRENT_NOT_QUEUED;
@@ -1703,8 +1732,8 @@
     if (!new_taskdata->td_flags.started) {
       OMPT_STORE_RETURN_ADDRESS(gtid);
       parent = new_taskdata->td_parent;
-      if (!parent->ompt_task_info.frame.enter_frame) {
-        parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
+        parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
       }
       if (ompt_enabled.ompt_callback_task_create) {
         ompt_data_t task_data = ompt_data_none;
@@ -1721,7 +1750,7 @@
       __ompt_task_finish(new_task,
                          new_taskdata->ompt_task_info.scheduling_parent,
                          ompt_task_switch);
-      new_taskdata->ompt_task_info.frame.exit_frame = NULL;
+      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
     }
   }
 #endif
@@ -1733,7 +1762,7 @@
                 gtid, loc_ref, new_taskdata));
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
-    parent->ompt_task_info.frame.enter_frame = NULL;
+    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
   }
 #endif
   return res;
@@ -1767,8 +1796,8 @@
   kmp_taskdata_t *parent = NULL;
   if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
     parent = new_taskdata->td_parent;
-    if (!parent->ompt_task_info.frame.enter_frame)
-      parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+    if (!parent->ompt_task_info.frame.enter_frame.ptr)
+      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
     if (ompt_enabled.ompt_callback_task_create) {
       ompt_data_t task_data = ompt_data_none;
       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
@@ -1788,7 +1817,7 @@
                 gtid, loc_ref, new_taskdata));
 #if OMPT_SUPPORT
   if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
-    parent->ompt_task_info.frame.enter_frame = NULL;
+    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
   }
 #endif
   return res;
@@ -1817,7 +1846,7 @@
       my_task_data = &(taskdata->ompt_task_info.task_data);
       my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
 
-      taskdata->ompt_task_info.frame.enter_frame = frame_address;
+      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
 
       if (ompt_enabled.ompt_callback_sync_region) {
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
@@ -1886,7 +1915,7 @@
             ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
             my_task_data, return_address);
       }
-      taskdata->ompt_task_info.frame.enter_frame = NULL;
+      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
     }
 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
 
@@ -1916,7 +1945,7 @@
 #if OMPT_SUPPORT && OMPT_OPTIONAL
   if (UNLIKELY(ompt_enabled.enabled)) {
     OMPT_STORE_RETURN_ADDRESS(gtid);
-    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1),
+    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                     OMPT_LOAD_RETURN_ADDRESS(gtid));
   }
 #endif
diff --git a/runtime/src/kmp_utility.cpp b/runtime/src/kmp_utility.cpp
index 06090e6..dc4c714 100644
--- a/runtime/src/kmp_utility.cpp
+++ b/runtime/src/kmp_utility.cpp
@@ -375,7 +375,11 @@
         case 'I':
         case 'i': {
           pid_t id = getpid();
+#if KMP_ARCH_X86_64 && defined(__MINGW32__)
+          snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*lld", width, id);
+#else
           snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id);
+#endif
           if (snp_result >= 0 && snp_result <= end - pos) {
             while (*pos)
               ++pos;
diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h
index ec489d1..e2984a8 100644
--- a/runtime/src/kmp_wait_release.h
+++ b/runtime/src/kmp_wait_release.h
@@ -120,12 +120,12 @@
 
 #if OMPT_SUPPORT
 static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
-                                            omp_state_t omp_state,
+                                            ompt_state_t ompt_state,
                                             ompt_data_t *tId,
                                             ompt_data_t *pId) {
   int ds_tid = this_thr->th.th_info.ds.ds_tid;
-  if (omp_state == omp_state_wait_barrier_implicit) {
-    this_thr->th.ompt_thread_info.state = omp_state_overhead;
+  if (ompt_state == ompt_state_wait_barrier_implicit) {
+    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
 #if OMPT_OPTIONAL
     void *codeptr = NULL;
     if (ompt_enabled.ompt_callback_sync_region_wait) {
@@ -143,9 +143,9 @@
             ompt_scope_end, NULL, tId, 0, ds_tid);
       }
       // return to idle state
-      this_thr->th.ompt_thread_info.state = omp_state_idle;
+      this_thr->th.ompt_thread_info.state = ompt_state_idle;
     } else {
-      this_thr->th.ompt_thread_info.state = omp_state_overhead;
+      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     }
   }
 }
@@ -199,27 +199,27 @@
 function.
        Events are triggered in the calling code (__kmp_barrier):
 
-                state := omp_state_overhead
+                state := ompt_state_overhead
             barrier-begin
             barrier-wait-begin
-                state := omp_state_wait_barrier
+                state := ompt_state_wait_barrier
           call join-barrier-implementation (finally arrive here)
           {}
           call fork-barrier-implementation (finally arrive here)
           {}
-                state := omp_state_overhead
+                state := ompt_state_overhead
             barrier-wait-end
             barrier-end
-                state := omp_state_work_parallel
+                state := ompt_state_work_parallel
 
 
   __kmp_fork_barrier  (after thread creation, before executing implicit task)
           call fork-barrier-implementation (finally arrive here)
-          {} // worker arrive here with state = omp_state_idle
+          {} // worker arrive here with state = ompt_state_idle
 
 
   __kmp_join_barrier  (implicit barrier at end of parallel region)
-                state := omp_state_barrier_implicit
+                state := ompt_state_barrier_implicit
             barrier-begin
             barrier-wait-begin
           call join-barrier-implementation (finally arrive here
@@ -234,19 +234,19 @@
             barrier-end
             implicit-task-end
             idle-begin
-                state := omp_state_idle
+                state := ompt_state_idle
 
-       Before leaving, if state = omp_state_idle
+       Before leaving, if state = ompt_state_idle
             idle-end
-                state := omp_state_overhead
+                state := ompt_state_overhead
 */
 #if OMPT_SUPPORT
-  omp_state_t ompt_entry_state;
+  ompt_state_t ompt_entry_state;
   ompt_data_t *pId = NULL;
   ompt_data_t *tId;
   if (ompt_enabled.enabled) {
     ompt_entry_state = this_thr->th.ompt_thread_info.state;
-    if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
+    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
         KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
       ompt_lw_taskteam_t *team =
           this_thr->th.th_team->t.ompt_serialized_team_info;
@@ -432,16 +432,16 @@
   }
 
 #if OMPT_SUPPORT
-  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
-  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
+  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
+  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
 #if OMPT_OPTIONAL
     if (final_spin) {
       __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
       ompt_exit_state = this_thr->th.ompt_thread_info.state;
     }
 #endif
-    if (ompt_exit_state == omp_state_idle) {
-      this_thr->th.ompt_thread_info.state = omp_state_overhead;
+    if (ompt_exit_state == ompt_state_idle) {
+      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     }
   }
 #endif
diff --git a/runtime/src/kmp_wrapper_getpid.h b/runtime/src/kmp_wrapper_getpid.h
index 5b4081a..47e2728 100644
--- a/runtime/src/kmp_wrapper_getpid.h
+++ b/runtime/src/kmp_wrapper_getpid.h
@@ -24,6 +24,9 @@
 #if KMP_OS_DARWIN
 // OS X
 #define __kmp_gettid() syscall(SYS_thread_selfid)
+#elif KMP_OS_NETBSD
+#include <lwp.h>
+#define __kmp_gettid() _lwp_self()
 #elif defined(SYS_gettid)
 // Hopefully other Unix systems define SYS_gettid syscall for getting os thread
 // id
@@ -39,7 +42,9 @@
 // "process.h".
 #include <process.h>
 // Let us simulate Unix.
+#if KMP_MSVC_COMPAT
 typedef int pid_t;
+#endif
 #define getpid _getpid
 #define __kmp_gettid() GetCurrentThreadId()
 
diff --git a/runtime/src/kmp_wrapper_malloc.h b/runtime/src/kmp_wrapper_malloc.h
index cf6f2be..c8d2c70 100644
--- a/runtime/src/kmp_wrapper_malloc.h
+++ b/runtime/src/kmp_wrapper_malloc.h
@@ -93,8 +93,10 @@
 // Include alloca() declaration.
 #if KMP_OS_WINDOWS
 #include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
+#if KMP_MSVC_COMPAT
 #define alloca _alloca // Allow to use alloca() with no underscore.
-#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
+#endif
+#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD
 // Declared in "stdlib.h".
 #elif KMP_OS_UNIX
 #include <alloca.h> // Linux* OS and OS X*: alloc() declared in "alloca".
diff --git a/runtime/src/libomp.rc.var b/runtime/src/libomp.rc.var
index cf6a9c9..32449e2 100644
--- a/runtime/src/libomp.rc.var
+++ b/runtime/src/libomp.rc.var
@@ -11,7 +11,7 @@
 ////===----------------------------------------------------------------------===//
 //
 
-#include "winres.h"
+#include "winresrc.h"
 #include "kmp_config.h"
 
 LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US    // English (U.S.) resources
diff --git a/runtime/src/ompt-general.cpp b/runtime/src/ompt-general.cpp
index 8da5610..705b452 100644
--- a/runtime/src/ompt-general.cpp
+++ b/runtime/src/ompt-general.cpp
@@ -52,8 +52,8 @@
 
 typedef struct {
   const char *state_name;
-  omp_state_t state_id;
-} omp_state_info_t;
+  ompt_state_t state_id;
+} ompt_state_info_t;
 
 typedef struct {
   const char *name;
@@ -73,10 +73,10 @@
 
 ompt_callbacks_active_t ompt_enabled;
 
-omp_state_info_t omp_state_info[] = {
-#define omp_state_macro(state, code) {#state, state},
-    FOREACH_OMP_STATE(omp_state_macro)
-#undef omp_state_macro
+ompt_state_info_t ompt_state_info[] = {
+#define ompt_state_macro(state, code) {#state, state},
+    FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
 };
 
 kmp_mutex_impl_info_t kmp_mutex_impl_info[] = {
@@ -353,7 +353,7 @@
 
     kmp_info_t *root_thread = ompt_get_thread();
 
-    ompt_set_thread_state(root_thread, omp_state_overhead);
+    ompt_set_thread_state(root_thread, ompt_state_overhead);
 
     if (ompt_enabled.ompt_callback_thread_begin) {
       ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
@@ -366,7 +366,7 @@
           NULL, NULL, task_data, ompt_task_initial, 0, NULL);
     }
 
-    ompt_set_thread_state(root_thread, omp_state_work_serial);
+    ompt_set_thread_state(root_thread, ompt_state_work_serial);
   }
 }
 
@@ -388,13 +388,13 @@
 
 OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state,
                                            const char **next_state_name) {
-  const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t);
+  const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
   int i = 0;
 
   for (i = 0; i < len - 1; i++) {
-    if (omp_state_info[i].state_id == current_state) {
-      *next_state = omp_state_info[i + 1].state_id;
-      *next_state_name = omp_state_info[i + 1].state_name;
+    if (ompt_state_info[i].state_id == current_state) {
+      *next_state = ompt_state_info[i + 1].state_id;
+      *next_state_name = ompt_state_info[i + 1].state_name;
       return 1;
     }
   }
@@ -482,11 +482,11 @@
                                            team_size);
 }
 
-OMPT_API_ROUTINE omp_state_t ompt_get_state(omp_wait_id_t *wait_id) {
-  omp_state_t thread_state = __ompt_get_state_internal(wait_id);
+OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *wait_id) {
+  ompt_state_t thread_state = __ompt_get_state_internal(wait_id);
 
-  if (thread_state == omp_state_undefined) {
-    thread_state = omp_state_work_serial;
+  if (thread_state == ompt_state_undefined) {
+    thread_state = ompt_state_work_serial;
   }
 
   return thread_state;
@@ -502,7 +502,7 @@
 
 OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
                                         ompt_data_t **task_data,
-                                        omp_frame_t **task_frame,
+                                        ompt_frame_t **task_frame,
                                         ompt_data_t **parallel_data,
                                         int *thread_num) {
   return __ompt_get_task_info_internal(ancestor_level, type, task_data,
diff --git a/runtime/src/ompt-internal.h b/runtime/src/ompt-internal.h
index 6b92eaa..c6823fc 100644
--- a/runtime/src/ompt-internal.h
+++ b/runtime/src/ompt-internal.h
@@ -54,7 +54,7 @@
       (info->td_flags.merged_if0 ? ompt_task_mergeable : 0x0)
 
 typedef struct {
-  omp_frame_t frame;
+  ompt_frame_t frame;
   ompt_data_t task_data;
   struct kmp_taskdata *scheduling_parent;
   int thread_num;
@@ -81,8 +81,8 @@
   ompt_data_t task_data; /* stored here from implicit barrier-begin until
                             implicit-task-end */
   void *return_address; /* stored here on entry of runtime */
-  omp_state_t state;
-  omp_wait_id_t wait_id;
+  ompt_state_t state;
+  ompt_wait_id_t wait_id;
   int ompt_task_yielded;
   void *idle_frame;
 } ompt_thread_info_t;
diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp
index 23d09aa..cc4f1de 100644
--- a/runtime/src/ompt-specific.cpp
+++ b/runtime/src/ompt-specific.cpp
@@ -211,10 +211,10 @@
 void __ompt_thread_assign_wait_id(void *variable) {
   kmp_info_t *ti = ompt_get_thread();
 
-  ti->th.ompt_thread_info.wait_id = (omp_wait_id_t)variable;
+  ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
 }
 
-omp_state_t __ompt_get_state_internal(omp_wait_id_t *omp_wait_id) {
+ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) {
   kmp_info_t *ti = ompt_get_thread();
 
   if (ti) {
@@ -222,7 +222,7 @@
       *omp_wait_id = ti->th.ompt_thread_info.wait_id;
     return ti->th.ompt_thread_info.state;
   }
-  return omp_state_undefined;
+  return ompt_state_undefined;
 }
 
 //----------------------------------------------------------
@@ -259,8 +259,8 @@
   lwt->ompt_team_info.parallel_data = *ompt_pid;
   lwt->ompt_team_info.master_return_address = codeptr;
   lwt->ompt_task_info.task_data.value = 0;
-  lwt->ompt_task_info.frame.enter_frame = NULL;
-  lwt->ompt_task_info.frame.exit_frame = NULL;
+  lwt->ompt_task_info.frame.enter_frame = ompt_data_none;
+  lwt->ompt_task_info.frame.exit_frame = ompt_data_none;
   lwt->ompt_task_info.scheduling_parent = NULL;
   lwt->ompt_task_info.deps = NULL;
   lwt->ompt_task_info.ndeps = 0;
@@ -328,7 +328,7 @@
 
 int __ompt_get_task_info_internal(int ancestor_level, int *type,
                                   ompt_data_t **task_data,
-                                  omp_frame_t **task_frame,
+                                  ompt_frame_t **task_frame,
                                   ompt_data_t **parallel_data,
                                   int *thread_num) {
   if (__kmp_get_gtid() < 0)
diff --git a/runtime/src/ompt-specific.h b/runtime/src/ompt-specific.h
index 8cf7450..317580f 100644
--- a/runtime/src/ompt-specific.h
+++ b/runtime/src/ompt-specific.h
@@ -41,7 +41,7 @@
 
 int __ompt_get_task_info_internal(int ancestor_level, int *type,
                                   ompt_data_t **task_data,
-                                  omp_frame_t **task_frame,
+                                  ompt_frame_t **task_frame,
                                   ompt_data_t **parallel_data, int *thread_num);
 
 ompt_data_t *__ompt_get_thread_data_internal();
@@ -93,7 +93,7 @@
   return ompt_get_thread_gtid(gtid);
 }
 
-inline void ompt_set_thread_state(kmp_info_t *thread, omp_state_t state) {
+inline void ompt_set_thread_state(kmp_info_t *thread, ompt_state_t state) {
   thread->th.ompt_thread_info.state = state;
 }
 
diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/runtime/src/thirdparty/ittnotify/ittnotify_static.c
index 75ef966..63e1b0c 100644
--- a/runtime/src/thirdparty/ittnotify/ittnotify_static.c
+++ b/runtime/src/thirdparty/ittnotify/ittnotify_static.c
@@ -12,7 +12,11 @@
 #include "ittnotify_config.h"
 
 #if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if defined(__MINGW32__)
+#include <limits.h>
+#else
 #define PATH_MAX 512
+#endif
 #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
 #include <limits.h>
 #include <dlfcn.h>
@@ -28,7 +32,9 @@
 #include "ittnotify.h"
 #include "legacy/ittnotify.h"
 
+#if KMP_MSVC_COMPAT
 #include "disable_warnings.h"
+#endif
 
 static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n";
 
@@ -194,7 +200,7 @@
 
 #pragma pack(pop)
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
 #pragma warning(push)
 #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -217,7 +223,7 @@
     {NULL, NULL, NULL, NULL, __itt_group_none}
 };
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 
@@ -252,7 +258,7 @@
 ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args);
 #endif /* ITT_NOTIFY_EXT_REPORT */
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
 #pragma warning(push)
 #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -278,7 +284,7 @@
     va_end(args);
 }
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
 
@@ -1013,7 +1019,7 @@
         *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
 }
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
 #pragma warning(push)
 #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
 #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
@@ -1191,7 +1197,6 @@
     return prev;
 }
 
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
 #pragma warning(pop)
 #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
diff --git a/runtime/src/z_Linux_util.cpp b/runtime/src/z_Linux_util.cpp
index a8d9324..ab9c353 100644
--- a/runtime/src/z_Linux_util.cpp
+++ b/runtime/src/z_Linux_util.cpp
@@ -22,7 +22,7 @@
 #include "kmp_wait_release.h"
 #include "kmp_wrapper_getpid.h"
 
-#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
+#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
 #include <alloca.h>
 #endif
 #include <math.h> // HUGE_VAL.
@@ -50,8 +50,11 @@
 #elif KMP_OS_DARWIN
 #include <mach/mach.h>
 #include <sys/sysctl.h>
-#elif KMP_OS_FREEBSD
+#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
 #include <pthread_np.h>
+#elif KMP_OS_NETBSD
+#include <sys/types.h>
+#include <sys/sysctl.h>
 #endif
 
 #include <ctype.h>
@@ -444,7 +447,8 @@
    determined exactly, FALSE if incremental refinement is necessary. */
 static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
   int stack_data;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
+        KMP_OS_HURD
   pthread_attr_t attr;
   int status;
   size_t size = 0;
@@ -458,7 +462,7 @@
     /* Fetch the real thread attributes */
     status = pthread_attr_init(&attr);
     KMP_CHECK_SYSFAIL("pthread_attr_init", status);
-#if KMP_OS_FREEBSD || KMP_OS_NETBSD
+#if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
     status = pthread_attr_get_np(pthread_self(), &attr);
     KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
 #else
@@ -482,7 +486,8 @@
     TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
     return TRUE;
   }
-#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */
+#endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
+              KMP_OS_HURD */
   /* Use incremental refinement starting from initial conservative estimate */
   TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
   TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
@@ -496,7 +501,8 @@
   sigset_t new_set, old_set;
 #endif /* KMP_BLOCK_SIGNALS */
   void *exit_val;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
+        KMP_OS_OPENBSD || KMP_OS_HURD
   void *volatile padding = 0;
 #endif
   int gtid;
@@ -544,7 +550,8 @@
   KMP_CHECK_SYSFAIL("pthread_sigmask", status);
 #endif /* KMP_BLOCK_SIGNALS */
 
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
+        KMP_OS_OPENBSD
   if (__kmp_stkoffset > 0 && gtid > 0) {
     padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
   }
@@ -1771,7 +1778,8 @@
 
   int r = 0;
 
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
+        KMP_OS_OPENBSD || KMP_OS_HURD
 
   r = sysconf(_SC_NPROCESSORS_ONLN);
 
@@ -1934,20 +1942,27 @@
 kmp_uint64 __kmp_now_nsec() {
   struct timeval t;
   gettimeofday(&t, NULL);
-  return KMP_NSEC_PER_SEC * t.tv_sec + 1000 * t.tv_usec;
+  kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
+                    (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
+  return nsec;
 }
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 /* Measure clock ticks per millisecond */
 void __kmp_initialize_system_tick() {
+  kmp_uint64 now, nsec2, diff;
   kmp_uint64 delay = 100000; // 50~100 usec on most machines.
   kmp_uint64 nsec = __kmp_now_nsec();
   kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
-  kmp_uint64 now;
   while ((now = __kmp_hardware_timestamp()) < goal)
     ;
-  __kmp_ticks_per_msec =
-      (kmp_uint64)(1e6 * (delay + (now - goal)) / (__kmp_now_nsec() - nsec));
+  nsec2 = __kmp_now_nsec();
+  diff = nsec2 - nsec;
+  if (diff > 0) {
+    kmp_uint64 tpms = (kmp_uint64)(1e6 * (delay + (now - goal)) / diff);
+    if (tpms > 0)
+      __kmp_ticks_per_msec = tpms;
+  }
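+  // Example of the arithmetic (illustrative numbers): if the busy-wait above
+  // spanned delay + (now - goal) = 300,000 ticks while diff = 100,000 ns of
+  // wall time elapsed, then tpms = 1e6 * 300000 / 100000 = 3,000,000 ticks
+  // per millisecond, i.e. a 3 GHz timestamp counter.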
 }
 #endif
 
@@ -2017,9 +2032,39 @@
     found = 1;
   }
 
-#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
+#elif KMP_OS_NETBSD
 
-  // FIXME(FreeBSD, NetBSD): Implement this
+  int mib[5];
+  mib[0] = CTL_VM;
+  mib[1] = VM_PROC;
+  mib[2] = VM_PROC_MAP;
+  mib[3] = getpid();
+  mib[4] = sizeof(struct kinfo_vmentry);
+
+  size_t size;
+  rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
+  KMP_ASSERT(!rc);
+  KMP_ASSERT(size);
+
+  size = size * 4 / 3;
+  struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
+  KMP_ASSERT(kiv);
+
+  rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
+  KMP_ASSERT(!rc);
+  KMP_ASSERT(size);
+
+  for (size_t i = 0; i < size / sizeof(struct kinfo_vmentry); i++) {
+    if (kiv[i].kve_start <= (uint64_t)addr &&
+        (uint64_t)addr < kiv[i].kve_end) {
+      found = 1;
+      break;
+    }
+  }
+  KMP_INTERNAL_FREE(kiv);
+#elif KMP_OS_DRAGONFLY || KMP_OS_OPENBSD
+
+  // FIXME(DragonFly, OpenBSD): Implement this
   found = 1;
 
 #else
@@ -2034,7 +2079,7 @@
 
 #ifdef USE_LOAD_BALANCE
 
-#if KMP_OS_DARWIN
+#if KMP_OS_DARWIN || KMP_OS_NETBSD
 
 // The function returns the rounded value of the system load average
 // during given time interval which depends on the value of
diff --git a/runtime/src/z_Windows_NT_util.cpp b/runtime/src/z_Windows_NT_util.cpp
index e8ed660..f3d667f 100644
--- a/runtime/src/z_Windows_NT_util.cpp
+++ b/runtime/src/z_Windows_NT_util.cpp
@@ -887,6 +887,7 @@
   return 1e9 * __kmp_win32_tick * now.QuadPart;
 }
 
+extern "C"
 void *__stdcall __kmp_launch_worker(void *arg) {
   volatile void *stack_data;
   void *exit_val;
diff --git a/runtime/test/affinity/format/affinity_display.1.c b/runtime/test/affinity/format/affinity_display.1.c
new file mode 100644
index 0000000..b900c3c
--- /dev/null
+++ b/runtime/test/affinity/format/affinity_display.1.c
@@ -0,0 +1,92 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=TRUE OMP_NUM_THREADS=4 OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' %libomp-run | python %S/check.py -c 'CHECK' %s
+
+// Affinity Display examples
+#include <stdio.h>
+#include <stdlib.h> // also null is in <stddef.h>
+#include <stddef.h>
+#include <omp.h>
+#include <string.h>
+
+// ENVIRONMENT
+// OMP_DISPLAY_AFFINITY=TRUE
+// OMP_NUM_THREADS=4
+// OMP_PLACES='{0,1},{2,3},{4,5},{6,7}'
+
+// CHECK: num_threads=1 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0-7])|(0,1)|(undefined)\}
+// CHECK: num_threads=4 Thread id [0-3] reporting in
+// CHECK: num_threads=4 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0-7])|([0246],[1357])|(undefined)\}
+// CHECK: num_threads=1 Default Affinity Format is:
+// CHECK: num_threads=1 Affinity Format set to: host=%20H tid=%0.4n binds_to=%A
+// CHECK: num_threads=4 tid=[0-3] affinity:host=[a-zA-Z0-9_.-]+[ ]+tid=000[0-4][ ]+binds_to=(([0-7])|([0246],[1357])|(undefined))
+
+#define FORMAT_STORE 80
+#define BUFFER_STORE 80
+
+int main(int argc, char** argv) {
+  int i, n, tid, max_req_store = 0;
+  size_t nchars;
+  char default_format[FORMAT_STORE];
+  char my_format[] = "host=%20H tid=%0.4n binds_to=%A";
+  char **buffer;
+
+  // CODE SEGMENT 1 AFFINITY DISPLAY
+  omp_display_affinity(NULL);
+
+  // OMP_DISPLAY_AFFINITY=TRUE,
+  // Affinity reported for 1 parallel region
+  #pragma omp parallel
+  {
+    printf("Thread id %d reporting in.\n", omp_get_thread_num());
+  }
+
+  // Get and Display Default Affinity Format
+  nchars = omp_get_affinity_format(default_format, (size_t)FORMAT_STORE);
+  printf("Default Affinity Format is: %s\n", default_format);
+
+  if (nchars > FORMAT_STORE) {
+    printf("Caution: Reported Format is truncated. Increase\n");
+    printf(" FORMAT_STORE by %d.\n", (int)nchars - FORMAT_STORE);
+  }
+
+  // Set Affinity Format
+  omp_set_affinity_format(my_format);
+  printf("Affinity Format set to: %s\n", my_format);
+
+  // CODE SEGMENT 3 CAPTURE AFFINITY
+  // Set up buffer for affinity of n threads
+  n = omp_get_max_threads();
+  buffer = (char **)malloc(sizeof(char *) * n);
+  for (i = 0; i < n; i++) {
+    buffer[i] = (char *)malloc(sizeof(char) * BUFFER_STORE);
+  }
+
+  // Capture Affinity using Affinity Format set above.
+  // Use critical reduction to check size of buffer areas
+  #pragma omp parallel private(tid, nchars)
+  {
+    tid = omp_get_thread_num();
+    nchars = omp_capture_affinity(buffer[tid], (size_t)BUFFER_STORE, NULL);
+    #pragma omp critical
+    {
+      if (nchars > max_req_store)
+        max_req_store = nchars;
+    }
+  }
+
+  for (i = 0; i < n; i++) {
+    printf("tid=%d affinity:%s:\n", i, buffer[i]);
+  }
+  // for 4 threads with OMP_PLACES='{0,1},{2,3},{4,5},{6,7}'
+  // host=%20H tid=%0.4n binds_to=%A
+  // host=<hostname> tid=0000 binds_to=0,1
+  // host=<hostname> tid=0001 binds_to=2,3
+  // host=<hostname> tid=0002 binds_to=4,5
+  // host=<hostname> tid=0003 binds_to=6,7
+
+  if (max_req_store > BUFFER_STORE) {
+    printf("Caution: Affinity string truncated. Increase\n");
+    printf(" BUFFER_STORE by %d\n", max_req_store - BUFFER_STORE);
+  }
+  return 0;
+}
diff --git a/runtime/test/affinity/format/affinity_values.c b/runtime/test/affinity/format/affinity_values.c
new file mode 100644
index 0000000..37ab210
--- /dev/null
+++ b/runtime/test/affinity/format/affinity_values.c
@@ -0,0 +1,135 @@
+// RUN: %libomp-compile
+// RUN: env OMP_PROC_BIND=close OMP_PLACES=threads %libomp-run
+// RUN: env OMP_PROC_BIND=close OMP_PLACES=cores %libomp-run
+// RUN: env OMP_PROC_BIND=close OMP_PLACES=sockets %libomp-run
+// RUN: env KMP_AFFINITY=compact %libomp-run
+// RUN: env KMP_AFFINITY=scatter %libomp-run
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition)                                                       \
+  if (!(condition)) {                                                          \
+    fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__,           \
+            __LINE__);                                                         \
+    exit(1);                                                                   \
+  }
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdarg.h>
+#endif
+
+#define BUFFER_SIZE 1024
+
+char buf[BUFFER_SIZE];
+#pragma omp threadprivate(buf)
+
+static int debug_printf(const char* format, ...) {
+  int retval = 0;
+#if DEBUG
+  va_list args;
+  va_start(args, format);
+  retval = vprintf(format, args);
+  va_end(args);
+#endif
+  return retval;
+}
+
+static void display_affinity_environment() {
+#if DEBUG
+  printf("Affinity Environment:\n");
+  printf("  OMP_PROC_BIND=%s\n", getenv("OMP_PROC_BIND"));
+  printf("  OMP_PLACES=%s\n", getenv("OMP_PLACES"));
+  printf("  KMP_AFFINITY=%s\n", getenv("KMP_AFFINITY"));
+#endif
+}
+
+// Reads in a list of integers into ids array (not going past ids_size)
+// e.g., if affinity = "0-4,6,8-10,14,16,17-20,23"
+//       then ids = [0,1,2,3,4,6,8,9,10,14,16,17,18,19,20,23]
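+//       If ids_size were 4, only ids = [0,1,2,3] would be written; parsing
+//       stops as soon as ids_size entries have been filled.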
+void list_to_ids(const char* affinity, int* ids, int ids_size) {
+  int id, b, e, ids_index;
+  char *aff, *begin, *end, *absolute_end;
+  aff = strdup(affinity);
+  absolute_end = aff + strlen(aff);
+  ids_index = 0;
+  begin = end = aff;
+  while (end < absolute_end) {
+    end = begin;
+    while (*end != '\0' && *end != ',')
+      end++;
+    *end = '\0';
+    if (strchr(begin, '-') != NULL) {
+      // Range
+      sscanf(begin, "%d-%d", &b, &e);
+    } else {
+      // Single Number
+      sscanf(begin, "%d", &b);
+      e = b;
+    }
+    for (id = b; id <= e; ++id) {
+      ids[ids_index++] = id;
+      if (ids_index >= ids_size) {
+        free(aff);
+        return;
+      }
+    }
+    begin = end + 1;
+  }
+  free(aff);
+}
+
+void check_thread_affinity() {
+  int i;
+  const char *formats[2] = {"%{thread_affinity}", "%A"};
+  for (i = 0; i < sizeof(formats) / sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel
+    {
+      int j, k;
+      int place = omp_get_place_num();
+      int num_procs = omp_get_place_num_procs(place);
+      int *ids = (int *)malloc(sizeof(int) * num_procs);
+      int *ids2 = (int *)malloc(sizeof(int) * num_procs);
+      char buf[256];
+      size_t n = omp_capture_affinity(buf, 256, NULL);
+      check(n <= 256);
+      omp_get_place_proc_ids(place, ids);
+      list_to_ids(buf, ids2, num_procs);
+
+      #pragma omp for schedule(static) ordered
+      for (k = 0; k < omp_get_num_threads(); ++k) {
+        #pragma omp ordered
+        {
+          debug_printf("Thread %d: captured affinity = %s\n",
+                       omp_get_thread_num(), buf);
+          for (j = 0; j < num_procs; ++j) {
+            debug_printf("Thread %d: ids[%d] = %d ids2[%d] = %d\n",
+                         omp_get_thread_num(), j, ids[j], j, ids2[j]);
+            check(ids[j] == ids2[j]);
+          }
+        }
+      }
+
+      free(ids);
+      free(ids2);
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  omp_set_nested(1);
+  display_affinity_environment();
+  check_thread_affinity();
+  return 0;
+}
diff --git a/runtime/test/affinity/format/api.c b/runtime/test/affinity/format/api.c
new file mode 100644
index 0000000..df6be66
--- /dev/null
+++ b/runtime/test/affinity/format/api.c
@@ -0,0 +1,56 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition)                                                       \
+  if (!(condition)) {                                                          \
+    fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__,           \
+            __LINE__);                                                         \
+    exit(1);                                                                   \
+  }
+
+#define BUFFER_SIZE 1024
+
+int main(int argc, char** argv) {
+  char buf[BUFFER_SIZE];
+  size_t needed;
+
+  omp_set_affinity_format("0123456789");
+
+  needed = omp_get_affinity_format(buf, BUFFER_SIZE);
+  check(streqls(buf, "0123456789"));
+  check(needed == 10);
+
+  // Check that it is truncated properly (a 5-byte buffer holds 4 characters
+  // plus the terminating NUL)
+  omp_get_affinity_format(buf, 5);
+  check(streqls(buf, "0123"));
+
+  #pragma omp parallel
+  {
+    char my_buf[512];
+    size_t needed = omp_capture_affinity(my_buf, 512, NULL);
+    check(streqls(my_buf, "0123456789"));
+    check(needed == 10);
+    // Check that it is truncated properly
+    omp_capture_affinity(my_buf, 5, NULL);
+    check(streqls(my_buf, "0123"));
+  }
+
+  #pragma omp parallel num_threads(4)
+  {
+    omp_display_affinity(NULL);
+  }
+
+  return 0;
+}
+
+// CHECK: num_threads=4 0123456789
diff --git a/runtime/test/affinity/format/api2.c b/runtime/test/affinity/format/api2.c
new file mode 100644
index 0000000..7b2d700
--- /dev/null
+++ b/runtime/test/affinity/format/api2.c
@@ -0,0 +1,84 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition)                                                       \
+  if (!(condition)) {                                                          \
+    fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__,           \
+            __LINE__);                                                         \
+    exit(1);                                                                   \
+  }
+
+#if defined(_WIN32)
+#define snprintf _snprintf
+#endif
+
+#define BUFFER_SIZE 1024
+
+int main(int argc, char** argv) {
+  char buf[BUFFER_SIZE];
+  size_t needed, length;
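+  // %L, %n, %N and %a are the single-character forms of nesting_level,
+  // thread_num, num_threads and ancestor_tnum; both spellings should expand
+  // to the same values, as the snprintf comparisons below verify.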
+  const char* format = "tl:%L tn:%n nt:%N an:%a";
+  const char* second_format = "nesting_level:%{nesting_level} thread_num:%{thread_num} num_threads:%{num_threads} ancestor_tnum:%{ancestor_tnum}";
+
+  length = strlen(format);
+  omp_set_affinity_format(format);
+
+  needed = omp_get_affinity_format(buf, BUFFER_SIZE);
+  check(streqls(buf, format));
+  check(needed == length);
+
+  // Check that it is truncated properly
+  omp_get_affinity_format(buf, 5);
+  check(streqls(buf, "tl:%"));
+
+  #pragma omp parallel
+  {
+    char my_buf[512];
+    char supposed[512];
+    int tl, tn, nt, an;
+    size_t needed, needed2;
+    tl = omp_get_level();
+    tn = omp_get_thread_num();
+    nt = omp_get_num_threads();
+    an = omp_get_ancestor_thread_num(omp_get_level()-1);
+    needed = omp_capture_affinity(my_buf, 512, NULL);
+    needed2 = (size_t)snprintf(supposed, 512, "tl:%d tn:%d nt:%d an:%d", tl, tn, nt, an);
+    check(streqls(my_buf, supposed));
+    check(needed == needed2);
+    // Check that it is truncated properly
+    supposed[4] = '\0';
+    omp_capture_affinity(my_buf, 5, NULL);
+    check(streqls(my_buf, supposed));
+
+    needed = omp_capture_affinity(my_buf, 512, second_format);
+    needed2 = (size_t)snprintf(supposed, 512, "nesting_level:%d thread_num:%d num_threads:%d ancestor_tnum:%d", tl, tn, nt, an);
+    check(streqls(my_buf, supposed));
+    check(needed == needed2);
+
+    // Check that it is truncated properly
+    supposed[25] = '\0';
+    omp_capture_affinity(my_buf, 26, second_format);
+    check(streqls(my_buf, supposed));
+  }
+
+  #pragma omp parallel num_threads(4)
+  {
+    omp_display_affinity(NULL);
+    omp_display_affinity(second_format);
+  }
+
+  return 0;
+}
+
+// CHECK: num_threads=4 tl:[0-9]+ tn:[0-9]+ nt:[0-9]+ an:[0-9]+
+// CHECK: num_threads=4 nesting_level:[0-9]+ thread_num:[0-9]+ num_threads:[0-9]+ ancestor_tnum:[0-9]+
diff --git a/runtime/test/affinity/format/check.py b/runtime/test/affinity/format/check.py
new file mode 100644
index 0000000..0adddbd
--- /dev/null
+++ b/runtime/test/affinity/format/check.py
@@ -0,0 +1,73 @@
+import os
+import sys
+import argparse
+import re
+
+class Checks(object):
+    class CheckError(Exception):
+        pass
+
+    def __init__(self, filename, prefix):
+        self.checks = []
+        self.lines = []
+        self.check_no_output = False
+        self.filename = filename
+        self.prefix = prefix
+    def readStdin(self):
+        self.lines = [l.rstrip('\r\n') for l in sys.stdin.readlines()]
+    def readChecks(self):
+        with open(self.filename) as f:
+            for line in f:
+                match = re.search('{}: NO_OUTPUT'.format(self.prefix), line)
+                if match is not None:
+                    self.check_no_output = True
+                    return
+                match = re.search('{}: num_threads=([0-9]+) (.*)$'.format(self.prefix), line)
+                if match is not None:
+                    num_threads = int(match.group(1))
+                    for i in range(num_threads):
+                        self.checks.append(match.group(2))
+                    continue
+    def check(self):
+        # If no checks at all, then nothing to do
+        if len(self.checks) == 0 and not self.check_no_output:
+            print('Nothing to check for')
+            return
+        # Check if we are expecting no output
+        if self.check_no_output:
+            if len(self.lines) == 0:
+                return
+            else:
+                raise Checks.CheckError('{}: Output was found when expecting none.'.format(self.prefix))
+        # Run through each check line and see if it exists in the output
+        # If it does, then delete the line from output and look for the
+        # next check line.
+        # If you don't find the line then raise Checks.CheckError
+        # If there are extra lines of output then raise Checks.CheckError
+        for c in self.checks:
+            found = False
+            index = -1
+            for idx, line in enumerate(self.lines):
+                if re.search(c, line) is not None:
+                    found = True
+                    index = idx
+                    break
+            if not found:
+                raise Checks.CheckError('{}: Did not find: {}'.format(self.prefix, c))
+            else:
+                del self.lines[index]
+        if len(self.lines) != 0:
+            raise Checks.CheckError('{}: Extra output: {}'.format(self.prefix, self.lines))
+
+# Setup argument parsing
+parser = argparse.ArgumentParser(description='''This script checks output of
+    a program against "CHECK" lines in filename''')
+parser.add_argument('filename', default=None, help='filename to check against')
+parser.add_argument('-c', '--check-prefix', dest='prefix',
+                    default='CHECK', help='check prefix token default: %(default)s')
+command_args = parser.parse_args()
+# Do the checking
+checks = Checks(command_args.filename, command_args.prefix)
+checks.readStdin()
+checks.readChecks()
+checks.check()
diff --git a/runtime/test/affinity/format/fields_modifiers.c b/runtime/test/affinity/format/fields_modifiers.c
new file mode 100644
index 0000000..c180271
--- /dev/null
+++ b/runtime/test/affinity/format/fields_modifiers.c
@@ -0,0 +1,117 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition)                                                       \
+  if (!(condition)) {                                                          \
+    fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__,           \
+            __LINE__);                                                         \
+    exit(1);                                                                   \
+  }
+
+#define BUFFER_SIZE 1024
+
+char buf[BUFFER_SIZE];
+#pragma omp threadprivate(buf)
+
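+// Check that the thread_num reported at ompt_event_implicit_task_end matches
+// the one reported at the corresponding implicit_task_begin, for the outer
+// and the nested parallel regions alike.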
+char* get_string(size_t check_needed) {
+  size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+  //printf("buf = %s\n", buf);
+  check(needed < BUFFER_SIZE);
+  if (check_needed != 0) {
+    check(needed == check_needed);
+  }
+  return buf;
+}
+
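+// The checks below pin down the field modifiers accepted by the affinity
+// format, as exercised here: "%0.8n" right-justifies thread_num in 8 columns
+// with leading zeros, "%.12n" right-justifies in 12 columns with spaces,
+// "%5n" left-justifies in a 5-column field, and "%018n" is treated as a
+// plain field width of 18 (left-justified, space-padded).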
+void check_thread_num_padded_rjustified() {
+  int i;
+  const char* formats[2] = {"%0.8{thread_num}", "%0.8n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(8);
+      for (j = 0; j < 7; ++j) {
+        check(s[j] == '0');
+      }
+      check(s[j] == ctid);
+    }
+  }
+}
+
+void check_thread_num_rjustified() {
+  int i;
+  const char* formats[2] = {"%.12{thread_num}", "%.12n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(12);
+      for (j = 0; j < 11; ++j) {
+        check(s[j] == ' ');
+      }
+      check(s[j] == ctid);
+    }
+  }
+}
+
+void check_thread_num_ljustified() {
+  int i;
+  const char* formats[2] = {"%5{thread_num}", "%5n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(5);
+      check(s[0] == ctid);
+      for (j = 1; j < 5; ++j) {
+        check(s[j] == ' ');
+      }
+    }
+  }
+}
+
+void check_thread_num_padded_ljustified() {
+  int i;
+  const char* formats[2] = {"%018{thread_num}", "%018n"};
+  for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      int j;
+      int tid = omp_get_thread_num();
+      char ctid = '0' + (char)tid;
+      char* s = get_string(18);
+      check(s[0] == ctid);
+      for (j = 1; j < 18; ++j) {
+        check(s[j] == ' ');
+      }
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  check_thread_num_ljustified();
+  check_thread_num_rjustified();
+  check_thread_num_padded_ljustified();
+  check_thread_num_padded_rjustified();
+  return 0;
+}
diff --git a/runtime/test/affinity/format/fields_values.c b/runtime/test/affinity/format/fields_values.c
new file mode 100644
index 0000000..e56ce27
--- /dev/null
+++ b/runtime/test/affinity/format/fields_values.c
@@ -0,0 +1,152 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition)                                                       \
+  if (!(condition)) {                                                          \
+    fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__,           \
+            __LINE__);                                                         \
+    exit(1);                                                                   \
+  }
+
+#if defined(_WIN32)
+#include <windows.h>
+#define getpid _getpid
+typedef int pid_t;
+#define gettid GetCurrentThreadId
+#define my_gethostname(buf, sz) GetComputerNameA(buf, &(sz))
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#define my_gethostname(buf, sz) gethostname(buf, sz)
+#endif
+
+#define BUFFER_SIZE 256
+
+int get_integer() {
+  int n, retval;
+  char buf[BUFFER_SIZE];
+  size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+  check(needed < BUFFER_SIZE);
+  n = sscanf(buf, "%d", &retval);
+  check(n == 1);
+  return retval;
+}
+
+char* get_string() {
+  char buf[BUFFER_SIZE];
+  size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+  check(needed < BUFFER_SIZE);
+  return strdup(buf);
+}
+
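+// For each of the two equivalent format strings, verify that the value
+// captured through omp_capture_affinity matches func() both in the outer
+// parallel region and inside a nested one.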
+void check_integer(const char* formats[2], int(*func)()) {
+  int i;
+  for (i = 0; i < 2; ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      check(get_integer() == func());
+      #pragma omp parallel num_threads(3)
+      {
+        check(get_integer() == func());
+      }
+      check(get_integer() == func());
+    }
+  }
+}
+
+void check_nesting_level() {
+  // Check %{nesting_level} and %L
+  const char* formats[2] = {"%{nesting_level}", "%L"};
+  check_integer(formats, omp_get_level);
+}
+
+void check_thread_num() {
+  // Check %{thread_num} and %n
+  const char* formats[2] = {"%{thread_num}", "%n"};
+  check_integer(formats, omp_get_thread_num);
+}
+
+void check_num_threads() {
+  // Check %{num_threads} and %N
+  const char* formats[2] = {"%{num_threads}", "%N"};
+  check_integer(formats, omp_get_num_threads);
+}
+
+int ancestor_helper() {
+  return omp_get_ancestor_thread_num(omp_get_level() - 1);
+}
+void check_ancestor_tnum() {
+  // Check %{ancestor_tnum} and %a
+  const char* formats[2] = {"%{ancestor_tnum}", "%a"};
+  check_integer(formats, ancestor_helper);
+}
+
+int my_get_pid() { return (int)getpid(); }
+void check_process_id() {
+  // Check %{process_id} and %P
+  const char* formats[2] = {"%{process_id}", "%P"};
+  check_integer(formats, my_get_pid);
+}
+
+/*
+int my_get_tid() { return (int)gettid(); }
+void check_native_thread_id() {
+  // Check %{native_thread_id} and %i
+  const char* formats[2] = {"%{native_thread_id}", "%i"};
+  check_integer(formats, my_get_tid);
+}
+*/
+
+void check_host() {
+  int i;
+  int buffer_size = 256;
+  const char* formats[2] = {"%{host}", "%H"};
+  char hostname[256];
+  my_gethostname(hostname, buffer_size);
+  for (i = 0; i < 2; ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      char* host = get_string();
+      check(streqls(host, hostname));
+      free(host);
+    }
+  }
+}
+
+void check_undefined() {
+  int i;
+  const char* formats[2] = {"%{foobar}", "%X"};
+  for (i = 0; i < 2; ++i) {
+    omp_set_affinity_format(formats[i]);
+    #pragma omp parallel num_threads(8)
+    {
+      char* undef = get_string();
+      check(streqls(undef, "undefined"));
+      free(undef);
+    }
+  }
+}
+
+int main(int argc, char** argv) {
+  omp_set_nested(1);
+  check_nesting_level();
+  check_thread_num();
+  check_num_threads();
+  check_ancestor_tnum();
+  check_process_id();
+  //check_native_thread_id();
+  check_host();
+  check_undefined();
+  return 0;
+}
diff --git a/runtime/test/affinity/format/increase.c b/runtime/test/affinity/format/increase.c
new file mode 100644
index 0000000..46d8edb
--- /dev/null
+++ b/runtime/test/affinity/format/increase.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
+  // should print all for first parallel
+  omp_set_num_threads(4);
+  #pragma omp parallel
+  { }
+  // should print all because of new threads
+  omp_set_num_threads(8);
+  #pragma omp parallel
+  { }
+  // should not print anything here
+  omp_set_num_threads(6);
+  #pragma omp parallel
+  { }
+  // should print all because of new thread
+  omp_set_num_threads(9);
+  #pragma omp parallel
+  { }
+  // should not print anything here
+  omp_set_num_threads(2);
+  #pragma omp parallel
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8
+// CHECK: num_threads=6 TESTER: tl:1 tn:[0-5] nt:6
+// CHECK: num_threads=9 TESTER: tl:1 tn:[0-8] nt:9
+// CHECK: num_threads=2 TESTER: tl:1 tn:[01] nt:2
diff --git a/runtime/test/affinity/format/lit.local.cfg b/runtime/test/affinity/format/lit.local.cfg
new file mode 100644
index 0000000..80583af
--- /dev/null
+++ b/runtime/test/affinity/format/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'openmp-5.0' not in config.available_features:
+    config.unsupported = True
diff --git a/runtime/test/affinity/format/nested.c b/runtime/test/affinity/format/nested.c
new file mode 100644
index 0000000..502c1da
--- /dev/null
+++ b/runtime/test/affinity/format/nested.c
@@ -0,0 +1,23 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close %libomp-run | python %S/check.py -c 'CHECK' %s
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(4)
+  {
+    #pragma omp parallel num_threads(3)
+    { }
+  }
+  return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 at:0 tn:[0-3] nt:4
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
diff --git a/runtime/test/affinity/format/nested2.c b/runtime/test/affinity/format/nested2.c
new file mode 100644
index 0000000..3dd4956
--- /dev/null
+++ b/runtime/test/affinity/format/nested2.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Currently, KMP_HOT_TEAMS_MAX_LEVEL has to be equal to the
+// nest depth for intuitive behavior
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(4)
+  {
+    #pragma omp parallel num_threads(3)
+    { }
+    #pragma omp parallel num_threads(3)
+    { }
+  }
+  #pragma omp parallel num_threads(4)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
diff --git a/runtime/test/affinity/format/nested_mixed.c b/runtime/test/affinity/format/nested_mixed.c
new file mode 100644
index 0000000..a39b4fd
--- /dev/null
+++ b/runtime/test/affinity/format/nested_mixed.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(1)
+  {
+    #pragma omp parallel num_threads(2)
+    { }
+    #pragma omp parallel num_threads(2)
+    {
+      #pragma omp parallel num_threads(1)
+      {
+        #pragma omp parallel num_threads(2)
+        { }
+      }
+    }
+    #pragma omp parallel num_threads(1)
+    { }
+  }
+  #pragma omp parallel num_threads(2)
+  { }
+  #pragma omp parallel num_threads(1)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
+
+// CHECK: num_threads=2 TESTER: tl:2 at:[0-9] tn:[01] nt:2
+
+// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1
+
+// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2
+// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2
+
+// CHECK: num_threads=1 TESTER: tl:2 at:[0-9] tn:0 nt:1
+
+// CHECK: num_threads=2 TESTER: tl:1 at:[0-9] tn:[01] nt:2
+
+// CHECK: num_threads=1 TESTER: tl:1 at:[0-9] tn:0 nt:1
diff --git a/runtime/test/affinity/format/nested_serial.c b/runtime/test/affinity/format/nested_serial.c
new file mode 100644
index 0000000..87ff2bd
--- /dev/null
+++ b/runtime/test/affinity/format/nested_serial.c
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
+  omp_set_nested(1);
+  #pragma omp parallel num_threads(1)
+  {
+    #pragma omp parallel num_threads(1)
+    { }
+    #pragma omp parallel num_threads(1)
+    { }
+    #pragma omp parallel num_threads(1)
+    {
+      #pragma omp parallel num_threads(1)
+      { }
+    }
+    #pragma omp parallel num_threads(1)
+    { }
+  }
+  #pragma omp parallel num_threads(1)
+  { }
+  #pragma omp parallel num_threads(1)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:3 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
diff --git a/runtime/test/affinity/format/proc_bind.c b/runtime/test/affinity/format/proc_bind.c
new file mode 100644
index 0000000..e88e1aa
--- /dev/null
+++ b/runtime/test/affinity/format/proc_bind.c
@@ -0,0 +1,31 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES='{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0}' %libomp-run | python %S/check.py -c 'CHECK' %s
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
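+// Each adjacent pair of parallel regions below uses the same proc_bind
+// clause, so affinity should only be displayed when the binding changes:
+// once for spread, once for close, once for master (three batches, matching
+// the three CHECK lines at the end).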
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N aff:{%A}");
+  omp_set_num_threads(8);
+  // Initial parallel
+  #pragma omp parallel proc_bind(spread)
+  { }
+  #pragma omp parallel proc_bind(spread)
+  { }
+  // Affinity changes here
+  #pragma omp parallel proc_bind(close)
+  { }
+  #pragma omp parallel proc_bind(close)
+  { }
+  // Affinity changes here
+  #pragma omp parallel proc_bind(master)
+  { }
+  #pragma omp parallel proc_bind(master)
+  { }
+  return 0;
+}
+
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
diff --git a/runtime/test/affinity/format/simple.c b/runtime/test/affinity/format/simple.c
new file mode 100644
index 0000000..954aa74
--- /dev/null
+++ b/runtime/test/affinity/format/simple.c
@@ -0,0 +1,27 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=false %libomp-run | python %S/check.py -c 'NOTHING' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=1 %libomp-run | python %S/check.py -c 'CHECK' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=2 %libomp-run | python %S/check.py -c 'CHECK-2' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=3 %libomp-run | python %S/check.py -c 'CHECK-3' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=4 %libomp-run | python %S/check.py -c 'CHECK-4' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
+  #pragma omp parallel
+  { }
+  #pragma omp parallel
+  { }
+  return 0;
+}
+
+// NOTHING: NO_OUTPUT
+// CHECK: num_threads=1 TESTER: tl:1 tn:0 nt:1
+// CHECK-2: num_threads=2 TESTER: tl:1 tn:[01] nt:2
+// CHECK-3: num_threads=3 TESTER: tl:1 tn:[0-2] nt:3
+// CHECK-4: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
+// CHECK-8: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8
diff --git a/runtime/test/affinity/format/simple_env.c b/runtime/test/affinity/format/simple_env.c
new file mode 100644
index 0000000..7aab1cf
--- /dev/null
+++ b/runtime/test/affinity/format/simple_env.c
@@ -0,0 +1,16 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_AFFINITY_FORMAT='TESTER-ENV: tl:%L tn:%n nt:%N' OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+  #pragma omp parallel
+  { }
+  #pragma omp parallel
+  { }
+  return 0;
+}
+
+// CHECK-8: num_threads=8 TESTER-ENV: tl:1 tn:[0-7] nt:8
diff --git a/runtime/test/api/omp_alloc.c b/runtime/test/api/omp_alloc.c
index afad4a5..2002adb 100644
--- a/runtime/test/api/omp_alloc.c
+++ b/runtime/test/api/omp_alloc.c
@@ -1,4 +1,7 @@
 // RUN: %libomp-compile-and-run
+
+// REQUIRES: openmp-5.0
+
 #include <stdio.h>
 #include <stdint.h>
 #include <omp.h>
diff --git a/runtime/test/api/omp_get_wtick.c b/runtime/test/api/omp_get_wtick.c
index 8b35226..11a320f 100644
--- a/runtime/test/api/omp_get_wtick.c
+++ b/runtime/test/api/omp_get_wtick.c
@@ -7,7 +7,7 @@
   double tick;
   tick = -1.;
   tick = omp_get_wtick ();
-  return ((tick > 0.0) && (tick < 0.01));
+  return ((tick > 0.0) && (tick <= 0.01));
 }
 
 int main()
diff --git a/runtime/test/api/omp_in_parallel.c b/runtime/test/api/omp_in_parallel.c
index d09313e..5e9e635 100644
--- a/runtime/test/api/omp_in_parallel.c
+++ b/runtime/test/api/omp_in_parallel.c
@@ -30,6 +30,11 @@
   int i;
   int num_failed=0;
 
+  // the test requires more than 1 thread to pass
+  omp_set_dynamic(0); // disable dynamic adjustment of threads
+  if (omp_get_max_threads() == 1)
+    omp_set_num_threads(2); // set 2 threads if no HW resources available
+
   for(i = 0; i < REPETITIONS; i++) {
     if(!test_omp_in_parallel()) {
       num_failed++;
diff --git a/runtime/test/flush/omp_flush.c b/runtime/test/flush/omp_flush.c
index 3fd3cdf..95a406d 100644
--- a/runtime/test/flush/omp_flush.c
+++ b/runtime/test/flush/omp_flush.c
@@ -36,6 +36,11 @@
   int i;
   int num_failed=0;
 
+  // the test requires more than 1 thread to pass
+  omp_set_dynamic(0); // disable dynamic adjustment of threads
+  if (omp_get_max_threads() == 1)
+    omp_set_num_threads(2); // set 2 threads if no HW resources available
+
   for (i = 0; i < REPETITIONS; i++) {
     if(!test_omp_flush()) {
       num_failed++;
diff --git a/runtime/test/lit.cfg b/runtime/test/lit.cfg
index 9f0c059..066929e 100644
--- a/runtime/test/lit.cfg
+++ b/runtime/test/lit.cfg
@@ -55,7 +55,6 @@
     libs += " -latomic"
 
 # Allow REQUIRES / UNSUPPORTED / XFAIL to work
-config.target_triple = [ ]
 for feature in config.test_compiler_features:
     config.available_features.add(feature)
 
@@ -91,9 +90,21 @@
     # for callback.h
     config.test_flags += " -I " + config.test_source_root + "/ompt"
 
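+# Expose the OpenMP API version supported by the runtime as lit features so
+# tests can use, e.g., "REQUIRES: openmp-5.0".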
+if config.libomp_omp_version >= 50:
+    config.available_features.add("openmp-5.0")
+
+if config.libomp_omp_version >= 45:
+    config.available_features.add("openmp-4.5")
+
+if config.libomp_omp_version >= 40:
+    config.available_features.add("openmp-4.0")
+
 if 'Linux' in config.operating_system:
     config.available_features.add("linux")
 
+if config.operating_system in ['Linux', 'Windows']:
+    config.available_features.add('affinity')
+
 # to run with icc INTEL_LICENSE_FILE must be set
 if 'INTEL_LICENSE_FILE' in os.environ:
     config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']
@@ -116,7 +127,7 @@
 
 if config.has_ompt:
     config.substitutions.append(("FileCheck", config.test_filecheck))
-    config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
+    config.substitutions.append(("%sort-threads", "sort -n -s"))
     if config.operating_system == 'Windows':
         # No such environment variable on Windows.
         config.substitutions.append(("%preload-tool", "true ||"))
diff --git a/runtime/test/lit.site.cfg.in b/runtime/test/lit.site.cfg.in
index c2825ee..fe4a372 100644
--- a/runtime/test/lit.site.cfg.in
+++ b/runtime/test/lit.site.cfg.in
@@ -1,11 +1,13 @@
 @AUTO_GEN_COMMENT@
 
+config.target_triple = "@TARGET_TRIPLE@"
 config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
 config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
 config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@
 config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@"
 config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@"
 config.test_extra_flags = "@OPENMP_TEST_FLAGS@"
+config.libomp_omp_version = @LIBOMP_OMP_VERSION@
 config.libomp_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
 config.library_dir = "@LIBOMP_LIBRARY_DIR@"
 config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
diff --git a/runtime/test/ompt/callback.h b/runtime/test/ompt/callback.h
index df83043..0304cff 100755
--- a/runtime/test/ompt/callback.h
+++ b/runtime/test/ompt/callback.h
@@ -79,7 +79,7 @@
 static void print_ids(int level)
 {
   int task_type, thread_num;
-  omp_frame_t *frame;
+  ompt_frame_t *frame;
   ompt_data_t *task_parallel_data;
   ompt_data_t *task_data;
   int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
@@ -92,8 +92,8 @@
            "task_type=%s=%d, thread_num=%d\n",
            ompt_get_thread_data()->value, level,
            exists_task ? task_parallel_data->value : 0,
-           exists_task ? task_data->value : 0, frame->exit_frame,
-           frame->enter_frame, buffer, task_type, thread_num);
+           exists_task ? task_data->value : 0, frame->exit_frame.ptr,
+           frame->enter_frame.ptr, buffer, task_type, thread_num);
 }
 
 #define get_frame_address(level) __builtin_frame_address(level)
@@ -197,7 +197,7 @@
   ompt_mutex_t kind,
   unsigned int hint,
   unsigned int impl,
-  omp_wait_id_t wait_id,
+  ompt_wait_id_t wait_id,
   const void *codeptr_ra)
 {
   switch(kind)
@@ -225,7 +225,7 @@
 static void
 on_ompt_callback_mutex_acquired(
   ompt_mutex_t kind,
-  omp_wait_id_t wait_id,
+  ompt_wait_id_t wait_id,
   const void *codeptr_ra)
 {
   switch(kind)
@@ -253,7 +253,7 @@
 static void
 on_ompt_callback_mutex_released(
   ompt_mutex_t kind,
-  omp_wait_id_t wait_id,
+  ompt_wait_id_t wait_id,
   const void *codeptr_ra)
 {
   switch(kind)
@@ -281,7 +281,7 @@
 static void
 on_ompt_callback_nest_lock(
     ompt_scope_endpoint_t endpoint,
-    omp_wait_id_t wait_id,
+    ompt_wait_id_t wait_id,
     const void *codeptr_ra)
 {
   switch(endpoint)
@@ -460,7 +460,7 @@
   ompt_mutex_t kind,
   unsigned int hint,
   unsigned int impl,
-  omp_wait_id_t wait_id,
+  ompt_wait_id_t wait_id,
   const void *codeptr_ra)
 {
   switch(kind)
@@ -479,7 +479,7 @@
 static void
 on_ompt_callback_lock_destroy(
   ompt_mutex_t kind,
-  omp_wait_id_t wait_id,
+  ompt_wait_id_t wait_id,
   const void *codeptr_ra)
 {
   switch(kind)
@@ -583,7 +583,7 @@
 
 static void on_ompt_callback_parallel_begin(
     ompt_data_t *encountering_task_data,
-    const omp_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
+    const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
     uint32_t requested_team_size, int flag, const void *codeptr_ra) {
   if(parallel_data->ptr)
     printf("0: parallel_data initially not null\n");
@@ -593,8 +593,8 @@
          "parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32
          ", codeptr_ra=%p, invoker=%d\n",
          ompt_get_thread_data()->value, encountering_task_data->value,
-         encountering_task_frame->exit_frame,
-         encountering_task_frame->enter_frame, parallel_data->value,
+         encountering_task_frame->exit_frame.ptr,
+         encountering_task_frame->enter_frame.ptr, parallel_data->value,
          requested_team_size, codeptr_ra, flag);
 }
 
@@ -610,7 +610,7 @@
 static void
 on_ompt_callback_task_create(
     ompt_data_t *encountering_task_data,
-    const omp_frame_t *encountering_task_frame,
+    const ompt_frame_t *encountering_task_frame,
     ompt_data_t* new_task_data,
     int type,
     int has_dependences,
@@ -634,7 +634,7 @@
     parallel_data->value = ompt_get_unique_id();
   }
 
-  printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
+  printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL, encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
 }
 
 static void
@@ -692,9 +692,9 @@
   void *arg,
   const void *codeptr_ra)
 {
-  omp_frame_t* omptTaskFrame;
+  ompt_frame_t* omptTaskFrame;
   ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
-  printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame, omptTaskFrame->enter_frame);
+  printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
   return 0; //success
 }
 
diff --git a/runtime/test/ompt/misc/api_calls_from_other_thread.cpp b/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
index 470d7cd..e2ef1fc 100644
--- a/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
+++ b/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
@@ -31,12 +31,12 @@
 
   printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL));
 
-  int state = omp_state_undefined;
+  int state = ompt_state_undefined;
   const char *state_name;
   printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue,
          ompt_enumerate_states(state, &state, &state_name));
 
-  int impl = ompt_mutex_impl_unknown;
+  int impl = ompt_mutex_impl_none;
   const char *impl_name;
   printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue,
          ompt_enumerate_mutex_impls(impl, &impl, &impl_name));
diff --git a/runtime/test/ompt/misc/api_calls_misc.c b/runtime/test/ompt/misc/api_calls_misc.c
index d567b1b..884421e 100644
--- a/runtime/test/ompt/misc/api_calls_misc.c
+++ b/runtime/test/ompt/misc/api_calls_misc.c
@@ -19,7 +19,7 @@
            ompt_get_state(NULL));
 
     // ompt_enumerate_states()
-    int state = omp_state_undefined;
+    int state = ompt_state_undefined;
     const char *state_name;
     int steps = 0;
     while (ompt_enumerate_states(state, &state, &state_name) && steps < 1000) {
@@ -35,7 +35,7 @@
     }
 
     // ompt_enumerate_mutex_impls()
-    int impl = ompt_mutex_impl_unknown;
+    int impl = ompt_mutex_impl_none;
     const char *impl_name;
     steps = 0;
     while (ompt_enumerate_mutex_impls(impl, &impl, &impl_name) &&
diff --git a/runtime/test/ompt/misc/api_calls_places.c b/runtime/test/ompt/misc/api_calls_places.c
index ad338a7..3385c9c 100644
--- a/runtime/test/ompt/misc/api_calls_places.c
+++ b/runtime/test/ompt/misc/api_calls_places.c
@@ -42,7 +42,7 @@
     int omp_nums[omp_nums_size];
     omp_get_partition_place_nums(omp_nums);
     print_list("omp_get_partition_place_nums", omp_nums_size, omp_nums);
-    int ompt_nums_size = ompt_get_partition_place_nums(0, NULL);
+    int ompt_nums_size = ompt_get_partition_place_nums(0, omp_nums);
     int ompt_nums[ompt_nums_size];
     ompt_get_partition_place_nums(ompt_nums_size, ompt_nums);
     print_list("ompt_get_partition_place_nums", ompt_nums_size, ompt_nums);
diff --git a/runtime/test/ompt/misc/control_tool.c b/runtime/test/ompt/misc/control_tool.c
index 2c59666..7995614 100644
--- a/runtime/test/ompt/misc/control_tool.c
+++ b/runtime/test/ompt/misc/control_tool.c
@@ -1,6 +1,7 @@
 // RUN: %libomp-compile-and-run | FileCheck %s
 // REQUIRES: ompt
 // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+// XFAIL: powerpc64le, ppc64le
 #define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
 #include "callback.h"
 #include <omp.h>
@@ -22,7 +23,7 @@
 
   // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address({{.}})=[[EXIT_FRAME:0x[0-f]*]]
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter={{0x[0-f]*}}
   // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
 
   return 0;
diff --git a/runtime/test/ompt/misc/control_tool_no_ompt_support.c b/runtime/test/ompt/misc/control_tool_no_ompt_support.c
index ee64da0..23daf8b 100644
--- a/runtime/test/ompt/misc/control_tool_no_ompt_support.c
+++ b/runtime/test/ompt/misc/control_tool_no_ompt_support.c
@@ -1,4 +1,7 @@
 // RUN: %libomp-compile-and-run
+
+// REQUIRES: openmp-5.0
+
 #include <omp.h>
 
 int main()
diff --git a/runtime/test/ompt/misc/interoperability.cpp b/runtime/test/ompt/misc/interoperability.cpp
index 102e6de..b07814e 100644
--- a/runtime/test/ompt/misc/interoperability.cpp
+++ b/runtime/test/ompt/misc/interoperability.cpp
@@ -3,7 +3,11 @@
 
 #include <iostream>
 #include <thread>
+#if !defined(__NetBSD__)
 #include <alloca.h>
+#else
+#include <cstdlib>
+#endif
 
 #include "callback.h"
 #include "omp.h"
diff --git a/runtime/test/ompt/parallel/nested.c b/runtime/test/ompt/parallel/nested.c
index 035529c..d91597b 100644
--- a/runtime/test/ompt/parallel/nested.c
+++ b/runtime/test/ompt/parallel/nested.c
@@ -80,25 +80,25 @@
 
   // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]]
   // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
 
   // nested parallel masters
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
-  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+  // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]]
   // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
-  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
-  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
+  // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
   // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
   // explicit barrier
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
-  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+  // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=0x{{[0-f]+}}
   // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
   // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]]
   // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
diff --git a/runtime/test/ompt/parallel/nested_thread_num.c b/runtime/test/ompt/parallel/nested_thread_num.c
index e952f80..f14f87a 100644
--- a/runtime/test/ompt/parallel/nested_thread_num.c
+++ b/runtime/test/ompt/parallel/nested_thread_num.c
@@ -80,7 +80,7 @@
 // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
 // THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], 
 // THREADS-SAME: parent_task_frame.exit=[[NULL]],
-// THREADS-SAME: parent_task_frame.reenter=[[MAIN_REENTER]],
+// THREADS-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
 // THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2,
 // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
 // THREADS-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]]
@@ -101,14 +101,14 @@
 // THREADS: {{^}}[[MASTER_ID]]: task level 1:
 // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], 
 // THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], 
-// THREADS-SAME: reenter_frame=[[MAIN_REENTER]]
+// THREADS-SAME: reenter_frame=0x{{[0-f]+}}
 
 // THREADS: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
 
 // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin:
 // THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], 
 // THREADS-SAME: parent_task_frame.exit=[[EXIT]],
-// THREADS-SAME: parent_task_frame.reenter=[[REENTER]],
+// THREADS-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
 // THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], 
 // THREADS-SAME: requested_team_size=2,
 // THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
@@ -129,12 +129,12 @@
 
 // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]],
 // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
-// THREADS-SAME: reenter_frame=[[REENTER]]
+// THREADS-SAME: reenter_frame=0x{{[0-f]+}}
 
 // THREADS: {{^}}[[MASTER_ID]]: task level 2:
 // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], 
 // THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], 
-// THREADS-SAME: reenter_frame=[[MAIN_REENTER]]
+// THREADS-SAME: reenter_frame=0x{{[0-f]+}}
 
 // THREADS: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
 
@@ -149,7 +149,7 @@
 // THREADS: {{^}}[[MASTER_ID]]: task level 0:
 // THREADS-SAME:  parallel_id=[[NESTED_PARALLEL_ID]], 
 // THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
-// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=0x{{[0-f]+}}
 
 // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end:
 // THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], 
diff --git a/runtime/test/ompt/parallel/nested_threadnum.c b/runtime/test/ompt/parallel/nested_threadnum.c
new file mode 100644
index 0000000..a248530
--- /dev/null
+++ b/runtime/test/ompt/parallel/nested_threadnum.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include <omp.h>
+#include "callback.h"
+
+int main() {
+  omp_set_nested(1);
+#pragma omp parallel num_threads(2)
+  {
+#pragma omp barrier
+#pragma omp parallel num_threads(2)
+    { print_frame(0); }
+  }
+
+  // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+  // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM1:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin:
+  // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1]]
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM1:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM1]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+  // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+  // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM1]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+
+  // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+  // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM2:[0-9]+]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_parallel_begin:
+  // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2:[0-9]+]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2]]
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM2:[0-9]+]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_end
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM2]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_parallel_end:
+  // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2]]
+  // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_end
+  // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM2]]
+
+  // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM3:[0-9]+]]
+  // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_implicit_task_end
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM3]]
+
+  // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_implicit_task_begin:
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM4:[0-9]+]]
+  // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_implicit_task_end
+  // CHECK-SAME: thread_num=[[INNER_THREAD_NUM4]]
+
+  return 0;
+}
diff --git a/runtime/test/ompt/synchronization/taskwait.c b/runtime/test/ompt/synchronization/taskwait.c
index c431024..cb30f3b 100644
--- a/runtime/test/ompt/synchronization/taskwait.c
+++ b/runtime/test/ompt/synchronization/taskwait.c
@@ -1,6 +1,7 @@
 // RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
 // REQUIRES: ompt
 // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+// XFAIL: powerpc64le, ppc64le
 #include "callback.h"
 #include <omp.h>
 
diff --git a/runtime/test/ompt/tasks/explicit_task.c b/runtime/test/ompt/tasks/explicit_task.c
index 01fb3f8..a986c48 100644
--- a/runtime/test/ompt/tasks/explicit_task.c
+++ b/runtime/test/ompt/tasks/explicit_task.c
@@ -52,22 +52,22 @@
   // make sure initial data pointers are null
   // CHECK-NOT: 0: new_task_data initially not null
   
-  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // CHECK--doesnotwork: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
   // nested parallel masters
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // <- ompt_event_task_create would be expected here
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
   // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // explicit barrier after master
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // implicit barrier parallel
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -78,16 +78,16 @@
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
   // this is expected to come earlier and at MASTER:
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
diff --git a/runtime/test/ompt/tasks/serialized.c b/runtime/test/ompt/tasks/serialized.c
index 12a0281..b1ef45d 100644
--- a/runtime/test/ompt/tasks/serialized.c
+++ b/runtime/test/ompt/tasks/serialized.c
@@ -58,7 +58,7 @@
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
   // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]]
   // CHECK-SAME: parent_task_frame.exit=[[NULL]]
-  // CHECK-SAME: parent_task_frame.reenter=[[MAIN_REENTER]]
+  // CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}
   // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2
   // CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
 
@@ -76,13 +76,13 @@
   // CHECK: {{^}}[[MASTER_ID]]: task level 1
   // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]]
   // CHECK-SAME: task_id=[[PARENT_TASK_ID]],
-  // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
 
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create
   // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]]
   // CHECK-SAME: parent_task_frame.exit=[[EXIT]]
-  // CHECK-SAME: parent_task_frame.reenter=[[REENTER]]
+  // CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}
   // CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]]
   // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
 
@@ -96,12 +96,12 @@
 
   // CHECK: {{^}}[[MASTER_ID]]: task level 1
   // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
 
   // CHECK: {{^}}[[MASTER_ID]]: task level 2
   // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
   // CHECK-SAME: task_id=[[PARENT_TASK_ID]]
-  // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
 
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule
   // CHECK-SAME: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
@@ -135,7 +135,7 @@
   // CHECK: {{^}}[[THREAD_ID]]: task level 1
   // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
   // CHECK-SAME: task_id=[[PARENT_TASK_ID]]
-  // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
 
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)={{0x[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
diff --git a/runtime/test/ompt/tasks/task_in_joinbarrier.c b/runtime/test/ompt/tasks/task_in_joinbarrier.c
index 25b57a9..8228add 100644
--- a/runtime/test/ompt/tasks/task_in_joinbarrier.c
+++ b/runtime/test/ompt/tasks/task_in_joinbarrier.c
@@ -50,16 +50,16 @@
   // CHECK-NOT: 0: new_task_data initially not null
 
   // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
   // nested parallel masters
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // <- ompt_event_task_create would be expected here
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // implicit barrier parallel
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -70,7 +70,7 @@
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // implicit barrier parallel
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -79,7 +79,7 @@
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
diff --git a/runtime/test/ompt/tasks/untied_task.c b/runtime/test/ompt/tasks/untied_task.c
index e68fa26..4ee3f11 100644
--- a/runtime/test/ompt/tasks/untied_task.c
+++ b/runtime/test/ompt/tasks/untied_task.c
@@ -60,20 +60,20 @@
   // CHECK-NOT: 0: new_task_data initially not null
   
   // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
   // nested parallel masters
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // <- ompt_event_task_create would be expected here
-  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+  // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
   // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
   // explicit barrier after master
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
   // implicit barrier parallel
   // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -84,16 +84,16 @@
   // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
   // this is expected to come earlier and at MASTER:
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
   // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
-  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+  // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
+  // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
   // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
diff --git a/runtime/test/parallel/omp_nested.c b/runtime/test/parallel/omp_nested.c
index 8b78088..d2d5b08 100644
--- a/runtime/test/parallel/omp_nested.c
+++ b/runtime/test/parallel/omp_nested.c
@@ -12,6 +12,8 @@
 #ifdef _OPENMP
   if (omp_get_max_threads() > 4)
     omp_set_num_threads(4);
+  if (omp_get_max_threads() < 2)
+    omp_set_num_threads(2);
 #endif
 
   int counter = 0;
diff --git a/runtime/test/tasking/bug_nested_proxy_task.c b/runtime/test/tasking/bug_nested_proxy_task.c
index 6c00822..84e4dfd 100644
--- a/runtime/test/tasking/bug_nested_proxy_task.c
+++ b/runtime/test/tasking/bug_nested_proxy_task.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile -lpthread && %libomp-run
+// REQUIRES: openmp-4.5
 // The runtime currently does not get dependency information from GCC.
 // UNSUPPORTED: gcc
 
diff --git a/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/runtime/test/tasking/bug_proxy_task_dep_waiting.c
index e6dd895..fe8f18d 100644
--- a/runtime/test/tasking/bug_proxy_task_dep_waiting.c
+++ b/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile -lpthread && %libomp-run
+// REQUIRES: openmp-4.5
 // The runtime currently does not get dependency information from GCC.
 // UNSUPPORTED: gcc
 
diff --git a/runtime/test/tasking/kmp_task_reduction_nest.cpp b/runtime/test/tasking/kmp_task_reduction_nest.cpp
index 63dffe4..019a9fe 100644
--- a/runtime/test/tasking/kmp_task_reduction_nest.cpp
+++ b/runtime/test/tasking/kmp_task_reduction_nest.cpp
@@ -1,5 +1,6 @@
 // RUN: %libomp-cxx-compile-and-run
 // RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run
+// REQUIRES: openmp-5.0
 // GCC-5 is needed for OpenMP 4.0 support (taskgroup)
 // XFAIL: gcc-4
 #include <cstdio>
diff --git a/runtime/test/tasking/kmp_taskloop.c b/runtime/test/tasking/kmp_taskloop.c
index 4b13793..359f7a4 100644
--- a/runtime/test/tasking/kmp_taskloop.c
+++ b/runtime/test/tasking/kmp_taskloop.c
@@ -1,5 +1,6 @@
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+// REQUIRES: openmp-4.5
 #include <stdio.h>
 #include <omp.h>
 #include "omp_my_sleep.h"
diff --git a/runtime/test/tasking/omp_task.c b/runtime/test/tasking/omp_task.c
index c534abe..5703225 100644
--- a/runtime/test/tasking/omp_task.c
+++ b/runtime/test/tasking/omp_task.c
@@ -43,6 +43,9 @@
   int i;
   int num_failed=0;
 
+  if (omp_get_max_threads() < 2)
+    omp_set_num_threads(8);
+
   for(i = 0; i < REPETITIONS; i++) {
     if(!test_omp_task()) {
       num_failed++;
diff --git a/runtime/test/tasking/omp_task_priority.c b/runtime/test/tasking/omp_task_priority.c
index 7b62360..6acb4a8 100644
--- a/runtime/test/tasking/omp_task_priority.c
+++ b/runtime/test/tasking/omp_task_priority.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 %libomp-run
+// REQUIRES: openmp-4.5
 // Test OMP 4.5 task priorities
 // Currently only API function and envirable parsing implemented.
 // Test environment sets envirable: OMP_MAX_TASK_PRIORITY=42 as tested below.
diff --git a/runtime/test/tasking/omp_taskloop_grainsize.c b/runtime/test/tasking/omp_taskloop_grainsize.c
index 0833073..c5756a4 100644
--- a/runtime/test/tasking/omp_taskloop_grainsize.c
+++ b/runtime/test/tasking/omp_taskloop_grainsize.c
@@ -1,5 +1,6 @@
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+// REQUIRES: openmp-4.5
 
 // These compilers don't support the taskloop construct
 // UNSUPPORTED: gcc-4, gcc-5, icc-16
diff --git a/runtime/test/tasking/omp_taskloop_num_tasks.c b/runtime/test/tasking/omp_taskloop_num_tasks.c
index 7c3c704..75efea6 100644
--- a/runtime/test/tasking/omp_taskloop_num_tasks.c
+++ b/runtime/test/tasking/omp_taskloop_num_tasks.c
@@ -1,5 +1,6 @@
 // RUN: %libomp-compile-and-run
 // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+// REQUIRES: openmp-4.5
 
 // These compilers don't support the taskloop construct
 // UNSUPPORTED: gcc-4, gcc-5, icc-16
diff --git a/runtime/test/tasking/omp_taskyield.c b/runtime/test/tasking/omp_taskyield.c
index 5bb6984..7f85413 100644
--- a/runtime/test/tasking/omp_taskyield.c
+++ b/runtime/test/tasking/omp_taskyield.c
@@ -49,6 +49,9 @@
   int i;
   int num_failed=0;
 
+  if (omp_get_max_threads() < 2)
+    omp_set_num_threads(8);
+
   for(i = 0; i < REPETITIONS; i++) {
     if(!test_omp_taskyield()) {
       num_failed++;
diff --git a/runtime/test/worksharing/for/kmp_doacross_check.c b/runtime/test/worksharing/for/kmp_doacross_check.c
index 59b61e3..4eea328 100644
--- a/runtime/test/worksharing/for/kmp_doacross_check.c
+++ b/runtime/test/worksharing/for/kmp_doacross_check.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
 // UNSUPPORTED: gcc
 // This test is incompatible with gcc because of the explicit call to
 // __kmpc_doacross_fini().  gcc relies on an implicit call to this function
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/runtime/test/worksharing/for/kmp_sch_simd_guided.c
index 5c6f94b..6cf5d2f 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_guided.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_guided.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
 /*
   Test for the 'schedule(simd:guided)' clause.
   Compiler needs to generate a dynamic dispatching and pass the schedule
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
index bb538d1..8b5f34a 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
 
 // The test checks schedule(simd:runtime)
 // in combination with omp_set_schedule()
@@ -66,6 +67,7 @@
   int ub;   // Chunk upper bound.
   int st;   // Chunk stride.
   int rc;
+  int nthreads = omp_get_num_threads();
   int tid = omp_get_thread_num();
   int gtid = __kmpc_global_thread_num(&loc);
   int last;
@@ -134,7 +136,7 @@
         printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
       // Update maximum for the next chunk.
       if (last) {
-        if (!no_chunk && cur > ch)
+        if (!no_chunk && cur > ch && nthreads > 1)
           printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
                  (int)cur, ch, tid, ++err);
       } else {
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
index d137831..142e9b3 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
@@ -6,6 +6,7 @@
 // RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
 // RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
 // RUN: env OMP_SCHEDULE=auto      %libomp-run
+// REQUIRES: openmp-4.5
 
 // The test checks schedule(simd:runtime)
 // in combination with OMP_SCHEDULE=guided[,chunk]
@@ -74,6 +75,7 @@
   int ub;   // Chunk upper bound.
   int st;   // Chunk stride.
   int rc;
+  int nthreads = omp_get_num_threads();
   int tid = omp_get_thread_num();
   int gtid = __kmpc_global_thread_num(&loc);
   int last;
@@ -144,7 +146,7 @@
       if (!last && cur % ch)
         printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
                chunk, (int)cur, ch, tid, ++err);
-      if (last && !no_chunk && cur > ch)
+      if (last && !no_chunk && cur > ch && nthreads > 1)
         printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
                (int)cur, ch, tid, ++err);
       if (cur < max)
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
index 4cb15d6..e2c878f 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
@@ -1,5 +1,6 @@
 // RUN: %libomp-compile && %libomp-run
 // RUN: %libomp-run 1 && %libomp-run 2
+// REQUIRES: openmp-4.5
 
 // The test checks schedule(simd:runtime)
 // in combination with OMP_SCHEDULE=static[,chunk]
@@ -67,6 +68,7 @@
   int ub;   // Chunk upper bound.
   int st;   // Chunk stride.
   int rc;
+  int nthreads = omp_get_num_threads();
   int tid = omp_get_thread_num();
   int gtid = __kmpc_global_thread_num(&loc);
   int last;
@@ -135,7 +137,7 @@
         printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
       // Update maximum for the next chunk.
       if (last) {
-        if (!no_chunk && cur > ch)
+        if (!no_chunk && cur > ch && nthreads > 1)
           printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
                  (int)cur, ch, tid, ++err);
       } else {
diff --git a/runtime/test/worksharing/for/omp_doacross.c b/runtime/test/worksharing/for/omp_doacross.c
index 4187112..32e8e82 100644
--- a/runtime/test/worksharing/for/omp_doacross.c
+++ b/runtime/test/worksharing/for/omp_doacross.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
 // XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16
 #include <stdio.h>
 #include <stdlib.h>
@@ -51,6 +52,8 @@
 int main(int argc, char **argv) {
   int i;
   int num_failed = 0;
+  if (omp_get_max_threads() < 2)
+    omp_set_num_threads(4);
   for (i = 0; i < REPETITIONS; i++) {
     if (!test_doacross()) {
       num_failed++;