[caffe2][cuda] Trace `allocate` and `local_raw_delete` events with PyTorch USDTs (#107322)

Summary: Adds new tracepoints to CUDA allocator code for tracking alloc and dealloc events in the allocator code.

Test Plan: This change simply adds static tracepoints to CUDA allocator code, and does not otherwise change any logic. Testing is not required.

Reviewed By: chaekit

Differential Revision: D48229150

Pull Request resolved: https://github.com/pytorch/pytorch/pull/107322
Approved by: https://github.com/chaekit

commit: e74049167457e5f4ca8214380fcd46a52371876b [log] [tgz]
author: vlad-scherbich <22730518+vlad-scherbich@users.noreply.github.com> Tue Aug 22 16:31:30 2023 +0000
committer: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com> Tue Aug 22 16:31:30 2023 +0000
tree: f24d3cc882cf6f8f0a9931065647df079fa47653
parent: a408920817ac3852d27fc96ba85109ff4c981d52 [diff]
diff --git a/c10/cuda/CUDACachingAllocator.cpp b/c10/cuda/CUDACachingAllocator.cpp
index e86cd24..25bf35b 100644
--- a/c10/cuda/CUDACachingAllocator.cpp
+++ b/c10/cuda/CUDACachingAllocator.cpp

@@ -9,6 +9,7 @@
 #include <c10/util/flat_hash_map.h>
 #include <c10/util/irange.h>
 #include <c10/util/llvmMathExtras.h>
+#include <c10/util/static_tracepoint.h>
 
 #if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED)
 #include <c10/cuda/driver_api.h>
@@ -33,6 +34,9 @@
 #include <utility>
 #include <vector>
 
+TORCH_SDT_DEFINE_SEMAPHORE(malloc)
+TORCH_SDT_DEFINE_SEMAPHORE(free)
+
 namespace c10 {
 
 C10_DEFINE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
@@ -3306,6 +3310,10 @@
       return {r, r, &uncached_delete, Device(DeviceType::CUDA, device)};
     }
     if (size != 0) {
+      if (TORCH_SDT_IS_ENABLED(malloc)) {
+        TORCH_SDT_WITH_SEMAPHORE(malloc, &r, device, size, 0);
+      }
+
       // Allocator declars allocate const!?
       const_cast<NativeCachingAllocator*>(this)->malloc(
           &r, device, size, cuda::getCurrentCUDAStream(device));
@@ -3483,6 +3491,10 @@
 NativeCachingAllocator allocator;
 
 void local_raw_delete(void* ptr) {
+  if (TORCH_SDT_IS_ENABLED(free)) {
+    TORCH_SDT_WITH_SEMAPHORE(free, ptr);
+  }
+
   allocator.free(ptr);
 }
commit	e74049167457e5f4ca8214380fcd46a52371876b	[log] [tgz]
author	vlad-scherbich <22730518+vlad-scherbich@users.noreply.github.com>	Tue Aug 22 16:31:30 2023 +0000
committer	PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>	Tue Aug 22 16:31:30 2023 +0000
tree	f24d3cc882cf6f8f0a9931065647df079fa47653
parent	a408920817ac3852d27fc96ba85109ff4c981d52 [diff]