[iOS][coreml] Add CoreML memory observer (#76251)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/76251

Add an observer to `PTMCoreMLExecutor` so we can inspect OOMs in production to help with T115554493.

The behavior of the logger is as follows:

1. Each time a model is compiled, there is a small chance (~0.1%) that the session is sampled and all of its logs are published to QPL. This is determined by comparing the randomly generated `_model_load_id` against `_sample_thresh`.
2. If the session is sampled, every `_sample_every`-th inference (500 by default) is logged via QPL.
3. Every QPL log collects memory metrics before and after model compilation/inference.
4. If memory pressure is abnormal (remaining memory < 400 MB) before or after compilation/inference, that compilation/inference is logged to QPL regardless of sampling.
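
Concretely, the sampling decision reduces to the sketch below (plain C++, mirroring the diff; the helper names are illustrative and not part of the change — the memory-pressure override in (4) lives in the observer implementation, which is internal and not shown here):

```
// Sketch only: std::rand() is uniform over [0, RAND_MAX], so a load
// session is sampled with probability _sample_thresh / RAND_MAX ≈ 0.1%.
bool shouldLogCompile(int32_t model_load_id, int32_t sample_thresh) {
  return model_load_id < sample_thresh;
}

bool shouldLogInference(
    int32_t model_load_id,
    int32_t sample_thresh,
    int32_t inferences,     // incremented before this check
    int32_t sample_every) { // 500 in this diff
  // Only sampled sessions log, and then only every N-th inference.
  return model_load_id < sample_thresh && inferences > 1 &&
      inferences % sample_every == 0;
}
```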

Test Plan:
We can test in PyTorch Playground and inspect the QPL logs through Flipper:

```
arc focus2 -b pp-ios -a ModelRunner -a //xplat/caffe2/c10:c10Apple -a //xplat/caffe2:torch_mobile_coreApple  -a //xplat/caffe2/fb/dynamic_pytorch:dynamic_pytorch_implApple -a //xplat/caffe2:coreml_delegateApple  -a ModelRunnerDevOps -a //xplat/caffe2:torch_mobile_all_opsApple -a coreml_memory_observer -a //xplat/perflogger:perfloggerApple -fd --force-with-wrong-xcode
```

To check results in Hive/Scuba, test in Instagram:

```
arc focus2 -b igios-no-extensions -a //fbobjc/Apps/Instagram/AppLibraries/Core/QPL/IGPerformanceLogging:IGPerformanceLogging -a //xplat/caffe2/c10:c10Apple -a //xplat/caffe2:torch_mobile_coreApple  -a //xplat/caffe2/fb/dynamic_pytorch:dynamic_pytorch_implApple -a //xplat/caffe2:coreml_delegateApple -a //xplat/caffe2:torch_mobile_all_opsApple -a //xplat/perflogger:perfloggerApple -a coreml_memory_observerApple -c pt.enable_qpl=1 --force-with-wrong-xcode
```

Note that `_sample_thresh` samples only ~0.1% of model loads, so we need to raise it locally (e.g., to `RAND_MAX`) to ensure logs show up.
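
For reference, an observer is wired in through the global config added in this diff. A hypothetical registration sketch (`MyQPLObserver` and `registerCoreMLObserver` are illustrative names; the production QPL observer lives in internal code):

```
#import <torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h>

class MyQPLObserver : public PTMCoreMLObserver {
 public:
  size_t getRemainingMemory() override {
    return 0; // query available process memory here
  }
  void onExitCompileModel(const int32_t instance_key, bool success, bool should_log)
      override {
    // Publish the collected metrics to QPL when should_log is true.
  }
};

// Call once at app startup, before any CoreML model is compiled.
void registerCoreMLObserver() {
  coreMLObserverConfig().setCoreMLObserver(std::make_unique<MyQPLObserver>());
}
```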

Reviewed By: kimishpatel

Differential Revision: D35511873

fbshipit-source-id: 59f2fa2d021178ceab1fcf5ee94b2f15ceca32ee
(cherry picked from commit 8b8af55410ea1231693ee980c80d8a749f5ad870)
diff --git a/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm b/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
index 858cf5e..2df7352 100644
--- a/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
+++ b/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
@@ -7,6 +7,9 @@
 #import <UIKit/UIKit.h>
 #endif
 
+// Observer
+#import <torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h>
+
 #include <sys/utsname.h>
 #include <fstream>
 #include <iostream>
@@ -68,6 +71,14 @@
   MLModel* _mlModel;
   NSURL* _modelPath;
   NSURL* _compiledModelPath;
+
+  int32_t _model_load_id;
+  int32_t _inferences;
+
+  int32_t _sample_thresh;
+  int32_t _sample_every;
+
+  size_t _init_mem_limit;
 }
 
 + (void)setModelCacheDirectory:(NSString*)dir {
@@ -110,6 +121,24 @@
   [self _saveModel:modelSpecs];
   NSError* error = nil;
   _compiledModelPath = [self _compiledModelFilePath:_modelPath.path];
+
+  // Get observer and create an instance key
+  PTMCoreMLObserver* observer = coreMLObserverConfig().getCoreMLObserver();
+  int32_t instance_key = std::rand();
+  _model_load_id = std::rand();
+  _inferences = 0;
+
+  _init_mem_limit = 0;
+
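+  // Sample ~1 in 1000 model loads: std::rand() is uniform over [0, RAND_MAX].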
+  _sample_thresh =
+      static_cast<int32_t>(1.0 / 1000.0 * static_cast<double>(RAND_MAX));
+  _sample_every = 500;
+
+  if (observer) {
+    _init_mem_limit = observer->getRemainingMemory();
+    observer->onEnterCompileModel(instance_key, _model_load_id);
+  }
+
   // Compile the model when OS version changes
   if ([self _shouldRecompileModel]) {
     if (@available(iOS 11.0, macOS 10.13, *)) {
@@ -128,11 +157,20 @@
         }
       }
     } else {
+      // Always log on failure
+      if (observer) {
+        observer->onExitCompileModel(instance_key, false, true);
+      }
       TORCH_CHECK(false, "CoreML is not available on your device");
     }
   }
 
   if (error) {
+    // Always log on failure
+    if (observer) {
+      observer->onExitCompileModel(instance_key, false, true);
+    }
+
+    // Remove cached models if compilation failed.
     [self cleanup];
     TORCH_CHECK(
@@ -158,17 +196,37 @@
     _mlModel = [MLModel modelWithContentsOfURL:_compiledModelPath error:&error];
   }
   if (error || !_mlModel) {
+    // Always log on failure
+    if (observer) {
+      observer->onExitCompileModel(instance_key, false, true);
+    }
+
     TORCH_CHECK(
         false,
         "Error loading the MLModel",
         error.localizedDescription.UTF8String);
   }
+
+  if (observer) {
+    bool should_log = _model_load_id < _sample_thresh;
+    observer->onExitCompileModel(instance_key, true, should_log);
+  }
+
   return YES;
 }
 
 - (id<MLFeatureProvider>)forwardWithInputs:
     (const std::vector<PTMCoreMLFeatureSpecs>&)inputs {
   @autoreleasepool {
+    // Get observer and create an instance key
+    PTMCoreMLObserver* observer = coreMLObserverConfig().getCoreMLObserver();
+    int32_t instance_key = std::rand();
+
+    if (observer) {
+      observer->onEnterExecuteModel(
+          instance_key, _model_load_id, _init_mem_limit, _inferences);
+    }
+
     NSError* error = nil;
     PTMCoreMLFeatureProvider* inputFeature = [[PTMCoreMLFeatureProvider alloc]
         initWithFeatureSpecs:inputs
@@ -189,8 +247,25 @@
             error.localizedDescription.UTF8String);
       }
 
+      ++_inferences;
+      if (observer) {
+        // Check if this inference session is being logged.
+        // If so, only log every N inferences
+        bool should_log = _model_load_id < _sample_thresh && _inferences > 1;
+        if (should_log) {
+          should_log = _inferences % _sample_every == 0;
+        }
+        observer->onExitExecuteModel(
+            instance_key, _inferences, true, should_log);
+      }
+
       return outputFeature;
     } else {
+      // Always log on failure
+      if (observer) {
+        observer->onExitExecuteModel(instance_key, _inferences, true, true);
+      }
+
       TORCH_CHECK(false, "Core ML is not available on your device");
       return nil;
     }
diff --git a/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h
new file mode 100644
index 0000000..57d1152
--- /dev/null
+++ b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h
@@ -0,0 +1,47 @@
+#include <memory>
+
+class PTMCoreMLObserver {
+ public:
+  virtual ~PTMCoreMLObserver() = default;
+
+  virtual size_t getRemainingMemory() {
+    return 0;
+  }
+
+  virtual void onEnterCompileModel(const int32_t, const int32_t) {}
+  virtual void onExitCompileModel(const int32_t, bool, bool) {}
+
+  virtual void onEnterExecuteModel(
+      const int32_t,
+      const int32_t,
+      const size_t,
+      const int32_t) {}
+  virtual void onExitExecuteModel(const int32_t, const int32_t, bool, bool) {}
+};
+
+class PTMCoreMLObserverConfig {
+ public:
+  PTMCoreMLObserverConfig();
+
+  // Do not allow copying/moving.
+  // There should be only one global instance of this class.
+  PTMCoreMLObserverConfig(const PTMCoreMLObserverConfig&) = delete;
+  PTMCoreMLObserverConfig& operator=(const PTMCoreMLObserverConfig&) = delete;
+
+  PTMCoreMLObserverConfig(PTMCoreMLObserverConfig&&) = delete;
+  PTMCoreMLObserverConfig& operator=(PTMCoreMLObserverConfig&&) = delete;
+
+ private:
+  std::unique_ptr<PTMCoreMLObserver> observer_;
+
+ public:
+  void setCoreMLObserver(std::unique_ptr<PTMCoreMLObserver> observer) {
+    observer_ = std::move(observer);
+  }
+
+  PTMCoreMLObserver* getCoreMLObserver() {
+    return observer_.get();
+  }
+};
+
+PTMCoreMLObserverConfig& coreMLObserverConfig();
diff --git a/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.mm b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.mm
new file mode 100644
index 0000000..372fc53
--- /dev/null
+++ b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.mm
@@ -0,0 +1,8 @@
+#import <torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h>
+
+PTMCoreMLObserverConfig::PTMCoreMLObserverConfig() : observer_{nullptr} {}
+
+PTMCoreMLObserverConfig& coreMLObserverConfig() {
+  static PTMCoreMLObserverConfig global_instance;
+  return global_instance;
+}