[iOS][coreml] Add CoreML memory observer (#76251)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/76251
Add an observer to `PTMCoreMLExecutor` so we can inspect OOMs in production to help with T115554493.
The logger behaves as follows (a sketch of the sampling logic appears after this list):
1. Each time a model is compiled, there is a small chance (roughly 1 in 1000) that we publish all logs for that session to QPL. This is determined by comparing the randomly generated `_model_load_id` against `_sample_thresh`.
2. If a session is publishing logs, then every `_sample_every`-th inference (500 by default) is logged via QPL.
3. Every QPL log collects memory metrics before and after model compilation/inference.
4. If memory pressure is abnormal (remaining memory < 400 MB) before or after a compilation/inference, that compilation/inference is logged to QPL unconditionally.
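A minimal C++ sketch of that gating logic, using the constants from the diff below; the `memory_pressure` flag stands in for the observer's internal remaining-memory check, which is not part of this diff:
```
#include <cstdint>
#include <cstdlib>

// Drawn once per model load; mirrors the fields on PTMCoreMLExecutor.
struct SamplingState {
  int32_t model_load_id = std::rand();
  int32_t sample_thresh =
      static_cast<int32_t>(1.0 / 1000.0 * static_cast<double>(RAND_MAX));
  int32_t sample_every = 500;
  int32_t inferences = 0;
};

// True when the current inference should be published to QPL.
bool shouldLogInference(
    const SamplingState& s,
    bool memory_pressure /* e.g. remaining memory < 400 MB */) {
  if (memory_pressure) {
    return true; // abnormal memory pressure is always logged
  }
  // Only ~0.1% of sessions sample at all, and those log every 500th inference.
  return s.model_load_id < s.sample_thresh && s.inferences > 1 &&
      s.inferences % s.sample_every == 0;
}
```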
Test Plan:
We can test in the PyTorch playground app and inspect the QPL logs through Flipper:
```
arc focus2 -b pp-ios -a ModelRunner -a //xplat/caffe2/c10:c10Apple -a //xplat/caffe2:torch_mobile_coreApple -a //xplat/caffe2/fb/dynamic_pytorch:dynamic_pytorch_implApple -a //xplat/caffe2:coreml_delegateApple -a ModelRunnerDevOps -a //xplat/caffe2:torch_mobile_all_opsApple -a coreml_memory_observer -a //xplat/perflogger:perfloggerApple -fd --force-with-wrong-xcode
```
To check results in Hive/Scuba, test in Instagram:
```
arc focus2 -b igios-no-extensions -a //fbobjc/Apps/Instagram/AppLibraries/Core/QPL/IGPerformanceLogging:IGPerformanceLogging -a //xplat/caffe2/c10:c10Apple -a //xplat/caffe2:torch_mobile_coreApple -a //xplat/caffe2/fb/dynamic_pytorch:dynamic_pytorch_implApple -a //xplat/caffe2:coreml_delegateApple -a //xplat/caffe2:torch_mobile_all_opsApple -a //xplat/perflogger:perfloggerApple -a coreml_memory_observerApple -c pt.enable_qpl=1 --force-with-wrong-xcode
```
Note that `_sample_thresh` needs to be raised locally to ensure logs show up, since only ~0.1% of sessions are sampled by default; see the snippet below.
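For instance, one illustrative local-only tweak (not part of this diff) is to raise the threshold in `PTMCoreMLExecutor.mm` so that virtually every randomly drawn `_model_load_id` falls below it:
```
// Local testing only: force (nearly) every session to publish its logs.
// std::rand() returns a value in [0, RAND_MAX], so the comparison
// `_model_load_id < _sample_thresh` now passes for almost every load.
_sample_thresh = RAND_MAX;
```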
Reviewed By: kimishpatel
Differential Revision: D35511873
fbshipit-source-id: 59f2fa2d021178ceab1fcf5ee94b2f15ceca32ee
(cherry picked from commit 8b8af55410ea1231693ee980c80d8a749f5ad870)
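For context, nothing is logged until a concrete observer is registered with the config singleton introduced below. A minimal hypothetical subclass, with `DemoObserver` and its bodies purely illustrative (the QPL-backed implementation is internal and not shown here), might look like:
```
#import <torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h>

#include <memory>

class DemoObserver : public PTMCoreMLObserver {
 public:
  size_t getRemainingMemory() override {
    // A real implementation could query the OS for available memory here.
    return 0;
  }
  void onEnterCompileModel(const int32_t instance_key, const int32_t model_load_id) override {
    // Start a log marker keyed on instance_key.
  }
  void onExitCompileModel(const int32_t instance_key, bool success, bool should_log) override {
    // End the marker; publish it only when should_log is true.
  }
};

// At app startup, before any CoreML model is compiled:
coreMLObserverConfig().setCoreMLObserver(std::make_unique<DemoObserver>());
```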
diff --git a/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm b/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
index 858cf5e..2df7352 100644
--- a/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
+++ b/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.mm
@@ -7,6 +7,10 @@
#import <UIKit/UIKit.h>
#endif
+// Observer
+#import <torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h>
+#include <cstdlib> // std::rand, RAND_MAX
+
#include <sys/utsname.h>
#include <fstream>
#include <iostream>
@@ -68,6 +71,14 @@
MLModel* _mlModel;
NSURL* _modelPath;
NSURL* _compiledModelPath;
+
+ int32_t _model_load_id;
+ int32_t _inferences;
+
+ int32_t _sample_thresh;
+ int32_t _sample_every;
+
+ size_t _init_mem_limit;
}
+ (void)setModelCacheDirectory:(NSString*)dir {
@@ -110,6 +121,24 @@
[self _saveModel:modelSpecs];
NSError* error = nil;
_compiledModelPath = [self _compiledModelFilePath:_modelPath.path];
+
+ // Get observer and create an instance key
+ PTMCoreMLObserver* observer = coreMLObserverConfig().getCoreMLObserver();
+ int32_t instance_key = std::rand();
+ _model_load_id = std::rand();
+ _inferences = 0;
+
+ _init_mem_limit = 0;
+
+ _sample_thresh =
+ static_cast<int32_t>(1.0 / 1000.0 * static_cast<double>(RAND_MAX));
+ _sample_every = 500;
+
+ if (observer) {
+ _init_mem_limit = observer->getRemainingMemory();
+ observer->onEnterCompileModel(instance_key, _model_load_id);
+ }
+
// Compile the model when OS version changes
if ([self _shouldRecompileModel]) {
if (@available(iOS 11.0, macOS 10.13, *)) {
@@ -128,11 +157,20 @@
}
}
} else {
+ // Always log on failure
+ if (observer) {
+ observer->onExitCompileModel(instance_key, false, true);
+ }
TORCH_CHECK(false, "CoreML is not available on your device");
}
}
if (error) {
+ // Always log on failure
+ if (observer) {
+ observer->onExitCompileModel(instance_key, false, true);
+ }
+
// remove cached models if compilation failed.
[self cleanup];
TORCH_CHECK(
@@ -158,17 +196,37 @@
_mlModel = [MLModel modelWithContentsOfURL:_compiledModelPath error:&error];
}
if (error || !_mlModel) {
+ // Always log on failure
+ if (observer) {
+ observer->onExitCompileModel(instance_key, false, true);
+ }
+
TORCH_CHECK(
false,
"Error loading the MLModel",
error.localizedDescription.UTF8String);
}
+
+ if (observer) {
+ bool should_log = _model_load_id < _sample_thresh;
+ observer->onExitCompileModel(instance_key, true, should_log);
+ }
+
return YES;
}
- (id<MLFeatureProvider>)forwardWithInputs:
(const std::vector<PTMCoreMLFeatureSpecs>&)inputs {
@autoreleasepool {
+ // Get observer and create an instance key
+ PTMCoreMLObserver* observer = coreMLObserverConfig().getCoreMLObserver();
+ int32_t instance_key = std::rand();
+
+ if (observer) {
+ observer->onEnterExecuteModel(
+ instance_key, _model_load_id, _init_mem_limit, _inferences);
+ }
+
NSError* error = nil;
PTMCoreMLFeatureProvider* inputFeature = [[PTMCoreMLFeatureProvider alloc]
initWithFeatureSpecs:inputs
@@ -189,8 +247,25 @@
error.localizedDescription.UTF8String);
}
+ ++_inferences;
+ if (observer) {
+ // Check if this inference session is being logged.
+ // If so, only log every N inferences
+ bool should_log = _model_load_id < _sample_thresh && _inferences > 1;
+ if (should_log) {
+ should_log = _inferences % _sample_every == 0;
+ }
+ observer->onExitExecuteModel(
+ instance_key, _inferences, true, should_log);
+ }
+
return outputFeature;
} else {
+ // Always log on failure
+ if (observer) {
+ observer->onExitExecuteModel(instance_key, _inferences, true, true);
+ }
+
TORCH_CHECK(false, "Core ML is not available on your device");
return nil;
}
diff --git a/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h
new file mode 100644
index 0000000..57d1152
--- /dev/null
+++ b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h
@@ -0,0 +1,48 @@
+#include <cstdint>
+#include <memory>
+
+class PTMCoreMLObserver {
+ public:
+ virtual ~PTMCoreMLObserver() = default;
+
+ virtual size_t getRemainingMemory() {
+ return 0;
+ }
+
+ virtual void onEnterCompileModel(const int32_t /*instance_key*/, const int32_t /*model_load_id*/) {}
+ virtual void onExitCompileModel(const int32_t /*instance_key*/, bool /*success*/, bool /*should_log*/) {}
+
+ virtual void onEnterExecuteModel(
+ const int32_t /*instance_key*/,
+ const int32_t /*model_load_id*/,
+ const size_t /*init_mem_limit*/,
+ const int32_t /*inferences*/) {}
+ virtual void onExitExecuteModel(const int32_t /*instance_key*/, const int32_t /*inferences*/, bool /*success*/, bool /*should_log*/) {}
+};
+
+class PTMCoreMLObserverConfig {
+ public:
+ PTMCoreMLObserverConfig();
+
+ // Do not allow copying/moving.
+ // There should be only one global instance of this class.
+ PTMCoreMLObserverConfig(const PTMCoreMLObserverConfig&) = delete;
+ PTMCoreMLObserverConfig& operator=(const PTMCoreMLObserverConfig&) = delete;
+
+ PTMCoreMLObserverConfig(PTMCoreMLObserverConfig&&) = delete;
+ PTMCoreMLObserverConfig& operator=(PTMCoreMLObserverConfig&&) = delete;
+
+ private:
+ std::unique_ptr<PTMCoreMLObserver> observer_;
+
+ public:
+ void setCoreMLObserver(std::unique_ptr<PTMCoreMLObserver> observer) {
+ observer_ = std::move(observer);
+ }
+
+ PTMCoreMLObserver* getCoreMLObserver() {
+ return observer_.get();
+ }
+};
+
+PTMCoreMLObserverConfig& coreMLObserverConfig();
diff --git a/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.mm b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.mm
new file mode 100644
index 0000000..372fc53
--- /dev/null
+++ b/torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.mm
@@ -0,0 +1,8 @@
+#import <torch/csrc/jit/backends/coreml/observer/PTMCoreMLObserver.h>
+
+PTMCoreMLObserverConfig::PTMCoreMLObserverConfig() : observer_{nullptr} {}
+
+PTMCoreMLObserverConfig& coreMLObserverConfig() {
+ static PTMCoreMLObserverConfig global_instance;
+ return global_instance;
+}