Metal GPU throughput benchmarking app for iOS and macOS.
PiperOrigin-RevId: 409903272
Change-Id: I7ae7cfd936915a93f836563581b7439b1b197af2
diff --git a/tensorflow/lite/delegates/gpu/metal/benchmarking/BUILD b/tensorflow/lite/delegates/gpu/metal/benchmarking/BUILD
new file mode 100644
index 0000000..708fe0e
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/benchmarking/BUILD
@@ -0,0 +1,67 @@
+load("@build_bazel_rules_apple//apple:ios.bzl", "ios_application")
+load("@build_bazel_rules_apple//apple:macos.bzl", "macos_application")
+
+package(
+ default_visibility = ["//visibility:public"],
+ licenses = ["notice"],
+)
+
+objc_library(
+ name = "benchmark_lib",
+ srcs = ["main.mm"],
+ data = glob([
+ "models/*.tflite",
+ ]),
+ sdk_frameworks = [
+ "Metal",
+ "QuartzCore",
+ ],
+ deps = [
+ "//tensorflow/lite:framework",
+ "//tensorflow/lite:kernel_api",
+ "//tensorflow/lite/c:common",
+ "//tensorflow/lite/delegates/gpu/common:gpu_info",
+ "//tensorflow/lite/delegates/gpu/common:model",
+ "//tensorflow/lite/delegates/gpu/common:model_builder",
+ "//tensorflow/lite/delegates/gpu/common:model_transformer",
+ "//tensorflow/lite/delegates/gpu/common:precision",
+ "//tensorflow/lite/delegates/gpu/common:shape",
+ "//tensorflow/lite/delegates/gpu/common:util",
+ "//tensorflow/lite/delegates/gpu/common/transformations:model_transformations",
+ "//tensorflow/lite/delegates/gpu/metal:inference_context",
+ "//tensorflow/lite/kernels:builtin_ops",
+ "//tensorflow/lite/kernels:kernel_util",
+ ],
+)
+
+# Applications for local benchmarking in Xcode
+ios_application(
+ name = "iOSBenchmark",
+ testonly = 1,
+ bundle_id = "com.tensorflow.lite.delegates.gpu.metal.benchmarking",
+ families = [
+ "iphone",
+ "ipad",
+ ],
+ infoplists = ["Info.plist"],
+ minimum_os_version = "12.0",
+ provisioning_profile = "//tensorflow/lite/delegates/gpu/metal/benchmarking:provisioning_profile.mobileprovision",
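+    # Tagged "local"/"notap" so these apps stay out of automated test runs;
+    # they are meant to be built and run manually on a device.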
+ tags = [
+ "local",
+ "notap",
+ ],
+ deps = [":benchmark_lib"],
+)
+
+macos_application(
+ name = "MacOSBenchmark",
+ testonly = 1,
+ bundle_id = "com.tensorflow.lite.delegates.gpu.metal.benchmarking",
+ infoplists = ["Info.plist"],
+ minimum_os_version = "10.13",
+ tags = [
+ "local",
+ "notap",
+ ],
+ deps = [":benchmark_lib"],
+)
diff --git a/tensorflow/lite/delegates/gpu/metal/benchmarking/Benchmark.tulsiproj/Configs/Benchmark.tulsigen b/tensorflow/lite/delegates/gpu/metal/benchmarking/Benchmark.tulsiproj/Configs/Benchmark.tulsigen
new file mode 100644
index 0000000..3fe6142
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/benchmarking/Benchmark.tulsiproj/Configs/Benchmark.tulsigen
@@ -0,0 +1,17 @@
+{
+ "sourceFilters" : [
+ "tensorflow/lite/delegates/gpu/...",
+ "tensorflow/lite/delegates/gpu/metal/benchmarking/models",
+ ],
+ "buildTargets" : [
+ "//tensorflow/lite/delegates/gpu/metal/benchmarking:MacOSBenchmark",
+ "//tensorflow/lite/delegates/gpu/metal/benchmarking:iOSBenchmark",
+ ],
+ "projectName" : "Benchmark",
+ "additionalFilePaths" : [
+ "tensorflow/lite/delegates/gpu/BUILD",
+ "tensorflow/lite/delegates/gpu/metal/BUILD",
+ "tensorflow/lite/delegates/gpu/metal/kernels/BUILD",
+ "tensorflow/lite/delegates/gpu/metal/benchmarking/BUILD",
+ ]
+}
diff --git a/tensorflow/lite/delegates/gpu/metal/benchmarking/Benchmark.tulsiproj/project.tulsiconf b/tensorflow/lite/delegates/gpu/metal/benchmarking/Benchmark.tulsiproj/project.tulsiconf
new file mode 100644
index 0000000..ed7355f
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/benchmarking/Benchmark.tulsiproj/project.tulsiconf
@@ -0,0 +1,7 @@
+{
+ "packages" : [
+ "tensorflow/lite/delegates/gpu/metal"
+ ],
+ "projectName" : "Benchmark",
+ "workspaceRoot" : "../../../../../../.."
+}
diff --git a/tensorflow/lite/delegates/gpu/metal/benchmarking/Info.plist b/tensorflow/lite/delegates/gpu/metal/benchmarking/Info.plist
new file mode 100644
index 0000000..e20e81a
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/benchmarking/Info.plist
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>CFBundleDevelopmentRegion</key>
+ <string>en</string>
+ <key>CFBundleExecutable</key>
+ <string>$(EXECUTABLE_NAME)</string>
+ <key>CFBundleIdentifier</key>
+ <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+ <key>CFBundleInfoDictionaryVersion</key>
+ <string>6.0</string>
+ <key>CFBundleName</key>
+ <string>$(PRODUCT_NAME)</string>
+ <key>CFBundlePackageType</key>
+ <string>APPL</string>
+ <key>CFBundleShortVersionString</key>
+ <string>1.0</string>
+ <key>CFBundleVersion</key>
+ <string>1</string>
+ <key>LSRequiresIPhoneOS</key>
+ <true/>
+ <key>UIRequiredDeviceCapabilities</key>
+ <array>
+ <string>armv7</string>
+ </array>
+ <key>UISupportedInterfaceOrientations</key>
+ <array>
+ <string>UIInterfaceOrientationPortrait</string>
+ <string>UIInterfaceOrientationLandscapeLeft</string>
+ <string>UIInterfaceOrientationLandscapeRight</string>
+ </array>
+ <key>UISupportedInterfaceOrientations~ipad</key>
+ <array>
+ <string>UIInterfaceOrientationPortrait</string>
+ <string>UIInterfaceOrientationPortraitUpsideDown</string>
+ <string>UIInterfaceOrientationLandscapeLeft</string>
+ <string>UIInterfaceOrientationLandscapeRight</string>
+ </array>
+ <key>UIStatusBarStyle</key>
+ <string>UIStatusBarStyleLightContent</string>
+ <key>UIViewControllerBasedStatusBarAppearance</key>
+ <false/>
+</dict>
+</plist>
\ No newline at end of file
diff --git a/tensorflow/lite/delegates/gpu/metal/benchmarking/main.mm b/tensorflow/lite/delegates/gpu/metal/benchmarking/main.mm
new file mode 100644
index 0000000..02be26b
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/benchmarking/main.mm
@@ -0,0 +1,207 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#import <Metal/Metal.h>
+
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+
+#include "tensorflow/lite/builtin_ops.h"
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
+#include "tensorflow/lite/delegates/gpu/common/model.h"
+#include "tensorflow/lite/delegates/gpu/common/model_builder.h"
+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h"
+#include "tensorflow/lite/delegates/gpu/common/precision.h"
+#include "tensorflow/lite/delegates/gpu/common/shape.h"
+#include "tensorflow/lite/delegates/gpu/common/transformations/model_transformations.h"
+#include "tensorflow/lite/delegates/gpu/common/util.h"
+#include "tensorflow/lite/delegates/gpu/metal/inference_context.h"
+#include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/interpreter_builder.h"
+#include "tensorflow/lite/kernels/kernel_util.h"
+#include "tensorflow/lite/kernels/register.h"
+#include "tensorflow/lite/model_builder.h"
+
+namespace tflite {
+namespace gpu {
+namespace metal {
+namespace {
+
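+// Builds a Metal InferenceContext from `graph` and measures throughput:
+// `num_tests` timed runs of `iterations` inference dispatches each, printing
+// the average latency per iteration in milliseconds.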
+absl::Status GPUBenchmark(GraphFloat32* graph, int num_tests, int iterations,
+ bool use_fp16 = true) {
+ id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+ std::string device_name = std::string([[device name] UTF8String]);
+ GpuInfo gpu_info;
+ GetGpuInfoFromDeviceDescription(device_name, GpuApi::kMetal, &gpu_info);
+ CalculationsPrecision precision;
+ if (use_fp16) {
+ if (gpu_info.IsRoundToNearestSupported()) {
+ precision = CalculationsPrecision::F16;
+ } else {
+ precision = CalculationsPrecision::F32_F16;
+ }
+ } else {
+ precision = CalculationsPrecision::F32;
+ }
+
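+  // Buffer-backed tensor storage is the most broadly supported layout on
+  // Metal; kAllowSpecialKernels lets the context substitute optimized
+  // special-case kernels where they are available.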
+ InferenceContext::CreateInferenceInfo create_info;
+ create_info.precision = precision;
+ create_info.storage_type = TensorStorageType::BUFFER;
+ create_info.hints.Add(ModelHints::kAllowSpecialKernels);
+ InferenceContext inference_context;
+ RETURN_IF_ERROR(inference_context.InitFromGraphWithTransforms(create_info, graph, device));
+
+ id<MTLCommandQueue> command_queue = [device newCommandQueue];
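+  // Set to true to print a per-operation timing breakdown before the
+  // throughput measurement.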
+  constexpr bool kPerOpProfiling = false;
+ if (kPerOpProfiling) {
+ ProfilingInfo profiling_info;
+ inference_context.Profile(device, &profiling_info);
+ std::cout << profiling_info.GetDetailedReport() << std::endl;
+ }
+ const std::string precision_str = use_fp16 ? "FP16" : "FP32";
+  std::cout << "Measurement started: (" << num_tests << " tests, " << iterations
+            << " iterations per test, " << precision_str << " precision)" << std::endl;
+ for (int j = 0; j < num_tests; ++j) {
+ auto start = std::chrono::high_resolution_clock::now();
+ for (int i = 0; i < iterations; ++i) {
+ @autoreleasepool {
+ id<MTLCommandBuffer> command_buffer = [command_queue commandBuffer];
+ id<MTLComputeCommandEncoder> encoder =
+ [command_buffer computeCommandEncoder];
+ inference_context.EncodeWithEncoder(encoder);
+ [encoder endEncoding];
+ [command_buffer commit];
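+        // Block only on the final command buffer so earlier iterations stay
+        // queued and the GPU remains saturated.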
+ if (i == iterations - 1) {
+ [command_buffer waitUntilCompleted];
+ }
+ }
+ }
+    auto end = std::chrono::high_resolution_clock::now();
+    const double average_ms =
+        double(std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()) /
+        iterations;
+    std::cout << "  Test: #" << j << " - " << average_ms << " ms" << std::endl;
+ }
+ return absl::OkStatus();
+}
+
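+// Captures the subgraph that TFLite offers to the delegate and rebuilds it as
+// a GraphFloat32; the destination graph is passed in via the delegate's data_.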
+class DelegateContext {
+ public:
+ bool Init(TfLiteContext* context,
+ const TfLiteDelegateParams* delegate_params) {
+ auto denormalized_graph =
+ reinterpret_cast<GraphFloat32*>(delegate_params->delegate->data_);
+ absl::Status status =
+ BuildModel(context, delegate_params, denormalized_graph);
+ if (!status.ok()) {
+ TF_LITE_KERNEL_LOG(context, std::string(status.message()).c_str());
+ }
+ return status.ok();
+ }
+};
+
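+// A pass-through delegate: it claims every op the GPU backend supports so the
+// init hook can populate the GraphFloat32, but its kernels are never invoked.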
+TfLiteStatus DelegatePrepare(TfLiteContext* context, TfLiteDelegate* delegate) {
+ const TfLiteRegistration kRegistration = {
+ .init = [](TfLiteContext* context, const char* buffer, size_t) -> void* {
+ auto* delegate_context = new DelegateContext();
+ if (!delegate_context->Init(
+ context,
+ reinterpret_cast<const TfLiteDelegateParams*>(buffer))) {
+ delete delegate_context;
+ return nullptr;
+ }
+ return delegate_context;
+ },
+ .free = [](TfLiteContext* context, void* buffer) -> void {
+ delete reinterpret_cast<DelegateContext*>(buffer);
+ },
+ .prepare = [](TfLiteContext* context, TfLiteNode* node) -> TfLiteStatus {
+ return node->user_data ? kTfLiteOk : kTfLiteError;
+ },
+ .invoke = nullptr,
+ };
+
+ TfLiteIntArray* ops_to_replace = GetOpsToReplace(context);
+ const auto status = context->ReplaceNodeSubsetsWithDelegateKernels(
+ context, kRegistration, ops_to_replace, delegate);
+ TfLiteIntArrayFree(ops_to_replace);
+ return status;
+}
+
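+// Converts a FlatBuffer model into a GraphFloat32 by letting the delegate
+// above capture the graph, then applies the standard model transformations.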
+absl::Status FlatBufferToGPUGraph(
+ const std::unique_ptr<tflite::FlatBufferModel>& flatbuffer,
+ GraphFloat32* graph) {
+ ops::builtin::BuiltinOpResolver op_resolver;
+ std::unique_ptr<tflite::Interpreter> interpreter;
+ tflite::InterpreterBuilder interpreter_builder(*flatbuffer, op_resolver);
+ if (interpreter_builder(&interpreter) != kTfLiteOk || !interpreter) {
+ return absl::InternalError("Unable to prepare TfLite interpreter.");
+ }
+ TfLiteDelegate delegate;
+ delegate.data_ = graph;
+ delegate.flags = kTfLiteDelegateFlagsNone;
+ delegate.Prepare = DelegatePrepare;
+ delegate.CopyFromBufferHandle = nullptr;
+ delegate.CopyToBufferHandle = nullptr;
+ delegate.FreeBufferHandle = nullptr;
+
+ if (interpreter->ModifyGraphWithDelegate(&delegate) != kTfLiteOk) {
+ return absl::InternalError("Conversion from TfLite model failed.");
+ }
+
+ ModelTransformer transformer(graph);
+ if (!ApplyModelTransformations(&transformer)) {
+ return absl::InternalError("Graph transformations failed");
+ }
+
+ return absl::OkStatus();
+}
+
+} // namespace
+} // namespace metal
+} // namespace gpu
+} // namespace tflite
+
+int main(int argc, char** argv) {
+ @autoreleasepool {
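+    // Benchmark every .tflite file bundled with the app (collected by the
+    // data glob in the BUILD rule).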
+    NSBundle* bundle = [NSBundle mainBundle];
+    NSArray<NSString*>* model_paths = [bundle pathsForResourcesOfType:@"tflite" inDirectory:nil];
+    for (NSString* model_path in model_paths) {
+ NSString *model_name = [[model_path lastPathComponent] stringByDeletingPathExtension];
+ std::string m_name = std::string([model_name UTF8String]);
+ std::string path = std::string([model_path UTF8String]);
+ std::cout << m_name << std::endl;
+ auto flatbuffer = tflite::FlatBufferModel::BuildFromFile(path.c_str());
+      if (!flatbuffer) {
+        std::cout << "Failed to read the FlatBuffer model." << std::endl;
+        continue;
+      }
+
+ tflite::gpu::GraphFloat32 graph;
+ auto s = tflite::gpu::metal::FlatBufferToGPUGraph(flatbuffer, &graph);
+      if (!s.ok()) {
+        std::cout << "Failed flatbuffer to graph conversion. " << s.message() << std::endl;
+        continue;
+      }
+
+ s = tflite::gpu::metal::GPUBenchmark(&graph, 5, 200, true);
+ if (!s.ok()) {
+ std::cout << "Error in GPUBenchmark. " << s.message() << std::endl;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/tensorflow/lite/delegates/gpu/metal/benchmarking/models/README.md b/tensorflow/lite/delegates/gpu/metal/benchmarking/models/README.md
new file mode 100644
index 0000000..1ca1483
--- /dev/null
+++ b/tensorflow/lite/delegates/gpu/metal/benchmarking/models/README.md
@@ -0,0 +1 @@
+`.tflite` models must be added to this folder before the Xcode project is
+generated (e.g. via Tulsi from `Benchmark.tulsiproj`); the `glob` in the
+`benchmark_lib` target bundles them into the app as resources.