[easy] ThroughputBenchmark: make ScriptModuleBenchmark usable from c++ (#35848)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/35848
Until now this class has been used only through the Python bindings. As a result, testing in a C++-only environment is not currently possible. More specifically, adding inputs requires using
py::args and py::kwargs. This PR fixes that by adding another addInput overload to the ScriptModuleBenchmark class. It also adds an operator<< for printing BenchmarkExecutionStats.
Test Plan: Imported from OSS
Differential Revision: D20820772
Pulled By: ilia-cher
fbshipit-source-id: f1ea1b7baa637b297cc0dec5ca6375f6caff21f5
diff --git a/torch/csrc/utils/throughput_benchmark.cpp b/torch/csrc/utils/throughput_benchmark.cpp
index 45769bc..f83b82b 100644
--- a/torch/csrc/utils/throughput_benchmark.cpp
+++ b/torch/csrc/utils/throughput_benchmark.cpp
@@ -6,6 +6,11 @@
namespace torch {
namespace throughput_benchmark {
+std::ostream& operator<<(std::ostream& os, const BenchmarkExecutionStats& value) {  // Writes a two-line, human-readable summary of `value` to `os`.
+ return os << "Average latency / iter (ms): " << value.latency_avg_ms  // First line: the latency_avg_ms field.
+ << "\n Total number of iters: " << value.num_iters;  // Second line: the num_iters field; the stream is returned so insertions can be chained.
+}
+
void ThroughputBenchmark::addInput(py::args args, py::kwargs kwargs) {
CHECK(script_module_.initialized() ^ module_.initialized());
if (script_module_.initialized()) {
@@ -105,6 +110,12 @@
}
template <>
+void ScriptModuleBenchmark::addInput(ScriptModuleInput&& input) {  // Adds one benchmark input without going through py::args/py::kwargs.
+ input.insert(input.begin(), model_._ivalue());  // Put model_'s IValue at the front of the input; presumably the implicit "self" argument -- TODO confirm.
+ inputs_.emplace_back(std::move(input));  // Move the completed input into inputs_.
+}
+
+template <>  // Explicit specialization of addInput(py::args&&, py::kwargs&&) for ModuleBenchmark.
void ModuleBenchmark::addInput(py::args&& args, py::kwargs&& kwargs) {
inputs_.emplace_back(std::move(args), std::move(kwargs));
}
diff --git a/torch/csrc/utils/throughput_benchmark.h b/torch/csrc/utils/throughput_benchmark.h
index 3e1952f..676f29c 100644
--- a/torch/csrc/utils/throughput_benchmark.h
+++ b/torch/csrc/utils/throughput_benchmark.h
@@ -6,8 +6,9 @@
#include <torch/csrc/jit/python/pybind_utils.h>
-#include <vector>
+#include <iostream>
#include <memory>
+#include <vector>
namespace py = pybind11;
@@ -23,6 +24,8 @@
int64_t num_iters{-1};
};
+std::ostream& operator<<(std::ostream& os, const BenchmarkExecutionStats& value);  // Stream insertion for BenchmarkExecutionStats; defined in throughput_benchmark.cpp.
+
/**
* Use this struct in order to configure a throughput benchmark run.
* This struct should include parameters related to threading, batching, number
@@ -72,6 +75,7 @@
// Aggregate input in the format Model expects in order to avoid further
// conversions at the benchmark time
void addInput(py::args&&, py::kwargs&&);
+ void addInput(Input&&);  // Overload taking an already-built Input by rvalue reference, so no pybind11 argument types are needed.
BenchmarkExecutionStats benchmark(const BenchmarkConfig& config) const;
bool initialized() const { return initialized_; }
@@ -135,6 +139,9 @@
template <>
void ScriptModuleBenchmark::addInput(py::args&& args, py::kwargs&& kwargs);
+template <>  // Explicit specialization declaration; the definition is in throughput_benchmark.cpp.
+void ScriptModuleBenchmark::addInput(ScriptModuleInput&& input);  // Takes the input directly instead of py::args/py::kwargs.
+
template <>
void ModuleBenchmark::addInput(py::args&& args, py::kwargs&& kwargs);