[easy] ThroughputBenchmark: make ScriptModuleBenchmark usable from c++ (#35848)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/35848

Until now this class was only used through its Python bindings. As a result, testing it in a C++-only environment was not possible. More specifically, adding inputs required
py::args and py::kwargs. This PR fixes that by adding another addInput overload to the ScriptModuleBenchmark class that takes the input directly.

Test Plan: Imported from OSS

Differential Revision: D20820772

Pulled By: ilia-cher

fbshipit-source-id: f1ea1b7baa637b297cc0dec5ca6375f6caff21f5
diff --git a/torch/csrc/utils/throughput_benchmark.cpp b/torch/csrc/utils/throughput_benchmark.cpp
index 45769bc..f83b82b 100644
--- a/torch/csrc/utils/throughput_benchmark.cpp
+++ b/torch/csrc/utils/throughput_benchmark.cpp
@@ -6,6 +6,11 @@
 namespace torch {
 namespace throughput_benchmark {
 
+std::ostream& operator<<(std::ostream& os, const BenchmarkExecutionStats& value) {
+  os << "Average latency / iter (ms): " << value.latency_avg_ms;  // first line of the summary
+  return os << "\n Total number of iters: " << value.num_iters;  // no trailing newline is written
+}
+
 void ThroughputBenchmark::addInput(py::args args, py::kwargs kwargs) {
   CHECK(script_module_.initialized() ^ module_.initialized());
   if (script_module_.initialized()) {
@@ -105,6 +110,12 @@
 }
 
 template <>
+void ScriptModuleBenchmark::addInput(ScriptModuleInput&& input) {  // overload without py::args/py::kwargs
+  input.insert(input.begin(), model_._ivalue());  // put model_'s IValue at the front of the input
+  inputs_.emplace_back(std::move(input));  // store the augmented input in inputs_
+}
+
+template <>
 void ModuleBenchmark::addInput(py::args&& args, py::kwargs&& kwargs) {
   inputs_.emplace_back(std::move(args), std::move(kwargs));
 }
diff --git a/torch/csrc/utils/throughput_benchmark.h b/torch/csrc/utils/throughput_benchmark.h
index 3e1952f..676f29c 100644
--- a/torch/csrc/utils/throughput_benchmark.h
+++ b/torch/csrc/utils/throughput_benchmark.h
@@ -6,8 +6,9 @@
 
 #include <torch/csrc/jit/python/pybind_utils.h>
 
-#include <vector>
+#include <iostream>
 #include <memory>
+#include <vector>
 
 namespace py = pybind11;
 
@@ -23,6 +24,8 @@
   int64_t num_iters{-1};
 };
 
+std::ostream& operator<<(std::ostream& os, const BenchmarkExecutionStats& value);  // writes latency_avg_ms and num_iters
+
 /**
  * Use this struct in order to configure a throughput benchmark run.
  * This struct should include parameters related to threading, batching, number
@@ -72,6 +75,7 @@
   // Aggregate input in the format Model expects in order to avoid further
   // conversions at the benchmark time
   void addInput(py::args&&, py::kwargs&&);
+  void addInput(Input&&);  // overload that takes an Input directly, without py::args/py::kwargs
   BenchmarkExecutionStats benchmark(const BenchmarkConfig& config) const;
 
   bool initialized() const { return initialized_; }
@@ -135,6 +139,9 @@
 
 template <>
 void ScriptModuleBenchmark::addInput(py::args&& args, py::kwargs&& kwargs);
+template <>  // explicit specialization; the definition is in throughput_benchmark.cpp
+void ScriptModuleBenchmark::addInput(ScriptModuleInput&& input);
+
 
 template <>
 void ModuleBenchmark::addInput(py::args&& args, py::kwargs&& kwargs);