Back out "Revert D30599136: [Pytorch Edge][tracing-based] build tracer in OSS" (#66267)
Summary:
Previously, https://github.com/pytorch/pytorch/pull/64087 broke the test `binary_macos_wheel_3_7_cpu_build` because the wheel build is not happy with `model_tracer`. Since tracing-based selective build is still a prototype and there is no need to ship `model_tracer` via the wheel at the moment, the tracer is now built behind the `TRACING_BASED` option. When tracing-based selective build is mature enough, we can ship the tracer binary via the wheel.
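As a rough sketch of how the new flag can be exercised locally (the build directory, extra configure flags, output location, and model/YAML paths below are placeholders, not the exact CI commands):

```
# Configure a regular (non-lite-interpreter) build with the tracer enabled.
cmake -DTRACING_BASED=ON <usual configure flags> ..
cmake --build . --target model_tracer

# Run the tracer on a model that has bundled inputs; the traced operator
# list is written to the YAML file (flags defined in tracer.cpp below).
./bin/model_tracer \
    --model_input_path=/path/to/model.ptl \
    --build_yaml_path=/path/to/traced_operators.yaml
```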
Pull Request resolved: https://github.com/pytorch/pytorch/pull/66267
Original commit changeset: 8ac3d75a52d0
ghstack-source-id: 140122106
Test Plan:
binary_macos_wheel_3_7_cpu_build passes
{F668643831}
Reviewed By: dhruvbird
Differential Revision: D31478593
fbshipit-source-id: 726cab1b31c4596f6268b7824eecb20e2e59d161
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 04b32ba..a0f1d0e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -427,6 +427,10 @@
"Path to the yaml file that contains the op dependency graph for custom build.")
set(STATIC_DISPATCH_BACKEND "" CACHE STRING
"Name of the backend for which static dispatch code is generated, e.g.: CPU.")
+option(
+ TRACING_BASED
+ "Master flag to build Lite Interpreter with tracing build option"
+ OFF)
# This is a fix for a rare build issue on Ubuntu:
# symbol lookup error: miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol: mkl_blas_dsyrk
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 4b3330e..e37e333 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1009,6 +1009,14 @@
add_dependencies(torch_cpu Caffe2_PROTO)
endif()
+# Build model tracer for tracing-based selective build
+if(TRACING_BASED AND NOT BUILD_LITE_INTERPRETER AND NOT INTERN_BUILD_MOBILE)
+ add_subdirectory(
+ ${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer
+ ${CMAKE_BINARY_DIR}/model_tracer
+ )
+endif()
+
# Codegen selected_mobile_ops.h for template selective build
if(BUILD_LITE_INTERPRETER AND SELECTED_OP_LIST)
add_custom_command(
diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
index e25c3a7..fe4ab30 100644
--- a/tools/build_variables.bzl
+++ b/tools/build_variables.bzl
@@ -444,6 +444,14 @@
libtorch_core_jit_sources = sorted(jit_sources_full)
+torch_mobile_tracer_sources = [
+ "torch/csrc/jit/mobile/model_tracer/tracer.cpp",
+ "torch/csrc/jit/mobile/model_tracer/TensorUtils.cpp",
+ "torch/csrc/jit/mobile/model_tracer/MobileModelRunner.cpp",
+ "torch/csrc/jit/mobile/model_tracer/OperatorCallTracer.cpp",
+ "torch/csrc/jit/mobile/model_tracer/KernelDTypeTracer.cpp",
+]
+
torch_mobile_core = [
# backend_debug_info.cpp provides
# __torch__.torch.classes.backend.BackendDebugInfo class
diff --git a/torch/csrc/jit/mobile/model_tracer/CMakeLists.txt b/torch/csrc/jit/mobile/model_tracer/CMakeLists.txt
new file mode 100644
index 0000000..678f3f4
--- /dev/null
+++ b/torch/csrc/jit/mobile/model_tracer/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(
+ MODEL_TRACER_DIR
+ "${TORCH_ROOT}/torch/csrc/jit/mobile/model_tracer")
+
+list(APPEND MODEL_TRACER_SOURCES "")
+
+append_filelist("torch_mobile_tracer_sources" MODEL_TRACER_SOURCES)
+
+add_executable(
+ model_tracer
+ ${MODEL_TRACER_SOURCES})
+
+target_link_libraries(model_tracer PRIVATE torch)
+
+install(TARGETS model_tracer DESTINATION bin)
diff --git a/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.cpp b/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.cpp
new file mode 100644
index 0000000..7e2930a
--- /dev/null
+++ b/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.cpp
@@ -0,0 +1,248 @@
+#include <torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h>
+#include <torch/csrc/jit/mobile/model_tracer/TensorUtils.h>
+
+namespace torch {
+namespace jit {
+namespace mobile {
+
+std::vector<std::vector<at::IValue>> MobileModelRunner::
+ ivalue_to_bundled_inputs(const c10::IValue& bundled_inputs) {
+ CAFFE_ENFORCE(
+ bundled_inputs.isList(),
+ "Expected get_all_bundled_inputs to ",
+ "return a list but got a ",
+ bundled_inputs.tagKind(),
+ " instead");
+
+ c10::List<at::IValue> all_inputs = bundled_inputs.toList();
+ CAFFE_ENFORCE(
+ !all_inputs.empty(),
+ "Expected at least 1 bundled input, ",
+ "but found none. Please use ",
+ "torch.utils.bundled_inputs.augment_model_with_bundled_inputs to add.");
+
+ std::vector<std::vector<at::IValue>> ret;
+ for (at::IValue input : all_inputs) {
+ CAFFE_ENFORCE(
+ input.isTuple(),
+ "Expected list element to be a tuple ",
+ "but got a ",
+ input.tagKind(),
+ " instead");
+ ret.push_back(input.toTuple()->elements());
+ }
+
+ return ret;
+}
+
+std::unordered_map<std::string, std::string> MobileModelRunner::
+ ivalue_to_bundled_inputs_map(const c10::IValue& bundled_inputs) {
+ CAFFE_ENFORCE(
+ bundled_inputs.isGenericDict(),
+ "Expected get_bundled_inputs_functions_and_info to ",
+ "return a dict but got a ",
+ bundled_inputs.tagKind(),
+ " instead");
+
+ c10::Dict<at::IValue, at::IValue> all_inputs = bundled_inputs.toGenericDict();
+ CAFFE_ENFORCE(
+ !all_inputs.empty(),
+ "Expected at least 1 function with bundled inputs, ",
+ "but found none. Please use ",
+ "torch.utils.bundled_inputs.augment_model_with_bundled_inputs to add.");
+
+ std::unordered_map<std::string, std::string> ret;
+ for (auto& input : all_inputs) {
+ at::IValue function_name = input.key();
+ at::IValue nested_dict = input.value();
+ CAFFE_ENFORCE(
+ function_name.isString(),
+ "Expected function with inputs to be a string ",
+ "but got a ",
+ function_name.tagKind(),
+ " instead");
+ CAFFE_ENFORCE(
+ nested_dict.isGenericDict(),
+ "Expected function name to map to dictionary ",
+ "but got a ",
+ nested_dict.tagKind(),
+ " instead");
+
+    // Got the nested dict; now convert it into std types.
+ c10::Dict<at::IValue, at::IValue> function_and_info_ival_dict =
+ nested_dict.toGenericDict();
+ std::unordered_map<std::string, std::vector<std::string>>
+ function_and_info_dict;
+ for (auto& entry : function_and_info_ival_dict) {
+ at::IValue key = entry.key();
+ at::IValue value = entry.value();
+ CAFFE_ENFORCE(
+ key.isString(),
+ "Expected extra information key to be a string ",
+ "but got a ",
+          key.tagKind(),
+ " instead");
+ CAFFE_ENFORCE(
+ value.isList(),
+ "Expected extra information values to be a list ",
+ "but got a ",
+ value.tagKind(),
+ " instead");
+
+      // Got the value of the nested dict entry; now convert it to std
+      // types.
+ std::vector<std::string> data_list;
+ c10::List<at::IValue> ival_data = value.toList();
+ for (at::IValue data : ival_data) {
+ CAFFE_ENFORCE(
+ data.isString(),
+ "Expected list element of nested dict entries to be a string ",
+ "but got a ",
+ data.tagKind(),
+ " instead");
+ data_list.push_back(data.toStringRef());
+ }
+
+ // Add entry into std type mapping
+ function_and_info_dict[key.toStringRef()] = data_list;
+ }
+
+    // Could store the full mapping of std types, but the 'info' section isn't
+    // needed here.
+ std::string input_function =
+ function_and_info_dict["get_inputs_function_name"][0];
+ ret[function_name.toStringRef()] = input_function;
+ }
+
+ return ret;
+}
+
+std::vector<std::vector<at::IValue>> MobileModelRunner::
+ get_all_bundled_inputs() {
+ auto has_bundled_input = module_->find_method("get_all_bundled_inputs");
+ CAFFE_ENFORCE(
+ has_bundled_input,
+ "Model does not have bundled inputs. ",
+ "Use torch.utils.bundled_inputs.augment_model_with_bundled_inputs to add.");
+
+ c10::IValue bundled_inputs = module_->run_method("get_all_bundled_inputs");
+ return ivalue_to_bundled_inputs(bundled_inputs);
+}
+
+std::unordered_map<std::string, std::vector<std::vector<at::IValue>>>
+MobileModelRunner::get_many_functions_bundled_inputs() {
+ auto has_bundled_input =
+ module_->find_method("get_bundled_inputs_functions_and_info");
+ CAFFE_ENFORCE(
+ has_bundled_input,
+ "Model does not have bundled inputs. ",
+ "Use torch.utils.bundled_inputs.augment_many_model_functions_with_bundled_inputs to add.");
+
+ auto ival_bundled_inputs_mapping =
+ module_->run_method("get_bundled_inputs_functions_and_info");
+ auto bundled_inputs_mapping =
+ ivalue_to_bundled_inputs_map(ival_bundled_inputs_mapping);
+
+ std::unordered_map<std::string, std::vector<std::vector<at::IValue>>> ret;
+
+ for (auto& entry : bundled_inputs_mapping) {
+ std::string function_name = entry.first;
+ std::string function_to_call = entry.second;
+
+ auto has_func_to_call = module_->find_method(function_to_call);
+ CAFFE_ENFORCE(
+ has_func_to_call,
+ "Model does not have ",
+ function_to_call,
+ "Use torch.utils.bundled_inputs.augment_many_model_functions_with_bundled_inputs to add.");
+
+ c10::IValue bundled_inputs = module_->run_method(function_to_call);
+ ret[function_name] = ivalue_to_bundled_inputs(bundled_inputs);
+ }
+ return ret;
+}
+
+std::vector<at::IValue> MobileModelRunner::run_with_inputs(
+ std::vector<std::vector<at::IValue>> const& bundled_inputs) {
+ std::vector<at::IValue> ret;
+ ret.reserve(bundled_inputs.size());
+ for (std::vector<at::IValue> const& input : bundled_inputs) {
+ ret.emplace_back(module_->forward(input));
+ }
+ return ret;
+}
+
+std::vector<at::IValue> MobileModelRunner::run_with_inputs(
+ const std::string& function_name,
+ std::vector<std::vector<at::IValue>> const& bundled_inputs) const {
+ std::vector<at::IValue> ret;
+ ret.reserve(bundled_inputs.size());
+ auto has_bundled_input = module_->find_method(function_name);
+ CAFFE_ENFORCE(
+ has_bundled_input,
+ "Model does not have the method named ",
+ function_name,
+ "Please ensure that it was exported correctly");
+ for (std::vector<at::IValue> const& input : bundled_inputs) {
+ auto func = module_->get_method(function_name);
+ ret.emplace_back(func(input));
+ }
+ return ret;
+}
+
+void MobileModelRunner::run_argless_functions(
+ const std::vector<std::string>& functions) {
+ for (auto& function_name : functions) {
+ if (module_->find_method(function_name)) {
+ module_->run_method(function_name);
+ }
+ }
+}
+
+std::string MobileModelRunner::get_extra_file_contents(
+ std::string const& file_path,
+ std::string const& extra_file_name) {
+ std::unordered_map<std::string, std::string> extra;
+ extra[extra_file_name] = "";
+ torch::jit::_load_extra_only_for_mobile(file_path, c10::nullopt, extra);
+ return extra[extra_file_name];
+}
+
+bool MobileModelRunner::set_has_metal_gpu_operators(
+ std::set<std::string> const& op_list) {
+ for (std::string const& op : op_list) {
+ if (op.find("metal::") == 0) {
+ return true;
+ }
+ if (op.find("metal_prepack_unet::") == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void MobileModelRunner::for_each_tensor_in_bundled_inputs(
+ std::function<void(const ::at::Tensor&)> const& func) {
+ if (has_new_style_bundled_inputs()) {
+ // Get the bundled inputs and access the arg level ivalues stored within
+ auto bundled_inputs_mapping = this->get_many_functions_bundled_inputs();
+
+ // Loop over functions
+ for (auto& entry : bundled_inputs_mapping) {
+ std::vector<std::vector<at::IValue>> bundled_inputs = entry.second;
+ // Loop through inputs
+ for (const std::vector<at::IValue>& input : bundled_inputs) {
+ // Loop through values in an input
+ for (const at::IValue& iv : input) {
+ for_each_tensor_in_ivalue(iv, func);
+ }
+ }
+ }
+ } else {
+ c10::IValue iv = module_->run_method("get_all_bundled_inputs");
+ for_each_tensor_in_ivalue(iv, func);
+ }
+}
+} // namespace mobile
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h b/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h
new file mode 100644
index 0000000..5f394be
--- /dev/null
+++ b/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h
@@ -0,0 +1,162 @@
+#pragma once
+
+#include <iostream>
+#include <sstream>
+
+#include <torch/csrc/autograd/grad_mode.h>
+#include <torch/csrc/jit/mobile/import.h>
+#include <torch/csrc/jit/mobile/module.h>
+#include <torch/csrc/jit/serialization/export.h>
+#include <torch/script.h>
+
+namespace torch {
+namespace jit {
+namespace mobile {
+
+class MobileModelRunner {
+ std::shared_ptr<torch::jit::mobile::Module> module_;
+
+ public:
+ explicit MobileModelRunner(std::string const& file_path) {
+ module_ = std::make_shared<torch::jit::mobile::Module>(
+ torch::jit::_load_for_mobile(file_path));
+ }
+
+ MobileModelRunner(
+ std::string const& file_path,
+ uint64_t module_load_options) {
+ std::unordered_map<std::string, std::string> extra_files;
+ module_ = std::make_shared<torch::jit::mobile::Module>(
+ torch::jit::_load_for_mobile(
+ file_path,
+ at::Device(at::DeviceType::CPU, 0),
+ extra_files,
+ module_load_options));
+ }
+
+ /**
+ * Fetches the contents of the file named "extra/" + extra_file_name from the
+ * .ptl archive at location file_path. The contents are returned as an
+ * std::string.
+ *
+ * An empty string is returned if the file at the location "extra/" +
+ * extra_file_name does not exist or is an empty file (within the .ptl
+ * archive).
+ *
+ * An exception is thrown if the .ptl file at location file_path does not
+ * exist.
+ *
+ */
+ static std::string get_extra_file_contents(
+ std::string const& file_path,
+ std::string const& extra_file_name);
+
+ /**
+ * Returns true if the list of operators passed in has a Metal GPU operator,
+ * and false otherwise.
+ *
+ */
+ static bool set_has_metal_gpu_operators(std::set<std::string> const& op_list);
+
+ /**
+ * Fetches the set of root operators in the file "extra/mobile_info.json"
+ * within the .ptl archive at location file_path.
+ *
+ * An exception is thrown if:
+ *
+ * 1. The file at file_path does not exist, or
+   * 2. The contents of extra/mobile_info.json are not valid JSON, or
+   * 3. The file extra/mobile_info.json does not exist, or
+   * 4. The JSON is malformed in some way and the operator list cannot be
+   *    extracted correctly.
+ *
+ */
+ static std::set<std::string> get_operators_from_mobile_info_json(
+ std::string const& file_path);
+
+ static std::vector<std::vector<at::IValue>> ivalue_to_bundled_inputs(
+ const c10::IValue& bundled_inputs);
+
+ static std::unordered_map<std::string, std::string>
+ ivalue_to_bundled_inputs_map(const c10::IValue& bundled_inputs);
+
+ /**
+ * Fetches all the bundled inputs of the loaded mobile model.
+ *
+ * A bundled input itself is of type std::vector<at::IValue> and the
+ * elements of this vector<> are the arguments that the "forward"
+ * method of the model accepts. i.e. each of the at::IValue is a
+ * single argument to the model's "forward" method.
+ *
+   * Each element of the outer vector holds one bundled input. For models
+   * with bundled inputs, the outermost vector will have size > 0.
+ */
+ std::vector<std::vector<at::IValue>> get_all_bundled_inputs();
+
+ /**
+ * Fetches all the bundled inputs for all functions of the loaded mobile
+ * model.
+ *
+   * The mapping is from function names, e.g. 'forward', to the bundled
+   * inputs for that function.
+ *
+ * A bundled input itself is of type std::vector<at::IValue> and the
+ * elements of this vector<> are the arguments that the corresponding
+ * method of the model accepts. i.e. each of the at::IValue in the entry
+ * for forward is a single argument to the model's "forward" method.
+ *
+ * The outer vector of each value holds a bundled input. For models with
+   * bundled inputs, the outermost vector will have size > 0.
+ */
+ std::unordered_map<std::string, std::vector<std::vector<at::IValue>>>
+ get_many_functions_bundled_inputs();
+
+ /**
+ * Returns true if a model possesses get_bundled_inputs_functions_and_info()
+ */
+ bool has_new_style_bundled_inputs() const {
+ return module_->find_method("get_bundled_inputs_functions_and_info") !=
+ c10::nullopt;
+ }
+
+ /**
+ * For each tensor in bundled inputs, call the user-provided function 'func'.
+ */
+ void for_each_tensor_in_bundled_inputs(
+ std::function<void(const ::at::Tensor&)> const& func);
+
+ /**
+ * Get the root operators directly called by this model's Bytecode.
+ */
+ std::set<std::string> get_root_operators() {
+ return torch::jit::mobile::_export_operator_list(*module_);
+ }
+
+ /**
+ * Runs the model against all of the provided inputs using the model's
+ * "forward" method. Returns an std::vector<at::IValue>, where each element
+ * of the returned vector is one of the return values from calling forward().
+ */
+ std::vector<at::IValue> run_with_inputs(
+ std::vector<std::vector<at::IValue>> const& bundled_inputs);
+
+ /**
+   * Runs the model against all of the provided inputs for the specified
+   * function. Returns an std::vector<at::IValue>, where each element
+ * of the returned vector is one of the return values from calling the
+ * method named "function_name" on this model.
+ */
+ std::vector<at::IValue> run_with_inputs(
+ const std::string& function_name,
+ std::vector<std::vector<at::IValue>> const& bundled_inputs) const;
+
+ /**
+   * Attempts to run all functions in the passed-in list if they exist. All
+   * functions should take no arguments.
+ */
+ void run_argless_functions(const std::vector<std::string>& functions);
+};
+
+} // namespace mobile
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/mobile/model_tracer/TensorUtils.cpp b/torch/csrc/jit/mobile/model_tracer/TensorUtils.cpp
new file mode 100644
index 0000000..9a23814
--- /dev/null
+++ b/torch/csrc/jit/mobile/model_tracer/TensorUtils.cpp
@@ -0,0 +1,42 @@
+#include <c10/util/Exception.h>
+#include <torch/csrc/jit/mobile/model_tracer/TensorUtils.h>
+
+namespace torch {
+namespace jit {
+namespace mobile {
+void for_each_tensor_in_ivalue(
+ const c10::IValue& iv,
+ std::function<void(const ::at::Tensor&)> const& func) {
+ const bool is_leaf_type = iv.isString() || iv.isNone() || iv.isScalar() ||
+ iv.isDouble() || iv.isInt() || iv.isBool() || iv.isDevice() ||
+ iv.isIntList() || iv.isDoubleList() || iv.isBoolList();
+ if (is_leaf_type) {
+ // Do Nothing.
+ return;
+ }
+
+ if (iv.isTensor()) {
+ func(iv.toTensor());
+ } else if (iv.isTuple()) {
+ c10::intrusive_ptr<at::ivalue::Tuple> tup_ptr = iv.toTuple();
+ for (const auto& e : tup_ptr->elements()) {
+ for_each_tensor_in_ivalue(e, func);
+ }
+ } else if (iv.isList()) {
+ c10::List<c10::IValue> l = iv.toList();
+ for (auto&& i : l) {
+ c10::IValue item = i;
+ for_each_tensor_in_ivalue(item, func);
+ }
+ } else if (iv.isGenericDict()) {
+ c10::Dict<c10::IValue, c10::IValue> dict = iv.toGenericDict();
+ for (auto& it : dict) {
+ for_each_tensor_in_ivalue(it.value(), func);
+ }
+ } else {
+ AT_ERROR("Unhandled type of IValue. Got ", iv.tagKind());
+ }
+}
+} // namespace mobile
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/mobile/model_tracer/TensorUtils.h b/torch/csrc/jit/mobile/model_tracer/TensorUtils.h
new file mode 100644
index 0000000..6837a7b
--- /dev/null
+++ b/torch/csrc/jit/mobile/model_tracer/TensorUtils.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <ATen/core/ivalue.h>
+
+namespace torch {
+namespace jit {
+namespace mobile {
+/**
+ * Recursively scans the IValue object, traversing lists, tuples, and dicts,
+ * and calls the user-provided callback 'func' whenever a Tensor is found.
+ */
+void for_each_tensor_in_ivalue(
+ const ::c10::IValue& iv,
+ std::function<void(const ::at::Tensor&)> const& func);
+} // namespace mobile
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/mobile/model_tracer/tracer.cpp b/torch/csrc/jit/mobile/model_tracer/tracer.cpp
new file mode 100644
index 0000000..72fa6cd
--- /dev/null
+++ b/torch/csrc/jit/mobile/model_tracer/tracer.cpp
@@ -0,0 +1,327 @@
+#include <iostream>
+#include <string>
+
+/**
+ * tracer.cpp generates a binary that accepts a TorchScript model or a
+ * Torch Mobile model (with bytecode.pkl) that has at least one bundled
+ * input. The binary feeds the bundled input(s) into the model and
+ * executes it using the lite interpreter.
+ *
+ * Both root operators and called operators are recorded and saved
+ * into a YAML file (whose path is provided on the command line).
+ *
+ * Note: root operators may include primary and other operators that
+ * are not invoked via the dispatcher, and hence may not show
+ * up in the traced operator list.
+ *
+ */
+
+#include <ATen/core/dispatch/ObservedOperators.h>
+#include <torch/csrc/autograd/grad_mode.h>
+#include <torch/csrc/jit/mobile/import.h>
+#include <torch/csrc/jit/mobile/model_tracer/KernelDTypeTracer.h>
+#include <torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h>
+#include <torch/csrc/jit/mobile/model_tracer/OperatorCallTracer.h>
+#include <torch/csrc/jit/mobile/model_tracer/TensorUtils.h>
+#include <torch/csrc/jit/mobile/module.h>
+#include <torch/csrc/jit/mobile/parse_operators.h>
+#include <torch/script.h>
+
+typedef std::map<std::string, std::set<std::string>> kt_type;
+
+C10_DEFINE_string(
+ model_input_path,
+ "",
+ "The path of the input model file (.ptl).");
+
+C10_DEFINE_string(
+ build_yaml_path,
+ "",
+ "The path of the output YAML file containing traced operator information.");
+
+#define REQUIRE_STRING_ARG(name) \
+ if (FLAGS_##name.empty()) { \
+ std::cerr << "You must specify the flag --" #name "\n"; \
+ return 1; \
+ }
+
+#define REQUIRE_INT_ARG(name) \
+ if (FLAGS_##name == -1) { \
+ std::cerr << "You must specify the flag --" #name "\n"; \
+ return 1; \
+ }
+
+const std::vector<std::string> always_included_traced_ops = {
+ // The following are called from setup sections.
+ "aten::resize_",
+ "aten::slice.Tensor",
+};
+
+// Fetched from caffe2/aten/src/ATen/native/metal/MetalAten.mm
+// Diffusion Link: https://fburl.com/diffusion/atwwmax2
+const std::vector<std::string> gpu_metal_operators = {
+ "aten::conv2d",
+ "aten::add.Tensor",
+ "aten::add_.Tensor",
+ "aten::addmm",
+ "aten::empty.memory_format",
+ "aten::empty_strided",
+ "aten::log_softmax.int",
+ "aten::max_pool2d",
+ "aten::mul.Tensor",
+ "aten::relu",
+ "aten::relu_",
+ "aten::sigmoid",
+ "aten::sub.Tensor",
+ "aten::upsample_nearest2d.vec",
+ "aten::view",
+ "aten::adaptive_avg_pool2d",
+ "aten::hardtanh_",
+ "aten::reshape",
+ "aten::flatten.using_ints",
+};
+
+void printOpYAML(
+ std::ostream& out,
+ int indent,
+ const std::string& op_name,
+ bool is_used_for_training,
+ bool is_root_operator,
+ bool include_all_overloads) {
+ out << std::string(indent, ' ') << op_name << ":" << std::endl;
+ out << std::string(indent + 2, ' ')
+ << "is_used_for_training: " << (is_used_for_training ? "true" : "false")
+ << std::endl;
+ out << std::string(indent + 2, ' ')
+ << "is_root_operator: " << (is_root_operator ? "true" : "false")
+ << std::endl;
+ out << std::string(indent + 2, ' ')
+ << "include_all_overloads: " << (include_all_overloads ? "true" : "false")
+ << std::endl;
+}
+
+void printOpsYAML(
+ std::ostream& out,
+ const std::set<std::string>& operator_list,
+ bool is_used_for_training,
+ bool is_root_operator,
+ bool include_all_overloads) {
+ for (auto& it : operator_list) {
+    printOpYAML(out, 2, it, is_used_for_training, is_root_operator, include_all_overloads);
+ }
+}
+
+/**
+ * This is a collection of some common ATen methods that are usually
+ * called outside of the Model's forward() run, and they need to be
+ * traced to ensure that the used operators are included in the build.
+ * If/When this list becomes too long, we can consider making it a
+ * per-model list.
+ */
+void call_setup_methods() {
+ at::zeros({2, 2});
+ at::ones({2, 2});
+ at::Tensor t1 = at::empty({7, 7});
+ at::Tensor t2 = t1.fill_(3);
+ at::narrow(t2, 1, 0, 1);
+ at::eq(t1, t2);
+ const volatile bool nz = at::zeros({1}).is_nonzero();
+ (void)nz;
+
+ // Create a byte tensor and copy it
+ auto zb = at::zeros({10}, at::kByte);
+ auto zf = at::zeros({10}, at::kFloat);
+ zb.copy_(zf);
+ t2.div(1);
+
+ // Typically, failures show up in CopyKernel.cpp, so enumerating
+ // common dtypes that may show up.
+ const auto all_dtypes_for_copy = {
+ at::kByte,
+ at::kFloat,
+ at::kInt,
+ at::kChar,
+ at::kDouble,
+ at::kShort,
+ at::kLong};
+ for (const auto dtype : all_dtypes_for_copy) {
+ auto tensor1 = at::empty({10}, dtype);
+ tensor1.copy_(at::zeros({10}, at::kFloat));
+ }
+
+ torch::zeros({0, 0}, torch::ScalarType::Float);
+ std::vector<float> storage(20, 1.0);
+ std::vector<int64_t> sizes({2, 10});
+ torch::from_blob(storage.data(), at::IntArrayRef(sizes), at::kFloat);
+}
+
+/**
+ * Call methods on the Tensor object that we expect to be called
+ * in production on this Tensor.
+ */
+void consume_tensor(const at::Tensor& t) {
+ const at::Tensor& c = t;
+ c.copy_(t.cpu());
+}
+
+void run_model(
+ const std::string& input_module_path,
+ std::set<std::string>& root_ops,
+ std::set<std::string>& enabled_backends,
+ torch::jit::mobile::KernelDTypeTracer::kernel_tags_type&
+ called_kernel_tags) {
+ // Load the module on CPU with the flag to skip the operator exists check.
+ // This is needed so that we can load any TorchBind objects (custom classes)
+ // that this model refers to so that any operators being called from those
+ // TorchBind objects can be traced by the model tracer.
+ //
+ torch::jit::mobile::MobileModelRunner module_runner(input_module_path, 0);
+ root_ops = module_runner.get_root_operators();
+ std::cout << "Got " << root_ops.size() << " Root Operators." << std::endl;
+
+ if (torch::jit::mobile::MobileModelRunner::set_has_metal_gpu_operators(
+ root_ops)) {
+ std::cout << "Inferred Metal GPU Model." << std::endl;
+ root_ops.insert(gpu_metal_operators.begin(), gpu_metal_operators.end());
+ called_kernel_tags["__unused__"] = {"Float"};
+ enabled_backends.insert("Metal GPU");
+
+ // When we encounter a GPU model, we should call .cpu().copy_() on the
+ // tensors in the bundled inputs, since this is what will happen when
+ // such a model is executed on an iOS device (to copy the Tensor to Metal
+ // memory via a call to .metal()).
+ module_runner.for_each_tensor_in_bundled_inputs(consume_tensor);
+ } else {
+ std::cout << "Inferred CPU Model." << std::endl;
+ enabled_backends.insert("CPU");
+ torch::jit::mobile::MobileModelRunner mobile_module_runner(
+ input_module_path);
+
+    // When we encounter a CPU model, we should call .cpu().copy_() on the
+    // tensors in the bundled inputs, since this is what will happen when
+    // such a model is executed on an Android device, where the PyTorch JNI
+    // bindings call .cpu() in JIValue::newJIValueFromAtIValue().
+ module_runner.for_each_tensor_in_bundled_inputs(consume_tensor);
+
+    // If a user bundled their inputs after the API was updated to accept
+    // bundled inputs for multiple methods, they should go down this route.
+    // Even if they only bundled inputs for forward, they will have the
+    // new-style bundled inputs. Since at this point tracer.cpp does not know
+    // which functions have bundled inputs, we must call
+    // get_bundled_inputs_functions_and_info, if it exists, to get that set.
+ if (mobile_module_runner.has_new_style_bundled_inputs()) {
+ auto bundled_inputs_mapping =
+ mobile_module_runner.get_many_functions_bundled_inputs();
+ for (auto& entry : bundled_inputs_mapping) {
+ std::string function_name = entry.first;
+ std::vector<std::vector<at::IValue>> bundled_inputs = entry.second;
+ std::cout << "Got " << bundled_inputs.size() << " bundled input(s) for "
+ << function_name << "\n\n";
+ std::vector<at::IValue> results =
+ mobile_module_runner.run_with_inputs(function_name, bundled_inputs);
+
+ for (auto& result : results) {
+ // Consume the result Tensor(s) when tracing on CPU since the
+ // Android/Java JNI bindings will do the same.
+ torch::jit::mobile::for_each_tensor_in_ivalue(result, consume_tensor);
+ }
+ }
+      // If get_bundled_inputs_functions_and_info does not exist, we default
+      // to assuming the inputs were bundled before that change was made. If
+      // no bundled inputs are found here either, an error will be thrown.
+ } else {
+ std::vector<std::vector<at::IValue>> bundled_inputs =
+ mobile_module_runner.get_all_bundled_inputs();
+ std::cout << "Got " << bundled_inputs.size() << " bundled input(s)\n\n";
+ std::vector<at::IValue> results =
+ mobile_module_runner.run_with_inputs(bundled_inputs);
+
+ for (auto& result : results) {
+ // Consume the result Tensor(s) when tracing on CPU since the
+ // Android/Java JNI bindings will do the same.
+ torch::jit::mobile::for_each_tensor_in_ivalue(result, consume_tensor);
+ }
+ }
+ }
+}
+
+/**
+ * Converts a pytorch model (full/lite) to lite interpreter model for
+ * mobile, and additionally writes out a list of root and called
+ * operators.
+ */
+int main(int argc, char* argv[]) {
+ if (!c10::ParseCommandLineFlags(&argc, &argv)) {
+ std::cerr << "Failed to parse command line flags!" << std::endl;
+ return 1;
+ }
+
+ REQUIRE_STRING_ARG(model_input_path);
+ REQUIRE_STRING_ARG(build_yaml_path);
+
+ const std::string input_module_path = FLAGS_model_input_path;
+
+ std::ofstream yaml_out(FLAGS_build_yaml_path);
+
+ std::cout << "Processing: " << input_module_path << std::endl;
+ std::cout << "Output: " << FLAGS_build_yaml_path << std::endl;
+
+ at::globalContext().setQEngine(at::QEngine::QNNPACK);
+ c10::ObservedOperators::getUnobservedOperatorList().clear();
+
+ torch::jit::mobile::OperatorCallTracer op_tracer;
+ torch::jit::mobile::KernelDTypeTracer kdtype_tracer;
+
+ call_setup_methods();
+
+ std::set<std::string> root_ops, traced_operators, enabled_backends;
+ torch::jit::mobile::KernelDTypeTracer::kernel_tags_type called_kernel_tags;
+
+ using torch::jit::MobileModuleLoadOptions;
+
+ // run with QNNPACK
+ run_model(input_module_path, root_ops, enabled_backends, called_kernel_tags);
+ at::globalContext().setQEngine(at::QEngine::FBGEMM);
+ run_model(input_module_path, root_ops, enabled_backends, called_kernel_tags);
+
+ traced_operators = op_tracer.getCalledOperators();
+ called_kernel_tags.insert(
+ kdtype_tracer.getCalledKernelTags().begin(),
+ kdtype_tracer.getCalledKernelTags().end());
+ traced_operators.insert(
+ always_included_traced_ops.begin(), always_included_traced_ops.end());
+
+ if (traced_operators.size() <= always_included_traced_ops.size()) {
+ std::cerr
+ << c10::str(
+ "Error traced_operators size: ",
+ traced_operators.size(),
+               ". Expected the traced operator list to be bigger than the default size ",
+ always_included_traced_ops.size(),
+ ". Please report a bug in PyTorch.")
+ << std::endl;
+ }
+
+  // If an op exists in both traced_ops and root_ops, keep it in root_ops only
+ for (const auto& root_op : root_ops) {
+ if (traced_operators.find(root_op) != traced_operators.end()) {
+ traced_operators.erase(root_op);
+ }
+ }
+
+ yaml_out << "include_all_kernel_dtypes: true" << std::endl;
+ yaml_out << "operators:" << std::endl;
+ printOpsYAML(
+ yaml_out,
+ root_ops,
+ false /* is_used_for_training */,
+ true /* is_root_operator */,
+ false /* include_all_overloads */);
+ printOpsYAML(
+ yaml_out,
+ traced_operators,
+ false /* is_used_for_training */,
+ false /* is_root_operator */,
+ false /* include_all_overloads */);
+ return 0;
+}