| #include <torch/csrc/jit/script/init.h> |
| |
| #include <torch/csrc/Device.h> |
| #include <torch/csrc/Dtype.h> |
| #include <torch/csrc/Layout.h> |
| #include <torch/csrc/jit/import.h> |
| #include <torch/csrc/jit/script/compiler.h> |
| #include <torch/csrc/jit/script/module.h> |
| #include <torch/csrc/jit/script/schema_matching.h> |
| #include <torch/csrc/jit/script/sugared_value.h> |
| #include <torch/csrc/jit/testing/file_check.h> |
| |
| #include <torch/csrc/jit/constants.h> |
| #include <torch/csrc/jit/graph_executor.h> |
| #include <torch/csrc/jit/hooks_for_testing.h> |
| #include <torch/csrc/jit/import_source.h> |
| #include <torch/csrc/jit/irparser.h> |
| #include <torch/csrc/jit/passes/python_print.h> |
| #include <torch/csrc/jit/pybind_utils.h> |
| #include <torch/csrc/jit/python_tracer.h> |
| #include <torch/csrc/jit/script/logging.h> |
| #include <torch/csrc/jit/script/parser.h> |
| #include <torch/csrc/jit/tracer.h> |
| |
| #include <torch/csrc/api/include/torch/ordered_dict.h> |
| |
| #include <ATen/ATen.h> |
| #include <ATen/core/function_schema.h> |
| |
| #include <pybind11/functional.h> |
| #include <pybind11/pybind11.h> |
| #include <pybind11/stl.h> |
| #include <pybind11/stl_bind.h> |
| #include <chrono> |
| #include <cstddef> |
| #include <memory> |
| #include <sstream> |
| #include <string> |
| #include <tuple> |
| #include <utility> |
| #include <vector> |
| |
| PYBIND11_MAKE_OPAQUE(torch::jit::script::ExtraFilesMap); |
| |
| namespace torch { |
| namespace jit { |
| namespace script { |
| |
| using ::c10::Argument; |
| using ::c10::FunctionSchema; |
| |
| using ResolutionCallback = std::function<py::function(std::string)>; |
| using FunctionDefaults = std::unordered_map<std::string, py::object>; |
| |
| static std::string typeString(py::handle h) { |
| return py::str(h.get_type().attr("__name__")); |
| } |
| |
| inline std::shared_ptr<SugaredValue> toSimple(Value* v) { |
| return std::make_shared<SimpleValue>(v); |
| } |
| |
| // NB: This should be the single entry-point for instantiating a SugaredValue |
| // from a Python object. If you are adding support for converting a new Python |
| // type, *add it in this function's implementation*. |
| std::shared_ptr<SugaredValue> toSugaredValue( |
| py::object obj, |
| Function& m, |
| SourceRange loc, |
| bool is_constant = false); |
| |
| struct VISIBILITY_HIDDEN PythonValue : public SugaredValue { |
| PythonValue(py::object self) : self(std::move(self)) {} |
| |
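| // Build a FunctionSchema for the wrapped Python callable. If |
| // torch.jit.annotations can recover an explicit signature we use it; |
| // otherwise we fall back to an all-Tensor signature sized by the |
| // introspected parameter count (or by n_args). As a rough example, an |
| // unannotated two-argument function bound to a single output yields |
| // something like (Tensor 0, Tensor 1) -> Tensor. |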
| FunctionSchema getSchema(const size_t n_args, const size_t n_binders) { |
| auto annotations = py::module::import("torch.jit.annotations"); |
| auto signature = annotations.attr("get_signature")(self); |
| std::vector<Argument> args, rets; |
| // We may mutate this if we can determine the number of args from Python |
| // introspection. |
| size_t actual_n_args = n_args; |
| if (!signature.is_none()) { |
| std::vector<TypePtr> arg_types; |
| TypePtr ret_type; |
| std::tie(arg_types, ret_type) = |
| py::cast<std::pair<std::vector<TypePtr>, TypePtr>>(signature); |
| args.reserve(arg_types.size()); |
| size_t idx = 0; // Fake argument names by putting in the index |
| for (auto& arg_type : arg_types) { |
| args.push_back(Argument( |
| std::to_string(idx++), std::move(arg_type), {}, {}, false)); |
| } |
| rets.push_back(Argument("0", std::move(ret_type), {}, {}, false)); |
| } else { |
| // Create a default signature using what information we have |
| |
| // First see if we can introspect the number of function parameters |
| // irrespective of the presence of explicit type annotations |
| auto num_params = annotations.attr("get_num_params")(self); |
| if (!num_params.is_none()) { |
| // Return a signature with the correct number of params according to the |
| // Python function. The error handling in call() will catch any mismatch |
| // later. |
| actual_n_args = py::cast<size_t>(num_params); |
| } |
| // Construct the default signature: all arguments and returns will be |
| // typed as Tensor |
| args.reserve(actual_n_args); |
| for (size_t i = 0; i < actual_n_args; ++i) { |
| args.push_back( |
| Argument(std::to_string(i), TensorType::get(), {}, {}, false)); |
| } |
| TypePtr ret_type = TensorType::get(); |
| if (n_binders == 0) { |
| ret_type = NoneType::get(); |
| } else if (n_binders > 1) { |
| std::vector<TypePtr> tuple_values(n_binders, ret_type); |
| ret_type = TupleType::create(std::move(tuple_values)); |
| } |
| rets.push_back(Argument("0", ret_type, {}, {}, false)); |
| } |
| return FunctionSchema("", "", std::move(args), std::move(rets)); |
| } |
| |
| // call it like a function, e.g. `outputs = this(inputs)` |
| std::shared_ptr<SugaredValue> call( |
| const SourceRange& loc, |
| Function& m, |
| at::ArrayRef<NamedValue> inputs_, |
| at::ArrayRef<NamedValue> attributes, |
| size_t n_binders) override { |
| auto inputs = toValues(*m.graph(), inputs_); |
| auto schema = getSchema(inputs.size(), n_binders); |
| |
| std::stringstream failure_messages; |
| c10::optional<MatchedSchema> matched_schema = tryMatchSchema( |
| schema, |
| loc, |
| *m.graph(), |
| c10::nullopt, |
| inputs_, |
| attributes, |
| failure_messages, |
| /*conv_tensor_to_num*/ true); |
| if (!matched_schema) |
| throw ErrorReport(loc) << failure_messages.str(); |
| |
| // Release the function object so we can wrap it in a PythonOp |
| py::object func = self; |
| std::string cconv(inputs.size(), 'd'); |
| Node* new_node = m.graph()->insertNode(m.graph()->createPythonOp( |
| THPObjectPtr(func.release().ptr()), cconv, {})); |
| |
| // Mark if function is ignored on export |
| if (py::cast<bool>(py::module::import("torch.jit") |
| .attr("_try_get_ignored_op")(self))) { |
| auto python_op = static_cast<PythonOp*>(new_node); |
| python_op->ignore_on_export = true; |
| } |
| new_node->setSourceLocation(std::make_shared<SourceRange>(loc)); |
| for (auto& i : matched_schema->inputs) |
| new_node->addInput(i); |
| |
| Value* output = |
| new_node->addOutput()->setType(matched_schema->return_types.at(0)); |
| return std::make_shared<SimpleValue>(output); |
| } |
| |
| std::string kind() const override { |
| std::stringstream ss; |
| ss << "python value of type '" << typeString(self) << "'"; |
| return ss.str(); |
| } |
| |
| void checkForAddToConstantsError(std::stringstream& ss) { |
| auto nn = py::module::import("torch.nn"); |
| if (py::isinstance(self, nn.attr("ModuleList")) || |
| py::isinstance(self, nn.attr("Sequential"))) { |
| ss << ". Did you forget to add it to __constants__? "; |
| } |
| } |
| |
| std::vector<std::shared_ptr<SugaredValue>> asTuple( |
| const SourceRange& loc, |
| Function& m, |
| const c10::optional<size_t>& size_hint = {}) override { |
| const std::string type_str = typeString(self); |
| std::stringstream ss; |
| ss << kind() << " cannot be used as a tuple"; |
| checkForAddToConstantsError(ss); |
| throw ErrorReport(loc) << ss.str(); |
| } |
| |
| std::shared_ptr<SugaredValue> attr( |
| const SourceRange& loc, |
| Function& m, |
| const std::string& field) override { |
| const std::string type_str = typeString(self); |
| std::stringstream ss; |
| ss << "attribute lookup is not defined on " << kind(); |
| checkForAddToConstantsError(ss); |
| throw ErrorReport(loc) << ss.str(); |
| } |
| |
| protected: |
| py::object getattr(const SourceRange& loc, const std::string& name) { |
| try { |
| return py::getattr(self, name.c_str()); |
| } catch (py::error_already_set& e) { |
| throw ErrorReport(loc) << "object has no attribute " << name; |
| } |
| } |
| |
| py::object self; |
| }; |
| |
| struct VISIBILITY_HIDDEN PythonModuleValue : public PythonValue { |
| explicit PythonModuleValue(py::object mod) : PythonValue(std::move(mod)) {} |
| |
| std::shared_ptr<SugaredValue> attr( |
| const SourceRange& loc, |
| Function& m, |
| const std::string& field) override { |
| py::object member = getattr(loc, field); |
| // note: is_constant = true because we consider global properties |
| // on modules like math.pi or torch.float to be constants, |
| // even though it is possible, though rare, for someone to mutate them |
| return toSugaredValue(member, m, loc, /*is_constant=*/true); |
| } |
| }; |
| |
| struct VISIBILITY_HIDDEN ConstantPythonTupleValue : public PythonValue { |
| explicit ConstantPythonTupleValue(py::object tup) |
| : PythonValue(std::move(tup)) {} |
| std::vector<std::shared_ptr<SugaredValue>> asTuple( |
| const SourceRange& loc, |
| Function& m, |
| const c10::optional<size_t>& size_hint = {}) override { |
| py::tuple tup = self; |
| std::vector<std::shared_ptr<SugaredValue>> result; |
| result.reserve(tup.size()); |
| for (py::handle t : tup) { |
| py::object obj = py::reinterpret_borrow<py::object>(t); |
| result.push_back(toSugaredValue(obj, m, loc, true)); |
| } |
| return result; |
| } |
| |
| Value* asValue(const SourceRange& loc, Function& m) override { |
| std::vector<Value*> values; |
| for (const auto& sugared_item : asTuple(loc, m)) { |
| values.push_back(sugared_item->asValue(loc, m)); |
| } |
| auto node = m.graph()->createTuple(values); |
| return m.graph()->insertNode(node)->output(); |
| } |
| }; |
| |
| // Represents all the parameters of a module packed into a single value |
| // (a tuple of Tensors; see ModuleValue::attr, which builds the tuple) |
| struct VISIBILITY_HIDDEN ConstantParameterList : public SugaredValue { |
| ConstantParameterList(Value* the_list) : the_list_(the_list) {} |
| std::string kind() const override { |
| return "constant parameter list"; |
| } |
| std::shared_ptr<SugaredValue> call( |
| const SourceRange& loc, |
| Function& caller, |
| at::ArrayRef<NamedValue> inputs, |
| at::ArrayRef<NamedValue> attributes, |
| size_t n_binders) override { |
| return toSimple(the_list_); |
| } |
| |
| private: |
| Value* the_list_; |
| }; |
| |
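| // A set of methods on a module that share a name. At the call site we try |
| // each overload's schema in order (with the module prepended as self) and |
| // dispatch to the first one that matches; if none match, the accumulated |
| // failure messages are reported. |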
| struct VISIBILITY_HIDDEN OverloadedFunctionValue : public SugaredValue { |
| OverloadedFunctionValue(Value* module, std::vector<std::string> method_names) |
| : module_(module), method_names_(std::move(method_names)) {} |
| |
| std::string kind() const override { |
| return "overloaded function"; |
| } |
| |
| std::shared_ptr<SugaredValue> call( |
| const SourceRange& loc, |
| Function& caller, |
| at::ArrayRef<NamedValue> inputs, |
| at::ArrayRef<NamedValue> attributes, |
| size_t n_binders) override { |
| std::stringstream err; |
| std::vector<NamedValue> new_inputs = inputs.vec(); |
| new_inputs.insert(new_inputs.begin(), module_); |
| |
| for (const std::string& method_name : method_names_) { |
| auto cls = module_->type()->expect<ClassType>(); |
| Function* fn = cls->getMethod(method_name); |
| auto match = tryMatchSchema( |
| fn->getSchema(), |
| loc, |
| *caller.graph().get(), |
| c10::nullopt, |
| new_inputs, |
| attributes, |
| err, |
| true); |
| if (match) { |
| return MethodValue(module_, *fn) |
| .call(loc, caller, inputs, attributes, n_binders); |
| } |
| } |
| throw ErrorReport(loc) << "Could not find any matching overloads\n" |
| << err.str(); |
| } |
| |
| private: |
| Value* module_; |
| std::vector<std::string> method_names_; |
| }; |
| |
| // Defines how modules/methods behave inside the script subset. |
| // For now this does not have any interaction with Python. |
| // In the future, we will add the ability to resolve `self.foo` to Python |
| // {functions, modules, constants}, so this SugaredValue is defined here, |
| // anticipating that we will eventually need to replace Module with a |
| // py::object holding the actual nn.Module class. |
| |
| struct ModuleValue : public SugaredValue { |
| ModuleValue(Value* self, std::shared_ptr<Module> module) |
| : self_(self), module_(std::move(module)) {} |
| |
| std::string kind() const override { |
| return "module"; |
| } |
| |
| // select an attribute on it, e.g. `this.field` |
| std::shared_ptr<SugaredValue> attr( |
| const SourceRange& loc, |
| Function& m, |
| const std::string& field) override { |
| // Workaround to make self.training work: |
| // it adds a buffer 'training' to the module if one doesn't exist |
| // and then loads that buffer, casting it to bool. |
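| // The emitted IR is roughly (a sketch; value names are illustrative): |
| //   %t : Tensor = prim::GetAttr[name="training"](%self) |
| //   %training : bool = prim::Bool(%t) |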
| if (field == "training") { |
| Slot* v = module_->find_buffer(field); |
| if (!v) { |
| py::object py_module = py::cast(module_); |
| bool training = py::cast<bool>(py::getattr(py_module, "training")); |
| auto t = |
| autograd::make_variable(at::full({}, training ? 1 : 0, at::kLong)); |
| module_->register_buffer("training", std::move(t)); |
| v = module_->find_buffer(field); |
| } |
| Value* the_tensor = m.graph()->insertGetAttr(self_, "training"); |
| Value* the_bool = m.graph()->insert(prim::Bool, {the_tensor}); |
| return std::make_shared<SimpleValue>(the_bool); |
| } |
| |
| if (std::shared_ptr<Module> v = module_->find_module(field)) { |
| return std::make_shared<ModuleValue>( |
| m.graph()->insertGetAttr(self_, field), v); |
| } else if (auto kind = module_->kind_of(field)) { |
| // methods, parameters, attributes, and buffers are all first class |
| return SimpleValue(self_).attr(loc, m, field); |
| } |
| |
| // This can also be a call to a non-script module or a plain |
| // Python method. If so, return this as a Python value. |
| py::object py_module = py::cast(module_); |
| |
| py::object overloads = |
| py_module.attr("_overloads").attr("get")(field, py::none()); |
| if (!overloads.is_none()) { |
| return std::make_shared<OverloadedFunctionValue>( |
| self_, py::cast<std::vector<std::string>>(overloads)); |
| } |
| // Use a None default so that a genuinely missing attribute falls through |
| // to the "module has no attribute" error below. |
| py::object attr = py::getattr(py_module, field.c_str(), py::none()); |
| if (!attr.is_none()) { |
| if (py::isinstance<py::function>(attr) && |
| py::hasattr(attr, "_parameter_names_fn")) { |
| // Fetch the names of the parameters in the list so they're in the |
| // right order |
| auto fn_self = py::getattr(attr, "__self__"); |
| auto param_names = py::getattr(attr, "_parameter_names_fn")(fn_self); |
| |
| Graph& g = *m.graph(); |
| // Fetch each named parameter via GetAttr and pack them into a tuple |
| // in the listed order |
| std::vector<Value*> params; |
| for (auto name : param_names) { |
| params.emplace_back(g.insertGetAttr(self_, py::str(name))); |
| } |
| auto list = g.insertNode(g.createTuple(params))->output(); |
| return std::make_shared<ConstantParameterList>(list); |
| } |
| if (py::isinstance<py::function>(attr) || |
| py::isinstance(attr, py::module::import("torch.nn").attr("Module")) || |
| py_module.attr("_constants_set").contains(field.c_str())) { |
| return toSugaredValue(attr, m, loc, true); |
| } else { |
| std::string hint = "did you forget to add it __constants__?"; |
| if (py::isinstance(attr, py::module::import("torch").attr("Tensor"))) { |
| hint = "Tensors must be added to a module as a buffer or parameter"; |
| } |
| throw ErrorReport(loc) |
| << "attribute '" << field << "' of type '" << typeString(attr) |
| << "' is not usable in a script method (" << hint << ")"; |
| } |
| } |
| throw ErrorReport(loc) << "module has no attribute '" << field << "'"; |
| } |
| |
| // call module.forward |
| std::shared_ptr<SugaredValue> call( |
| const SourceRange& loc, |
| Function& caller, |
| at::ArrayRef<NamedValue> inputs, |
| at::ArrayRef<NamedValue> attributes, |
| size_t n_binders) override { |
| return attr(loc, caller, "forward") |
| ->call(loc, caller, inputs, attributes, n_binders); |
| } |
| |
| std::vector<std::shared_ptr<SugaredValue>> asTuple( |
| const SourceRange& loc, |
| Function& m, |
| const c10::optional<size_t>& size_hint = {}) override { |
| py::object py_module = py::cast(module_); |
| if (!py::isinstance( |
| py_module, |
| py::module::import("torch.jit").attr("_ConstModuleList"))) |
| return SugaredValue::asTuple(loc, m, size_hint); |
| std::vector<std::shared_ptr<SugaredValue>> result; |
| for (py::handle py_submodule : py_module) { |
| py::object obj = py::reinterpret_borrow<py::object>(py_submodule); |
| if (py::isinstance<Module>(obj)) { |
| auto sub_module = py::cast<std::shared_ptr<Module>>(obj); |
| Value* module_v = m.graph()->insertGetAttr(self_, sub_module->name()); |
| result.emplace_back( |
| std::make_shared<ModuleValue>(module_v, sub_module)); |
| } else { |
| result.push_back(toSugaredValue( |
| obj, |
| m, |
| loc, |
| /*is_constant =*/false)); |
| } |
| } |
| return result; |
| } |
| |
| private: |
| Value* self_; |
| std::shared_ptr<Module> module_; |
| }; |
| |
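| // Backs functions registered through the boolean-dispatch mechanism (looked |
| // up via torch.jit._try_get_dispatched_fn). The py::dict describes the |
| // dispatch: "index" and "arg_name" locate the boolean flag in the args or |
| // kwargs, "default" is its fallback value, and "if_true"/"if_false" are the |
| // two callables to dispatch to. The flag must be a compile-time constant. |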
| struct VISIBILITY_HIDDEN BooleanDispatchValue : public SugaredValue { |
| BooleanDispatchValue(py::dict dispatched_fn) |
| : dispatched_fn_(std::move(dispatched_fn)) {} |
| |
| std::string kind() const override { |
| return "boolean dispatch"; |
| } |
| |
| std::shared_ptr<SugaredValue> call( |
| const SourceRange& loc, |
| Function& caller, |
| at::ArrayRef<NamedValue> inputs, |
| at::ArrayRef<NamedValue> attributes, |
| size_t n_binders) override { |
| c10::optional<bool> result; |
| Graph& graph = *(caller.graph()); |
| |
| auto index = py::cast<size_t>(dispatched_fn_["index"]); |
| auto arg_name = py::str(dispatched_fn_["arg_name"]); |
| |
| if (index < inputs.size()) { |
| // Dispatch flag is in arg list |
| result = constant_as<bool>(inputs.at(index).value(graph)); |
| } else if (auto i = findInputWithName(arg_name, attributes)) { |
| // Dispatch flag is in kwargs |
| result = constant_as<bool>(attributes[*i].value(graph)); |
| } else { |
| // Didn't find dispatch flag, so use default value |
| result = py::cast<bool>(dispatched_fn_["default"]); |
| } |
| |
| if (!result) { |
| throw ErrorReport(loc) << "value for boolean dispatch was not constant"; |
| } |
| |
| std::shared_ptr<SugaredValue> value; |
| if (*result) { |
| value = toSugaredValue(dispatched_fn_["if_true"], caller, loc); |
| } else { |
| value = toSugaredValue(dispatched_fn_["if_false"], caller, loc); |
| } |
| return value->call(loc, caller, inputs, attributes, n_binders); |
| } |
| |
| private: |
| py::dict dispatched_fn_; |
| }; |
| |
| std::shared_ptr<MethodValue> moduleToMethod( |
| const std::shared_ptr<Module>& mod) { |
| // this path only supports calling raw script functions |
| // but because they are not distinguished from modules, we have to check |
| // that they are function-like here. They must not have state, and they |
| // must have a forward method. When we expose functions to python |
| // this will be replaced with a direct py::isinstance<Function> call. |
| |
| if (mod->get_parameters().size() != 0) { |
| throw ErrorReport() |
| << "Attempted to inline a Module with parameters. " |
| "Stateful modules to be inlined must be submodules of the callee."; |
| } |
| Method* forward = mod->find_method("forward"); |
| if (!forward) { |
| throw ErrorReport() << " expected this module to have a forward function."; |
| } |
| return std::make_shared<MethodValue>(at::nullopt, forward->function()); |
| } |
| |
| std::shared_ptr<SugaredValue> toSugaredValue( |
| py::object obj, |
| Function& m, |
| SourceRange loc, |
| bool is_constant) { |
| // directly create SimpleValues when possible, because they are first-class |
| // and can be re-assigned. Otherwise, this would be invalid: |
| // f = python_constant |
| // while ... |
| // f = f + 1 |
| auto& g = *m.graph(); |
| if (is_constant) { |
| if (py::isinstance<py::bool_>(obj)) { |
| return toSimple(g.insertConstant(py::cast<bool>(obj), nullptr, loc)); |
| } else if (py::isinstance<py::int_>(obj)) { |
| return toSimple(g.insertConstant(py::cast<int64_t>(obj), nullptr, loc)); |
| } else if (py::isinstance<py::float_>(obj)) { |
| return toSimple(g.insertConstant(py::cast<double>(obj), nullptr, loc)); |
| } else if (py::isinstance<py::str>(obj)) { |
| return toSimple( |
| g.insertConstant(py::cast<std::string>(obj), nullptr, loc)); |
| } else if (obj.is(py::none())) { |
| return toSimple(g.insertConstant(IValue(), nullptr, loc)); |
| } else if (THPDevice_Check(obj.ptr())) { |
| auto device = reinterpret_cast<THPDevice*>(obj.ptr()); |
| return toSimple(g.insertConstant(device->device)); |
| } else if (THPLayout_Check(obj.ptr())) { |
| auto layout = reinterpret_cast<THPLayout*>(obj.ptr()); |
| const auto v = static_cast<int64_t>(layout->layout); |
| return toSimple(g.insertConstant(v, nullptr, loc)); |
| } else if (THPDtype_Check(obj.ptr())) { |
| auto dtype = reinterpret_cast<THPDtype*>(obj.ptr()); |
| const auto v = static_cast<int64_t>(dtype->scalar_type); |
| return toSimple(g.insertConstant(v, nullptr, loc)); |
| } else if (py::isinstance<py::tuple>(obj)) { |
| return std::make_shared<ConstantPythonTupleValue>(obj); |
| } |
| } |
| |
| auto weak_obj = |
| py::module::import("torch.jit").attr("_try_get_weak_module")(obj); |
| if (!weak_obj.is_none()) { |
| obj = weak_obj; |
| } |
| if (py::isinstance<Module>(obj)) { |
| // In the case that this Python object is not a submodule, inline *ONLY |
| // PURE* ScriptModules. This allows us to call arbitrary @script functions |
| // within a scripting context while still enforcing that parameters from |
| // stateful submodules are properly accounted for. |
| auto mod = py::cast<std::shared_ptr<Module>>(obj); |
| return moduleToMethod(mod); |
| } else if (py::isinstance<py::module>(obj)) { |
| return std::make_shared<PythonModuleValue>(obj); |
| } else if (obj.ptr() == py::module::import("torch.jit").attr("_fork").ptr()) { |
| return std::make_shared<ForkValue>(); |
| } else if ( |
| obj.ptr() == py::module::import("torch.jit").attr("annotate").ptr()) { |
| return std::make_shared<AnnotateValue>(); |
| } |
| |
| py::object builtin_name = |
| py::module::import("torch.jit").attr("_find_builtin")(obj); |
| if (!builtin_name.is_none()) { |
| return std::make_shared<BuiltinFunction>( |
| Symbol::fromQualString(py::str(builtin_name)), c10::nullopt); |
| } |
| |
| if (py::isinstance<py::function>(obj)) { |
| auto compiled_fn = |
| py::module::import("torch.jit").attr("_try_compile_weak_script")(obj); |
| if (!compiled_fn.is(py::none())) { |
| auto mod = py::cast<std::shared_ptr<Module>>(compiled_fn); |
| return moduleToMethod(mod); |
| } |
| } |
| |
| py::object dispatched_fn = |
| py::module::import("torch.jit").attr("_try_get_dispatched_fn")(obj); |
| if (!dispatched_fn.is_none()) { |
| return std::make_shared<BooleanDispatchValue>(std::move(dispatched_fn)); |
| } |
| |
| return std::make_shared<PythonValue>(obj); |
| } |
| |
| py::object unpackVariableTensorList(std::vector<at::Tensor> outputs) { |
| // if we don't tell pybind these are variables it chokes on the |
| // conversion. |
| // TODO: fix conversions to be sane and make sure this works. |
| if (outputs.size() == 0) { |
| return py::none(); |
| } else if (outputs.size() == 1) { |
| return py::cast(autograd::as_variable_ref(outputs[0])); |
| } else { |
| py::tuple tuple(outputs.size()); |
| for (size_t i = 0; i < outputs.size(); i++) { |
| tuple[i] = py::cast(autograd::as_variable_ref(outputs[i])); |
| } |
| return std::move(tuple); |
| } |
| } |
| |
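| // Recursively collects the parameters and Tensor-typed attributes (buffers) |
| // of `m` and all of its submodules into `values`. |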
| static void gatherParametersAndBuffers( |
| std::vector<Slot>& values, |
| const Module& m) { |
| for (auto& param : m.get_parameters()) { |
| values.push_back(param); |
| } |
| for (auto& param : m.get_attributes()) { |
| if (param.type()->isSubtypeOf(TensorType::get())) { |
| values.push_back(param); |
| } |
| } |
| for (const auto& sub : m.get_modules()) { |
| gatherParametersAndBuffers(values, *sub); |
| } |
| } |
| |
| namespace { |
| |
| // A resolver that will inspect the outer Python scope to find `name`. |
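| // The callback is constructed on the Python side and typically closes over |
| // the scope in which the function was defined; a sketch of the idea: |
| //   rcb = lambda name: defining_scope.get(name)  # hypothetical helper |
| // Returning None signals "not found", and resolveValue returns nullptr. |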
| struct PythonResolver : public Resolver { |
| explicit PythonResolver(ResolutionCallback rcb) : rcb_(std::move(rcb)) {} |
| std::shared_ptr<SugaredValue> resolveValue( |
| const std::string& name, |
| Function& m, |
| const SourceRange& loc) const override { |
| AutoGIL ag; |
| py::object obj = rcb_(name); |
| if (obj.is(py::none())) { |
| return nullptr; |
| } |
| return toSugaredValue(obj, m, loc); |
| } |
| |
| TypePtr resolveType(const std::string& name) const override { |
| return ClassType::get(name); |
| } |
| |
| private: |
| ResolutionCallback rcb_; |
| }; |
| |
| std::shared_ptr<PythonResolver> pythonResolver(ResolutionCallback rcb) { |
| return std::make_shared<PythonResolver>(rcb); |
| } |
| } // namespace |
| |
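| // Returns a copy of `schema`, optionally renamed to `new_name`, with the |
| // Python default values from `default_args` converted to IValues and |
| // attached to the matching arguments. For BroadcastingList-style arguments |
| // (N is set and the type is List[T]) a scalar default of type T is accepted. |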
| FunctionSchema getSchemaWithNameAndDefaults( |
| const SourceRange& range, |
| const FunctionSchema& schema, |
| const at::optional<std::string>& new_name, |
| const FunctionDefaults& default_args) { |
| std::vector<Argument> new_args; |
| for (auto& arg : schema.arguments()) { |
| auto it = default_args.find(arg.name()); |
| if (it != default_args.end()) { |
| try { |
| IValue value; |
| auto n = arg.N(); |
| auto list_type = arg.type()->cast<ListType>(); |
| if (n && *n > 0 && list_type) { |
| // BroadcastingList, allow default values T for arg types List[T] |
| value = toIValue(it->second, list_type->getElementType()); |
| } else { |
| value = toIValue(it->second, arg.type()); |
| } |
| new_args.emplace_back( |
| arg.name(), arg.type(), arg.N(), value, arg.kwarg_only()); |
| } catch (py::cast_error& e) { |
| throw ErrorReport(range) |
| << "Expected a default value of type " << arg.type()->str() |
| << " on parameter \"" << arg.name() << "\""; |
| } |
| } else { |
| new_args.push_back(arg); |
| } |
| } |
| |
| return FunctionSchema( |
| new_name.value_or(schema.name()), |
| schema.overload_name(), |
| new_args, |
| schema.returns(), |
| schema.is_vararg(), |
| schema.is_varret()); |
| } |
| |
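| // Builds the `Self` binder used when compiling methods on `m`: the self |
| // value is typed with the module's class type and wrapped in a ModuleValue |
| // so attribute lookups on `self` resolve through the module. |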
| static Self moduleSelf(const std::shared_ptr<Module>& m) { |
| return [m](Value* v) { |
| v->setType(m->module_object()->type()); |
| return std::make_shared<ModuleValue>(v, m); |
| }; |
| } |
| |
| static void setInputTensorTypes(Graph& g, const Stack& stack) { |
| AT_ASSERT(stack.size() == g.inputs().size()); |
| for (size_t i = 0; i < stack.size(); ++i) { |
| g.inputs().at(i)->setType( |
| DimensionedTensorType::create(stack.at(i).toTensor())); |
| } |
| } |
| |
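| // Copies `graph`, stamps its inputs with DimensionedTensorType derived from |
| // the example tensors, and runs shape propagation. Exposed to Python below |
| // as _propagate_shapes. |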
| static std::shared_ptr<Graph> _propagate_shapes( |
| Graph& graph, |
| std::vector<at::Tensor> inputs, |
| bool with_grad = false) { |
| Stack stack(inputs.begin(), inputs.end()); |
| auto retval = graph.copy(); |
| setInputTensorTypes(*retval, stack); |
| PropagateInputShapes(retval); |
| return retval; |
| } |
| |
| static std::shared_ptr<Graph> _propagate_and_assign_input_and_output_shapes( |
| Graph& graph, |
| std::vector<at::Tensor> inputs, |
| std::vector<at::Tensor> outputs, |
| bool with_grad = false, |
| bool propagate = true) { |
| auto retval = graph.copy(); |
| if (propagate) { |
| setInputTensorTypes(*retval, fmap<IValue>(inputs)); |
| PropagateInputShapes(retval); |
| } |
| AT_ASSERT(retval->inputs().size() == inputs.size()); |
| for (size_t i = 0; i < retval->inputs().size(); ++i) { |
| auto scalar_type = inputs[i].scalar_type(); |
| auto sizes = inputs[i].sizes(); |
| auto type = |
| torch::jit::CompleteTensorType::create(scalar_type, at::kCPU, sizes); |
| retval->inputs()[i]->setType(type); |
| } |
| at::ArrayRef<Value*> output_values = retval->outputs(); |
| // patch this to still work if we are returning a tuple of multiple values |
| if (output_values.at(0)->type()->kind() == TupleType::Kind) { |
| AT_ASSERT(output_values.at(0)->node()->kind() == prim::TupleConstruct); |
| output_values = output_values.at(0)->node()->inputs(); |
| } |
| AT_ASSERT(output_values.size() == outputs.size()); |
| for (size_t i = 0; i < retval->outputs().size(); ++i) { |
| auto scalar_type = outputs[i].scalar_type(); |
| auto sizes = outputs[i].sizes(); |
| auto type = |
| torch::jit::CompleteTensorType::create(scalar_type, at::kCPU, sizes); |
| output_values[i]->setType(type); |
| } |
| return retval; |
| } |
| |
| void initJitScriptBindings(PyObject* module) { |
| auto m = py::handle(module).cast<py::module>(); |
| |
| // pybind11 converts STL containers by copy, which would make them |
| // immutable from Python, so bind ExtraFilesMap as an opaque, mutable type. |
| py::bind_map<ExtraFilesMap>(m, "ExtraFilesMap"); |
| |
| // torch.jit.ScriptModule is a subclass of this C++ object. |
| // Methods here are prefixed with _ since they should not be |
| // public. |
| py::class_<Module, std::shared_ptr<Module>>(m, "ScriptModule") |
| .def(py::init<>()) |
| .def( |
| "save", |
| [](std::shared_ptr<Module> m, |
| const std::string& filename, |
| const ExtraFilesMap& _extra_files = ExtraFilesMap()) { |
| m->save(filename, _extra_files); |
| }, |
| py::arg("filename"), |
| py::arg("_extra_files") = ExtraFilesMap()) |
| .def( |
| "save_to_buffer", |
| [](std::shared_ptr<Module> m, |
| const ExtraFilesMap& _extra_files = ExtraFilesMap()) { |
| std::ostringstream buf; |
| m->save(buf, _extra_files); |
| return py::bytes(buf.str()); |
| }, |
| py::arg("_extra_files") = ExtraFilesMap()) |
| .def("_set_optimized", &Module::set_optimized) |
| .def( |
| "_define", |
| [](std::shared_ptr<Module> m, |
| const std::string& script, |
| ResolutionCallback rcb, |
| bool has_self) { |
| if (has_self) { |
| m->class_compilation_unit().define( |
| script, pythonResolver(rcb), moduleSelf(m)); |
| } else { |
| m->_define_lowered(script, pythonResolver(rcb)); |
| } |
| didFinishEmitModule(m); |
| }) |
| .def( |
| "_create_methods", |
| [](std::shared_ptr<Module> m, |
| const std::vector<Def>& defs, |
| const std::vector<ResolutionCallback>& rcbs, |
| const std::vector<FunctionDefaults>& defaults) { |
| std::vector<ResolverPtr> resolvers; |
| resolvers.reserve(rcbs.size()); |
| for (auto& callback : rcbs) { |
| resolvers.push_back(pythonResolver(callback)); |
| } |
| m->class_compilation_unit().define(defs, resolvers, moduleSelf(m)); |
| // Stitch in default arguments for each Def if provided |
| auto defaults_it = defaults.begin(); |
| auto defs_it = defs.begin(); |
| while (defs_it != defs.end()) { |
| auto& method = m->class_compilation_unit().get_function( |
| (*defs_it).name().name()); |
| method.setSchema(getSchemaWithNameAndDefaults( |
| defs_it->range(), |
| method.getSchema(), |
| at::nullopt, |
| *defaults_it)); |
| ++defs_it; |
| ++defaults_it; |
| } |
| didFinishEmitModule(m); |
| }) |
| .def( |
| "_get_method", |
| [](Module& self, const std::string& name) -> const Method& { |
| return self.get_method(name); |
| }, |
| py::return_value_policy::reference_internal) |
| .def("_register_parameter", &Module::register_parameter) |
| .def( |
| "_register_attribute", |
| [](Module& self, std::string name, TypePtr type, py::object value) { |
| self.register_attribute(name, type, toIValue(value, type)); |
| }) |
| .def("_register_module", &Module::register_module) |
| .def("_register_buffer", &Module::register_buffer) |
| .def("_set_parameter", &Module::set_parameter) |
| .def("_get_parameter", &Module::get_parameter) |
| .def("_get_buffer", &Module::get_buffer) |
| .def("_get_attribute", &Module::get_attribute) |
| .def("_get_module", &Module::get_module) |
| .def( |
| "_get_modules", |
| [](Module& self) -> py::tuple { |
| auto modules = self.get_modules(); |
| py::tuple result(modules.size()); |
| for (size_t i = 0; i < modules.size(); ++i) { |
| auto& item = modules[i]; |
| result[i] = std::make_pair(item->name(), item); |
| } |
| return result; |
| }) |
| .def( |
| "_get_parameters", |
| [](Module& self) -> py::tuple { |
| auto parameters = self.get_parameters(); |
| py::tuple result(parameters.size()); |
| for (size_t i = 0; i < parameters.size(); ++i) { |
| auto& p = parameters[i]; |
| result[i] = std::make_tuple( |
| p.name(), autograd::as_variable_ref(p.value().toTensor())); |
| } |
| return result; |
| }) |
| .def( |
| "_get_attributes", |
| [](Module& self) -> py::tuple { |
| auto attributes = self.get_attributes(); |
| py::tuple result(attributes.size()); |
| for (size_t i = 0; i < attributes.size(); ++i) { |
| auto& buffer = attributes[i]; |
| IValue v = buffer.value(); |
| result[i] = std::make_tuple( |
| buffer.name(), buffer.type(), toPyObject(std::move(v))); |
| } |
| return result; |
| }) |
| .def( |
| "_has_attribute", |
| [](Module& self, const std::string& name) -> bool { |
| return self.find_attribute(name); |
| }) |
| .def( |
| "_has_parameter", |
| [](Module& self, const std::string& name) -> bool { |
| return self.find_parameter(name); |
| }) |
| .def( |
| "_has_buffer", |
| [](Module& self, const std::string& name) -> bool { |
| return self.find_buffer(name); |
| }) |
| .def( |
| "_has_module", |
| [](Module& self, const std::string& name) { |
| return bool(self.find_module(name)); |
| }) |
| .def( |
| "_has_method", |
| [](Module& self, const std::string& name) { |
| return bool(self.find_method(name)); |
| }) |
| .def( |
| "_method_names", |
| [](Module& self) { |
| return fmap( |
| self.get_methods(), [](const std::unique_ptr<Method>& method) { |
| return method->name(); |
| }); |
| }) |
| .def( |
| "_create_method_from_graph", |
| [](Module& self, |
| const std::string& name, |
| std::shared_ptr<Graph> graph) { |
| self._define_lowered(name, std::move(graph), {}); |
| }) |
| .def( |
| "_create_method_from_trace", |
| [](std::shared_ptr<Module> self, |
| const std::string& name, |
| py::function func, |
| py::tuple input_tuple, |
| py::function var_lookup_fn, |
| bool force_outplace) { |
| // Prereq: the Module's buffers and parameters are unique. |
| // This was ensured in Python before calling this function. |
| std::vector<Slot> parameters; |
| gatherParametersAndBuffers(parameters, *self); |
| auto typed_inputs = toTypedStack(input_tuple); |
| if (parameters.size() > 0) { |
| auto inputs = typed_inputs.stack(); |
| auto input_types = typed_inputs.types()->elements().vec(); |
| for (const Slot& param : parameters) { |
| inputs.emplace_back(param.value()); |
| input_types.push_back(incompleteInferTypeFrom(param.value())); |
| } |
| typed_inputs = TypedStack(inputs, TupleType::create(input_types)); |
| } |
| auto graph = tracer::createGraphByTracing( |
| func, |
| typed_inputs, |
| var_lookup_fn, |
| force_outplace, |
| input_tuple.size()); |
| self->_define_lowered( |
| name, std::move(graph), std::move(parameters)); |
| didFinishEmitModule(self); |
| }) |
| .def( |
| "get_debug_state", |
| [](Module& self) { |
| if (self.find_method("forward")) { |
| Method& m = self.get_method("forward"); |
| return m.get_executor().getDebugState(); |
| } |
| throw std::runtime_error( |
| "Attempted to call get_debug_state on a Module without a compiled forward()"); |
| }) |
| .def( |
| "forward", |
| [](py::args args, py::kwargs kwargs) { |
| // We implement this in C++ to avoid incurring the pybind11 dispatch |
| // overhead twice: once to call into the method lookup for "forward" |
| // and once to actually invoke the method. |
| // |
| // There is a thin wrapper on top of this method in the C++ version |
| // of ScriptModule. |
| |
| // see: [pybind11 varargs] |
| Module& self = py::cast<Module&>(args[0]); |
| return invokeScriptMethodFromPython( |
| self.get_method("forward"), |
| tuple_slice(std::move(args), 1), |
| std::move(kwargs)); |
| }) |
| .def_property_readonly( |
| "code", |
| [](Module& self) { |
| std::ostringstream ss; |
| std::vector<at::Tensor> tensors; |
| std::vector<ClassTypePtr> classes; |
| PythonPrint(ss, self, tensors, classes, false); |
| return ss.str(); |
| }) |
| .def("apply", &Module::apply) |
| .def("_copy_into", &Module::copy_into) |
| .def( |
| "_copy_method", |
| [](std::shared_ptr<Module> m, |
| std::string name, |
| std::vector<std::tuple<std::shared_ptr<Module>, std::string>> |
| params, |
| std::shared_ptr<Module> orig) { |
| std::vector<Slot> member_inputs; |
| for (auto& p : params) { |
| Slot* np = std::get<0>(p)->find_parameter(std::get<1>(p)); |
| if (np == nullptr) { |
| np = std::get<0>(p)->find_buffer(std::get<1>(p)); |
| } |
| AT_ASSERT(np != nullptr); |
| member_inputs.push_back(*np); |
| } |
| |
| Method* orig_method = orig->find_method(name); |
| m->_define_lowered( |
| name, orig_method->graph()->copy(), std::move(member_inputs)); |
| }); |
| |
| py::class_<Method>(m, "ScriptMethod", py::dynamic_attr()) |
| .def( |
| "__call__", |
| [](py::args args, py::kwargs kwargs) { |
| // see: [pybind11 varargs] |
| Method& method = py::cast<Method&>(args[0]); |
| return invokeScriptMethodFromPython( |
| method, tuple_slice(std::move(args), 1), std::move(kwargs)); |
| }) |
| .def_property_readonly("graph", &Method::graph) |
| .def( |
| "initial_ivalues", |
| [](Method& m) { |
| std::vector<at::Tensor> tensors; |
| for (auto& t : m.initial_ivalues()) { |
| tensors.push_back(t.value().toTensor()); |
| } |
| return tensors; |
| }) |
| .def_property_readonly("schema", &Method::getSchema) |
| .def_property_readonly("code", [](Method& self) { |
| std::ostringstream ss; |
| std::vector<at::Tensor> tensors; |
| std::vector<ClassTypePtr> classes; |
| PythonPrint(ss, self, tensors, classes, false); |
| return ss.str(); |
| }); |
| |
| m.def( |
| "_jit_script_compile", |
| [](std::shared_ptr<Module> mod, |
| const Def& def, |
| ResolutionCallback rcb, |
| FunctionDefaults defaults) { |
| auto def_f = def.withName("forward"); |
| |
| mod->_define_lowered({def_f}, {pythonResolver(rcb)}); |
| auto& func = mod->lowered_methods().get_function("forward"); |
| func.setSchema(getSchemaWithNameAndDefaults( |
| def.range(), func.getSchema(), def.name().name(), defaults)); |
| auto& func2 = mod->class_compilation_unit().get_function("forward"); |
| func2.setSchema(getSchemaWithNameAndDefaults( |
| def.range(), func2.getSchema(), def.name().name(), defaults)); |
| didFinishEmitModule(mod); |
| return mod; |
| }); |
| |
| m.def( |
| "_jit_script_class_compile", |
| [](const ClassDef& classDef, ResolutionCallback rcb) { |
| auto cu = std::make_shared<CompilationUnit>(); |
| auto classType = ClassType::create(classDef.name().name(), cu); |
| std::vector<ResolverPtr> rcbs; |
| std::vector<Def> methodDefs; |
| for (const auto& def : classDef.defs()) { |
| methodDefs.push_back(def); |
| rcbs.push_back(pythonResolver(rcb)); |
| } |
| cu->define(methodDefs, rcbs, simpleSelf(classType)); |
| }); |
| |
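| // parse_type_comment parses a mypy-style type comment into a Decl, e.g. |
| //   # type: (Tensor, int) -> Tensor |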
| m.def("parse_type_comment", [](const std::string& comment) { |
| Parser p(comment); |
| return Decl(p.parseTypeComment()); |
| }); |
| |
| m.def("merge_type_from_type_comment", &mergeTypesFromTypeComment); |
| m.def( |
| "import_ir_module", |
| [](ModuleLookup module_lookup, |
| const std::string& filename, |
| py::object map_location, |
| ExtraFilesMap& extra_files) { |
| c10::optional<at::Device> optional_device; |
| if (!map_location.is(py::none())) { |
| AT_ASSERT(THPDevice_Check(map_location.ptr())); |
| optional_device = |
| reinterpret_cast<THPDevice*>(map_location.ptr())->device; |
| } |
| import_ir_module(module_lookup, filename, optional_device, extra_files); |
| }); |
| m.def( |
| "import_ir_module_from_buffer", |
| [](ModuleLookup module_lookup, |
| const std::string& buffer, |
| py::object map_location, |
| ExtraFilesMap& extra_files) { |
| std::istringstream in(buffer); |
| c10::optional<at::Device> optional_device; |
| if (!map_location.is(py::none())) { |
| AT_ASSERT(THPDevice_Check(map_location.ptr())); |
| optional_device = |
| reinterpret_cast<THPDevice*>(map_location.ptr())->device; |
| } |
| import_ir_module(module_lookup, in, optional_device, extra_files); |
| }); |
| m.def("_jit_import_methods", import_methods); |
| m.def("_jit_set_emit_module_hook", setEmitModuleHook); |
| m.def("_jit_clear_class_registry", ClassType::clearRegistry); |
| m.def( |
| "_debug_set_autodiff_subgraph_inlining", |
| debugSetAutodiffSubgraphInlining); |
| m.def("_propagate_shapes", _propagate_shapes); |
| m.def( |
| "_propagate_and_assign_input_and_output_shapes", |
| _propagate_and_assign_input_and_output_shapes); |
| m.def("_jit_python_print", [](py::object obj) { |
| std::ostringstream ss; |
| std::vector<at::Tensor> constants; |
| std::vector<ClassTypePtr> classes; |
| if (py::isinstance<Module>(obj)) { |
| auto& self = py::cast<Module&>(obj); |
| PythonPrint(ss, self, constants, classes, true); |
| } else { |
| auto& m = py::cast<Method&>(obj); |
| PythonPrint(ss, m, constants, classes, true); |
| } |
| return std::make_pair(ss.str(), std::move(constants)); |
| }); |
| m.def( |
| "_last_executed_optimized_graph", |
| []() { return lastExecutedOptimizedGraph(); }, |
| "Retrieve the optimized graph that was run the last time the graph executor ran on this thread"); |
| |
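| // Test-only utility for pattern-checking strings and graphs. A rough |
| // Python-side usage sketch: |
| //   FileCheck().check("aten::add").check_count("aten::mul", 2).run(graph_str) |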
| py::class_<testing::FileCheck>(m, "FileCheck") |
| .def(py::init<>()) |
| .def("check", &testing::FileCheck::check) |
| .def("check_not", &testing::FileCheck::check_not) |
| .def("check_same", &testing::FileCheck::check_same) |
| .def("check_next", &testing::FileCheck::check_next) |
| .def("check_count", &testing::FileCheck::check_count) |
| .def("check_dag", &testing::FileCheck::check_dag) |
| .def("check_count", &testing::FileCheck::check_count) |
| .def( |
| "check_count", |
| [](testing::FileCheck& f, |
| const std::string& str, |
| size_t count, |
| bool exactly) { return f.check_count(str, count, exactly); }, |
| "Check Count", |
| py::arg("str"), |
| py::arg("count"), |
| py::arg("exactly") = false) |
| .def( |
| "run", |
| [](testing::FileCheck& f, const std::string& str) { |
| return f.run(str); |
| }) |
| .def( |
| "run", [](testing::FileCheck& f, const Graph& g) { return f.run(g); }) |
| .def( |
| "run", |
| [](testing::FileCheck& f, |
| const std::string& input, |
| const std::string& output) { return f.run(input, output); }, |
| "Run", |
| py::arg("checks_file"), |
| py::arg("test_file")) |
| .def( |
| "run", |
| [](testing::FileCheck& f, const std::string& input, const Graph& g) { |
| return f.run(input, g); |
| }, |
| "Run", |
| py::arg("checks_file"), |
| py::arg("graph")); |
| |
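| // Bindings for the TorchScript logging hooks: a logger can be installed |
| // from Python, and LockingLogger aggregates counter values by SUM or AVG. |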
| m.def( |
| "_logging_set_logger", |
| [](logging::LoggerBase* logger) { return logging::setLogger(logger); }, |
| py::return_value_policy::reference); |
| py::class_<logging::LoggerBase, std::shared_ptr<logging::LoggerBase>>( |
| m, "LoggerBase"); |
| py::enum_<logging::LockingLogger::AggregationType>(m, "AggregationType") |
| .value("SUM", logging::LockingLogger::AggregationType::SUM) |
| .value("AVG", logging::LockingLogger::AggregationType::AVG) |
| .export_values(); |
| py::class_< |
| logging::LockingLogger, |
| logging::LoggerBase, |
| std::shared_ptr<logging::LockingLogger>>(m, "LockingLogger") |
| .def(py::init<>()) |
| .def("set_aggregation_type", &logging::LockingLogger::setAggregationType) |
| .def("get_counter_val", &logging::LockingLogger::getCounterValue); |
| py::class_< |
| logging::NoopLogger, |
| logging::LoggerBase, |
| std::shared_ptr<logging::NoopLogger>>(m, "NoopLogger") |
| .def(py::init<>()); |
| } |
| } // namespace script |
| } // namespace jit |
| } // namespace torch |