| #pragma once |
| |
| #include <torch/csrc/utils/python_stub.h> |
| |
| #include <torch/csrc/WindowsTorchApiMacro.h> |
| #include <torch/csrc/autograd/edge.h> |
| #include <torch/csrc/autograd/function_hook.h> |
| |
| #include <ATen/ATen.h> |
| #include <c10/util/Exception.h> |
| |
| #include <memory> |
| #include <mutex> |
| #include <stdexcept> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| namespace torch { namespace autograd { |
| |
| struct Function; |
| |
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| /// Variable |
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
/// A `Variable` augments a `Tensor` with the ability to interact with our
| /// autograd machinery. Conceptually, `Variable`s travel along `Edge`s between |
| /// `Function`s in the autograd graph. A `Variable` can either be a leaf, like a |
| /// weight in a neural network, or an interior variable, when it is the result |
| /// of an operation between variables. Every `Variable` also stores another |
| /// `Variable` called its `grad` (gradient). If the variable is a leaf, its |
| /// gradient will be accumulated into this variable. |
| /// |
| /// Gradient Edges |
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
/// `Variable`s have the notion of a `gradient_edge`, which is the
| /// edge in the autograd graph that connects the variable to a particular input |
| /// of the gradient function that will be invoked with the variable during the |
| /// backward pass. More precisely, this gradient function can be one of two |
| /// things: |
| /// 1. A `grad_fn`, if the variable is in the interior of the graph. This is the |
| /// gradient of the function that produced the variable. |
| /// 2. A `grad_accumulator`, if the variable is a leaf, which accumulates a |
| /// scalar gradient value into its `grad` variable. |
| /// |
| /// Versioning |
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
/// Another major feature of `Variable`s is *versions*. Versions are
| /// incremented when an in-place mutation of a variable occurs. Versions are |
| /// useful when constructing `SavedVariable`s, which take a snapshot of a |
| /// `Variable` at a certain version. You can retrieve a `Variable`'s version |
| /// through its `current_version()` method. |
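///
/// For example (a minimal sketch; assumes `at::ones` here produces a plain,
/// non-Variable `Tensor`):
///
///   Variable v = make_variable(at::ones({2, 2}), /*requires_grad=*/true);
///   uint32_t before = v.current_version();
///   v.bump_version();  // what every in-place operation does internally
///   AT_ASSERT(v.current_version() == before + 1);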
| /// |
| /// Views |
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| /// It is possible for a `Variable` to be a *view* of another `Variable`, in |
| /// which case it tracks that `Variable`'s data and autograd history. Beyond |
| /// construction, the interface of a view is identical to that of a regular |
/// `Variable`. You can determine whether a `Variable` is in fact a view by
| /// probing its `is_view()` method. Note that the *view* semantics are only |
| /// meaningful for `Variable` relations that are relevant to autograd. For |
| /// example, if you hide your code from autograd using `.no_grad()`, the |
| /// `Variable`s will not be registered as having view relations, even if they |
| /// share storage. |
| /// See NOTE [ Autograd View Variables ] for more details. |
| /// |
| /// |
| /// Interface |
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| /// `Variable` inherits from `Tensor` and thus its API is a superset of that of |
| /// `Tensor`. This means you can perform all the usual mathematical and other |
| /// operations you can perform on `Tensor`s also on `Variable`s. Furthermore, |
| /// `Variable` and `Tensor` actually convert implicitly between each other. You |
| /// can thus call functions defined on `Tensor`s also with `Variable`s. For |
| /// this, the `Variable` class allows implicit construction from `Tensor`. It is |
| /// the responsibility of calling code to ensure that this constructor is |
| /// invoked only when the `Tensor` contains autograd metadata. Most notably, it |
| /// is *not* correct to construct a brand new `Variable` from a `Tensor` using |
| /// this constructor. To do so, you must use the `make_variable` free function |
| /// instead. To create a view variable, use `make_variable_view`. |
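///
/// For example (a minimal sketch; assumes that operations on `v` dispatch
/// through the autograd-aware layer, so their outputs already carry autograd
/// metadata):
///
///   at::Tensor t = at::ones({2, 2});                        // plain Tensor
///   Variable v = make_variable(t, /*requires_grad=*/true);  // new Variable
///   at::Tensor s = v.sum();  // Tensor APIs work directly on Variables
///   Variable w = s;          // OK: `s` is dynamically a Variable already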
| ///~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| struct TORCH_API Variable : public at::Tensor { |
| /// Default constructor. |
| Variable() = default; |
| |
| // Factory Functions |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| // TODO: These factory functions don't need to be friends anymore. Move them out of |
| // the Variable class. |
| |
| /// Creates a `Variable` that is a *view* of another (*base*) variable. |
| /// The `gradient_edge` is an optional (gradient_function, input_number) pair. |
| /// `is_differentiable` is a bool that specifies whether this view is |
| /// differentiable, i.e., whether the relation should be tracked by autograd. |
| /// See NOTE [ Autograd View Variables ] for details. |
| friend Variable make_variable_view( |
| Variable base, |
| at::Tensor data, |
| bool is_differentiable, |
| bool allow_tensor_metadata_change, |
| Edge gradient_edge); |
| |
| /// Creates a `Variable` from the given `Tensor`, copying its underlying `TensorImpl`. |
| /// `requires_grad` should be |
| /// set only for leaves, and determines whether the `Variable` will accumulate |
| /// gradients. NOTE: `data` must *not* be a `Variable` already. Its dynamic |
| /// type *must* be `Tensor`. |
| friend Variable make_variable( |
| at::Tensor data, |
| bool requires_grad, |
| bool allow_tensor_metadata_change); |
| |
| /// Creates a `Variable` from the given `Tensor`, consuming its underlying `TensorImpl`. |
| /// This is intended to be used from functions that immediately create a `Tensor`, |
| /// convert it to a `Variable`, and then free it; it has been found to |
| /// decrease the overhead of those operations, in some situations. |
| /// The comments about `requires_grad` and `data` on the above version also apply to this one. |
| friend Variable make_variable_consuming( |
| at::Tensor data, |
| bool requires_grad, |
| bool allow_tensor_metadata_change); |
| |
| /// Creates a `Variable` from the given `Tensor`, copying its underlying `TensorImpl`. |
| /// `gradient_edge` should be a (function, input_nr) pair specifying the function |
  /// in the autograd graph and which particular input of that function this
  /// variable is connected to.
| friend Variable make_variable( |
| at::Tensor data, |
| Edge gradient_edge, |
| bool allow_tensor_metadata_change); |
| |
| // Tensor Conversions |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| // "Downcasts" a `Tensor` into a `Variable`. Only call this on tensors you |
| // know are Variables. |
| /*implicit*/ Variable(at::Tensor const& rhs) : at::Tensor(rhs) { |
| TORCH_CHECK( |
| is_variable() || !defined(), |
| "Tensor that was converted to Variable was not actually a Variable"); |
| } |
| |
| /*implicit*/ Variable(at::Tensor&& rhs) |
| : at::Tensor(std::move(rhs)) { |
| TORCH_CHECK( |
| is_variable() || !defined(), |
| "Tensor that was converted to Variable was not actually a Variable"); |
| } |
| |
| // NOTE: Assignment operators to Tensor come for free from the constructors. |
| |
| /// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended |
| /// to be used from functions that need to access the `Variable`'s equivalent `Tensor` |
| /// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`). |
| /// |
| /// One notable difference with the legacy `.data()` function is that changes to the |
| /// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset) |
| /// will not update the original `Variable`, due to the fact that this function |
| /// shallow-copies the `Variable`'s underlying TensorImpl. |
| at::Tensor tensor_data() const noexcept; |
| |
| /// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data` |
  /// in Python, which creates a new `Variable` that shares the same storage and
| /// tensor metadata with the original `Variable`, but with a completely new |
| /// autograd history. |
| /// |
| /// NOTE: If we change the tensor metadata (e.g. sizes / strides / |
| /// storage / storage_offset) of a variable created from `var.variable_data()`, those |
| /// changes will not update the original variable `var`. In `.variable_data()`, we set |
| /// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal, |
| /// in order to prevent users from changing metadata of `var.variable_data()` |
| /// and expecting the original variable `var` to also be updated. |
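  ///
  /// A minimal sketch of the difference between the two accessors:
  ///
  ///   Variable v = make_variable(at::ones({2, 2}), /*requires_grad=*/true);
  ///   at::Tensor td = v.tensor_data();    // shares `v`'s version counter
  ///   at::Tensor vd = v.variable_data();  // fresh autograd history; metadata
  ///                                       // changes explicitly disallowed
  ///   // Both share storage with `v`, but because each call shallow-copies
  ///   // the TensorImpl, metadata changes on `td` or `vd` do not affect `v`.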
| at::Tensor variable_data() const noexcept; |
| |
| // Gradient Function and Edges |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// Gets the gradient function of the `Variable`. If this is a leaf variable, |
| /// the pointer returned will be null. |
| /// |
| /// For View Variables: |
| /// Gets the up-to-date grad_fn. If the shared data or base was modified, we |
| /// re-create the grad_fn to express the up-to-date view relationship between |
| /// this and the base Variable. |
| const std::shared_ptr<Function>& grad_fn() const; |
| |
| /// Gets the raw gradient function pointer, whatever it currently is. |
| Function* grad_fn_unsafe() const; |
| |
| /// Set the gradient accumulator of the `Variable`. This is only applicable to |
| /// leaf variables. Interior variables should call `set_gradient_edge()`. |
| void set_grad_accumulator(std::weak_ptr<Function> grad_accumulator); |
| |
| /// Attempts to get a pointer to the gradient accumulator of the `Variable`, |
| /// if it still exists. If the gradient accumulator function has been |
| /// destroyed, returns a `nullptr`. |
| std::shared_ptr<Function> try_get_grad_accumulator() const; |
| |
| /// Gets the gradient accumulator of the `Variable` if it has one, or else |
  /// creates one on the fly and returns it.
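  ///
  /// Sketch of leaf vs. interior behavior (assumes `leaf.mul(2)` dispatches
  /// through the autograd-aware layer, so its output is itself a Variable):
  ///
  ///   Variable leaf = make_variable(at::ones({2}), /*requires_grad=*/true);
  ///   Variable out = leaf.mul(2);            // interior variable
  ///   AT_ASSERT(leaf.grad_fn() == nullptr);  // leaves have no grad_fn
  ///   AT_ASSERT(out.grad_fn() != nullptr);   // interior variables do
  ///   auto acc = leaf.grad_accumulator();    // created on the fly if needed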
| std::shared_ptr<Function> grad_accumulator() const; |
| |
| /// Returns the "canonical" gradient edge of this `Variable`, i.e. either the |
| /// gradient function if this is an interior `Variable`, or the gradient |
| /// accumulator otherwise. If the `Variable` is interior, the returned `Edge` |
| /// will store the input index of the `Function` to which this variable is |
| /// connected in its `input_nr` field. For leaves, the `input_nr` is always |
| /// zero. Note that `set_gradient_edge` and `gradient_edge` are not |
| /// symmetric. You must use `set_gradient_edge` to set the `grad_fn` and |
| /// `set_grad_accumulator` to set the accumulator. |
| Edge gradient_edge() const { |
| // If grad_fn is null (as is the case for a leaf node), we instead |
| // interpret the gradient function to be a gradient accumulator, which will |
| // accumulate its inputs into the grad property of the variable. These |
| // nodes get suppressed in some situations, see "suppress gradient |
| // accumulation" below. Note that only variables which have `requires_grad = |
| // True` can have gradient accumulators. |
| if (const auto& gradient = grad_fn()) { |
| return Edge(gradient, output_nr()); |
| } else { |
| return Edge(grad_accumulator(), 0); |
| } |
| } |
| |
| /// Returns a copy of this `Variable` that is detached from its autograd graph |
| /// and has a blank version. This method is OK to call if the `Variable` is a |
| /// view. |
| /// NOTE: Previously, if we change the tensor metadata (e.g. sizes / strides / |
| /// storage / storage_offset) of a tensor created from `detach()`, those metadata |
| /// in the original tensor will also be updated. However, the new behavior is that |
| /// those metadata changes to the detached tensor will not update the original tensor |
| /// anymore, and in the `detach()` function we need to set `allow_tensor_metadata_change_` |
| /// to false to make such changes explicitly illegal, in order to prevent users from |
| /// changing metadata of the detached tensor and expecting the original tensor to also |
| /// be updated. |
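  ///
  /// For example (a minimal sketch):
  ///
  ///   Variable v = make_variable(at::ones({2, 2}), /*requires_grad=*/true);
  ///   Variable d = v.detach();  // shares storage, but has no autograd history
  ///   AT_ASSERT(d.grad_fn() == nullptr);
  ///   AT_ASSERT(!d.is_view());  // detach() creates a *non-differentiable*
  ///                             // view, which autograd does not register as
  ///                             // a view relation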
| Variable detach() const; |
| |
  /// Like `detach()`, but detaches this `Variable` in-place. This method may
| /// only be called on non-view `Variable`s. You can use `is_view()` to check |
| /// this. If this `Variable` is a view, throws an `std::runtime_error()`. |
| void detach_(); |
| |
  /// Computes the gradient of the current tensor w.r.t. graph leaves.
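  ///
  /// For example (a minimal sketch; assumes the ops below dispatch through the
  /// autograd-aware layer):
  ///
  ///   Variable x = make_variable(at::ones({2, 2}), /*requires_grad=*/true);
  ///   Variable y = (x * x).sum();
  ///   y.backward(c10::nullopt, /*keep_graph=*/false, /*create_graph=*/false);
  ///   // x's `grad` variable now holds dy/dx, i.e. 2 * x.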
| void backward( |
| c10::optional<Tensor> gradient, |
| bool keep_graph, |
| bool create_graph) const; |
| |
| /// Sets the tensor data held by this `Variable` to be the same as `new_data`. |
  /// It requires that `new_data` has the same derived TensorImpl type as
| /// this `Variable`, by checking `_has_same_tensorimpl_type(this, new_data)`. |
| void set_data(const at::Tensor &new_data); |
| |
| /// Set the gradient edge -- i.e. `grad_fn` and `input_nr` -- of the |
| /// `Variable`. |
| /// NOTE: This will always set the `grad_fn`, even if this is a leaf variable, |
| /// and never the `grad_accumulator`. For the latter, use |
| /// `set_grad_accumulator`. This allows late construction of an interior |
| /// `Variable`. |
| void set_gradient_edge(Edge edge) noexcept; |
| |
| /// Returns the input index of the gradient `Function` to which this |
| /// `Variable` is connected. Note: input indexes of the gradient `Function` |
| /// correspond to output indexes of the corresponding forward `Function`. |
| uint32_t output_nr() const noexcept; |
| |
| /// True if this `Variable` is a leaf and thus does not have a `grad_fn`. |
| bool is_leaf() const noexcept; |
| |
| // Versions |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// Increments the version count of this `Variable`. |
| void bump_version() noexcept; |
| void set_version_counter(const c10::VariableVersion& version_counter) noexcept; |
| |
  /// Retrieves this `Variable`'s version counter.
| const c10::VariableVersion& version_counter() const noexcept; |
| |
| /// Retrieves the current value of the `Variable`'s version counter. |
| /// Equivalent to calling `version_counter().current_version()`. |
| uint32_t current_version() const noexcept; |
| |
| // Autograd Graph Interaction |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// Update the `grad_fn` of an existing Variable. Called after in-place |
| /// modifications. |
| /// |
| /// For View Variables: |
| /// Called after in-place modifications. Modifies the grad_fn of the base |
| /// Variable. |
| void rebase_history(Edge gradient_edge); |
| |
| // Hooks |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| void add_hook(std::shared_ptr<FunctionPreHook> hook); |
| const std::vector<std::shared_ptr<FunctionPreHook>>& hooks() const noexcept; |
| void clear_hooks(); |
| |
| // View Variables |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// Returns true if this `Variable` is a view of another `Variable`. |
| bool is_view() const noexcept; |
| |
| /// Returns the `Variable` that this `Variable` is a view of. If this |
| /// `Variable` is not a view, throw a `std::runtime_error`. |
| const Variable& base() const; |
| |
| // Miscellaneous |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| void set_name(const std::string& name); |
| const std::string& name() const noexcept; |
| |
| PyObject* pyobj() const noexcept; |
| void set_pyobj(PyObject* pyobj) noexcept; |
| |
| struct AutogradMeta; |
| Variable::AutogradMeta* get_autograd_meta() const noexcept; |
| |
| private: |
| struct DifferentiableViewMeta; |
| |
| // Private Methods |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| Variable(c10::intrusive_ptr<at::TensorImpl> self); |
| at::TensorImpl* get() const; |
| }; |
| |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| // Variable::AutogradMeta |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// Each `Variable` has one unique `AutogradMeta` struct, which stores autograd |
| /// metadata fields that are necessary for tracking the Variable's autograd history. |
| |
| struct TORCH_API Variable::AutogradMeta : public c10::AutogradMetaInterface { |
| std::string name; |
| |
| Variable grad_; |
| std::shared_ptr<Function> grad_fn_; |
| std::weak_ptr<Function> grad_accumulator_; |
| |
| std::vector<std::shared_ptr<FunctionPreHook>> hooks_; |
| |
| // Only meaningful on leaf variables (must be false otherwise) |
| bool requires_grad_; |
| |
| bool is_view_; |
| |
| // The "output number" of this variable; e.g., if this variable |
| // was the second output of a function, then output_nr == 1. |
  // We use this to make sure we can set up the backwards trace
| // correctly when this variable is passed to another function. |
| uint32_t output_nr_; |
| |
| // Mutex to ensure that concurrent read operations that modify internal |
| // state are still thread-safe. Used by grad_fn() and |
| // grad_accumulator(). |
| std::mutex mutex_; |
| |
| /// Sets the `requires_grad` property of `Variable`. This should be true for |
| /// leaf variables that want to accumulate gradients, and false for all other |
| /// variables. |
| void set_requires_grad(bool requires_grad, at::TensorImpl* self_impl) override { |
| TORCH_CHECK( |
| !requires_grad || at::isFloatingType(at::typeMetaToScalarType(self_impl->dtype())), |
| "Only Tensors of floating point dtype can require gradients"); |
| requires_grad_ = requires_grad; |
| } |
| |
| bool requires_grad() const override { |
| return requires_grad_ || grad_fn_; |
| } |
| |
| /// Accesses the gradient `Variable` of this `Variable`. |
| Variable& grad() override { |
| return grad_; |
| } |
| |
| const Variable& grad() const override { |
| return grad_; |
| } |
| |
| AutogradMeta( |
| at::TensorImpl* self_impl, |
| bool requires_grad = false, |
| Edge gradient_edge = Edge()); |
| }; |
| |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| // Variable::DifferentiableViewMeta |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// NOTE [ Autograd View Variables ] |
| /// |
/// Many operations return a Variable that shares storage with an input
/// Variable. The returned Variable is called a **view** Variable on the input
/// **base** Variable.
| /// |
| /// In PyTorch, we have two types of views: differentiable views, and |
| /// non-differentiable views. In either type, to support proper version |
| /// checking, the base and view Variables must always share the same |
| /// version_counter. |
| /// |
| /// |
| /// Differentiable Views |
| /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| /// Differentiable views are the view variables where you want gradients to flow |
| /// back to the base variables. Out-of-place operations on views are quite |
| /// straightforward, but in-place ones are very tricky. Even if the base |
/// variable does not require grad when we create the view, we still need to
/// track the view relation, because future in-place ops may require
/// backpropagating through it. For example, we need to support
| /// |
| /// (1) in-place operation on view, e.g., |
| /// |
| /// # Have: |
| /// # base.requires_grad = False |
| /// # var.requires_grad = True |
| /// base[1] = var # i.e., base[1].copy_(var) |
| /// torch.autograd.grad(base.sum(), var) <- should return an all ones tensor |
| /// |
| /// (2) in-place operation on base after view is created, e.g., |
| /// |
| /// # Have: |
| /// # base.requires_grad = False |
| /// # var.requires_grad = True |
| /// view = base[1] |
| /// base.copy_(var) |
| /// torch.autograd.grad(view.sum(), var) <- should return a tensor with |
| /// var[1] filled with all ones and |
| /// zeros everywhere else |
| /// |
| /// Variable::DifferentiableViewMeta is created to support gradient tracking of |
| /// such **in-place** operations. In particular, |
| /// + if an in-place op is done on base, the grad_fn field of the view may |
| /// become stale. So accesses should always go through grad_fn(), which |
| /// reconstructs an updated grad_fn if the version_counter has incremented. |
| /// All other fields are always valid. |
| /// + if an in-place op is done on view, in rebase_history() of view, which is |
| /// called after every in-place op in VariableType.cpp, the grad_fn of base |
| /// is updated. |
| /// |
| /// |
| /// Non-Differentiable Views |
| /// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| /// In certain cases, although function outputs share storage with inputs, they |
| /// will **never** require gradient history tracking. Instead of registering the |
/// view relation via DifferentiableViewMeta in autograd, the views will use
/// the usual AutogradMeta and just share the version counters with the base
/// Variables.
| /// Such views include: |
| /// 1. Views created from .detach() |
///  2. Views that are non-differentiable by their nature.
///     E.g., `sparse_tensor.indices()` is an integral view on a (possibly)
| /// floating point tensor. |
| /// See top of `derivatives.yaml` on how to specify that outputs of a |
| /// function are non-differentiable. |
| /// These are called non-differentiable views as the gradients do not flow |
| /// through the view relation. |
| /// Relevant logic for non-differentiable views is implemented in |
| /// make_variable_view below, and wrap_output of gen_variable_type.py. |
| struct TORCH_API Variable::DifferentiableViewMeta : public Variable::AutogradMeta { |
| /// The base `Variable` (never a view). |
| Variable base_; |
| |
| /// The value of the version_counter at the time grad_fn was created. The |
| /// grad_fn field is stale if attr_version != |
| /// version_counter.current_version(). |
| uint32_t attr_version; |
| |
| bool requires_grad() const override { |
| return requires_grad_ || grad_fn_ || (is_view_ && base_.requires_grad()); |
| } |
| |
| DifferentiableViewMeta(at::TensorImpl* self_impl, Variable base, Edge gradient_edge); |
| ~DifferentiableViewMeta(); |
| }; |
| |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| // Variable Implementation |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| // Factory Functions |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| /// NOTE: `allow_tensor_metadata_change` is set to true by default, because there |
| /// are a lot of call sites to these factory functions that need to change the |
| /// variable's size or storage afterwards, and they don't expect the original |
| /// tensor (where the variable is created from) to be updated. Setting |
/// `allow_tensor_metadata_change_` to false by default would unnecessarily
| /// prevent those changes from happening and is undesirable. |
| |
| // See NOTE [ Autograd View Variables ] for details. |
| inline Variable make_variable_view( |
| Variable base, |
| at::Tensor data, |
| bool is_differentiable = true, |
| bool allow_tensor_metadata_change = true, |
| Edge gradient_edge = Edge()) { |
| if (data.defined()) { |
| if (is_differentiable) { |
| /// Differentiable view. Track history with DifferentiableViewMeta. |
| auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach( |
| /*version_counter=*/0, |
| /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); |
| data_impl_copy->set_autograd_meta(c10::guts::make_unique<Variable::DifferentiableViewMeta>( |
| data_impl_copy.get(), std::move(base), std::move(gradient_edge))); |
| return Variable(data_impl_copy); |
| } else { |
| /// Non-differentiable view. Just share version counter. |
| auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach( |
| /*version_counter=*/base.version_counter(), |
| /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); |
| data_impl_copy->set_autograd_meta(c10::guts::make_unique<Variable::AutogradMeta>( |
| data_impl_copy.get(), false, std::move(gradient_edge))); |
| return Variable(data_impl_copy); |
| } |
| } |
| return Variable(); |
| } |
| |
| inline Variable make_variable( |
| at::Tensor data, |
| bool requires_grad = false, |
| bool allow_tensor_metadata_change = true) { |
| TORCH_CHECK( |
| !data.is_variable(), |
| "Must not create a new variable from a variable, use its .tensor_data()"); |
| if (data.defined()) { |
| auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach( |
| /*version_counter=*/0, |
| /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); |
| data_impl_copy->set_autograd_meta(c10::guts::make_unique<Variable::AutogradMeta>( |
| data_impl_copy.get(), requires_grad)); |
| return Variable(data_impl_copy); |
| } |
| return Variable(); |
| } |
| |
| inline Variable make_variable_consuming( |
| at::Tensor data, |
| bool requires_grad = false, |
| bool allow_tensor_metadata_change = true) { |
| TORCH_CHECK( |
| !data.is_variable(), |
| "Must not create a new variable from a variable, use its .tensor_data()"); |
| if (data.defined()) { |
| AT_ASSERT(data.getIntrusivePtr().use_count() == 1); |
| auto data_impl = data.getIntrusivePtr(); |
| data_impl->set_allow_tensor_metadata_change(allow_tensor_metadata_change); |
| data_impl->set_autograd_meta(c10::guts::make_unique<Variable::AutogradMeta>(data_impl.get(), requires_grad)); |
| return Variable(std::move(data_impl)); |
| } |
| return Variable(); |
| } |
| |
| inline Variable make_variable( |
| at::Tensor data, |
| Edge gradient_edge, |
| bool allow_tensor_metadata_change = true) { |
| TORCH_CHECK( |
| !data.is_variable(), |
| "Must not create a new variable from a variable, use its .tensor_data()"); |
| if (data.defined()) { |
| auto data_impl_copy = data.getIntrusivePtr()->shallow_copy_and_detach( |
| /*version_counter=*/0, |
| /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); |
| data_impl_copy->set_autograd_meta(c10::guts::make_unique<Variable::AutogradMeta>( |
| data_impl_copy.get(), false, std::move(gradient_edge))); |
| return Variable(data_impl_copy); |
| } |
| return Variable(); |
| } |
| |
| // Tensor Conversion |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
/// Downcasts the `Tensor` reference to a `Variable` reference. If the
/// tensor's dynamic type is not in fact `Variable`, throws an exception
/// (the check is performed via `TORCH_CHECK`, regardless of build mode).
| inline Variable& as_variable_ref(at::Tensor& tensor) { |
| TORCH_CHECK( |
| tensor.is_variable(), |
| "Attempted to cast a Tensor to a Variable, but " |
| "the dynamic type of the value is not Variable."); |
| return static_cast<Variable&>(tensor); |
| } |
| |
| inline const Variable& as_variable_ref(const at::Tensor& tensor) { |
| TORCH_CHECK( |
| tensor.is_variable(), |
| "Attempted to cast a Tensor to a Variable, but " |
| "the dynamic type of the value is not Variable."); |
| return static_cast<const Variable&>(tensor); |
| } |
| |
| inline at::Tensor Variable::tensor_data() const noexcept { |
| auto self_impl_copy = get()->shallow_copy_and_detach( |
| /*version_counter=*/get()->version_counter(), |
| /*allow_tensor_metadata_change=*/get()->allow_tensor_metadata_change()); |
| return at::Tensor(self_impl_copy); |
| } |
| |
| inline at::Tensor Variable::variable_data() const noexcept { |
| auto self_impl_copy = get()->shallow_copy_and_detach( |
| /*version_counter=*/0, |
| /*allow_tensor_metadata_change=*/false); |
| self_impl_copy->set_autograd_meta(c10::guts::make_unique<Variable::AutogradMeta>(self_impl_copy.get(), false)); |
| return at::Tensor(self_impl_copy); |
| } |
| |
| // Gradient Function and Edges |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| inline Function* Variable::grad_fn_unsafe() const { |
| return get_autograd_meta()->grad_fn_.get(); |
| } |
| |
| inline void Variable::set_grad_accumulator( |
| std::weak_ptr<Function> grad_accumulator) { |
| get_autograd_meta()->grad_accumulator_ = std::move(grad_accumulator); |
| } |
| |
| inline std::shared_ptr<Function> Variable::try_get_grad_accumulator() const { |
| return get_autograd_meta()->grad_accumulator_.lock(); |
| } |
| |
| inline Variable Variable::detach() const { |
| auto var = make_variable_view(*this, *this, /*is_differentiable=*/false, /*allow_tensor_metadata_change=*/false, Edge()); |
| return var; |
| } |
| |
| inline void Variable::set_gradient_edge(Edge edge) noexcept { |
| get_autograd_meta()->grad_fn_ = std::move(edge.function); |
| get_autograd_meta()->output_nr_ = edge.input_nr; |
| } |
| |
| inline uint32_t Variable::output_nr() const noexcept { |
| return get_autograd_meta()->output_nr_; |
| } |
| |
| inline bool Variable::is_leaf() const noexcept { |
| return get_autograd_meta()->grad_fn_ == nullptr; |
| } |
| |
| // Versions |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| inline void Variable::set_version_counter( |
| const c10::VariableVersion& version_counter) noexcept { |
| unsafeGetTensorImpl()->set_version_counter(version_counter); |
| } |
| |
| inline void Variable::bump_version() noexcept { |
| unsafeGetTensorImpl()->bump_version(); |
| } |
| |
| inline uint32_t Variable::current_version() const noexcept { |
| return unsafeGetTensorImpl()->version_counter().current_version(); |
| } |
| |
| inline const c10::VariableVersion& Variable::version_counter() const noexcept { |
| return unsafeGetTensorImpl()->version_counter(); |
| } |
| |
| // Hooks |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| inline void Variable::add_hook(std::shared_ptr<FunctionPreHook> hook) { |
| get_autograd_meta()->hooks_.push_back(std::move(hook)); |
| } |
| |
| inline const std::vector<std::shared_ptr<FunctionPreHook>>& Variable::hooks() |
| const noexcept { |
| return get_autograd_meta()->hooks_; |
| } |
| |
| inline void Variable::clear_hooks() { |
| get_autograd_meta()->hooks_.clear(); |
| } |
| |
| // View Variables |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| inline bool Variable::is_view() const noexcept { |
| return get_autograd_meta()->is_view_; |
| } |
| |
| inline const Variable& Variable::base() const { |
| if (is_view()) { |
| auto diff_view_meta = static_cast<Variable::DifferentiableViewMeta*>(get_autograd_meta()); |
| return diff_view_meta->base_; |
| } else { |
| throw std::runtime_error("Can't get base of non-view Variable"); |
| } |
| } |
| |
| // Miscellaneous |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| inline void Variable::set_name(const std::string& name) { |
| get_autograd_meta()->name = name; |
| } |
| |
| inline const std::string& Variable::name() const noexcept { |
| return get_autograd_meta()->name; |
| } |
| |
| inline void Variable::set_pyobj(PyObject* pyobj) noexcept { |
| get()->set_pyobj(pyobj); |
| } |
| |
| inline PyObject* Variable::pyobj() const noexcept { |
| return get()->pyobj(); |
| } |
| |
| inline Variable::AutogradMeta* Variable::get_autograd_meta() const noexcept { |
| return static_cast<Variable::AutogradMeta*>(get()->autograd_meta()); |
| } |
| |
| // Private Methods |
| //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
| |
| inline Variable::Variable(c10::intrusive_ptr<at::TensorImpl> self) |
| : at::Tensor(std::move(self)) {} |
| |
| inline at::TensorImpl* Variable::get() const { |
| TORCH_CHECK(defined(), "Called Variable::get() on an undefined Variable"); |
| return unsafeGetTensorImpl(); |
| } |
| }} // namespace torch::autograd |