Rename DefaultBackend to CompositeExplicitAutograd (#54470)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/54470

```
git grep -l 'DefaultBackend' | xargs sed -i 's/DefaultBackend/CompositeExplicitAutograd/g'
```
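
At registration sites the change is purely a rename of the alias key; dispatch behavior is unchanged. A minimal sketch, not part of this commit (the operator my_ns::my_identity is hypothetical; the pattern mirrors the updated tests in op_registration_test.cpp below):

```
#include <ATen/ATen.h>
#include <torch/library.h>

TORCH_LIBRARY(my_ns, m) {
  m.def("my_identity(Tensor self) -> Tensor");
  // One kernel that serves every backend in inference.
  // Previously this key was spelled c10::DispatchKey::DefaultBackend.
  m.impl("my_identity",
         c10::DispatchKey::CompositeExplicitAutograd,
         [](const at::Tensor& self) { return self; });
}
```

Training still needs a separate Autograd registration, as the OperatorEntry.cpp comments below spell out.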

Plus a quick fixup in native/README.md
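
The README also restates the precedence rule: a kernel registered directly to a backend key still beats a CompositeExplicitAutograd kernel for that backend. A minimal sketch of that interaction, again with a hypothetical op (my_ns2::my_fn), shaped after the BackendOverridesCompositeExplicitAutogradKernel test below:

```
#include <ATen/ATen.h>
#include <torch/library.h>

TORCH_LIBRARY(my_ns2, m) {
  m.def("my_fn(Tensor self) -> Tensor");
  // Fallback kernel for every backend in inference.
  m.impl("my_fn",
         c10::DispatchKey::CompositeExplicitAutograd,
         [](const at::Tensor& self) { return self + 1; });
  // Direct CPU registration; CPU tensors dispatch here, not to the alias kernel.
  m.impl("my_fn",
         c10::DispatchKey::CPU,
         [](const at::Tensor& self) { return self + 2; });
}
```

Backends without their own kernel (CUDA here, for example) still fall back to the CompositeExplicitAutograd kernel.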

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Test Plan: Imported from OSS

Reviewed By: bdhirsh

Differential Revision: D27253240

Pulled By: ezyang

fbshipit-source-id: 964df951ea8b52fa72937f3cc66aeaf49a702e6f
diff --git a/BUILD.bazel b/BUILD.bazel
index 00e726a..8f4c26d 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -132,7 +132,7 @@
         "aten/src/ATen/RegisterSparseCPU.cpp",
         "aten/src/ATen/RegisterCompositeImplicitAutograd.cpp",
         "aten/src/ATen/RegisterMeta.cpp",
-        "aten/src/ATen/RegisterDefaultBackend.cpp",
+        "aten/src/ATen/RegisterCompositeExplicitAutograd.cpp",
         "aten/src/ATen/RegisterSchema.cpp",
         "aten/src/ATen/CPUFunctions.h",
         "aten/src/ATen/CUDAFunctions.h",
diff --git a/aten/src/ATen/core/dispatch/OperatorEntry.cpp b/aten/src/ATen/core/dispatch/OperatorEntry.cpp
index a136009..bdc315e 100644
--- a/aten/src/ATen/core/dispatch/OperatorEntry.cpp
+++ b/aten/src/ATen/core/dispatch/OperatorEntry.cpp
@@ -183,16 +183,16 @@
   // For any dispatch key, it'll pick a kernel using the following order:
   //  (1) Use kernel if it's directly registered to this key
   //  (2) Handle runtime keys that have kernels available from alias keys
-  //    (2.1) Use kernel from DispatchKey::DefaultBackend if available.
+  //    (2.1) Use kernel from DispatchKey::CompositeExplicitAutograd if available.
  //          This is used to register a kernel that works for all backends in inference. But it requires
   //          separate registration for Autograd keys to support training.
   //    (2.2) Use kernel from DispatchKey::CompositeImplicitAutograd if available.
   //          For autograd keys, we only use kernel from CompositeImplicitAutograd when there's no direct registration
-  //          to its corresponding backend key or DefaultBackend. See Note [DefaultBackend and CompositeImplicitAutograd].
+  //          to its corresponding backend key or CompositeExplicitAutograd. See Note [CompositeExplicitAutograd and CompositeImplicitAutograd].
   //          For AutogradOther, we eagerly return ambiguousAutogradOtherKernel_ if there's registration to any of
  //          its backends and ask the backend extender to request a dedicated Autograd key for the backend.
   //          See Note [Ambiguity in AutogradOther kernel] for more details.
-  //          A DefaultBackend kernel prevents CompositeImplicitAutograd kernel being used for Autograd keys, but it doesn't
+  //          A CompositeExplicitAutograd kernel prevents CompositeImplicitAutograd kernel being used for Autograd keys, but it doesn't
   //          cause confusion for AutogradOther. It's pretty straightforward to use Autograd (if available)
   //          in this case.
   //    (2.3) Use kernel from DispatchKey::Autograd if available
@@ -201,11 +201,11 @@
   //    backend key. See Note [Refresh Runtime Autograd entries in dispatchTable_]
  //  (3) Use fallthrough kernels that are registered as fallbacks.
   // Alias Key Precedence:
-  //   DefaultBackend > CompositeImplicitAutograd > Autograd
-  // Note [DefaultBackend and CompositeImplicitAutograd]
-  //   When there're registrations to both DefaultBackend & CompositeImplicitAutograd & Autograd, from (2.2) we know DefaultBackend
+  //   CompositeExplicitAutograd > CompositeImplicitAutograd > Autograd
+  // Note [CompositeExplicitAutograd and CompositeImplicitAutograd]
+  //   When there are registrations to CompositeExplicitAutograd, CompositeImplicitAutograd, and Autograd, from (2.2) we know the CompositeExplicitAutograd
  //   and Autograd kernels will be picked up and CompositeImplicitAutograd is overridden.
-  //   This is fine and in practice DefaultBackend and CompositeImplicitAutograd shouldn't co-exist for an op.
+  //   This is fine and in practice CompositeExplicitAutograd and CompositeImplicitAutograd shouldn't co-exist for an op.
  // TODO: Update alias key precedence after we add new alias keys AutogradDispatchCPUOrCUDA.
 
   // 1. Operator registration
@@ -213,21 +213,21 @@
     return {*direct_registration.value(), "kernel"};
   }
 
-  // 2.1 Use DefaultBackend kernel if available.
+  // 2.1 Use CompositeExplicitAutograd kernel if available.
   //     See Note [Undefined in dispatchTable_] for the special handling for Undefined.
-  if (dispatch_key == DispatchKey::Undefined || isIncludedInAlias(dispatch_key, DispatchKey::DefaultBackend)) {
-    if (auto default_backend_registration = getKernelForDispatchKey(DispatchKey::DefaultBackend)) {
+  if (dispatch_key == DispatchKey::Undefined || isIncludedInAlias(dispatch_key, DispatchKey::CompositeExplicitAutograd)) {
+    if (auto default_backend_registration = getKernelForDispatchKey(DispatchKey::CompositeExplicitAutograd)) {
       return {*default_backend_registration.value(), "default backend kernel"};
     }
   }
 
-  // Note when there's direct registration to DefaultBackend, this code path will only be hit by
+  // Note when there's direct registration to CompositeExplicitAutograd, this code path will only be hit by
  // non-backend keys (e.g. AutogradXXX, Batched, etc.) due to (2.1).
   bool has_backend_kernel =
-    hasKernelForAnyDispatchKey(getBackendKeySetFromAutograd(dispatch_key).add(DispatchKey::DefaultBackend));
+    hasKernelForAnyDispatchKey(getBackendKeySetFromAutograd(dispatch_key).add(DispatchKey::CompositeExplicitAutograd));
 
   // 2.2. Use CompositeImplicitAutograd kernel if available. For autograd keys, we only use kernel from CompositeImplicitAutograd
-  //      when there's no direct registration to its corresponding backend key or DefaultBackend.
+  //      when there's no direct registration to its corresponding backend key or CompositeExplicitAutograd.
   //      For AutogradOther, we return ambiguousAutogradOtherKernel_ if there's registration
   //      to any of its backends.
   //      See Note [Undefined in dispatchTable_] for the special handling for Undefined.
@@ -286,9 +286,9 @@
   for (auto k : c10::getRuntimeDispatchKeySet(dispatch_key)) {
     updateDispatchTableEntry_(dispatcher, k);
   }
-  // Registration to DefaultBackend and CompositeImplicitAutograd should be populated to Undefined.
+  // Registration to CompositeExplicitAutograd and CompositeImplicitAutograd should be populated to Undefined.
   // We cannot do this above since Undefined cannot be represented in DispatchKeySet.
-  if (dispatch_key == DispatchKey::CompositeImplicitAutograd || dispatch_key == DispatchKey::DefaultBackend) {
+  if (dispatch_key == DispatchKey::CompositeImplicitAutograd || dispatch_key == DispatchKey::CompositeExplicitAutograd) {
     updateDispatchTableEntry_(dispatcher, DispatchKey::Undefined);
   }
   // Note [Refresh Runtime Autograd entries in dispatchTable_]
@@ -318,7 +318,7 @@
   //     no dispatch keys are available we just slide into the undefined handler which would then raise
   //     the error message.
   // In the old world of catchAll, the only way to "register" a kernel to Undefined is by registering it to
-  // catchAll. After catchAllKernel_ is removed, Undefined now can get a kernel from either DefaultBackend
+  // catchAll. After catchAllKernel_ is removed, Undefined now can get a kernel from either CompositeExplicitAutograd
  // or CompositeImplicitAutograd alias key so that we don't break support. Ideally isIncludedInAlias(Undefined, CompositeImplicitAutograd)
  // should return true, but it returns false because Undefined cannot be represented in a DispatchKeySet.
   for (uint8_t iter = 0; iter != static_cast<uint8_t>(DispatchKey::NumDispatchKeys); ++iter) {
diff --git a/aten/src/ATen/core/op_registration/op_registration_test.cpp b/aten/src/ATen/core/op_registration/op_registration_test.cpp
index bb296a3..2cc3d7e 100644
--- a/aten/src/ATen/core/op_registration/op_registration_test.cpp
+++ b/aten/src/ATen/core/op_registration/op_registration_test.cpp
@@ -1502,10 +1502,10 @@
   }
 }
 
-TEST(NewOperatorRegistrationTest, dispatchWithDefaultBackendKernel) {
+TEST(NewOperatorRegistrationTest, dispatchWithCompositeExplicitAutogradKernel) {
   bool called = false;
   auto m = MAKE_TORCH_LIBRARY(test);
-  m.def("fn", torch::dispatch(c10::DispatchKey::DefaultBackend, [&](const Tensor& x) { called = true; return x; }));
+  m.def("fn", torch::dispatch(c10::DispatchKey::CompositeExplicitAutograd, [&](const Tensor& x) { called = true; return x; }));
 
   auto op = Dispatcher::singleton().findSchema({"test::fn", ""});
   ASSERT_TRUE(op.has_value());
@@ -1550,11 +1550,11 @@
   }
 }
 
-TEST(NewOperatorRegistrationTest, dispatchWithDefaultBackendAndCompositeImplicitAutogradKernel) {
+TEST(NewOperatorRegistrationTest, dispatchWithCompositeExplicitAutogradAndCompositeImplicitAutogradKernel) {
   bool backend_called = false;
   bool math_called = false;
   auto m = MAKE_TORCH_LIBRARY(test);
-  m.def("fn", torch::dispatch(c10::DispatchKey::DefaultBackend, [&](const Tensor& x) { backend_called = true; return x; }));
+  m.def("fn", torch::dispatch(c10::DispatchKey::CompositeExplicitAutograd, [&](const Tensor& x) { backend_called = true; return x; }));
   m.impl("fn", c10::DispatchKey::CompositeImplicitAutograd, [&](const Tensor& x) { math_called = true; return x; });
 
   auto op = Dispatcher::singleton().findSchema({"test::fn", ""});
@@ -1606,11 +1606,11 @@
   }
 }
 
-TEST(NewOperatorRegistrationTest, BackendOverridesDefaultBackendKernel) {
+TEST(NewOperatorRegistrationTest, BackendOverridesCompositeExplicitAutogradKernel) {
   bool default_called = false;
   bool backend_called = false;
   auto m = MAKE_TORCH_LIBRARY(test);
-  m.def("fn", torch::dispatch(c10::DispatchKey::DefaultBackend, [&](const Tensor& x) { default_called = true; return x; }));
+  m.def("fn", torch::dispatch(c10::DispatchKey::CompositeExplicitAutograd, [&](const Tensor& x) { default_called = true; return x; }));
   m.impl("fn", c10::DispatchKey::CPU, [&](const Tensor& x) { backend_called = true; return x; });
 
   auto op = Dispatcher::singleton().findSchema({"test::fn", ""});
diff --git a/aten/src/ATen/native/README.md b/aten/src/ATen/native/README.md
index 3bde885..0fc35b5 100644
--- a/aten/src/ATen/native/README.md
+++ b/aten/src/ATen/native/README.md
@@ -485,7 +485,7 @@
     - Yes, but you still want to provide a numerically stable gradient formula instead of using autograd, write
       ```
       dispatch:
-        DefaultBackend: kernel
+        CompositeExplicitAutograd: kernel
       ```
 
       You're done. This op will be called in inference for all backends.
@@ -505,7 +505,7 @@
       For `sign` and `sign_`, write
       ```
       dispatch:
-        DefaultBackend: kernel
+        CompositeExplicitAutograd: kernel
       ```
 
       You're done. This op will be called in inference for all backends.
@@ -529,8 +529,8 @@
 
 Note that in native_functions.yaml you can mix backend keywords with the alias keywords above for one op:
  - direct registration to a backend always has higher precedence than an alias keyword
-  - DO NOT provide multiple alias keywords to the same op: alias keywords have precedence `DefaultBackend > CompositeImplicitAutograd`,
-    e.g. adding both `CompositeImplicitAutograd` and `DefaultBackend` kernels for one op will completely ignore `CompositeImplicitAutograd` kernel for
+  - DO NOT provide multiple alias keywords to the same op: alias keywords have precedence `CompositeExplicitAutograd > CompositeImplicitAutograd`,
+    e.g. adding both `CompositeImplicitAutograd` and `CompositeExplicitAutograd` kernels for one op will completely ignore the `CompositeImplicitAutograd` kernel for
     both inference and training. Thus this will trigger an error when native_functions.yaml is parsed.
 
 
diff --git a/aten/src/ATen/native/UpSampleNearest3d.cpp b/aten/src/ATen/native/UpSampleNearest3d.cpp
index 2ea356e..df7cffb 100644
--- a/aten/src/ATen/native/UpSampleNearest3d.cpp
+++ b/aten/src/ATen/native/UpSampleNearest3d.cpp
@@ -91,7 +91,7 @@
   return at::upsample_nearest3d(input, osize, scale_d, scale_h, scale_w);
 }
 
-// when structured kernels can handle QuantizedCPU, update these overloads to be DefaultBackend
+// when structured kernels can handle QuantizedCPU, update these overloads to be CompositeExplicitAutograd
 Tensor upsample_nearest3d_backward_cpu(
     const Tensor& grad_output,
     c10::optional<IntArrayRef> output_size,
diff --git a/aten/src/ATen/native/cuda/UpSampleNearest3d.cu b/aten/src/ATen/native/cuda/UpSampleNearest3d.cu
index b08fd57..9330433 100644
--- a/aten/src/ATen/native/cuda/UpSampleNearest3d.cu
+++ b/aten/src/ATen/native/cuda/UpSampleNearest3d.cu
@@ -289,7 +289,7 @@
   return at::upsample_nearest3d(input, osize, scale_d, scale_h, scale_w);
 }
 
-// when structured kernels can handle QuantizedCPU, update these overloads to be DefaultBackend
+// when structured kernels can handle QuantizedCPU, update these overloads to be CompositeExplicitAutograd
 Tensor upsample_nearest3d_backward_cuda(
     const Tensor& grad_output,
     c10::optional<IntArrayRef> output_size,
diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 3c19908..32b7161 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -92,7 +92,7 @@
 - func: _fw_primal(Tensor(a) self, int level) -> Tensor(a)
   variants: method
   dispatch:
-    DefaultBackend: _fw_primal
+    CompositeExplicitAutograd: _fw_primal
 
 - func: _make_dual(Tensor(a) primal, Tensor tangent, int level) -> Tensor(a)
   variants: function
@@ -199,12 +199,12 @@
 - func: abs(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: abs
+    CompositeExplicitAutograd: abs
 
 - func: abs_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: abs_
+    CompositeExplicitAutograd: abs_
 
 - func: abs.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -267,12 +267,12 @@
 - func: sgn(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: sgn
+    CompositeExplicitAutograd: sgn
 
 - func: sgn_(Tensor(a!) self) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: sgn_
+    CompositeExplicitAutograd: sgn_
 
 - func: sgn.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -294,7 +294,7 @@
 - func: _conj(Tensor self) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: _conj
+    CompositeExplicitAutograd: _conj
 
 - func: acos(Tensor self) -> Tensor
   variants: function, method
@@ -304,7 +304,7 @@
 - func: acos_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: acos_
+    CompositeExplicitAutograd: acos_
 
 - func: acos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -368,12 +368,12 @@
 - func: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: add
+    CompositeExplicitAutograd: add
 
 - func: add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: add_
+    CompositeExplicitAutograd: add_
 
 - func: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   variants: function, method
@@ -403,7 +403,7 @@
 - func: addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: addr_
+    CompositeExplicitAutograd: addr_
 
 - func: addr.out(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -413,7 +413,7 @@
 - func: affine_grid_generator(Tensor theta, int[] size, bool align_corners) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: affine_grid_generator
+    CompositeExplicitAutograd: affine_grid_generator
 
 - func: affine_grid_generator_backward(Tensor grad, int[] size, bool align_corners) -> Tensor
   variants: function
@@ -498,7 +498,7 @@
 - func: acosh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: acosh_
+    CompositeExplicitAutograd: acosh_
 
 - func: acosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -521,7 +521,7 @@
 - func: asinh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: asinh_
+    CompositeExplicitAutograd: asinh_
 
 - func: asinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -544,7 +544,7 @@
 - func: atanh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: atanh_
+    CompositeExplicitAutograd: atanh_
 
 - func: atanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -571,7 +571,7 @@
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: as_strided_
+    CompositeExplicitAutograd: as_strided_
 
 - func: asin(Tensor self) -> Tensor
   variants: function, method
@@ -607,7 +607,7 @@
 - func: atan_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: atan_
+    CompositeExplicitAutograd: atan_
 
 - func: atan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -680,7 +680,7 @@
 - func: bernoulli(Tensor self, *, Generator? generator=None) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: bernoulli
+    CompositeExplicitAutograd: bernoulli
 
 - func: bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)
   variants: function
@@ -738,7 +738,7 @@
 - func: binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: binary_cross_entropy_with_logits
+    CompositeExplicitAutograd: binary_cross_entropy_with_logits
 
 - func: binary_cross_entropy_with_logits_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
   variants: function
@@ -863,11 +863,11 @@
 
 - func: cat(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
-    DefaultBackend: cat
+    CompositeExplicitAutograd: cat
 
 - func: cat.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: cat_out
+    CompositeExplicitAutograd: cat_out
 
 - func: cat.names(Tensor[] tensors, Dimname dim) -> Tensor
 
@@ -879,12 +879,12 @@
 - func: ceil(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: ceil
+    CompositeExplicitAutograd: ceil
 
 - func: ceil_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: ceil_
+    CompositeExplicitAutograd: ceil_
 
 - func: ceil.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -919,7 +919,7 @@
 - func: clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: clamp_
+    CompositeExplicitAutograd: clamp_
 
 - func: clamp.out(Tensor self, Scalar? min=None, Scalar? max=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -928,12 +928,12 @@
 - func: clamp_max(Tensor self, Scalar max) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: clamp_max
+    CompositeExplicitAutograd: clamp_max
 
 - func: clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: clamp_max_
+    CompositeExplicitAutograd: clamp_max_
 
 - func: clamp_max.out(Tensor self, Scalar max, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -942,12 +942,12 @@
 - func: clamp_min(Tensor self, Scalar min) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: clamp_min
+    CompositeExplicitAutograd: clamp_min
 
 - func: clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: clamp_min_
+    CompositeExplicitAutograd: clamp_min_
 
 - func: clamp_min.out(Tensor self, Scalar min, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -969,7 +969,7 @@
 - func: complex(Tensor real, Tensor imag) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: complex
+    CompositeExplicitAutograd: complex
 
 - func: complex.out(Tensor real, Tensor imag, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -978,7 +978,7 @@
 - func: polar(Tensor abs, Tensor angle) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: polar
+    CompositeExplicitAutograd: polar
 
 - func: polar.out(Tensor abs, Tensor angle, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -987,7 +987,7 @@
 - func: constant_pad_nd(Tensor self, int[] pad, Scalar value=0) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: constant_pad_nd
+    CompositeExplicitAutograd: constant_pad_nd
 
 - func: contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a)
   variants: method
@@ -997,11 +997,11 @@
 
 - func: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups) -> Tensor
   dispatch:
-    DefaultBackend: convolution_overrideable
+    CompositeExplicitAutograd: convolution_overrideable
 
 - func: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
   dispatch:
-    DefaultBackend: convolution_backward_overrideable
+    CompositeExplicitAutograd: convolution_backward_overrideable
 
 - func: _convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
 
@@ -1031,7 +1031,7 @@
 
 - func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
   dispatch:
-    DefaultBackend: conv_tbc
+    CompositeExplicitAutograd: conv_tbc
 
 - func: conv_tbc_backward(Tensor self, Tensor input, Tensor weight, Tensor bias, int pad) -> (Tensor, Tensor, Tensor)
 
@@ -1046,7 +1046,7 @@
   variants: method
   device_guard: False
   dispatch:
-    DefaultBackend: copy_
+    CompositeExplicitAutograd: copy_
 
 - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
   dispatch: {}
@@ -1059,7 +1059,7 @@
 - func: cos_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: cos_
+    CompositeExplicitAutograd: cos_
 
 - func: cos.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1073,7 +1073,7 @@
 - func: cosh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: cosh_
+    CompositeExplicitAutograd: cosh_
 
 - func: cosh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1089,7 +1089,7 @@
 - func: count_nonzero(Tensor self, int? dim=None) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: count_nonzero
+    CompositeExplicitAutograd: count_nonzero
 
 - func: cudnn_affine_grid_generator(Tensor theta, int N, int C, int H, int W) -> Tensor grid
   dispatch:
@@ -1179,11 +1179,11 @@
 - func: cummax(Tensor self, int dim) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
-    DefaultBackend: cummax
+    CompositeExplicitAutograd: cummax
 
 - func: cummax.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
-    DefaultBackend: cummax_out
+    CompositeExplicitAutograd: cummax_out
 
 - func: cummax.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)
   variants: function, method
@@ -1200,11 +1200,11 @@
 - func: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
-    DefaultBackend: cummin
+    CompositeExplicitAutograd: cummin
 
 - func: cummin.out(Tensor self, int dim, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
-    DefaultBackend: cummin_out
+    CompositeExplicitAutograd: cummin_out
 
 - func: cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices)
   variants: function, method
@@ -1225,16 +1225,16 @@
 - func: cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: cumprod
+    CompositeExplicitAutograd: cumprod
 
 - func: cumprod_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: cumprod_
+    CompositeExplicitAutograd: cumprod_
 
 - func: cumprod.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: cumprod_out
+    CompositeExplicitAutograd: cumprod_out
 
 - func: cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -1251,16 +1251,16 @@
 - func: cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: cumsum
+    CompositeExplicitAutograd: cumsum
 
 - func: cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: cumsum_
+    CompositeExplicitAutograd: cumsum_
 
 - func: cumsum.out(Tensor self, int dim, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: cumsum_out
+    CompositeExplicitAutograd: cumsum_out
 
 - func: cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor
   variants: function, method
@@ -1294,7 +1294,7 @@
 - func: diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)
   variants: function, method
   dispatch:
-    DefaultBackend: diagonal
+    CompositeExplicitAutograd: diagonal
 
 - func: diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a)
   variants: function, method
@@ -1349,22 +1349,22 @@
 - func: div.Scalar(Tensor self, Scalar other) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: div
+    CompositeExplicitAutograd: div
 
 - func: div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: div_
+    CompositeExplicitAutograd: div_
 
 - func: div.Scalar_mode(Tensor self, Scalar other, *, str rounding_mode) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: div
+    CompositeExplicitAutograd: div
 
 - func: div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str rounding_mode) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: div_
+    CompositeExplicitAutograd: div_
 
 # divide, alias for div
 - func: divide.Tensor(Tensor self, Tensor other) -> Tensor
@@ -1418,7 +1418,7 @@
 
 - func: dot.out(Tensor self, Tensor tensor, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: dot_out
+    CompositeExplicitAutograd: dot_out
 
 - func: vdot(Tensor self, Tensor other) -> Tensor
   variants: function, method
@@ -1428,13 +1428,13 @@
 
 - func: vdot.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: vdot_out
+    CompositeExplicitAutograd: vdot_out
 
 - func: einsum(str equation, Tensor[] tensors) -> Tensor
 
 - func: embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
   dispatch:
-    DefaultBackend: embedding
+    CompositeExplicitAutograd: embedding
 
 - func: embedding_backward(Tensor grad, Tensor indices, int num_weights, int padding_idx, bool scale_grad_by_freq, bool sparse) -> Tensor
 
@@ -1571,7 +1571,7 @@
 - func: erf_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: erf_
+    CompositeExplicitAutograd: erf_
 
 - func: erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1585,7 +1585,7 @@
 - func: erfc_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: erfc_
+    CompositeExplicitAutograd: erfc_
 
 - func: erfc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1599,7 +1599,7 @@
 - func: exp_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: exp_
+    CompositeExplicitAutograd: exp_
 
 - func: exp.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1613,7 +1613,7 @@
 - func: exp2_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: exp2_
+    CompositeExplicitAutograd: exp2_
 
 - func: exp2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1627,7 +1627,7 @@
 - func: expm1_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: expm1_
+    CompositeExplicitAutograd: expm1_
 
 - func: expm1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1637,7 +1637,7 @@
   variants: method  # This is method-only to match the previous tensor API. In the future we could make this a function too.
   device_guard: False
   dispatch:
-    DefaultBackend: expand
+    CompositeExplicitAutograd: expand
 
 - func: expand_as(Tensor(a) self, Tensor other) -> Tensor(a)
   variants: method  # This is method-only to match the previous tensor API. In the future we could make this a function too.
@@ -1690,12 +1690,12 @@
 - func: floor(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: floor
+    CompositeExplicitAutograd: floor
 
 - func: floor_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: floor_
+    CompositeExplicitAutograd: floor_
 
 - func: floor.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1727,12 +1727,12 @@
 - func: frac(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: frac
+    CompositeExplicitAutograd: frac
 
 - func: frac_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: frac_
+    CompositeExplicitAutograd: frac_
 
 - func: frac.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -1805,7 +1805,7 @@
 # See NOTE [ grid_sample CPU fallback ]
 - func: _grid_sampler_2d_cpu_fallback(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
   dispatch:
-    DefaultBackend: _grid_sampler_2d_cpu_fallback
+    CompositeExplicitAutograd: _grid_sampler_2d_cpu_fallback
 
 - func: _grid_sampler_2d_cpu_fallback_backward(Tensor grad_output, Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> (Tensor, Tensor)
 
@@ -1919,7 +1919,7 @@
 - func: index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: index_copy_
+    CompositeExplicitAutograd: index_copy_
 
 - func: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
   variants: function, method
@@ -1933,7 +1933,7 @@
 - func: index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: index_put_
+    CompositeExplicitAutograd: index_put_
   # NB: The following functions are declared in aten/src/ATen/templates/TensorBody.h and defined in aten/src/ATen/TensorIndexing.cpp:
   # - Tensor & Tensor::index_put_(ArrayRef<TensorIndex> indices, Tensor const & rhs)
   # - Tensor & Tensor::index_put_(ArrayRef<TensorIndex> indices, Scalar v)
@@ -1954,11 +1954,11 @@
 - func: inverse(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: inverse
+    CompositeExplicitAutograd: inverse
 
 - func: inverse.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: inverse_out
+    CompositeExplicitAutograd: inverse_out
 
 - func: _inverse_helper(Tensor self) -> Tensor
   variants: function
@@ -2008,7 +2008,7 @@
 
 - func: kl_div(Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
   dispatch:
-    DefaultBackend: kl_div
+    CompositeExplicitAutograd: kl_div
 
 - func: kl_div_backward(Tensor grad_output, Tensor self, Tensor target, int reduction=Mean, *, bool log_target=False) -> Tensor
   dispatch:
@@ -2023,7 +2023,7 @@
 - func: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
-    DefaultBackend: kthvalue
+    CompositeExplicitAutograd: kthvalue
 
 - func: kthvalue.values(Tensor self, int k, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
@@ -2051,16 +2051,16 @@
 - func: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: nan_to_num
+    CompositeExplicitAutograd: nan_to_num
 
 - func: nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: nan_to_num_
+    CompositeExplicitAutograd: nan_to_num_
 
 - func: nan_to_num.out(Tensor self, float? nan=None, float? posinf=None, float? neginf=None, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: nan_to_num_out
+    CompositeExplicitAutograd: nan_to_num_out
 
 - func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
   python_module: nn
@@ -2125,7 +2125,7 @@
 - func: log_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: log_
+    CompositeExplicitAutograd: log_
 
 - func: log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2139,7 +2139,7 @@
 - func: log10_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: log10_
+    CompositeExplicitAutograd: log10_
 
 - func: log10.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2170,7 +2170,7 @@
 - func: log2_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: log2_
+    CompositeExplicitAutograd: log2_
 
 - func: log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2183,7 +2183,7 @@
 - func: logaddexp(Tensor self, Tensor other) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: logaddexp
+    CompositeExplicitAutograd: logaddexp
 
 - func: logaddexp2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2192,7 +2192,7 @@
 - func: logaddexp2(Tensor self, Tensor other) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: logaddexp2
+    CompositeExplicitAutograd: logaddexp2
 
 - func: xlogy.Tensor(Tensor self, Tensor other) -> Tensor
   variants: function, method
@@ -2239,7 +2239,7 @@
 - func: logdet(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: logdet
+    CompositeExplicitAutograd: logdet
 
 - func: logspace(Scalar start, Scalar end, int? steps=None, float base=10.0, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -2279,11 +2279,11 @@
 - func: logcumsumexp(Tensor self, int dim) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: logcumsumexp
+    CompositeExplicitAutograd: logcumsumexp
 
 - func: logcumsumexp.out(Tensor self, int dim, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: logcumsumexp_out
+    CompositeExplicitAutograd: logcumsumexp_out
 
 - func: logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor
   variants: function, method
@@ -2293,11 +2293,11 @@
 - func: logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: logsumexp
+    CompositeExplicitAutograd: logsumexp
 
 - func: logsumexp.out(Tensor self, int[1] dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: logsumexp_out
+    CompositeExplicitAutograd: logsumexp_out
 
 - func: logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor
   variants: function, method
@@ -2368,7 +2368,7 @@
 - func: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: amax
+    CompositeExplicitAutograd: amax
 
 - func: amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2440,7 +2440,7 @@
 - func: median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
-    DefaultBackend: median
+    CompositeExplicitAutograd: median
 
 - func: median.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
@@ -2461,7 +2461,7 @@
 - func: nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
   dispatch:
-    DefaultBackend: nanmedian
+    CompositeExplicitAutograd: nanmedian
 
 - func: nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
@@ -2490,7 +2490,7 @@
 - func: amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: amin
+    CompositeExplicitAutograd: amin
 
 - func: amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2498,7 +2498,7 @@
 
 - func: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, int[] padding, int[] stride, int[] dilation, int groups) -> Tensor
   dispatch:
-    DefaultBackend: mkldnn_convolution
+    CompositeExplicitAutograd: mkldnn_convolution
 
 - func: mkldnn_convolution_backward_input(int[] self_size, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool bias_defined) -> Tensor
 
@@ -2506,7 +2506,7 @@
 
 - func: mkldnn_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, int[] padding, int[] stride, int[] dilation, int groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   dispatch:
-    DefaultBackend: mkldnn_convolution_backward
+    CompositeExplicitAutograd: mkldnn_convolution_backward
 
 - func: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
   dispatch:
@@ -2610,7 +2610,7 @@
 
 - func: mode.values(Tensor self, int dim=-1, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices)
   dispatch:
-    DefaultBackend: mode_out
+    CompositeExplicitAutograd: mode_out
 
 - func: mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices)
   variants: function, method
@@ -2644,12 +2644,12 @@
 - func: mul.Scalar(Tensor self, Scalar other) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: mul
+    CompositeExplicitAutograd: mul
 
 - func: mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: mul_
+    CompositeExplicitAutograd: mul_
 
 # multiply, alias for mul
 - func: multiply.Tensor(Tensor self, Tensor other) -> Tensor
@@ -2674,24 +2674,24 @@
 
 - func: mv.out(Tensor self, Tensor vec, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: mv_out
+    CompositeExplicitAutograd: mv_out
 
 - func: mvlgamma(Tensor self, int p) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: mvlgamma
+    CompositeExplicitAutograd: mvlgamma
 
 - func: mvlgamma_(Tensor(a!) self, int p) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: mvlgamma_
+    CompositeExplicitAutograd: mvlgamma_
 
 - func: narrow_copy(Tensor self, int dim, int start, int length) -> Tensor
   variants: function, method
   dispatch:
     CPU: narrow_copy_dense_cpu
     SparseCPU, SparseCUDA: narrow_copy_sparse
-    DefaultBackend: narrow_copy_dense
+    CompositeExplicitAutograd: narrow_copy_dense
 
 - func: narrow_copy.out(Tensor self, int dim, int start, int length, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -2764,7 +2764,7 @@
 - func: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, int[2] padding, int[2] stride=1) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: _nnpack_spatial_convolution
+    CompositeExplicitAutograd: _nnpack_spatial_convolution
 
 - func: _nnpack_spatial_convolution_backward(Tensor input, Tensor grad_output, Tensor weight, int[2] padding, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
   variants: function
@@ -2793,7 +2793,7 @@
 
 - func: _euclidean_dist(Tensor x1, Tensor x2) -> Tensor
   dispatch:
-    DefaultBackend: _euclidean_dist
+    CompositeExplicitAutograd: _euclidean_dist
 
 - func: _cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor
   dispatch:
@@ -2819,7 +2819,7 @@
 - func: permute(Tensor(a) self, int[] dims) -> Tensor(a)
   variants: method  # This is method-only to match the previous tensor API. In the future we could make this a function too.
   dispatch:
-    DefaultBackend: permute
+    CompositeExplicitAutograd: permute
 
 - func: movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a)
   variants: function, method
@@ -2868,30 +2868,30 @@
 - func: rad2deg(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: rad2deg
+    CompositeExplicitAutograd: rad2deg
 
 - func: rad2deg_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: rad2deg_
+    CompositeExplicitAutograd: rad2deg_
 
 - func: rad2deg.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: rad2deg_out
+    CompositeExplicitAutograd: rad2deg_out
 
 - func: deg2rad(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: deg2rad
+    CompositeExplicitAutograd: deg2rad
 
 - func: deg2rad_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: deg2rad_
+    CompositeExplicitAutograd: deg2rad_
 
 - func: deg2rad.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: deg2rad_out
+    CompositeExplicitAutograd: deg2rad_out
 
 - func: scalar_tensor(Scalar s, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -2999,7 +2999,7 @@
 - func: reciprocal_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: reciprocal_
+    CompositeExplicitAutograd: reciprocal_
 
 - func: reciprocal.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3033,7 +3033,7 @@
 - func: repeat(Tensor self, int[] repeats) -> Tensor
   variants: method  # This is method-only to match the previous tensor API. In the future we could make this a function too.
   dispatch:
-    DefaultBackend: repeat
+    CompositeExplicitAutograd: repeat
 
 - func: repeat_interleave.Tensor(Tensor repeats) -> Tensor
   variants: function
@@ -3063,12 +3063,12 @@
 - func: round(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: round
+    CompositeExplicitAutograd: round
 
 - func: round_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: round_
+    CompositeExplicitAutograd: round_
 
 - func: round.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3146,7 +3146,7 @@
 - func: rsqrt_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: rsqrt_
+    CompositeExplicitAutograd: rsqrt_
 
 - func: rsqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3160,7 +3160,7 @@
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: select
+    CompositeExplicitAutograd: select
 
 - func: select_backward(Tensor grad, int[] input_sizes, int dim, int index) -> Tensor
   variants: function
@@ -3172,21 +3172,21 @@
 
 - func: celu(Tensor self, Scalar alpha=1.0) -> Tensor
   dispatch:
-    DefaultBackend: celu
+    CompositeExplicitAutograd: celu
 
 - func: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
   dispatch:
-    DefaultBackend: celu_
+    CompositeExplicitAutograd: celu_
 
 - func: silu(Tensor self) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: silu
+    CompositeExplicitAutograd: silu
 
 - func: silu_(Tensor(a!) self) -> Tensor(a!)
   python_module: nn
   dispatch:
-    DefaultBackend: silu_
+    CompositeExplicitAutograd: silu_
 
 - func: silu.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -3266,7 +3266,7 @@
 - func: sinh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: sinh_
+    CompositeExplicitAutograd: sinh_
 
 - func: sinh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3286,7 +3286,7 @@
 - func: detach(Tensor(a) self) -> Tensor(a)
   variants: function, method
   dispatch:
-    DefaultBackend: detach
+    CompositeExplicitAutograd: detach
 
 # Like `detach()`, but modifies this `Variable` in-place. This method may
 # only be called on non-view `Variable`s. You can use `is_view()` to check
@@ -3294,7 +3294,7 @@
 - func: detach_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: detach_
+    CompositeExplicitAutograd: detach_
 
 - func: size.int(Tensor self, int dim) -> int
   variants: function
@@ -3309,7 +3309,7 @@
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: slice
+    CompositeExplicitAutograd: slice
 
 - func: slice_backward(Tensor grad, int[] input_sizes, int dim, int start, int end, int step) -> Tensor
   variants: function
@@ -3318,7 +3318,7 @@
 - func: slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet)
   variants: function, method
   dispatch:
-    DefaultBackend: slogdet
+    CompositeExplicitAutograd: slogdet
 
 - func: smm(Tensor self, Tensor mat2) -> Tensor
   variants: function, method
@@ -3345,37 +3345,37 @@
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: unsafe_split
+    CompositeExplicitAutograd: unsafe_split
 
 - func: split.Tensor(Tensor(a) self, int split_size, int dim=0) -> Tensor(a)[]
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: split
+    CompositeExplicitAutograd: split
 
 - func: unsafe_split_with_sizes(Tensor self, int[] split_sizes, int dim=0) -> Tensor[]
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: unsafe_split_with_sizes
+    CompositeExplicitAutograd: unsafe_split_with_sizes
 
 - func: split_with_sizes(Tensor(a) self, int[] split_sizes, int dim=0) -> Tensor(a)[]
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: split_with_sizes
+    CompositeExplicitAutograd: split_with_sizes
 
 - func: squeeze(Tensor(a) self) -> Tensor(a)
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: squeeze
+    CompositeExplicitAutograd: squeeze
 
 - func: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: squeeze
+    CompositeExplicitAutograd: squeeze
 
 - func: squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a)
   variants: function, method
@@ -3385,13 +3385,13 @@
   variants: method
   device_guard: False
   dispatch:
-    DefaultBackend: squeeze_
+    CompositeExplicitAutograd: squeeze_
 
 - func: squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!)
   variants: method
   device_guard: False
   dispatch:
-    DefaultBackend: squeeze_
+    CompositeExplicitAutograd: squeeze_
 
 - func: squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!)
   variants: method
@@ -3409,21 +3409,21 @@
 
 - func: stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch:
-    DefaultBackend: stack
+    CompositeExplicitAutograd: stack
 
 - func: stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
-    DefaultBackend: stack_out
+    CompositeExplicitAutograd: stack_out
 
 - func: _stack(Tensor[] tensors, int dim=0) -> Tensor
   dispatch: # match the backends supported by _cat
     CPU: _stack_cpu
-    DefaultBackend: _stack
+    CompositeExplicitAutograd: _stack
 
 - func: _stack.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch: # match the backends supported by _cat_out
     CPU: _stack_out_cpu
-    DefaultBackend: _stack_out
+    CompositeExplicitAutograd: _stack_out
 
 - func: hstack(Tensor[] tensors) -> Tensor
 
@@ -3574,13 +3574,13 @@
   device_guard: False
   variants: function, method
   dispatch:
-    DefaultBackend: t
+    CompositeExplicitAutograd: t
 
 - func: t_(Tensor(a!) self) -> Tensor(a!)
   device_guard: False
   variants: method
   dispatch:
-    DefaultBackend: t_
+    CompositeExplicitAutograd: t_
 
 - func: tan(Tensor self) -> Tensor
   variants: function, method
@@ -3590,7 +3590,7 @@
 - func: tan_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: tan_
+    CompositeExplicitAutograd: tan_
 
 - func: tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3605,7 +3605,7 @@
 - func: tanh_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: tanh_
+    CompositeExplicitAutograd: tanh_
 
 - func: tanh.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3652,7 +3652,7 @@
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: transpose
+    CompositeExplicitAutograd: transpose
 
 - func: transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a)
   variants: function, method
@@ -3667,7 +3667,7 @@
   variants: method
   device_guard: False
   dispatch:
-    DefaultBackend: transpose_
+    CompositeExplicitAutograd: transpose_
 
 - func: _mkldnn_transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
   device_guard: False
@@ -3701,7 +3701,7 @@
 - func: rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: rot90
+    CompositeExplicitAutograd: rot90
 
 - func: trapz.x(Tensor y, Tensor x, *, int dim=-1) -> Tensor
 
@@ -3709,19 +3709,19 @@
 
 - func: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
   dispatch:
-    DefaultBackend: _trilinear
+    CompositeExplicitAutograd: _trilinear
 
 - func: triplet_margin_loss(Tensor anchor, Tensor positive, Tensor negative, float margin=1.0, float p=2, float eps=1e-06, bool swap=False, int reduction=Mean) -> Tensor
 
 - func: trunc(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: trunc
+    CompositeExplicitAutograd: trunc
 
 - func: trunc_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: trunc_
+    CompositeExplicitAutograd: trunc_
 
 - func: trunc.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -3778,19 +3778,19 @@
 
 - func: _unsafe_view(Tensor self, int[] size) -> Tensor
   dispatch:
-    DefaultBackend: _unsafe_view
+    CompositeExplicitAutograd: _unsafe_view
 
 - func: unsqueeze(Tensor(a) self, int dim) -> Tensor(a)
   variants: function, method
   device_guard: False
   dispatch:
-    DefaultBackend: unsqueeze
+    CompositeExplicitAutograd: unsqueeze
 
 - func: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
   variants: method
   device_guard: False
   dispatch:
-    DefaultBackend: unsqueeze_
+    CompositeExplicitAutograd: unsqueeze_
 
 - func: vander(Tensor x, int? N=None, bool increasing=False) -> Tensor
 
@@ -3937,7 +3937,7 @@
 
 - func: _sparse_sum.dim(Tensor self, int[1] dim) -> Tensor
   dispatch:
-    DefaultBackend: _sparse_sum
+    CompositeExplicitAutograd: _sparse_sum
 
 - func: _sparse_sum.dim_dtype(Tensor self, int[1] dim, *, ScalarType dtype) -> Tensor
 
@@ -4020,11 +4020,11 @@
 - func: frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent)
   variants: method, function
   dispatch:
-    DefaultBackend: frexp
+    CompositeExplicitAutograd: frexp
 
 - func: frexp.Tensor_out(Tensor self, *, Tensor(a!) mantissa, Tensor(b!) exponent) -> (Tensor(a!) mantissa, Tensor(b!) exponent)
   dispatch:
-    DefaultBackend: frexp_out
+    CompositeExplicitAutograd: frexp_out
 
 - func: frobenius_norm(Tensor self) -> Tensor
   variants: function
@@ -4058,7 +4058,7 @@
 - func: resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: resize_as_
+    CompositeExplicitAutograd: resize_as_
 
 - func: zero_(Tensor(a!) self) -> Tensor(a!)
   variants: method, function
@@ -4090,12 +4090,12 @@
 - func: sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: sub
+    CompositeExplicitAutograd: sub
 
 - func: sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: sub_
+    CompositeExplicitAutograd: sub_
 
 # subtract, alias for sub
 - func: subtract.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
@@ -4133,13 +4133,13 @@
 - func: rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
   variants: function
   dispatch:
-    DefaultBackend: rsub
+    CompositeExplicitAutograd: rsub
 
 # Functionally the same as addmm, but we give it a different derivative formula
 # that doesn't propagate gradients to non-present entries on sparse.
 - func: _sparse_addmm(Tensor self, Tensor sparse, Tensor dense, *, Scalar beta=1, Scalar alpha=1) -> Tensor
   dispatch:
-    DefaultBackend: _sparse_addmm
+    CompositeExplicitAutograd: _sparse_addmm
 
 - func: addmm.out(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -4418,7 +4418,7 @@
 - func: unbind.int(Tensor(a) self, int dim=0) -> Tensor(a)[]
   variants: function, method
   dispatch:
-    DefaultBackend: unbind
+    CompositeExplicitAutograd: unbind
 
 - func: unbind.Dimname(Tensor(a) self, Dimname dim) -> Tensor(a)[]
   variants: function, method
@@ -4700,7 +4700,7 @@
 # PackedSequence utilities
 - func: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)
   dispatch:
-    DefaultBackend: _pack_padded_sequence
+    CompositeExplicitAutograd: _pack_padded_sequence
 
 - func: _pack_padded_sequence_backward(Tensor grad, int[] input_size, Tensor batch_sizes, bool batch_first) -> Tensor
 
@@ -4784,7 +4784,7 @@
   variants: method
   device_guard: False
   dispatch:
-    DefaultBackend: view_dtype
+    CompositeExplicitAutograd: view_dtype
 
 - func: put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!)
   variants: method
@@ -4879,12 +4879,12 @@
 - func: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: eq_
+    CompositeExplicitAutograd: eq_
 
 - func: eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: eq_
+    CompositeExplicitAutograd: eq_
 
 - func: bitwise_and.Tensor_out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   variants: function
@@ -5108,7 +5108,7 @@
 - func: addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: addcdiv_
+    CompositeExplicitAutograd: addcdiv_
 
 - func: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
   variants: method
@@ -5160,7 +5160,7 @@
 - func: diag(Tensor self, int diagonal=0) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: diag
+    CompositeExplicitAutograd: diag
 
 - func: diag_backward(Tensor grad, int[] input_sizes, int diagonal) -> Tensor
   variants: function
@@ -5183,7 +5183,7 @@
 - func: triu(Tensor self, int diagonal=0) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: triu
+    CompositeExplicitAutograd: triu
 
 - func: tril.out(Tensor self, int diagonal=0, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -5193,7 +5193,7 @@
 - func: tril(Tensor self, int diagonal=0) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: tril
+    CompositeExplicitAutograd: tril
 
 - func: tril_indices(int row, int col, int offset=0, *, ScalarType? dtype=long, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
   dispatch:
@@ -5240,12 +5240,12 @@
 - func: ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: ne_
+    CompositeExplicitAutograd: ne_
 
 - func: ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: ne_
+    CompositeExplicitAutograd: ne_
 
 # not_equal, alias for torch.ne
 - func: not_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -5311,12 +5311,12 @@
 - func: ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: ge_
+    CompositeExplicitAutograd: ge_
 
 - func: ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: ge_
+    CompositeExplicitAutograd: ge_
 
 # greater_equal, alias for torch.ge
 - func: greater_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -5360,12 +5360,12 @@
 - func: le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: le_
+    CompositeExplicitAutograd: le_
 
 - func: le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: le_
+    CompositeExplicitAutograd: le_
 
 # less_equal, alias for torch.le
 - func: less_equal.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -5409,12 +5409,12 @@
 - func: gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: gt_
+    CompositeExplicitAutograd: gt_
 
 - func: gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: gt_
+    CompositeExplicitAutograd: gt_
 
 #  greater, alias for torch.gt
 - func: greater.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -5458,12 +5458,12 @@
 - func: lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: lt_
+    CompositeExplicitAutograd: lt_
 
 - func: lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: lt_
+    CompositeExplicitAutograd: lt_
 
 #  less, alias for torch.lt
 - func: less.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
@@ -5586,12 +5586,12 @@
 - func: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: addcmul
+    CompositeExplicitAutograd: addcmul
 
 - func: addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: addcmul_
+    CompositeExplicitAutograd: addcmul_
 
 - func: addcdiv.out(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -5600,7 +5600,7 @@
 - func: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: addcdiv
+    CompositeExplicitAutograd: addcdiv
 
 - func: cross_entropy_loss(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean, int ignore_index=-100) -> Tensor
   python_module: nn
@@ -5629,12 +5629,12 @@
 - func: symeig.e(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: symeig_out
+    CompositeExplicitAutograd: symeig_out
 
 - func: symeig(Tensor self, bool eigenvectors=False, bool upper=True) -> (Tensor eigenvalues, Tensor eigenvectors)
   variants: method, function
   dispatch:
-    DefaultBackend: symeig
+    CompositeExplicitAutograd: symeig
 
 - func: _symeig_helper(Tensor self, bool eigenvectors, bool upper) -> (Tensor, Tensor)
   variants: function
@@ -5645,12 +5645,12 @@
 - func: eig.e(Tensor self, bool eigenvectors=False, *, Tensor(a!) e, Tensor(b!) v) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: eig_out
+    CompositeExplicitAutograd: eig_out
 
 - func: eig(Tensor self, bool eigenvectors=False) -> (Tensor eigenvalues, Tensor eigenvectors)
   variants: method, function
   dispatch:
-    DefaultBackend: eig
+    CompositeExplicitAutograd: eig
 
 - func: svd.U(Tensor self, bool some=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -5685,12 +5685,12 @@
 - func: cholesky.out(Tensor self, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: cholesky_out
+    CompositeExplicitAutograd: cholesky_out
 
 - func: cholesky(Tensor self, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: cholesky
+    CompositeExplicitAutograd: cholesky
 
 - func: _cholesky_helper(Tensor self, bool upper) -> Tensor
   variants: function
@@ -5701,12 +5701,12 @@
 - func: cholesky_solve.out(Tensor self, Tensor input2, bool upper=False, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: cholesky_solve_out
+    CompositeExplicitAutograd: cholesky_solve_out
 
 - func: cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: cholesky_solve
+    CompositeExplicitAutograd: cholesky_solve
 
 - func: _cholesky_solve_helper(Tensor self, Tensor A, bool upper) -> Tensor
   variants: function
@@ -5717,12 +5717,12 @@
 - func: solve(Tensor self, Tensor A) -> (Tensor solution, Tensor LU)
   variants: function, method
   dispatch:
-    DefaultBackend: solve
+    CompositeExplicitAutograd: solve
 
 - func: solve.solution(Tensor self, Tensor A, *, Tensor(a!) solution, Tensor(b!) lu) -> (Tensor(a!) solution, Tensor(b!) LU)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: solve_out
+    CompositeExplicitAutograd: solve_out
 
 - func: _solve_helper(Tensor self, Tensor A) -> (Tensor, Tensor)
   variants: function
@@ -5782,12 +5782,12 @@
 - func: lu_solve.out(Tensor self, Tensor LU_data, Tensor LU_pivots, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: lu_solve_out
+    CompositeExplicitAutograd: lu_solve_out
 
 - func: lu_solve(Tensor self, Tensor LU_data, Tensor LU_pivots) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: lu_solve
+    CompositeExplicitAutograd: lu_solve
 
 - func: _lu_solve_helper(Tensor self, Tensor LU_data, Tensor LU_pivots) -> Tensor
   variants: function
@@ -5838,7 +5838,7 @@
 - func: polygamma(int n, Tensor self) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: polygamma
+    CompositeExplicitAutograd: polygamma
 
 - func: erfinv(Tensor self) -> Tensor
   variants: method, function
@@ -5858,12 +5858,12 @@
 - func: i0(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: i0
+    CompositeExplicitAutograd: i0
 
 - func: i0_(Tensor(a!) self) -> Tensor(a!)
   variants: function, method
   dispatch:
-    DefaultBackend: i0_
+    CompositeExplicitAutograd: i0_
 
 - func: i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -5873,12 +5873,12 @@
 - func: sign(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: sign
+    CompositeExplicitAutograd: sign
 
 - func: sign_(Tensor(a!) self) -> Tensor(a!)
   variants: method
   dispatch:
-    DefaultBackend: sign_
+    CompositeExplicitAutograd: sign_
 
 - func: sign.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -5895,7 +5895,7 @@
 - func: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
   variants: method, function
   dispatch:
-    DefaultBackend: dist
+    CompositeExplicitAutograd: dist
 
 - func: atan2.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -5972,7 +5972,7 @@
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   variants: method
   dispatch:
-    DefaultBackend: hypot_
+    CompositeExplicitAutograd: hypot_
 
 - func: igamma.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -6019,7 +6019,7 @@
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   variants: method
   dispatch:
-    DefaultBackend: nextafter_
+    CompositeExplicitAutograd: nextafter_
 
 - func: remainder.Scalar_out(Tensor self, Scalar other, *, Tensor(a!) out) -> Tensor(a!)
   dispatch:
@@ -6336,7 +6336,7 @@
 - func: alias(Tensor(a) self) -> Tensor(a)
   variants: method, function
   dispatch:
-    DefaultBackend: alias
+    CompositeExplicitAutograd: alias
 
 - func: _index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!)
   dispatch:
@@ -7013,12 +7013,12 @@
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: nn
   dispatch:
-    DefaultBackend: l1_loss_out
+    CompositeExplicitAutograd: l1_loss_out
 
 - func: l1_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: l1_loss
+    CompositeExplicitAutograd: l1_loss
 
 - func: l1_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -7029,7 +7029,7 @@
 - func: l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: l1_loss_backward
+    CompositeExplicitAutograd: l1_loss_backward
 
 - func: multi_margin_loss.out(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -7181,7 +7181,7 @@
 - func: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: smooth_l1_loss_backward
+    CompositeExplicitAutograd: smooth_l1_loss_backward
 
 - func: huber_loss.out(Tensor self, Tensor target, int reduction=Mean, float delta=1.0, *, Tensor(a!) out) -> Tensor(a!)
   python_module: nn
@@ -7201,29 +7201,29 @@
 - func: huber_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: huber_loss_backward
+    CompositeExplicitAutograd: huber_loss_backward
 
 - func: soft_margin_loss.out(Tensor self, Tensor target, int reduction=Mean, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: nn
   dispatch:
-    DefaultBackend: soft_margin_loss_out
+    CompositeExplicitAutograd: soft_margin_loss_out
 
 - func: soft_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: soft_margin_loss
+    CompositeExplicitAutograd: soft_margin_loss
 
 - func: soft_margin_loss_backward.grad_input(Tensor grad_output, Tensor self, Tensor target, int reduction, *, Tensor(a!) grad_input) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: nn
   dispatch:
-    DefaultBackend: soft_margin_loss_backward_out
+    CompositeExplicitAutograd: soft_margin_loss_backward_out
 
 - func: soft_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: soft_margin_loss_backward
+    CompositeExplicitAutograd: soft_margin_loss_backward
 
 - func: elu.out(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -7244,7 +7244,7 @@
 - func: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!)
   python_module: nn
   dispatch:
-    DefaultBackend: elu_
+    CompositeExplicitAutograd: elu_
 
 - func: glu.out(Tensor self, int dim=-1, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -7418,7 +7418,7 @@
 - func: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: rrelu_with_noise_backward
+    CompositeExplicitAutograd: rrelu_with_noise_backward
 
 - func: rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
   python_module: nn
@@ -7943,62 +7943,62 @@
 - func: upsample_linear1d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_linear1d
+    CompositeExplicitAutograd: upsample_linear1d
 
 - func: upsample_linear1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_linear1d_backward
+    CompositeExplicitAutograd: upsample_linear1d_backward
 
 - func: upsample_bilinear2d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_bilinear2d
+    CompositeExplicitAutograd: upsample_bilinear2d
 
 - func: upsample_bilinear2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_bilinear2d_backward
+    CompositeExplicitAutograd: upsample_bilinear2d_backward
 
 - func: upsample_trilinear3d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_trilinear3d
+    CompositeExplicitAutograd: upsample_trilinear3d
 
 - func: upsample_trilinear3d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_trilinear3d_backward
+    CompositeExplicitAutograd: upsample_trilinear3d_backward
 
 - func: upsample_bicubic2d.vec(Tensor input, int[]? output_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_bicubic2d
+    CompositeExplicitAutograd: upsample_bicubic2d
 
 - func: upsample_bicubic2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, bool align_corners, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_bicubic2d_backward
+    CompositeExplicitAutograd: upsample_bicubic2d_backward
 
 - func: upsample_nearest1d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_nearest1d
+    CompositeExplicitAutograd: upsample_nearest1d
 
 - func: upsample_nearest1d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_nearest1d_backward
+    CompositeExplicitAutograd: upsample_nearest1d_backward
 
 - func: upsample_nearest2d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_nearest2d
+    CompositeExplicitAutograd: upsample_nearest2d
 
 - func: upsample_nearest2d_backward.vec(Tensor grad_output, int[]? output_size, int[] input_size, float[]? scale_factors) -> Tensor
   python_module: nn
   dispatch:
-    DefaultBackend: upsample_nearest2d_backward
+    CompositeExplicitAutograd: upsample_nearest2d_backward
 
 - func: upsample_nearest3d.vec(Tensor input, int[]? output_size, float[]? scale_factors) -> Tensor
   python_module: nn
@@ -8517,7 +8517,7 @@
   python_module: special
   variants: function
   dispatch:
-    DefaultBackend: special_entr
+    CompositeExplicitAutograd: special_entr
 
 - func: special_entr.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: special
@@ -8733,14 +8733,14 @@
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_cholesky
+    CompositeExplicitAutograd: linalg_cholesky
 
 - func: linalg_cholesky.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_cholesky_out
+    CompositeExplicitAutograd: linalg_cholesky_out
 
 # torch.linalg.det, alias for torch.det
 - func: linalg_det(Tensor self) -> Tensor
@@ -8750,13 +8750,13 @@
 - func: det(Tensor self) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: det
+    CompositeExplicitAutograd: det
 
 - func: linalg_lstsq(Tensor self, Tensor b, float? cond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_lstsq
+    CompositeExplicitAutograd: linalg_lstsq
 
 - func: _lstsq_helper(Tensor a, Tensor b, float cond, str? driver_name) -> (Tensor, Tensor, Tensor)
   variants: function
@@ -8785,25 +8785,25 @@
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_eigh
+    CompositeExplicitAutograd: linalg_eigh
 
 - func: linalg_eigh.eigvals(Tensor self, str UPLO="L", *, Tensor(a!) eigvals, Tensor(b!) eigvecs) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: linalg
   dispatch:
-    DefaultBackend: linalg_eigh_out
+    CompositeExplicitAutograd: linalg_eigh_out
 
 - func: linalg_eigvalsh(Tensor self, str UPLO="L") -> Tensor
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_eigvalsh
+    CompositeExplicitAutograd: linalg_eigvalsh
 
 - func: linalg_eigvalsh.out(Tensor self, str UPLO='L', *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: linalg
   dispatch:
-    DefaultBackend: linalg_eigvalsh_out
+    CompositeExplicitAutograd: linalg_eigvalsh_out
 
 - func: linalg_householder_product(Tensor input, Tensor tau) -> Tensor
   python_module: linalg
@@ -8826,14 +8826,14 @@
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_inv
+    CompositeExplicitAutograd: linalg_inv
 
 - func: linalg_inv.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   variants: function
   dispatch:
-    DefaultBackend: linalg_inv_out
+    CompositeExplicitAutograd: linalg_inv_out
 
 - func: inner(Tensor self, Tensor other) -> Tensor
   variants: function, method
@@ -8851,12 +8851,12 @@
 - func: ger(Tensor self, Tensor vec2) -> Tensor
   variants: function, method
   dispatch:
-    DefaultBackend: ger
+    CompositeExplicitAutograd: ger
 
 - func: ger.out(Tensor self, Tensor vec2, *, Tensor(a!) out) -> Tensor(a!)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: ger_out
+    CompositeExplicitAutograd: ger_out
 
 - func: linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
   python_module: linalg
@@ -8880,12 +8880,12 @@
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_vector_norm
+    CompositeExplicitAutograd: linalg_vector_norm
 
 - func: linalg_vector_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
   dispatch:
-    DefaultBackend: linalg_vector_norm_out
+    CompositeExplicitAutograd: linalg_vector_norm_out
 
 - func: linalg_svd.U(Tensor self, bool full_matrices=True, bool compute_uv=True, *, Tensor(a!) U, Tensor(b!) S, Tensor(c!) V) -> (Tensor(a!) U, Tensor(b!) S, Tensor(c!) V)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -8941,13 +8941,13 @@
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_solve
+    CompositeExplicitAutograd: linalg_solve
 
 - func: linalg_solve.out(Tensor input, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
   python_module: linalg
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   dispatch:
-    DefaultBackend: linalg_solve_out
+    CompositeExplicitAutograd: linalg_solve_out
 
 - func: linalg_tensorinv(Tensor self, int ind=2) -> Tensor
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
@@ -8972,14 +8972,14 @@
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_qr
+    CompositeExplicitAutograd: linalg_qr
 
 - func: linalg_qr.out(Tensor self, str mode='reduced', *, Tensor(a!) Q, Tensor(b!) R) -> (Tensor(a!) Q, Tensor(b!) R)
   use_c10_dispatcher: hacky_wrapper_for_legacy_signatures
   python_module: linalg
   variants: function
   dispatch:
-    DefaultBackend: linalg_qr_out
+    CompositeExplicitAutograd: linalg_qr_out
 
 - func: _linalg_qr_helper(Tensor self, str mode) -> (Tensor, Tensor)
   variants: function
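
The native_functions.yaml hunks above only re-tag existing dispatch entries from `DefaultBackend:` to `CompositeExplicitAutograd:`; the kernels themselves are unchanged. For intuition, such an entry plays the same role as a hand-written registration under the renamed alias key. The sketch below is illustrative only and not part of this PR: the `myops::my_sub` operator and its kernel are hypothetical, and it assumes the usual `torch/library.h` registration macros.

```cpp
#include <ATen/ATen.h>
#include <torch/library.h>

// A single inference kernel intended to work on every backend. Training
// support would still need a separate registration under an autograd key.
at::Tensor my_sub(const at::Tensor& self, const at::Tensor& other) {
  return self - other;
}

TORCH_LIBRARY(myops, m) {
  m.def("my_sub(Tensor self, Tensor other) -> Tensor");
}

// Rough equivalent of a `CompositeExplicitAutograd: my_sub` dispatch entry
// in native_functions.yaml after this rename.
TORCH_LIBRARY_IMPL(myops, CompositeExplicitAutograd, m) {
  m.impl("my_sub", my_sub);
}
```
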
diff --git a/c10/core/DispatchKey.cpp b/c10/core/DispatchKey.cpp
index 12ec486..cbc94dd 100644
--- a/c10/core/DispatchKey.cpp
+++ b/c10/core/DispatchKey.cpp
@@ -112,8 +112,8 @@
     case DispatchKey::CompositeImplicitAutograd:
       return "CompositeImplicitAutograd";
 
-    case DispatchKey::DefaultBackend:
-      return "DefaultBackend";
+    case DispatchKey::CompositeExplicitAutograd:
+      return "CompositeExplicitAutograd";
 
     case DispatchKey::TESTING_ONLY_GenericWrapper:
       return "TESTING_ONLY_GenericWrapper";
diff --git a/c10/core/DispatchKey.h b/c10/core/DispatchKey.h
index c16c72b..b67ada0 100644
--- a/c10/core/DispatchKey.h
+++ b/c10/core/DispatchKey.h
@@ -271,18 +271,19 @@
   // See Note [Alias Dispatch Key : Autograd]
   Autograd,
   CompositeImplicitAutograd, // registered at build/aten/src/ATen/RegisterCompositeImplicitAutograd.cpp
-  DefaultBackend, // registered at
-                  // build/aten/src/ATen/RegisterDefaultBackend.cpp
+  CompositeExplicitAutograd, // registered at
+                  // build/aten/src/ATen/RegisterCompositeExplicitAutograd.cpp
 
   // Define an alias key to represent end of alias dispatch keys.
   // If you add new alias keys after Autograd, please also update it here.
-  EndOfAliasKeys = DefaultBackend, //
+  EndOfAliasKeys = CompositeExplicitAutograd, //
 
   // ~~~~~~~~~~~~~~~~~~~~~~~~~ BC ALIASES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
   // The aliases exist for backwards compatibility reasons, they shouldn't
   // be used
   CPUTensorId = CPU,
   CUDATensorId = CUDA,
+  DefaultBackend = CompositeExplicitAutograd,
   PrivateUse1_PreAutograd = AutogradPrivateUse1,
   PrivateUse2_PreAutograd = AutogradPrivateUse2,
   PrivateUse3_PreAutograd = AutogradPrivateUse3,
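
The enum hunk above keeps `DefaultBackend` alive purely as a backwards-compatibility alias, so out-of-tree code that still uses the old spelling keeps compiling and refers to the same key; per the DispatchKey.cpp hunk above, its string form now prints as "CompositeExplicitAutograd" either way. A minimal, illustrative check of that invariant (not code from this PR):

```cpp
#include <c10/core/DispatchKey.h>

// Both spellings name the same enumerator after the rename.
static_assert(
    c10::DispatchKey::DefaultBackend ==
        c10::DispatchKey::CompositeExplicitAutograd,
    "DefaultBackend must remain a BC alias for CompositeExplicitAutograd");
```
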
diff --git a/c10/core/DispatchKeySet.cpp b/c10/core/DispatchKeySet.cpp
index 2b9ea17..2a29b96 100644
--- a/c10/core/DispatchKeySet.cpp
+++ b/c10/core/DispatchKeySet.cpp
@@ -3,7 +3,7 @@
 namespace c10 {
 
 // backend_dispatch_keyset should include all runtime backend keys.
-// Alias key DispatchKey::DefaultBackend maps to backend_dispatch_keyset
+// Alias key DispatchKey::CompositeExplicitAutograd maps to backend_dispatch_keyset
 // NestedTensor has been explicitly removed due to incompatibility with some
-// kernels, such as structured kernels, that use the DefaultBackend key.
+// kernels, such as structured kernels, that use the CompositeExplicitAutograd key.
 constexpr DispatchKeySet backend_dispatch_keyset = autogradother_backends |

@@ -33,7 +33,7 @@
       return autograd_dispatch_keyset;
     case DispatchKey::CompositeImplicitAutograd:
       return math_dispatch_keyset;
-    case DispatchKey::DefaultBackend:
+    case DispatchKey::CompositeExplicitAutograd:
       return backend_dispatch_keyset;
     default:
       return DispatchKeySet(t);
diff --git a/test/test_dispatch.py b/test/test_dispatch.py
index 8f8ecdb..aa39ce9 100644
--- a/test/test_dispatch.py
+++ b/test/test_dispatch.py
@@ -562,8 +562,8 @@
             lambda m: m.def_("foo(Tensor x) -> Tensor"),
             # m.impl("foo", torch::kCPU, [](const Tensor & x) { return x })
             lambda m: m.impl_t_t("foo", "CPU", debug="fn_cpu"),
-            # m.impl("foo", torch::kDefaultBackend, [](const Tensor & x) { return x })
-            lambda m: m.impl_t_t("foo", "DefaultBackend", debug="fn_defaultbackend"),
+            # m.impl("foo", torch::kCompositeExplicitAutograd, [](const Tensor & x) { return x })
+            lambda m: m.impl_t_t("foo", "CompositeExplicitAutograd", debug="fn_defaultbackend"),
         ])
         state, table = result.state, result.table
         self.assertExpectedInline(state, '''\
@@ -572,7 +572,7 @@
 debug: registered at /dev/null:0
 alias analysis kind: FROM_SCHEMA
 CPU: fn_cpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
-DefaultBackend[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')
 
         # computed dispatch table is too big, so we only check on a few entries we're interested in.
@@ -597,8 +597,8 @@
             lambda m: m.impl_t_t("foo", "CPU", debug="fn_cpu"),
             # m.impl("foo", torch::kAutograd, [](const Tensor & x) { return x })
             lambda m: m.impl_t_t("foo", "Autograd", debug="fn_autograd"),
-            # m.impl("foo", torch::kDefaultBackend, [](const Tensor & x) { return x })
-            lambda m: m.impl_t_t("foo", "DefaultBackend", debug="fn_defaultbackend"),
+            # m.impl("foo", torch::kCompositeExplicitAutograd, [](const Tensor & x) { return x })
+            lambda m: m.impl_t_t("foo", "CompositeExplicitAutograd", debug="fn_defaultbackend"),
         ])
         state, table = result.state, result.table
         self.assertExpectedInline(state, '''\
@@ -608,7 +608,7 @@
 alias analysis kind: FROM_SCHEMA
 CPU: fn_cpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 Autograd[alias]: fn_autograd :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
-DefaultBackend[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')
 
         # computed dispatch table is too big, so we only check on a few entries we're interested in.
@@ -636,8 +636,8 @@
             lambda m: m.impl_t_t("foo", "Autograd", debug="fn_autograd"),
             # m.impl("foo", torch::kCompositeImplicitAutograd, [](const Tensor & x) { return x })
             lambda m: m.impl_t_t("foo", "CompositeImplicitAutograd", debug="fn_math"),
-            # m.impl("foo", torch::kDefaultBackend, [](const Tensor & x) { return x })
-            lambda m: m.impl_t_t("foo", "DefaultBackend", debug="fn_defaultbackend"),
+            # m.impl("foo", torch::kCompositeExplicitAutograd, [](const Tensor & x) { return x })
+            lambda m: m.impl_t_t("foo", "CompositeExplicitAutograd", debug="fn_defaultbackend"),
         ])
         state, table = result.state, result.table
         self.assertExpectedInline(state, '''\
@@ -648,7 +648,7 @@
 CPU: fn_cpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 Autograd[alias]: fn_autograd :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 CompositeImplicitAutograd[alias]: fn_math :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
-DefaultBackend[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')
 
         # computed dispatch table is too big, so we only check on a few entries we're interested in.
@@ -809,7 +809,7 @@
 
     def test_defaultbackend_autogradcpu(self):
         dispatcher = PythonDispatcher()
-        dispatcher.register(["CPU", "XLA", "DefaultBackend", "AutogradCPU"])
+        dispatcher.register(["CPU", "XLA", "CompositeExplicitAutograd", "AutogradCPU"])
         self.assertExpectedInline(
             dispatcher.dispatchTable(),
             '''\
@@ -819,7 +819,7 @@
 ---------------------------
 CPU             fn_CPU [kernel]
 XLA             fn_XLA [kernel]
-QuantizedCPU    fn_DefaultBackend [default backend kernel]
+QuantizedCPU    fn_CompositeExplicitAutograd [default backend kernel]
 AutogradOther   fallthrough [backend fallback]
 AutogradCPU     fn_AutogradCPU [kernel]
 AutogradXLA     fallthrough [backend fallback]
@@ -836,7 +836,7 @@
 CPU             fn_CPU
 XLA             fn_XLA
 AutogradCPU     fn_AutogradCPU
-DefaultBackend[alias] fn_DefaultBackend
+CompositeExplicitAutograd[alias] fn_CompositeExplicitAutograd
 '''
         )
 
@@ -883,8 +883,8 @@
 
         with self.assertRaisesRegex(
                 RuntimeError,
-                r"Registration to both CompositeImplicitAutograd and DefaultBackend is not allowed"):
-            dispatcher.register(["DefaultBackend", "CompositeImplicitAutograd"])
+                r"Registration to both CompositeImplicitAutograd and CompositeExplicitAutograd is not allowed"):
+            dispatcher.register(["CompositeExplicitAutograd", "CompositeImplicitAutograd"])
 
 
 if __name__ == '__main__':
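
The updated tests above drive registration through a Python test helper (`m.impl_t_t`), but the C++ calls they stand in for are spelled out in their comments: a backend-specific kernel registered next to a CompositeExplicitAutograd kernel. A hedged sketch of that first combination, for a hypothetical `test::foo` operator (illustration only, not code from this PR):

```cpp
#include <ATen/ATen.h>
#include <torch/library.h>

TORCH_LIBRARY(test, m) {
  m.def("foo(Tensor x) -> Tensor");
}

// CPU keeps its dedicated kernel.
TORCH_LIBRARY_IMPL(test, CPU, m) {
  m.impl("foo", [](const at::Tensor& x) { return x; });
}

// The CompositeExplicitAutograd kernel supplies the inference entry for
// backends that have no kernel of their own, which is what the expected
// dispatch tables in these tests encode.
TORCH_LIBRARY_IMPL(test, CompositeExplicitAutograd, m) {
  m.impl("foo", [](const at::Tensor& x) { return x; });
}
```
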
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index 29bc567..505cca9 100644
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@@ -325,7 +325,7 @@
         if name in MANUAL_AUTOGRAD_AND_TRACER or (fn.info and fn.info.has_derivatives):
             msg = (f'There\'s a formula for {name}(or its functional variant) in derivatives.yaml. '
                    f'It\'s required to add a dispatch section for it with explicit supported backends e.g CPU/CUDA '
-                   f'or DefaultBackend in native_functions.yaml. Please see '
+                   f'or CompositeExplicitAutograd in native_functions.yaml. Please see '
                    f'https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native#choosing-the-right-dispatch-keyword '
                    f'for instructions to choose the right dispatch keyword.')
             assert f.is_abstract, msg
diff --git a/tools/codegen/dest/register_dispatch_key.py b/tools/codegen/dest/register_dispatch_key.py
index 2dca455..7a5de26 100644
--- a/tools/codegen/dest/register_dispatch_key.py
+++ b/tools/codegen/dest/register_dispatch_key.py
@@ -67,9 +67,9 @@
             assert self.dispatch_key not in g.out.dispatch, \
                 "Do not explicitly specify Meta dispatch key on structured " \
                 "functions, they will be automatically generated for you"
-        elif self.dispatch_key == DispatchKey.DefaultBackend:
+        elif self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
             assert self.dispatch_key not in g.out.dispatch, \
-                "Do not explicitly specify DefaultBackend dispatch key on structured " \
+                "Do not explicitly specify CompositeExplicitAutograd dispatch key on structured " \
                 "functions, they will be automatically generated for you"
         elif not is_structured_dispatch_key(self.dispatch_key):
             return list(mapMaybe(self.gen_unstructured, g.functions()))
@@ -233,7 +233,7 @@
 """
 
     def gen_class_set_output_body(self, k: SchemaKind) -> str:
-        if self.dispatch_key in [DispatchKey.CUDA, DispatchKey.DefaultBackend]:
+        if self.dispatch_key in [DispatchKey.CUDA, DispatchKey.CompositeExplicitAutograd]:
             maybe_set_guard = """
 auto current_device = guard_.current_device();
 if (C10_UNLIKELY(current_device.has_value())) {
@@ -264,7 +264,7 @@
                 elif self.dispatch_key == DispatchKey.CUDA:
                     empty_impl = "at::native::empty_cuda"
                     empty_strided_impl = "at::native::empty_strided_cuda"
-                elif self.dispatch_key == DispatchKey.DefaultBackend:
+                elif self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
                     empty_impl = "at::empty"
                     empty_strided_impl = "at::empty_strided"
                 else:
@@ -337,7 +337,7 @@
                 guard_field = 'c10::hip::OptionalHIPGuardMasqueradingAsCUDA guard_;'
             else:
                 guard_field = 'c10::cuda::OptionalCUDAGuard guard_;'
-        elif self.dispatch_key == DispatchKey.DefaultBackend:
+        elif self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
             guard_field = 'c10::OptionalDeviceGuard guard_;'
         else:
             guard_field = ''
@@ -362,7 +362,7 @@
             return None
 
         # TODO: Now, there is something interesting going on here.  In the code below,
-        # we generate DefaultBackend implementations of functional and inplace
+        # we generate CompositeExplicitAutograd implementations of functional and inplace
         # based on the out implementation.  But in fact, out is definable by
         # functional too (just not very efficiently), and this is honestly the
         # MORE likely situation for a backend implementor.  How do we pick?
@@ -372,7 +372,7 @@
         # someone to implement one or the other.  We'd have to do a little bit
         # of work to not register one of these "weak" definitions unless there
         # is a strong definition somewhere in the DAG!  So it's not implemented yet.
-        if self.dispatch_key == DispatchKey.DefaultBackend and f.func.kind() is SchemaKind.out:
+        if self.dispatch_key == DispatchKey.CompositeExplicitAutograd and f.func.kind() is SchemaKind.out:
             # Never generate a default implementation for out, that's what you
             # have to define as a backend implementor
             return None
@@ -421,7 +421,7 @@
             if self.dispatch_key is DispatchKey.Meta:
                 class_name = f"structured_{meta.name(self.g)}_meta_{k.name}"
                 parent_class = f"at::meta::{meta.name(self.g)}"
-            elif self.dispatch_key is DispatchKey.DefaultBackend:
+            elif self.dispatch_key is DispatchKey.CompositeExplicitAutograd:
                 # TODO: dedup this branch
                 class_name = f"structured_{meta.name(self.g)}_default_backend_{k.name}"
                 parent_class = f"at::meta::{meta.name(self.g)}"
@@ -464,7 +464,7 @@
 
             # With the expanded context, do the impl call (if not a meta
             # function)
-            if self.dispatch_key == DispatchKey.DefaultBackend:
+            if self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
                 # TODO: https://github.com/pytorch/pytorch/issues/53023
                 out_sig_group = CppSignatureGroup.from_native_function(
                     self.g.out, method=False, fallback_binding=f.manual_cpp_binding)
diff --git a/tools/codegen/gen.py b/tools/codegen/gen.py
index 61b9f0e..8859e66 100644
--- a/tools/codegen/gen.py
+++ b/tools/codegen/gen.py
@@ -124,7 +124,7 @@
         return ''
     return f"""
 #include <ATen/{backend}Functions.h>
-#include <ATen/DefaultBackendFunctions.h>
+#include <ATen/CompositeExplicitAutogradFunctions.h>
 #include <ATen/CompositeImplicitAutogradFunctions.h>
 """
 
@@ -147,7 +147,7 @@
         # migrate math/default_backend ops to use structured delegate.
         return f'return at::{backend.lower()}::{name}({exprs_str});'
 
-    for dispatch_key in (backend, DispatchKey.DefaultBackend, DispatchKey.CompositeImplicitAutograd):
+    for dispatch_key in (backend, DispatchKey.CompositeExplicitAutograd, DispatchKey.CompositeImplicitAutograd):
         if dispatch_key in f.dispatch:
             return f'return at::{dispatch_key.lower()}::{name}({exprs_str});'
 
@@ -863,7 +863,7 @@
         DispatchKey.QuantizedCPU,
         DispatchKey.QuantizedCUDA,
         DispatchKey.CompositeImplicitAutograd,
-        DispatchKey.DefaultBackend,
+        DispatchKey.CompositeExplicitAutograd,
         # Meta is a magic key: it is automatically generated for structured
         # kernels
         DispatchKey.Meta,
@@ -874,7 +874,7 @@
         DispatchKey.CPU,
         DispatchKey.CUDA,
         DispatchKey.CompositeImplicitAutograd,
-        DispatchKey.DefaultBackend,
+        DispatchKey.CompositeExplicitAutograd,
     }
     if options.backend_whitelist:
         dispatch_keys = [k for k in dispatch_keys if is_generic_dispatch_key(k) or str(k) in options.backend_whitelist]
diff --git a/tools/codegen/model.py b/tools/codegen/model.py
index 9dbf7eb..e739424 100644
--- a/tools/codegen/model.py
+++ b/tools/codegen/model.py
@@ -103,8 +103,8 @@
     NumDispatchKeys = auto()
     Autograd = auto()
     CompositeImplicitAutograd = auto()
-    DefaultBackend = auto()
-    EndOfAliasKeys = DefaultBackend
+    CompositeExplicitAutograd = auto()
+    EndOfAliasKeys = CompositeExplicitAutograd
 
     CPUTensorId = CPU
     CUDATensorId = CUDA
@@ -134,7 +134,7 @@
 # Dispatch keys that "support all backends".  These codegen slightly differently
 # then backend specific keys.
 def is_generic_dispatch_key(dk: DispatchKey) -> bool:
-    return dk in {DispatchKey.DefaultBackend, DispatchKey.CompositeImplicitAutograd}
+    return dk in {DispatchKey.CompositeExplicitAutograd, DispatchKey.CompositeImplicitAutograd}
 
 # CUDA specific dispatch keys
 def is_cuda_dispatch_key(dk: DispatchKey) -> bool:
@@ -347,10 +347,10 @@
         elif not structured and structured_delegate is None:
             dispatch[DispatchKey.CompositeImplicitAutograd] = cpp.name(func)
 
-        assert not (DispatchKey.DefaultBackend in dispatch and DispatchKey.CompositeImplicitAutograd in dispatch), \
-            "cannot specify both DefaultBackend and CompositeImplicitAutograd on a single kernel; each " \
+        assert not (DispatchKey.CompositeExplicitAutograd in dispatch and DispatchKey.CompositeImplicitAutograd in dispatch), \
+            "cannot specify both CompositeExplicitAutograd and CompositeImplicitAutograd on a single kernel; each " \
             "strictly subsumes the other.  If you wanted to provide an explicit autograd " \
-            "implementation, specify DefaultBackend; otherwise specify CompositeImplicitAutograd only"
+            "implementation, specify CompositeExplicitAutograd; otherwise specify CompositeImplicitAutograd only"
 
         e.pop('__line__')
         assert not e, f"leftover entries: {e}"
diff --git a/torch/_python_dispatcher.py b/torch/_python_dispatcher.py
index 9ed1b88..d13b0aa 100644
--- a/torch/_python_dispatcher.py
+++ b/torch/_python_dispatcher.py
@@ -22,16 +22,18 @@
     kernel defined in pytorch core library. Backend owner is responsible for registering both
     inference & autograd kernels in their extensions(e.g. torch-xla) for the operators they support.
     E.g. XLA, XPU, MLC
-- DefaultBackend: alias key mapped to inference kernels of all backends like CPU, CUDA, XLA etc.
+- CompositeExplicitAutograd: alias key mapped to inference kernels of all backends like CPU, CUDA, XLA etc.
     Kernels registered to this key MUST work for inference for all backends.
 - Autograd: alias key mapped to autograd of all backends like AutogradCPU, AutogradXLA, AutogradOther.
     Kernels registered to this key MUST work for autograd for all backends.
-- CompositeImplicitAutograd: alias key CompositeImplicitAutograd = DefaultBackend + Autograd
+- CompositeImplicitAutograd: alias key CompositeImplicitAutograd = CompositeExplicitAutograd + Autograd
     Kernels registered to this key MUST work for both inference + autograd for all backends.
 
-Note we only allow registrations to alias keys inside pytorch core library. E.g you shouldn't register
-a CompositeImplicitAutograd or DefaultBackend kernel from torch-xla extension, instead you should upstream the kernel into
-pytorch/pytorch repo so that it's available for all backends and continuously tested even without the extension.
+Note we only allow registrations to alias keys inside pytorch core library. E.g
+you shouldn't register a CompositeImplicitAutograd or CompositeExplicitAutograd
+kernel from torch-xla extension, instead you should upstream the kernel into
+pytorch/pytorch repo so that it's available for all backends and continuously
+tested even without the extension.
 
 Usage:
   dispatcher = PythonDispatcher()
@@ -55,7 +57,7 @@
         "XLA", "AutogradXLA",
     ]
     alias_keys = [
-        "DefaultBackend",
+        "CompositeExplicitAutograd",
         "Autograd",
         "CompositeImplicitAutograd",
     ]
@@ -85,8 +87,8 @@
         if len(set(dispatchKeys)) != len(dispatchKeys):
             raise RuntimeError(f"Overriden is not allowed but found duplicates in {dispatchKeys}.")
         # We currently forbid this in codegen instead of C++ dispatcher.
-        if 'CompositeImplicitAutograd' in dispatchKeys and 'DefaultBackend' in dispatchKeys:
-            raise RuntimeError("Registration to both CompositeImplicitAutograd and DefaultBackend is not allowed.")
+        if 'CompositeImplicitAutograd' in dispatchKeys and 'CompositeExplicitAutograd' in dispatchKeys:
+            raise RuntimeError("Registration to both CompositeImplicitAutograd and CompositeExplicitAutograd is not allowed.")
         for key in dispatchKeys:
             if key not in self.supported_keys:
                 raise RuntimeError(f"{key} is not supported, please select a dispatch key in {self.supported_keys}.")
diff --git a/torch/csrc/autograd/VariableTypeManual.cpp b/torch/csrc/autograd/VariableTypeManual.cpp
index 90e055f..6414313 100644
--- a/torch/csrc/autograd/VariableTypeManual.cpp
+++ b/torch/csrc/autograd/VariableTypeManual.cpp
@@ -373,14 +373,14 @@
 // Ops in the following registration list are registered as
 //   (1) CompositeImplicitAutograd kernels
 //   (2) Autograd kernels
-//   (3) DefaultBackend kernels and additionally Autograd kernels
+//   (3) CompositeExplicitAutograd kernels and additionally Autograd kernels
 // The reason for (3) is that ops that also use dispatch (e.g. register CPU/CUDA/QuantizedCPU
 // kernels) will skip picking up CompositeImplicitAutograd kernels for Autograd, so we register them to both
-// DefaultBackend and Autograd instead. See
+// CompositeExplicitAutograd and Autograd instead. See
 // https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native#choosing-the-right-dispatch-keyword
 // for more details.
 // Invariant:
-// - Ops registered to CompositeImplicitAutograd or DefaultBackend below must match `MANUAL_BACKEND` set in tools/autograd/gen_variable_type.py.
+// - Ops registered to CompositeImplicitAutograd or CompositeExplicitAutograd below must match `MANUAL_BACKEND` set in tools/autograd/gen_variable_type.py.
 //   and they have manual_kernel_registration=True in native_functions.yaml.
 // - Ops registered to DispatchKey::Autograd below must be included in `MANUAL_AUTOGRAD` in tools/autograd/gen_variable_type.py
 
@@ -393,9 +393,9 @@
   m.impl("_fw_primal", torch::dispatch(DispatchKey::Autograd, TORCH_FN(VariableType::_fw_primal)));
 }
 
-TORCH_LIBRARY_IMPL(aten, DefaultBackend, m) {
-  m.impl("_backward", torch::dispatch(DispatchKey::DefaultBackend, TORCH_FN(VariableType::_backward)));
-  m.impl("requires_grad_", torch::dispatch(DispatchKey::DefaultBackend, TORCH_FN(VariableType::requires_grad_)));
+TORCH_LIBRARY_IMPL(aten, CompositeExplicitAutograd, m) {
+  m.impl("_backward", torch::dispatch(DispatchKey::CompositeExplicitAutograd, TORCH_FN(VariableType::_backward)));
+  m.impl("requires_grad_", torch::dispatch(DispatchKey::CompositeExplicitAutograd, TORCH_FN(VariableType::requires_grad_)));
 }
 
 TORCH_LIBRARY_IMPL(aten, CompositeImplicitAutograd, m) {
diff --git a/torch/csrc/utils/python_dispatch.cpp b/torch/csrc/utils/python_dispatch.cpp
index 2ff386d..995c87c 100644
--- a/torch/csrc/utils/python_dispatch.cpp
+++ b/torch/csrc/utils/python_dispatch.cpp
@@ -34,7 +34,7 @@
     {"QuantizedCPU", c10::DispatchKey::QuantizedCPU},
     {"CompositeImplicitAutograd", c10::DispatchKey::CompositeImplicitAutograd},
     {"Autograd", c10::DispatchKey::Autograd},
-    {"DefaultBackend", c10::DispatchKey::DefaultBackend},
+    {"CompositeExplicitAutograd", c10::DispatchKey::CompositeExplicitAutograd},
     {"AutogradCPU", c10::DispatchKey::AutogradCPU},
     {"", c10::DispatchKey::Undefined},
   };