#pragma once
#include <iostream>
#include <string>
#include "c10/macros/Macros.h"
namespace c10 {
// Semantically, a dispatch key identifies a possible "level" in our
// dispatch, for which a handler may be registered. Traditional
// backends like CPU and CUDA get dispatch keys; however, so do
// "wrapping" layers like Variable (for autograd handling).
//
// In implementation terms, the dispatch key identifies a specific "bit" in a
// DispatchKeySet. Higher bit indexes get handled by dispatching first (because
// we "count leading zeros" when we extract the highest priority dispatch
// key.)
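// As a hedged sketch of that extraction (the real bit manipulation lives in
// DispatchKeySet; the exact bit <-> key offset is elided here and
// bit_to_key is a purely illustrative helper):
//
//   uint64_t repr = ...;                           // one bit per dispatch key
//   int highest_bit = 63 - __builtin_clzll(repr);  // "count leading zeros"
//   DispatchKey top = bit_to_key(highest_bit);     // higher keys win first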
enum class DispatchKey : uint8_t {
// ~~~~~~~~~~~~~~~~~~~~~~~~~~ UNDEFINED ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
// This is not a "real" tensor id, but it exists to give us a "nullopt"
// element we can return for cases when a DispatchKeySet contains no elements.
// You can think of a more semantically accurate definition of DispatchKey as:
//
// using DispatchKey = optional<RealDispatchKey>
//
// and Undefined == nullopt. We didn't actually represent
// it this way because optional<RealDispatchKey> would take two
// words, when DispatchKey fits in eight bits.
Undefined = 0,
// ~~~~~~~~~~~~~~~~~~~~~~~~~~ BACKENDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
// A "backend" is colloquially used to refer to handlers for dispatch
// which actually implement the numerics of the operation in question.
//
// Due to the nature of the enum, these backends are specified in
// an ordered way, but for most backends this order is not semantically
// meaningful (e.g., it's valid to reorder these backends without changing
// semantics). The only situation when backend ordering is meaningful
// is when the backend participates in multiple dispatch with another
// backend; e.g., CPUTensorId and SparseCPUTensorId (sparse must have
// higher priority).
// Here are backends which you would traditionally think of as specifying
// how to implement operations on some device.
CPUTensorId, // registered at build/aten/src/ATen/CPUType.cpp
CUDATensorId, // registered at build/aten/src/ATen/CUDAType.cpp
HIPTensorId, // NB: I think this is not actually used, due to Note [Masquerading as CUDA]
MSNPUTensorId, // unused externally, but tested at test/cpp_extensions/msnpu_extension.cpp
XLATensorId, // lives out of tree at https://github.com/pytorch/xla
// These are Caffe2 device types which we grandfathered into
// DispatchKey.
// TODO: Caffe2-only DispatchKeys actually should be removed from this enum
// and just simply be undispatchable.
MKLDNNTensorId, // (MKLDNN is treated as another "device" in Caffe2)
OpenGLTensorId,
OpenCLTensorId,
IDEEPTensorId,
// Here are backends which specify more specialized operators
// based on the dtype of the tensor.
QuantizedCPUTensorId, // registered at build/aten/src/ATen/QuantizedCPUType.cpp
ComplexCPUTensorId, // lives out of tree at https://gitlab.com/pytorch-complex/pytorch-cpu-strided-complex
ComplexCUDATensorId, // and https://gitlab.com/pytorch-complex/pytorch-cuda-strided-complex
// tested at test/cpp_extensions/complex_registration_extension.cpp
// TODO: Remove Complex dispatch keys when Complex is moved in tree
// This backend exists to support custom RNGs; it lets you dispatch
// to a different kernel if you pass in a generator that is not a
// traditional CPUGenerator/CUDAGenerator. To make use of this
// key:
// 1) set it as the second parameter of the at::Generator constructor call
// in the user-defined PRNG class.
// 2) use it as a dispatch key while registering custom kernels
// (templatized kernels specialized for the user-defined PRNG class),
// as sketched below.
// This key is intended for out-of-tree use; it is tested by
// aten/src/ATen/test/rng_test.cpp.
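// A minimal sketch of those two steps (the base-class constructor signature
// and helper names here are assumptions; see rng_test.cpp for real usage):
//
//   // 1) pass CustomRNGKeyId to the Generator base class:
//   struct MyGeneratorImpl : public c10::GeneratorImpl {
//     MyGeneratorImpl()
//       : c10::GeneratorImpl(
//             at::Device(at::DeviceType::CPU),
//             c10::DispatchKeySet(c10::DispatchKey::CustomRNGKeyId)) {}
//     // ... PRNG state and overrides ...
//   };
//
//   // 2) register kernels (e.g. for aten::random_) under
//   //    DispatchKey::CustomRNGKeyId, so calls that carry a Generator
//   //    backed by MyGeneratorImpl dispatch to them instead of the
//   //    built-in CPU/CUDA ones.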
CustomRNGKeyId,
// Here are backends which specify more specialized operators
// based on the layout of the tensor. Note that the sparse backends
// are one case where ordering matters: sparse multi-dispatches with
// the corresponding dense tensors, and must be handled before them.
MkldnnCPUTensorId, // registered at build/aten/src/ATen/MkldnnCPUType.cpp
// NB: not to be confused with MKLDNNTensorId, which is Caffe2 only
SparseCPUTensorId, // registered at build/aten/src/ATen/SparseCPUType.cpp
SparseCUDATensorId, // registered at build/aten/src/ATen/SparseCUDAType.cpp
SparseHIPTensorId, // TODO: I think this is not actually used, due to Note [Masquerading as CUDA]
// Here are reserved dispatch keys for user-defined backends; see Note [Private use TensorId].
// For an example of how to use these, check out MSNPU.
PrivateUse1_TensorId,
PrivateUse2_TensorId,
PrivateUse3_TensorId,
// In some situations, it is not immediately obvious what the correct
// backend for a function is, because the function in question doesn't
// have any "tensor" arguments. In this case, a BackendSelect function
// can be registered to implement the custom determination of the
// correct backend.
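// For example, factory functions like at::empty take only sizes and a
// TensorOptions, so a BackendSelect kernel can compute the key from the
// options and redispatch. A hedged sketch (computeDispatchKey and
// redispatchWithKey are illustrative names, not the generated code):
//
//   Tensor empty_backend_select(IntArrayRef size, const TensorOptions& options) {
//     DispatchKey key = computeDispatchKey(options);   // device/layout/dtype
//     return redispatchWithKey(key, "aten::empty", size, options);
//   }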
BackendSelect,
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ AUTOGRAD ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
// All backends are oblivious to autograd; autograd is handled as a
// layer which happens on top of all backends. It inspects the autograd
// metadata of all inputs, determines what autograd metadata should be
// constructed for the output, and otherwise defers to the backend to
// actually do the numeric computation. VariableTensorId contains
// the bulk of this logic.
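// As a hedged sketch of what a kernel at this key conceptually does (the
// real kernels are autograd-generated; AddBackward0, collect_next_edges and
// set_history appear here only for illustration):
//
//   Tensor variable_add(const Tensor& a, const Tensor& b) {
//     auto grad_fn = std::make_shared<AddBackward0>();   // record history
//     grad_fn->set_next_edges(collect_next_edges(a, b));
//     Tensor result = /* redispatch to the backend kernel, e.g. CPU add */;
//     set_history(result, grad_fn);                      // attach metadata
//     return result;
//   }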
VariableTensorId,
// Pre-autograd dispatch keys allow backends to override the autograd behavior
// (aka VariableTensorId) for operators which have a Variable kernel
// already registered. For example, XLA wants to define autograd for
// einsum directly. Registering a custom autograd implementation at the
// XLATensorId key won't work because we process VariableTensorId
// before XLATensorId. This key has higher priority and gets processed
// first. You generally should NOT redispatch after handling autograd
// here (since that would result in execution of the VariableTensorId
// operator, which you're trying to skip). In PreAutograd implementations,
// you are responsible for handling autograd yourself, or deferring to other
// operators which support autograd.
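// A hedged sketch of such an override (registration syntax elided;
// xla_einsum_with_autograd is an illustrative name):
//
//   Tensor xla_einsum_with_autograd(std::string equation, TensorList tensors) {
//     // Implement einsum and its autograd bookkeeping here; do not
//     // redispatch through VariableTensorId, since the point of this key
//     // is to skip the built-in Variable kernel.
//   }
//   // ...registered for aten::einsum under DispatchKey::XLAPreAutograd.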
XLAPreAutograd,
// Here are some reserved pre-autograd keys for user-defined backends, see Note [Private use TensorId]
PrivateUse1_PreAutogradTensorId,
PrivateUse2_PreAutogradTensorId,
PrivateUse3_PreAutogradTensorId,
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ WRAPPERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
// There are a number of alternative modes which may want to run before
// autograd; for example, error checking, tracing, profiling, or vmap. They
// go here.
// TESTING: This is intended to be a generic testing tensor type id.
// Don't use it for anything real; its only acceptable use is within a single
// process test. Use it by creating a TensorImpl with this DispatchKey, and
// then registering operators to operate on this type id. See
// aten/src/ATen/test/backend_fallback_test.cpp for a usage example.
TESTING_ONLY_GenericWrapperTensorId,
// TESTING: This is intended to be a generic testing tensor type id.
// Don't use it for anything real; its only acceptable use is within a single
// process test. Use it by toggling the mode on and off via
// TESTING_ONLY_tls_generic_mode_set_enabled and then registering operators
// to operate on this type id. See aten/src/ATen/test/backend_fallback_test.cpp
// for a usage example.
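// A hedged sketch of that toggle (assuming the setter takes a bool; see the
// test file above for the real call sites):
//
//   TESTING_ONLY_tls_generic_mode_set_enabled(true);
//   // ...operator calls now also hit kernels registered for this key...
//   TESTING_ONLY_tls_generic_mode_set_enabled(false);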
TESTING_ONLY_GenericModeTensorId,
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FIN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
NumDispatchKeys, // Sentinel
};
// Note [Private use TensorId]
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Private use tensor IDs are preallocated tensor type IDs for use in user
// applications. Similar to private use fields in HTTP, they can be used
// by end users for experimental or private applications, without needing
// to "standardize" the tensor ID (which would be done by submitting a PR
// to PyTorch to add your type ID).
//
// Private use tensor IDs are appropriate to use if you want to experiment
// with adding a new tensor type (without having to patch PyTorch first) or
// have a private, non-distributed application that needs to make use of a
// new tensor type. Private use tensor IDs are NOT appropriate to use for
// libraries intended to be distributed to further users: please contact
// the PyTorch developers to get a type ID registered in this case.
//
// We provide two classes of private use tensor ids: regular TensorIds
// and PreAutogradTensorIds. TensorIds serve the role of ordinary "backend"
// TensorIds; if you were adding support for a new type of accelerator, you
// would use a TensorId, and reuse autograd definitions already defined in
// PyTorch for operators you define. PreAutogradTensorIds serve as "wrapper"
// TensorIds: they are most appropriate for tensors that compose multiple
// internal tensors, and for cases when the built-in autograd formulas for
// operators are not appropriate.
static_assert(
static_cast<uint8_t>(DispatchKey::NumDispatchKeys) < 64,
"DispatchKey is used as index into 64-bit bitmask; you must have less than 64 entries");
C10_API const char* toString(DispatchKey);
C10_API std::ostream& operator<<(std::ostream&, DispatchKey);
// For backwards compatibility with the XLA repository
// (I don't want to fix this in XLA right now because there might be
// more renaming coming in the future.)
static inline DispatchKey XLATensorId() {
return DispatchKey::XLATensorId;
}
} // namespace c10
// NB: You really shouldn't use this instance; this enum is guaranteed
// to be pretty small so a regular array should be acceptable.
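// For example, instead of std::unordered_map<c10::DispatchKey, T>, a plain
// array indexed by the key is usually enough (a sketch; KernelFunction just
// stands in for whatever value type you need):
//
//   std::array<KernelFunction,
//       static_cast<size_t>(c10::DispatchKey::NumDispatchKeys)> table;
//   table[static_cast<size_t>(c10::DispatchKey::CPUTensorId)] = ...;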
namespace std {
template <>
struct hash<c10::DispatchKey> {
typedef size_t result_type;
typedef c10::DispatchKey argument_type;
size_t operator()(c10::DispatchKey x) const {
return static_cast<size_t>(x);
}
};
} // namespace std