#include <torch/csrc/python_headers.h>
#include <c10/core/DeviceType.h>
#include <c10/core/InferenceMode.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/autograd/autograd.h>
#include <torch/csrc/autograd/grad_mode.h>
#include <ATen/autocast_mode.h>
#include <torch/csrc/autograd/profiler.h>
#include <torch/csrc/autograd/python_function.h>
#include <torch/csrc/autograd/function.h>
#include <torch/csrc/autograd/saved_variable.h>
#include <torch/csrc/autograd/python_saved_variable_hooks.h>
#include <torch/csrc/autograd/utils/wrap_outputs.h>
#include <torch/csrc/autograd/utils/python_arg_parsing.h>
#include <torch/csrc/utils/pycfunction_helpers.h>
#include <c10/core/ScalarType.h>
#include <set>
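// RAII guard that excludes the Python dispatch key for its lifetime, so that
// kernels registered on c10::DispatchKey::Python (e.g. __torch_dispatch__
// handlers) are skipped while an instance of this guard is alive.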
struct DisableTorchDispatch {
DisableTorchDispatch() : guard_(c10::DispatchKey::Python) {
}
c10::impl::ExcludeDispatchKeyGuard guard_;
};
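// Initializes the autograd Python bindings: caches the Python-side Tensor,
// Function and Parameter classes, and populates the torch._C._autograd
// submodule (profiler, saved-tensor hooks) plus a few torch._C helpers.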
PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject *unused) {
using namespace torch::autograd::profiler;
auto tensor_module = THPObjectPtr(PyImport_ImportModule("torch._tensor"));
if (!tensor_module)
return nullptr;
// NOTE: "leaks" THPVariableClass
THPVariableClass = PyObject_GetAttrString(tensor_module, "Tensor");
if (!THPVariableClass)
return nullptr;
auto autograd_module = THPObjectPtr(PyImport_ImportModule("torch.autograd"));
if (!autograd_module)
return nullptr;
// NOTE: "leaks" Function
THPFunctionClass = PyObject_GetAttrString(autograd_module, "Function");
if (!THPFunctionClass)
return nullptr;
auto torch_C_module = THPObjectPtr(PyImport_ImportModule("torch._C"));
if (!torch_C_module)
return nullptr;
auto _C_m = py::handle(torch_C_module).cast<py::module>();
auto m = _C_m.def_submodule("_autograd", "autograd bindings");
auto parameter_module = THPObjectPtr(PyImport_ImportModule("torch.nn.parameter"));
if (!parameter_module)
return nullptr;
// NOTE: "leaks" ParameterClass
ParameterClass = PyObject_GetAttrString(parameter_module, "Parameter");
if (!ParameterClass)
return nullptr;
py::enum_<ProfilerState>(m, "ProfilerState")
.value("Disabled", ProfilerState::Disabled)
.value("CPU", ProfilerState::CPU)
.value("CUDA", ProfilerState::CUDA)
.value("NVTX", ProfilerState::NVTX)
.value("KINETO", ProfilerState::KINETO)
.value("KINETO_GPU_FALLBACK", ProfilerState::KINETO_GPU_FALLBACK);
py::enum_<ActivityType>(m, "ProfilerActivity")
.value("CPU", ActivityType::CPU)
.value("CUDA", ActivityType::CUDA);
py::class_<ProfilerConfig>(m, "ProfilerConfig")
.def(py::init<ProfilerState,
bool, /* record_input_shapes */
bool, /* profile_memory */
bool, /* with_stack */
bool, /* with_flops */
bool /* with_modules */
>());
py::class_<LegacyEvent>(m, "ProfilerEvent")
.def("kind", &LegacyEvent::kindStr)
.def("name", [](const LegacyEvent& e) { return e.name(); })
.def("thread_id", &LegacyEvent::threadId)
.def("fwd_thread_id", &LegacyEvent::fwdThreadId)
.def("device", &LegacyEvent::device)
.def("cpu_elapsed_us", &LegacyEvent::cpuElapsedUs)
.def("cuda_elapsed_us", &LegacyEvent::cudaElapsedUs)
.def("has_cuda", &LegacyEvent::hasCuda)
.def("shapes", &LegacyEvent::shapes)
.def("cpu_memory_usage", &LegacyEvent::cpuMemoryUsage)
.def("cuda_memory_usage", &LegacyEvent::cudaMemoryUsage)
.def("handle", &LegacyEvent::handle)
.def("node_id", &LegacyEvent::nodeId)
.def("is_remote", &LegacyEvent::isRemote)
.def("sequence_nr", &LegacyEvent::sequenceNr)
.def("stack", &LegacyEvent::stack)
.def("scope", &LegacyEvent::scope)
.def("correlation_id", &LegacyEvent::correlationId)
.def("start_us", &LegacyEvent::cpuUs)
.def("flops", &LegacyEvent::flops)
.def("is_async", &LegacyEvent::isAsync);
py::enum_<c10::DeviceType>(m, "DeviceType")
.value("CPU", c10::DeviceType::CPU)
.value("CUDA", c10::DeviceType::CUDA)
.value("MKLDNN", c10::DeviceType::MKLDNN)
.value("OPENGL", c10::DeviceType::OPENGL)
.value("OPENCL", c10::DeviceType::OPENCL)
.value("IDEEP", c10::DeviceType::IDEEP)
.value("HIP", c10::DeviceType::HIP)
.value("FPGA", c10::DeviceType::FPGA)
.value("MSNPU", c10::DeviceType::MSNPU)
.value("XLA", c10::DeviceType::XLA)
.value("Lazy", c10::DeviceType::Lazy)
.value("MLC", c10::DeviceType::MLC)
.value("HPU", c10::DeviceType::HPU)
.value("Meta", c10::DeviceType::Meta)
.value("Vulkan", c10::DeviceType::Vulkan)
.value("Metal", c10::DeviceType::Metal);
py::class_<KinetoEvent>(m, "_KinetoEvent")
// name of the event
.def("name", [](const KinetoEvent& e) {
return e.name();
})
// PyTorch thread id of the start callback
.def("start_thread_id", [](const KinetoEvent& e) {
return e.startThreadId();
})
// PyTorch thread id of the end callback
.def("end_thread_id", [](const KinetoEvent& e) {
return e.endThreadId();
})
// for events of scope BACKWARD_FUNCTION - PyTorch thread id
// of the corresponding forward op
.def("fwd_thread_id", [](const KinetoEvent& e) {
return e.fwdThreadId();
})
// together with fwd_thread_id, used to uniquely identify
// the forward op
.def("sequence_nr", [](const KinetoEvent& e) {
return e.sequenceNr();
})
// absolute start time (since unix epoch) in us
.def("start_us", [](const KinetoEvent& e) {
return e.startUs();
})
// duration in us
.def("duration_us", [](const KinetoEvent& e) {
return e.durationUs();
})
// used for correlation between high-level PyTorch events
// and low-level device events
.def("correlation_id", [](const KinetoEvent& e) {
return e.correlationId();
})
// shapes of input tensors
.def("shapes", [](const KinetoEvent& e) {
if (e.hasShapes()) {
return e.shapes();
} else {
return std::vector<std::vector<int64_t>>();
}
})
.def("dtypes", [](const KinetoEvent& e) {
if (e.hasTypes()) {
return e.dtypes();
} else {
return std::vector<std::string>();
}
})
// stack traces of the PyTorch CPU events
.def("stack", [](const KinetoEvent& e) {
if (e.hasStack()) {
return e.stack();
} else {
return std::vector<std::string>();
}
})
// type of the RecordFunction that generated a PyTorch CPU event
// (op, torchscript function, user label, etc.)
.def("scope", [](const KinetoEvent& e) {
return e.scope();
})
// device number, for CPU - process id
.def("device_index", [](const KinetoEvent& e) {
return e.deviceIndex();
})
// for CUDA - stream id, for CPU - start thread id
.def("device_resource_id", [](const KinetoEvent& e) {
return e.deviceResourceId();
})
// device type
.def("device_type", [](const KinetoEvent& e) {
return e.deviceType();
})
// correlation id of a linked event
.def("linked_correlation_id", [](const KinetoEvent& e) {
return e.linkedCorrelationId();
})
// compute flops
.def("flops", [](const KinetoEvent& e) {
return e.flops();
})
// whether this is an async event or not
.def("is_async", [](const KinetoEvent& e) {
return e.isAsync();
})
.def("cuda_elapsed_us", &KinetoEvent::cudaElapsedUs)
.def("nbytes", [](const KinetoEvent& e) {
return e.nBytes();
});
py::class_<ProfilerResult>(m, "_ProfilerResult")
.def("trace_start_us", &ProfilerResult::trace_start_us)
.def("events", &ProfilerResult::events)
#ifdef USE_KINETO
.def("save", &ProfilerResult::save)
#endif // USE_KINETO
;
m.def("_enable_profiler", enableProfiler);
m.def("_disable_profiler", disableProfiler);
m.def("_prepare_profiler", prepareProfiler);
m.def("_add_metadata_json", [](const std::string& key, const std::string& value) {
#ifdef USE_KINETO
addMetadataJson(key, value);
#else
LOG(WARNING) << "Adding profiling metadata requires using "
<< "torch.profiler with Kineto support (USE_KINETO=1)";
#endif // USE_KINETO
});
m.def("kineto_available", []() {
#ifdef USE_KINETO
return true;
#else
return false;
#endif
});
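// Reports which profiler activities this build supports. CUDA is only
// included when Kineto is built with CUPTI, at least one CUDA device is
// visible, and this is not a HIP build.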
m.def("_supported_activities", []() {
std::set<ActivityType> activities {ActivityType::CPU};
#if defined(USE_KINETO) && !defined(LIBKINETO_NOCUPTI)
if (at::getNumGPUs() > 0 && !at::hasHIP()) {
activities.insert(ActivityType::CUDA);
}
#endif
return activities;
});
m.def("_enable_profiler_legacy", enableProfilerLegacy);
py::class_<ProfilerDisableOptions>(m, "_ProfilerDisableOptions")
.def(py::init<bool, bool>());
m.def(
"_disable_profiler_legacy",
disableProfilerLegacy,
py::arg("profiler_disable_options") = ProfilerDisableOptions());
m.def("_profiler_enabled", profilerEnabled);
m.def("_enable_record_function", [](bool enable) {
at::enableRecordFunction(enable);
});
m.def("_set_empty_test_observer", [](bool is_global, double sampling_prob) {
auto cb = at::RecordFunctionCallback(nullptr)
.needsInputs(true)
.samplingProb(sampling_prob);
if (is_global) {
at::addGlobalCallback(cb);
} else {
at::addThreadLocalCallback(cb);
}
});
m.def("_clear_callbacks", []() {
at::clearCallbacks();
});
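// Default saved-tensor hooks: pack_hook/unpack_hook are applied to tensors
// saved for backward until the hooks are reset. A minimal sketch of how the
// Python side reaches these bindings (names taken from the defs below;
// higher-level wrappers may exist in torch.autograd):
//
//   torch._C._autograd._register_saved_tensors_default_hooks(pack, unpack)
//   ...  # forward passes here save tensors through pack/unpack
//   torch._C._autograd._reset_saved_tensors_default_hooks()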
m.def("_register_saved_tensors_default_hooks", [](py::function &pack_hook, py::function &unpack_hook) {
torch::autograd::PyDefaultSavedVariableHooks::set_hooks(pack_hook, unpack_hook);
});
m.def("_reset_saved_tensors_default_hooks", []() {
torch::autograd::PyDefaultSavedVariableHooks::reset_hooks();
});
py::class_<c10::InferenceMode>(_C_m, "_InferenceMode")
.def(py::init<bool>());
py::class_<DisableTorchDispatch>(_C_m, "_DisableTorchDispatch")
.def(py::init<>());
py::class_<torch::autograd::SavedVariable>(m, "SavedTensor")
.def(py::init([]()->torch::autograd::SavedVariable {
TORCH_CHECK(false, "Trying to create a SavedTensor object from Python is forbidden.");
}))
.def("register_hooks", [](torch::autograd::SavedVariable &s, py::function &pack_hook, py::function &unpack_hook) {
// Because we use a py::object, pybind will increment the refcount of the hook functions for us
s.register_hooks(std::make_unique<torch::autograd::PySavedVariableHooks>(pack_hook, unpack_hook));
});
Py_RETURN_TRUE;
}
namespace torch { namespace autograd {
static PyObject * set_autocast_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (!PyBool_Check(arg)) {
throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
}
at::autocast::set_enabled(arg == Py_True);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static PyObject * is_autocast_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (at::autocast::is_enabled()) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
END_HANDLE_TH_ERRORS
}
static PyObject * set_autocast_cpu_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (!PyBool_Check(arg)) {
throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
}
at::autocast::set_cpu_enabled(arg == Py_True);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static PyObject * is_autocast_cpu_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (at::autocast::is_cpu_enabled()) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
END_HANDLE_TH_ERRORS
}
static PyObject * set_autocast_gpu_dtype(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (!THPDtype_Check(arg)) {
throw TypeError(
"dtype must be a torch.dtype (got %s)", Py_TYPE(arg)->tp_name);
}
at::ScalarType targetType = reinterpret_cast<THPDtype*>(arg)->scalar_type;
at::autocast::set_autocast_gpu_dtype(targetType);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static PyObject * set_autocast_cpu_dtype(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (!THPDtype_Check(arg)) {
throw TypeError(
"dtype must be a torch.dtype (got %s)", Py_TYPE(arg)->tp_name);
}
at::ScalarType targetType = reinterpret_cast<THPDtype*>(arg)->scalar_type;
at::autocast::set_autocast_cpu_dtype(targetType);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
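// Maps an at::ScalarType to the C type name string used when constructing
// the THPDtype returned by the autocast dtype getters below.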
static const char* scalarTypeName(const at::ScalarType type) {
switch (type) {
#define DEFINE_CASE(ctype, name) \
case at::ScalarType::name: \
return #ctype;
AT_FORAUTOCAST_SCALAR_TYPES(DEFINE_CASE)
#undef DEFINE_CASE
default:
throw std::runtime_error("unknown scalar type for autocast");
}
}
static PyObject * get_autocast_gpu_dtype(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
at::ScalarType current_dtype = at::autocast::get_autocast_gpu_dtype();
return THPDtype_New(current_dtype, scalarTypeName(current_dtype));
END_HANDLE_TH_ERRORS
}
static PyObject * get_autocast_cpu_dtype(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
at::ScalarType current_dtype = at::autocast::get_autocast_cpu_dtype();
return THPDtype_New(current_dtype, scalarTypeName(current_dtype));
END_HANDLE_TH_ERRORS
}
static PyObject * clear_autocast_cache(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
at::autocast::clear_cache();
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static PyObject * autocast_increment_nesting(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
return THPUtils_packInt64(at::autocast::increment_nesting());
END_HANDLE_TH_ERRORS
}
static PyObject * autocast_decrement_nesting(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
return THPUtils_packInt64(at::autocast::decrement_nesting());
END_HANDLE_TH_ERRORS
}
static PyObject * set_grad_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (!PyBool_Check(arg)) {
throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
}
GradMode::set_enabled(arg == Py_True);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static PyObject * is_grad_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (GradMode::is_enabled()) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
END_HANDLE_TH_ERRORS
}
static PyObject * is_inference_mode_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (c10::InferenceMode::is_enabled()) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
END_HANDLE_TH_ERRORS
}
static PyObject * set_anomaly_mode_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (!PyBool_Check(arg)) {
throw TypeError("enabled must be a bool (got %s)", Py_TYPE(arg)->tp_name);
}
AnomalyMode::set_enabled(arg == Py_True);
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
static PyObject * is_anomaly_mode_enabled(PyObject* _unused, PyObject *arg) {
HANDLE_TH_ERRORS
if (AnomalyMode::is_enabled()) {
Py_RETURN_TRUE;
} else {
Py_RETURN_FALSE;
}
END_HANDLE_TH_ERRORS
}
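// Forward-mode AD dual-level bookkeeping: entering a level returns its index,
// exiting takes that index back. These are the low-level entry points that
// the Python-side forward-AD API (e.g. torch.autograd.forward_ad) is expected
// to wrap.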
static PyObject * python_enter_dual_level(PyObject* _unused, PyObject* arg) {
HANDLE_TH_ERRORS
// It is unlikely that the depth of forward nesting will overflow int64_t, so
// we just static_cast here.
return utils::wrap(static_cast<int64_t>(forward_ad::enter_dual_level()));
END_HANDLE_TH_ERRORS
}
static PyObject * python_exit_dual_level(PyObject* _unused, PyObject* args, PyObject* kwargs) {
HANDLE_TH_ERRORS
static PythonArgParser parser({
"exit_dual_level(int64_t level)"
});
ParsedArgs<1> parsed_args;
auto _r = parser.parse(args, kwargs, parsed_args);
auto idx = _r.toInt64(0);
// Make sure the given index is valid before casting it
TORCH_CHECK(idx >= 0, "Dual level must be a non-negative number.");
forward_ad::exit_dual_level(static_cast<uint64_t>(idx));
Py_RETURN_NONE;
END_HANDLE_TH_ERRORS
}
// autograd methods on torch._C
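// A minimal sketch of how these might be called from Python, assuming the
// table below is installed on the torch._C module as-is:
//
//   torch._C._set_grad_enabled(False)
//   enabled = torch._C.is_grad_enabled()
//   torch._C.set_autocast_enabled(True)
//   level = torch._C._enter_dual_level()
//   torch._C._exit_dual_level(level=level)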
static PyMethodDef methods[] = { // NOLINT
{"_set_grad_enabled", set_grad_enabled, METH_O, nullptr},
{"is_grad_enabled", is_grad_enabled, METH_NOARGS, nullptr},
{"is_inference_mode_enabled", is_inference_mode_enabled, METH_NOARGS, nullptr},
{"set_autocast_enabled", set_autocast_enabled, METH_O, nullptr},
{"is_autocast_enabled", is_autocast_enabled, METH_NOARGS, nullptr},
{"clear_autocast_cache", clear_autocast_cache, METH_NOARGS, nullptr},
{"set_autocast_cpu_enabled", set_autocast_cpu_enabled, METH_O, nullptr},
{"is_autocast_cpu_enabled", is_autocast_cpu_enabled, METH_NOARGS, nullptr},
{"set_autocast_cpu_dtype", set_autocast_cpu_dtype, METH_O, nullptr},
{"get_autocast_cpu_dtype", get_autocast_cpu_dtype, METH_NOARGS, nullptr},
{"set_autocast_gpu_dtype", set_autocast_gpu_dtype, METH_O, nullptr},
{"get_autocast_gpu_dtype", get_autocast_gpu_dtype, METH_NOARGS, nullptr},
{"autocast_increment_nesting", autocast_increment_nesting, METH_NOARGS, nullptr},
{"autocast_decrement_nesting", autocast_decrement_nesting, METH_NOARGS, nullptr},
{"set_anomaly_enabled", set_anomaly_mode_enabled, METH_O, nullptr},
{"is_anomaly_enabled", is_anomaly_mode_enabled, METH_NOARGS, nullptr},
{"_enter_dual_level", python_enter_dual_level, METH_NOARGS, nullptr},
{"_exit_dual_level", castPyCFunctionWithKeywords(python_exit_dual_level), METH_VARARGS | METH_KEYWORDS, nullptr},
{nullptr, nullptr, 0, nullptr}
};
PyMethodDef* python_functions() {
return methods;
}
}} // namespace torch::autograd