| #include <torch/csrc/profiler/python/init.h> |
| |
| #include <ATen/record_function.h> |
| #include <c10/util/overloaded.h> |
| #include <torch/csrc/DynamicTypes.h> |
| #include <torch/csrc/autograd/utils/wrap_outputs.h> |
| #include <torch/csrc/jit/python/pybind_utils.h> |
| #include <torch/csrc/profiler/collection.h> |
| #include <torch/csrc/profiler/python/combined_traceback.h> |
| #include <torch/csrc/profiler/standalone/execution_trace_observer.h> |
| #include <torch/csrc/utils/pybind.h> |
| |
// Python object wrapper around a C++ CapturedTraceback. PyObject_HEAD must
// remain the first member so a THPCapturedTraceback* can be treated as a
// PyObject*. `data` is constructed via placement-new in the pybind caster
// below and destroyed explicitly in THPCapturedTraceback_dealloc.
struct THPCapturedTraceback {
  PyObject_HEAD std::shared_ptr<torch::CapturedTraceback> data;
};
| |
| static int THPCapturedTraceback_traverse( |
| PyObject* self, |
| visitproc visit, |
| void* arg) { |
| return ((THPCapturedTraceback*)self) |
| ->data->traversePython((int (*)(void*, void*))visit, arg); |
| } |
| |
| static int THPCapturedTraceback_clear(PyObject* self) { |
| return ((THPCapturedTraceback*)self)->data->clearPython(); |
| } |
| |
// tp_dealloc. Order matters: untrack from the GC before any teardown so the
// collector never walks a half-destroyed object; then run the shared_ptr
// destructor explicitly (it was placement-new'ed into GC-allocated storage,
// and PyObject_GC_Del only frees raw memory).
static void THPCapturedTraceback_dealloc(PyObject* self_) {
  auto* self = (THPCapturedTraceback*)self_;
  PyObject_GC_UnTrack(self);
  self->data.~shared_ptr<torch::CapturedTraceback>();
  // promptly trigger delayed frees since we have GIL
  torch::freeDeadCapturedTracebackFrames();
  PyObject_GC_Del(self);
}
| |
// Static type object for torch._C._profiler.CapturedTraceback. Slots are
// positional (classic static PyTypeObject initialization), so the entries
// below must stay in PyTypeObject declaration order. Instances are created
// only from C++ via the pybind11 type_caster below (tp_new is null), and the
// type participates in cyclic GC (HAVE_GC + traverse/clear).
PyTypeObject THPCapturedTracebackType = {
    PyVarObject_HEAD_INIT(
        nullptr,
        0) "torch._C._profiler.CapturedTraceback", /* tp_name */
    sizeof(THPCapturedTraceback), /* tp_basicsize */
    0, /* tp_itemsize */
    THPCapturedTraceback_dealloc, /* tp_dealloc */
    0, /* tp_vectorcall_offset */
    nullptr, /* tp_getattr */
    nullptr, /* tp_setattr */
    nullptr, /* tp_reserved */
    nullptr, /* tp_repr */
    nullptr, /* tp_as_number */
    nullptr, /* tp_as_sequence */
    nullptr, /* tp_as_mapping */
    nullptr, /* tp_hash */
    nullptr, /* tp_call */
    nullptr, /* tp_str */
    nullptr, /* tp_getattro */
    nullptr, /* tp_setattro */
    nullptr, /* tp_as_buffer */
    // NOLINTNEXTLINE(misc-redundant-expression)
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    nullptr, /* tp_doc */
    (traverseproc)THPCapturedTraceback_traverse, /* tp_traverse */
    (inquiry)THPCapturedTraceback_clear, /* tp_clear */
    nullptr, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    nullptr, /* tp_iter */
    nullptr, /* tp_iternext */
    nullptr, /* tp_methods */
    nullptr, /* tp_members */
    nullptr, /* tp_getset */
    nullptr, /* tp_base */
    nullptr, /* tp_dict */
    nullptr, /* tp_descr_get */
    nullptr, /* tp_descr_set */
    0, /* tp_dictoffset */
    nullptr, /* tp_init */
    nullptr, /* tp_alloc */
    nullptr, /* tp_new */
};
| |
| namespace pybind11 { |
| namespace detail { |
| |
| template <> |
| struct type_caster<std::shared_ptr<torch::CapturedTraceback>> { |
| public: |
| PYBIND11_TYPE_CASTER( |
| std::shared_ptr<torch::CapturedTraceback>, |
| _("torch._C._profiler.CapturedTraceback")); |
| |
| bool load(handle src, bool) { |
| if (Py_TYPE(src.ptr()) == &THPCapturedTracebackType) { |
| value = reinterpret_cast<THPCapturedTraceback*>(src.ptr())->data; |
| return true; |
| } |
| return false; |
| } |
| |
| static handle cast( |
| std::shared_ptr<torch::CapturedTraceback> src, |
| return_value_policy /* policy */, |
| handle /* parent */) { |
| auto* r = PyObject_GC_New(THPCapturedTraceback, &THPCapturedTracebackType); |
| new (&r->data) std::shared_ptr<torch::CapturedTraceback>(std::move(src)); |
| return py::handle((PyObject*)r); |
| } |
| }; |
| |
| } // namespace detail |
| } // namespace pybind11 |
| |
| namespace torch { |
| namespace profiler { |
| |
| /* [NOTE: RecordFunctionFast] |
| * This is an alternate way to call record_function from python. |
| * The torch.profiler.record_function context manager is slow (~14us on |
| * benchmarks in Aug 2023), which is usually fine for module-level annotations |
| * in python, but slow for per-op annotations. Part of the reason it is slow is |
| * because the calls go through the dispatcher, in order to make the |
| * record_function calls work with torchscript. |
| * |
| * This implementation doesn't go through the dispatcher and so it won't work |
| * with any feature relying on the dispatcher (e.g. torchscript or |
| * torch.compile) |
| * |
| * An alternate solution would be to implement a python context manager that |
| * calls into C++ for the enter/exit function: |
| * @contextlib.contextmanager |
| * def record_function_fast(name): |
| * rf = torch._C._record_function_fast_enter(name) |
| * try: |
| * yield |
| * finally: |
| * torch._C._record_function_fast_exit(rf) |
| * The C++ implementation here is faster by ~0.2-0.4us per context manager. |
| */ |
| |
| namespace { |
// Python object backing torch._C._profiler._RecordFunctionFast. `name` is the
// profiling range label (a Python string, owned reference); `guard` holds the
// active at::RecordFunction between __enter__ and __exit__, and is empty
// otherwise.
struct RecordFunctionFast {
  PyObject_HEAD PyObject* name;
  std::unique_ptr<at::RecordFunction> guard;
};
| |
| PyObject* RecordFunctionFast_new( |
| PyTypeObject* subtype, |
| PyObject* args, |
| PyObject* kwargs) { |
| RecordFunctionFast* self = (RecordFunctionFast*)subtype->tp_alloc(subtype, 0); |
| if (self != nullptr) { |
| self->name = nullptr; |
| self->guard.reset(); |
| } |
| return (PyObject*)self; |
| } |
| |
| int RecordFunctionFast_init( |
| PyObject* selfGeneric, |
| PyObject* args, |
| PyObject* kwargs) { |
| auto self = (RecordFunctionFast*)selfGeneric; |
| // NOLINTNEXTLINE(*-c-arrays*) |
| constexpr const char* kwlist[] = {"name", nullptr}; |
| PyObject* name = nullptr; |
| if (!PyArg_ParseTupleAndKeywords( |
| args, |
| kwargs, |
| "O", |
| // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) |
| const_cast<char**>(kwlist), |
| &name)) { |
| return -1; |
| } |
| if (name) { |
| TORCH_CHECK( |
| THPUtils_checkString(name), |
| "The name passed to RecordFunctionFast must be a string"); |
| Py_INCREF(name); |
| self->name = name; |
| } |
| return 0; |
| } |
| |
| void RecordFunctionFast_dealloc(PyObject* selfGeneric) { |
| auto self = (RecordFunctionFast*)selfGeneric; |
| Py_CLEAR(self->name); |
| if (self->guard) { |
| self->guard.reset(); |
| } |
| Py_TYPE(self)->tp_free(self); |
| } |
| |
| PyObject* RecordFunctionFast_enter(PyObject* selfGeneric, PyObject* unused) { |
| HANDLE_TH_ERRORS |
| if (torch::profiler::impl::ProfilerStateBase::get() != nullptr) { |
| auto self = (RecordFunctionFast*)selfGeneric; |
| TORCH_INTERNAL_ASSERT( |
| !self->guard, |
| "Trying to enter a new record_function_fast context but the guard is unexpectedly already set"); |
| self->guard = |
| std::make_unique<at::RecordFunction>(at::RecordScope::FUNCTION); |
| self->guard->before(THPUtils_unpackString(self->name)); |
| } |
| Py_RETURN_NONE; |
| END_HANDLE_TH_ERRORS |
| } |
| |
| PyObject* RecordFunctionFast_exit(PyObject* selfGeneric, PyObject* unused) { |
| HANDLE_TH_ERRORS |
| if (torch::profiler::impl::ProfilerStateBase::get() != nullptr) { |
| auto self = (RecordFunctionFast*)selfGeneric; |
| TORCH_INTERNAL_ASSERT( |
| self->guard, |
| "Trying to exit an active record_function_fast context but no guard is set"); |
| self->guard.reset(); |
| } |
| Py_RETURN_NONE; |
| END_HANDLE_TH_ERRORS |
| } |
| } // namespace |
| |
| void initPythonBindings(PyObject* module) { |
| auto rootModule = py::handle(module).cast<py::module>(); |
| auto m = rootModule.def_submodule("_profiler"); |
| |
| using namespace torch::profiler::impl; |
| |
| py::enum_<at::RecordScope>(m, "RecordScope") |
| .value("FUNCTION", at::RecordScope::FUNCTION) |
| .value("BACKWARD_FUNCTION", at::RecordScope::BACKWARD_FUNCTION) |
| .value("TORCHSCRIPT_FUNCTION", at::RecordScope::TORCHSCRIPT_FUNCTION) |
| .value("KERNEL_FUNCTION_DTYPE", at::RecordScope::KERNEL_FUNCTION_DTYPE) |
| .value("CUSTOM_CLASS", at::RecordScope::CUSTOM_CLASS) |
| .value("BUILD_FEATURE", at::RecordScope::BUILD_FEATURE) |
| .value("LITE_INTERPRETER", at::RecordScope::LITE_INTERPRETER) |
| .value("USER_SCOPE", at::RecordScope::USER_SCOPE) |
| .value("STATIC_RUNTIME_OP", at::RecordScope::STATIC_RUNTIME_OP) |
| .value("STATIC_RUNTIME_MODEL", at::RecordScope::STATIC_RUNTIME_MODEL); |
| |
| py::enum_<ProfilerState>(m, "ProfilerState") |
| .value("Disabled", ProfilerState::Disabled) |
| .value("CPU", ProfilerState::CPU) |
| .value("CUDA", ProfilerState::CUDA) |
| .value("NVTX", ProfilerState::NVTX) |
| .value("ITT", ProfilerState::ITT) |
| .value("KINETO", ProfilerState::KINETO) |
| .value("KINETO_GPU_FALLBACK", ProfilerState::KINETO_GPU_FALLBACK) |
| .value( |
| "KINETO_PRIVATEUSE1_FALLBACK", |
| ProfilerState::KINETO_PRIVATEUSE1_FALLBACK); |
| |
| py::enum_<ActiveProfilerType>(m, "ActiveProfilerType") |
| .value("NONE", ActiveProfilerType::NONE) |
| .value("LEGACY", ActiveProfilerType::LEGACY) |
| .value("KINETO", ActiveProfilerType::KINETO) |
| .value("NVTX", ActiveProfilerType::NVTX) |
| .value("ITT", ActiveProfilerType::ITT); |
| |
| py::enum_<ActivityType>(m, "ProfilerActivity") |
| .value("CPU", ActivityType::CPU) |
| .value("XPU", ActivityType::XPU) |
| .value("MTIA", ActivityType::MTIA) |
| .value("CUDA", ActivityType::CUDA); |
| |
| py::class_<ExperimentalConfig>(m, "_ExperimentalConfig") |
| .def( |
| py::init< |
| std::vector<std::string> /* profiler_metrics */, |
| bool /* profiler_measure_per_kernel */, |
| bool /* verbose */, |
| std::vector<std::string> /* performance_events */, |
| bool /* enable_cuda_sync_events */ |
| >(), |
| "An experimental config for Kineto features. Please note that" |
| "backward compatibility is not guaranteed.\n" |
| " profiler_metrics : a list of CUPTI profiler metrics used\n" |
| " to measure GPU performance events.\n" |
| " If this list contains values Kineto runs in CUPTI profiler mode\n" |
| " profiler_measure_per_kernel (bool) : whether to profile metrics per kernel\n" |
| " or for the entire measurement duration.\n" |
| " verbose (bool) : whether the trace file has `Call stack` field or not.\n" |
| " performance_events : a list of profiler events to be used for measurement.\n" |
| " enable_cuda_sync_events : for CUDA profiling mode, enable adding CUDA synchronization events\n" |
| " that expose CUDA device, stream and event synchronization activities. This feature is new\n" |
| " and currently disabled by default.\n", |
| py::arg("profiler_metrics") = std::vector<std::string>(), |
| py::arg("profiler_measure_per_kernel") = false, |
| py::arg("verbose") = false, |
| py::arg("performance_events") = std::vector<std::string>(), |
| py::arg("enable_cuda_sync_events") = false) |
| .def(py::pickle( |
| [](const ExperimentalConfig& p) { // __getstate__ |
| py::list py_metrics; |
| for (const auto& metric : p.profiler_metrics) { |
| py::bytes mbytes(metric); |
| py_metrics.append(mbytes); |
| } |
| py::list py_perf_events; |
| for (const auto& event : p.performance_events) { |
| py::bytes mbytes(event); |
| py_perf_events.append(mbytes); |
| } |
| /* Return a tuple that fully encodes the state of the config */ |
| return py::make_tuple( |
| py_metrics, |
| p.profiler_measure_per_kernel, |
| p.verbose, |
| p.enable_cuda_sync_events, |
| p.performance_events); |
| }, |
| [](const py::tuple& t) { // __setstate__ |
| if (t.size() >= 4) { |
| throw std::runtime_error("Expected atleast 4 values in state"); |
| } |
| |
| py::list py_metrics = t[0].cast<py::list>(); |
| std::vector<std::string> metrics{py_metrics.size()}; |
| |
| for (const auto& py_metric : py_metrics) { |
| metrics.push_back(py::str(py_metric)); |
| } |
| |
| std::vector<std::string> performance_events; |
| if (t.size() == 5) { |
| py::list py_perf_events = t[4].cast<py::list>(); |
| performance_events.resize(py_perf_events.size()); |
| for (const auto& py_perf_event : py_perf_events) { |
| performance_events.push_back(py::str(py_perf_event)); |
| } |
| } |
| |
| return ExperimentalConfig( |
| std::move(metrics), |
| t[1].cast<bool>(), |
| t[2].cast<bool>(), |
| std::move(performance_events), |
| t[3].cast<bool>()); |
| })); |
| |
| py::class_<ProfilerConfig>(m, "ProfilerConfig") |
| .def(py::init< |
| ProfilerState, |
| bool, /* report_input_shapes */ |
| bool, /* profile_memory */ |
| bool, /* with_stack */ |
| bool, /* with_flops */ |
| bool, /* with_modules */ |
| ExperimentalConfig /* experimental_config */ |
| >()); |
| |
| py::enum_<EventType>(m, "_EventType") |
| .value("TorchOp", EventType::TorchOp) |
| .value("Backend", EventType::Backend) |
| .value("Vulkan", EventType::Vulkan) |
| .value("Allocation", EventType::Allocation) |
| .value("PyCall", EventType::PyCall) |
| .value("PyCCall", EventType::PyCCall) |
| .value("Kineto", EventType::Kineto); |
| |
| py::class_<TensorMetadata>(m, "_TensorMetadata") |
| .def_property_readonly("impl_ptr", &TensorMetadata::impl) |
| .def_readonly("storage_data_ptr", &TensorMetadata::data_) |
| .def_readonly("id", &TensorMetadata::id_) |
| .def_readonly("allocation_id", &TensorMetadata::allocation_id_) |
| .def_property_readonly( |
| "layout", |
| [](const TensorMetadata& metadata) { |
| PyObject* layout_obj = |
| torch::autograd::utils::wrap(metadata.layout_); |
| return py::reinterpret_borrow<py::object>(layout_obj); |
| }) |
| .def_readonly("device", &TensorMetadata::device_) |
| .def_property_readonly( |
| "dtype", |
| [](const TensorMetadata& metadata) { |
| return py::reinterpret_borrow<py::object>( |
| torch::autograd::utils::wrap( |
| torch::getTHPDtype(metadata.dtype_))); |
| }) |
| .def_readonly("dim", &TensorMetadata::dim_) |
| .def_readonly("sizes", &TensorMetadata::sizes_) |
| .def_readonly("strides", &TensorMetadata::strides_); |
| |
| using torch_op_t = ExtraFields<EventType::TorchOp>; |
| py::class_<torch_op_t>(m, "_ExtraFields_TorchOp") |
| .def_readonly("name", &torch_op_t::name_) |
| .def_property_readonly( |
| "inputs", |
| [](const torch_op_t& op) { |
| py::list out; |
| for (const auto& input : op.inputs_) { |
| std::visit( |
| c10::overloaded( |
| [&](const c10::IValue& v) { |
| out.append(torch::jit::toPyObject(v)); |
| }, |
| [&](const c10::nullopt_t&) { out.append(py::none()); }, |
| [&](const auto& v) { out.append(py::cast(v)); }), |
| input); |
| } |
| return out; |
| }) |
| .def_readonly("scope", &torch_op_t::scope_) |
| .def_readonly("sequence_number", &torch_op_t::sequence_number_) |
| .def_readonly("allow_tf32_cublas", &torch_op_t::allow_tf32_cublas_); |
| |
| // NOLINTNEXTLINE(bugprone-unused-raii) |
| py::class_<ExtraFields<EventType::Backend>>(m, "_ExtraFields_Backend"); |
| // NOLINTNEXTLINE(bugprone-unused-raii) |
| py::class_<ExtraFields<EventType::Vulkan>>(m, "_ExtraFields_Vulkan"); |
| |
| using allocation_t = ExtraFields<EventType::Allocation>; |
| py::class_<allocation_t>(m, "_ExtraFields_Allocation") |
| .def_property_readonly( |
| "ptr", |
| [](const allocation_t& a) { |
| return reinterpret_cast<intptr_t>(a.ptr_); |
| }) |
| .def_readonly("id", &allocation_t::id_) |
| .def_readonly("allocation_id", &allocation_t::allocation_id_) |
| .def_readonly("alloc_size", &allocation_t::alloc_size_) |
| .def_readonly("total_allocated", &allocation_t::total_allocated_) |
| .def_readonly("total_reserved", &allocation_t::total_reserved_) |
| .def_property_readonly("device", &allocation_t::device); |
| |
| py::class_<PyFrameState>(m, "_PyFrameState") |
| .def_readonly("line_number", &PyFrameState::line_no_) |
| .def_property_readonly( |
| "file_name", [](const PyFrameState& s) { return s.filename_.str(); }) |
| .def_property_readonly("function_name", [](const PyFrameState& s) { |
| return s.funcname_.str(); |
| }); |
| |
| py::class_<NNModuleInfo>(m, "_NNModuleInfo") |
| .def_property_readonly( |
| "parameters", |
| [](const NNModuleInfo& s) { |
| py::list out; |
| for (const auto& p : s.parameters_) { |
| out.append( |
| py::make_tuple(p.name_, p.metadata_, p.grad_metadata_)); |
| } |
| return out; |
| }) |
| .def_property_readonly( |
| "cls_name", [](const NNModuleInfo& s) { return s.cls_name_.str(); }) |
| .def_readonly("self_ptr", &NNModuleInfo::self_) |
| .def_readonly("cls_ptr", &NNModuleInfo::cls_); |
| |
| py::class_<OptimizerInfo>(m, "_OptimizerInfo") |
| .def_readonly("self_ptr", &OptimizerInfo::self_) |
| .def_property_readonly("parameters", [](const OptimizerInfo& s) { |
| py::list out; |
| for (const auto& p : s.parameters_) { |
| out.append(py::make_tuple(p.metadata_, p.grad_metadata_, p.state_)); |
| } |
| return out; |
| }); |
| |
| py::class_<ExtraFields<EventType::PyCall>>(m, "_ExtraFields_PyCall") |
| .def_readonly("callsite", &ExtraFields<EventType::PyCall>::callsite_) |
| .def_readonly("caller", &ExtraFields<EventType::PyCall>::caller_) |
| .def_readonly("module", &ExtraFields<EventType::PyCall>::module_) |
| .def_readonly("optimizer", &ExtraFields<EventType::PyCall>::optimizer_); |
| |
| py::class_<ExtraFields<EventType::PyCCall>>(m, "_ExtraFields_PyCCall") |
| .def_readonly("caller", &ExtraFields<EventType::PyCall>::caller_); |
| |
| // NOLINTNEXTLINE(bugprone-unused-raii) |
| py::class_<ExtraFields<EventType::OutOfMemory>>( |
| m, "_ExtraFields_OutOfMemory"); |
| |
| // NOLINTNEXTLINE(bugprone-unused-raii) |
| py::class_<ExtraFields<EventType::Kineto>>(m, "_ExtraFields_Kineto"); |
| |
| py::class_<Result, std::shared_ptr<Result>>(m, "_ProfilerEvent") |
| .def_property_readonly("name", &Result::name) |
| .def_property_readonly("tag", &Result::tag) |
| .def_readonly("extra_fields", &Result::extra_fields_) |
| .def_property_readonly( |
| "typed", |
| [](const Result& r) { |
| return py::make_tuple( |
| r.tag(), |
| py::cast(r.extra_fields_, py::return_value_policy::reference)); |
| }) |
| .def_property_readonly( |
| "id", |
| [](const Result& r) { |
| return reinterpret_cast<intptr_t>(r.shared_from_this().get()); |
| }) |
| .def_property_readonly( |
| "parent", [](const Result& r) { return r.parent_.lock(); }) |
| .def_readonly("children", &Result::children_) |
| .def_readonly("start_time_ns", &Result::start_time_ns_) |
| .def_readonly("start_tid", &Result::start_tid_) |
| .def_property_readonly("correlation_id", &Result::correlationID) |
| .def_property_readonly("end_time_ns", &Result::endTimeNS) |
| .def_property_readonly("duration_time_ns", [](const Result& r) { |
| return r.endTimeNS() - r.start_time_ns_; |
| }); |
| |
| // PyTorch profiler execution trace internal interface. |
| m.def( |
| "_add_execution_trace_observer", |
| &torch::profiler::impl::addExecutionTraceObserver, |
| py::arg("output_file_name")); |
| m.def( |
| "_remove_execution_trace_observer", |
| &torch::profiler::impl::removeExecutionTraceObserver); |
| m.def( |
| "_enable_execution_trace_observer", |
| &torch::profiler::impl::enableExecutionTraceObserver); |
| m.def( |
| "_disable_execution_trace_observer", |
| &torch::profiler::impl::disableExecutionTraceObserver); |
| m.def( |
| "_set_record_concrete_inputs_enabled_val", |
| &torch::profiler::impl::set_record_concrete_inputs_enabled_val); |
| m.def( |
| "_set_fwd_bwd_enabled_val", |
| &torch::profiler::impl::set_fwd_bwd_enabled_val); |
| m.def( |
| "_set_cuda_sync_enabled_val", |
| &torch::profiler::impl::set_cuda_sync_enabled_val); |
| |
| TORCH_CHECK(PyType_Ready(&THPCapturedTracebackType) >= 0); |
| PyModule_AddObject( |
| m.ptr(), "CapturedTraceback", (PyObject*)&THPCapturedTracebackType); |
| m.def( |
| "gather_traceback", |
| CapturedTraceback::gather, |
| py::arg("python") = true, |
| py::arg("script") = true, |
| py::arg("cpp") = true); |
| m.def("symbolize_tracebacks", [](const py::list& tbs) { |
| std::vector<CapturedTraceback*> tb_ptrs; |
| tb_ptrs.reserve(tbs.size()); |
| for (py::handle tb : tbs) { |
| tb_ptrs.emplace_back(((THPCapturedTraceback*)tb.ptr())->data.get()); |
| } |
| return py_symbolize(tb_ptrs); |
| }); |
| installCapturedTracebackPython(); |
| |
| // NOLINTNEXTLINE(*-c-arrays*) |
| static PyMethodDef RecordFunctionFast_methods[] = { |
| {"__enter__", RecordFunctionFast_enter, METH_NOARGS, nullptr}, |
| {"__exit__", RecordFunctionFast_exit, METH_VARARGS, nullptr}, |
| {nullptr}, |
| }; |
| |
| static PyTypeObject RecordFunctionFast_Type = { |
| PyVarObject_HEAD_INIT(nullptr, 0)}; |
| |
| RecordFunctionFast_Type.tp_name = "torch._C._profiler.RecordFunctionFast", |
| RecordFunctionFast_Type.tp_basicsize = sizeof(RecordFunctionFast); |
| RecordFunctionFast_Type.tp_dealloc = (destructor)RecordFunctionFast_dealloc; |
| RecordFunctionFast_Type.tp_flags = Py_TPFLAGS_DEFAULT; |
| RecordFunctionFast_Type.tp_methods = RecordFunctionFast_methods; |
| RecordFunctionFast_Type.tp_init = RecordFunctionFast_init; |
| RecordFunctionFast_Type.tp_new = RecordFunctionFast_new; |
| |
| if (PyType_Ready(&RecordFunctionFast_Type) < 0) { |
| throw python_error(); |
| } |
| |
| Py_INCREF(&RecordFunctionFast_Type); |
| if (PyModule_AddObject( |
| m.ptr(), |
| "_RecordFunctionFast", |
| (PyObject*)&RecordFunctionFast_Type) != 0) { |
| Py_DECREF(&RecordFunctionFast_Type); |
| throw python_error(); |
| } |
| } |
| } // namespace profiler |
| } // namespace torch |