reduce python tracer overhead by keep reference of PyCodeObject and PyCFunctionObject, (instead of using TraceMe and generate trace string).
decouple python tracer from TraceMe, serialize to separate plane.
PiperOrigin-RevId: 328854114
Change-Id: I33422f7bc51182e90571923673a77d41d1899e77
diff --git a/tensorflow/core/profiler/internal/cpu/python_tracer.cc b/tensorflow/core/profiler/internal/cpu/python_tracer.cc
index 4233c5f..a6bc2a5 100644
--- a/tensorflow/core/profiler/internal/cpu/python_tracer.cc
+++ b/tensorflow/core/profiler/internal/cpu/python_tracer.cc
@@ -58,7 +58,7 @@
PythonTracer::~PythonTracer() {
Stop().IgnoreError();
- PythonHooks::GetSingleton()->Finalize();
+ PythonHooks::GetSingleton()->Finalize(nullptr);
}
Status PythonTracer::Start() {
@@ -76,7 +76,7 @@
return errors::Internal("TraceMeRecorder not started");
}
VLOG(1) << __FUNCTION__;
- PythonHooks::GetSingleton()->Stop(options_);
+ PythonHooks::GetSingleton()->Stop();
recording_ = false;
return Status::OK();
}
@@ -87,17 +87,12 @@
// in the wrong threads.
// We had assumed HostTracer::Stop is called when ProfilerSession try to
// serialize PythonTracer.
- PythonHooks::GetSingleton()->Finalize();
+ PythonHooks::GetSingleton()->Finalize(nullptr);
return Status::OK();
}
Status PythonTracer::CollectData(XSpace* space) {
- // This ProfilerInterface rely on HostTracer to serialize its trace.
- // Make sure unpaired traceme don't get recorded, because it will end up
- // in the wrong threads.
- // We had assumed HostTracer::Stop is called when ProfilerSession try to
- // serialize PythonTracer.
- PythonHooks::GetSingleton()->Finalize();
+ PythonHooks::GetSingleton()->Finalize(space);
return Status::OK();
}
@@ -107,8 +102,7 @@
std::unique_ptr<ProfilerInterface> CreatePythonTracer(
const ProfileOptions& options) {
PythonHooksOptions pyhooks_options;
- pyhooks_options.enable_trace_python_function =
- options.python_tracer_level() && options.host_tracer_level();
+ pyhooks_options.enable_trace_python_function = options.python_tracer_level();
pyhooks_options.enable_python_traceme = options.host_tracer_level() != 0;
return absl::make_unique<PythonTracer>(pyhooks_options);
}
diff --git a/tensorflow/python/profiler/internal/BUILD b/tensorflow/python/profiler/internal/BUILD
index 98e9336..5eeaf96 100644
--- a/tensorflow/python/profiler/internal/BUILD
+++ b/tensorflow/python/profiler/internal/BUILD
@@ -142,7 +142,10 @@
visibility = ["//visibility:public"],
deps = [
"//tensorflow/core:lib",
- "//tensorflow/core/profiler/lib:traceme",
+ "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
+ "//tensorflow/core/profiler/utils:xplane_builder",
+ "//tensorflow/core/profiler/utils:xplane_schema",
+ "//tensorflow/core/profiler/utils:xplane_utils",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
"@pybind11",
diff --git a/tensorflow/python/profiler/internal/python_hooks.cc b/tensorflow/python/profiler/internal/python_hooks.cc
index ee2ad1e..aa59305 100644
--- a/tensorflow/python/profiler/internal/python_hooks.cc
+++ b/tensorflow/python/profiler/internal/python_hooks.cc
@@ -16,13 +16,19 @@
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
+#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/path.h"
+#include "tensorflow/core/profiler/utils/xplane_builder.h"
+#include "tensorflow/core/profiler/utils/xplane_schema.h"
+#include "tensorflow/core/profiler/utils/xplane_utils.h"
namespace tensorflow {
namespace profiler {
namespace py = ::pybind11;
+namespace {
+
template <typename T>
int ProfileFunction(PyObject* obj, PyFrameObject* frame, int what,
PyObject* arg) {
@@ -40,40 +46,153 @@
setprofile(callback);
}
+std::string GetEventName(PyCodeObject* py_code) {
+ string filename(py::reinterpret_borrow<py::str>(py_code->co_filename));
+ string function;
+ if (py_code->co_name == nullptr) {
+ function = "<unknown>";
+ } else {
+ function = py::reinterpret_borrow<py::str>(py_code->co_name);
+ }
+
+ return absl::StrCat("$", io::Basename(filename), ":", py_code->co_firstlineno,
+ " ", function);
+}
+
+string GetEventName(PyCFunctionObject* py_cfunc) {
+ PyObject* module = py_cfunc->m_module;
+ string filename;
+ bool filename_ok;
+#if PY_MAJOR_VERSION < 3
+ filename_ok = (module != nullptr && PyString_Check(module));
+#else
+ filename_ok = (module != nullptr && PyUnicode_Check(module));
+#endif
+ if (filename_ok) {
+ filename = py::reinterpret_borrow<py::str>(module);
+ } else {
+ filename = "<unknown>";
+ }
+
+ return absl::StrCat("$", filename, " ", py_cfunc->m_ml->ml_name);
+}
+
+void AddEventToXLine(const PythonTraceEntry& event, XLineBuilder* line,
+ XPlaneBuilder* plane) {
+ // TODO(jiesun): maybe add full filename as event stats.
+ auto xevent = line->AddEvent(*plane->GetOrCreateEventMetadata(event.Name()));
+ xevent.SetTimestampNs(event.start_time_ns);
+ xevent.SetEndTimestampNs(event.end_time_ns);
+}
+
+} // namespace
+
+std::string PythonTraceEntry::Name() const {
+ std::string event_name;
+ if (code_object) {
+ return GetEventName(code_object);
+ } else if (function_object) {
+ return GetEventName(function_object);
+ }
+ return "<unknown>";
+}
+
PythonHooks* PythonHooks::GetSingleton() {
static PythonHooks* singleton = new PythonHooks;
return singleton;
}
-void PythonHooks::Start(const PythonHooksOptions& option) {
+void PythonHooks::Start(const PythonHooksOptions& options) {
if (!Py_IsInitialized()) return;
- if (option.enable_python_traceme || option.enable_trace_python_function) {
+ options_ = options;
+ start_timestamp_ns_ = EnvTime::NowNanos();
+ if (options_.enable_python_traceme || options_.enable_trace_python_function) {
PyGILState_STATE gil_state = PyGILState_Ensure();
- if (option.enable_trace_python_function) {
+ if (options_.enable_trace_python_function) {
SetProfilerInAllThreads();
}
- if (option.enable_python_traceme) {
+ if (options_.enable_python_traceme) {
EnableTraceMe(true);
}
+ if (options_.end_to_end_mode) {
+ // When end to end mode is used, Stop() and Finalize() i.e. symbolization
+ // and data collection happens during C's atexit(), when Py_FinalizeEx()
+ // already called.
+ try {
+ auto atexit = py::module::import("atexit");
+ atexit.attr("register")(py::cpp_function([]() {
+ PythonHooks* singleton = PythonHooks::GetSingleton();
+ singleton->Stop();
+ singleton->CollectData(&(singleton->end_to_end_xplane_.emplace()));
+ }));
+ } catch (const py::error_already_set& e) {
+ LOG(ERROR) << "Can't install atexit handler for e2e mode." << e.what();
+ }
+ }
PyGILState_Release(gil_state);
+ active_session_ = true;
}
}
-void PythonHooks::Stop(const PythonHooksOptions& option) {
+void PythonHooks::Stop() {
if (!Py_IsInitialized()) return;
- if (option.enable_python_traceme || option.enable_trace_python_function) {
+ if (!active_session_) return; // Makes sure Stop() can be reentrant.
+ if (options_.enable_python_traceme || options_.enable_trace_python_function) {
PyGILState_STATE gil_state = PyGILState_Ensure();
- if (option.enable_trace_python_function) {
+ if (options_.enable_trace_python_function) {
ClearProfilerInAllThreads();
}
- if (option.enable_python_traceme) {
+ if (options_.enable_python_traceme) {
EnableTraceMe(false);
}
PyGILState_Release(gil_state);
+ active_session_ = false;
}
}
-void PythonHooks::Finalize() { tracemes_.clear(); }
+void PythonHooks::CollectData(XPlane* raw_plane) {
+ DCHECK(raw_plane);
+ XPlaneBuilder plane(raw_plane);
+ for (auto& it : entries_) {
+ uint64 thread_id = it.first;
+ auto& thread_events = it.second;
+ VLOG(1) << "Collecting " << thread_events.completed.size() << ":"
+ << thread_events.active.size() << " events on thread " << thread_id;
+ auto line = plane.GetOrCreateLine(thread_id);
+ line.SetTimestampNs(start_timestamp_ns_);
+ for (const auto& event : thread_events.completed) {
+ AddEventToXLine(event, &line, &plane);
+ }
+ if (options_.include_incomplete_events) {
+ uint64 now = EnvTime::NowNanos();
+ while (!thread_events.active.empty()) {
+ auto& event = thread_events.active.top();
+ event.end_time_ns = now;
+ AddEventToXLine(event, &line, &plane);
+ thread_events.active.pop();
+ }
+ }
+ }
+ entries_.clear();
+}
+
+void PythonHooks::Finalize(XSpace* space) {
+ if (space) {
+ XPlane* plane =
+ FindOrAddMutablePlaneWithName(space, kPythonTracerPlaneName);
+ if (options_.end_to_end_mode) {
+ if (end_to_end_xplane_) {
+ end_to_end_xplane_->set_name(plane->name());
+ plane->Swap(&*end_to_end_xplane_);
+ end_to_end_xplane_.reset();
+ }
+ } else {
+ PyGILState_STATE gil_state = PyGILState_Ensure();
+ CollectData(plane);
+ PyGILState_Release(gil_state);
+ }
+ }
+}
void PythonHooks::ProfileSlow(const py::object& frame, const string& event,
const py::object& arg) {
@@ -106,52 +225,58 @@
}
void PythonHooks::ProfileFast(PyFrameObject* frame, int what, PyObject* arg) {
- const int64 thread_id = PyThread_get_thread_ident();
+ const int64 thread_id = Env::Default()->GetCurrentThreadId();
+ uint64 now = EnvTime::NowNanos();
+ auto& thread_traces = entries_[thread_id];
- if (what == PyTrace_CALL) {
- PyCodeObject* f_code = frame->f_code;
- string filename(py::reinterpret_borrow<py::str>(f_code->co_filename));
- int line_no = frame->f_lineno;
-
- string function;
- if (f_code->co_name == nullptr) {
- function = "<unknown>";
- } else {
- function = py::reinterpret_borrow<py::str>(f_code->co_name);
+ switch (what) {
+ case PyTrace_CALL: {
+ PyCodeObject* f_code = frame->f_code;
+ thread_traces.active.emplace(now, 0, f_code, nullptr);
+ break;
}
-
- tracemes_[thread_id].push_back(
- absl::make_unique<TraceMe>([&filename, line_no, &function] {
- return absl::StrCat("$", io::Basename(filename), ":", line_no, " ",
- function);
- }));
- } else if (what == PyTrace_C_CALL && PyCFunction_Check(arg)) {
- // Python stack does not have a filename/line_no for native calls.
- auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
- PyObject* module = func->m_module;
- string filename;
- bool filename_ok;
-#if PY_MAJOR_VERSION < 3
- filename_ok = (module != nullptr && PyString_Check(module));
-#else
- filename_ok = (module != nullptr && PyUnicode_Check(module));
-#endif
- if (filename_ok) {
- filename = py::reinterpret_borrow<py::str>(module);
- } else {
- filename = "<unknown>";
+ case PyTrace_RETURN:
+ case PyTrace_EXCEPTION: {
+ if (!thread_traces.active.empty()) {
+ auto& entry = thread_traces.active.top();
+ entry.end_time_ns = now;
+ thread_traces.completed.emplace_back(std::move(entry));
+ thread_traces.active.pop();
+ } else if (options_.include_incomplete_events) {
+ PyCodeObject* f_code = frame->f_code;
+ thread_traces.completed.emplace_back(start_timestamp_ns_, now, f_code,
+ nullptr);
+ }
+ break;
}
-
- tracemes_[thread_id].push_back(
- absl::make_unique<TraceMe>([&filename, func] {
- return absl::StrCat(filename, " ", func->m_ml->ml_name);
- }));
- } else if (what == PyTrace_RETURN || what == PyTrace_C_RETURN ||
- what == PyTrace_EXCEPTION || what == PyTrace_C_EXCEPTION) {
- auto& thread_tracemes = tracemes_[thread_id];
- if (!thread_tracemes.empty()) {
- thread_tracemes.pop_back();
+ case PyTrace_C_CALL: {
+ if (PyCFunction_Check(arg)) {
+ // Python stack does not have a filename/line_no for native calls.
+ auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
+ entries_[thread_id].active.emplace(now, 0, nullptr, func);
+ }
+ break;
}
+ case PyTrace_C_RETURN:
+ case PyTrace_C_EXCEPTION: {
+ if (!thread_traces.active.empty()) {
+ auto& entry = thread_traces.active.top();
+ entry.end_time_ns = now;
+ thread_traces.completed.emplace_back(std::move(entry));
+ thread_traces.active.pop();
+ } else if (options_.include_incomplete_events) {
+ // Only the end of the events is recorded, use profiler start as start.
+ if (PyCFunction_Check(arg)) {
+ // Python stack does not have a filename/line_no for native calls.
+ auto* func = reinterpret_cast<PyCFunctionObject*>(arg);
+ entries_[thread_id].completed.emplace_back(start_timestamp_ns_, now,
+ nullptr, func);
+ }
+ }
+ break;
+ }
+ default:
+ break;
}
}
diff --git a/tensorflow/python/profiler/internal/python_hooks.h b/tensorflow/python/profiler/internal/python_hooks.h
index 582edf4..b30fcc3 100644
--- a/tensorflow/python/profiler/internal/python_hooks.h
+++ b/tensorflow/python/profiler/internal/python_hooks.h
@@ -16,14 +16,16 @@
#define TENSORFLOW_PYTHON_PROFILER_INTERNAL_PYTHON_HOOKS_H_
#include <memory>
+#include <stack>
#include <vector>
#include "absl/container/flat_hash_map.h"
#include "pybind11/cast.h"
#include "pybind11/pybind11.h"
#include "pybind11/pytypes.h"
+#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"
-#include "tensorflow/core/profiler/lib/traceme.h"
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
namespace tensorflow {
namespace profiler {
@@ -33,6 +35,52 @@
struct PythonHooksOptions {
bool enable_trace_python_function = false;
bool enable_python_traceme = true;
+ bool end_to_end_mode = false;
+ // Incomplete events are defined as those python calls which we only see
+ // either start or end, but not both. If we want to include them in the final
+ // result, profiler start, end time are used respectively to the absent
+ // timestamps.
+ bool include_incomplete_events = true;
+};
+
+struct PythonTraceEntry {
+ PythonTraceEntry(uint64 start, uint64 end, PyCodeObject* code,
+ PyCFunctionObject* func)
+ : start_time_ns(start),
+ end_time_ns(end),
+ code_object(code),
+ function_object(func) {
+ Py_XINCREF(code_object);
+ Py_XINCREF(function_object);
+ }
+ ~PythonTraceEntry() {
+ Py_XDECREF(code_object);
+ Py_XDECREF(function_object);
+ }
+ PythonTraceEntry(PythonTraceEntry&& other) {
+ start_time_ns = other.start_time_ns;
+ end_time_ns = other.end_time_ns;
+ code_object = other.code_object;
+ function_object = other.function_object;
+ other.code_object = nullptr;
+ other.function_object = nullptr;
+ }
+
+ std::string Name() const;
+
+ uint64 start_time_ns;
+ uint64 end_time_ns;
+ PyCodeObject* code_object;
+ PyCFunctionObject* function_object;
+
+ PythonTraceEntry(const PythonTraceEntry& other) = delete;
+ void operator=(const PythonTraceEntry&) = delete;
+ void operator=(PythonTraceEntry&&) = delete;
+};
+
+struct PerThreadEvents {
+ std::deque<PythonTraceEntry> completed;
+ std::stack<PythonTraceEntry> active;
};
// Singleton for tracing python function calls.
@@ -41,19 +89,27 @@
static PythonHooks* GetSingleton();
void Start(const PythonHooksOptions& option);
- void Stop(const PythonHooksOptions& option);
- void Finalize();
+ void Stop();
+ void Finalize(XSpace* space);
void ProfileSlow(const py::object& frame, const string& event,
const py::object& arg);
void ProfileFast(PyFrameObject* frame, int what, PyObject* arg);
private:
void EnableTraceMe(bool enable);
+ void CollectData(XPlane* raw_plane);
void SetProfilerInAllThreads();
void ClearProfilerInAllThreads();
- absl::flat_hash_map<int64, std::vector<std::unique_ptr<TraceMe>>> tracemes_;
+ // entries_ are accessed when GIL is held, therefore no race conditions.
+ absl::flat_hash_map<int64, PerThreadEvents> entries_;
+ uint64 start_timestamp_ns_;
+ bool active_session_ = false;
+ PythonHooksOptions options_;
+ // In end to end mode, Python get uninitialized before Stop()/Finalize(), we
+ // need to buffer the result.
+ absl::optional<XPlane> end_to_end_xplane_;
};
} // namespace profiler