[Profiler] Add tf-function stats to be part of op_stats.
PiperOrigin-RevId: 308861068
Change-Id: Ibca41b8b9676e187c37aad284b6dfab73bb97e63
diff --git a/tensorflow/core/profiler/convert/BUILD b/tensorflow/core/profiler/convert/BUILD
index 9fa4bd9..bc12796 100644
--- a/tensorflow/core/profiler/convert/BUILD
+++ b/tensorflow/core/profiler/convert/BUILD
@@ -209,10 +209,12 @@
":xplane_to_kernel_stats_db",
":xplane_to_op_metrics_db",
":xplane_to_step_events",
+ ":xplane_to_tf_functions",
"//tensorflow/core:lib",
"//tensorflow/core/profiler/protobuf:hardware_types_proto_cc",
"//tensorflow/core/profiler/protobuf:kernel_stats_proto_cc",
"//tensorflow/core/profiler/protobuf:op_stats_proto_cc",
+ "//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
"//tensorflow/core/profiler/protobuf:xplane_proto_cc",
"//tensorflow/core/profiler/utils:event_span",
"//tensorflow/core/profiler/utils:hardware_type_utils",
@@ -229,6 +231,7 @@
srcs = ["xplane_to_op_stats_test.cc"],
deps = [
":xplane_to_op_stats",
+ ":xplane_to_tf_functions",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
@@ -371,3 +374,43 @@
"//tensorflow/core/profiler/utils:xplane_visitor",
],
)
+
+cc_library(
+ name = "xplane_to_tf_functions",
+ srcs = ["xplane_to_tf_functions.cc"],
+ hdrs = ["xplane_to_tf_functions.h"],
+ deps = [
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
+ "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
+ "//tensorflow/core/profiler/utils:tf_xplane_visitor",
+ "//tensorflow/core/profiler/utils:timespan",
+ "//tensorflow/core/profiler/utils:xplane_schema",
+ "//tensorflow/core/profiler/utils:xplane_utils",
+ "//tensorflow/core/profiler/utils:xplane_visitor",
+ "@com_google_absl//absl/algorithm:container",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+tf_cc_test(
+ name = "xplane_to_tf_functions_test",
+ size = "small",
+ srcs = ["xplane_to_tf_functions_test.cc"],
+ deps = [
+ ":xplane_to_tf_functions",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ "//tensorflow/core/profiler/protobuf:tf_function_proto_cc",
+ "//tensorflow/core/profiler/utils:tf_xplane_visitor",
+ "//tensorflow/core/profiler/utils:xplane_builder",
+ "//tensorflow/core/profiler/utils:xplane_schema",
+ "//tensorflow/core/profiler/utils:xplane_utils",
+ ],
+)
diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc
index f1182e0..7fdd6ff 100644
--- a/tensorflow/core/profiler/convert/xplane_to_op_stats.cc
+++ b/tensorflow/core/profiler/convert/xplane_to_op_stats.cc
@@ -20,12 +20,13 @@
#include "tensorflow/core/profiler/convert/xplane_to_kernel_stats_db.h"
#include "tensorflow/core/profiler/convert/xplane_to_op_metrics_db.h"
#include "tensorflow/core/profiler/convert/xplane_to_step_events.h"
+#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include "tensorflow/core/profiler/protobuf/hardware_types.pb.h"
#include "tensorflow/core/profiler/protobuf/kernel_stats.pb.h"
+#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
#include "tensorflow/core/profiler/utils/event_span.h"
#include "tensorflow/core/profiler/utils/hardware_type_utils.h"
#include "tensorflow/core/profiler/utils/kernel_stats_utils.h"
-#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
#include "tensorflow/core/profiler/utils/xplane_schema.h"
#include "tensorflow/core/profiler/utils/xplane_utils.h"
@@ -77,6 +78,24 @@
env->set_device_core_count(accelerator_count);
}
+void ProcessHostPlane(const XPlane* host_plane, bool use_device_step_events,
+ OpMetricsDb* op_metrics_db, StepEvents* step_events,
+ TfFunctionDb* tf_function_db) {
+ absl::flat_hash_map<int64, TfOp> tf_ops =
+ CollectTfOpsFromHostThreadsXPlane(*host_plane);
+ OpMetricsDbCombiner combiner(op_metrics_db);
+ XPlaneVisitor plane = CreateTfXPlaneVisitor(host_plane);
+ plane.ForEachLine([&](const XLineVisitor& line) {
+ ConsumeTfMetricsDbData(
+ ConvertHostThreadsXLineToTfMetricsDbData(line, tf_ops), &combiner);
+ CombineStepEvents(ConvertHostThreadsXLineToStepEvents(
+ line, use_device_step_events, *step_events),
+ step_events);
+ CombineTfFunctionDb(ConvertHostThreadsXLineToTfFunctionDb(line),
+ tf_function_db);
+ });
+}
+
} // namespace
OpStats ConvertXSpaceToOpStats(const XSpace& space) {
@@ -112,12 +131,9 @@
// Convert a host plane.
bool has_device = !device_planes.empty();
if (host_plane) {
- *op_stats.mutable_host_op_metrics_db() =
- ConvertHostThreadsXPlaneToOpMetricsDb(*host_plane);
- CombineStepEvents(
- ConvertHostThreadsXPlaneToStepEvents(
- *host_plane, /*use_device_step_events=*/has_device, step_events),
- &step_events);
+ ProcessHostPlane(host_plane, has_device,
+ op_stats.mutable_host_op_metrics_db(), &step_events,
+ op_stats.mutable_tf_function_db());
}
StepEvents nonoverlapped_step_events = ToNonOverlappedStepEvents(step_events);
*op_stats.mutable_step_db() =
diff --git a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
index 69cf591..c7b140b 100644
--- a/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
+++ b/tensorflow/core/profiler/convert/xplane_to_op_stats_test.cc
@@ -16,6 +16,7 @@
#include "tensorflow/core/profiler/convert/xplane_to_op_stats.h"
#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
#include "tensorflow/core/profiler/protobuf/op_metrics.pb.h"
#include "tensorflow/core/profiler/protobuf/steps_db.pb.h"
#include "tensorflow/core/profiler/utils/group_events.h"
@@ -96,7 +97,7 @@
CreateXEvent(&host_plane_builder, &tf_executor_thread,
HostEventType::kExecutorStateProcess, 20, 80,
{{StatType::kStepId, 0}});
- CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70, {});
+ CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 70);
GroupTfEvents(&space, /*event_group_name_map=*/nullptr);
OpStats op_stats = ConvertXSpaceToOpStats(space);
@@ -144,6 +145,42 @@
EXPECT_EQ(precision_stats.compute_32bit_ps(), 40);
}
+TEST(ConcertXPlaneToOpStats, TfFunctionTest) {
+ XSpace space;
+ XPlaneBuilder host_plane_builder(space.add_planes());
+ host_plane_builder.SetName(kHostThreads);
+ host_plane_builder.ReserveLines(1);
+ std::string kFunctionName = "increment";
+
+ auto main_thread = host_plane_builder.GetOrCreateLine(0);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
+ 10, 100, "traced-nonXla", 1);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
+ 150, 20, "notTraced-nonXla", 1);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
+ 200, 80, "traced-nonXla", 2);
+
+ OpStats op_stats = ConvertXSpaceToOpStats(space);
+ const TfFunctionDb& tf_function_db = op_stats.tf_function_db();
+
+ EXPECT_EQ(tf_function_db.tf_functions().size(), 1);
+ EXPECT_EQ(tf_function_db.tf_functions().count(kFunctionName), 1);
+ const TfFunction& tf_function =
+ tf_function_db.tf_functions().at(kFunctionName);
+ EXPECT_EQ(tf_function.total_tracing_count(), 2);
+ EXPECT_EQ(tf_function.compiler(), OTHER_COMPILER);
+ const auto& metrics = tf_function.metrics();
+ EXPECT_EQ(metrics.size(), 2);
+ EXPECT_EQ(metrics.count(TRACED_MODE), 1);
+ EXPECT_EQ(metrics.count(NOT_TRACED_MODE), 1);
+ const auto& traced_mode = metrics.at(TRACED_MODE);
+ EXPECT_EQ(traced_mode.count(), 2);
+ EXPECT_EQ(traced_mode.self_time_ps(), 180);
+ const auto& not_traced_mode = metrics.at(NOT_TRACED_MODE);
+ EXPECT_EQ(not_traced_mode.count(), 1);
+ EXPECT_EQ(not_traced_mode.self_time_ps(), 20);
+}
+
} // namespace
} // namespace profiler
} // namespace tensorflow
diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc
new file mode 100644
index 0000000..ea57a53
--- /dev/null
+++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.cc
@@ -0,0 +1,271 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+You may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
+
+#include <stack>
+
+#include "absl/algorithm/container.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
+#include "tensorflow/core/lib/gtl/map_util.h"
+#include "tensorflow/core/lib/strings/proto_serialization.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
+#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
+#include "tensorflow/core/profiler/utils/timespan.h"
+#include "tensorflow/core/profiler/utils/xplane_schema.h"
+#include "tensorflow/core/profiler/utils/xplane_utils.h"
+#include "tensorflow/core/profiler/utils/xplane_visitor.h"
+
+namespace tensorflow {
+namespace profiler {
+
+namespace {
+
+std::pair<TfFunctionExecutionMode, TfFunctionCompiler> Decode(
+ absl::string_view function_name, absl::string_view mode) {
+ // mode is one of ["eager", "concrete", "traced-xla", "traced-nonXla",
+ // "notTraced-xla", "notTraced-nonXla"]
+ if (mode == "eager") return {EAGER_MODE, INVALID_COMPILER};
+ if (mode == "concrete") return {CONCRETE_MODE, INVALID_COMPILER};
+ if (mode == "traced-xla") return {TRACED_MODE, XLA_COMPILER};
+ if (mode == "traced-nonXla") return {TRACED_MODE, OTHER_COMPILER};
+ if (mode == "notTraced-xla") return {NOT_TRACED_MODE, XLA_COMPILER};
+ if (mode == "notTraced-nonXla") return {NOT_TRACED_MODE, OTHER_COMPILER};
+ // Shouldn't reach here.
+ LOG(ERROR) << absl::StrCat("tf-function '", function_name,
+ "' has an unexpected execution mode '", mode, "'")
+ << std::endl;
+ return {INVALID_MODE, INVALID_COMPILER};
+ DCHECK(false);
+}
+
+// Each invocation of a tf-function creates an ActivationRecord.
+struct ActivationRecord {
+ std::string function_name; // name of the tf-function.
+ Timespan timespan; // timespan of this invocation.
+ TfFunctionExecutionMode execution_mode; // execution mode.
+ TfFunctionCompiler compiler; // compiler used.
+ int64 tracing_count; // the total tracing count of this function when this
+ // invocation happened.
+ uint64 children_duration_ps; // Sum of the duration of all (immediate)
+ // children tf-functions of this function.
+ ActivationRecord()
+ : function_name(""),
+ execution_mode(INVALID_MODE),
+ compiler(INVALID_COMPILER),
+ tracing_count(0),
+ children_duration_ps(0) {}
+ ActivationRecord(absl::string_view name, const Timespan& timespan,
+ TfFunctionExecutionMode exe_mode,
+ TfFunctionCompiler compiler, int64 tracing_cnt)
+ : function_name(std::string(name)),
+ timespan(timespan),
+ execution_mode(exe_mode),
+ compiler(compiler),
+ tracing_count(tracing_cnt),
+ children_duration_ps(0) {}
+ std::string DebugString() const {
+ return absl::StrCat("{", function_name, ", ",
+ TfFunctionExecutionMode_Name(execution_mode), ", ",
+ TfFunctionCompiler_Name(compiler),
+ ", tracing_count:", tracing_count,
+ ", children_duration:", children_duration_ps,
+ " ps, timespan:", timespan.DebugString(), "}");
+ }
+};
+
+// Entry or exit point of a tf-function.
+struct EntryOrExit {
+ bool is_entry; // true for entry, false for exit.
+ int64 index; // index to the ActivationRecord.
+ uint64 timestamp_ps; // the time when this entry/exit happens.
+ EntryOrExit() : is_entry(false), index(-1), timestamp_ps(0) {}
+ EntryOrExit(bool is_entry, int64 index, uint64 timestamp_ps)
+ : is_entry(is_entry), index(index), timestamp_ps(timestamp_ps) {}
+ std::string DebugString() const {
+ std::string entry_or_exit = is_entry ? "entry, " : "exit, ";
+ return absl::StrCat("{", entry_or_exit, "idx:", index,
+ ", timestamp:", timestamp_ps, "}");
+ }
+};
+
+TfFunctionCompiler CombineCompilers(TfFunctionCompiler a,
+ TfFunctionCompiler b) {
+ if (a == INVALID_COMPILER) return b;
+ if (b == INVALID_COMPILER) return a;
+ if (a == b) return a;
+ return MIXED_COMPILER;
+}
+
+void CombineTfFunctionMetrics(const TfFunctionMetrics& src,
+ TfFunctionMetrics* dst) {
+ dst->set_count(src.count() + dst->count());
+ dst->set_self_time_ps(src.self_time_ps() + dst->self_time_ps());
+}
+
+void CombineTfFunction(const TfFunction& src, TfFunction* dst) {
+ dst->set_total_tracing_count(
+ std::max(src.total_tracing_count(), dst->total_tracing_count()));
+ dst->set_compiler(CombineCompilers(src.compiler(), dst->compiler()));
+ for (const auto& mode_metrics : src.metrics()) {
+ int32 execution_mode = mode_metrics.first;
+ const TfFunctionMetrics& src_metrics = mode_metrics.second;
+ TfFunctionMetrics* dst_metrics =
+ gtl::FindOrNull(*dst->mutable_metrics(), execution_mode);
+ if (dst_metrics == nullptr) {
+ (*dst->mutable_metrics())[execution_mode] = src_metrics;
+ } else {
+ CombineTfFunctionMetrics(src_metrics, dst_metrics);
+ }
+ }
+}
+
+// Execution history of all tf-functions invoked.
+class TfFunctionExecutions {
+ public:
+ explicit TfFunctionExecutions(const XLineVisitor& line) {
+ // Creates points_ and activations_ from line.
+ line.ForEachEvent([&](const XEventVisitor& event) {
+ std::string mode = "";
+ int64 tracing_count = 0;
+ event.ForEachStat([&mode, &tracing_count](const XStatVisitor& stat) {
+ if (stat.Type() == StatType::kTfFunctionCall)
+ mode = std::string(stat.StrValue());
+ if (stat.Type() == StatType::kTfFunctionTracingCount)
+ tracing_count = stat.IntValue();
+ });
+ if (mode.empty()) return;
+
+ // event is a tf-function.
+ int64 index = activations_.size();
+ auto timespan = event.GetTimespan();
+ auto mode_compiler = Decode(event.Name(), mode);
+ ActivationRecord activation_record =
+ ActivationRecord(event.Name(), timespan, mode_compiler.first,
+ mode_compiler.second, tracing_count);
+ activations_.push_back(activation_record);
+ EntryOrExit entry_point =
+ EntryOrExit(/*is_entry=*/true, index, timespan.begin_ps());
+ EntryOrExit exit_point =
+ EntryOrExit(/*is_entry=*/false, index, timespan.end_ps());
+ points_.push_back(entry_point);
+ points_.push_back(exit_point);
+ });
+
+ // Sorts points_ in ascending order of timestamps.
+ auto ascending_in_timestamp = [](const EntryOrExit& a,
+ const EntryOrExit& b) {
+ return a.timestamp_ps < b.timestamp_ps;
+ };
+ absl::c_sort(points_, ascending_in_timestamp);
+
+ // Calculates the children duration for each activation record.
+ CalculateChildrenDurations();
+ }
+
+ std::string DebugString() const {
+ std::string result = "\nActivations:\n";
+ for (auto i = 0; i < activations_.size(); i++) {
+ absl::StrAppend(&result, "[", i, "] ", activations_[i].DebugString(),
+ "\n");
+ }
+ absl::StrAppend(&result, "tf-function Entry/Exit Points:\n");
+ for (const auto& pt : points_) {
+ absl::StrAppend(&result, pt.DebugString(), "\n");
+ }
+ return result;
+ }
+
+ // Converts this execution history to a TfFunctionDb.
+ TfFunctionDb ConvertToTfFunctionDb() {
+ TfFunctionDb result;
+ for (const auto& record : activations_) {
+ TfFunction* fun = &(*result.mutable_tf_functions())[record.function_name];
+ fun->set_total_tracing_count(
+ std::max(static_cast<int64>(fun->total_tracing_count()),
+ record.tracing_count));
+ fun->set_compiler(CombineCompilers(fun->compiler(), record.compiler));
+ // The self-time of this function is the difference between the duration
+ // of this function and the duration of its children.
+ uint64 self_time_ps =
+ record.timespan.duration_ps() - record.children_duration_ps;
+ // Updates the metrics for this execution mode with this invocation.
+ TfFunctionMetrics* metrics =
+ &(*fun->mutable_metrics())[record.execution_mode];
+ metrics->set_count(metrics->count() + 1);
+ metrics->set_self_time_ps(metrics->self_time_ps() + self_time_ps);
+ }
+ return result;
+ }
+
+ // Calculates the children duration of every tf-function.
+ void CalculateChildrenDurations() {
+ std::stack<int64> call_stack;
+ for (const auto& pt : points_) {
+ if (pt.is_entry) {
+ // Function entry.
+ call_stack.push(pt.index);
+ } else {
+ // Function exit.
+ DCHECK(call_stack.top() == pt.index); // must be well nested.
+ uint64 call_duration = activations_[pt.index].timespan.duration_ps();
+ call_stack.pop();
+ if (!call_stack.empty()) {
+ // call_stack.top() is the parent tf-function; adds call_duration to
+ // its children_duration.
+ activations_[call_stack.top()].children_duration_ps += call_duration;
+ }
+ }
+ }
+ }
+
+ private:
+ // ActivationRecords for all tf-function invocations.
+ std::vector<ActivationRecord> activations_;
+ // Entry and exit points of all invocations.
+ std::vector<EntryOrExit> points_;
+};
+
+} // namespace
+
+std::string DebugString(const TfFunctionDb tf_function_db) {
+ std::string str;
+ ::tensorflow::protobuf::TextFormat::PrintToString(tf_function_db, &str);
+ return str;
+}
+
+void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst) {
+ for (const auto& name_function : src.tf_functions()) {
+ const auto& name = name_function.first;
+ const auto& src_fun = name_function.second;
+ TfFunction* dst_fun = gtl::FindOrNull(*dst->mutable_tf_functions(), name);
+ if (dst_fun == nullptr) {
+ (*dst->mutable_tf_functions())[name] = src_fun;
+ } else {
+ CombineTfFunction(src_fun, dst_fun);
+ }
+ }
+}
+
+TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line) {
+ TfFunctionExecutions tf_function_executions = TfFunctionExecutions(line);
+ return tf_function_executions.ConvertToTfFunctionDb();
+}
+
+} // namespace profiler
+} // namespace tensorflow
diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions.h b/tensorflow/core/profiler/convert/xplane_to_tf_functions.h
new file mode 100644
index 0000000..470b22d
--- /dev/null
+++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions.h
@@ -0,0 +1,38 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_
+#define TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_
+
+#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
+#include "tensorflow/core/profiler/utils/xplane_visitor.h"
+
+namespace tensorflow {
+namespace profiler {
+
+// Converts from the given XLine to a TfFunctionDb.
+TfFunctionDb ConvertHostThreadsXLineToTfFunctionDb(const XLineVisitor& line);
+
+// Returns a debugging string for the given TfFunctionDb.
+std::string DebugString(const TfFunctionDb tf_function_db);
+
+// Combines the tf-function statistics from src and dst into dst.
+void CombineTfFunctionDb(const TfFunctionDb& src, TfFunctionDb* dst);
+
+} // namespace profiler
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CORE_PROFILER_CONVERT_XPLANE_TO_TF_FUNCTIONS_H_
diff --git a/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc b/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc
new file mode 100644
index 0000000..253ef1a
--- /dev/null
+++ b/tensorflow/core/profiler/convert/xplane_to_tf_functions_test.cc
@@ -0,0 +1,171 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/profiler/convert/xplane_to_tf_functions.h"
+
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/profiler/protobuf/tf_function.pb.h"
+#include "tensorflow/core/profiler/utils/tf_xplane_visitor.h"
+#include "tensorflow/core/profiler/utils/xplane_builder.h"
+#include "tensorflow/core/profiler/utils/xplane_schema.h"
+#include "tensorflow/core/profiler/utils/xplane_utils.h"
+
+namespace tensorflow {
+namespace profiler {
+namespace {
+
+const absl::string_view kEager = "eager";
+const absl::string_view kConcrete = "concrete";
+const absl::string_view kTracedNonXla = "traced-nonXla";
+const absl::string_view kTracedXla = "traced-xla";
+const absl::string_view kNotTracedNonXla = "notTraced-nonXla";
+const absl::string_view kNotTracedXla = "notTraced-xla";
+
+TfFunctionDb ConvertXSpaceToTfFunctionDb(const XSpace& space) {
+ TfFunctionDb result;
+ const XPlane* host_plane = FindPlaneWithName(space, kHostThreads);
+ if (host_plane) {
+ XPlaneVisitor plane = CreateTfXPlaneVisitor(host_plane);
+ plane.ForEachLine([&result](const XLineVisitor& line) {
+ CombineTfFunctionDb(ConvertHostThreadsXLineToTfFunctionDb(line), &result);
+ });
+ }
+ return result;
+}
+
+TEST(ConvertXPlaneToTfFunctions, CombineTwoThreads) {
+ XSpace space;
+ XPlaneBuilder host_plane_builder(space.add_planes());
+ host_plane_builder.SetName(kHostThreads);
+ host_plane_builder.ReserveLines(2);
+ std::string kFunctionName = "decrement";
+
+ auto main_thread = host_plane_builder.GetOrCreateLine(0);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
+ 10, 100, kTracedNonXla, 1);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
+ 150, 20, kNotTracedNonXla, 2);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread, kFunctionName,
+ 200, 80, kTracedNonXla, 3);
+
+ auto other_thread = host_plane_builder.GetOrCreateLine(1);
+ CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName,
+ 20, 100, kTracedNonXla, 2);
+ CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName,
+ 160, 20, kNotTracedNonXla, 2);
+ CreateTfFunctionCallEvent(&host_plane_builder, &other_thread, kFunctionName,
+ 210, 80, kTracedXla, 4);
+
+ TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space);
+ EXPECT_EQ(tf_function_db.tf_functions().size(), 1);
+ EXPECT_EQ(tf_function_db.tf_functions().count(kFunctionName), 1);
+ const TfFunction& tf_function =
+ tf_function_db.tf_functions().at(kFunctionName);
+ EXPECT_EQ(tf_function.total_tracing_count(), 4);
+ EXPECT_EQ(tf_function.compiler(), MIXED_COMPILER);
+ const auto& metrics = tf_function.metrics();
+ EXPECT_EQ(metrics.size(), 2);
+ EXPECT_EQ(metrics.count(TRACED_MODE), 1);
+ EXPECT_EQ(metrics.count(NOT_TRACED_MODE), 1);
+ const auto& traced_mode = metrics.at(TRACED_MODE);
+ EXPECT_EQ(traced_mode.count(), 4);
+ EXPECT_EQ(traced_mode.self_time_ps(), 360);
+ const auto& not_traced_mode = metrics.at(NOT_TRACED_MODE);
+ EXPECT_EQ(not_traced_mode.count(), 2);
+ EXPECT_EQ(not_traced_mode.self_time_ps(), 40);
+}
+
+TEST(ConvertXPlaneToTfFunctions, NestedFunctions) {
+ XSpace space;
+ XPlaneBuilder host_plane_builder(space.add_planes());
+ host_plane_builder.SetName(kHostThreads);
+ host_plane_builder.ReserveLines(1);
+ std::string kOuterFunctionName = "outer";
+ std::string kInnerFunctionName = "inner";
+
+ auto main_thread = host_plane_builder.GetOrCreateLine(0);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread,
+ kOuterFunctionName, 10, 100, kTracedNonXla, 1);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread,
+ kInnerFunctionName, 30, 40, kNotTracedXla, 0);
+ TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space);
+ EXPECT_EQ(tf_function_db.tf_functions().size(), 2);
+ EXPECT_EQ(tf_function_db.tf_functions().count(kOuterFunctionName), 1);
+ EXPECT_EQ(tf_function_db.tf_functions().count(kInnerFunctionName), 1);
+ const TfFunction& outer =
+ tf_function_db.tf_functions().at(kOuterFunctionName);
+ EXPECT_EQ(outer.total_tracing_count(), 1);
+ EXPECT_EQ(outer.compiler(), OTHER_COMPILER);
+ const auto& outer_metrics = outer.metrics();
+ EXPECT_EQ(outer_metrics.size(), 1);
+ EXPECT_EQ(outer_metrics.count(TRACED_MODE), 1);
+ const auto& traced_mode = outer_metrics.at(TRACED_MODE);
+ EXPECT_EQ(traced_mode.count(), 1);
+ EXPECT_EQ(traced_mode.self_time_ps(), 60);
+ const TfFunction& inner =
+ tf_function_db.tf_functions().at(kInnerFunctionName);
+ EXPECT_EQ(inner.total_tracing_count(), 0);
+ EXPECT_EQ(inner.compiler(), XLA_COMPILER);
+ const auto& inner_metrics = inner.metrics();
+ EXPECT_EQ(inner_metrics.size(), 1);
+ EXPECT_EQ(inner_metrics.count(NOT_TRACED_MODE), 1);
+ const auto& not_traced_mode = inner_metrics.at(NOT_TRACED_MODE);
+ EXPECT_EQ(not_traced_mode.count(), 1);
+ EXPECT_EQ(not_traced_mode.self_time_ps(), 40);
+}
+
+TEST(ConvertXPlaneToTfFunctions, EagerPlusConcrete) {
+ XSpace space;
+ XPlaneBuilder host_plane_builder(space.add_planes());
+ host_plane_builder.SetName(kHostThreads);
+ host_plane_builder.ReserveLines(2);
+ std::string kEagerFunctionName = "i_am_eager";
+ std::string kConcreteFunctionName = "i_am_concrete";
+
+ auto main_thread = host_plane_builder.GetOrCreateLine(0);
+ CreateTfFunctionCallEvent(&host_plane_builder, &main_thread,
+ kEagerFunctionName, 10, 200, kEager);
+ auto other_thread = host_plane_builder.GetOrCreateLine(1);
+ CreateTfFunctionCallEvent(&host_plane_builder, &other_thread,
+ kConcreteFunctionName, 20, 40, kConcrete);
+ TfFunctionDb tf_function_db = ConvertXSpaceToTfFunctionDb(space);
+ EXPECT_EQ(tf_function_db.tf_functions().size(), 2);
+ EXPECT_EQ(tf_function_db.tf_functions().count(kEagerFunctionName), 1);
+ EXPECT_EQ(tf_function_db.tf_functions().count(kConcreteFunctionName), 1);
+ const TfFunction& eager =
+ tf_function_db.tf_functions().at(kEagerFunctionName);
+ EXPECT_EQ(eager.total_tracing_count(), 0);
+ EXPECT_EQ(eager.compiler(), INVALID_COMPILER);
+ const auto& eager_metrics = eager.metrics();
+ EXPECT_EQ(eager_metrics.size(), 1);
+ EXPECT_EQ(eager_metrics.count(EAGER_MODE), 1);
+ const auto& eager_mode = eager_metrics.at(EAGER_MODE);
+ EXPECT_EQ(eager_mode.count(), 1);
+ EXPECT_EQ(eager_mode.self_time_ps(), 200);
+ const TfFunction& concrete =
+ tf_function_db.tf_functions().at(kConcreteFunctionName);
+ EXPECT_EQ(concrete.total_tracing_count(), 0);
+ EXPECT_EQ(concrete.compiler(), INVALID_COMPILER);
+ const auto& concrete_metrics = concrete.metrics();
+ EXPECT_EQ(concrete_metrics.size(), 1);
+ EXPECT_EQ(concrete_metrics.count(CONCRETE_MODE), 1);
+ const auto& concrete_mode = concrete_metrics.at(CONCRETE_MODE);
+ EXPECT_EQ(concrete_mode.count(), 1);
+ EXPECT_EQ(concrete_mode.self_time_ps(), 40);
+}
+
+} // namespace
+} // namespace profiler
+} // namespace tensorflow
diff --git a/tensorflow/core/profiler/protobuf/BUILD b/tensorflow/core/profiler/protobuf/BUILD
index 7ebfd50..b102fe2 100644
--- a/tensorflow/core/profiler/protobuf/BUILD
+++ b/tensorflow/core/profiler/protobuf/BUILD
@@ -77,6 +77,7 @@
":kernel_stats_proto",
":op_metrics_proto",
":steps_db_proto",
+ ":tf_function_proto",
],
visibility = [
":friends",
@@ -90,6 +91,13 @@
visibility = [":friends"],
)
+tf_proto_library(
+ name = "tf_function_proto",
+ srcs = ["tf_function.proto"],
+ cc_api_version = 2,
+ visibility = [":friends"],
+)
+
# This proto is deprecating and not guaranteed to be compatible across versions.
# Please don't refer in new project unless you are double confirmed.
tf_proto_library(
diff --git a/tensorflow/core/profiler/protobuf/op_stats.proto b/tensorflow/core/profiler/protobuf/op_stats.proto
index 2e3ace8..aa7cd56 100644
--- a/tensorflow/core/profiler/protobuf/op_stats.proto
+++ b/tensorflow/core/profiler/protobuf/op_stats.proto
@@ -5,6 +5,7 @@
import "tensorflow/core/profiler/protobuf/kernel_stats.proto";
import "tensorflow/core/profiler/protobuf/op_metrics.proto";
import "tensorflow/core/profiler/protobuf/steps_db.proto";
+import "tensorflow/core/profiler/protobuf/tf_function.proto";
// Performance environment, e.g the peak performance capabilities of the device.
message PerfEnv {
@@ -104,6 +105,8 @@
RunEnvironment run_environment = 5;
// Kernel stats results from all GPUs.
KernelStatsDb kernel_stats_db = 6;
+ // Statistics for all tf-functions.
+ TfFunctionDb tf_function_db = 8;
// Errors seen.
repeated string errors = 7;
}
diff --git a/tensorflow/core/profiler/protobuf/tf_function.proto b/tensorflow/core/profiler/protobuf/tf_function.proto
new file mode 100644
index 0000000..fe07c00
--- /dev/null
+++ b/tensorflow/core/profiler/protobuf/tf_function.proto
@@ -0,0 +1,58 @@
+syntax = "proto3";
+
+package tensorflow.profiler;
+
+// All possible execution modes of a tf-function.
+enum TfFunctionExecutionMode {
+ // Yet to be set.
+ INVALID_MODE = 0;
+ // Eager execution.
+ EAGER_MODE = 1;
+ // Graph execution with tracing.
+ TRACED_MODE = 2;
+ // Graph execution without tracing.
+ NOT_TRACED_MODE = 3;
+ // Concrete function.
+ CONCRETE_MODE = 4;
+}
+
+// All possible compilers that can be used to compile a tf-function in the graph
+// mode.
+enum TfFunctionCompiler {
+ // Yet to be set.
+ INVALID_COMPILER = 0;
+ // Any other compiler.
+ OTHER_COMPILER = 1;
+ // If some instance of the function is compiled with XLA and some is compiled
+ // with Non-XLA, use "MIXED_COMPILER".
+ MIXED_COMPILER = 2;
+ // XLA compiler.
+ XLA_COMPILER = 3;
+ // MLIR compiler.
+ MLIR_COMPILER = 4;
+}
+
+// Metrics associated with a particular execution mode of a tf-function.
+message TfFunctionMetrics {
+ // Number of invocations to the function in that execution mode.
+ uint64 count = 1;
+ // The sum of "self-execution" time of this function over those invocations.
+ uint64 self_time_ps = 2;
+}
+
+// Statistics for a tf-function.
+message TfFunction {
+ // A map from each execution mode to its corresponding metrics.
+ map<int32, TfFunctionMetrics> metrics = 1;
+ // Total tracing count from the program's beginning (i.e. beyond the profiling
+ // period) of this tf-function.
+ int64 total_tracing_count = 2;
+ // Compiler used to compile this function.
+ TfFunctionCompiler compiler = 3;
+}
+
+// Statistics for all tf-functions.
+message TfFunctionDb {
+ // A map from function name to the statistics of that function.
+ map<string, TfFunction> tf_functions = 1;
+}
diff --git a/tensorflow/core/profiler/utils/derived_timeline_test.cc b/tensorflow/core/profiler/utils/derived_timeline_test.cc
index b92f4c5..f3e6b66 100644
--- a/tensorflow/core/profiler/utils/derived_timeline_test.cc
+++ b/tensorflow/core/profiler/utils/derived_timeline_test.cc
@@ -45,8 +45,7 @@
XPlane* plane = space.add_planes();
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
- auto first_event =
- CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {});
+ auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100);
first_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kHloModule)),
kHloModuleName);
@@ -54,7 +53,7 @@
GetStatTypeStr(StatType::kKernelDetails)),
kKernelDetails);
auto second_event =
- CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {});
+ CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300);
second_event.AddStatValue(*plane_builder.GetOrCreateStatMetadata(
GetStatTypeStr(StatType::kHloModule)),
kHloModuleName);
@@ -85,8 +84,7 @@
XPlane* plane = space.add_planes();
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
- auto first_event =
- CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {});
+ auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100);
first_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
@@ -94,7 +92,7 @@
GetStatTypeStr(StatType::kKernelDetails)),
kKernelDetails);
auto second_event =
- CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {});
+ CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300);
second_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
@@ -164,8 +162,7 @@
XPlane* plane = space.add_planes();
XPlaneBuilder plane_builder(plane);
auto line_builder = plane_builder.GetOrCreateLine(0);
- auto first_event =
- CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100, {});
+ auto first_event = CreateXEvent(&plane_builder, &line_builder, "op1", 0, 100);
first_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
@@ -173,7 +170,7 @@
GetStatTypeStr(StatType::kKernelDetails)),
kKernelDetails);
auto second_event =
- CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300, {});
+ CreateXEvent(&plane_builder, &line_builder, "op2", 200, 300);
second_event.AddStatValue(
*plane_builder.GetOrCreateStatMetadata(GetStatTypeStr(StatType::kLevel0)),
kTfOpName);
diff --git a/tensorflow/core/profiler/utils/group_events_test.cc b/tensorflow/core/profiler/utils/group_events_test.cc
index f3f58bf..6b6a0d2 100644
--- a/tensorflow/core/profiler/utils/group_events_test.cc
+++ b/tensorflow/core/profiler/utils/group_events_test.cc
@@ -159,7 +159,7 @@
// Eagerly executed CPU TF op.
CreateXEvent(&host_plane_builder, &main_thread,
HostEventType::kEagerKernelExecute, 120, 80, {});
- CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80, {});
+ CreateXEvent(&host_plane_builder, &main_thread, "add:Add", 120, 80);
XPlane* device_plane = space.add_planes();
XPlaneBuilder device_plane_builder(device_plane);
@@ -208,7 +208,7 @@
CreateXEvent(&host_plane_builder, &tf_executor_thread, "matmul", 30, 30,
{{StatType::kCorrelationId, 100}});
// CPU TF op executed inside tf.function.
- CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20, {});
+ CreateXEvent(&host_plane_builder, &tf_executor_thread, "add:Add", 70, 20);
XPlane* device_plane = space.add_planes();
XPlaneBuilder device_plane_builder(device_plane);
diff --git a/tensorflow/core/profiler/utils/xplane_schema.cc b/tensorflow/core/profiler/utils/xplane_schema.cc
index 0a2cc52..af00081 100644
--- a/tensorflow/core/profiler/utils/xplane_schema.cc
+++ b/tensorflow/core/profiler/utils/xplane_schema.cc
@@ -161,6 +161,8 @@
{"hlo_module", kHloModule},
{"equation", kEquation},
{"is_eager", kIsEager},
+ {"tf_function_call", kTfFunctionCall},
+ {"tracing_count", kTfFunctionTracingCount},
// Performance counter related.
{"Raw Value", kRawValue},
{"Scaled Value", kScaledValue},
diff --git a/tensorflow/core/profiler/utils/xplane_schema.h b/tensorflow/core/profiler/utils/xplane_schema.h
index 432c948..e8c377b 100644
--- a/tensorflow/core/profiler/utils/xplane_schema.h
+++ b/tensorflow/core/profiler/utils/xplane_schema.h
@@ -157,6 +157,8 @@
kHloModule,
kEquation,
kIsEager,
+ kTfFunctionCall,
+ kTfFunctionTracingCount,
// Performance counter related.
kRawValue,
kScaledValue,
diff --git a/tensorflow/core/profiler/utils/xplane_utils.cc b/tensorflow/core/profiler/utils/xplane_utils.cc
index df1d5fc..3fe3cc9 100644
--- a/tensorflow/core/profiler/utils/xplane_utils.cc
+++ b/tensorflow/core/profiler/utils/xplane_utils.cc
@@ -154,6 +154,23 @@
stats);
}
+XEventBuilder CreateXEventWithStringViewMetadataValue(
+ XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
+ absl::string_view event_name, int64 offset_ps, int64 duration_ps,
+ const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
+ stats) {
+ auto event_builder = line_builder->AddEvent(
+ *plane_builder->GetOrCreateEventMetadata(event_name));
+ event_builder.SetOffsetPs(offset_ps);
+ event_builder.SetDurationPs(duration_ps);
+ for (const auto& stat_type_and_value : stats) {
+ event_builder.AddStatValue(*plane_builder->GetOrCreateStatMetadata(
+ GetStatTypeStr(stat_type_and_value.first)),
+ stat_type_and_value.second);
+ }
+ return event_builder;
+}
+
void RemovePlaneWithName(XSpace* space, absl::string_view name) {
auto* planes = space->mutable_planes();
planes->erase(
@@ -290,5 +307,23 @@
return plane_timestamp;
}
+void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder,
+ XLineBuilder* line_builder,
+ absl::string_view function_name, int64 offset_ps,
+ int64 duration_ps,
+ absl::string_view execution_mode,
+ int64 tracing_count) {
+ XEventBuilder event_builder = CreateXEventWithStringViewMetadataValue(
+ plane_builder, line_builder, function_name, offset_ps, duration_ps,
+ {{StatType::kTfFunctionCall, execution_mode}});
+ if (tracing_count >= 0) {
+ // Adds the tracing_count stats only if tracing_count is valid.
+ event_builder.AddStatValue(
+ *plane_builder->GetOrCreateStatMetadata(
+ GetStatTypeStr(StatType::kTfFunctionTracingCount)),
+ tracing_count);
+ }
+}
+
} // namespace profiler
} // namespace tensorflow
diff --git a/tensorflow/core/profiler/utils/xplane_utils.h b/tensorflow/core/profiler/utils/xplane_utils.h
index 41035a6..a98e37f 100644
--- a/tensorflow/core/profiler/utils/xplane_utils.h
+++ b/tensorflow/core/profiler/utils/xplane_utils.h
@@ -46,16 +46,23 @@
void AddOrUpdateStrStat(int64 metadata_id, absl::string_view value,
tensorflow::profiler::XEvent* event);
+// Creates an XEvent with int64 stats.
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
absl::string_view event_name, int64 offset_ps, int64 duration_ps,
- const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats);
-
+ const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats = {});
XEventBuilder CreateXEvent(
XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
HostEventType event_type, int64 offset_ps, int64 duration_ps,
const absl::flat_hash_map<StatType, int64 /*stat_value*/>& stats);
+// Creates an XEvent with string stats.
+XEventBuilder CreateXEventWithStringViewMetadataValue(
+ XPlaneBuilder* plane_builder, XLineBuilder* line_builder,
+ absl::string_view event_name, int64 offset_ps, int64 duration_ps,
+ const absl::flat_hash_map<StatType, absl::string_view /*stat_value*/>&
+ stats);
+
void RemovePlaneWithName(XSpace* space, absl::string_view name);
void RemoveEmptyPlanes(XSpace* space);
void RemoveEmptyLines(XPlane* plane);
@@ -87,6 +94,13 @@
// timestamps. If zero line exists, return 0;
uint64 GetStartTimestampNs(const XPlane& plane);
+// Creates a Xevent in the given plane & line for a tf-function.
+void CreateTfFunctionCallEvent(XPlaneBuilder* plane_builder,
+ XLineBuilder* line_builder,
+ absl::string_view function_name, int64 offset_ps,
+ int64 duration_ps,
+ absl::string_view execution_mode,
+ int64 tracing_count = -1);
} // namespace profiler
} // namespace tensorflow