| /* Copyright 2016 The TensorFlow Authors All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #include "tensorflow/core/profiler/internal/tfprof_stats.h" |
| |
| #include <stdio.h> |
| |
| #include <utility> |
| |
| #include "absl/strings/numbers.h" |
| #include "absl/strings/str_format.h" |
| #include "absl/strings/str_split.h" |
| #include "tensorflow/core/platform/regexp.h" |
| #include "tensorflow/core/profiler/internal/tfprof_timeline.h" |
| |
| namespace tensorflow { |
| namespace tfprof { |
| namespace { |
| |
| const char* const kProfilePrefix = "Profile:\n"; |
| |
| bool CreateRunMetadataNode(const string& name, NodeDef* def) { |
| // TODO(xpan): Better solution than denylisting this 2 nodes. They |
| // actually cost some resources, maybe include them. Some nodes, such |
| // as _SOURCE appear in multiple devices, which breaks tfprof's assumption. |
| if (name == "RecvTensor" || name == "_SOURCE" || |
| name.find("MEMCPY") != name.npos) { |
| return false; |
| } |
| def->set_name(name); |
| // TODO(xpan): Better operation type. |
| // This is because some times a node doesn't have a op type, |
| // so we use node name as the op type. |
| def->set_op(name); |
| return true; |
| } |
| } // namespace |
| |
| TFStats::TFStats(std::unique_ptr<GraphDef> graph, |
| std::unique_ptr<RunMetadata> run_meta, |
| std::unique_ptr<OpLogProto> op_log, |
| std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader) |
| : has_code_traces_(false), |
| miss_accelerator_stream_(false), |
| ckpt_reader_(std::move(ckpt_reader)) { |
| CHECK(graph) << "Must at least have GraphDef"; |
| |
| AddGraph(std::move(graph)); |
| if (run_meta && run_meta->has_step_stats()) { |
| AddRunMeta(0, std::move(run_meta)); |
| } |
| AddOpLogProto(std::move(op_log)); |
| |
| if (ckpt_reader_) { |
| for (const auto& v : ckpt_reader_->GetVariableToShapeMap()) { |
| auto node = nodes_map_.find(v.first); |
| if (node != nodes_map_.end()) { |
| node->second->AddOpType("_checkpoint_variables"); |
| } |
| } |
| } |
| } |
| |
| TFStats::TFStats(const string& filename, |
| std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader) |
| : has_code_traces_(false), |
| miss_accelerator_stream_(false), |
| ckpt_reader_(std::move(ckpt_reader)) { |
| string str; |
| Status s = ReadFileToString(Env::Default(), filename, &str); |
| if (!s.ok()) { |
| absl::FPrintF(stderr, "Failed to read profile: %s", s.ToString()); |
| return; |
| } |
| |
| ProfileProto profile; |
| if (!profile.ParseFromString(str)) { |
| absl::FPrintF(stderr, "Failed to parse profile\n"); |
| return; |
| } |
| for (const auto& entry : profile.id_to_string()) { |
| id_to_string_[entry.first] = entry.second; |
| } |
| for (const auto& node_pb : profile.nodes()) { |
| std::unique_ptr<TFGraphNode> node( |
| new TFGraphNode(node_pb.second, profile, &id_to_string_, &nodes_map_)); |
| nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>( |
| node_pb.second.name(), std::move(node))); |
| } |
| has_code_traces_ = profile.has_trace(); |
| for (int64_t s : profile.steps()) { |
| steps_.insert(s); |
| } |
| } |
| |
| void TFStats::BuildView(const string& cmd) { |
| if (cmd == kCmds[0] && !scope_view_) { |
| scope_view_.reset(new TFScope(ckpt_reader_.get())); |
| for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) { |
| scope_view_->AddNode(it->second.get()); |
| } |
| scope_view_->Build(); |
| } |
| if (cmd == kCmds[1] && !graph_view_) { |
| graph_view_.reset(new TFGraph(ckpt_reader_.get())); |
| for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) { |
| graph_view_->AddNode(it->second.get()); |
| } |
| graph_view_->Build(); |
| } |
| if (cmd == kCmds[2] && !code_view_) { |
| code_view_.reset(new TFCode()); |
| for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) { |
| code_view_->AddNode(it->second.get()); |
| } |
| code_view_->Build(); |
| } |
| if (cmd == kCmds[3] && !op_view_) { |
| op_view_.reset(new TFOp()); |
| for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) { |
| op_view_->AddNode(it->second.get()); |
| } |
| op_view_->Build(); |
| } |
| } |
| |
| void TFStats::BuildAllViews() { |
| std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds)); |
| for (const string& cmd : cmds_str) { |
| BuildView(cmd); |
| } |
| } |
| |
| const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd, |
| const Options& opts) const { |
| if (!Validate(opts)) { |
| return empty_graph_node_; |
| } |
| string prefix = MaybeReportMissingTrace(); |
| prefix += QueryDoc(cmd, opts) + kProfilePrefix; |
| |
| if (cmd == kCmds[0]) { |
| return scope_view_->Show(prefix, opts); |
| } else if (cmd == kCmds[1]) { |
| if (opts.step < 0 && opts.output_type == kOutput[0]) { |
| for (int64_t step : steps_) { |
| Options nopts = opts; |
| nopts.step = step; |
| graph_view_->Show(prefix, nopts); |
| } |
| return empty_graph_node_; |
| } |
| return graph_view_->Show(prefix, opts); |
| } else { |
| absl::FPrintF(stderr, "Unknown command: %s\n", cmd); |
| return empty_graph_node_; |
| } |
| } |
| |
| const MultiGraphNodeProto& TFStats::ShowMultiGraphNode( |
| const string& cmd, const Options& opts) const { |
| if (!Validate(opts)) { |
| return empty_multi_graph_node_; |
| } |
| string prefix = MaybeReportMissingTrace(); |
| prefix += QueryDoc(cmd, opts) + kProfilePrefix; |
| |
| if (cmd == kCmds[2]) { |
| if (!has_code_traces()) { |
| absl::FPrintF(stderr, "No code trace information\n"); |
| return empty_multi_graph_node_; |
| } |
| return code_view_->Show(prefix, opts); |
| } else if (cmd == kCmds[3]) { |
| return op_view_->Show(prefix, opts); |
| } else { |
| absl::FPrintF(stderr, "Unknown command: %s\n", cmd); |
| return empty_multi_graph_node_; |
| } |
| } |
| |
| void TFStats::AddGraph(std::unique_ptr<GraphDef> graph) { |
| std::map<string, const NodeDef*> node_defs; |
| bool node_added = false; |
| for (const NodeDef& node : graph->node()) { |
| if (nodes_map_.find(node.name()) != nodes_map_.end()) { |
| continue; |
| } |
| node_added = true; |
| size_t num_nodes = nodes_map_.size(); |
| nodes_map_[node.name()] = std::unique_ptr<TFGraphNode>( |
| new TFGraphNode(&node, num_nodes, &nodes_map_)); |
| node_defs[node.name()] = &node; |
| } |
| for (auto it = node_defs.begin(); it != node_defs.end(); it++) { |
| TFGraphNode* node = nodes_map_.at(it->first).get(); |
| for (int i = 0; i < it->second->input_size(); ++i) { |
| string node_input = it->second->input(i); |
| int output_idx = 0; |
| // input name format can be: "^node:src_output" |
| // if not :src_output, then it's the first one (further verify?) |
| auto prefix_pos = node_input.find(':'); |
| if (prefix_pos != node_input.npos) { |
| std::vector<string> input_parts = absl::StrSplit(node_input, ':'); |
| DCHECK(input_parts.size() == 2) |
| << "Unknown NodeDef.input format: " << node_input; |
| node_input = input_parts[0]; |
| DCHECK(absl::SimpleAtoi(input_parts[1], &output_idx)) |
| << "Failed to parse integer: " << output_idx; |
| } |
| if (node_input.substr(0, 1) == "^") { |
| node_input = node_input.substr(1); |
| } |
| // Delay input TFGraphNode retrieval as late as possible. |
| // In long run, when we have TensorFlow runtime graph, the |
| // graph connection should be dynamic and per-step. |
| node->AddInput(node_input, output_idx, i); |
| } |
| } |
| if (node_added) { |
| graph_view_.reset(nullptr); |
| scope_view_.reset(nullptr); |
| op_view_.reset(nullptr); |
| code_view_.reset(nullptr); |
| } |
| } |
| |
| void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) { |
| if (!op_log) { |
| return; |
| } |
| for (const auto& entry : op_log->id_to_string()) { |
| if (id_to_string_.find(entry.first) == id_to_string_.end()) { |
| id_to_string_[entry.first] = entry.second; |
| } |
| } |
| for (const OpLogEntry& entry : op_log->log_entries()) { |
| auto node = nodes_map_.find(entry.name()); |
| if (node == nodes_map_.end()) continue; |
| for (const string& type : entry.types()) { |
| node->second->AddOpType(type); |
| } |
| if (entry.float_ops()) { |
| node->second->AddFloatOps(entry.float_ops()); |
| } |
| if (entry.has_code_def()) { |
| has_code_traces_ = true; |
| node->second->AddCode(entry.code_def(), &id_to_string_); |
| } |
| } |
| } |
| |
| void TFStats::AddRunMeta(int64_t step, std::unique_ptr<RunMetadata> run_meta) { |
| if (!run_meta || !run_meta->has_step_stats()) { |
| absl::FPrintF(stderr, "Invalid RunMetadata for step %d\n", step); |
| return; |
| } |
| if (steps_.find(step) == steps_.end()) { |
| steps_.insert(step); |
| } |
| steps_.insert(step); |
| |
| bool has_gpu_scheduling = false; |
| bool has_gpu_stream = false; |
| |
| for (const auto& dev_stat : run_meta->step_stats().dev_stats()) { |
| string dev = absl::AsciiStrToLower(dev_stat.device()); |
| if (IsPlacedOnAccelerator(dev)) { |
| has_gpu_scheduling = true; |
| if (CountAsAcceleratorTime(dev)) { |
| has_gpu_stream = true; |
| } |
| } |
| for (const NodeExecStats& node_stat : dev_stat.node_stats()) { |
| string name = node_stat.node_name(); |
| // Sometimes the node_name is suffixed with unnecessary information. |
| auto split_pos = node_stat.node_name().find(':'); |
| if (split_pos != node_stat.node_name().npos) { |
| name = node_stat.node_name().substr(0, split_pos); |
| } |
| auto node = nodes_map_.find(name); |
| if (node == nodes_map_.end()) { |
| NodeDef def; |
| if (CreateRunMetadataNode(name, &def)) { |
| size_t num_nodes = nodes_map_.size(); |
| nodes_map_[name] = std::unique_ptr<TFGraphNode>( |
| new TFGraphNode(&def, num_nodes, &nodes_map_)); |
| nodes_map_.at(name)->AddStepStat(step, dev_stat.device(), node_stat); |
| } |
| } else { |
| covered_nodes_.insert(node->second->id()); |
| node->second->AddStepStat(step, dev_stat.device(), node_stat); |
| } |
| } |
| } |
| |
| if (has_gpu_scheduling && !has_gpu_stream) { |
| miss_accelerator_stream_ = true; |
| } |
| } |
| |
| string TFStats::MaybeReportMissingTrace() const { |
| string report = ""; |
| if (miss_accelerator_stream_) { |
| report += |
| "\n\nFound accelerator operation but misses accelerator " |
| "stream stats!\n\n" |
| "It's likely a gpu tracing issue rather than tf-profiler issue.\n" |
| "If you found your operation missing accelerator time, " |
| "consider to post to discuss@tensorflow.org!\n\n"; |
| } |
| return report; |
| } |
| |
| void TFStats::SerializeToString(string* content) { |
| ProfileProto profile; |
| for (const auto& entry : id_to_string_) { |
| (*profile.mutable_id_to_string())[entry.first] = entry.second; |
| } |
| for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) { |
| if (it->second->id() < 0) { |
| continue; |
| } |
| (*profile.mutable_nodes())[it->second->id()].MergeFrom( |
| it->second->ToProto(nodes_map_)); |
| } |
| |
| profile.set_has_trace(has_code_traces_); |
| profile.set_miss_accelerator_stream(miss_accelerator_stream_); |
| for (int64_t s : steps_) { |
| profile.add_steps(s); |
| } |
| *content = profile.SerializeAsString(); |
| } |
| |
| void TFStats::WriteProfile(const string& filename) { |
| string content; |
| SerializeToString(&content); |
| Status s = WriteStringToFile(Env::Default(), filename, content); |
| if (!s.ok()) { |
| absl::FPrintF(stderr, "%s\n", s.ToString()); |
| } |
| } |
| |
| bool TFStats::Validate(const Options& opts) const { |
| if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) { |
| absl::FPrintF(stderr, |
| "Options -step=%d not found.\nAvailable steps: ", opts.step); |
| for (int64_t s : steps_) { |
| absl::FPrintF(stderr, "%d ", s); |
| } |
| absl::FPrintF(stderr, "\n"); |
| return false; |
| } |
| return true; |
| } |
| |
| void TFStats::AddNodeForTest(int64_t step, std::unique_ptr<TFGraphNode> node) { |
| steps_.insert(step); |
| nodes_map_[node->name()] = std::move(node); |
| } |
| } // namespace tfprof |
| } // namespace tensorflow |