[tfdbg] Add DebugDataReader.source_lines(); Ensure absolute path in .source_files dump
to support reading of snapshotted source-file content.
Motivation:
- This will be used by DebuggerV2 plugin in TensorBoard soon.
- This can also be used by other UIs built on top of the file format, e.g., a tfdbg2 CLI.
Also in this CL:
- Ensure that file paths saved in the protos in the .source_files and .stack_frames files are all absolute paths. Previously, they may have contained relative paths (as returned by `tf_stack.extract_stack()`), which are ambiguous in certain cases.
- Simplify some code by eliminating unused local variables.
PiperOrigin-RevId: 288016488
Change-Id: Ieb48fd166c65d9f7e779db323ff3d2883aa03a2e
diff --git a/tensorflow/python/debug/lib/debug_events_reader.py b/tensorflow/python/debug/lib/debug_events_reader.py
index e11d069..031b1a3 100644
--- a/tensorflow/python/debug/lib/debug_events_reader.py
+++ b/tensorflow/python/debug/lib/debug_events_reader.py
@@ -126,6 +126,11 @@
def graphs_iterator(self):
return self._generic_iterator(self._graphs_path)
+ def read_source_files_event(self, offset):
+ """Read a DebugEvent proto at given offset from the .source_files file."""
+ return debug_event_pb2.DebugEvent.FromString(
+ self._get_reader(self._source_files_path).read(offset)[0])
+
def read_graphs_event(self, offset):
"""Read a DebugEvent proto at a given offset from the .graphs file.
@@ -139,9 +144,8 @@
`errors.DataLossError` if offset is at a wrong location.
`IndexError` if offset is out of range of the file.
"""
- debug_event = debug_event_pb2.DebugEvent.FromString(
+ return debug_event_pb2.DebugEvent.FromString(
self._get_reader(self._graphs_path).read(offset)[0])
- return debug_event
def execution_iterator(self):
return self._generic_iterator(self._execution_path)
@@ -159,9 +163,8 @@
`errors.DataLossError` if offset is at a wrong location.
`IndexError` if offset is out of range of the file.
"""
- debug_event = debug_event_pb2.DebugEvent.FromString(
+ return debug_event_pb2.DebugEvent.FromString(
self._get_reader(self._execution_path).read(offset)[0])
- return debug_event
def graph_execution_traces_iterator(self):
return self._generic_iterator(self._graph_execution_traces_path)
@@ -179,9 +182,8 @@
`errors.DataLossError` if offset is at a wrong location.
`IndexError` if offset is out of range of the file.
"""
- debug_event = debug_event_pb2.DebugEvent.FromString(
+ return debug_event_pb2.DebugEvent.FromString(
self._get_reader(self._graph_execution_traces_path).read(offset)[0])
- return debug_event
def close(self):
with self._readers_lock:
@@ -633,8 +635,8 @@
# TODO(cais): Implement pagination for memory constraints.
self._execution_digests = []
- # A list of (host_name, file_path) tuples.
- self._host_name_file_paths = []
+ # Mapping (host_name, file_path) tuple to offset in the .source_files file.
+ self._host_name_file_path_to_offset = collections.OrderedDict()
# A dict mapping id to (host_name, file_path, lineno, func) tuple.
self._stack_frame_by_id = dict()
# Stores unprocessed stack frame IDs. This is necessary to handle the
@@ -661,10 +663,10 @@
def _load_source_files(self):
"""Incrementally read the .source_files DebugEvent file."""
source_files_iter = self._reader.source_files_iterator()
- for debug_event, _ in source_files_iter:
+ for debug_event, offset in source_files_iter:
source_file = debug_event.source_file
- self._host_name_file_paths.append(
- (source_file.host_name, source_file.file_path))
+ self._host_name_file_path_to_offset[
+ (source_file.host_name, source_file.file_path)] = offset
self._source_file_timestamp = debug_event.wall_time
def _load_stack_frames(self):
@@ -687,12 +689,11 @@
unprocessed_stack_frame_ids = tuple(self._unprocessed_stack_frames.keys())
for stack_frame_id in unprocessed_stack_frame_ids:
file_line_col = self._unprocessed_stack_frames[stack_frame_id]
- if len(self._host_name_file_paths) > file_line_col.file_index:
+ if len(self._host_name_file_path_to_offset) > file_line_col.file_index:
+ host_name, file_path = list(self._host_name_file_path_to_offset.keys())[
+ file_line_col.file_index]
self._stack_frame_by_id[stack_frame_id] = (
- self._host_name_file_paths[file_line_col.file_index][0],
- self._host_name_file_paths[file_line_col.file_index][1],
- file_line_col.line,
- file_line_col.func)
+ host_name, file_path, file_line_col.line, file_line_col.func)
del self._unprocessed_stack_frames[stack_frame_id]
def _load_graphs(self):
@@ -775,6 +776,19 @@
self._load_graph_execution_traces()
self._load_execution()
+ def source_lines(self, host_name, file_path):
+ """Read the line-by-line content of a source file.
+
+ Args:
+ host_name: Host name on which the source file is located.
+ file_path: File path at which the source file is located.
+
+ Returns:
+ Lines of the source file as a `list` of `str`s.
+ """
+ offset = self._host_name_file_path_to_offset[(host_name, file_path)]
+ return list(self._reader.read_source_files_event(offset).source_file.lines)
+
def outermost_graphs(self):
"""Get the number of outer most graphs read so far."""
return [graph for graph in self._graph_by_id.values()
diff --git a/tensorflow/python/debug/lib/dumping_callback.py b/tensorflow/python/debug/lib/dumping_callback.py
index e51eedf..4ffbb98 100644
--- a/tensorflow/python/debug/lib/dumping_callback.py
+++ b/tensorflow/python/debug/lib/dumping_callback.py
@@ -19,6 +19,7 @@
from __future__ import print_function
import atexit
+import os
import re
import socket
import threading
@@ -232,15 +233,16 @@
stack_frame_ids = []
writer = None
for file_path, lineno, func, _ in stack_frames:
- if (file_path, lineno, func) in self._stack_frame_to_id:
+ abs_path = os.path.abspath(file_path)
+ if (abs_path, lineno, func) in self._stack_frame_to_id:
stack_frame_ids.append(
- self._stack_frame_to_id[(file_path, lineno, func)])
+ self._stack_frame_to_id[(abs_path, lineno, func)])
continue
with self._stack_frame_to_id_lock:
- if (file_path, lineno, func) not in self._stack_frame_to_id:
+ if (abs_path, lineno, func) not in self._stack_frame_to_id:
stack_frame_id = _get_id()
- self._stack_frame_to_id[(file_path, lineno, func)] = stack_frame_id
- file_index = self._write_source_file_content(file_path)
+ self._stack_frame_to_id[(abs_path, lineno, func)] = stack_frame_id
+ file_index = self._write_source_file_content(abs_path)
file_line_col = graph_debug_info_pb2.GraphDebugInfo.FileLineCol(
file_index=file_index, line=lineno, func=func)
stack_frame_with_id = debug_event_pb2.StackFrameWithId(
@@ -248,7 +250,7 @@
writer = self.get_writer()
writer.WriteStackFrameWithId(stack_frame_with_id)
stack_frame_ids.append(
- self._stack_frame_to_id[(file_path, lineno, func)])
+ self._stack_frame_to_id[(abs_path, lineno, func)])
code_location = debug_event_pb2.CodeLocation(
host_name=self._hostname, stack_frame_ids=stack_frame_ids)
diff --git a/tensorflow/python/debug/lib/dumping_callback_test.py b/tensorflow/python/debug/lib/dumping_callback_test.py
index 115315a..9038a60 100644
--- a/tensorflow/python/debug/lib/dumping_callback_test.py
+++ b/tensorflow/python/debug/lib/dumping_callback_test.py
@@ -342,6 +342,20 @@
self.assertAllClose(
trace.debug_tensor_value, [tensor_id, 10, 2, 4, 2, 2, 0, 0, 0, 0])
+ def testReadingSourceLines(self):
+ writer = dumping_callback.enable_dump_debug_info(self.dump_root)
+ # Run a simple eager execution event, so that the source-file contents are
+ # dumped.
+ self.assertAllClose(math_ops.truediv(7.0, 1.0 / 6.0), 42.0)
+ writer.FlushNonExecutionFiles()
+ writer.FlushExecutionFiles()
+ with debug_events_reader.DebugDataReader(self.dump_root) as reader:
+ reader.update()
+ with open(_current_file_full_path, "rt") as f:
+ file_lines = f.read().split("\n")
+ self.assertEqual(
+ reader.source_lines(_host_name, _current_file_full_path), file_lines)
+
@parameterized.named_parameters(
("NoTensor", "NO_TENSOR"),
("CurtHealth", "CURT_HEALTH"),