[tfdbg] Add DebugDataReader.source_lines(); Ensure absolute path in .source_files dump
to support reading of snapshotted source-file content.
Motivation:
- This will be used by DebuggerV2 plugin in TensorBoard soon.
- This can also be used by other UIs built on top of the file format, e.g., a tfdbg2 CLI.
Also in this CL:
- Ensure that file paths saved in the protos in the .source_files and .stack_frames files are all absolute paths. Previously, they may have contained relative paths (as returned by `tf_stack.extract_stack()`), which are ambiguous in certain cases.
- Simplify some code by eliminating unused local variables.
PiperOrigin-RevId: 288016488
Change-Id: Ieb48fd166c65d9f7e779db323ff3d2883aa03a2e
diff --git a/tensorflow/python/debug/lib/debug_events_reader.py b/tensorflow/python/debug/lib/debug_events_reader.py
index e11d069..031b1a3 100644
--- a/tensorflow/python/debug/lib/debug_events_reader.py
+++ b/tensorflow/python/debug/lib/debug_events_reader.py
@@ -126,6 +126,11 @@
def graphs_iterator(self):
return self._generic_iterator(self._graphs_path)
+ def read_source_files_event(self, offset):
+ """Read a DebugEvent proto at given offset from the .source_files file."""
+ return debug_event_pb2.DebugEvent.FromString(
+ self._get_reader(self._source_files_path).read(offset)[0])
+
def read_graphs_event(self, offset):
"""Read a DebugEvent proto at a given offset from the .graphs file.
@@ -139,9 +144,8 @@
`errors.DataLossError` if offset is at a wrong location.
`IndexError` if offset is out of range of the file.
"""
- debug_event = debug_event_pb2.DebugEvent.FromString(
+ return debug_event_pb2.DebugEvent.FromString(
self._get_reader(self._graphs_path).read(offset)[0])
- return debug_event
def execution_iterator(self):
return self._generic_iterator(self._execution_path)
@@ -159,9 +163,8 @@
`errors.DataLossError` if offset is at a wrong location.
`IndexError` if offset is out of range of the file.
"""
- debug_event = debug_event_pb2.DebugEvent.FromString(
+ return debug_event_pb2.DebugEvent.FromString(
self._get_reader(self._execution_path).read(offset)[0])
- return debug_event
def graph_execution_traces_iterator(self):
return self._generic_iterator(self._graph_execution_traces_path)
@@ -179,9 +182,8 @@
`errors.DataLossError` if offset is at a wrong location.
`IndexError` if offset is out of range of the file.
"""
- debug_event = debug_event_pb2.DebugEvent.FromString(
+ return debug_event_pb2.DebugEvent.FromString(
self._get_reader(self._graph_execution_traces_path).read(offset)[0])
- return debug_event
def close(self):
with self._readers_lock:
@@ -633,8 +635,8 @@
# TODO(cais): Implement pagination for memory constraints.
self._execution_digests = []
- # A list of (host_name, file_path) tuples.
- self._host_name_file_paths = []
+ # Mapping (host_name, file_path) tuple to offset in the .source_files file.
+ self._host_name_file_path_to_offset = collections.OrderedDict()
# A dict mapping id to (host_name, file_path, lineno, func) tuple.
self._stack_frame_by_id = dict()
# Stores unprocessed stack frame IDs. This is necessary to handle the
@@ -661,10 +663,10 @@
def _load_source_files(self):
"""Incrementally read the .source_files DebugEvent file."""
source_files_iter = self._reader.source_files_iterator()
- for debug_event, _ in source_files_iter:
+ for debug_event, offset in source_files_iter:
source_file = debug_event.source_file
- self._host_name_file_paths.append(
- (source_file.host_name, source_file.file_path))
+ self._host_name_file_path_to_offset[
+ (source_file.host_name, source_file.file_path)] = offset
self._source_file_timestamp = debug_event.wall_time
def _load_stack_frames(self):
@@ -687,12 +689,11 @@
unprocessed_stack_frame_ids = tuple(self._unprocessed_stack_frames.keys())
for stack_frame_id in unprocessed_stack_frame_ids:
file_line_col = self._unprocessed_stack_frames[stack_frame_id]
- if len(self._host_name_file_paths) > file_line_col.file_index:
+ if len(self._host_name_file_path_to_offset) > file_line_col.file_index:
+ host_name, file_path = list(self._host_name_file_path_to_offset.keys())[
+ file_line_col.file_index]
self._stack_frame_by_id[stack_frame_id] = (
- self._host_name_file_paths[file_line_col.file_index][0],
- self._host_name_file_paths[file_line_col.file_index][1],
- file_line_col.line,
- file_line_col.func)
+ host_name, file_path, file_line_col.line, file_line_col.func)
del self._unprocessed_stack_frames[stack_frame_id]
def _load_graphs(self):
@@ -775,6 +776,19 @@
self._load_graph_execution_traces()
self._load_execution()
+ def source_lines(self, host_name, file_path):
+ """Read the line-by-line content of a source file.
+
+ Args:
+ host_name: Host name on which the source file is located.
+ file_path: File path at which the source file is located.
+
+ Returns:
+ Lines of the source file as a `list` of `str`s.
+ """
+ offset = self._host_name_file_path_to_offset[(host_name, file_path)]
+ return list(self._reader.read_source_files_event(offset).source_file.lines)
+
def outermost_graphs(self):
"""Get the number of outer most graphs read so far."""
return [graph for graph in self._graph_by_id.values()
diff --git a/tensorflow/python/debug/lib/dumping_callback.py b/tensorflow/python/debug/lib/dumping_callback.py
index e51eedf..4ffbb98 100644
--- a/tensorflow/python/debug/lib/dumping_callback.py
+++ b/tensorflow/python/debug/lib/dumping_callback.py
@@ -19,6 +19,7 @@
from __future__ import print_function
import atexit
+import os
import re
import socket
import threading
@@ -232,15 +233,16 @@
stack_frame_ids = []
writer = None
for file_path, lineno, func, _ in stack_frames:
- if (file_path, lineno, func) in self._stack_frame_to_id:
+ abs_path = os.path.abspath(file_path)
+ if (abs_path, lineno, func) in self._stack_frame_to_id:
stack_frame_ids.append(
- self._stack_frame_to_id[(file_path, lineno, func)])
+ self._stack_frame_to_id[(abs_path, lineno, func)])
continue
with self._stack_frame_to_id_lock:
- if (file_path, lineno, func) not in self._stack_frame_to_id:
+ if (abs_path, lineno, func) not in self._stack_frame_to_id:
stack_frame_id = _get_id()
- self._stack_frame_to_id[(file_path, lineno, func)] = stack_frame_id
- file_index = self._write_source_file_content(file_path)
+ self._stack_frame_to_id[(abs_path, lineno, func)] = stack_frame_id
+ file_index = self._write_source_file_content(abs_path)
file_line_col = graph_debug_info_pb2.GraphDebugInfo.FileLineCol(
file_index=file_index, line=lineno, func=func)
stack_frame_with_id = debug_event_pb2.StackFrameWithId(
@@ -248,7 +250,7 @@
writer = self.get_writer()
writer.WriteStackFrameWithId(stack_frame_with_id)
stack_frame_ids.append(
- self._stack_frame_to_id[(file_path, lineno, func)])
+ self._stack_frame_to_id[(abs_path, lineno, func)])
code_location = debug_event_pb2.CodeLocation(
host_name=self._hostname, stack_frame_ids=stack_frame_ids)
diff --git a/tensorflow/python/debug/lib/dumping_callback_test.py b/tensorflow/python/debug/lib/dumping_callback_test.py
index 115315a..9038a60 100644
--- a/tensorflow/python/debug/lib/dumping_callback_test.py
+++ b/tensorflow/python/debug/lib/dumping_callback_test.py
@@ -342,6 +342,20 @@
self.assertAllClose(
trace.debug_tensor_value, [tensor_id, 10, 2, 4, 2, 2, 0, 0, 0, 0])
+ def testReadingSourceLines(self):
+ writer = dumping_callback.enable_dump_debug_info(self.dump_root)
+ # Run a simple eager execution event, so that the source-file contents are
+ # dumped.
+ self.assertAllClose(math_ops.truediv(7.0, 1.0 / 6.0), 42.0)
+ writer.FlushNonExecutionFiles()
+ writer.FlushExecutionFiles()
+ with debug_events_reader.DebugDataReader(self.dump_root) as reader:
+ reader.update()
+ with open(_current_file_full_path, "rt") as f:
+ file_lines = f.read().split("\n")
+ self.assertEqual(
+ reader.source_lines(_host_name, _current_file_full_path), file_lines)
+
@parameterized.named_parameters(
("NoTensor", "NO_TENSOR"),
("CurtHealth", "CURT_HEALTH"),