libdw, readelf: Read inlining info in NVIDIA extended line map

As of CUDA 11.2, NVIDIA added extensions to the line map section
of CUDA binaries to represent inlined functions. These extensions
include

 - two new fields in a line table row to represent inline
   information: context, and functionname,

 - two new DWARF extended opcodes: DW_LNE_NVIDIA_inlined_call,
   DW_LNE_NVIDIA_set_function_name, and

 - an additional word in the line table header that indicates
   the offset in the .debug_str section where the function
   names for this line table begin.

A line table row for an inlined function contains a non-zero "context"
value. The "context" field indicates the index of the line table row
that serves as the call site for an inlined context.

The "functionname" field in a line table row is only meaningful if the
"context" field of the row is non-zero. A meaningful "functionname"
field contains an index into the .debug_str section relative to the
base offset established in the line table header; the position in the
.debug_str section indicates the name of the inlined function.

These extensions resemble the proposed DWARF extensions
(http://dwarfstd.org/ShowIssue.php?issue=140906.1) by Cary Coutant,
but are not identical.

This commit integrates support for handling NVIDIA's extended line
maps into elfutils' libdw library, by adding two functions
dwarf_linecontext and dwarf_linefunctionname, and into the readelf
--debug-dump=line command line utility.

Signed-off-by: John M Mellor-Crummey <johnmc@rice.edu>
Signed-off-by: Mark Wielaard <mark@klomp.org>
diff --git a/ChangeLog b/ChangeLog
index 6255fe6..d4b89f9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2021-09-03  John Mellor-Crummey <johnmc@rice.edu>
+
+	* NEWS: Read inlining info in NVIDIA extended line map
+
 2021-08-10  Adrian Ratiu  <adrian.ratiu@collabora.com>
 
 	* configure.ac (AC_CACHE_CHECK): Rework std=gnu99 check to allow clang.
diff --git a/NEWS b/NEWS
index b812b74..897b391 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,11 @@
             Limit the duration of groom ops roughly to rescan (-t) times.
             Several other performance improvements & prometheus metrics.
 
+libdw: Support for the NVIDIA CUDA line map extensions.
+       DW_LNE_NVIDIA_inlined_call and DW_LNE_NVIDIA_set_function_name
+       are defined in dwarf.h. New functions dwarf_linecontext and
+       dwarf_linefunctionname.
+
 Version 0.185
 
 debuginfod-client: Simplify curl handle reuse so downloads which
diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 38e6efb..ca742e6 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,22 @@
+2021-10-20  John M Mellor-Crummey  <johnmc@rice.edu>
+
+	* dwarf_linecontext.c: New file.
+	* dwarf_linefunctionname.c: Likewise.
+	* Makefile.am (libdw_a_SOURCES): Add dwarf_linecontext.c and
+	dwarf_linefunctionname.c
+	* dwarf.h: Add DW_LNE_NVIDIA_inlined_call and
+	DW_LNE_NVIDIA_set_function_name.
+	* dwarf_getsrclines.c (struct line_state): Add context and
+	function_name fields.
+	(add_new_line): Set context and function_name.
+	(MAX_STACK_LINES): Reduce to MAX_STACK_ALLOC / 2.
+	(read_srclines): Initialize context and function_name.  Try to
+	read debug_str_offset if available.  Handle
+	DW_LNE_NVIDIA_inlined_call and DW_LNE_NVIDIA_set_function_name.
+	* libdw.h (dwarf_linecontext): New declaration.
+	(dwarf_linefunctionname): Likewise.
+	* libdw.map (ELFUTILS_0.186): New section.
+
 2021-11-08  Mark Wielaard  <mark@klomp.org>
 
 	* dwarf_begin_elf.c (scn_dwarf_type): New function.
diff --git a/libdw/Makefile.am b/libdw/Makefile.am
index 6b7834a..4fda33b 100644
--- a/libdw/Makefile.am
+++ b/libdw/Makefile.am
@@ -63,6 +63,7 @@
 		  dwarf_linesrc.c dwarf_lineno.c dwarf_lineaddr.c \
 		  dwarf_linecol.c dwarf_linebeginstatement.c \
 		  dwarf_lineendsequence.c dwarf_lineblock.c \
+		  dwarf_linecontext.c dwarf_linefunctionname.c \
 		  dwarf_lineprologueend.c dwarf_lineepiloguebegin.c \
 		  dwarf_lineisa.c dwarf_linediscriminator.c \
 		  dwarf_lineop_index.c dwarf_line_file.c \
diff --git a/libdw/dwarf.h b/libdw/dwarf.h
index 19a4be9..3ce7f23 100644
--- a/libdw/dwarf.h
+++ b/libdw/dwarf.h
@@ -844,6 +844,10 @@
     DW_LNE_set_discriminator = 4,
 
     DW_LNE_lo_user = 128,
+
+    DW_LNE_NVIDIA_inlined_call = 144,
+    DW_LNE_NVIDIA_set_function_name = 145,
+
     DW_LNE_hi_user = 255
   };
 
diff --git a/libdw/dwarf_getsrclines.c b/libdw/dwarf_getsrclines.c
index 8fc48e1..2c1d7a4 100644
--- a/libdw/dwarf_getsrclines.c
+++ b/libdw/dwarf_getsrclines.c
@@ -93,6 +93,8 @@
   struct linelist *linelist;
   size_t nlinelist;
   unsigned int end_sequence;
+  unsigned int context;
+  unsigned int function_name;
 };
 
 static inline void
@@ -139,6 +141,8 @@
   SET (epilogue_begin);
   SET (isa);
   SET (discriminator);
+  SET (context);
+  SET (function_name);
 
 #undef SET
 
@@ -161,7 +165,7 @@
      the stack.  Stack allocate some entries, only dynamically malloc
      when more than MAX.  */
 #define MAX_STACK_ALLOC 4096
-#define MAX_STACK_LINES MAX_STACK_ALLOC
+#define MAX_STACK_LINES (MAX_STACK_ALLOC / 2)
 #define MAX_STACK_FILES (MAX_STACK_ALLOC / 4)
 #define MAX_STACK_DIRS  (MAX_STACK_ALLOC / 16)
 
@@ -180,7 +184,9 @@
       .prologue_end = false,
       .epilogue_begin = false,
       .isa = 0,
-      .discriminator = 0
+      .discriminator = 0,
+      .context = 0,
+      .function_name = 0
     };
 
   /* The dirs normally go on the stack, but if there are too many
@@ -648,6 +654,13 @@
 	}
     }
 
+  unsigned int debug_str_offset = 0;
+  if (unlikely (linep == header_start + header_length - 4))
+    {
+      /* CUBINs contain an unsigned 4-byte offset */
+      debug_str_offset = read_4ubyte_unaligned_inc (dbg, linep);
+    }
+
   /* Consistency check.  */
   if (unlikely (linep != header_start + header_length))
     {
@@ -753,6 +766,8 @@
 	      state.epilogue_begin = false;
 	      state.isa = 0;
 	      state.discriminator = 0;
+	      state.context = 0;
+	      state.function_name = 0;
 	      break;
 
 	    case DW_LNE_set_address:
@@ -831,6 +846,23 @@
 	      get_uleb128 (state.discriminator, linep, lineendp);
 	      break;
 
+	    case DW_LNE_NVIDIA_inlined_call:
+	      if (unlikely (linep >= lineendp))
+		goto invalid_data;
+	      get_uleb128 (state.context, linep, lineendp);
+	      if (unlikely (linep >= lineendp))
+		goto invalid_data;
+	      get_uleb128 (state.function_name, linep, lineendp);
+	      state.function_name += debug_str_offset;
+	      break;
+
+	    case DW_LNE_NVIDIA_set_function_name:
+	      if (unlikely (linep >= lineendp))
+		goto invalid_data;
+	      get_uleb128 (state.function_name, linep, lineendp);
+	      state.function_name += debug_str_offset;
+	      break;
+
 	    default:
 	      /* Unknown, ignore it.  */
 	      if (unlikely ((size_t) (lineendp - (linep - 1)) < len))
diff --git a/libdw/dwarf_linecontext.c b/libdw/dwarf_linecontext.c
new file mode 100644
index 0000000..84572e2
--- /dev/null
+++ b/libdw/dwarf_linecontext.c
@@ -0,0 +1,45 @@
+/* Return context in line.
+   This file is part of elfutils.
+   Written by John Mellor-Crummey <johnmc@rice.edu>, 2021.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include "libdwP.h"
+
+
+Dwarf_Line *
+dwarf_linecontext (Dwarf_Lines *lines, Dwarf_Line *line)
+{
+  /* Without both the line table and the row there is nothing to look up.  */
+  if (line == NULL || lines == NULL)
+    return NULL;
+  /* Context is a 1-based row index; 0 marks a row that is not inlined.  */
+  unsigned int ctx = line->context;
+  return (ctx == 0 || ctx >= lines->nlines) ? NULL : &lines->info[ctx - 1];
+}
diff --git a/libdw/dwarf_linefunctionname.c b/libdw/dwarf_linefunctionname.c
new file mode 100644
index 0000000..e194d21
--- /dev/null
+++ b/libdw/dwarf_linefunctionname.c
@@ -0,0 +1,52 @@
+/* Return function name in line.
+   This file is part of elfutils.
+   Written by John Mellor-Crummey <johnmc@rice.edu>, 2021.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of either
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at
+       your option) any later version
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at
+       your option) any later version
+
+   or both in parallel, as here.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <string.h>
+#include "libdwP.h"
+
+
+const char *
+dwarf_linefunctionname (Dwarf *dbg, Dwarf_Line *line)
+{
+  /* The name is only reported for rows that carry a non-zero context.  */
+  if (line == NULL || dbg == NULL || line->context == 0)
+    return NULL;
+
+  Elf_Data *strs = dbg->sectiondata[IDX_debug_str];
+  size_t off = line->function_name;
+  if (strs == NULL || off >= strs->d_size)
+    return NULL;
+
+  /* Only hand out the string if it is terminated inside .debug_str.  */
+  const char *name = (const char *) strs->d_buf + off;
+  return memchr (name, '\0', strs->d_size - off) != NULL ? name : NULL;
+}
diff --git a/libdw/libdw.h b/libdw/libdw.h
index 77174d2..64d1689 100644
--- a/libdw/libdw.h
+++ b/libdw/libdw.h
@@ -701,6 +701,15 @@
 extern const char *dwarf_linesrc (Dwarf_Line *line,
 				  Dwarf_Word *mtime, Dwarf_Word *length);
 
+/* Return the caller of this line if inlined.  If not inlined,
+   return NULL.  */
+extern Dwarf_Line *dwarf_linecontext (Dwarf_Lines *lines, Dwarf_Line *line);
+
+/* Return the function name in this line record. If this line is
+   inlined, this is the name of the function that was inlined. If this line
+   is not inlined, return NULL.  */
+extern const char *dwarf_linefunctionname (Dwarf *dbg, Dwarf_Line *line);
+
 /* Return file information.  The returned string is NULL when
    an error occurred, or the file path.  The file path is either absolute
    or relative to the compilation directory.  See dwarf_decl_file.  */
diff --git a/libdw/libdw.map b/libdw/libdw.map
index 8ab0a2a..4f53037 100644
--- a/libdw/libdw.map
+++ b/libdw/libdw.map
@@ -360,3 +360,9 @@
     # presume that NULL is only returned on error (otherwise ELF_K_NONE).
     dwelf_elf_begin;
 } ELFUTILS_0.175;
+
+ELFUTILS_0.186 {
+  global:
+    dwarf_linecontext;
+    dwarf_linefunctionname;
+} ELFUTILS_0.177;
diff --git a/libdw/libdwP.h b/libdw/libdwP.h
index 48f3a94..360ad01 100644
--- a/libdw/libdwP.h
+++ b/libdw/libdwP.h
@@ -303,6 +303,9 @@
   unsigned int op_index:8;
   unsigned int isa:8;
   unsigned int discriminator:24;
+  /* These are currently only used for the NVIDIA extensions.  */
+  unsigned int context;
+  unsigned int function_name;
 };
 
 struct Dwarf_Lines_s
diff --git a/src/ChangeLog b/src/ChangeLog
index 316bcb6..05b2522 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+2021-10-20  John M Mellor-Crummey  <johnmc@rice.edu>
+
+	* readelf.c (print_debug_line_section): Try to read
+	debug_str_offset if available.  Handle DW_LNE_NVIDIA_inlined_call
+	and DW_LNE_NVIDIA_set_function_name.
+
 2021-10-06  Mark Wielaard  <mark@klomp.org>
 
 	* elflint.c (check_sections): Don't dereference databits if bad.
diff --git a/src/readelf.c b/src/readelf.c
index 3a19906..c10038e 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -8478,6 +8478,8 @@
 	  header_length = read_8ubyte_unaligned_inc (dbg, linep);
 	}
 
+      const unsigned char *header_start = linep;
+
       /* Next the minimum instruction length.  */
       if ((size_t) (lineendp - linep) < 1)
 	goto invalid_data;
@@ -8761,6 +8763,13 @@
 	  ++linep;
 	}
 
+      unsigned int debug_str_offset = 0;
+      if (unlikely (linep == header_start + header_length - 4))
+	{
+	  /* CUBINs contain an unsigned 4-byte offset */
+	  debug_str_offset = read_4ubyte_unaligned_inc (dbg, linep);
+	}
+
       if (linep == lineendp)
 	{
 	  puts (_("\nNo line number statements."));
@@ -8909,6 +8918,59 @@
 		  printf (_(" set discriminator to %u\n"), u128);
 		  break;
 
+		case DW_LNE_NVIDIA_inlined_call:
+		  {
+		    if (unlikely (linep >= lineendp))
+		      goto invalid_data;
+
+		    unsigned int context;
+		    get_uleb128 (context, linep, lineendp);
+
+		    if (unlikely (linep >= lineendp))
+		      goto invalid_data;
+
+		    unsigned int function_name;
+		    get_uleb128 (function_name, linep, lineendp);
+		    function_name += debug_str_offset;
+
+		    Elf_Data *str_data = dbg->sectiondata[IDX_debug_str];
+		    char *function_str;
+		    if (str_data == NULL || function_name >= str_data->d_size
+			|| memchr (str_data->d_buf + function_name, '\0',
+				   str_data->d_size - function_name) == NULL)
+		      function_str = "???";
+		    else
+		      function_str = (char *) str_data->d_buf + function_name;
+
+		    printf (_(" set inlined context %u,"
+		              " function name %s (0x%x)\n"),
+			    context, function_str, function_name);
+		    break;
+		  }
+
+		case DW_LNE_NVIDIA_set_function_name:
+		  {
+		    if (unlikely (linep >= lineendp))
+		      goto invalid_data;
+
+		    unsigned int function_name;
+		    get_uleb128 (function_name, linep, lineendp);
+		    function_name += debug_str_offset;
+
+		    Elf_Data *str_data = dbg->sectiondata[IDX_debug_str];
+		    char *function_str;
+		    if (str_data == NULL || function_name >= str_data->d_size
+			|| memchr (str_data->d_buf + function_name, '\0',
+				   str_data->d_size - function_name) == NULL)
+		      function_str = "???";
+		    else
+		      function_str = (char *) str_data->d_buf + function_name;
+
+		    printf (_(" set function name %s (0x%x)\n"),
+			    function_str, function_name);
+		  }
+		  break;
+
 		default:
 		  /* Unknown, ignore it.  */
 		  puts (_(" unknown opcode"));
diff --git a/tests/.gitignore b/tests/.gitignore
index d0e83da..99d0481 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -85,6 +85,7 @@
 /next-files
 /next-lines
 /next_cfi
+/nvidia_extended_linemap_libdw
 /peel_type
 /rdwrmmap
 /read_unaligned
diff --git a/tests/ChangeLog b/tests/ChangeLog
index c5d0002..db8b13b 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,16 @@
+2021-10-20  John M Mellor-Crummey  <johnmc@rice.edu>
+
+	* nvidia_extended_linemap_libdw.c: New file.
+	* run-nvidia-extended-linemap-libdw.sh: New test.
+	* run-nvidia-extended-linemap-readelf.sh: Likewise.
+	* testfile_nvidia_linemap.bz2: New test file.
+	* .gitignore: Add nvidia_extended_linemap_libdw.
+	* Makefile.am (check_PROGRAMS): Add nvidia_extended_linemap_libdw.
+	(TESTS): Add run-nvidia-extended-linemap-libdw.sh and
+	run-nvidia-extended-linemap-readelf.sh
+	(EXTRA_DIST): Likewise and testfile_nvidia_linemap.bz2.
+	(nvidia_extended_linemap_libdw_LDADD): New variable.
+
 2021-11-08  Mark Wielaard  <mark@klomp.org>
 
 	* Makefile.am (TESTS): Add run-readelf-fat-lto.sh.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index ccc4c05..6d3e75a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -61,6 +61,7 @@
 		  dwelf_elf_e_machine_string \
 		  getphdrnum leb128 read_unaligned \
 		  msg_tst system-elf-libelf-test \
+		  nvidia_extended_linemap_libdw \
 		  $(asm_TESTS)
 
 asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
@@ -189,6 +190,7 @@
 	leb128 read_unaligned \
 	msg_tst system-elf-libelf-test \
 	$(asm_TESTS) run-disasm-bpf.sh run-low_high_pc-dw-form-indirect.sh \
+	run-nvidia-extended-linemap-libdw.sh run-nvidia-extended-linemap-readelf.sh \
 	run-readelf-dw-form-indirect.sh run-strip-largealign.sh
 
 if !BIARCH
@@ -566,6 +568,8 @@
 	     run-getphdrnum.sh testfile-phdrs.elf.bz2 \
 	     run-test-includes.sh run-low_high_pc-dw-form-indirect.sh \
 	     run-readelf-dw-form-indirect.sh testfile-dw-form-indirect.bz2 \
+	     run-nvidia-extended-linemap-libdw.sh run-nvidia-extended-linemap-readelf.sh \
+	     testfile_nvidia_linemap.bz2 \
 	     testfile-largealign.o.bz2 run-strip-largealign.sh
 
 
@@ -739,6 +743,7 @@
 getphdrnum_LDADD = $(libelf) $(libdw)
 leb128_LDADD = $(libelf) $(libdw)
 read_unaligned_LDADD = $(libelf) $(libdw)
+nvidia_extended_linemap_libdw_LDADD = $(libelf) $(libdw)
 
 # We want to test the libelf header against the system elf.h header.
 # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
diff --git a/tests/nvidia_extended_linemap_libdw.c b/tests/nvidia_extended_linemap_libdw.c
new file mode 100644
index 0000000..20d8d40
--- /dev/null
+++ b/tests/nvidia_extended_linemap_libdw.c
@@ -0,0 +1,166 @@
+/* Inspect nvidia extended linemap with dwarf_next_lines.
+   Copyright (C) 2002, 2004, 2018 Red Hat, Inc.
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <fcntl.h>
+#include <inttypes.h>
+#include <libelf.h>
+#include ELFUTILS_HEADER(dw)
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+
+int
+main (int argc, char *argv[])
+{
+  int result = 0;
+  int cnt;
+  /* Dump the line table of every file named on the command line.  */
+  for (cnt = 1; cnt < argc; ++cnt)
+    {
+      int fd = open (argv[cnt], O_RDONLY);
+
+      Dwarf *dbg = dwarf_begin (fd, DWARF_C_READ);
+      if  (dbg == NULL)
+	{
+	  printf ("%s not usable: %s\n", argv[cnt], dwarf_errmsg (-1));
+	  close  (fd);
+	  continue;
+	}
+
+      Dwarf_Off off;
+      Dwarf_Off next_off = 0;
+      Dwarf_CU *cu = NULL;
+      Dwarf_Lines *lb;
+      size_t nlb;
+      int res;
+      while ((res = dwarf_next_lines (dbg, off = next_off, &next_off, &cu,
+				      NULL, NULL, &lb, &nlb)) == 0)
+	{
+	  printf ("off = %" PRIu64 "\n", off);
+	  printf (" %zu lines\n", nlb);
+
+	  for (size_t i = 0; i < nlb; ++i)
+	    {
+	      Dwarf_Line *l = dwarf_onesrcline (lb, i);
+	      if (l == NULL)
+		{
+		  printf ("%s: cannot get individual line\n", argv[cnt]);
+		  result = 1;
+		  break;
+		}
+
+	      Dwarf_Addr addr;
+	      if (dwarf_lineaddr (l, &addr) != 0)
+		addr = 0;
+	      const char *file = dwarf_linesrc (l, NULL, NULL);
+	      int line;
+	      if (dwarf_lineno (l, &line) != 0)
+		line = 0;
+
+	      printf ("%" PRIx64 ": %s:%d:", (uint64_t) addr,
+		      file ?: "???", line);
+
+	      /* Getting the file path through the Dwarf_Files should
+		 result in the same path (or in no path for both).  */
+	      Dwarf_Files *files;
+	      size_t idx;
+	      if (dwarf_line_file (l, &files, &idx) != 0)
+		{
+		  printf ("%s: cannot get file from line (%zu): %s\n",
+			  argv[cnt], i, dwarf_errmsg (-1));
+		  result = 1;
+		  break;
+		}
+	      const char *path = dwarf_filesrc (files, idx, NULL, NULL);
+	      if ((path == NULL && file != NULL)
+		  || (path != NULL && file == NULL)
+		  || (file != NULL && strcmp (file, path) != 0))
+		{
+		  printf ("%s: line %zu srcline (%s) != file srcline (%s)\n",
+			  argv[cnt], i, file ?: "???", path ?: "???");
+		  result = 1;
+		  break;
+		}
+
+	      int column;
+	      if (dwarf_linecol (l, &column) != 0)
+		column = 0;
+	      if (column >= 0)
+		printf ("%d:", column);
+
+	      bool is_stmt;
+	      if (dwarf_linebeginstatement (l, &is_stmt) != 0)
+		is_stmt = false;
+	      bool end_sequence;
+	      if (dwarf_lineendsequence (l, &end_sequence) != 0)
+		end_sequence = false;
+	      bool basic_block;
+	      if (dwarf_lineblock (l, &basic_block) != 0)
+		basic_block = false;
+	      bool prologue_end;
+	      if (dwarf_lineprologueend (l, &prologue_end) != 0)
+		prologue_end = false;
+	      bool epilogue_begin;
+	      if (dwarf_lineepiloguebegin (l, &epilogue_begin) != 0)
+		epilogue_begin = false;
+	      printf (" is_stmt:%s, end_seq:%s, bb:%s, prologue:%s, epilogue:%s\n",
+		      is_stmt ? "yes" : "no", end_sequence ? "yes" : "no",
+		      basic_block ? "yes" : "no", prologue_end  ? "yes" : "no",
+		      epilogue_begin ? "yes" : "no");
+	      /* Walk outward through the callers this row was inlined into.  */
+	      Dwarf_Line* callee_context = l;
+	      Dwarf_Line* caller_context = dwarf_linecontext (lb, l);
+	      unsigned int depth = 0;
+	      while (caller_context != NULL)
+		{
+		  depth++;
+		  for (unsigned int x = 0; x < depth; x++)
+		    printf ("  ");
+
+		  const char *inlined_file = dwarf_linesrc (caller_context,
+							    NULL, NULL);
+		  int inlined_line;
+		  if (dwarf_lineno (caller_context, &inlined_line) != 0)
+		    inlined_line = 0;
+
+		  printf ("%s inlined at %s:%d\n",
+			  dwarf_linefunctionname (dbg, callee_context) ?: "???",
+			  inlined_file ?: "???", inlined_line);
+
+		  callee_context = caller_context;
+		  caller_context = dwarf_linecontext (lb, callee_context);
+		}
+	    }
+	}
+
+      if (res < 0)
+	{
+	  printf ("dwarf_next_lines failed: %s\n", dwarf_errmsg (-1));
+	  result = 1;
+	}
+
+      dwarf_end (dbg);
+      close (fd);
+    }
+
+  return result;
+}
diff --git a/tests/run-nvidia-extended-linemap-libdw.sh b/tests/run-nvidia-extended-linemap-libdw.sh
new file mode 100755
index 0000000..d1df2cf
--- /dev/null
+++ b/tests/run-nvidia-extended-linemap-libdw.sh
@@ -0,0 +1,60 @@
+# Copyright (C) 2011 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+# NOTE:
+#   The file testfile_nvidia_linemap is a CUDA binary for an NVIDIA A100, generated as follows using CUDA 11.2:
+#   nvcc -o main main.cu  -Xcompiler "-g -fopenmp" -O3 -lineinfo -arch sm_80  -lcudart -lcuda -lstdc++ -lm
+#   cuobjdump -xelf all main
+#   mv main.sm_80.cubin testfile_nvidia_linemap
+
+testfiles testfile_nvidia_linemap
+testrun_compare ${abs_top_builddir}/tests/nvidia_extended_linemap_libdw testfile_nvidia_linemap << EOF
+off = 0
+ 18 lines
+0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:25:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+10: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:26:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+40: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:27:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+90: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:25:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+a0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:28:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+100: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:28:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+100: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:8:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  foo inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:28
+150: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:9:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  foo inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:28
+1e0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+1e0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:6:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+1e0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:8:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  foo inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:6
+    bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+220: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:9:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  foo inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:6
+    bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+2b0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:7:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+2f0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:8:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+2f0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:18:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  _Z1aPiS_S_ inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:8
+    bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+330: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:19:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+  _Z1aPiS_S_ inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/bar.h:8
+    bar inlined at /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:31
+3c0: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:33:0: is_stmt:yes, end_seq:no, bb:no, prologue:no, epilogue:no
+480: /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4/main.cu:33:0: is_stmt:yes, end_seq:yes, bb:no, prologue:no, epilogue:no
+EOF
diff --git a/tests/run-nvidia-extended-linemap-readelf.sh b/tests/run-nvidia-extended-linemap-readelf.sh
new file mode 100755
index 0000000..1fa9b7b
--- /dev/null
+++ b/tests/run-nvidia-extended-linemap-readelf.sh
@@ -0,0 +1,120 @@
+# Copyright (C) 2011 Red Hat, Inc.
+# This file is part of elfutils.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# elfutils is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. $srcdir/test-subr.sh
+
+# NOTE:
+#   The file testfile_nvidia_linemap is a CUDA binary for an NVIDIA A100, generated as follows using CUDA 11.2:
+#   nvcc -o main main.cu  -Xcompiler "-g -fopenmp" -O3 -lineinfo -arch sm_80  -lcudart -lcuda -lstdc++ -lm
+#   cuobjdump -xelf all main
+#   mv main.sm_80.cubin testfile_nvidia_linemap
+
+testfiles testfile_nvidia_linemap
+testrun_compare ${abs_top_builddir}/src/readelf --debug-dump=line testfile_nvidia_linemap << EOF
+
+DWARF section [ 5] '.debug_line' at offset 0x3e0:
+
+Table at offset 0:
+
+ Length:                         253
+ DWARF version:                  2
+ Prologue length:                111
+ Address size:                   8
+ Segment selector size:          0
+ Min instruction length:         1
+ Max operations per instruction: 1
+ Initial value if 'is_stmt':     1
+ Line base:                      -5
+ Line range:                     14
+ Opcode base:                    10
+
+Opcodes:
+  [1]  0 arguments
+  [2]  1 argument
+  [3]  1 argument
+  [4]  1 argument
+  [5]  1 argument
+  [6]  0 arguments
+  [7]  0 arguments
+  [8]  0 arguments
+  [9]  1 argument
+
+Directory table:
+ /home/johnmc/hpctoolkit-gpu-samples/nvidia_extended_linemap4
+
+File name table:
+ Entry Dir   Time      Size      Name
+ 1     1     1626104146 1819      main.cu
+ 2     1     1626104111 211       bar.h
+
+Line number statements:
+ [    79] extended opcode 2:  set address to 0 <kernel>
+ [    84] set file to 1
+ [    86] advance line by constant 24 to 25
+ [    88] copy
+ [    89] special opcode 240: address+16 = 0x10 <kernel+0x10>, line+1 = 26
+ [    8a] advance line by constant 1 to 27
+ [    8c] advance address by 48 to 0x40 <kernel+0x40>
+ [    8e] copy
+ [    8f] advance line by constant -2 to 25
+ [    91] advance address by 80 to 0x90 <kernel+0x90>
+ [    94] copy
+ [    95] special opcode 242: address+16 = 0xa0 <kernel+0xa0>, line+3 = 28
+ [    96] advance address by 96 to 0x100 <kernel+0x100>
+ [    99] copy
+ [    9a] extended opcode 144:  set inlined context 6, function name foo (0x0)
+ [    9f] advance line by constant -20 to 8
+ [    a1] copy
+ [    a2] advance line by constant 1 to 9
+ [    a4] advance address by 80 to 0x150 <kernel+0x150>
+ [    a7] copy
+ [    a8] extended opcode 144:  set inlined context 0, function name foo (0x0)
+ [    ad] advance line by constant 22 to 31
+ [    af] advance address by 144 to 0x1e0 <kernel+0x1e0>
+ [    b2] copy
+ [    b3] set file to 2
+ [    b5] extended opcode 144:  set inlined context 9, function name bar (0x4)
+ [    ba] advance line by constant -25 to 6
+ [    bc] copy
+ [    bd] set file to 1
+ [    bf] extended opcode 144:  set inlined context 10, function name foo (0x0)
+ [    c4] advance line by constant 2 to 8
+ [    c6] copy
+ [    c7] advance line by constant 1 to 9
+ [    c9] advance address by 64 to 0x220 <kernel+0x220>
+ [    cc] copy
+ [    cd] set file to 2
+ [    cf] extended opcode 144:  set inlined context 9, function name bar (0x4)
+ [    d4] advance line by constant -2 to 7
+ [    d6] advance address by 144 to 0x2b0 <kernel+0x2b0>
+ [    d9] copy
+ [    da] advance line by constant 1 to 8
+ [    dc] advance address by 64 to 0x2f0 <kernel+0x2f0>
+ [    df] copy
+ [    e0] set file to 1
+ [    e2] extended opcode 144:  set inlined context 14, function name _Z1aPiS_S_ (0x8)
+ [    e7] advance line by constant 10 to 18
+ [    e9] copy
+ [    ea] advance line by constant 1 to 19
+ [    ec] advance address by 64 to 0x330 <kernel+0x330>
+ [    ef] copy
+ [    f0] extended opcode 144:  set inlined context 0, function name foo (0x0)
+ [    f5] advance line by constant 14 to 33
+ [    f7] advance address by 144 to 0x3c0 <kernel+0x3c0>
+ [    fa] copy
+ [    fb] advance address by 192 to 0x480
+ [    fe] extended opcode 1:  end of sequence
+EOF
diff --git a/tests/testfile_nvidia_linemap.bz2 b/tests/testfile_nvidia_linemap.bz2
new file mode 100644
index 0000000..8a6d09f
--- /dev/null
+++ b/tests/testfile_nvidia_linemap.bz2
Binary files differ