Don't break fcntl locks when program does mmap.  #280965.
(Rusty Russell, rusty@rustcorp.com.au)

tdb uses fcntl locks and mmap, and some of the tests fail under valgrind. 
strace showed valgrind opening the tdb file, reading 1024 bytes, then closing
it.  This is not allowed: POSIX says if you open and close a file, all fcntl
locks on it are dropped (insane, yes).

Finally got around to hacking the source to track this down: di_notify_mmap is
doing the damage.  The simplest fix was to hand in an optional fd for it to
use, then have it do pread.

I had to fix your pread; surely this should seek back even if the platform
doesn't have pread support?



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12224 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index cf7bdf6..4096d5c 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -660,6 +660,10 @@
    carefully control when the thing will read symbols from the
    Valgrind executable itself.
 
+   If use_fd is not -1, that is used instead of the filename; this
+   avoids perturbing fcntl locks, which are released by simply
+   re-opening and closing the same file (even via different fd!).
+
    If a call to VG_(di_notify_mmap) causes debug info to be read, then
    the returned ULong is an abstract handle which can later be used to
    refer to the debuginfo read as a result of this specific mapping,
@@ -667,19 +671,21 @@
    will be one or above.  If the returned value is zero, no debug info
    was read. */
 
-ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV )
+ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd )
 {
    NSegment const * seg;
    HChar*     filename;
    Bool       is_rx_map, is_rw_map, is_ro_map;
    DebugInfo* di;
-   SysRes     fd;
-   Int        nread, oflags;
+   Int        actual_fd, oflags;
+   SysRes     preadres;
    HChar      buf1k[1024];
    Bool       debug = False;
    SysRes     statres;
    struct vg_stat statbuf;
 
+   vg_assert(use_fd >= -1);
+
    /* In short, figure out if this mapping is of interest to us, and
       if so, try to guess what ld.so is doing and when/if we should
       read debug info. */
@@ -817,36 +823,46 @@
 #  if defined(VKI_O_LARGEFILE)
    oflags |= VKI_O_LARGEFILE;
 #  endif
-   fd = VG_(open)( filename, oflags, 0 );
-   if (sr_isError(fd)) {
-      if (sr_Err(fd) != VKI_EACCES) {
-         DebugInfo fake_di;
-         VG_(memset)(&fake_di, 0, sizeof(fake_di));
-         fake_di.fsm.filename = filename;
-         ML_(symerr)(&fake_di, True, "can't open file to inspect ELF header");
-      }
-      return 0;
-   }
-   nread = VG_(read)( sr_Res(fd), buf1k, sizeof(buf1k) );
-   VG_(close)( sr_Res(fd) );
 
-   if (nread == 0)
-      return 0;
-   if (nread < 0) {
+   if (use_fd == -1) {
+      SysRes fd = VG_(open)( filename, oflags, 0 );
+      if (sr_isError(fd)) {
+         if (sr_Err(fd) != VKI_EACCES) {
+            DebugInfo fake_di;
+            VG_(memset)(&fake_di, 0, sizeof(fake_di));
+            fake_di.fsm.filename = filename;
+            ML_(symerr)(&fake_di, True,
+                        "can't open file to inspect ELF header");
+         }
+         return 0;
+      }
+      actual_fd = sr_Res(fd);
+   } else {
+      actual_fd = use_fd;
+   }
+
+   preadres = VG_(pread)( actual_fd, buf1k, sizeof(buf1k), 0 );
+   if (use_fd == -1) {
+      VG_(close)( actual_fd );
+   }
+
+   if (sr_isError(preadres)) {
       DebugInfo fake_di;
       VG_(memset)(&fake_di, 0, sizeof(fake_di));
       fake_di.fsm.filename = filename;
       ML_(symerr)(&fake_di, True, "can't read file to inspect ELF header");
       return 0;
    }
-   vg_assert(nread > 0 && nread <= sizeof(buf1k) );
+   if (sr_Res(preadres) == 0)
+      return 0;
+   vg_assert(sr_Res(preadres) > 0 && sr_Res(preadres) <= sizeof(buf1k) );
 
    /* We're only interested in mappings of object files. */
 #  if defined(VGO_linux)
-   if (!ML_(is_elf_object_file)( buf1k, (SizeT)nread ))
+   if (!ML_(is_elf_object_file)( buf1k, (SizeT)sr_Res(preadres) ))
       return 0;
 #  elif defined(VGO_darwin)
-   if (!ML_(is_macho_object_file)( buf1k, (SizeT)nread ))
+   if (!ML_(is_macho_object_file)( buf1k, (SizeT)sr_Res(preadres) ))
       return 0;
 #  else
 #    error "unknown OS"
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 1792322..d93888d 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -1929,7 +1929,8 @@
         be True here so that we read info from the valgrind executable
         itself. */
      for (i = 0; i < n_seg_starts; i++) {
-        anu.ull = VG_(di_notify_mmap)( seg_starts[i], True/*allow_SkFileV*/ );
+        anu.ull = VG_(di_notify_mmap)( seg_starts[i], True/*allow_SkFileV*/,
+                                       -1/*Don't use_fd*/);
         /* anu.ull holds the debuginfo handle returned by di_notify_mmap,
            if any. */
         if (anu.ull > 0) {
@@ -1949,8 +1950,10 @@
      /* show them all to the debug info reader.  
         Don't read from V segments (unlike Linux) */
      // GrP fixme really?
-     for (i = 0; i < n_seg_starts; i++)
-        VG_(di_notify_mmap)( seg_starts[i], False/*don't allow_SkFileV*/ );
+     for (i = 0; i < n_seg_starts; i++) {
+        VG_(di_notify_mmap)( seg_starts[i], False/*don't allow_SkFileV*/,
+                             -1/*don't use_fd*/);
+     }
 
      VG_(free)( seg_starts );
    }
diff --git a/coregrind/m_syswrap/syswrap-darwin.c b/coregrind/m_syswrap/syswrap-darwin.c
index 27923bf..2778957 100644
--- a/coregrind/m_syswrap/syswrap-darwin.c
+++ b/coregrind/m_syswrap/syswrap-darwin.c
@@ -3550,7 +3550,8 @@
    if (RES != -1) {
       ML_(notify_core_and_tool_of_mmap)(RES, ARG2, ARG3, ARG4, ARG5, ARG6);
       // Try to load symbols from the region
-      VG_(di_notify_mmap)( (Addr)RES, False/*allow_SkFileV*/ );
+      VG_(di_notify_mmap)( (Addr)RES, False/*allow_SkFileV*/,
+                           -1/*don't use_fd*/ );
    }
 }
 
diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c
index f140f40..eca7122 100644
--- a/coregrind/m_syswrap/syswrap-generic.c
+++ b/coregrind/m_syswrap/syswrap-generic.c
@@ -2070,7 +2070,7 @@
       );
       /* Load symbols? */
       di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres), 
-                                       False/*allow_SkFileV*/ );
+                                       False/*allow_SkFileV*/, (Int)arg5 );
       /* Notify the tool. */
       notify_tool_of_mmap(
          (Addr)sr_Res(sres), /* addr kernel actually assigned */
diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h
index dbe6a28..fa1b129 100644
--- a/coregrind/pub_core_debuginfo.h
+++ b/coregrind/pub_core_debuginfo.h
@@ -55,9 +55,15 @@
    refer to the debuginfo read as a result of this specific mapping,
    in later queries to m_debuginfo.  In this case the handle value
    will be one or above.  If the returned value is zero, no debug info
-   was read. */
+   was read.
+
+   For VG_(di_notify_mmap), if use_fd is not -1, that is used instead
+   of the filename; this avoids perturbing fcntl locks, which are
+   released by simply re-opening and closing the same file (even via
+   different fd!).
+*/
 #if defined(VGO_linux) || defined(VGO_darwin)
-extern ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV );
+extern ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd );
 
 extern void VG_(di_notify_munmap)( Addr a, SizeT len );
 
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am
index f5828d9..a9c498f 100644
--- a/none/tests/Makefile.am
+++ b/none/tests/Makefile.am
@@ -92,6 +92,8 @@
 	manythreads.stdout.exp manythreads.stderr.exp manythreads.vgtest \
 	map_unaligned.stderr.exp map_unaligned.vgtest \
 	map_unmap.stderr.exp map_unmap.stdout.exp map_unmap.vgtest \
+	mmap_fcntl_bug.vgtest mmap_fcntl_bug.stdout.exp \
+		mmap_fcntl_bug.stderr.exp \
 	mq.stderr.exp mq.vgtest \
 	munmap_exe.stderr.exp munmap_exe.vgtest \
 	nestedfns.stderr.exp nestedfns.stdout.exp nestedfns.vgtest \
@@ -168,6 +170,7 @@
 	fdleak_fcntl fdleak_ipv4 fdleak_open fdleak_pipe \
 	fdleak_socketpair \
 	floored fork fucomip \
+	mmap_fcntl_bug \
 	munmap_exe map_unaligned map_unmap mq \
 	nestedfns \
 	pending \
diff --git a/none/tests/mmap_fcntl_bug.c b/none/tests/mmap_fcntl_bug.c
new file mode 100644
index 0000000..056b89c
--- /dev/null
+++ b/none/tests/mmap_fcntl_bug.c
@@ -0,0 +1,68 @@
+
+/* Test program to demonstrate valgrind breaking fcntl locks during
+ * mmap.  Feed it a r/w file, such as its own source code. */
+
+/* See bug 280965. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <err.h>
+
+int main(int argc, char *argv[])
+{
+	struct flock fl;
+	const char *file = /* argv[1]; */
+			   "mmap_fcntl_bug.c";
+	int fd, status;
+
+	if (!file)
+		errx(1, "Usage: %s <normal-file>", argv[0]);
+
+	fd = open(file, O_RDWR);
+	if (fd < 0)
+		err(1, "Opening %s", file);
+
+	fl.l_type = F_WRLCK;
+	fl.l_whence = SEEK_SET;
+	fl.l_start = 0;
+	fl.l_len = 1;
+
+	/* I'm assuming noone else tries to lock this! */
+	if (fcntl(fd, F_SETLK, &fl) != 0)
+		err(1, "Locking %s", file);
+
+	/* If under valgrind, mmap re-opens and closes file, screwing us */
+	if (mmap(NULL, getpagesize(), PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0) == MAP_FAILED)
+		err(1, "mmap of %s", file);
+
+	switch (fork()) {
+	case 0:
+		/* Child.  Lock should fail. */
+		if (fcntl(fd, F_SETLK, &fl) == 0)
+			exit(1);
+		exit(0);
+	case -1:
+		err(1, "Fork failed");
+	}
+
+	if (wait(&status) == -1)
+		 err(1, "Child vanished?");
+
+	if (!WIFEXITED(status))
+		errx(1, "Child died with signal %i", WTERMSIG(status));
+
+	switch (WEXITSTATUS(status)) {
+	case 1:
+		errx(1, "Child got lock, we must have dropped it (TEST FAILED)");
+	case 0:
+		fprintf(stderr, "Child exited with zero (TEST PASSED).\n");
+		return 0;
+	default:
+		errx(1, "Child weird exit status %i", WEXITSTATUS(status));
+	}
+}
diff --git a/none/tests/mmap_fcntl_bug.stderr.exp b/none/tests/mmap_fcntl_bug.stderr.exp
new file mode 100644
index 0000000..7885ee9
--- /dev/null
+++ b/none/tests/mmap_fcntl_bug.stderr.exp
@@ -0,0 +1 @@
+Child exited with zero (TEST PASSED).
diff --git a/none/tests/mmap_fcntl_bug.stdout.exp b/none/tests/mmap_fcntl_bug.stdout.exp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/none/tests/mmap_fcntl_bug.stdout.exp
diff --git a/none/tests/mmap_fcntl_bug.vgtest b/none/tests/mmap_fcntl_bug.vgtest
new file mode 100644
index 0000000..47a4e5e
--- /dev/null
+++ b/none/tests/mmap_fcntl_bug.vgtest
@@ -0,0 +1,2 @@
+prog: mmap_fcntl_bug
+vgopts: -q