blob: 79d9320f857e88a55d216cd45698f54776995c41 [file] [log] [blame]
/*--------------------------------------------------------------------*/
/*--- Wrappers for generic Unix system calls ---*/
/*--- syswrap-generic.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2000-2013 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file COPYING.
*/
#if defined(VGO_linux) || defined(VGO_darwin)
#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
#include "pub_core_threadstate.h"
#include "pub_core_debuginfo.h" // VG_(di_notify_*)
#include "pub_core_aspacemgr.h"
#include "pub_core_transtab.h" // VG_(discard_translations)
#include "pub_core_xarray.h"
#include "pub_core_clientstate.h" // VG_(brk_base), VG_(brk_limit)
#include "pub_core_debuglog.h"
#include "pub_core_errormgr.h"
#include "pub_core_gdbserver.h" // VG_(gdbserver)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#include "pub_core_machine.h" // VG_(get_SP)
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_scheduler.h"
#include "pub_core_signals.h"
#include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"
#include "pub_core_ume.h"
#include "pub_core_stacks.h"
#include "priv_types_n_macros.h"
#include "priv_syswrap-generic.h"
#include "config.h"
/* Heuristically determine the client's stack extent from its current
   SP and register that range with the core.  The stack was allocated
   by the client itself, so the best we can do is consult the address
   space manager: assume SP starts near its highest possible value
   (the page containing SP, rounded up) and that the stack can only
   grow down to the start of the containing segment. */
void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
{
   Bool debug = False;
   NSegment const* stackseg = VG_(am_find_nsegment)(sp);

   if (stackseg != NULL && stackseg->kind != SkResvn) {
      /* SP lies inside a usable mapping: derive the stack range
         from it. */
      tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
      tst->client_stack_szB
         = tst->client_stack_highest_byte - stackseg->start + 1;

      VG_(register_stack)(stackseg->start, tst->client_stack_highest_byte);

      if (debug)
         VG_(printf)("tid %d: guessed client stack range [%#lx-%#lx]\n",
                     tst->tid, stackseg->start,
                     tst->client_stack_highest_byte);
   } else {
      /* SP does not point into any mapped (non-reservation) area;
         record an empty stack. */
      VG_(message)(Vg_UserMsg,
                   "!? New thread %d starts with SP(%#lx) unmapped\n",
                   tst->tid, sp);
      tst->client_stack_highest_byte = 0;
      tst->client_stack_szB = 0;
   }
}
/* Returns True iff the address range [start, start+size) is something
   the client can plausibly mess with: all of it either already belongs
   to the client, is free, or is a reservation. */
Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
                            const HChar *syscallname)
{
   Bool allowed;

   /* An empty range is trivially fine. */
   if (size == 0)
      return True;

   allowed = VG_(am_is_valid_for_client_or_free_or_resvn)
                (start,size,VKI_PROT_NONE);

   if (0)
      VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
                  syscallname, start, start+size-1, (Int)allowed);

   if (!allowed && syscallname != NULL) {
      /* Warn the user; with higher verbosity also show where the
         offending syscall was made. */
      VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
                               "to modify addresses %#lx-%#lx\n",
                               syscallname, start, start+size-1);
      if (VG_(clo_verbosity) > 1)
         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }

   return allowed;
}
/* Is sigNo a signal the client may legitimately use?  Signal 0 is
   allowed (kill() uses it as an existence probe); numbers above
   VG_SIGVGRTUSERMAX are reserved for Valgrind's own use. */
Bool ML_(client_signal_OK)(Int sigNo)
{
   Bool ok = (sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX);
   //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ok);
   return ok;
}
/* Handy small function to help stop wrappers from segfaulting when
   presented with bogus client addresses: True iff [start, start+size)
   is client-readable.  Is not used for generating user-visible
   errors. */
Bool ML_(safe_to_deref) ( void* start, SizeT size )
{
   Bool readable
      = VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
   return readable;
}
/* ---------------------------------------------------------------------
Doing mmap, mremap
------------------------------------------------------------------ */
/* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
munmap, mprotect (and mremap??) work at the page level. So addresses
and lengths must be adjusted for this. */
/* Mash around start and length so that the area exactly covers an
   integral number of pages: round *a down, and the end (*a + *len) up,
   to page boundaries.  If we don't do that, memcheck's idea of
   addressible memory diverges from that of the kernel's, which causes
   the leak detector to crash. */
static
void page_align_addr_and_len( Addr* a, SizeT* len)
{
   Addr rounded_start = VG_PGROUNDDN(*a);
   *len = VG_PGROUNDUP(*a + *len) - rounded_start;
   *a   = rounded_start;
}
/* Tell the address space manager about a successful client mmap of
   [a, a+len), discarding any cached translations it invalidates. */
static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset)
{
   Bool discard_needed;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   discard_needed
      = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );

   if (discard_needed)
      VG_(discard_translations)( (Addr64)a, (ULong)len,
                                 "notify_core_of_mmap" );
}
/* Tell the tool about a successful client mmap of [a, a+len),
   translating the mmap protection bits into R/W/X booleans. */
static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
{
   Bool readable, writable, executable;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   readable   = toBool(prot & VKI_PROT_READ);
   writable   = toBool(prot & VKI_PROT_WRITE);
   executable = toBool(prot & VKI_PROT_EXEC);

   VG_TRACK( new_mem_mmap, a, len, readable, writable, executable,
             di_handle );
}
/* When a client mmap has been successfully done, this function must
be called. It notifies both aspacem and the tool of the new
mapping.
JRS 2008-Aug-14: But notice this is *very* obscure. The only place
it is called from is POST(sys_io_setup). In particular,
ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
client mmap. But it doesn't call this function; instead it does the
relevant notifications itself. Here, we just pass di_handle=0 to
notify_tool_of_mmap as we have no better information. But really this
function should be done away with; problem is I don't understand what
POST(sys_io_setup) does or how it works.
[However, this function is used lots for Darwin, because
ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
*/
void
ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
                                    UInt flags, Int fd, Off64T offset )
{
   // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
   // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
   // Should it?  --njn
   /* Aspacem must learn of the mapping before the tool does. */
   notify_core_of_mmap(a, len, prot, flags, fd, offset);
   /* No debuginfo handle is available here, so pass 0. */
   notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
}
/* Notify aspacem, the tool and the debuginfo reader that the client
   munmapped [a, a+len).  The range is first page-aligned; cached
   translations covering it are discarded if aspacem says so. */
void
ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
{
   Bool discard_needed;

   page_align_addr_and_len(&a, &len);
   discard_needed = VG_(am_notify_munmap)(a, len);
   VG_TRACK( die_mem_munmap, a, len );
   VG_(di_notify_munmap)( a, len );
   if (discard_needed)
      VG_(discard_translations)( (Addr64)a, (ULong)len,
                                 "ML_(notify_core_and_tool_of_munmap)" );
}
/* Notify aspacem, the tool and the debuginfo reader that the client
   changed the protection of [a, a+len) to 'prot'.  The range is first
   page-aligned; stale translations are discarded if required. */
void
ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
{
   Bool readable   = toBool(prot & VKI_PROT_READ);
   Bool writable   = toBool(prot & VKI_PROT_WRITE);
   Bool executable = toBool(prot & VKI_PROT_EXEC);
   Bool discard_needed;

   page_align_addr_and_len(&a, &len);
   discard_needed = VG_(am_notify_mprotect)(a, len, prot);
   VG_TRACK( change_mem_mprotect, a, len, readable, writable, executable );
   VG_(di_notify_mprotect)( a, len, prot );
   if (discard_needed)
      VG_(discard_translations)( (Addr64)a, (ULong)len,
                                 "ML_(notify_core_and_tool_of_mprotect)" );
}
#if HAVE_MREMAP
/* Expand (or shrink) an existing mapping, potentially moving it at
   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.

   Validates the request (alignment, wraparound, client ownership,
   single-segment containment), then dispatches to one of four
   outcomes: shrink in place, grow in place, move elsewhere, or no-op.
   On success returns the (possibly new) address; on failure EINVAL or
   ENOMEM, mirroring the kernel's behaviour.
*/
static
SysRes do_mremap( Addr old_addr, SizeT old_len,
                  Addr new_addr, SizeT new_len,
                  UWord flags, ThreadId tid )
{
   /* Fully parenthesised: the previous expansion lacked the outer
      parentheses, which is an operator-precedence hazard if the macro
      is ever used inside a larger expression. */
#  define MIN_SIZET(_aa,_bb) ((_aa) < (_bb) ? (_aa) : (_bb))

   Bool ok, d;
   NSegment const* old_seg;
   Addr advised;
   Bool f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
   Bool f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);

   if (0)
      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
                  old_addr,old_len,new_addr,new_len,
                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
   if (0)
      VG_(am_show_nsegments)(0, "do_remap: before");

   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
      goto eINVAL;

   if (!VG_IS_PAGE_ALIGNED(old_addr))
      goto eINVAL;

   old_len = VG_PGROUNDUP(old_len);
   new_len = VG_PGROUNDUP(new_len);

   if (new_len == 0)
      goto eINVAL;

   /* kernel doesn't reject this, but we do. */
   if (old_len == 0)
      goto eINVAL;

   /* reject wraparounds */
   if (old_addr + old_len < old_addr)
      goto eINVAL;
   if (f_fixed == True && new_addr + new_len < new_len)
      goto eINVAL;

   /* kernel rejects all fixed, no-move requests (which are
      meaningless). */
   if (f_fixed == True && f_maymove == False)
      goto eINVAL;

   /* Stay away from non-client areas. */
   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
      goto eINVAL;

   /* In all remaining cases, if the old range does not fall within a
      single segment, fail. */
   old_seg = VG_(am_find_nsegment)( old_addr );
   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
      goto eINVAL;
   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
      goto eINVAL;

   vg_assert(old_len > 0);
   vg_assert(new_len > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));

   /* There are 3 remaining cases:

      * maymove == False

        new space has to be at old address, so:
        - shrink    -> unmap end
        - same size -> do nothing
        - grow      -> if can grow in-place, do so, else fail

      * maymove == True, fixed == False

        new space can be anywhere, so:
        - shrink    -> unmap end
        - same size -> do nothing
        - grow      -> if can grow in-place, do so, else
                       move to anywhere large enough, else fail

      * maymove == True, fixed == True

        new space must be at new address, so:

        - if new address is not page aligned, fail
        - if new address range overlaps old one, fail
        - if new address range cannot be allocated, fail
        - else move to new address range with new size
        - else fail
   */

   if (f_maymove == False) {
      /* new space has to be at old address */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == False) {
      /* new space can be anywhere */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_move_anywhere_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == True) {
      /* new space can only be at the new address */
      if (!VG_IS_PAGE_ALIGNED(new_addr))
         goto eINVAL;
      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
         /* no overlap */
      } else {
         goto eINVAL;
      }
      if (new_addr == 0)
         goto eINVAL;
         /* VG_(am_get_advisory_client_simple) interprets zero to mean
            non-fixed, which is not what we want */
      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
      if (!ok || advised != new_addr)
         goto eNOMEM;
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, new_addr, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, new_addr,
                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
                      0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
         }
         return VG_(mk_SysRes_Success)( new_addr );
      }
      goto eNOMEM;
   }

   /* end of the 3 cases */
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_move_anywhere_or_fail:
   {
   /* try growing it in-place */
   Addr   needA = old_addr + old_len;
   SSizeT needL = new_len - old_len;

   vg_assert(needL > 0);
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* Fixes bug #129866. */
      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
   }
   if (ok && advised == needA) {
      ok = VG_(am_extend_map_client)( &d, old_seg, needL );
      if (ok) {
         VG_TRACK( new_mem_mmap, needA, needL,
                   old_seg->hasR,
                   old_seg->hasW, old_seg->hasX,
                   0/*di_handle*/ );
         if (d)
            VG_(discard_translations)( needA, needL, "do_remap(3)" );
         return VG_(mk_SysRes_Success)( old_addr );
      }
   }

   /* that failed.  Look elsewhere. */
   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
   if (ok) {
      Bool oldR = old_seg->hasR;
      Bool oldW = old_seg->hasW;
      Bool oldX = old_seg->hasX;
      /* assert new area does not overlap old */
      vg_assert(advised+new_len-1 < old_addr
                || advised > old_addr+old_len-1);
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, advised, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, advised,
                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
                      oldR, oldW, oldX, 0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
         }
         return VG_(mk_SysRes_Success)( advised );
      }
   }
   goto eNOMEM;
   }
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_fail:
   {
   Addr  needA = old_addr + old_len;
   SizeT needL = new_len - old_len;

   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* Fixes bug #129866. */
      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
   }
   if (!ok || advised != needA)
      goto eNOMEM;
   ok = VG_(am_extend_map_client)( &d, old_seg, needL );
   if (!ok)
      goto eNOMEM;
   VG_TRACK( new_mem_mmap, needA, needL,
             old_seg->hasR, old_seg->hasW, old_seg->hasX,
             0/*di_handle*/ );
   if (d)
      VG_(discard_translations)( needA, needL, "do_remap(6)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  shrink_in_place:
   {
   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len,
                                        old_len-new_len );
   if (sr_isError(sres))
      return sres;
   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
   if (d)
      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
                                 "do_remap(7)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  same_in_place:
   return VG_(mk_SysRes_Success)( old_addr );
   /*NOTREACHED*/ vg_assert(0);

  eINVAL:
   return VG_(mk_SysRes_Error)( VKI_EINVAL );
  eNOMEM:
   return VG_(mk_SysRes_Error)( VKI_ENOMEM );

#  undef MIN_SIZET
}
#endif /* HAVE_MREMAP */
/* ---------------------------------------------------------------------
File-descriptor tracking
------------------------------------------------------------------ */
/* One of these is allocated for each open file descriptor. */
typedef struct OpenFd
{
   Int fd;                        /* The file descriptor */
   HChar *pathname;               /* NULL if not a regular file or unknown */
   ExeContext *where;             /* NULL if inherited from parent */
   struct OpenFd *next, *prev;    /* doubly-linked list links */
} OpenFd;

/* List of allocated file descriptors. */
static OpenFd *allocated_fds = NULL;

/* Count of open file descriptors. */
static Int fd_count = 0;
/* Note the fact that a file descriptor was just closed: unlink its
   record from allocated_fds (if tracked), free it, and decrement the
   open count.  Descriptors at or above the hard limit belong to
   Valgrind itself and are ignored. */
static
void record_fd_close(Int fd)
{
   OpenFd *cur;

   if (fd >= VG_(fd_hard_limit))
      return;   /* Valgrind internal */

   for (cur = allocated_fds; cur != NULL; cur = cur->next) {
      if (cur->fd != fd)
         continue;
      /* Unlink 'cur' from the doubly-linked list. */
      if (cur->prev)
         cur->prev->next = cur->next;
      else
         allocated_fds = cur->next;
      if (cur->next)
         cur->next->prev = cur->prev;
      if (cur->pathname)
         VG_(free) (cur->pathname);
      VG_(free) (cur);
      fd_count--;
      break;
   }
}
/* Note the fact that a file descriptor was just opened.  If the
   tid is -1, this indicates an inherited fd.  If the pathname is NULL,
   this either indicates a non-standard file (i.e. a pipe or socket or
   some such thing) or that we don't know the filename.  If the fd is
   already open, then we're probably doing a dup2() to an existing fd,
   so just overwrite the existing one. */
void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
{
   OpenFd *rec;

   if (fd >= VG_(fd_hard_limit))
      return;   /* Valgrind internal */

   /* Reuse an existing record for this fd if there is one, dropping
      its old pathname. */
   for (rec = allocated_fds; rec != NULL; rec = rec->next) {
      if (rec->fd == fd) {
         if (rec->pathname) VG_(free)(rec->pathname);
         break;
      }
   }

   /* Otherwise allocate a fresh record and link it at the head. */
   if (rec == NULL) {
      rec = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
      rec->prev = NULL;
      rec->next = allocated_fds;
      if (allocated_fds) allocated_fds->prev = rec;
      allocated_fds = rec;
      fd_count++;
   }

   rec->fd       = fd;
   rec->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
   rec->where    = (tid == -1)
                      ? NULL
                      : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
}
// Record opening of an fd, and find its name.
void ML_(record_fd_open_named)(ThreadId tid, Int fd)
{
   /* NB: static buffer -- not reentrant, but only used from the
      single-threaded syscall-wrapper context. */
   static HChar buf[VKI_PATH_MAX];
   /* Pass NULL if the fd cannot be resolved to a pathname. */
   HChar* name = VG_(resolve_filename)(fd, buf, VKI_PATH_MAX) ? buf : NULL;
   ML_(record_fd_open_with_given_name)(tid, fd, name);
}
// Record opening of a nameless fd (e.g. a pipe or socket).
void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
{
   /* No pathname is available, so record the fd as such. */
   ML_(record_fd_open_with_given_name)(tid, fd, NULL);
}
/* Render an AF_UNIX socket address into 'name' (a caller-supplied
   buffer, assumed large enough) and return it. */
static
HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
{
   Bool unknown = (sa == NULL || len == 0 || sa->sun_path[0] == '\0');
   if (unknown)
      VG_(sprintf)(name, "<unknown>");
   else
      VG_(sprintf)(name, "%s", sa->sun_path);
   return name;
}
/* Render an AF_INET socket address as "a.b.c.d:port" into 'name'
   (a caller-supplied buffer, assumed large enough) and return it. */
static
HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
{
   if (sa == NULL || len == 0) {
      VG_(sprintf)(name, "<unknown>");
      return name;
   }
   if (sa->sin_port == 0) {
      VG_(sprintf)(name, "<unbound>");
      return name;
   }
   {
      /* Convert to host order so the bytes print most-significant
         first. */
      UInt haddr = VG_(ntohl)(sa->sin_addr.s_addr);
      VG_(sprintf)(name, "%u.%u.%u.%u:%u",
                   (haddr>>24) & 0xFF, (haddr>>16) & 0xFF,
                   (haddr>>8) & 0xFF, haddr & 0xFF,
                   VG_(ntohs)(sa->sin_port));
   }
   return name;
}
/* Format the 16-byte network-order IPv6 address 'ip' into 's' as text.
   V4-mapped addresses are printed as "::ffff:a.b.c.d"; otherwise each
   16-bit group is printed in hex, with the first run of zero groups
   elided to "::".  's' must be large enough (callers pass 128 bytes,
   which comfortably holds the longest possible result). */
static
void inet6_format(HChar *s, const UChar ip[16])
{
   /* The fixed 12-byte prefix that identifies a V4-mapped address. */
   static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};

   if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
      /* V4-mapped: print the trailing 4 bytes in dotted-quad form. */
      const struct vki_in_addr *sin_addr =
          (const struct vki_in_addr *)(ip + 12);
      UInt addr = VG_(ntohl)(sin_addr->s_addr);
      VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
                   (addr>>24) & 0xFF, (addr>>16) & 0xFF,
                   (addr>>8) & 0xFF, addr & 0xFF);
   } else {
      Bool compressing = False;   /* currently inside a zero run? */
      Bool compressed = False;    /* a zero run has already been elided */
      Int len = 0;                /* chars emitted into 's' so far */
      Int i;

      for (i = 0; i < 16; i += 2) {
         /* Assemble the next 16-bit group from two network-order
            bytes. */
         UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
         if (word == 0 && !compressed) {
            /* Start or extend the single elided zero run. */
            compressing = True;
         } else {
            if (compressing) {
               /* Close the zero run: this ':' plus the group
                  separator below yields "::". */
               compressing = False;
               compressed = True;
               s[len++] = ':';
            }
            if (i > 0) {
               s[len++] = ':';
            }
            len += VG_(sprintf)(s + len, "%x", word);
         }
      }

      if (compressing) {
         /* Address ended inside a zero run: emit the trailing "::". */
         s[len++] = ':';
         s[len++] = ':';
      }

      s[len++] = 0;   /* NUL-terminate */
   }

   return;
}
/* Render an AF_INET6 socket address as "[addr]:port" into 'name'
   (a caller-supplied buffer, assumed large enough) and return it. */
static
HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
{
   if (sa == NULL || len == 0) {
      VG_(sprintf)(name, "<unknown>");
      return name;
   }
   if (sa->sin6_port == 0) {
      VG_(sprintf)(name, "<unbound>");
      return name;
   }
   {
      char addr[128];   /* textual form of the 128-bit address */
      inet6_format(addr, (void *)&(sa->sin6_addr));
      VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
   }
   return name;
}
/*
 * Try get some details about a socket: look up its local address and,
 * where meaningful, its peer address, then print a one-line
 * description keyed by address family.
 */
static void
getsockdetails(Int fd)
{
   /* Scratch space big enough for any supported sockaddr flavour. */
   union u {
      struct vki_sockaddr a;
      struct vki_sockaddr_in in;
      struct vki_sockaddr_in6 in6;
      struct vki_sockaddr_un un;
   } laddr;
   Int llen;

   llen = sizeof(laddr);
   VG_(memset)(&laddr, 0, llen);

   if (VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
      switch (laddr.a.sa_family) {
      case VKI_AF_INET: {
         /* NB: static buffers -- not reentrant, but fine for the
            single-threaded reporting context this runs in. */
         static char lname[32];
         static char pname[32];
         struct vki_sockaddr_in paddr;
         Int plen = sizeof(struct vki_sockaddr_in);

         /* A connected socket has a peer; an unconnected one hasn't. */
         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
                         inet_to_name(&(laddr.in), llen, lname),
                         inet_to_name(&paddr, plen, pname));
         } else {
            VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
                         fd, inet_to_name(&(laddr.in), llen, lname));
         }
         return;
         }
      case VKI_AF_INET6: {
         static char lname[128];
         static char pname[128];
         struct vki_sockaddr_in6 paddr;
         Int plen = sizeof(struct vki_sockaddr_in6);

         if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
            VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
                         inet6_to_name(&(laddr.in6), llen, lname),
                         inet6_to_name(&paddr, plen, pname));
         } else {
            VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
                         fd, inet6_to_name(&(laddr.in6), llen, lname));
         }
         return;
         }
      case VKI_AF_UNIX: {
         static char lname[256];
         VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
                      unix_to_name(&(laddr.un), llen, lname));
         return;
         }
      default:
         /* Unknown protocol family: just report the family number. */
         VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
                      laddr.a.sa_family, fd);
         return;
      }
   }

   /* getsockname failed: nothing more we can say about it. */
   VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
}
/* Dump out a summary, and a more detailed list, of open file descriptors. */
void VG_(show_open_fds) (const HChar* when)
{
OpenFd *i = allocated_fds;
VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
while (i) {
if (i->pathname) {
VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
i->pathname);
} else {
Int val;
Int len = sizeof(val);
if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
== -1) {
VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
} else {
getsockdetails(i->fd);
}
}
if(i->where) {
VG_(pp_ExeContext)(i->where);
VG_(message)(Vg_UserMsg, "\n");
} else {
VG_(message)(Vg_UserMsg, " <inherited from parent>\n");
VG_(message)(Vg_UserMsg, "\n");
}
i = i->next;
}
VG_(message)(Vg_UserMsg, "\n");
}
/* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
   have /proc support compiled in, or a non-Linux kernel), then we need to
   find out what file descriptors we inherited from our parent process the
   hard way - by checking each fd in turn. */
static
void init_preopened_fds_without_proc_self_fd(void)
{
   struct vki_rlimit lim;
   UInt count;
   UInt i;   /* unsigned, matching 'count', so the loop comparison is
                not a mixed signed/unsigned one */

   if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
      /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
         an arbitrarily high number.  1024 happens to be the limit in
         the 2.4 Linux kernels. */
      count = 1024;
   } else {
      count = lim.rlim_cur;
   }

   /* A descriptor is inherited iff fcntl(F_GETFL) succeeds on it. */
   for (i = 0; i < count; i++)
      if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
         ML_(record_fd_open_named)(-1, i);
}
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from out parent process. */
void VG_(init_preopened_fds)(void)
{
// DDD: should probably use HAVE_PROC here or similar, instead.
#if defined(VGO_linux)
   Int ret;
   struct vki_dirent64 d;
   SysRes f;

   /* Preferred method: enumerate /proc/self/fd.  If that directory
      cannot be opened, fall back to probing every fd in turn. */
   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
   if (sr_isError(f)) {
      init_preopened_fds_without_proc_self_fd();
      return;
   }

   /* Read directory entries one at a time (the buffer holds exactly
      one vki_dirent64); each numeric name is an inherited fd. */
   while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
      if (ret == -1)
         goto out;

      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
         HChar* s;
         Int fno = VG_(strtoll10)(d.d_name, &s);
         if (*s == '\0') {
            /* Fully-numeric name => a real fd.  Skip the fd we are
               using to read the directory itself. */
            if (fno != sr_Res(f))
               if (VG_(clo_track_fds))
                  ML_(record_fd_open_named)(-1, fno);
         } else {
            VG_(message)(Vg_DebugMsg,
               "Warning: invalid file name in /proc/self/fd: %s\n",
               d.d_name);
         }
      }

      /* We consumed only one dirent; seek to the next one. */
      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
   }

  out:
   VG_(close)(sr_Res(f));

#elif defined(VGO_darwin)
   init_preopened_fds_without_proc_self_fd();

#else
#  error Unknown OS
#endif
}
/* Concatenate s1 and s2 into a freshly allocated string in arena
   'aid', tagged with cost-centre 'cc'.  The caller must free it. */
static
HChar *strdupcat ( const HChar* cc, const HChar *s1, const HChar *s2,
                   ArenaId aid )
{
   UInt   totalLen = VG_(strlen)(s1) + VG_(strlen)(s2) + 1;  /* + NUL */
   HChar* result   = VG_(arena_malloc)(aid, cc, totalLen);
   VG_(strcpy)(result, s1);
   VG_(strcat)(result, s2);
   return result;
}
/* Check readability of a sendmsg-related field, labelling any error
   report with "sendmsg<msg>". */
static
void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
                            const HChar *msg, Addr base, SizeT size )
{
   HChar *description = strdupcat ( "di.syswrap.pmrs.1",
                                    "sendmsg", msg, VG_AR_CORE );
   PRE_MEM_READ( description, base, size );
   VG_(free) ( description );
}
/* Check a recvmsg-related field before the call: fields the kernel
   reads are checked for readability, fields it will fill in are
   checked for writability.  Reports are labelled "recvmsg<msg>". */
static
void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
                             const HChar *msg, Addr base, SizeT size )
{
   HChar *description = strdupcat ( "di.syswrap.pmwr.1",
                                    "recvmsg", msg, VG_AR_CORE );
   if ( read )
      PRE_MEM_READ( description, base, size );
   else
      PRE_MEM_WRITE( description, base, size );
   VG_(free) ( description );
}
/* After recvmsg: mark kernel-written fields as initialised.  Fields
   that were only read need no post-call handling. */
static
void post_mem_write_recvmsg ( ThreadId tid, Bool read,
                              const HChar *fieldName, Addr base, SizeT size )
{
   if ( read )
      return;
   POST_MEM_WRITE( base, size );
}
/* Apply 'foreach_func' to every field of a vki_msghdr the kernel will
   touch, plus the buffers those fields point at.  'length' bounds the
   total number of iovec payload bytes visited.  'rekv' selects recvmsg
   semantics (msg_flags is written; pointed-to buffers are written
   rather than read).  The Bool passed to foreach_func is True for
   kernel-read data, False for kernel-written data. */
static
void msghdr_foreachfield (
        ThreadId tid,
        const HChar *name,
        struct vki_msghdr *msg,
        UInt length,
        void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
        Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
     )
{
   HChar *fieldName;

   if ( !msg )
      return;

   /* Scratch buffer for "(name.field)" labels; 32 extra bytes covers
      the longest suffix generated below. */
   fieldName = VG_(malloc) ( "di.syswrap.mfef", VG_(strlen)(name) + 32 );

   VG_(sprintf) ( fieldName, "(%s)", name );

   /* The msghdr struct members themselves are always read. */
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );

   /* msg_flags is completely ignored for send_mesg, recv_mesg doesn't read
      the field, but does write to it. */
   if ( rekv )
      foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   /* Each pointer chase is guarded with safe_to_deref so a bogus
      msghdr cannot make us fault while inspecting it. */
   if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
        && msg->msg_name ) {
      VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_name, msg->msg_namelen );
   }

   if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
        && msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;
      UInt i;

      /* The iovec array itself is read by the kernel... */
      VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
      foreach_func ( tid, True, fieldName,
                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );

      /* ...while each payload buffer is treated as kernel-written,
         clipped so no more than 'length' bytes are visited overall. */
      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
         UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
         VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
         foreach_func ( tid, False, fieldName,
                        (Addr)iov->iov_base, iov_len );
         length = length - iov_len;
      }
   }

   if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
        && msg->msg_control )
   {
      VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_control, msg->msg_controllen );
   }

   VG_(free) ( fieldName );
}
/* Scan a received msghdr's control data for SCM_RIGHTS messages
   (file-descriptor passing) and record any descriptors found. */
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   struct vki_cmsghdr *cm;

   for (cm = VKI_CMSG_FIRSTHDR(msg); cm; cm = VKI_CMSG_NXTHDR(msg, cm)) {
      if (cm->cmsg_level == VKI_SOL_SOCKET
          && cm->cmsg_type == VKI_SCM_RIGHTS) {
         /* The payload is an array of ints (file descriptors). */
         Int *fds = (Int *) VKI_CMSG_DATA(cm);
         Int fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
                      / sizeof(int);
         Int i;

         for (i = 0; i < fdc; i++)
            if (VG_(clo_track_fds))
               // XXX: must we check the range on these fds with
               //      ML_(fd_allowed)()?
               ML_(record_fd_open_named)(tid, fds[i]);
      }
   }
}
/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
/* Check readability of the parts of a client-supplied sockaddr the
   kernel will actually read, dispatching on sa_family.  'description'
   is a printf-style template containing one %s, used to build the
   field name shown in error reports. */
static
void pre_mem_read_sockaddr ( ThreadId tid,
                             const HChar *description,
                             struct vki_sockaddr *sa, UInt salen )
{
   HChar *outmsg;
   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
#  ifdef VKI_AF_BLUETOOTH
   struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
#  endif
#  ifdef VKI_AF_NETLINK
   struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
#  endif

   /* NULL/zero-length sockaddrs are legal */
   if ( sa == NULL || salen == 0 ) return;

   /* 30 extra bytes is enough for the longest field name inserted
      below. */
   outmsg = VG_(malloc) ( "di.syswrap.pmr_sockaddr.1",
                          VG_(strlen)( description ) + 30 );

   /* The family field is always read. */
   VG_(sprintf) ( outmsg, description, "sa_family" );
   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));

   switch (sa->sa_family) {

      case VKI_AF_UNIX:
         VG_(sprintf) ( outmsg, description, "sun_path" );
         PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
         // GrP fixme max of sun_len-2? what about nul char?
         break;

      case VKI_AF_INET:
         VG_(sprintf) ( outmsg, description, "sin_port" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
         VG_(sprintf) ( outmsg, description, "sin_addr" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
         break;

      case VKI_AF_INET6:
         VG_(sprintf) ( outmsg, description, "sin6_port" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
         VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
         VG_(sprintf) ( outmsg, description, "sin6_addr" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
         VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
         break;

#  ifdef VKI_AF_BLUETOOTH
      case VKI_AF_BLUETOOTH:
         VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
         PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
         VG_(sprintf) ( outmsg, description, "rc_channel" );
         PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
         break;
#  endif

#  ifdef VKI_AF_NETLINK
      case VKI_AF_NETLINK:
         VG_(sprintf)(outmsg, description, "nl_pid");
         PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
         VG_(sprintf)(outmsg, description, "nl_groups");
         PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
         break;
#  endif

#  ifdef VKI_AF_UNSPEC
      case VKI_AF_UNSPEC:
         break;
#  endif

      default:
         /* No specific information about this address family.
            Let's just check the full data following the family.
            Note that this can give false positive if this (unknown)
            struct sockaddr_???? has padding bytes between its elements. */
         VG_(sprintf) ( outmsg, description, "sa_data" );
         /* Subtract the already-checked family field from the length;
            starting past sa_family but still checking 'salen' bytes
            would over-read the supplied address by sizeof(sa_family). */
         PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
                       salen - sizeof(sa->sa_family) );
         break;
   }

   VG_(free) ( outmsg );
}
/* Dereference a pointer to a UInt, first telling the tool the word is
   about to be read.  Returns 0 for a NULL pointer. */
static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
{
   UInt* p = (UInt*)a;
   PRE_MEM_READ( s, (Addr)p, sizeof(UInt) );
   return (p == NULL) ? 0 : *p;
}
/* For syscalls taking a buffer plus an in/out length pointer: before
   the call, tell the tool that the first *buflen_p bytes of the
   buffer may be written by the kernel. */
void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
                                  const HChar* buf_s, const HChar* buflen_s )
{
   UInt buflen_in;

   if (!VG_(tdict).track_pre_mem_write)
      return;

   buflen_in = deref_UInt( tid, buflen_p, buflen_s);
   if (buflen_in > 0) {
      VG_(tdict).track_pre_mem_write(
         Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
   }
}
/* Counterpart to ML_(buf_and_len_pre_check): after a successful call,
   tell the tool that the first *buflen_p (out-length) bytes of the
   buffer were written. */
void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
                                   Addr buf_p, Addr buflen_p, const HChar* s )
{
   UInt buflen_out;

   if (sr_isError(res) || !VG_(tdict).track_post_mem_write)
      return;

   buflen_out = deref_UInt( tid, buflen_p, s);
   if (buflen_out > 0 && buf_p != (Addr)NULL) {
      VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid,
                                       buf_p, buflen_out );
   }
}
/* ---------------------------------------------------------------------
Data seg end, for brk()
------------------------------------------------------------------ */
/* +--------+------------+
| anon | resvn |
+--------+------------+
^ ^ ^
| | boundary is page aligned
| VG_(brk_limit) -- no alignment constraint
VG_(brk_base) -- page aligned -- does not move
Both the anon part and the reservation part are always at least
one page.
*/
/* Set the new data segment end to NEWBRK.  If this succeeds, return
   NEWBRK, else return the current data segment end. */
static Addr do_brk ( Addr newbrk )
{
   NSegment const* aseg;   /* the anon segment holding the brk area */
   NSegment const* rseg;   /* the reservation segment following it */
   Addr newbrkP;
   SizeT delta;
   Bool ok;
   Bool debug = False;

   if (debug)
      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
                  VG_(brk_base), VG_(brk_limit), newbrk);

#  if 0
   if (0) show_segments("in_brk");
#  endif

   /* Shrinking below the original break is never allowed. */
   if (newbrk < VG_(brk_base))
      /* Clearly impossible. */
      goto bad;

   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
      /* shrinking the data segment.  Be lazy and don't munmap the
         excess area. */
      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
      if (seg && seg->hasT)
         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
                                    "do_brk(shrink)" );
      /* Since we're being lazy and not unmapping pages, we have to
         zero out the area, so that if the area later comes back into
         circulation, it will be filled with zeroes, as if it really
         had been unmapped and later remapped.  Be a bit paranoid and
         try hard to ensure we're not going to segfault by doing the
         write - check both ends of the range are in the same segment
         and that segment is writable. */
      if (seg) {
         /* pre: newbrk < VG_(brk_limit)
            => newbrk <= VG_(brk_limit)-1 */
         NSegment const * seg2;
         vg_assert(newbrk < VG_(brk_limit));
         seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
         if (seg2 && seg == seg2 && seg->hasW)
            VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
      }

      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   /* otherwise we're expanding the brk segment. */
   /* Find the anon segment containing the current limit.  If the
      limit equals the base the segment is empty, so look at the base
      address itself instead of limit-1. */
   if (VG_(brk_limit) > VG_(brk_base))
      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   else
      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );

   /* These should be assured by setup_client_dataseg in m_main. */
   vg_assert(aseg);
   vg_assert(rseg);
   vg_assert(aseg->kind == SkAnonC);
   vg_assert(rseg->kind == SkResvn);
   vg_assert(aseg->end+1 == rseg->start);

   vg_assert(newbrk >= VG_(brk_base));
   if (newbrk <= rseg->start) {
      /* still fits within the anon segment. */
      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
      /* request is too large -- the resvn would fall below 1 page,
         which isn't allowed. */
      goto bad;
   }

   /* Grow the anon segment into the reservation by a page-aligned
      amount that covers newbrk. */
   newbrkP = VG_PGROUNDUP(newbrk);
   vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   delta = newbrkP - rseg->start;
   vg_assert(delta > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(delta));

   ok = VG_(am_extend_into_adjacent_reservation_client)( aseg, delta );
   if (!ok) goto bad;

   VG_(brk_limit) = newbrk;
   return newbrk;

  bad:
   return VG_(brk_limit);
}
/* ---------------------------------------------------------------------
Vet file descriptors for sanity
------------------------------------------------------------------ */
/*
> - what does the "Bool soft" parameter mean?
(Tom Hughes, 3 Oct 05):
Whether or not to consider a file descriptor invalid if it is above
the current soft limit.
Basically if we are testing whether a newly created file descriptor is
valid (in a post handler) then we set soft to true, and if we are
testing whether a file descriptor that is about to be used (in a pre
handler) is valid [viz, an already-existing fd] then we set it to false.
The point is that if the (virtual) soft limit is lowered then any
existing descriptors can still be read/written/closed etc (so long as
they are below the valgrind reserved descriptors) but no new
descriptors can be created above the new soft limit.
(jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
*/
/* Return true if we're allowed to use or create this fd.  FD is the
   descriptor to vet, SYSCALLNAME is used only for the warning
   message, and isNewFd selects whether the soft limit applies (True
   for freshly created fds, False for pre-existing ones -- see the
   discussion above). */
Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
                     Bool isNewFd)
{
   Bool allowed = True;

   /* hard limits always apply */
   if (fd < 0 || fd >= VG_(fd_hard_limit))
      allowed = False;

   /* hijacking the output fds is never allowed */
   if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
      allowed = False;

   /* if creating a new fd (rather than using an existing one), the
      soft limit must also be observed */
   if (isNewFd && fd >= VG_(fd_soft_limit))
      allowed = False;

   /* this looks like it ought to be included, but causes problems: */
   /*
   if (fd == 2 && VG_(debugLog_getLevel)() > 0)
      allowed = False;
   */
   /* The difficulty is as follows: consider a program P which expects
      to be able to mess with (redirect) its own stderr (fd 2).
      Usually to deal with P we would issue command line flags to send
      logging somewhere other than stderr, so as not to disrupt P.
      The problem is that -d unilaterally hijacks stderr with no
      consultation with P.  And so, if this check is enabled, P will
      work OK normally but fail if -d is issued.

      Basically -d is a hack and you take your chances when using it.
      It's very useful for low level debugging -- particularly at
      startup -- and having its presence change the behaviour of the
      client is exactly what we don't want. */

   /* croak?  Warn the user about the rejected fd, and point them at
      the relevant command-line option when they collided with one of
      our own output sinks. */
   if ((!allowed) && VG_(showing_core_errors)() ) {
      VG_(message)(Vg_UserMsg,
         "Warning: invalid file descriptor %d in syscall %s()\n",
         fd, syscallname);
      if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
         VG_(message)(Vg_UserMsg,
            "   Use --log-fd=<number> to select an alternative log fd.\n");
      if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
         VG_(message)(Vg_UserMsg,
            "   Use --xml-fd=<number> to select an alternative XML "
            "output fd.\n");
      // DDD: consider always printing this stack trace, it's useful.
      // Also consider also making this a proper core error, ie.
      // suppressible and all that.
      if (VG_(clo_verbosity) > 1) {
         VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      }
   }

   return allowed;
}
/* ---------------------------------------------------------------------
Deal with a bunch of socket-related syscalls
------------------------------------------------------------------ */
/* ------ */
void
ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
                                  UWord arg0, UWord arg1,
                                  UWord arg2, UWord arg3 )
{
   /* int socketpair(int d, int type, int protocol, int sv[2]); */
   /* The kernel fills in the two-element fd array at arg3; tell the
      tool those bytes are about to be written. */
   PRE_MEM_WRITE( "socketcall.socketpair(sv)",
                  arg3, 2*sizeof(int) );
}
/* Post-handler for socketpair().  Vets the two fds the kernel wrote
   into sv (arg3); if either is disallowed, both are closed and the
   result is replaced by EMFILE.  Otherwise the fds are optionally
   recorded for --track-fds.  Returns the (possibly replaced) syscall
   result. */
SysRes
ML_(generic_POST_sys_socketpair) ( ThreadId tid,
                                   SysRes res,
                                   UWord arg0, UWord arg1,
                                   UWord arg2, UWord arg3 )
{
   SysRes r = res;
   Int fd1 = ((Int*)arg3)[0];
   Int fd2 = ((Int*)arg3)[1];
   vg_assert(!sr_isError(res)); /* guaranteed by caller */
   /* Mark sv[0..1] as written.  (Done exactly once: the original code
      repeated this call in the success branch, which was redundant.) */
   POST_MEM_WRITE( arg3, 2*sizeof(int) );
   if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
       !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
      VG_(close)(fd1);
      VG_(close)(fd2);
      r = VG_(mk_SysRes_Error)( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds)) {
         ML_(record_fd_open_nameless)(tid, fd1);
         ML_(record_fd_open_nameless)(tid, fd2);
      }
   }
   return r;
}
/* ------ */
/* Post-handler for socket().  If the new fd is not acceptable it is
   closed and the result replaced by EMFILE; otherwise it may be
   recorded for --track-fds.  Returns the (possibly replaced) result. */
SysRes
ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
{
   Int fd;
   vg_assert(!sr_isError(res)); /* guaranteed by caller */
   fd = sr_Res(res);
   if (!ML_(fd_allowed)(fd, "socket", tid, True)) {
      VG_(close)(fd);
      return VG_(mk_SysRes_Error)( VKI_EMFILE );
   }
   if (VG_(clo_track_fds))
      ML_(record_fd_open_nameless)(tid, fd);
   return res;
}
/* ------ */
void
ML_(generic_PRE_sys_bind) ( ThreadId tid,
                            UWord arg0, UWord arg1, UWord arg2 )
{
   /* int bind(int sockfd, struct sockaddr *my_addr,
               int addrlen); */
   /* Check the sockaddr at arg1 (length arg2) field-by-field,
      according to its address family. */
   pre_mem_read_sockaddr(
      tid, "socketcall.bind(my_addr.%s)",
      (struct vki_sockaddr *) arg1, arg2
   );
}
/* ------ */
void
ML_(generic_PRE_sys_accept) ( ThreadId tid,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   /* int accept(int s, struct sockaddr *addr, int *addrlen); */
   Addr addr_p     = arg1;
   Addr addrlen_p  = arg2;
   /* addr may legitimately be NULL (caller not interested in the
      peer address); only check the buf/len pair when it isn't. */
   if (addr_p != (Addr)NULL)
      ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
                                   "socketcall.accept(addr)",
                                   "socketcall.accept(addrlen_in)" );
}
/* Post-handler for accept().  Vets the accepted fd (closing it and
   returning EMFILE if disallowed), marks the peer-address buffer as
   written when one was supplied, and optionally records the fd for
   --track-fds. */
SysRes
ML_(generic_POST_sys_accept) ( ThreadId tid,
                               SysRes res,
                               UWord arg0, UWord arg1, UWord arg2 )
{
   Int fd;
   vg_assert(!sr_isError(res)); /* guaranteed by caller */
   fd = sr_Res(res);
   if (!ML_(fd_allowed)(fd, "accept", tid, True)) {
      VG_(close)(fd);
      return VG_(mk_SysRes_Error)( VKI_EMFILE );
   }
   if (arg1 != (Addr)NULL)
      ML_(buf_and_len_post_check) ( tid, res, arg1, arg2,
                                    "socketcall.accept(addrlen_out)" );
   if (VG_(clo_track_fds))
      ML_(record_fd_open_nameless)(tid, fd);
   return res;
}
/* ------ */
void
ML_(generic_PRE_sys_sendto) ( ThreadId tid,
                              UWord arg0, UWord arg1, UWord arg2,
                              UWord arg3, UWord arg4, UWord arg5 )
{
   /* int sendto(int s, const void *msg, int len,
                 unsigned int flags,
                 const struct sockaddr *to, int tolen); */
   /* The message payload is read by the kernel ... */
   PRE_MEM_READ( "socketcall.sendto(msg)",
                 arg1, /* msg */
                 arg2  /* len */ );
   /* ... as is the destination sockaddr. */
   pre_mem_read_sockaddr(
      tid, "socketcall.sendto(to.%s)",
      (struct vki_sockaddr *) arg4, arg5
   );
}
/* ------ */
void
ML_(generic_PRE_sys_send) ( ThreadId tid,
                            UWord arg0, UWord arg1, UWord arg2 )
{
   /* int send(int s, const void *msg, size_t len, int flags); */
   /* Only the payload needs checking; there is no address arg. */
   PRE_MEM_READ( "socketcall.send(msg)",
                 arg1, /* msg */
                 arg2  /* len */ );
}
/* ------ */
void
ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
                                UWord arg0, UWord arg1, UWord arg2,
                                UWord arg3, UWord arg4, UWord arg5 )
{
   /* int recvfrom(int s, void *buf, int len, unsigned int flags,
                   struct sockaddr *from, int *fromlen); */
   Addr buf_p      = arg1;
   Int  len        = arg2;
   Addr from_p     = arg4;
   Addr fromlen_p  = arg5;
   /* The kernel will write up to len bytes into buf ... */
   PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
   /* ... and, if a source-address buffer was supplied, fill that in
      too (from may legitimately be NULL). */
   if (from_p != (Addr)NULL)
      ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
                                   "socketcall.recvfrom(from)",
                                   "socketcall.recvfrom(fromlen_in)" );
}
void
ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
                                 SysRes res,
                                 UWord arg0, UWord arg1, UWord arg2,
                                 UWord arg3, UWord arg4, UWord arg5 )
{
   Addr buf_p      = arg1;
   Int  len        = arg2;
   Addr from_p     = arg4;
   Addr fromlen_p  = arg5;

   vg_assert(!sr_isError(res)); /* guaranteed by caller */
   if (from_p != (Addr)NULL)
      ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
                                    "socketcall.recvfrom(fromlen_out)" );
   /* Note: the whole len-byte buffer is marked written, not just the
      number of bytes actually received. */
   POST_MEM_WRITE( buf_p, len );
}
/* ------ */
void
ML_(generic_PRE_sys_recv) ( ThreadId tid,
                            UWord arg0, UWord arg1, UWord arg2 )
{
   /* int recv(int s, void *buf, int len, unsigned int flags); */
   /* man 2 recv says:
      The  recv call is normally used only on a connected socket
      (see connect(2)) and is identical to recvfrom with a  NULL
      from parameter.
   */
   PRE_MEM_WRITE( "socketcall.recv(buf)",
                  arg1, /* buf */
                  arg2  /* len */ );
}
/* Post-handler for recv(): mark the client's receive buffer as
   written.  Note: 'res' is an unsigned UWord, so the original
   "res >= 0" guard was a tautology and has been dropped; this
   handler is only reached on syscall success anyway.  The whole
   arg2-byte buffer is marked, not just the bytes received
   (pre-existing behaviour, unchanged). */
void
ML_(generic_POST_sys_recv) ( ThreadId tid,
                             UWord res,
                             UWord arg0, UWord arg1, UWord arg2 )
{
   if (arg1 != 0) {
      POST_MEM_WRITE( arg1, /* buf */
                      arg2  /* len */ );
   }
}
/* ------ */
void
ML_(generic_PRE_sys_connect) ( ThreadId tid,
                               UWord arg0, UWord arg1, UWord arg2 )
{
   /* int connect(int sockfd,
                  struct sockaddr *serv_addr, int addrlen ); */
   /* The server address is read by the kernel; check it
      family-by-family. */
   pre_mem_read_sockaddr( tid,
                          "socketcall.connect(serv_addr.%s)",
                          (struct vki_sockaddr *) arg1, arg2);
}
/* ------ */
void
ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
                                  UWord arg0, UWord arg1, UWord arg2,
                                  UWord arg3, UWord arg4 )
{
   /* int setsockopt(int s, int level, int optname,
                     const void *optval, int optlen); */
   /* The kernel reads optlen bytes of option data from optval. */
   PRE_MEM_READ( "socketcall.setsockopt(optval)",
                 arg3, /* optval */
                 arg4  /* optlen */ );
}
/* ------ */
void
ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
                                   UWord arg0, UWord arg1, UWord arg2 )
{
   /* int getsockname(int s, struct sockaddr* name, int* namelen) */
   Addr name_p     = arg1;
   Addr namelen_p  = arg2;
   /* Nb: name_p cannot be NULL */
   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
                                "socketcall.getsockname(name)",
                                "socketcall.getsockname(namelen_in)" );
}
void
ML_(generic_POST_sys_getsockname) ( ThreadId tid,
                                    SysRes res,
                                    UWord arg0, UWord arg1, UWord arg2 )
{
   /* Mark the returned socket name (and its updated length) as
      written. */
   Addr name_p     = arg1;
   Addr namelen_p  = arg2;
   vg_assert(!sr_isError(res)); /* guaranteed by caller */
   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
                                 "socketcall.getsockname(namelen_out)" );
}
/* ------ */
void
ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
                                   UWord arg0, UWord arg1, UWord arg2 )
{
   /* int getpeername(int s, struct sockaddr* name, int* namelen) */
   Addr name_p     = arg1;
   Addr namelen_p  = arg2;
   /* Nb: name_p cannot be NULL */
   ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
                                "socketcall.getpeername(name)",
                                "socketcall.getpeername(namelen_in)" );
}
void
ML_(generic_POST_sys_getpeername) ( ThreadId tid,
                                    SysRes res,
                                    UWord arg0, UWord arg1, UWord arg2 )
{
   /* Mark the returned peer name (and its updated length) as
      written. */
   Addr name_p     = arg1;
   Addr namelen_p  = arg2;
   vg_assert(!sr_isError(res)); /* guaranteed by caller */
   ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
                                 "socketcall.getpeername(namelen_out)" );
}
/* ------ */
void
ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
                               struct vki_msghdr *msg )
{
   /* Check every field of the msghdr (and its iovecs) as a read;
      ~0 means no received-length cap applies. */
   msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
}
/* ------ */
void
ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
                               struct vki_msghdr *msg )
{
   /* Check every field of the msghdr as a pending write (the kernel
      will fill the iovecs / control data); ~0 means no length cap. */
   msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
}
void
ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
                                struct vki_msghdr *msg, UInt length )
{
   /* Mark up to 'length' bytes of the msghdr's buffers as written,
      then scan any SCM_RIGHTS control messages for fds passed in. */
   msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
   check_cmsg_for_fds( tid, msg );
}
/* ---------------------------------------------------------------------
Deal with a bunch of IPC related syscalls
------------------------------------------------------------------ */
/* ------ */
void
ML_(generic_PRE_sys_semop) ( ThreadId tid,
                             UWord arg0, UWord arg1, UWord arg2 )
{
   /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
   /* The kernel reads nsops (arg2) sembuf records from sops (arg1). */
   PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
}
/* ------ */
void
ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
                                  UWord arg0, UWord arg1,
                                  UWord arg2, UWord arg3 )
{
   /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
                     struct timespec *timeout); */
   PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
   /* timeout is optional: NULL means block indefinitely. */
   if (arg3 != 0)
      PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
}
/* ------ */
/* Ask the kernel how many semaphores are in the set SEMID, using
   semctl(IPC_STAT).  Returns 0 on any failure. */
static
UInt get_sem_count( Int semid )
{
   struct vki_semid_ds buf;
   union vki_semun arg;
   SysRes res;

   /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
      (experimental) otherwise complains that the use in the return
      statement below is uninitialised. */
   buf.sem_nsems = 0;

   arg.buf = &buf;

#  ifdef __NR_semctl
   /* The union is passed by value as a single machine word, hence the
      reinterpretation of its storage. */
   res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
#  else
   /* No direct semctl syscall: go via the multiplexed ipc(2) call,
      which takes a pointer to the union instead. */
   res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
                          VKI_IPC_STAT, (UWord)&arg);
#  endif
   if (sr_isError(res))
      return 0;

   return buf.sem_nsems;
}
/* Pre-handler for semctl().  Depending on the command (arg2), the
   4th argument (arg3) is interpreted as a union vki_semun whose buf
   or array member the kernel reads or writes; tell the tool
   accordingly.  The |VKI_IPC_64 variants use the 64-bit layouts. */
void
ML_(generic_PRE_sys_semctl) ( ThreadId tid,
                              UWord arg0, UWord arg1,
                              UWord arg2, UWord arg3 )
{
   /* int semctl(int semid, int semnum, int cmd, ...); */
   /* Reinterpret the raw 4th argument word as the semun union. */
   union vki_semun arg = *(union vki_semun *)&arg3;
   UInt nsems;
   switch (arg2 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
   case VKI_SEM_INFO:
   case VKI_IPC_INFO|VKI_IPC_64:
   case VKI_SEM_INFO|VKI_IPC_64:
      PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_seminfo) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT:
#endif
      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_STAT|VKI_IPC_64:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT|VKI_IPC_64:
#endif
      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_IPC_SET:
      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
                    (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_SET|VKI_IPC_64:
      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
                    (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_GETALL:
#if defined(VKI_IPC_64)
   case VKI_GETALL|VKI_IPC_64:
#endif
      /* The array size is the number of semaphores in the set, which
         we must query from the kernel. */
      nsems = get_sem_count( arg0 );
      PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
                     (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;

   case VKI_SETALL:
#if defined(VKI_IPC_64)
   case VKI_SETALL|VKI_IPC_64:
#endif
      nsems = get_sem_count( arg0 );
      PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
                    (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;
   }
}
/* Post-handler for semctl(): mark the buffers the kernel actually
   wrote (mirrors the WRITE cases of the pre-handler). */
void
ML_(generic_POST_sys_semctl) ( ThreadId tid,
                               UWord res,
                               UWord arg0, UWord arg1,
                               UWord arg2, UWord arg3 )
{
   union vki_semun arg = *(union vki_semun *)&arg3;
   UInt nsems;
   switch (arg2 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
   case VKI_SEM_INFO:
   case VKI_IPC_INFO|VKI_IPC_64:
   case VKI_SEM_INFO|VKI_IPC_64:
      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT:
#endif
      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   /* NOTE(review): unlike the pre-handler, the SEM_STAT|IPC_64 case
      here is not additionally guarded by #if defined(VKI_SEM_STAT) --
      presumably fine on the supported OSes; verify if porting. */
   case VKI_IPC_STAT|VKI_IPC_64:
   case VKI_SEM_STAT|VKI_IPC_64:
      POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_GETALL:
#if defined(VKI_IPC_64)
   case VKI_GETALL|VKI_IPC_64:
#endif
      nsems = get_sem_count( arg0 );
      POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;
   }
}
/* ------ */
/* ------ */
/* Ask the kernel for the size in bytes of the shared-memory segment
   SHMID, via shmctl(IPC_STAT).  Returns 0 on any failure.  The
   syscall form differs per platform: direct shmctl where available
   (with or without IPC_64), otherwise the multiplexed ipc(2) call. */
static
SizeT get_shm_size ( Int shmid )
{
#ifdef __NR_shmctl
#  ifdef VKI_IPC_64
   struct vki_shmid64_ds buf;
#    ifdef VGP_amd64_linux
     /* See bug 222545 comment 7 */
   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                   VKI_IPC_STAT, (UWord)&buf);
#    else
   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                   VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
#    endif
#  else /* !def VKI_IPC_64 */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
#  endif /* def VKI_IPC_64 */
#else
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
                                   VKI_IPC_STAT, 0, (UWord)&buf);
#endif
   if (sr_isError(__res))
      return 0;

   return (SizeT) buf.shm_segsz;
}
/* Pre-handler for shmat().  Chooses/validates the attach address:
   when the client passed NULL (arg1 == 0), asks aspacem for a
   suitable address; when the client supplied an address, checks it
   lies in client space.  Returns the address to attach at, or 0 if
   the request must be failed. */
UWord
ML_(generic_PRE_sys_shmat) ( ThreadId tid,
                             UWord arg0, UWord arg1, UWord arg2 )
{
   /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   SizeT  segmentSize = get_shm_size ( arg0 );
   UWord tmp;
   Bool  ok;
   if (arg1 == 0) {
      /* arm-linux only: work around the fact that
         VG_(am_get_advisory_client_simple) produces something that is
         VKI_PAGE_SIZE aligned, whereas what we want is something
         VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
         increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
         then round the result up to the next VKI_SHMLBA boundary.
         See bug 222545 comment 15.  So far, arm-linux is the only
         platform where this is known to be necessary. */
      vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
      if (VKI_SHMLBA > VKI_PAGE_SIZE) {
         segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
      }
      tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
      if (ok) {
         if (VKI_SHMLBA > VKI_PAGE_SIZE) {
            arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
         } else {
            arg1 = tmp;
         }
      }
   }
   else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
      /* Client-supplied address is unacceptable; signal failure. */
      arg1 = 0;
   return arg1;
}
/* Post-handler for shmat(): register the newly attached segment
   (at address 'res', page-rounded size) with aspacem and the tool,
   and discard any stale translations covering that range. */
void
ML_(generic_POST_sys_shmat) ( ThreadId tid,
                              UWord res,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   if ( segmentSize > 0 ) {
      UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
      Bool d;

      /* SHM_RDONLY attaches are not writable. */
      if (arg2 & VKI_SHM_RDONLY)
         prot &= ~VKI_PROT_WRITE;
      /* It isn't exactly correct to pass 0 for the fd and offset
         here.  The kernel seems to think the corresponding section
         does have dev/ino numbers:

         04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)

         However there is no obvious way to find them.  In order to
         cope with the discrepancy, aspacem's sync checker omits the
         dev/ino correspondence check in cases where V does not know
         the dev/ino. */
      d = VG_(am_notify_client_shmat)( res, segmentSize, prot );

      /* we don't distinguish whether it's read-only or
       * read-write -- it doesn't matter really. */
      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
                              0/*di_handle*/ );
      if (d)
         VG_(discard_translations)( (Addr64)res,
                                    (ULong)VG_PGROUNDUP(segmentSize),
                                    "ML_(generic_POST_sys_shmat)" );
   }
}
/* ------ */
Bool
ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
{
   /* int shmdt(const void *shmaddr); */
   /* True iff shmaddr lies in the client's address space; the caller
      fails the syscall otherwise. */
   return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
}
/* Post-handler for shmdt(): tell aspacem and the tool that the
   segment previously attached at arg0 is gone, and discard any
   translations from it. */
void
ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
{
   NSegment const* s = VG_(am_find_nsegment)(arg0);

   if (s != NULL) {
      Addr  s_start = s->start;
      SizeT s_len   = s->end+1 - s->start;
      Bool  d;

      vg_assert(s->kind == SkShmC);
      vg_assert(s->start == arg0);

      d = VG_(am_notify_munmap)(s_start, s_len);
      s = NULL; /* s is now invalid */
      VG_TRACK( die_mem_munmap, s_start, s_len );
      if (d)
         VG_(discard_translations)( (Addr64)s_start,
                                    (ULong)s_len,
                                    "ML_(generic_POST_sys_shmdt)" );
   }
}
/* ------ */
/* Pre-handler for shmctl(): tell the tool which buffer (arg2) the
   kernel will read or write, with the size selected by the command
   (arg1) and its |VKI_IPC_64 variant. */
void
ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   switch (arg1 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
                     arg2, sizeof(struct vki_shminfo) );
      break;
#if defined(VKI_IPC_64)
   case VKI_IPC_INFO|VKI_IPC_64:
      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
                     arg2, sizeof(struct vki_shminfo64) );
      break;
#endif
#endif

#if defined(VKI_SHM_INFO)
   case VKI_SHM_INFO:
#if defined(VKI_IPC_64)
   case VKI_SHM_INFO|VKI_IPC_64:
#endif
      PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
                     arg2, sizeof(struct vki_shm_info) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SHM_STAT)
   case VKI_SHM_STAT:
#endif
      PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
                     arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   /* NOTE(review): the SHM_STAT|IPC_64 case is not guarded by
      #if defined(VKI_SHM_STAT) -- presumably fine on the supported
      OSes; verify if porting. */
   case VKI_IPC_STAT|VKI_IPC_64:
   case VKI_SHM_STAT|VKI_IPC_64:
      PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
                     arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif

   case VKI_IPC_SET:
      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
                    arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_SET|VKI_IPC_64:
      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
                    arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif
   }
}
/* Post-handler for shmctl(): mark the buffers the kernel actually
   wrote (mirrors the WRITE cases of the pre-handler). */
void
ML_(generic_POST_sys_shmctl) ( ThreadId tid,
                               UWord res,
                               UWord arg0, UWord arg1, UWord arg2 )
{
   switch (arg1 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
      break;
   /* NOTE(review): this |IPC_64 case is not additionally guarded by
      #if defined(VKI_IPC_64), unlike the pre-handler -- presumably
      fine on the supported OSes; verify if porting. */
   case VKI_IPC_INFO|VKI_IPC_64:
      POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
      break;
#endif

#if defined(VKI_SHM_INFO)
   case VKI_SHM_INFO:
   case VKI_SHM_INFO|VKI_IPC_64:
      POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SHM_STAT)
   case VKI_SHM_STAT:
#endif
      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_STAT|VKI_IPC_64:
   case VKI_SHM_STAT|VKI_IPC_64:
      POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif
   }
}
/* ---------------------------------------------------------------------
Generic handler for mmap
------------------------------------------------------------------ */
/*
* Although mmap is specified by POSIX and the argument are generally
* consistent across platforms the precise details of the low level
* argument passing conventions differ. For example:
*
* - On x86-linux there is mmap (aka old_mmap) which takes the
* arguments in a memory block and the offset in bytes; and
* mmap2 (aka sys_mmap2) which takes the arguments in the normal
* way and the offset in pages.
*
* - On ppc32-linux there is mmap (aka sys_mmap) which takes the
* arguments in the normal way and the offset in bytes; and
* mmap2 (aka sys_mmap2) which takes the arguments in the normal
* way and the offset in pages.
*
* - On amd64-linux everything is simple and there is just the one
* call, mmap (aka sys_mmap) which takes the arguments in the
* normal way and the offset in bytes.
*
* - On s390x-linux there is mmap (aka old_mmap) which takes the
* arguments in a memory block and the offset in bytes. mmap2
* is also available (but not exported via unistd.h) with
* arguments in a memory block and the offset in pages.
*
* To cope with all this we provide a generic handler function here
* and then each platform implements one or more system call handlers
* which call this generic routine after extracting and normalising
* the arguments.
*/
/* Generic mmap handler: validates the request, asks aspacem for a
   placement, performs the map with MAP_FIXED at aspacem's chosen
   address (retrying once at an arbitrary address if a hinted request
   is refused by the kernel), and on success notifies aspacem, the
   debuginfo reader and the tool.  Args are in the "normal" order
   with the offset (arg6) in bytes; see the comment block above. */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#  if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#  endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap any misaligned addresses. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail.   Here, we catch them all. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

#  if defined(VKI_MAP_32BIT)
   /* We can't support MAP_32BIT (at least, not without significant
      complication), and it's royally unportable, so if the client
      asks for it, just fail it. */
   if (arg4 & VKI_MAP_32BIT) {
      return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   }
#  endif

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  MAP_FIXED is forced so
      the kernel honours aspacem's placement. */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
/* ---------------------------------------------------------------------
The Main Entertainment ... syscall wrappers
------------------------------------------------------------------ */
/* Note: the PRE() and POST() wrappers are for the actual functions
implementing the system calls in the OS kernel. These mostly have
names like sys_write(); a few have names like old_mmap(). See the
comment for ML_(syscall_table)[] for important info about the __NR_foo
constants and their relationship to the sys_foo() functions.
Some notes about names used for syscalls and args:
- For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
ambiguity.
- For error messages, we generally use a somewhat generic name
for the syscall (eg. "write" rather than "sys_write"). This should be
good enough for the average user to understand what is happening,
without confusing them with names like "sys_write".
- Also, for error messages the arg names are mostly taken from the man
pages (even though many of those man pages are really for glibc
functions of the same name), rather than from the OS kernel source,
for the same reason -- a user presented with a "bogus foo(bar)" arg
will most likely look at the "foo" man page to see which is the "bar"
arg.
Note that we use our own vki_* types. The one exception is in
PRE_REG_READn calls, where pointer types haven't been changed, because
they don't need to be -- eg. for "foo*" to be used, the type foo need not
be visible.
XXX: some of these are arch-specific, and should be factored out.
*/
/* Shorthand for declaring the generic pre/post wrapper functions. */
#define PRE(name)       DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)      DEFN_POST_TEMPLATE(generic, name)

// Macros to support 64-bit syscall args split into two 32 bit values
#if defined(VG_LITTLEENDIAN)
/* Little-endian: the low half is passed in the first register slot. */
#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_low
#define MERGE64_SECOND(name) name##_high
#elif defined(VG_BIGENDIAN)
/* Big-endian: the high half is passed first. */
#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_high
#define MERGE64_SECOND(name) name##_low
#else
#error Unknown endianness
#endif
PRE(sys_exit)
{
   ThreadState* tst;
   /* simple; just make this thread exit */
   PRINT("exit( %ld )", ARG1);
   PRE_REG_READ1(void, "exit", int, status);
   tst = VG_(get_ThreadState)(tid);
   /* Set the thread's status to be exiting, then claim that the
      syscall succeeded.  The scheduler notices the exitreason and
      tears the thread down; the real syscall is never made. */
   tst->exitreason = VgSrc_ExitThread;
   tst->os_state.exitcode = ARG1;
   SET_STATUS_Success(0);
}
/* Handler for syscall numbers the kernel does not implement: just
   fail with ENOSYS without entering the kernel. */
PRE(sys_ni_syscall)
{
   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
      VG_SYSNUM_STRING(SYSNO));
   PRE_REG_READ0(long, "ni_syscall");
   SET_STATUS_Failure( VKI_ENOSYS );
}
/* iopl(2): no memory args to check; the syscall is passed through. */
PRE(sys_iopl)
{
   PRINT("sys_iopl ( %ld )", ARG1);
   PRE_REG_READ1(long, "iopl", unsigned long, level);
}
PRE(sys_fsync)
{
   /* May block waiting for the device; let other threads run. */
   *flags |= SfMayBlock;
   PRINT("sys_fsync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fsync", unsigned int, fd);
}
PRE(sys_fdatasync)
{
   /* May block waiting for the device; let other threads run. */
   *flags |= SfMayBlock;
   PRINT("sys_fdatasync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
}
PRE(sys_msync)
{
   *flags |= SfMayBlock;
   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "msync",
                 unsigned long, start, vki_size_t, length, int, flags);
   /* The kernel flushes [start, start+length) to disk, which implies
      reading it. */
   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
}
// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
// versions of LiS (Linux Streams). They are not part of the kernel.
// Therefore, we have to provide this type ourself, rather than getting it
// from the kernel sources.
/* STREAMS strbuf, as used by LiS's getpmsg()/putpmsg() (see the
   comment above: not a kernel type, so defined here). */
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
PRE(sys_getpmsg)
{
   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   /* Check the ctrl and data strbufs (the kernel fills buf up to
      maxlen bytes), and the optional band/flags out-params. */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "getpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int *, bandp, int *, flagsp);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   /* maxlen <= 0 means "don't want this part" per STREAMS convention
      -- TODO confirm against LiS docs. */
   if (ctrl && ctrl->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   if (data && data->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   if (ARG4)
      PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   if (ARG5)
      PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
}
POST(sys_getpmsg)
{
   /* Mark the ctrl/data bytes the kernel actually delivered.  Only
      done when RES == 0 (presumably: full message retrieved --
      verify against LiS getpmsg semantics). */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   vg_assert(SUCCESS);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (RES == 0 && ctrl && ctrl->len > 0) {
      POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   }
   if (RES == 0 && data && data->len > 0) {
      POST_MEM_WRITE( (Addr)data->buf, data->len);
   }
}
PRE(sys_putpmsg)
{
   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   /* The kernel reads len bytes from each supplied strbuf. */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "putpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int, band, int, flags);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (ctrl && ctrl->len > 0)
      PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   if (data && data->len > 0)
      PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
}
PRE(sys_getitimer)
{
struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
PRE_timeval_WRITE( "getitimer(&value->it_value)", &(value->it_value));
}
POST(sys_getitimer)
{
   /* After a successful getitimer, both timevals in *value have been
      written by the kernel; mark them defined. */
   struct vki_itimerval *value;
   if (ARG2 == (Addr)NULL)
      return;
   value = (struct vki_itimerval*)ARG2;
   POST_timeval_WRITE( &(value->it_interval) );
   POST_timeval_WRITE( &(value->it_value) );
}
PRE(sys_setitimer)
{
PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
PRE_REG_READ3(long, "setitimer",
int, which,
struct itimerval *, value, struct itimerval *, ovalue);
if (ARG2 != (Addr)NULL) {
struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
PRE_timeval_READ( "setitimer(&value->it_interval)",
&(value->it_interval));
PRE_timeval_READ( "setitimer(&value->it_value)",
&(value->it_value));
}
if (ARG3 != (Addr)NULL) {
struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
&(ovalue->it_interval));
PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
&(ovalue->it_value));
}
}
POST(sys_setitimer)
{
   /* If an old-value buffer was supplied, the kernel has now filled
      both of its timevals; mark them defined. */
   struct vki_itimerval *oldval;
   if (ARG3 == (Addr)NULL)
      return;
   oldval = (struct vki_itimerval*)ARG3;
   POST_timeval_WRITE( &(oldval->it_interval) );
   POST_timeval_WRITE( &(oldval->it_value) );
}
PRE(sys_chroot)
{
   /* int chroot(const char *path); path must be a readable
      NUL-terminated string. */
   PRINT("sys_chroot ( %#lx )", ARG1);
   PRE_REG_READ1(long, "chroot", const char *, path);
   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
}
PRE(sys_madvise)
{
   /* int madvise(void *start, size_t length, int advice);
      Advice only -- the kernel does not read or write client memory
      through this call, so no PRE_MEM checks are needed. */
   *flags |= SfMayBlock;
   PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "madvise",
                 unsigned long, start, vki_size_t, length, int, advice);
}
#if HAVE_MREMAP
PRE(sys_mremap)
{
   // Nb: this is different to the glibc version described in the man pages,
   // which lacks the fifth 'new_address' argument.
   //
   // The fifth register is only meaningful (and only declared as read)
   // when MREMAP_FIXED is set in the flags argument; otherwise the
   // kernel ignores it.
   if (ARG4 & VKI_MREMAP_FIXED) {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
      PRE_REG_READ5(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags,
                    unsigned long, new_addr);
   } else {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4);
      PRE_REG_READ4(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags);
   }
   /* The remap is performed here in the tool (via aspacemgr) rather
      than by passing the syscall through to the kernel, so the result
      is set directly and the kernel call is skipped. */
   SET_STATUS_from_SysRes(
      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   );
}
#endif /* HAVE_MREMAP */
PRE(sys_nice)
{
   /* int nice(int inc); no memory arguments. */
   PRINT("sys_nice ( %ld )", ARG1);
   PRE_REG_READ1(long, "nice", int, inc);
}
PRE(sys_mlock)
{
   /* int mlock(const void *addr, size_t len); may block while faulting
      pages in. No client memory is read or written. */
   *flags |= SfMayBlock;
   PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
}
PRE(sys_munlock)
{
   /* int munlock(const void *addr, size_t len); counterpart of mlock. */
   *flags |= SfMayBlock;
   PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
}
PRE(sys_mlockall)
{
   /* int mlockall(int flags); may block while locking all pages. */
   *flags |= SfMayBlock;
   PRINT("sys_mlockall ( %lx )", ARG1);
   PRE_REG_READ1(long, "mlockall", int, flags);
}
PRE(sys_setpriority)
{
   /* int setpriority(int which, int who, int prio); registers only. */
   PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
}
PRE(sys_getpriority)
{
   /* int getpriority(int which, int who); registers only. */
   PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "getpriority", int, which, int, who);
}
PRE(sys_pwrite64)
{
   /* ssize_t pwrite64(int fd, const void *buf, size_t count, off64_t off);
      On 32-bit targets the 64-bit offset is passed split across two
      registers (ARG4/ARG5, merged via MERGE64); on 64-bit targets it
      fits in ARG4 alone. The buffer is read by the kernel. */
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
}
PRE(sys_sync)
{
   /* void sync(void); no arguments; may block flushing all buffers. */
   *flags |= SfMayBlock;
   PRINT("sys_sync ( )");
   PRE_REG_READ0(long, "sync");
}
PRE(sys_fstatfs)
{
   /* int fstatfs(int fd, struct statfs *buf); the kernel fills *buf. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(long, "fstatfs",
                 unsigned int, fd, struct statfs *, buf);
   PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
POST(sys_fstatfs)
{
   /* Success: the whole statfs struct has been written by the kernel. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
PRE(sys_fstatfs64)
{
   /* int fstatfs64(int fd, size_t size, struct statfs64 *buf);
      'size' (ARG2) is the caller-declared size of *buf, so that is the
      extent marked writable rather than sizeof(struct vki_statfs64). */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "fstatfs64",
                 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
}
POST(sys_fstatfs64)
{
   /* Success: ARG2 bytes of the buffer at ARG3 are now defined. */
   POST_MEM_WRITE( ARG3, ARG2 );
}
PRE(sys_getsid)
{
   /* pid_t getsid(pid_t pid); registers only. */
   PRINT("sys_getsid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
}
PRE(sys_pread64)
{
   /* ssize_t pread64(int fd, void *buf, size_t count, off64_t off);
      Like pwrite64, the 64-bit offset is split across two registers on
      32-bit targets. The buffer is written by the kernel; the actual
      number of bytes filled in is handled in the POST. */
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
}
POST(sys_pread64)
{
   /* Only the RES bytes actually read become defined; a zero return
      (EOF) defines nothing. */
   vg_assert(SUCCESS);
   if (RES > 0) {
      POST_MEM_WRITE( ARG2, RES );
   }
}
PRE(sys_mknod)
{
   /* int mknod(const char *pathname, mode_t mode, dev_t dev);
      pathname must be a readable NUL-terminated string. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   PRE_REG_READ3(long, "mknod",
                 const char *, pathname, int, mode, unsigned, dev);
   PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
}
PRE(sys_flock)
{
   /* int flock(int fd, int operation); may block waiting for the lock. */
   *flags |= SfMayBlock;
   PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
}
// Pre_read a char** argument.
//
// Walks the NULL-terminated pointer vector starting at 'a', checking
// (with tag s1) that each pointer slot itself is readable and (with tag
// s2) that each string it points at is a readable ASCIIZ string.  Stops
// at the terminating NULL entry.
static void pre_argv_envp(Addr a, ThreadId tid, const HChar* s1, const HChar* s2)
{
   Addr cursor = a;
   for (;;) {
      Addr* slot = (Addr*)cursor;
      Addr  str;
      PRE_MEM_READ( s1, (Addr)slot, sizeof(Addr) );
      str = *slot;
      if (str == 0)
         break;
      PRE_MEM_RASCIIZ( s2, str );
      cursor += sizeof(char*);
   }
}
static Bool i_am_the_only_thread ( void )
{
   /* True iff no client threads other than the caller remain alive. */
   Int nr_living = VG_(count_living_threads)();
   vg_assert(nr_living >= 1); /* at the very least, we exist */
   return nr_living == 1;
}
/* Wait until all other threads disappear. */
void VG_(reap_threads)(ThreadId self)
{
   /* Busy-wait, but cooperatively: yield the CPU and keep servicing
      pending signals for 'self' until every other thread has exited. */
   for (;;) {
      if (i_am_the_only_thread())
         break;
      VG_(vg_yield)();
      VG_(poll_signals)(self);
   }
   vg_assert(i_am_the_only_thread());
}
// XXX: prototype here seemingly doesn't match the prototype for i386-linux,
// but it seems to work nonetheless...
PRE(sys_execve)
{
HChar* path = NULL; /* path to executable */
HChar** envp = NULL;
HChar** argv = NULL;
HChar** arg2copy;
HChar* launcher_basename = NULL;
ThreadState* tst;
Int