Add command line flag --max-threads=<integer> to set the maximum number
of threads that valgrind can handle. The limit can now be raised at
run time; no recompile is needed.
Part of fixing BZ #337869.


git-svn-id: svn://svn.valgrind.org/valgrind/trunk@14932 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/NEWS b/NEWS
index 0b7bb56..481948a 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,10 @@
   searching/extracting errors in output files mixing valgrind
   errors with program output.
 
+* New option --max-threads=<number> can be used to increase the
+  number of threads valgrind can handle. The default is 500 threads,
+  which should be more than enough for most applications.
+
 * ==================== FIXED BUGS ====================
 
 The following bugs have been fixed or resolved.  Note that "n-i-bz"
diff --git a/callgrind/main.c b/callgrind/main.c
index d4b3679..5e501bc 100644
--- a/callgrind/main.c
+++ b/callgrind/main.c
@@ -1703,9 +1703,9 @@
 
 /* struct timeval syscalltime[VG_N_THREADS]; */
 #if CLG_MICROSYSTIME
-ULong syscalltime[VG_N_THREADS];
+ULong *syscalltime;
 #else
-UInt syscalltime[VG_N_THREADS];
+UInt *syscalltime;
 #endif
 
 static
@@ -2071,6 +2071,12 @@
     VG_(track_post_deliver_signal)( & CLG_(post_signal) );
 
     CLG_(set_clo_defaults)();
+
+    syscalltime = CLG_MALLOC("cl.main.pci.1",
+                             VG_N_THREADS * sizeof syscalltime[0]);
+    for (UInt i = 0; i < VG_N_THREADS; ++i) {
+       syscalltime[i] = 0;
+    }
 }
 
 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
diff --git a/callgrind/threads.c b/callgrind/threads.c
index 023009f..7dca771 100644
--- a/callgrind/threads.c
+++ b/callgrind/threads.c
@@ -61,7 +61,7 @@
 /* current running thread */
 ThreadId CLG_(current_tid);
 
-static thread_info* thread[VG_N_THREADS];
+static thread_info** thread;
 
 thread_info** CLG_(get_threads)()
 {
@@ -75,7 +75,10 @@
 
 void CLG_(init_threads)()
 {
-    Int i;
+    UInt i;
+
+    thread = CLG_MALLOC("cl.threads.it.1", VG_N_THREADS * sizeof thread[0]);
+
     for(i=0;i<VG_N_THREADS;i++)
 	thread[i] = 0;
     CLG_(current_tid) = VG_INVALID_THREADID;
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 51ada98..b956ae8 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -212,6 +212,8 @@
 "                  recovered by stack scanning [5]\n"
 "    --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]\n"
 "              attempt to avoid expensive address-space-resync operations\n"
+"    --max-threads=<number>    maximum number of threads that valgrind can\n"
+"                              handle [%d]\n"
 "\n";
 
    const HChar usage2[] = 
@@ -317,7 +319,8 @@
                default_redzone_size       /* char* */,
                VG_(clo_vgdb_poll)         /* int */,
                VG_(vgdb_prefix_default)() /* char* */,
-               N_SECTORS_DEFAULT          /* int */
+               N_SECTORS_DEFAULT          /* int */,
+               MAX_THREADS_DEFAULT        /* int */
                ); 
    if (VG_(details).name) {
       VG_(printf)("  user options for %s:\n", VG_(details).name);
@@ -394,6 +397,9 @@
       else if VG_INT_CLO(str, "--max-stackframe", VG_(clo_max_stackframe)) {}
       else if VG_INT_CLO(str, "--main-stacksize", VG_(clo_main_stacksize)) {}
 
+      // Set up VG_(clo_max_threads); needed for VG_(tl_pre_clo_init)
+      else if VG_INT_CLO(str, "--max-threads", VG_(clo_max_threads)) {}
+
       // Set up VG_(clo_sim_hints). This is needed a.o. for an inner
       // running in an outer, to have "no-inner-prefix" enabled
       // as early as possible.
@@ -403,6 +409,9 @@
                             "no-nptl-pthread-stackcache",
                             VG_(clo_sim_hints)) {}
    }
+
+   /* For convenience */
+   VG_N_THREADS = VG_(clo_max_threads);
 }
 
 /* The main processing for command line options.  See comments above
@@ -539,6 +548,7 @@
       else if VG_STREQ(     arg, "-d")                   {}
       else if VG_STREQN(17, arg, "--max-stackframe=")    {}
       else if VG_STREQN(17, arg, "--main-stacksize=")    {}
+      else if VG_STREQN(14, arg, "--max-threads=")       {}
       else if VG_STREQN(12, arg, "--sim-hints=")         {}
       else if VG_STREQN(15, arg, "--profile-heap=")      {}
       else if VG_STREQN(20, arg, "--core-redzone-size=") {}
diff --git a/coregrind/m_options.c b/coregrind/m_options.c
index f9183e3..3f21cef 100644
--- a/coregrind/m_options.c
+++ b/coregrind/m_options.c
@@ -126,6 +126,7 @@
 Bool   VG_(clo_show_below_main)= False;
 Bool   VG_(clo_show_emwarns)   = False;
 Word   VG_(clo_max_stackframe) = 2000000;
+UInt   VG_(clo_max_threads)    = MAX_THREADS_DEFAULT;
 Word   VG_(clo_main_stacksize) = 0; /* use client's rlimit.stack */
 Bool   VG_(clo_wait_for_gdb)   = False;
 VgSmc  VG_(clo_smc_check)      = Vg_SmcStack;
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 3ec0c8d..dcb60bc 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -247,9 +247,9 @@
          return i;
       }
    }
-   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
-   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
-   VG_(core_panic)("VG_N_THREADS is too low");
+   VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
+               "and rerun valgrind\n");
+   VG_(core_panic)("Max number of threads is too low");
    /*NOTREACHED*/
 }
 
diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c
index 1c522cd..ac1d7be 100644
--- a/coregrind/m_syswrap/syswrap-main.c
+++ b/coregrind/m_syswrap/syswrap-main.c
@@ -49,6 +49,7 @@
 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
 #include "pub_core_syscall.h"
 #include "pub_core_machine.h"
+#include "pub_core_mallocfree.h"
 #include "pub_core_syswrap.h"
 
 #include "priv_types_n_macros.h"
@@ -1364,13 +1365,13 @@
    }
    SyscallInfo;
 
-SyscallInfo syscallInfo[VG_N_THREADS];
-
+SyscallInfo *syscallInfo;
 
 /* The scheduler needs to be able to zero out these records after a
    fork, hence this is exported from m_syswrap. */
 void VG_(clear_syscallInfo) ( Int tid )
 {
+   vg_assert(syscallInfo);
    vg_assert(tid >= 0 && tid < VG_N_THREADS);
    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
    syscallInfo[tid].status.what = SsIdle;
@@ -1383,6 +1384,9 @@
    if (init_done) 
       return;
    init_done = True;
+
+   syscallInfo = VG_(malloc)("scinfo", VG_N_THREADS * sizeof syscallInfo[0]);
+
    for (i = 0; i < VG_N_THREADS; i++) {
       VG_(clear_syscallInfo)( i );
    }
diff --git a/coregrind/m_threadstate.c b/coregrind/m_threadstate.c
index c53e406..b3adcde 100644
--- a/coregrind/m_threadstate.c
+++ b/coregrind/m_threadstate.c
@@ -32,6 +32,7 @@
 #include "pub_core_vki.h"
 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
 #include "pub_core_threadstate.h"
+#include "pub_core_mallocfree.h"    // VG_(malloc)
 #include "pub_core_libcassert.h"
 #include "pub_core_inner.h"
 #if defined(ENABLE_INNER_CLIENT_REQUEST)
@@ -44,8 +45,8 @@
 
 ThreadId VG_(running_tid) = VG_INVALID_THREADID;
 
-ThreadState VG_(threads)[VG_N_THREADS]
-            __attribute__((aligned(LibVEX_GUEST_STATE_ALIGN)));
+ThreadState *VG_(threads);
+UInt VG_N_THREADS;
 
 /*------------------------------------------------------------*/
 /*--- Operations.                                          ---*/
@@ -54,6 +55,18 @@
 void VG_(init_Threads)(void)
 {
    ThreadId tid;
+   UChar *addr, *aligned_addr;
+   Addr   misalign;
+
+   /* Over-allocate by ALIGN-1 bytes so an aligned start always fits. */
+   addr = VG_(malloc)("init_Threads",
+          VG_N_THREADS * sizeof VG_(threads)[0] + LibVEX_GUEST_STATE_ALIGN - 1);
+
+   /* Round up to the next boundary: add (ALIGN - remainder), not remainder. */
+   misalign = (Addr)addr % LibVEX_GUEST_STATE_ALIGN;
+   aligned_addr = misalign == 0
+                     ? addr : addr + (LibVEX_GUEST_STATE_ALIGN - misalign);
+   VG_(threads) = (ThreadState *)aligned_addr;
 
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       INNER_REQUEST(
diff --git a/coregrind/pub_core_options.h b/coregrind/pub_core_options.h
index 1b49554..3407586 100644
--- a/coregrind/pub_core_options.h
+++ b/coregrind/pub_core_options.h
@@ -295,6 +295,10 @@
    be? */
 extern Word VG_(clo_main_stacksize);
 
+/* The maximum number of threads we support. */
+#define MAX_THREADS_DEFAULT 500
+extern UInt VG_(clo_max_threads);
+
 /* If the same IP is found twice in a backtrace in a sequence of max
    VG_(clo_merge_recursive_frames) frames, then the recursive call
    is merged in the backtrace.
diff --git a/coregrind/pub_core_threadstate.h b/coregrind/pub_core_threadstate.h
index fafcce9..ba75f14 100644
--- a/coregrind/pub_core_threadstate.h
+++ b/coregrind/pub_core_threadstate.h
@@ -362,7 +362,7 @@
 /* A statically allocated array of threads.  NOTE: [0] is
    never used, to simplify the simulation of initialisers for
    LinuxThreads. */
-extern ThreadState VG_(threads)[VG_N_THREADS];
+extern ThreadState *VG_(threads);
 
 // The running thread.  m_scheduler should be the only other module
 // to write to this.
diff --git a/drd/drd_thread.c b/drd/drd_thread.c
index 8f2ca64..247dd91 100644
--- a/drd/drd_thread.c
+++ b/drd/drd_thread.c
@@ -65,7 +65,7 @@
 static ULong    s_conflict_set_bitmap2_creation_count;
 static ThreadId s_vg_running_tid  = VG_INVALID_THREADID;
 DrdThreadId     DRD_(g_drd_running_tid) = DRD_INVALID_THREADID;
-ThreadInfo      DRD_(g_threadinfo)[DRD_N_THREADS];
+ThreadInfo*     DRD_(g_threadinfo);
 struct bitmap*  DRD_(g_conflict_set);
 Bool DRD_(verify_conflict_set);
 static Bool     s_trace_context_switches = False;
@@ -142,6 +142,12 @@
 
 void DRD_(thread_init)(void)
 {
+   DRD_(g_threadinfo) = VG_(malloc)("drd.main.ti.1",
+                                DRD_N_THREADS * sizeof DRD_(g_threadinfo)[0]);
+   for (UInt i = 0; i < DRD_N_THREADS; ++i) {
+      static ThreadInfo initval;
+      DRD_(g_threadinfo)[i] = initval;
+   }
 }
 
 /**
@@ -152,7 +158,7 @@
  */
 DrdThreadId DRD_(VgThreadIdToDrdThreadId)(const ThreadId tid)
 {
-   int i;
+   UInt i;
 
    if (tid == VG_INVALID_THREADID)
       return DRD_INVALID_THREADID;
@@ -172,7 +178,7 @@
 /** Allocate a new DRD thread ID for the specified Valgrind thread ID. */
 static DrdThreadId DRD_(VgThreadIdToNewDrdThreadId)(const ThreadId tid)
 {
-   int i;
+   UInt i;
 
    tl_assert(DRD_(VgThreadIdToDrdThreadId)(tid) == DRD_INVALID_THREADID);
 
@@ -218,7 +224,7 @@
 /** Convert a POSIX thread ID into a DRD thread ID. */
 DrdThreadId DRD_(PtThreadIdToDrdThreadId)(const PThreadId tid)
 {
-   int i;
+   UInt i;
 
    if (tid != INVALID_POSIX_THREADID)
    {
@@ -336,7 +342,7 @@
 
 static void DRD_(thread_delayed_delete)(const DrdThreadId tid)
 {
-   int j;
+   UInt j;
 
    DRD_(g_threadinfo)[tid].vg_thread_exists = False;
    DRD_(g_threadinfo)[tid].posix_thread_exists = False;
@@ -476,9 +482,9 @@
 
 Int DRD_(thread_get_threads_on_alt_stack)(void)
 {
-   int i, n = 0;
+   int n = 0;
 
-   for (i = 1; i < DRD_N_THREADS; i++)
+   for (UInt i = 1; i < DRD_N_THREADS; i++)
       n += DRD_(g_threadinfo)[i].on_alt_stack;
    return n;
 }
diff --git a/drd/drd_thread.h b/drd/drd_thread.h
index d489965..28068a7 100644
--- a/drd/drd_thread.h
+++ b/drd/drd_thread.h
@@ -113,7 +113,7 @@
  */
 extern DrdThreadId    DRD_(g_drd_running_tid);
 /** Per-thread information managed by DRD. */
-extern ThreadInfo     DRD_(g_threadinfo)[DRD_N_THREADS];
+extern ThreadInfo*    DRD_(g_threadinfo);
 /** Conflict set for the currently running thread. */
 extern struct bitmap* DRD_(g_conflict_set);
 extern Bool           DRD_(verify_conflict_set);
@@ -323,7 +323,7 @@
 static __inline__
 Bool DRD_(thread_address_on_any_stack)(const Addr a)
 {
-   int i;
+   UInt i;
 
    for (i = 1; i < DRD_N_THREADS; i++)
    {
diff --git a/exp-sgcheck/sg_main.c b/exp-sgcheck/sg_main.c
index 22c1000..0b2c50e 100644
--- a/exp-sgcheck/sg_main.c
+++ b/exp-sgcheck/sg_main.c
@@ -1038,11 +1038,11 @@
    * a shadow stack of StackFrames, which is a double-linked list
    * an stack block interval tree
 */
-static  struct _StackFrame*          shadowStacks[VG_N_THREADS];
+static  struct _StackFrame**         shadowStacks;
 
-static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];
+static  WordFM** /* StackTreeNode */ siTrees;
 
-static  QCache                       qcaches[VG_N_THREADS];
+static  QCache*                      qcaches;
 
 
 /* Additionally, there is one global variable interval tree
@@ -1062,9 +1062,16 @@
 static void ourGlobals_init ( void )
 {
    Word i;
+
+   shadowStacks = sg_malloc( "di.sg_main.oGi.2",
+                             VG_N_THREADS * sizeof shadowStacks[0] );
+   siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
+   qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );
+
    for (i = 0; i < VG_N_THREADS; i++) {
       shadowStacks[i] = NULL;
       siTrees[i] = NULL;
+      qcaches[i] = (QCache){};
    }
    invalidate_all_QCaches();
    giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free, 
diff --git a/helgrind/tests/locked_vs_unlocked2.stderr.exp b/helgrind/tests/locked_vs_unlocked2.stderr.exp
index 3463b54..d80f32d 100644
--- a/helgrind/tests/locked_vs_unlocked2.stderr.exp
+++ b/helgrind/tests/locked_vs_unlocked2.stderr.exp
@@ -16,13 +16,13 @@
 
  Lock at 0x........ was first observed
    at 0x........: pthread_mutex_init (hg_intercepts.c:...)
-   by 0x........: main (locked_vs_unlocked2.c:58)
- Address 0x........ is 0 bytes inside data symbol "mx2a"
+   by 0x........: main (locked_vs_unlocked2.c:59)
+ Address 0x........ is 0 bytes inside data symbol "mx2b"
 
  Lock at 0x........ was first observed
    at 0x........: pthread_mutex_init (hg_intercepts.c:...)
-   by 0x........: main (locked_vs_unlocked2.c:59)
- Address 0x........ is 0 bytes inside data symbol "mx2b"
+   by 0x........: main (locked_vs_unlocked2.c:58)
+ Address 0x........ is 0 bytes inside data symbol "mx2a"
 
  Lock at 0x........ was first observed
    at 0x........: pthread_mutex_init (hg_intercepts.c:...)
diff --git a/include/pub_tool_threadstate.h b/include/pub_tool_threadstate.h
index 7220fca..ccd5d97 100644
--- a/include/pub_tool_threadstate.h
+++ b/include/pub_tool_threadstate.h
@@ -33,12 +33,8 @@
 
 #include "pub_tool_basics.h"   // ThreadID
 
-/* The maximum number of pthreads that we support.  This is
-   deliberately not very high since our implementation of some of the
-   scheduler algorithms is surely O(N) in the number of threads, since
-   that's simple, at least.  And (in practice) we hope that most
-   programs do not need many threads. */
-#define VG_N_THREADS 500
+/* The maximum number of pthreads that we support. */
+extern UInt VG_N_THREADS;
 
 /* Special magic value for an invalid ThreadId.  It corresponds to
    LinuxThreads using zero as the initial value for
diff --git a/none/tests/cmdline1.stdout.exp b/none/tests/cmdline1.stdout.exp
index 8367d4e..20ffe31 100644
--- a/none/tests/cmdline1.stdout.exp
+++ b/none/tests/cmdline1.stdout.exp
@@ -125,6 +125,8 @@
                   recovered by stack scanning [5]
     --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]
               attempt to avoid expensive address-space-resync operations
+    --max-threads=<number>    maximum number of threads that valgrind can
+                              handle [500]
 
   user options for Nulgrind:
     (none)
diff --git a/none/tests/cmdline2.stdout.exp b/none/tests/cmdline2.stdout.exp
index 2654de1..44b6511 100644
--- a/none/tests/cmdline2.stdout.exp
+++ b/none/tests/cmdline2.stdout.exp
@@ -125,6 +125,8 @@
                   recovered by stack scanning [5]
     --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]
               attempt to avoid expensive address-space-resync operations
+    --max-threads=<number>    maximum number of threads that valgrind can
+                              handle [500]
 
   user options for Nulgrind:
     (none)