Impose a minimum tcache count for small size classes.

Now that small allocation runs have fewer regions due to run metadata
residing in chunk headers, sizing each small bin's tcache at twice the
number of regions per run can yield too few cache slots.  Impose an
explicit minimum tcache count to make sure that the tcache adequately
amortizes synchronization overhead.

Bug: 21326736
(cherry picked from commit 83d543f8689bc7c6142179a5491bdf2a31b5cfc7)

Change-Id: I4178902b63ed310100019fee0805a11839de740f
diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h
index 99524bf..b248114 100644
--- a/include/jemalloc/internal/tcache.h
+++ b/include/jemalloc/internal/tcache.h
@@ -17,6 +17,11 @@
 #define	TCACHE_STATE_MAX		TCACHE_STATE_PURGATORY
 
 /*
+ * Absolute minimum number of cache slots for each small bin.
+ */
+#define	TCACHE_NSLOTS_SMALL_MIN		20
+
+/*
  * Absolute maximum number of cache slots for each small bin in the thread
  * cache.  This is an additional constraint beyond that imposed as: twice the
  * number of regions per run for this size class.
diff --git a/src/tcache.c b/src/tcache.c
index 83e7e36..3814365 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -515,7 +515,11 @@
 		return (true);
 	stack_nelms = 0;
 	for (i = 0; i < NBINS; i++) {
-		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
+			tcache_bin_info[i].ncached_max =
+			    TCACHE_NSLOTS_SMALL_MIN;
+		} else if ((arena_bin_info[i].nregs << 1) <=
+		    TCACHE_NSLOTS_SMALL_MAX) {
 			tcache_bin_info[i].ncached_max =
 			    (arena_bin_info[i].nregs << 1);
 		} else {