Revert to first-best-fit run/chunk allocation.

This effectively reverts 97c04a93838c4001688fe31bf018972b4696efe2 (Use
first-fit rather than first-best-fit run/chunk allocation.).  In some
pathological cases, first-fit search dominates allocation time, and the
allocator also tends not to converge as readily on a steady state of
memory layout, since precise allocation order has a bigger effect under
first-fit than it does under first-best-fit.

(cherry picked from upstream commit https://github.com/jemalloc/jemalloc/commit/aa2826621e1793db9faea31e803690ccbe36f14c)

Bug: 22172059
Change-Id: I3327939e1354c9a358dc54ce2b667feda890e42b
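
The following standalone sketch (illustrative only; the block sizes,
addresses, and helper names are invented, and none of this is jemalloc
code) contrasts the two policies on a toy free list kept in
size/address ("szad") order, mirroring how the runs_avail tree orders
available runs:

    /*
     * Toy model: free blocks sorted by (size, addr), the same "szad"
     * order the arena's runs_avail tree uses.
     */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
    	size_t    size; /* usable size of the free block */
    	uintptr_t addr; /* base address of the free block */
    } block_t;

    /* szad order: primary key size, secondary key address. */
    static const block_t avail[] = {
    	{4096, 0x7000}, {8192, 0x5000}, {16384, 0x1000}
    };
    #define	NAVAIL	(sizeof(avail) / sizeof(avail[0]))

    /*
     * First-best-fit: take the first block in szad order that is large
     * enough (one nsearch); size ties resolve to the lowest address.
     */
    static const block_t *
    first_best_fit(size_t size)
    {
    	size_t i;

    	for (i = 0; i < NAVAIL; i++) {
    		if (avail[i].size >= size)
    			return (&avail[i]);
    	}
    	return (NULL);
    }

    /*
     * First-fit (the reverted policy): among all blocks large enough,
     * take the one lowest in memory, however oversized it is.
     */
    static const block_t *
    first_fit(size_t size)
    {
    	const block_t *ret = NULL;
    	size_t i;

    	for (i = 0; i < NAVAIL; i++) {
    		if (avail[i].size >= size && (ret == NULL ||
    		    avail[i].addr < ret->addr))
    			ret = &avail[i];
    	}
    	return (ret);
    }

    int
    main(void)
    {
    	/* For an 8 KiB request the two policies diverge. */
    	printf("first-best-fit: 0x%lx\n",	/* 0x5000: exact fit */
    	    (unsigned long)first_best_fit(8192)->addr);
    	printf("first-fit:      0x%lx\n",	/* 0x1000: splits 16 KiB */
    	    (unsigned long)first_fit(8192)->addr);
    	return (0);
    }

For the 8 KiB request, first-best-fit returns the exact-size block at
0x5000 and leaves the 16 KiB block intact, whereas first-fit returns
0x1000 and would split the 16 KiB block.  That is the layout effect
described above, and the single ordered lookup is also what makes
first-best-fit cheap.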
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index c846103..08317be 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -329,7 +329,7 @@
 
 	/*
 	 * Size/address-ordered tree of this arena's available runs.  The tree
-	 * is used for first-fit run allocation.
+	 * is used for first-best-fit run allocation.
 	 */
 	arena_avail_tree_t	runs_avail;
 
diff --git a/src/arena.c b/src/arena.c
index e822b58..9b9d55f 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -979,53 +979,28 @@
 	return (err);
 }
 
-/* Do first-fit run selection. */
+/*
+ * Do first-best-fit run selection, i.e. select the lowest run that best fits.
+ * Run sizes are quantized, so not all candidate runs are necessarily exactly
+ * the same size.
+ */
 static arena_run_t *
-arena_run_first_fit(arena_t *arena, size_t size)
+arena_run_first_best_fit(arena_t *arena, size_t size)
 {
-	arena_run_t *run;
-	size_t search_size, max_size;
-
-	assert(size == s2u(size));
-	assert(size == PAGE_CEILING(size));
-
-	/*
-	 * Iterate over all size classes that are at least large enough to
-	 * satisfy the request, search for the lowest run of each size class,
-	 * and choose the lowest of the runs found.
-	 */
-	run = NULL;
-	for (search_size = run_quantize_first(size), max_size =
-	    run_quantize(arena_maxclass + large_pad); search_size <= max_size;
-	    search_size = run_quantize_next(search_size)) {
-		arena_run_t *currun;
-		arena_chunk_t *currun_chunk;
-		size_t currun_pageind, currun_size;
-		arena_chunk_map_misc_t *key = (arena_chunk_map_misc_t *)
-		    (search_size | CHUNK_MAP_KEY);
-		arena_chunk_map_misc_t *miscelm =
-		    arena_avail_tree_nsearch(&arena->runs_avail, key);
-		if (miscelm == NULL)
-			break;
-		currun = &miscelm->run;
-		if (run == NULL || (uintptr_t)currun < (uintptr_t)run)
-			run = currun;
-		/* Skip iteration(s) if run is larger than the search size. */
-		currun_chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(currun);
-		currun_pageind = arena_miscelm_to_pageind(miscelm);
-		currun_size = arena_mapbits_unallocated_size_get(currun_chunk,
-		    currun_pageind);
-		assert(currun_size >= search_size);
-		search_size = currun_size;
-	}
-
-	return (run);
+	size_t search_size = run_quantize_first(size);
+	arena_chunk_map_misc_t *key = (arena_chunk_map_misc_t *)
+	    (search_size | CHUNK_MAP_KEY);
+	arena_chunk_map_misc_t *miscelm =
+	    arena_avail_tree_nsearch(&arena->runs_avail, key);
+	if (miscelm == NULL)
+		return (NULL);
+	return (&miscelm->run);
 }
 
 static arena_run_t *
 arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero)
 {
-	arena_run_t *run = arena_run_first_fit(arena, s2u(size));
+	arena_run_t *run = arena_run_first_best_fit(arena, s2u(size));
 	if (run != NULL)
 		arena_run_split_large(arena, run, size, zero);
 	return (run);
@@ -1066,7 +1041,7 @@
 static arena_run_t *
 arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind)
 {
-	arena_run_t *run = arena_run_first_fit(arena, size);
+	arena_run_t *run = arena_run_first_best_fit(arena, size);
 	if (run != NULL)
 		arena_run_split_small(arena, run, size, binind);
 	return (run);
diff --git a/src/chunk.c b/src/chunk.c
index 7063410..7ec8af9 100644
--- a/src/chunk.c
+++ b/src/chunk.c
@@ -62,46 +62,20 @@
 	}
 }
 
-/* Do first-fit chunk selection. */
+/*
+ * Do first-best-fit chunk selection, i.e. select the lowest chunk that best
+ * fits.
+ */
 static extent_node_t *
-chunk_first_fit(arena_t *arena, extent_tree_t *chunks_szad,
+chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad,
     extent_tree_t *chunks_ad, size_t size)
 {
-	extent_node_t *node;
-	index_t index;
+	extent_node_t key;
 
 	assert(size == CHUNK_CEILING(size));
 
-	if (size == chunksize) {
-		/*
-		 * Any chunk will suffice, so simply select the one lowest in
-		 * memory.
-		 */
-		return (extent_tree_ad_first(chunks_ad));
-	}
-
-	/*
-	 * Iterate over all size classes that are at least large enough to
-	 * satisfy the request, search for the lowest chunk of each size class,
-	 * and choose the lowest of the chunks found.
-	 */
-	node = NULL;
-	for (index = size2index(size); index < NSIZES;) {
-		extent_node_t *curnode;
-		extent_node_t key;
-		extent_node_init(&key, arena, NULL,
-		    CHUNK_CEILING(index2size(index)), false);
-		curnode = extent_tree_szad_nsearch(chunks_szad, &key);
-		if (curnode == NULL)
-			break;
-		if (node == NULL || (uintptr_t)extent_node_addr_get(curnode) <
-		    (uintptr_t)extent_node_addr_get(node))
-			node = curnode;
-		assert(size2index(extent_node_size_get(curnode)) + 1 > index);
-		index = size2index(extent_node_size_get(curnode)) + 1;
-	}
-
-	return (node);
+	extent_node_init(&key, arena, NULL, size, false);
+	return (extent_tree_szad_nsearch(chunks_szad, &key));
 }
 
 static void *
@@ -127,7 +101,7 @@
 		extent_node_init(&key, arena, new_addr, alloc_size, false);
 		node = extent_tree_ad_search(chunks_ad, &key);
 	} else {
-		node = chunk_first_fit(arena, chunks_szad, chunks_ad,
+		node = chunk_first_best_fit(arena, chunks_szad, chunks_ad,
 		    alloc_size);
 	}
 	if (node == NULL || (new_addr != NULL && extent_node_size_get(node) <
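
Both simplifications rely on the szad ordering of runs_avail and
chunks_szad: entries sort by size first and address second, so a
single nsearch keyed on the (quantized) request size returns the
lowest-addressed candidate of the smallest size class that fits,
which is exactly the first-best-fit choice.  The reverted first-fit
loops instead issued one tree search per candidate size class and
compared addresses across all of the results.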