Avoid atomic operations for dependent rtree reads.

Bug: 21326736
(cherry picked from commit 831d585c1dad2e6e39804db2a223064d295cb4fe)

Change-Id: I75b5ab51aed356cfb4bff136f38f7188716717cc
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
index 8093814..c253cdc 100644
--- a/include/jemalloc/internal/chunk.h
+++ b/include/jemalloc/internal/chunk.h
@@ -70,15 +70,15 @@
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
-extent_node_t	*chunk_lookup(const void *chunk);
+extent_node_t	*chunk_lookup(const void *chunk, bool dependent);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_))
 JEMALLOC_INLINE extent_node_t *
-chunk_lookup(const void *chunk)
+chunk_lookup(const void *ptr, bool dependent)
 {
 
-	return (rtree_get(&chunks_rtree, (uintptr_t)chunk));
+	return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent));
 }
 #endif
 
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 910ebf7..0268245 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -948,7 +948,7 @@
 	extent_node_t *node;
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
-	node = chunk_lookup(CHUNK_ADDR2BASE(ptr));
+	node = chunk_lookup(ptr, false);
 	if (node == NULL)
 		return (0);
 	/* Only arena chunks should be looked up via interior pointers. */
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h
index 7a8ebfd..28ae9d1 100644
--- a/include/jemalloc/internal/rtree.h
+++ b/include/jemalloc/internal/rtree.h
@@ -114,13 +114,14 @@
 rtree_node_elm_t	*rtree_child_tryread(rtree_node_elm_t *elm);
 rtree_node_elm_t	*rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm,
     unsigned level);
-extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm);
+extent_node_t	*rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm,
+    bool dependent);
 void	rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm,
     const extent_node_t *val);
 rtree_node_elm_t	*rtree_subtree_tryread(rtree_t *rtree, unsigned level);
 rtree_node_elm_t	*rtree_subtree_read(rtree_t *rtree, unsigned level);
 
-extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key);
+extent_node_t	*rtree_get(rtree_t *rtree, uintptr_t key, bool dependent);
 bool	rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val);
 #endif
 
@@ -179,10 +180,25 @@
 }
 
 JEMALLOC_INLINE extent_node_t *
-rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm)
+rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent)
 {
 
-	return (atomic_read_p(&elm->pun));
+	if (dependent) {
+		/*
+		 * Reading a val on behalf of a pointer to a valid allocation is
+		 * guaranteed to be a clean read even without synchronization,
+		 * because the rtree update became visible in memory before the
+		 * pointer came into existence.
+		 */
+		return (elm->val);
+	} else {
+		/*
+		 * An arbitrary read, e.g. on behalf of ivsalloc(), may not be
+		 * dependent on a previous rtree write, which means a stale read
+		 * could result if synchronization were omitted here.
+		 */
+		return (atomic_read_p(&elm->pun));
+	}
 }
 
 JEMALLOC_INLINE void
@@ -216,7 +232,7 @@
 }
 
 JEMALLOC_INLINE extent_node_t *
-rtree_get(rtree_t *rtree, uintptr_t key)
+rtree_get(rtree_t *rtree, uintptr_t key, bool dependent)
 {
 	uintptr_t subkey;
 	unsigned i, start_level;
@@ -226,7 +242,7 @@
 
 	for (i = start_level, node = rtree_subtree_tryread(rtree, start_level);
 	    /**/; i++, node = child) {
-		if (unlikely(!rtree_node_valid(node)))
+		if (!dependent && unlikely(!rtree_node_valid(node)))
 			return (NULL);
 		subkey = rtree_subkey(rtree, key, i);
 		if (i == rtree->height - 1) {
@@ -234,7 +250,8 @@
 			 * node is a leaf, so it contains values rather than
 			 * child pointers.
 			 */
-			return (rtree_val_read(rtree, &node[subkey]));
+			return (rtree_val_read(rtree, &node[subkey],
+			    dependent));
 		}
 		assert(i < rtree->height - 1);
 		child = rtree_child_tryread(&node[subkey]);
diff --git a/src/huge.c b/src/huge.c
index 32af205..6e6824d 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -8,7 +8,7 @@
 {
 	extent_node_t *node;
 
-	node = chunk_lookup(ptr);
+	node = chunk_lookup(ptr, true);
 	assert(!extent_node_achunk_get(node));
 
 	return (node);
diff --git a/test/unit/rtree.c b/test/unit/rtree.c
index 496e03a..3f95554 100644
--- a/test/unit/rtree.c
+++ b/test/unit/rtree.c
@@ -22,7 +22,7 @@
 		rtree_t rtree;
 		assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc),
 		    "Unexpected rtree_new() failure");
-		assert_ptr_null(rtree_get(&rtree, 0),
+		assert_ptr_null(rtree_get(&rtree, 0, false),
 		    "rtree_get() should return NULL for empty tree");
 		rtree_delete(&rtree);
 	}
@@ -40,11 +40,11 @@
 		    "Unexpected rtree_new() failure");
 
 		rtree_set(&rtree, 0, &node_a);
-		assert_ptr_eq(rtree_get(&rtree, 0), &node_a,
+		assert_ptr_eq(rtree_get(&rtree, 0, true), &node_a,
 		    "rtree_get() should return previously set value");
 
 		rtree_set(&rtree, ~((uintptr_t)0), &node_b);
-		assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0)), &node_b,
+		assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0), true), &node_b,
 		    "rtree_get() should return previously set value");
 
 		rtree_delete(&rtree);
@@ -68,15 +68,15 @@
 		for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) {
 			rtree_set(&rtree, keys[j], &node);
 			for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) {
-				assert_ptr_eq(rtree_get(&rtree, keys[k]), &node,
-				    "rtree_get() should return previously set "
-				    "value and ignore insignificant key bits; "
-				    "i=%u, j=%u, k=%u, set key=%#"PRIxPTR", "
-				    "get key=%#"PRIxPTR, i, j, k, keys[j],
-				    keys[k]);
+				assert_ptr_eq(rtree_get(&rtree, keys[k], true),
+				    &node, "rtree_get() should return "
+				    "previously set value and ignore "
+				    "insignificant key bits; i=%u, j=%u, k=%u, "
+				    "set key=%#"PRIxPTR", get key=%#"PRIxPTR, i,
+				    j, k, keys[j], keys[k]);
 			}
 			assert_ptr_null(rtree_get(&rtree,
-			    (((uintptr_t)1) << (sizeof(uintptr_t)*8-i))),
+			    (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)), false),
 			    "Only leftmost rtree leaf should be set; "
 			    "i=%u, j=%u", i, j);
 			rtree_set(&rtree, keys[j], NULL);
@@ -107,21 +107,21 @@
 		for (j = 0; j < NSET; j++) {
 			keys[j] = (uintptr_t)gen_rand64(sfmt);
 			rtree_set(&rtree, keys[j], &node);
-			assert_ptr_eq(rtree_get(&rtree, keys[j]), &node,
+			assert_ptr_eq(rtree_get(&rtree, keys[j], true), &node,
 			    "rtree_get() should return previously set value");
 		}
 		for (j = 0; j < NSET; j++) {
-			assert_ptr_eq(rtree_get(&rtree, keys[j]), &node,
+			assert_ptr_eq(rtree_get(&rtree, keys[j], true), &node,
 			    "rtree_get() should return previously set value");
 		}
 
 		for (j = 0; j < NSET; j++) {
 			rtree_set(&rtree, keys[j], NULL);
-			assert_ptr_null(rtree_get(&rtree, keys[j]),
+			assert_ptr_null(rtree_get(&rtree, keys[j], true),
 			    "rtree_get() should return previously set value");
 		}
 		for (j = 0; j < NSET; j++) {
-			assert_ptr_null(rtree_get(&rtree, keys[j]),
+			assert_ptr_null(rtree_get(&rtree, keys[j], true),
 			    "rtree_get() should return previously set value");
 		}