Do a second key cleanup in pthread_exit.

During pthread_exit, the keys are cleaned. Unfortunately, a call to
free occurs after the cleanup and the memory for some of the keys
is recreated when using jemalloc. The solution is to do the key
cleanup twice.

Also, modify the pthread_detach__leak test to be less flaky
when run on a jemalloc system.

Bug: 16513133
Change-Id: Ic17e8344bdc1ba053c4f5b6d827a4c19c57860c1
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index a6bb363..6cd5311 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -112,6 +112,12 @@
   }
   pthread_mutex_unlock(&g_thread_list_lock);
 
+  // Perform a second key cleanup. When using jemalloc, a call to free from
+  // _pthread_internal_remove_locked causes the memory associated with a key
+  // to be reallocated.
+  // TODO: When b/16847284 is fixed this call can be removed.
+  pthread_key_clean_all();
+
   if (user_allocated_stack) {
     // Cleaning up this thread's stack is the creator's responsibility, not ours.
     __exit(0);
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 4da003f..5328e48 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -400,27 +400,36 @@
 }
 
 TEST(pthread, pthread_detach__leak) {
-  size_t initial_bytes = mallinfo().uordblks;
+  size_t initial_bytes = 0;
+  // Run this loop more than once since the first loop causes some memory
+  // to be allocated permenantly. Run an extra loop to help catch any subtle
+  // memory leaks.
+  for (size_t loop = 0; loop < 3; loop++) {
+    // Set the initial bytes on the second loop since the memory in use
+    // should have stabilized.
+    if (loop == 1) {
+      initial_bytes = mallinfo().uordblks;
+    }
 
-  pthread_attr_t attr;
-  ASSERT_EQ(0, pthread_attr_init(&attr));
-  ASSERT_EQ(0, pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE));
+    pthread_attr_t attr;
+    ASSERT_EQ(0, pthread_attr_init(&attr));
+    ASSERT_EQ(0, pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE));
 
-  std::vector<pthread_t> threads;
-  for (size_t i = 0; i < 32; ++i) {
-    pthread_t t;
-    ASSERT_EQ(0, pthread_create(&t, &attr, IdFn, NULL));
-    threads.push_back(t);
-  }
+    std::vector<pthread_t> threads;
+    for (size_t i = 0; i < 32; ++i) {
+      pthread_t t;
+      ASSERT_EQ(0, pthread_create(&t, &attr, IdFn, NULL));
+      threads.push_back(t);
+    }
 
-  sleep(1);
+    sleep(1);
 
-  for (size_t i = 0; i < 32; ++i) {
-    ASSERT_EQ(0, pthread_detach(threads[i])) << i;
+    for (size_t i = 0; i < 32; ++i) {
+      ASSERT_EQ(0, pthread_detach(threads[i])) << i;
+    }
   }
 
   size_t final_bytes = mallinfo().uordblks;
-
   int leaked_bytes = (final_bytes - initial_bytes);
 
   // User code (like this test) doesn't know how large pthread_internal_t is.