Avoid using malloc debug code after exit.

I wrote a new unit test that would fail on the old version of the
code.

On a walleye big CPU, the added locking costs about 40ns-50ns (going from ~430ns to ~480ns).
I think this is an acceptable performance degradation.

Bug: 131867816

Test: New unit tests pass.
Change-Id: I4c0f4373fb0694bf29c3824dbb1224a8a17e211e
Merged-In: I4c0f4373fb0694bf29c3824dbb1224a8a17e211e
(cherry picked from commit d269fcc935b276502b9e47a575d76693fe1b8455)
diff --git a/libc/malloc_debug/malloc_debug.cpp b/libc/malloc_debug/malloc_debug.cpp
index 91e1d26..53fcead 100644
--- a/libc/malloc_debug/malloc_debug.cpp
+++ b/libc/malloc_debug/malloc_debug.cpp
@@ -29,6 +29,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <malloc.h>
+#include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -103,6 +104,32 @@
 __END_DECLS
 // ------------------------------------------------------------------------
 
+// RAII read lock on a single static rwlock: an instance holds a read lock for
+// the scope it lives in, so several holders can run at the same time.
+class ScopedConcurrentLock {
+ public:
+  ScopedConcurrentLock() { pthread_rwlock_rdlock(&lock_); }
+  ~ScopedConcurrentLock() { pthread_rwlock_unlock(&lock_); }
+
+  // Sets up the shared lock so that a pending writer blocks new readers.
+  static void Init() {
+    pthread_rwlockattr_t attr;
+    // The attribute object must be initialized before it is modified or used.
+    pthread_rwlockattr_init(&attr);
+    pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+    pthread_rwlock_init(&lock_, &attr);
+    pthread_rwlockattr_destroy(&attr);
+  }
+
+  // Takes the write lock, waiting for current read-lock holders to finish. There
+  // is no matching unlock, so later ScopedConcurrentLock constructors block.
+  static void BlockAllOperations() { pthread_rwlock_wrlock(&lock_); }
+
+ private:
+  static pthread_rwlock_t lock_;
+};
+pthread_rwlock_t ScopedConcurrentLock::lock_;
+
 static void InitAtfork() {
   static pthread_once_t atfork_init = PTHREAD_ONCE_INIT;
   pthread_once(&atfork_init, []() {
@@ -257,6 +284,8 @@
     info_log("%s: malloc debug enabled", getprogname());
   }
 
+  ScopedConcurrentLock::Init();
+
   return true;
 }
 
@@ -265,6 +294,10 @@
     return;
   }
 
+  // Make sure that there are no other threads doing debug allocations
+  // before we kill everything.
+  ScopedConcurrentLock::BlockAllOperations();
+
   // Turn off capturing allocations calls.
   DebugDisableSet(true);
 
@@ -292,6 +325,8 @@
 
 void debug_get_malloc_leak_info(uint8_t** info, size_t* overall_size, size_t* info_size,
                                 size_t* total_memory, size_t* backtrace_size) {
+  ScopedConcurrentLock lock;
+
   ScopedDisableDebugCalls disable;
 
   // Verify the arguments.
@@ -325,6 +360,7 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return g_dispatch->malloc_usable_size(pointer);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (!VerifyPointer(pointer, "malloc_usable_size")) {
@@ -388,6 +424,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->malloc(size);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   void* pointer = InternalMalloc(size);
@@ -463,6 +500,7 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return g_dispatch->free(pointer);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (g_debug->config().options() & RECORD_ALLOCS) {
@@ -480,6 +518,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->memalign(alignment, bytes);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (bytes == 0) {
@@ -558,6 +597,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->realloc(pointer, bytes);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (pointer == nullptr) {
@@ -676,6 +716,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->calloc(nmemb, bytes);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   size_t size;
@@ -737,6 +778,8 @@
   if (DebugCallsDisabled() || !g_debug->TrackPointers()) {
     return g_dispatch->malloc_info(options, fp);
   }
+  ScopedConcurrentLock lock;
+  ScopedDisableDebugCalls disable;
 
   MallocXmlElem root(fp, "malloc", "version=\"debug-malloc-1\"");
   std::vector<ListInfoType> list;
@@ -786,6 +829,7 @@
 
 int debug_iterate(uintptr_t base, size_t size, void (*callback)(uintptr_t, size_t, void*),
                   void* arg) {
+  ScopedConcurrentLock lock;
   if (g_debug->TrackPointers()) {
     // Since malloc is disabled, don't bother acquiring any locks.
     for (auto it = PointerData::begin(); it != PointerData::end(); ++it) {
@@ -800,6 +844,7 @@
 }
 
 void debug_malloc_disable() {
+  ScopedConcurrentLock lock;
   g_dispatch->malloc_disable();
   if (g_debug->pointer) {
     g_debug->pointer->PrepareFork();
@@ -807,6 +852,7 @@
 }
 
 void debug_malloc_enable() {
+  ScopedConcurrentLock lock;
   if (g_debug->pointer) {
     g_debug->pointer->PostForkParent();
   }
@@ -817,6 +863,7 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return 0;
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (!(g_debug->config().options() & BACKTRACE)) {
@@ -870,6 +917,7 @@
 }
 
 bool debug_write_malloc_leak_info(FILE* fp) {
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   std::lock_guard<std::mutex> guard(g_dump_lock);
@@ -883,6 +931,7 @@
 }
 
 void debug_dump_heap(const char* file_name) {
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   std::lock_guard<std::mutex> guard(g_dump_lock);
diff --git a/libc/malloc_debug/tests/malloc_debug_system_tests.cpp b/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
index 71e8ebf..f85c45b 100644
--- a/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
+++ b/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
@@ -42,13 +42,15 @@
 #include <log/log.h>
 
 #include <string>
+#include <thread>
 #include <vector>
 
 #include "private/bionic_malloc.h"
 
-static constexpr time_t kTimeoutSeconds = 5;
+static constexpr time_t kTimeoutSeconds = 10;
 
-static void Exec(const char* test_name, const char* debug_options, pid_t* pid) {
+static void Exec(const char* test_name, const char* debug_options, pid_t* pid, int exit_code = 0,
+                 time_t timeout_seconds = kTimeoutSeconds) {
   int fds[2];
   ASSERT_NE(-1, pipe(fds));
   ASSERT_NE(-1, fcntl(fds[0], F_SETFL, O_NONBLOCK));
@@ -94,7 +96,8 @@
       output.append(buffer.data(), bytes);
     }
 
-    if ((time(nullptr) - start_time) > kTimeoutSeconds) {
+    if ((time(nullptr) - start_time) > timeout_seconds) {
+      kill(*pid, SIGINT);
       break;
     }
   }
@@ -109,7 +112,7 @@
       done = true;
       break;
     }
-    if ((time(nullptr) - start_time) > kTimeoutSeconds) {
+    if ((time(nullptr) - start_time) > timeout_seconds) {
       break;
     }
   }
@@ -119,21 +122,23 @@
     while (true) {
       int kill_status;
       int wait_pid = waitpid(*pid, &kill_status, WNOHANG);
-      if (wait_pid == *pid || (time(nullptr) - start_time) > kTimeoutSeconds) {
+      if (wait_pid == *pid || (time(nullptr) - start_time) > timeout_seconds) {
         break;
       }
     }
   }
 
   ASSERT_TRUE(done) << "Timed out waiting for waitpid, output:\n" << output;
-  ASSERT_EQ(0, WEXITSTATUS(status)) << "Output:\n" << output;
+  ASSERT_FALSE(WIFSIGNALED(status))
+      << "Failed with signal " << WTERMSIG(status) << "\nOutput:\n" << output;
+  ASSERT_EQ(exit_code, WEXITSTATUS(status)) << "Output:\n" << output;
 }
 
-static void GetLogStr(pid_t pid, std::string* log_str) {
+static void GetLogStr(pid_t pid, std::string* log_str, log_id log = LOG_ID_MAIN) {
   log_str->clear();
 
   logger_list* list;
-  list = android_logger_list_open(LOG_ID_MAIN, ANDROID_LOG_RDONLY | ANDROID_LOG_NONBLOCK, 1000, pid);
+  list = android_logger_list_open(log, ANDROID_LOG_RDONLY | ANDROID_LOG_NONBLOCK, 1000, pid);
   ASSERT_TRUE(list != nullptr);
 
   while (true) {
@@ -168,7 +173,8 @@
   android_logger_list_close(list);
 }
 
-static void FindStrings(pid_t pid, std::vector<const char*> match_strings) {
+static void FindStrings(pid_t pid, std::vector<const char*> match_strings,
+                        time_t timeout_seconds = kTimeoutSeconds) {
   std::string log_str;
   time_t start = time(nullptr);
   bool found_all;
@@ -184,7 +190,7 @@
     if (found_all) {
       return;
     }
-    if ((time(nullptr) - start) > kTimeoutSeconds) {
+    if ((time(nullptr) - start) > timeout_seconds) {
       break;
     }
   }
@@ -414,3 +420,47 @@
 TEST(MallocDebugSystemTest, verify_leak_allocation_limit) {
   VerifyLeak("leak_memory_limit_");
 }
+
+static constexpr int kExpectedExitCode = 30;
+
+// Run in a child process: exits while 32 detached threads keep calling malloc/free.
+TEST(MallocTests, DISABLED_exit_while_threads_allocating) {
+  // One bit per worker thread, set after that thread completes a malloc/free pair.
+  std::atomic_uint32_t thread_mask(0);
+
+  for (size_t i = 0; i < 32; i++) {
+    std::thread malloc_thread([&thread_mask, i] {
+      while (true) {
+        void* ptr = malloc(100);
+        if (ptr == nullptr) {
+          exit(1000);
+        }
+        free(ptr);
+        thread_mask.fetch_or(1U << i);
+      }
+    });
+    malloc_thread.detach();
+  }
+
+  // Wait until each thread has done at least one allocation.
+  while (thread_mask.load() != 0xffffffff) {}
+  exit(kExpectedExitCode);
+}
+
+// Verify that exiting while other threads are doing malloc operations
+// does not cause any crashes.
+TEST(MallocDebugSystemTest, exit_while_threads_allocating) {
+  for (size_t i = 0; i < 100; i++) {
+    SCOPED_TRACE(::testing::Message() << "Run " << i);
+    pid_t pid;
+    ASSERT_NO_FATAL_FAILURE(Exec("MallocTests.DISABLED_exit_while_threads_allocating",
+                                 "verbose backtrace", &pid, kExpectedExitCode));
+
+    ASSERT_NO_FATAL_FAILURE(FindStrings(pid, std::vector<const char*>{"malloc debug enabled"}));
+
+    std::string log_str;
+    ASSERT_NO_FATAL_FAILURE(GetLogStr(pid, &log_str, LOG_ID_CRASH));
+    ASSERT_TRUE(log_str.find("Fatal signal") == std::string::npos)
+        << "Found crash in log.\nLog message: " << log_str;
+  }
+}