Make dlerror(3) thread-safe.

I gave up trying to use the usual thread-local buffer idiom; calls to
calloc(3) and free(3) from any of the "dl" functions -- which live in
the dynamic linker -- end up resolving to the dynamic linker's stubs.
I tried to work around that, but was just making things more complicated.
This alternative costs us a well-known TLS slot (instead of the
dynamically-allocated TLS slot we'd have used otherwise, so no difference
there), plus an extra buffer inside every pthread_internal_t.

Bug: 5404023
Change-Id: Ie9614edd05b6d1eeaf7bf9172792d616c6361767
diff --git a/libc/bionic/pthread.c b/libc/bionic/pthread.c
index da3a551..719bc83 100644
--- a/libc/bionic/pthread.c
+++ b/libc/bionic/pthread.c
@@ -163,20 +163,20 @@
 }
 
 
-void  __init_tls(void**  tls, void*  thread)
-{
-    int  nn;
+void  __init_tls(void** tls, void* thread) {
+  ((pthread_internal_t*) thread)->tls = tls;
 
-    ((pthread_internal_t*)thread)->tls = tls;
+  // Zero-initialize all the slots.
+  for (size_t i = 0; i < BIONIC_TLS_SLOTS; ++i) {
+    tls[i] = NULL;
+  }
 
-    // slot 0 must point to the tls area, this is required by the implementation
-    // of the x86 Linux kernel thread-local-storage
-    tls[TLS_SLOT_SELF]      = (void*)tls;
-    tls[TLS_SLOT_THREAD_ID] = thread;
-    for (nn = TLS_SLOT_ERRNO; nn < BIONIC_TLS_SLOTS; nn++)
-       tls[nn] = 0;
+  // Slot 0 must point to the tls area, this is required by the implementation
+  // of the x86 Linux kernel thread-local-storage.
+  tls[TLS_SLOT_SELF]      = (void*) tls;
+  tls[TLS_SLOT_THREAD_ID] = thread;
 
-    __set_tls( (void*)tls );
+  __set_tls((void*) tls);
 }
 
 
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 58a809a..4bc81ef 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -45,6 +45,13 @@
     int                         internal_flags;
     __pthread_cleanup_t*        cleanup_stack;
     void**                      tls;         /* thread-local storage area */
+
+    /*
+     * The dynamic linker implements dlerror(3), which makes it hard for us to implement this
+     * per-thread buffer by simply using malloc(3) and free(3).
+     */
+#define __BIONIC_DLERROR_BUFFER_SIZE 512
+    char dlerror_buffer[__BIONIC_DLERROR_BUFFER_SIZE];
 } pthread_internal_t;
 
 int _init_thread(pthread_internal_t* thread, pid_t kernel_id, pthread_attr_t* attr,
diff --git a/libc/bionic/strerror.cpp b/libc/bionic/strerror.cpp
index 455dc52..a50c99f 100644
--- a/libc/bionic/strerror.cpp
+++ b/libc/bionic/strerror.cpp
@@ -41,6 +41,6 @@
   }
 
   LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, strerror, NL_TEXTMAX);
-  strerror_r(error_number, strerror_buffer, strerror_buffer_size);
-  return strerror_buffer;
+  strerror_r(error_number, strerror_tls_buffer, strerror_tls_buffer_size);
+  return strerror_tls_buffer;
 }
diff --git a/libc/bionic/strsignal.cpp b/libc/bionic/strsignal.cpp
index 9b046d4..c549e74 100644
--- a/libc/bionic/strsignal.cpp
+++ b/libc/bionic/strsignal.cpp
@@ -42,5 +42,5 @@
   }
 
   LOCAL_INIT_THREAD_LOCAL_BUFFER(char*, strsignal, NL_TEXTMAX);
-  return const_cast<char*>(__strsignal(signal_number, strsignal_buffer, strsignal_buffer_size));
+  return const_cast<char*>(__strsignal(signal_number, strsignal_tls_buffer, strsignal_tls_buffer_size));
 }
diff --git a/libc/bionic/ThreadLocalBuffer.h b/libc/private/ThreadLocalBuffer.h
similarity index 68%
rename from libc/bionic/ThreadLocalBuffer.h
rename to libc/private/ThreadLocalBuffer.h
index 99acdba..1c5e3f4 100644
--- a/libc/bionic/ThreadLocalBuffer.h
+++ b/libc/private/ThreadLocalBuffer.h
@@ -37,23 +37,24 @@
 // TODO: move __cxa_guard_acquire and __cxa_guard_release into libc.
 
 #define GLOBAL_INIT_THREAD_LOCAL_BUFFER(name) \
-  static pthread_once_t name ## _once; \
-  static pthread_key_t name ## _key; \
-  static void name ## _key_destroy(void* buffer) { \
+  static pthread_once_t __bionic_tls_ ## name ## _once; \
+  static pthread_key_t __bionic_tls_ ## name ## _key; \
+  static void __bionic_tls_ ## name ## _key_destroy(void* buffer) { \
     free(buffer); \
   } \
-  static void name ## _key_init() { \
-    pthread_key_create(&name ## _key, name ## _key_destroy); \
+  static void __bionic_tls_ ## name ## _key_init() { \
+    pthread_key_create(&__bionic_tls_ ## name ## _key, __bionic_tls_ ## name ## _key_destroy); \
   }
 
-// Leaves "name_buffer" and "name_byte_count" defined and initialized.
+// Leaves "name_tls_buffer" and "name_tls_buffer_size" defined and initialized.
 #define LOCAL_INIT_THREAD_LOCAL_BUFFER(type, name, byte_count) \
-  pthread_once(&name ## _once, name ## _key_init); \
-  type name ## _buffer = reinterpret_cast<type>(pthread_getspecific(name ## _key)); \
-  if (name ## _buffer == NULL) { \
-    name ## _buffer = reinterpret_cast<type>(malloc(byte_count)); \
-    pthread_setspecific(name ## _key, name ## _buffer); \
+  pthread_once(&__bionic_tls_ ## name ## _once, __bionic_tls_ ## name ## _key_init); \
+  type name ## _tls_buffer = \
+      reinterpret_cast<type>(pthread_getspecific(__bionic_tls_ ## name ## _key)); \
+  if (name ## _tls_buffer == NULL) { \
+    name ## _tls_buffer = reinterpret_cast<type>(calloc(1, byte_count)); \
+    pthread_setspecific(__bionic_tls_ ## name ## _key, name ## _tls_buffer); \
   } \
-  const size_t name ## _buffer_size = byte_count
+  const size_t name ## _tls_buffer_size __attribute__((unused)) = byte_count
 
 #endif // _BIONIC_THREAD_LOCAL_BUFFER_H_included
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 4658866..a626d21 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -52,7 +52,7 @@
  * thread-specific segment descriptor...
  */
 
-/* Well known TLS slots */
+/* Well-known TLS slots. */
 #define TLS_SLOT_SELF               0
 #define TLS_SLOT_THREAD_ID          1
 #define TLS_SLOT_ERRNO              2
@@ -60,23 +60,16 @@
 #define TLS_SLOT_OPENGL_API         3
 #define TLS_SLOT_OPENGL             4
 
-/* this slot is only used to pass information from the dynamic linker to
+#define TLS_SLOT_DLERROR            5
+
+#define TLS_SLOT_MAX_WELL_KNOWN     TLS_SLOT_DLERROR
+
+/* This slot is only used to pass information from the dynamic linker to
  * libc.so when the C library is loaded in to memory. The C runtime init
  * function will then clear it. Since its use is extremely temporary,
  * we reuse an existing location.
  */
-#define  TLS_SLOT_BIONIC_PREINIT    (TLS_SLOT_ERRNO+1)
-
-/* small technical note: it is not possible to call pthread_setspecific
- * on keys that are <= TLS_SLOT_MAX_WELL_KNOWN, which is why it is set to
- * TLS_SLOT_ERRNO.
- *
- * later slots like TLS_SLOT_OPENGL are pre-allocated through the use of
- * TLS_DEFAULT_ALLOC_MAP. this means that there is no need to use
- * pthread_key_create() to initialize them. on the other hand, there is
- * no destructor associated to them (we might need to implement this later)
- */
-#define TLS_SLOT_MAX_WELL_KNOWN     TLS_SLOT_ERRNO
+#define  TLS_SLOT_BIONIC_PREINIT    TLS_SLOT_OPENGL_API
 
 #define TLS_DEFAULT_ALLOC_MAP       0x0000001F
 
diff --git a/linker/dlfcn.cpp b/linker/dlfcn.cpp
index aefdd55..900803c 100644
--- a/linker/dlfcn.cpp
+++ b/linker/dlfcn.cpp
@@ -14,39 +14,52 @@
  * limitations under the License.
  */
 
+#include "linker.h"
+
 #include <dlfcn.h>
 #include <pthread.h>
 #include <stdio.h>
+#include <stdlib.h>
 
+#include <bionic/pthread_internal.h>
+#include <private/bionic_tls.h>
 #include <private/ScopedPthreadMutexLocker.h>
-
-#include "linker.h"
-#include "linker_format.h"
+#include <private/ThreadLocalBuffer.h>
 
 /* This file hijacks the symbols stubbed out in libdl.so. */
 
-static char dl_err_buf[1024];
-static const char* dl_err_str;
-
-#define likely(expr)   __builtin_expect (expr, 1)
-#define unlikely(expr) __builtin_expect (expr, 0)
-
 static pthread_mutex_t gDlMutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER;
 
-static void set_dlerror(const char* msg, const char* detail) {
-  if (detail != NULL) {
-    format_buffer(dl_err_buf, sizeof(dl_err_buf), "%s: %s", msg, detail);
-  } else {
-    format_buffer(dl_err_buf, sizeof(dl_err_buf), "%s", msg);
-  }
-  dl_err_str = (const char*) &dl_err_buf[0];
+static const char* __bionic_set_dlerror(char* new_value) {
+  void* tls = const_cast<void*>(__get_tls());
+  char** dlerror_slot = &reinterpret_cast<char**>(tls)[TLS_SLOT_DLERROR];
+
+  const char* old_value = *dlerror_slot;
+  *dlerror_slot = new_value;
+  return old_value;
 }
 
-void *dlopen(const char* filename, int flag) {
+static void __bionic_format_dlerror(const char* msg, const char* detail) {
+  char* buffer = __get_thread()->dlerror_buffer;
+  strlcpy(buffer, msg, __BIONIC_DLERROR_BUFFER_SIZE);
+  if (detail != NULL) {
+    strlcat(buffer, ": ", __BIONIC_DLERROR_BUFFER_SIZE);
+    strlcat(buffer, detail, __BIONIC_DLERROR_BUFFER_SIZE);
+  }
+
+  __bionic_set_dlerror(buffer);
+}
+
+const char* dlerror() {
+  const char* old_value = __bionic_set_dlerror(NULL);
+  return old_value;
+}
+
+void* dlopen(const char* filename, int flag) {
   ScopedPthreadMutexLocker locker(&gDlMutex);
   soinfo* result = find_library(filename);
   if (result == NULL) {
-    set_dlerror("dlopen failed", linker_get_error());
+    __bionic_format_dlerror("dlopen failed", linker_get_error());
     return NULL;
   }
   soinfo_call_constructors(result);
@@ -54,21 +67,15 @@
   return result;
 }
 
-const char* dlerror() {
-  const char* old_value = dl_err_str;
-  dl_err_str = NULL;
-  return (const char*) old_value;
-}
-
 void* dlsym(void* handle, const char* symbol) {
   ScopedPthreadMutexLocker locker(&gDlMutex);
 
-  if (unlikely(handle == 0)) {
-    set_dlerror("dlsym library handle is null", NULL);
+  if (handle == NULL) {
+    __bionic_format_dlerror("dlsym library handle is null", NULL);
     return NULL;
   }
-  if (unlikely(symbol == 0)) {
-    set_dlerror("dlsym symbol name is null", NULL);
+  if (symbol == NULL) {
+    __bionic_format_dlerror("dlsym symbol name is null", NULL);
     return NULL;
   }
 
@@ -89,18 +96,18 @@
     sym = soinfo_lookup(found, symbol);
   }
 
-  if (likely(sym != 0)) {
+  if (sym != NULL) {
     unsigned bind = ELF32_ST_BIND(sym->st_info);
 
-    if (likely((bind == STB_GLOBAL) && (sym->st_shndx != 0))) {
+    if (bind == STB_GLOBAL && sym->st_shndx != 0) {
       unsigned ret = sym->st_value + found->load_bias;
       return (void*) ret;
     }
 
-    set_dlerror("symbol found but not global", symbol);
+    __bionic_format_dlerror("symbol found but not global", symbol);
     return NULL;
   } else {
-    set_dlerror("undefined symbol", symbol);
+    __bionic_format_dlerror("undefined symbol", symbol);
     return NULL;
   }
 }
@@ -159,7 +166,7 @@
       /* st_other */ 0, \
       shndx }
 
-static Elf32_Sym libdl_symtab[] = {
+static Elf32_Sym gLibDlSymtab[] = {
   // Total length of libdl_info.strtab, including trailing 0.
   // This is actually the STH_UNDEF entry. Technically, it's
   // supposed to have st_name == 0, but instead, it points to an index
@@ -179,24 +186,24 @@
 
 // Fake out a hash table with a single bucket.
 // A search of the hash table will look through
-// libdl_symtab starting with index [1], then
-// use libdl_chains to find the next index to
-// look at.  libdl_chains should be set up to
-// walk through every element in libdl_symtab,
+// gLibDlSymtab starting with index [1], then
+// use gLibDlChains to find the next index to
+// look at.  gLibDlChains should be set up to
+// walk through every element in gLibDlSymtab,
 // and then end with 0 (sentinel value).
 //
-// That is, libdl_chains should look like
+// That is, gLibDlChains should look like
 // { 0, 2, 3, ... N, 0 } where N is the number
-// of actual symbols, or nelems(libdl_symtab)-1
-// (since the first element of libdl_symtab is not
+// of actual symbols, or nelems(gLibDlSymtab)-1
+// (since the first element of gLibDlSymtab is not
 // a real symbol).
 //
 // (see soinfo_elf_lookup())
 //
 // Note that adding any new symbols here requires
 // stubbing them out in libdl.
-static unsigned libdl_buckets[1] = { 1 };
-static unsigned libdl_chains[7] = { 0, 2, 3, 4, 5, 6, 0 };
+static unsigned gLibDlBuckets[1] = { 1 };
+static unsigned gLibDlChains[7] = { 0, 2, 3, 4, 5, 6, 0 };
 
 // This is used by the dynamic linker. Every process gets these symbols for free.
 soinfo libdl_info = {
@@ -210,12 +217,12 @@
     flags: FLAG_LINKED,
 
     strtab: ANDROID_LIBDL_STRTAB,
-    symtab: libdl_symtab,
+    symtab: gLibDlSymtab,
 
     nbucket: 1,
     nchain: 7,
-    bucket: libdl_buckets,
-    chain: libdl_chains,
+    bucket: gLibDlBuckets,
+    chain: gLibDlChains,
 
     plt_got: 0, plt_rel: 0, plt_rel_count: 0, rel: 0, rel_count: 0,
     preinit_array: 0, preinit_array_count: 0, init_array: 0, init_array_count: 0,
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 6e1c604..4ffa1e7 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -168,16 +168,15 @@
         ERROR(fmt "\n", ##x); \
     } while(0)
 
-const char *linker_get_error(void)
-{
-    return (const char *)&__linker_dl_err_buf[0];
+const char* linker_get_error() {
+  return &__linker_dl_err_buf[0];
 }
 
 /*
  * This function is an empty stub where GDB locates a breakpoint to get notified
  * about linker activity.
  */
-extern "C" void __attribute__((noinline)) __attribute__((visibility("default"))) rtld_db_dlactivity(void);
+extern "C" void __attribute__((noinline)) __attribute__((visibility("default"))) rtld_db_dlactivity();
 
 static r_debug _r_debug = {1, NULL, &rtld_db_dlactivity,
                                   RT_CONSISTENT, 0};
@@ -1363,8 +1362,7 @@
 
 /* Force any of the closed stdin, stdout and stderr to be associated with
    /dev/null. */
-static int nullify_closed_stdio (void)
-{
+static int nullify_closed_stdio() {
     int dev_null, i, status;
     int return_value = 0;
 
@@ -2056,5 +2054,8 @@
 
     // We have successfully fixed our own relocations. It's safe to run
     // the main part of the linker now.
-    return __linker_init_post_relocation(elfdata, linker_addr);
+    unsigned start_address = __linker_init_post_relocation(elfdata, linker_addr);
+
+    // Return the address that the calling assembly stub should jump to.
+    return start_address;
 }
diff --git a/tests/dlopen_test.cpp b/tests/dlopen_test.cpp
index 5ef32ad..5b5c7f6 100644
--- a/tests/dlopen_test.cpp
+++ b/tests/dlopen_test.cpp
@@ -58,6 +58,27 @@
 #endif
 }
 
+static void* ConcurrentDlErrorFn(void* arg) {
+  dlopen("/child/thread", RTLD_NOW);
+  return reinterpret_cast<void*>(strdup(dlerror()));
+}
+
+TEST(dlopen, dlerror_concurrent) {
+  dlopen("/main/thread", RTLD_NOW);
+  const char* main_thread_error = dlerror();
+  ASSERT_SUBSTR("/main/thread", main_thread_error);
+
+  pthread_t t;
+  ASSERT_EQ(0, pthread_create(&t, NULL, ConcurrentDlErrorFn, NULL));
+  void* result;
+  ASSERT_EQ(0, pthread_join(t, &result));
+  char* child_thread_error = static_cast<char*>(result);
+  ASSERT_SUBSTR("/child/thread", child_thread_error);
+  free(child_thread_error);
+
+  ASSERT_SUBSTR("/main/thread", main_thread_error);
+}
+
 TEST(dlopen, dlsym_failures) {
   dlerror(); // Clear any pending errors.
   void* self = dlopen(NULL, RTLD_NOW);
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index ea1491c..47469d8 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -29,7 +29,7 @@
   ASSERT_STREQ("Unknown error 1234", strerror(1234));
 }
 
-void* ConcurrentStrErrorFn(void* arg) {
+static void* ConcurrentStrErrorFn(void* arg) {
   bool equal = (strcmp("Unknown error 2002", strerror(2002)) == 0);
   return reinterpret_cast<void*>(equal);
 }
@@ -88,7 +88,7 @@
   ASSERT_STREQ("Unknown signal 1234", strsignal(1234)); // Too large.
 }
 
-void* ConcurrentStrSignalFn(void* arg) {
+static void* ConcurrentStrSignalFn(void* arg) {
   bool equal = (strcmp("Unknown signal 2002", strsignal(2002)) == 0);
   return reinterpret_cast<void*>(equal);
 }