Delay emutls deallocation for one round
On Android/Bionic, delay emutls deallocation to round 2 of the 4 pthread key
destructor rounds. Deallocation must run after C++ thread_local destructors
have been called, but before the final 2 rounds, because it calls free, and
jemalloc then needs another 2 rounds to free its own thread-specific data.
Bug: https://github.com/android-ndk/ndk/issues/687
Bug: b/78022094
Test: manual
Test: ./run_tests.py --rebuild --filter emutls-dealloc
Change-Id: I01bd634d97b7d22161b5cc8ca71b3cb94064a03e
diff --git a/gcc-4.9/libgcc/emutls.c b/gcc-4.9/libgcc/emutls.c
index cba9c3b..525db28 100644
--- a/gcc-4.9/libgcc/emutls.c
+++ b/gcc-4.9/libgcc/emutls.c
@@ -30,6 +30,22 @@
#include "libgcc_tm.h"
#include "gthr.h"
+#ifdef __BIONIC__
+/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
+ to round 2. We need to delay deallocation because:
+ - Android versions older than M lack __cxa_thread_atexit_impl, so apps
+ use a pthread key destructor to call C++ destructors.
+ - Apps might use __thread/thread_local variables in pthread destructors.
+ We can't wait until the final two rounds, because jemalloc needs two rounds
+ after the final malloc/free call to free its thread-specific data (see
+ https://reviews.llvm.org/D46978#1107507). Bugs:
+ - https://github.com/android-ndk/ndk/issues/687.
+ - http://b/16847284, http://b/78022094. */
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
+#else
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
+#endif
+
typedef unsigned int word __attribute__((mode(word)));
typedef unsigned int pointer __attribute__((mode(pointer)));
@@ -46,6 +62,7 @@
struct __emutls_array
{
+ pointer skip_destructor_rounds;
pointer size;
void **data[];
};
@@ -67,16 +84,30 @@
emutls_destroy (void *ptr)
{
struct __emutls_array *arr = ptr;
- pointer size = arr->size;
- pointer i;
- for (i = 0; i < size; ++i)
+ /* emutls is deallocated using a pthread key destructor. These destructors
+ are called in several rounds to accommodate destructor functions that
+ (re)initialize key values with pthread_setspecific. Delay the emutls
+ deallocation to accommodate other end-of-thread cleanup tasks like
+ calling thread_local destructors. */
+ if (arr->skip_destructor_rounds > 0)
{
- if (arr->data[i])
- free (arr->data[i][-1]);
+ arr->skip_destructor_rounds--;
+ __gthread_setspecific (emutls_key, (void *) arr);
}
+ else
+ {
+ pointer size = arr->size;
+ pointer i;
- free (ptr);
+ for (i = 0; i < size; ++i)
+ {
+ if (arr->data[i])
+ free (arr->data[i][-1]);
+ }
+
+ free (ptr);
+ }
}
static void
@@ -163,12 +194,14 @@
}
struct __emutls_array *arr = __gthread_getspecific (emutls_key);
+ const pointer hdr_size = sizeof (struct __emutls_array) / sizeof (void *);
if (__builtin_expect (arr == NULL, 0))
{
pointer size = offset + 32;
- arr = calloc (size + 1, sizeof (void *));
+ arr = calloc (size + hdr_size, sizeof (void *));
if (arr == NULL)
abort ();
+ arr->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
arr->size = size;
__gthread_setspecific (emutls_key, (void *) arr);
}
@@ -178,7 +211,7 @@
pointer size = orig_size * 2;
if (offset > size)
size = offset + 32;
- arr = realloc (arr, (size + 1) * sizeof (void *));
+ arr = realloc (arr, (size + hdr_size) * sizeof (void *));
if (arr == NULL)
abort ();
arr->size = size;