Clear pthread_internal_t allocated on user provided stack.

Several parts in pthread_internal_t should be initialized
to zero, like tls, key_data and thread_local_dtors. So
just clear the whole pthread_internal_t is more convenient.

Bug: 25990348
Change-Id: Ibb6d1200ea5e6e1afbc77971f179197e8239f6ea
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index ce43009..34826db 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -53,13 +53,6 @@
 
 // This code is used both by each new pthread and the code that initializes the main thread.
 void __init_tls(pthread_internal_t* thread) {
-  if (thread->mmap_size == 0) {
-    // If the TLS area was not allocated by mmap(), it may not have been cleared to zero.
-    // So assume the worst and zero the TLS area.
-    memset(thread->tls, 0, sizeof(thread->tls));
-    memset(thread->key_data, 0, sizeof(thread->key_data));
-  }
-
   // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
   thread->tls[TLS_SLOT_SELF] = thread->tls;
   thread->tls[TLS_SLOT_THREAD_ID] = thread;
@@ -175,6 +168,11 @@
                 (reinterpret_cast<uintptr_t>(stack_top) - sizeof(pthread_internal_t)) & ~0xf);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
+  if (mmap_size == 0) {
+    // If thread was not allocated by mmap(), it may not have been cleared to zero.
+    // So assume the worst and zero it.
+    memset(thread, 0, sizeof(pthread_internal_t));
+  }
   attr->stack_size = stack_top - reinterpret_cast<uint8_t*>(attr->stack_base);
 
   thread->mmap_size = mmap_size;