Split main thread init into early+late functions

Split __libc_init_main_thread into __libc_init_main_thread_early and
__libc_init_main_thread_late. The early function is called very early in
the startup of both the dynamic linker and static executables. It
initializes the global auxv pointer and just enough TLS memory to make
system calls, access errno, and run -fstack-protector code (with a zero
cookie for now, because the code that generates the cookie is too complex
to run this early).

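After the linker is relocated, __libc_init_main_thread_late finishes
thread initialization, including allocating bionic_tls, registering the
tid address with the kernel, and installing the real stack protector
cookie. Static executables call the two functions back to back.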

Bug: none
Test: bionic unit tests
Change-Id: I6fcd8d7587a380f8bd649c817b40a3a6cc1d2ee0
Merged-In: I6fcd8d7587a380f8bd649c817b40a3a6cc1d2ee0
(cherry picked from commit 39bc44bb0e03514e8d92f8c0ceb0b5901e27a485)
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index be9d32e..cef3d7d 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -36,6 +36,7 @@
 #include "private/bionic_ssp.h"
 #include "pthread_internal.h"
 
+extern "C" pid_t __getpid();
 extern "C" int __set_tid_address(int* tid_address);
 
 // Declared in "private/bionic_ssp.h".
@@ -61,23 +62,36 @@
 // -fno-stack-protector because it's responsible for setting up the main
 // thread's TLS (which stack protector relies on).
 
+// Do enough setup to:
+//  - Let the dynamic linker invoke system calls (and access errno)
+//  - Ensure that TLS access functions (__get_{tls,thread}) never return NULL
+//  - Allow the stack protector to work (with a zero cookie)
+// Avoid doing much more because, when this code is called within the dynamic
+// linker, the linker binary hasn't been relocated yet, so certain kinds of code
+// are hazardous, such as accessing non-hidden global variables.
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void __libc_init_main_thread(KernelArgumentBlock& args) {
+void __libc_init_main_thread_early(KernelArgumentBlock& args) {
   __libc_auxv = args.auxv;
 #if defined(__i386__)
   __libc_init_sysinfo(args);
 #endif
-
-  // The -fstack-protector implementation uses TLS, so make sure that's
-  // set up before we call any function that might get a stack check inserted.
-  // TLS also needs to be set up before errno (and therefore syscalls) can be used.
   __set_tls(main_thread.tls);
-  if (!__init_tls(&main_thread)) async_safe_fatal("failed to initialize TLS: %s", strerror(errno));
+  __init_tls(&main_thread);
+  main_thread.tid = __getpid();
+  main_thread.set_cached_pid(main_thread.tid);
+}
+
+// Finish initializing the main thread.
+__BIONIC_WEAK_FOR_NATIVE_BRIDGE
+void __libc_init_main_thread_late(KernelArgumentBlock& args) {
+  main_thread.bionic_tls = __allocate_bionic_tls();
+  if (main_thread.bionic_tls == nullptr) {
+    // Avoid strerror because it might need bionic_tls.
+    async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
+  }
 
   // Tell the kernel to clear our tid field when we exit, so we're like any other pthread.
-  // As a side-effect, this tells us our pid (which is the same as the main thread's tid).
-  main_thread.tid = __set_tid_address(&main_thread.tid);
-  main_thread.set_cached_pid(main_thread.tid);
+  __set_tid_address(&main_thread.tid);
 
   // We don't want to free the main thread's stack even when the main thread exits
   // because things like environment variables with global scope live on it.
@@ -97,7 +111,7 @@
   // before we initialize the TLS. Dynamic executables will initialize their copy of the global
   // stack protector from the one in the main thread's TLS.
   __libc_safe_arc4random_buf(&__stack_chk_guard, sizeof(__stack_chk_guard), args);
-  __init_thread_stack_guard(&main_thread);
+  __init_tls_stack_guard(&main_thread);
 
   __init_thread(&main_thread);
 
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 51fbe07..e79d3b3 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -95,14 +95,12 @@
                                         structors_array_t const * const structors) {
   BIONIC_STOP_UNWIND;
 
+  // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-  __libc_shared_globals()->init_progname = args.argv[0];
-
-  // Initializing the globals requires TLS to be available for errno.
-  __libc_init_main_thread(args);
-
+  __libc_init_main_thread_early(args);
+  __libc_init_main_thread_late(args);
   __libc_init_globals(args);
-
+  __libc_shared_globals()->init_progname = args.argv[0];
   __libc_init_AT_SECURE(args.envp);
   __libc_init_common();
 
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 6f632e8..8e8d180 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -53,39 +53,43 @@
 #endif
 
 // This code is used both by each new pthread and the code that initializes the main thread.
-bool __init_tls(pthread_internal_t* thread) {
+__attribute__((no_stack_protector))
+void __init_tls(pthread_internal_t* thread) {
   // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
   thread->tls[TLS_SLOT_SELF] = thread->tls;
   thread->tls[TLS_SLOT_THREAD_ID] = thread;
+}
 
+__attribute__((no_stack_protector))
+void __init_tls_stack_guard(pthread_internal_t* thread) {
+  // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
+  thread->tls[TLS_SLOT_STACK_GUARD] = reinterpret_cast<void*>(__stack_chk_guard);
+}
+
+bionic_tls* __allocate_bionic_tls() {
   // Add a guard before and after.
   size_t allocation_size = BIONIC_TLS_SIZE + (2 * PTHREAD_GUARD_SIZE);
   void* allocation = mmap(nullptr, allocation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (allocation == MAP_FAILED) {
     async_safe_format_log(ANDROID_LOG_WARN, "libc",
                           "pthread_create failed: couldn't allocate TLS: %s", strerror(errno));
-    return false;
+    return nullptr;
   }
 
   prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, allocation, allocation_size, "bionic TLS guard");
 
   // Carve out the writable TLS section.
-  thread->bionic_tls = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) +
+  bionic_tls* result = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) +
                                                      PTHREAD_GUARD_SIZE);
-  if (mprotect(thread->bionic_tls, BIONIC_TLS_SIZE, PROT_READ | PROT_WRITE) != 0) {
+  if (mprotect(result, BIONIC_TLS_SIZE, PROT_READ | PROT_WRITE) != 0) {
     async_safe_format_log(ANDROID_LOG_WARN, "libc",
                           "pthread_create failed: couldn't mprotect TLS: %s", strerror(errno));
     munmap(allocation, allocation_size);
-    return false;
+    return nullptr;
   }
 
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, thread->bionic_tls, BIONIC_TLS_SIZE, "bionic TLS");
-  return true;
-}
-
-void __init_thread_stack_guard(pthread_internal_t* thread) {
-  // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
-  thread->tls[TLS_SLOT_STACK_GUARD] = reinterpret_cast<void*>(__stack_chk_guard);
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, result, BIONIC_TLS_SIZE, "bionic TLS");
+  return result;
 }
 
 static void __init_alternate_signal_stack(pthread_internal_t* thread) {
@@ -255,11 +259,15 @@
 
   thread->mmap_size = mmap_size;
   thread->attr = *attr;
-  if (!__init_tls(thread)) {
+
+  thread->bionic_tls = __allocate_bionic_tls();
+  if (thread->bionic_tls == nullptr) {
     if (thread->mmap_size != 0) munmap(thread->attr.stack_base, thread->mmap_size);
     return EAGAIN;
   }
-  __init_thread_stack_guard(thread);
+
+  __init_tls(thread);
+  __init_tls_stack_guard(thread);
 
   *threadp = thread;
   *child_stack = stack_top;
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 81b885a..5a5318d 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -152,10 +152,11 @@
   pthread_key_data_t key_data[BIONIC_PTHREAD_KEY_COUNT];
 };
 
-__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
-__LIBC_HIDDEN__ bool __init_tls(pthread_internal_t* thread);
-__LIBC_HIDDEN__ void __init_thread_stack_guard(pthread_internal_t* thread);
+__LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
+__LIBC_HIDDEN__ void __init_tls_stack_guard(pthread_internal_t* thread);
+__LIBC_HIDDEN__ bionic_tls* __allocate_bionic_tls();
 __LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
+__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
 
 __LIBC_HIDDEN__ pthread_t           __pthread_internal_add(pthread_internal_t* thread);
 __LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id);
diff --git a/libc/private/bionic_auxv.h b/libc/private/bionic_auxv.h
index 53fcc49..494fe9c 100644
--- a/libc/private/bionic_auxv.h
+++ b/libc/private/bionic_auxv.h
@@ -34,7 +34,7 @@
 
 __BEGIN_DECLS
 
-extern ElfW(auxv_t)* __libc_auxv;
+__LIBC_HIDDEN__ extern ElfW(auxv_t)* __libc_auxv;
 
 __END_DECLS
 
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index de086f2..f65de34 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -132,7 +132,8 @@
 
 #if defined(__cplusplus)
 class KernelArgumentBlock;
-extern void __libc_init_main_thread(KernelArgumentBlock&);
+extern void __libc_init_main_thread_early(KernelArgumentBlock& args);
+extern void __libc_init_main_thread_late(KernelArgumentBlock& args);
 #endif
 
 #endif /* __BIONIC_PRIVATE_BIONIC_TLS_H_ */
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index aa12b6e..b384ce4 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -559,11 +559,9 @@
  * function, or other GOT reference will generate a segfault.
  */
 extern "C" ElfW(Addr) __linker_init(void* raw_args) {
+  // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-
-#if defined(__i386__)
-  __libc_init_sysinfo(args);
-#endif
+  __libc_init_main_thread_early(args);
 
   // When the linker is run by itself (rather than as an interpreter for
   // another program), AT_BASE is 0.
@@ -622,8 +620,8 @@
  */
 static ElfW(Addr) __attribute__((noinline))
 __linker_init_post_relocation(KernelArgumentBlock& args, soinfo& tmp_linker_so) {
-  // Initialize the main thread (including TLS, so system calls really work).
-  __libc_init_main_thread(args);
+  // Finish initializing the main thread.
+  __libc_init_main_thread_late(args);
 
   // We didn't protect the linker's RELRO pages in link_image because we
   // couldn't make system calls on x86 at that point, but we can now...