ART: Change main-thread thread paging scheme

Modify the code that ensures we can install a stack guard page into
the main thread.

A recent kernel change means that our previous approach of using a
free pointer does not work. It is important to actually extend the
stack properly. For portability, use a function with a large stack
frame (suggested by and adapted from hboehm).

(cherry picked from commit e43dfd6498545ab54121bf330941aef82d9bf339)

Bug: 62952017
Test: m
Test: m test-art-host
Test: Device boots (x86_64 emulator)
Test: Device boots (bullhead)
Merged-In: Ic2a0c3d6d05a1ea9f655329d147b46949e1b9db3
Change-Id: Ic2a0c3d6d05a1ea9f655329d147b46949e1b9db3
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5274f9e..b0651e7 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -337,9 +337,6 @@
   return stack_size;
 }
 
-// Global variable to prevent the compiler optimizing away the page reads for the stack.
-uint8_t dont_optimize_this;
-
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
 // overflow is detected.  It is located right below the stack_begin_.
 //
@@ -357,20 +354,47 @@
   uint8_t* stack_top = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(&stack_himem) &
       ~(kPageSize - 1));    // Page containing current top of stack.
 
-  // First remove the protection on the protected region as will want to read and
-  // write it.  This may fail (on the first attempt when the stack is not mapped)
-  // but we ignore that.
+  // There is a little complexity here that deserves a special mention.  On some
+  // architectures, the stack is created using a VM_GROWSDOWN flag
+  // to prevent memory being allocated when it's not needed.  This flag makes the
+  // kernel only allocate memory for the stack by growing down in memory.  Because we
+  // want to put an mprotected region far away from that at the stack top, we need
+  // to make sure the pages for the stack are mapped in before we call mprotect.
+  //
+  // The failed mprotect in UnprotectStack is an indication of a thread with VM_GROWSDOWN
+  // with a non-mapped stack (usually only the main thread).
+  //
+  // We map in the stack by reading every page from the stack bottom (highest address)
+  // to the stack top. (We then madvise this away.) This must be done by reading from the
+  // current stack pointer downwards.
+
+  // (Defensively) first remove the protection on the protected region as we'll want to read
+  // and write it. Ignore errors.
   UnprotectStack();
 
-  // Map in the stack.  This must be done by reading from the
-  // current stack pointer downwards as the stack may be mapped using VM_GROWSDOWN
-  // in the kernel.  Any access more than a page below the current SP might cause
-  // a segv.
+  VLOG(threads) << "Need to map in stack for thread at " << std::hex <<
+      static_cast<void*>(pregion);
 
-  // Read every page from the high address to the low.
-  for (uint8_t* p = stack_top; p >= pregion; p -= kPageSize) {
-    dont_optimize_this = *p;
-  }
+  struct RecurseDownStack {
+    // This function has an intentionally large stack size.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+    NO_INLINE
+    static void Touch(uintptr_t target) {
+      volatile size_t zero = 0;
+      // Use a large local volatile array to ensure a large frame size. Do not use anything close
+      // to a full page for ASAN. It would be nice to ensure the frame size is at most a page, but
+      // there is no pragma support for this.
+      volatile char space[kPageSize - 256];
+      char sink ATTRIBUTE_UNUSED = space[zero];
+      if (reinterpret_cast<uintptr_t>(space) >= target + kPageSize) {
+        Touch(target);
+      }
+      zero *= 2;  // Try to avoid tail recursion.
+    }
+#pragma GCC diagnostic pop
+  };
+  RecurseDownStack::Touch(reinterpret_cast<uintptr_t>(pregion));
 
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<