Wait sooner for non-daemon threads

When the main thread returns, we attempt to shut down the runtime.
As required, that process has always waited for non-daemon threads
to complete. However, previously we only did so after the runtime
was already partially shut down, which could cause the remaining
threads to deadlock.

This change explicitly waits for those threads before we start destroying
the runtime.

Add a test to make sure that a long-running child thread finishes
properly.

Bug: 148126377
Bug: 147619421
Test: New test fails without waiting call, passes with.
Change-Id: Ic60d695c8a03543b51d8532156f19fff00a58edc
diff --git a/runtime/jni/java_vm_ext.cc b/runtime/jni/java_vm_ext.cc
index 1bf88c5..e5b3d4d 100644
--- a/runtime/jni/java_vm_ext.cc
+++ b/runtime/jni/java_vm_ext.cc
@@ -385,6 +385,13 @@
       return JNI_ERR;
     }
     JavaVMExt* raw_vm = reinterpret_cast<JavaVMExt*>(vm);
+
+    // Wait for all non-daemon threads to terminate before we start destroying
+    // bits of the runtime. Thread list deletion will repeat this in case more
+    // threads are created by daemons in the meantime.
+    raw_vm->GetRuntime()->GetThreadList()
+          ->WaitForOtherNonDaemonThreadsToExit(/*check_no_birth=*/ false);
+
     delete raw_vm->GetRuntime();
     android::ResetNativeLoader();
     return JNI_OK;
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index ed28e74..330f1c1 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1110,18 +1110,27 @@
   return nullptr;
 }
 
-void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
+void ThreadList::WaitForOtherNonDaemonThreadsToExit(bool check_no_birth) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   Thread* self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
   while (true) {
-    {
+    Locks::runtime_shutdown_lock_->Lock(self);
+    if (check_no_birth) {
       // No more threads can be born after we start to shutdown.
-      MutexLock mu(self, *Locks::runtime_shutdown_lock_);
       CHECK(Runtime::Current()->IsShuttingDownLocked());
       CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
+    } else {
+      if (Runtime::Current()->NumberOfThreadsBeingBorn() != 0U) {
+        // Awkward. Shutdown_cond_ is private, but the only live thread may not be registered yet.
+        // Fortunately, this is used mostly for testing, and not performance-critical.
+        Locks::runtime_shutdown_lock_->Unlock(self);
+        usleep(1000);
+        continue;
+      }
     }
     MutexLock mu(self, *Locks::thread_list_lock_);
+    Locks::runtime_shutdown_lock_->Unlock(self);
     // Also wait for any threads that are unregistering to finish. This is required so that no
     // threads access the thread list after it is deleted. TODO: This may not work for user daemon
     // threads since they could unregister at the wrong time.
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index ce564fd..8fc219b 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -181,6 +181,10 @@
       REQUIRES(!Locks::thread_list_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void WaitForOtherNonDaemonThreadsToExit(bool check_no_birth = true)
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
+               !Locks::mutator_lock_);
+
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);
@@ -194,8 +198,6 @@
 
   void SuspendAllDaemonThreadsForShutdown()
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
-  void WaitForOtherNonDaemonThreadsToExit()
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
   void SuspendAllInternal(Thread* self,
                           Thread* ignore1,
diff --git a/test/2030-long-running-child/expected.txt b/test/2030-long-running-child/expected.txt
new file mode 100644
index 0000000..5180c39
--- /dev/null
+++ b/test/2030-long-running-child/expected.txt
@@ -0,0 +1,3 @@
+Main Started
+Main Finished
+Child finished
diff --git a/test/2030-long-running-child/info.txt b/test/2030-long-running-child/info.txt
new file mode 100644
index 0000000..339abf0
--- /dev/null
+++ b/test/2030-long-running-child/info.txt
@@ -0,0 +1,3 @@
+Check that a child of a main thread can run to completion even if the
+main thread terminates immediately, and the child allocates memory and
+forks additional threads.
diff --git a/test/2030-long-running-child/src/Main.java b/test/2030-long-running-child/src/Main.java
new file mode 100644
index 0000000..a5293dc
--- /dev/null
+++ b/test/2030-long-running-child/src/Main.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.concurrent.atomic.AtomicInteger;
+
+public class Main {
+
+  private static class LazyGrandChildThread implements Runnable {
+    @Override
+    public void run() {}
+  }
+
+  private static class ChildThread implements Runnable {
+    @Override
+    public void run() {
+      // Allocate memory forcing GCs and fork children.
+      for (int i = 0; i < 100; ++i) {
+        int [][] a = new int[10][];
+        for (int j = 0; j < 10; ++j) {
+          a[j] = new int[100000 * j + 20];
+          a[j][17] = 1;
+        }
+        Thread t = new Thread(new LazyGrandChildThread());
+        t.start();
+        int sum = 0;
+        // Make it hard to optimize out the arrays.
+        for (int j = 0; j < 10; ++j) {
+          sum += a[j][16] /* = 0 */ + a[j][17] /* = 1 */;
+        }
+        if (sum != 10) {
+          System.out.println("Bad result! Was " + sum);
+        }
+        try {
+          t.join();
+        } catch (InterruptedException e) {
+          System.out.println("Interrupted by " + e);
+        }
+      }
+      System.out.println("Child finished");
+    }
+  }
+
+  public static void main(String[] args) {
+    System.out.println("Main Started");
+    new Thread(new ChildThread()).start();
+    System.out.println("Main Finished");
+  }
+}