Revert^2 "Make FinalizerWatchDogDaemon also monitor RQDaemon"

This relands aosp/1830386

PS1 is identical to aosp/1830386. See comments there.

PS2 adds some cleanups discovered while scrutinizing the code.

PS3 adds fixes, and patchsets tweak them slightly:

1) Static constants were initialized after calling a constructor that
used them.

2) Statically initializing activeWatchees doesn't work if the daemon
is stopped via interruption, as we do regularly in the zygote.

3) Explicitly catch and log uncaught exceptions in the system
daemons. They previously did not always show up in the log,
since the relevant call to isetDefaultUncaughtExceptionHandler
is made later during app startup.

The last one is somewhat unrelated, but this hampered my debugging,
and it could cause problems diagnosing a crashed daemon.

Bug: 197500807
Test: Build and boot AOSP

Change-Id: I575c1ab7c4af0c94050efe718b6a7674d7954b2d
diff --git a/libart/src/main/java/java/lang/Daemons.java b/libart/src/main/java/java/lang/Daemons.java
index 6e05cea..5b2adb3 100644
--- a/libart/src/main/java/java/lang/Daemons.java
+++ b/libart/src/main/java/java/lang/Daemons.java
@@ -136,7 +136,14 @@
             } else {
                 PRE_ZYGOTE_START_LATCH.countDown();
             }
-            runInternal();
+            try {
+                runInternal();
+            } catch (Throwable ex) {
+                // Should never happen, but may not o.w. get reported, e.g. in zygote.
+                // Risk logging redundantly, rather than losing it.
+                System.logE("Uncaught exception in system thread " + name, ex);
+                throw ex;
+            }
         }
 
         public abstract void runInternal();
@@ -204,17 +211,29 @@
         @UnsupportedAppUsage
         private static final ReferenceQueueDaemon INSTANCE = new ReferenceQueueDaemon();
 
+        // Monitored by FinalizerWatchdogDaemon to make sure we're still working.
+        private final AtomicInteger progressCounter = new AtomicInteger(0);
+
         ReferenceQueueDaemon() {
             super("ReferenceQueueDaemon");
         }
 
         @Override public void runInternal() {
+            FinalizerWatchdogDaemon.INSTANCE.monitoringNeeded(FinalizerWatchdogDaemon.RQ_DAEMON);
             while (isRunning()) {
                 Reference<?> list;
                 try {
                     synchronized (ReferenceQueue.class) {
-                        while (ReferenceQueue.unenqueued == null) {
-                            ReferenceQueue.class.wait();
+                        if (ReferenceQueue.unenqueued == null) {
+                            progressCounter.incrementAndGet();
+                            FinalizerWatchdogDaemon.INSTANCE.monitoringNotNeeded(
+                                    FinalizerWatchdogDaemon.RQ_DAEMON);
+                            do {
+                               ReferenceQueue.class.wait();
+                            } while (ReferenceQueue.unenqueued == null);
+                            progressCounter.incrementAndGet();
+                            FinalizerWatchdogDaemon.INSTANCE.monitoringNeeded(
+                                    FinalizerWatchdogDaemon.RQ_DAEMON);
                         }
                         list = ReferenceQueue.unenqueued;
                         ReferenceQueue.unenqueued = null;
@@ -224,9 +243,14 @@
                 } catch (OutOfMemoryError e) {
                     continue;
                 }
-                ReferenceQueue.enqueuePending(list);
+                ReferenceQueue.enqueuePending(list, progressCounter);
+                FinalizerWatchdogDaemon.INSTANCE.resetTimeouts();
             }
         }
+
+        ReferenceQueue currentlyProcessing() {
+          return ReferenceQueue.getCurrentQueue();
+        }
     }
 
     private static class FinalizerDaemon extends Daemon {
@@ -257,6 +281,8 @@
             // Local copy of progressCounter; saves a fence per increment on ARM and MIPS.
             int localProgressCounter = progressCounter.get();
 
+            FinalizerWatchdogDaemon.INSTANCE.monitoringNeeded(
+                    FinalizerWatchdogDaemon.FINALIZER_DAEMON);
             while (isRunning()) {
                 try {
                     // Use non-blocking poll to avoid FinalizerWatchdogDaemon communication
@@ -269,11 +295,13 @@
                         finalizingObject = null;
                         progressCounter.lazySet(++localProgressCounter);
                         // Slow path; block.
-                        FinalizerWatchdogDaemon.INSTANCE.goToSleep();
+                        FinalizerWatchdogDaemon.INSTANCE.monitoringNotNeeded(
+                                FinalizerWatchdogDaemon.FINALIZER_DAEMON);
                         finalizingReference = (FinalizerReference<?>)queue.remove();
                         finalizingObject = finalizingReference.get();
                         progressCounter.set(++localProgressCounter);
-                        FinalizerWatchdogDaemon.INSTANCE.wakeUp();
+                        FinalizerWatchdogDaemon.INSTANCE.monitoringNeeded(
+                                FinalizerWatchdogDaemon.FINALIZER_DAEMON);
                     }
                     doFinalize(finalizingReference);
                 } catch (InterruptedException ignored) {
@@ -300,31 +328,47 @@
     }
 
     /**
-     * The watchdog exits the VM if the finalizer ever gets stuck. We consider
-     * the finalizer to be stuck if it spends more than MAX_FINALIZATION_MILLIS
-     * on one instance.
+     * The watchdog exits the VM if either the FinalizerDaemon, or the ReferenceQueueDaemon
+     * gets stuck. We consider the finalizer to be stuck if it spends more than
+     * MAX_FINALIZATION_MILLIS on one instance. We consider ReferenceQueueDaemon to be
+     * potentially stuck if it spends more than MAX_FINALIZATION_MILLIS processing a single
+     * Cleaner or transferring objects into a single queue, but only report if this happens
+     * a few times in a row, to compensate for the fact that multiple Cleaners may be involved.
      */
     private static class FinalizerWatchdogDaemon extends Daemon {
+        // Single bit values to identify daemon to be watched.
+        static final int FINALIZER_DAEMON = 1;
+        static final int RQ_DAEMON = 2;
+
         @UnsupportedAppUsage
         private static final FinalizerWatchdogDaemon INSTANCE = new FinalizerWatchdogDaemon();
 
-        private boolean needToWork = true;  // Only accessed in synchronized methods.
+        private int activeWatchees;  // Only synchronized accesses.
 
         private long finalizerTimeoutNs = 0;  // Lazily initialized.
 
+        // We tolerate this many timeouts during an enqueuePending call.
+        // This number is > 1, since we may only report enqueuePending progress rarely.
+        private static final int TOLERATED_REFERENCE_QUEUE_TIMEOUTS = 5;
+        private static final AtomicInteger observedReferenceQueueTimeouts = new AtomicInteger(0);
+
         FinalizerWatchdogDaemon() {
             super("FinalizerWatchdogDaemon");
         }
 
+        void resetTimeouts() {
+            observedReferenceQueueTimeouts.lazySet(0);
+        }
+
         @Override public void runInternal() {
             while (isRunning()) {
                 if (!sleepUntilNeeded()) {
                     // We have been interrupted, need to see if this daemon has been stopped.
                     continue;
                 }
-                final Object finalizing = waitForFinalization();
-                if (finalizing != null && !VMDebug.isDebuggerConnected()) {
-                    finalizerTimedOut(finalizing);
+                final TimeoutException exception = waitForProgress();
+                if (exception != null && !VMDebug.isDebuggerConnected()) {
+                    timedOut(exception);
                     break;
                 }
             }
@@ -336,7 +380,7 @@
          * See also http://code.google.com/p/android/issues/detail?id=22778.
          */
         private synchronized boolean sleepUntilNeeded() {
-            while (!needToWork) {
+            while (activeWatchees == 0) {
                 try {
                     wait();
                 } catch (InterruptedException e) {
@@ -353,20 +397,24 @@
          * Notify daemon that it's OK to sleep until notified that something is ready to be
          * finalized.
          */
-        private synchronized void goToSleep() {
-            needToWork = false;
+        private synchronized void monitoringNotNeeded(int whichDaemon) {
+            activeWatchees &= ~whichDaemon;
         }
 
         /**
          * Notify daemon that there is something ready to be finalized.
          */
-        private synchronized void wakeUp() {
-            needToWork = true;
-            notify();
+        private synchronized void monitoringNeeded(int whichDaemon) {
+            int oldWatchees = activeWatchees;
+            activeWatchees |= whichDaemon;
+
+            if (oldWatchees == 0) {
+                notify();
+            }
         }
 
-        private synchronized boolean getNeedToWork() {
-            return needToWork;
+        private synchronized boolean isActive(int whichDaemon) {
+            return (activeWatchees & whichDaemon) != 0;
         }
 
         /**
@@ -402,37 +450,52 @@
 
 
         /**
-         * Return an object that took too long to finalize or return null.
+         * Return null (normal case) or an exception describing what timed out.
          * Wait VMRuntime.getFinalizerTimeoutMs.  If the FinalizerDaemon took essentially the
-         * whole time processing a single reference, return that reference.  Otherwise return
-         * null.  Only called from a single thread.
+         * whole time processing a single reference, or the ReferenceQueueDaemon failed to make
+         * visible progress during that time, return an exception.  Only called from a single
+         * thread.
          */
-        private Object waitForFinalization() {
+        private TimeoutException waitForProgress() {
             if (finalizerTimeoutNs == 0) {
                 finalizerTimeoutNs =
                         NANOS_PER_MILLI * VMRuntime.getRuntime().getFinalizerTimeoutMs();
                 // Temporary app backward compatibility. Remove eventually.
                 MAX_FINALIZE_NANOS = finalizerTimeoutNs;
             }
-            long startCount = FinalizerDaemon.INSTANCE.progressCounter.get();
+            boolean monitorFinalizer = false;
+            int finalizerStartCount = 0;
+            if (isActive(FINALIZER_DAEMON)) {
+                monitorFinalizer = true;
+                finalizerStartCount = FinalizerDaemon.INSTANCE.progressCounter.get();
+            }
+            boolean monitorRefQueue = false;
+            int refQueueStartCount = 0;
+            if (isActive(RQ_DAEMON)) {
+                monitorRefQueue = true;
+                refQueueStartCount = ReferenceQueueDaemon.INSTANCE.progressCounter.get();
+            }
+
             // Avoid remembering object being finalized, so as not to keep it alive.
             if (!sleepForNanos(finalizerTimeoutNs)) {
                 // Don't report possibly spurious timeout if we are interrupted.
                 return null;
             }
-            if (getNeedToWork() && FinalizerDaemon.INSTANCE.progressCounter.get() == startCount) {
-                // We assume that only remove() and doFinalize() may take time comparable to
-                // the finalizer timeout.
-                // We observed neither the effect of the gotoSleep() nor the increment preceding a
-                // later wakeUp. Any remove() call by the FinalizerDaemon during our sleep
-                // interval must have been followed by a wakeUp call before we checked needToWork.
-                // But then we would have seen the counter increment.  Thus there cannot have
-                // been such a remove() call.
+            if (FinalizerDaemon.INSTANCE.progressCounter.get() == finalizerStartCount
+                && monitorFinalizer && isActive(FINALIZER_DAEMON)) {
+                // We assume that only remove() and doFinalize() may take time comparable to the
+                // finalizer timeout.
+                // We observed neither the effect of the monitoringNotNeeded() nor the increment
+                // preceding a later wakeUp. Any remove() call by the FinalizerDaemon during our
+                // sleep interval must have been followed by a monitoringNeeded() call before we
+                // checked activeCallees.  But then we would have seen the counter increment.
+                // Thus there cannot have been such a remove() call.
                 // The FinalizerDaemon must not have progressed (from either the beginning or the
-                // last progressCounter increment) to either the next increment or gotoSleep()
-                // call.  Thus we must have taken essentially the whole finalizerTimeoutMs in a
-                // single doFinalize() call.  Thus it's OK to time out.  finalizingObject was set
-                // just before the counter increment, which preceded the doFinalize call.  Thus we
+                // last progressCounter increment) to either the next increment or
+                // monitoringNotNeeded() call.
+                // Thus we must have taken essentially the whole finalizerTimeoutMs in a single
+                // doFinalize() call.  Thus it's OK to time out.  finalizingObject was set just
+                // before the counter increment, which preceded the doFinalize() call.  Thus we
                 // are guaranteed to get the correct finalizing value below, unless doFinalize()
                 // just finished as we were timing out, in which case we may get null or a later
                 // one.  In this last case, we are very likely to discard it below.
@@ -440,22 +503,37 @@
                 sleepForNanos(500 * NANOS_PER_MILLI);
                 // Recheck to make it even less likely we report the wrong finalizing object in
                 // the case which a very slow finalization just finished as we were timing out.
-                if (getNeedToWork()
-                        && FinalizerDaemon.INSTANCE.progressCounter.get() == startCount) {
-                    return finalizing;
+                if (isActive(FINALIZER_DAEMON)
+                        && FinalizerDaemon.INSTANCE.progressCounter.get() == finalizerStartCount) {
+                    return finalizerTimeoutException(finalizing);
+                }
+            }
+            if (ReferenceQueueDaemon.INSTANCE.progressCounter.get() == refQueueStartCount
+                && monitorRefQueue && isActive(RQ_DAEMON)) {
+                if (observedReferenceQueueTimeouts.incrementAndGet()
+                        > TOLERATED_REFERENCE_QUEUE_TIMEOUTS) {
+                    return refQueueTimeoutException(
+                            ReferenceQueueDaemon.INSTANCE.currentlyProcessing());
                 }
             }
             return null;
         }
 
-        private static void finalizerTimedOut(Object object) {
-            // The current object has exceeded the finalization deadline; abort!
+        private static TimeoutException finalizerTimeoutException(Object object) {
             String message = object.getClass().getName() + ".finalize() timed out after "
                     + VMRuntime.getRuntime().getFinalizerTimeoutMs() / 1000 + " seconds";
-            Exception syntheticException = new TimeoutException(message);
+            TimeoutException syntheticException = new TimeoutException(message);
             // We use the stack from where finalize() was running to show where it was stuck.
             syntheticException.setStackTrace(FinalizerDaemon.INSTANCE.getStackTrace());
+            return syntheticException;
+        }
 
+        private static TimeoutException refQueueTimeoutException(ReferenceQueue rq) {
+            String message = "ReferenceQueueDaemon timed out while targeting " + rq;
+            return new TimeoutException(message);
+        }
+
+        private static void timedOut(TimeoutException exception) {
             // Send SIGQUIT to get native stack traces.
             try {
                 Os.kill(Os.getpid(), OsConstants.SIGQUIT);
@@ -472,7 +550,7 @@
             // handler to dispatch to, either via a handler set on itself, via its ThreadGroup
             // object or via the defaultUncaughtExceptionHandler.
             //
-            // As an approximation, we log by hand an exit if there's no pre-exception handler nor
+            // As an approximation, we log by hand and exit if there's no pre-exception handler nor
             // a default uncaught exception handler.
             //
             // Note that this condition will only ever be hit by ART host tests and standalone
@@ -481,14 +559,14 @@
             if (Thread.getUncaughtExceptionPreHandler() == null &&
                     Thread.getDefaultUncaughtExceptionHandler() == null) {
                 // If we have no handler, log and exit.
-                System.logE(message, syntheticException);
+                System.logE(exception.getMessage(), exception);
                 System.exit(2);
             }
 
             // Otherwise call the handler to do crash reporting.
             // We don't just throw because we're not the thread that
             // timed out; we're the thread that detected it.
-            Thread.currentThread().dispatchUncaughtException(syntheticException);
+            Thread.currentThread().dispatchUncaughtException(exception);
         }
     }
 
diff --git a/ojluni/src/main/java/java/lang/ref/ReferenceQueue.java b/ojluni/src/main/java/java/lang/ref/ReferenceQueue.java
index 72e55c1b..6923a3b 100644
--- a/ojluni/src/main/java/java/lang/ref/ReferenceQueue.java
+++ b/ojluni/src/main/java/java/lang/ref/ReferenceQueue.java
@@ -27,6 +27,7 @@
 package java.lang.ref;
 
 import sun.misc.Cleaner;
+import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * Reference queues, to which registered reference objects are appended by the
@@ -50,6 +51,8 @@
 
     private final Object lock = new Object();
 
+    private static ReferenceQueue currentQueue = null;  // Current target of enqueuePending.
+
     /**
      * Constructs a new reference-object queue.
      */
@@ -93,6 +96,16 @@
     }
 
     /**
+     * The queue currently being targeted by enqueuePending. Used only to get slightly
+     * informative output for timeouts. May be read via a data race, but only for crash
+     * debugging output.
+     * @hide
+     */
+    public static ReferenceQueue getCurrentQueue() {
+        return currentQueue;
+    }
+
+    /**
      * Test if the given reference object has been enqueued but not yet
      * removed from the queue, assuming this is the reference object's queue.
      */
@@ -216,10 +229,11 @@
      *
      * @hide
      */
-    public static void enqueuePending(Reference<?> list) {
+    public static void enqueuePending(Reference<?> list, AtomicInteger progressCounter) {
         Reference<?> start = list;
         do {
             ReferenceQueue queue = list.queue;
+            currentQueue = queue;
             if (queue == null) {
                 Reference<?> next = list.pendingNext;
 
@@ -230,9 +244,12 @@
                 list = next;
             } else {
                 // To improve performance, we try to avoid repeated
-                // synchronization on the same queue by batching enqueue of
+                // synchronization on the same queue by batching enqueueing of
                 // consecutive references in the list that have the same
-                // queue.
+                // queue. We limit this so that progressCounter gets incremented
+                // occasionally,
+                final int MAX_ITERS = 100;
+                int i = 0;
                 synchronized (queue.lock) {
                     do {
                         Reference<?> next = list.pendingNext;
@@ -244,10 +261,11 @@
                         list.pendingNext = list;
                         queue.enqueueLocked(list);
                         list = next;
-                    } while (list != start && list.queue == queue);
+                    } while (list != start && list.queue == queue && ++i <= MAX_ITERS);
                     queue.lock.notifyAll();
                 }
             }
+            progressCounter.incrementAndGet();
         } while (list != start);
     }