Point debuggerd at the interesting thread.
There are a couple of situations (spin-on-suspend, HeapWorker wedged)
where the current thread's native stack is much less interesting than one
of the others. This change will cause a couple of signals to be thrown
at the "interesting" thread in an attempt to get more useful information
out of debuggerd in these cases.
For bug 2517042.
Change-Id: Ib8c0e0ef93f07b7114e5f4638a907a0f7802bdc0
diff --git a/vm/Thread.c b/vm/Thread.c
index 4343694..c5420a8 100644
--- a/vm/Thread.c
+++ b/vm/Thread.c
@@ -25,8 +25,10 @@
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
+#include <sys/types.h>
#include <sys/resource.h>
#include <sys/mman.h>
+#include <signal.h>
#include <errno.h>
#include <fcntl.h>
@@ -2667,9 +2669,16 @@
spinSleepTime = MORE_SLEEP;
if (retryCount++ == kMaxRetries) {
+ LOGE("Fatal spin-on-suspend, dumping threads\n");
+ dvmDumpAllThreads(false);
+
+ /* log this after -- long traces will scroll off log */
LOGE("threadid=%d: stuck on threadid=%d, giving up\n",
self->threadId, thread->threadId);
- dvmDumpAllThreads(false);
+
+ /* try to get a debuggerd dump from the spinning thread */
+ dvmNukeThread(thread);
+ /* abort the VM */
dvmAbort();
}
}
@@ -3571,6 +3580,44 @@
dvmUnlockThreadList();
}
+/*
+ * Nuke the target thread from orbit.
+ *
+ * The idea is to send a "crash" signal to the target thread so that
+ * debuggerd will take notice and dump an appropriate stack trace.
+ * Because of the way debuggerd works, the same signal is thrown twice,
+ * with a pause in between: the first just causes debuggerd to attach,
+ * and the thread can continue to execute until the second arrives.
+ *
+ * This does not necessarily stop the entire process, but once a thread
+ * has been nuked the rest of the system is likely to be unstable. This
+ * returns so that a limited set of additional operations may be performed,
+ * but it's advisable to abort soon. (NOT a way to simply cancel a thread.)
+ */
+void dvmNukeThread(Thread* thread)
+{
+    pid_t tid = thread->systemTid;
+
+    /*
+     * SIGFPE would stand out more in the crash dump, but then debuggerd
+     * also dumps the thread that calls dvmAbort. Our own position is
+     * generally known, so use SIGSEGV to reduce log volume. If the first
+     * signal can't be sent there is nothing to wait for: log and return.
+     * NOTE(review): kill() with a thread id is Linux-specific -- confirm.
+     */
+    LOGD("Sending two SIGSEGVs to tid=%d to cause debuggerd dump\n", tid);
+    if (kill(tid, SIGSEGV) != 0) {
+        LOGE("Unable to signal tid=%d (errno=%d)\n", tid, errno);
+        return;
+    }
+    usleep(750 * 1000);
+    if (kill(tid, SIGSEGV) != 0) {
+        LOGE("Second signal to tid=%d failed (errno=%d)\n", tid, errno);
+    }
+    usleep(1000 * 1000);
+    LOGD("Continuing\n");
+}
+
#ifdef WITH_MONITOR_TRACKING
/*
* Count up the #of locked objects in the current thread.
diff --git a/vm/Thread.h b/vm/Thread.h
index f397fba..5ca73c4 100644
--- a/vm/Thread.h
+++ b/vm/Thread.h
@@ -517,6 +517,12 @@
void dvmDumpAllThreads(bool grabLock);
void dvmDumpAllThreadsEx(const DebugOutputTarget* target, bool grabLock);
+/*
+ * Debug: signal a thread to get a debuggerd stack trace. Leaves the VM
+ * in an uncertain state.
+ */
+void dvmNukeThread(Thread* thread);
+
#ifdef WITH_MONITOR_TRACKING
/*
* Track locks held by the current thread, along with the stack trace at
diff --git a/vm/alloc/HeapWorker.c b/vm/alloc/HeapWorker.c
index 7d2b687..d743ce5 100644
--- a/vm/alloc/HeapWorker.c
+++ b/vm/alloc/HeapWorker.c
@@ -180,6 +180,9 @@
free(desc);
dvmDumpAllThreads(true);
+ /* try to get a debuggerd dump from the target thread */
+ dvmNukeThread(thread);
+
/* abort the VM */
dvmAbort();
} else if (delta > HEAP_WORKER_WATCHDOG_TIMEOUT / 2) {