sched: add sched_blocked_reason tracepoint to dump the context of uninterruptible sleep

Declare war on uninterruptible sleep. Add a tracepoint which walks the
kernel stack (via get_wchan()) and reports the first non-scheduler
function called before the scheduler was invoked, along with whether the
task was in iowait.
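
For reference, a minimal sketch of a probe that could attach to the new
tracepoint from a module (this assumes the tracepoint is exported to
modules, e.g. via EXPORT_TRACEPOINT_SYMBOL, which this patch does not do;
the probe and module names below are illustrative only, not part of this
patch):

	#include <linux/module.h>
	#include <linux/sched.h>
	#include <trace/events/sched.h>

	/* Probe signature: void (*)(void *data, <TP_PROTO args>). */
	static void probe_sched_blocked(void *ignore, struct task_struct *tsk)
	{
		/* Same information the tracepoint records in TP_fast_assign. */
		pr_info("blocked: pid=%d iowait=%d wchan=%pS\n",
			tsk->pid, tsk->in_iowait, (void *)get_wchan(tsk));
	}

	static int __init blocked_probe_init(void)
	{
		return register_trace_sched_blocked_reason(probe_sched_blocked, NULL);
	}

	static void __exit blocked_probe_exit(void)
	{
		unregister_trace_sched_blocked_reason(probe_sched_blocked, NULL);
		tracepoint_synchronize_unregister();
	}

	module_init(blocked_probe_init);
	module_exit(blocked_probe_exit);
	MODULE_LICENSE("GPL");

Since the event is defined with TRACE_EVENT in
include/trace/events/sched.h, it also shows up under the sched event
group in tracefs and can be enabled through the usual event interface.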

Change-Id: I19e965d5206329360a92cbfe2afcc8c30f65c229
Signed-off-by: Riley Andrews <riandrews@google.com>
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 6cfbb8a..4996720 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -675,7 +675,7 @@
 DEFINE_EVENT(sched_process_template, sched_process_free,
 	     TP_PROTO(struct task_struct *p),
 	     TP_ARGS(p));
-	     
+
 
 /*
  * Tracepoint for a task exiting:
@@ -834,6 +834,30 @@
 	     TP_ARGS(tsk, delay));
 
 /*
+ * Tracepoint for recording the cause of uninterruptible sleep.
+ */
+TRACE_EVENT(sched_blocked_reason,
+
+	TP_PROTO(struct task_struct *tsk),
+
+	TP_ARGS(tsk),
+
+	TP_STRUCT__entry(
+		__field( pid_t,	pid	)
+		__field( void*, caller	)
+		__field( bool, io_wait	)
+	),
+
+	TP_fast_assign(
+		__entry->pid	= tsk->pid;
+		__entry->caller = (void*)get_wchan(tsk);
+		__entry->io_wait = tsk->in_iowait;
+	),
+
+	TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller)
+);
+
+/*
  * Tracepoint for accounting runtime (time the task is executing
  * on a CPU).
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6e64f7e..782e9fa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3028,6 +3028,7 @@
 			}
 
 			trace_sched_stat_blocked(tsk, delta);
+			trace_sched_blocked_reason(tsk);
 
 			/*
 			 * Blocking time is in units of nanosecs, so shift by