perf: protect group_leader from races that cause ctx double-free

When moving a group_leader perf event from a software-context
to a hardware-context, there's a race in checking and
updating that context. The existing locking solution
doesn't work; note that it tries to grab a lock inside
the group_leader's context object, which you can only
get at by going through a pointer that should be protected
from these races. To avoid that problem, and to produce
a simple solution, we can just use a lock per group_leader
to protect all checks on the group_leader's context.
The new lock is grabbed and released when no context locks
are held.

Bug: 30955111
Bug: 31095224
Change-Id: If37124c100ca6f4aa962559fba3bd5dbbec8e052
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index aebe48c..75368f9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -339,6 +339,12 @@
 	int				nr_siblings;
 	int				group_flags;
 	struct perf_event		*group_leader;
+
+	/*
+	 * Protect the pmu, attributes and context of a group leader.
+	 * Note: does not protect the pointer to the group_leader.
+	 */
+	struct mutex			group_leader_mutex;
 	struct pmu			*pmu;
 
 	enum perf_event_active_state	state;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 76d3b64..01eab13 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7142,6 +7142,7 @@
 	if (!group_leader)
 		group_leader = event;
 
+	mutex_init(&event->group_leader_mutex);
 	mutex_init(&event->child_mutex);
 	INIT_LIST_HEAD(&event->child_list);
 
@@ -7506,6 +7507,16 @@
 			group_leader = NULL;
 	}
 
+	/*
+	 * Take the group_leader's group_leader_mutex before observing
+	 * anything in the group leader that leads to changes in ctx,
+	 * many of which may be changing on another thread.
+	 * In particular, we want to take this lock before deciding
+	 * whether we need to move_group.
+	 */
+	if (group_leader)
+		mutex_lock(&group_leader->group_leader_mutex);
+
 	if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
 		task = find_lively_task_by_vpid(pid);
 		if (IS_ERR(task)) {
@@ -7686,6 +7697,8 @@
 		put_ctx(gctx);
 	}
 	mutex_unlock(&ctx->mutex);
+	if (group_leader)
+		mutex_unlock(&group_leader->group_leader_mutex);
 
 	put_online_cpus();
 
@@ -7722,6 +7735,8 @@
 	if (task)
 		put_task_struct(task);
 err_group_fd:
+	if (group_leader)
+		mutex_unlock(&group_leader->group_leader_mutex);
 	fdput(group);
 err_fd:
 	put_unused_fd(event_fd);