goldfish_sync: fix stalls by avoiding early kfree()

When running for a long time, we get mysterious stall messages
or some impossible-looking kernel stack trace where
a single CPU accesses drivers/staging/android/sync.c's
sync_timeline_signal() and cannot get the spin lock.

This was found to be because the timeline wrapper objects
(goldfish_sync_timeline_obj) were not
being cleaned up properly for the (rare) case when a
timeline increment is still pending after the
timeline wrapper object is destroyed.

If the wrapper object is kfree()'ed too early, it may
point at garbage memory that can happen to line up
so that it looks like a sync timeline object that
currently holds a spin lock. In that case, we get
a stall due to sw_sync_timeline_inc being unable to
acquire that "zombie" spin lock.

This CL postpones timeline object destruction until
all pending increments have gone through, using a
reference-counting scheme (krefs).

Change-Id: I6f83a7bd61c174a8d99d83ea0f6e0972211337ee
Signed-off-by: Lingfeng Yang <lfy@google.com>
(cherry picked from commit 2d2c0829a38d4f0c4d2f42e88f838aaf5d33cefa)
diff --git a/drivers/staging/goldfish/goldfish_sync.c b/drivers/staging/goldfish/goldfish_sync.c
index f1572bc..708c884 100644
--- a/drivers/staging/goldfish/goldfish_sync.c
+++ b/drivers/staging/goldfish/goldfish_sync.c
@@ -21,6 +21,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/interrupt.h>
+#include <linux/kref.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -131,7 +132,7 @@
 	/* Spinlock protects |to_do| / |to_do_end|. */
 	spinlock_t lock;
 	/* |mutex_lock| protects all concurrent access
-	 * to timelines for both kernel and user spage */
+	 * to timelines for both kernel and user space. */
 	struct mutex mutex_lock;
 
 	/* Buffer holding commands issued from host. */
@@ -158,8 +159,77 @@
 struct goldfish_sync_timeline_obj {
 	struct sw_sync_timeline *sw_sync_tl;
 	uint32_t current_time;
+	/* We need to be careful about when we deallocate
+	 * this |goldfish_sync_timeline_obj| struct.
+	 * In order to ensure proper cleanup, we need to
+	 * consider the triggered host-side wait that may
+	 * still be in flight when the guest close()'s a
+	 * goldfish_sync device's sync context fd (and
+	 * destroys the |sw_sync_tl| field above).
+	 * The host-side wait may raise IRQ
+	 * and tell the kernel to increment the timeline _after_
+	 * the |sw_sync_tl| has already been set to null.
+	 *
+	 * From observations on OpenGL apps and CTS tests, this
+	 * happens at some very low probability upon context
+	 * destruction or process close, but it does happen
+	 * and it needs to be handled properly. Otherwise,
+	 * if we clean up the surrounding |goldfish_sync_timeline_obj|
+	 * too early, any |handle| field of any host->guest command
+	 * might not even point to a null |sw_sync_tl| field,
+	 * but to garbage memory or even a reclaimed |sw_sync_tl|.
+	 * If we do not count such "pending waits" and kfree the object
+	 * immediately upon |goldfish_sync_timeline_destroy|,
+	 * we might get mysterous RCU stalls after running a long
+	 * time because the garbage memory that is being read
+	 * happens to be interpretable as a |spinlock_t| struct
+	 * that is currently in the locked state.
+	 *
+	 * To track when to free the |goldfish_sync_timeline_obj|
+	 * itself, we maintain a kref.
+	 * The kref essentially counts the timeline itself plus
+	 * the number of waits in flight. kref_init/kref_put
+	 * are issued on
+	 * |goldfish_sync_timeline_create|/|goldfish_sync_timeline_destroy|
+	 * and kref_get/kref_put are issued on
+	 * |goldfish_sync_fence_create|/|goldfish_sync_timeline_inc|.
+	 *
+	 * The timeline is destroyed after reference count
+	 * reaches zero, which would happen after
+	 * |goldfish_sync_timeline_destroy| and all pending
+	 * |goldfish_sync_timeline_inc|'s are fulfilled.
+	 *
+	 * NOTE (1): We assume that |fence_create| and
+	 * |timeline_inc| calls are 1:1, otherwise the kref scheme
+	 * will not work. This is a valid assumption as long
+	 * as the host-side virtual device implementation
+	 * does not insert any timeline increments
+	 * that we did not trigger from here.
+	 *
+	 * NOTE (2): The use of kref by itself requires no locks,
+	 * but this does not mean everything works without locks.
+	 * Related timeline operations do require a lock of some sort,
+	 * or at least are not proven to work without it.
+	 * In particualr, we assume that all the operations
+	 * done on the |kref| field above are done in contexts where
+	 * |global_sync_state->mutex_lock| is held. Do not
+	 * remove that lock until everything is proven to work
+	 * without it!!! */
+	struct kref kref;
 };
 
+/* We will call |delete_timeline_obj| when the last reference count
+ * of the kref is decremented. This deletes the sw_sync
+ * timeline object along with the wrapper itself. */
+static void delete_timeline_obj(struct kref* kref) {
+	struct goldfish_sync_timeline_obj* obj =
+		container_of(kref, struct goldfish_sync_timeline_obj, kref);
+
+	sync_timeline_destroy(&obj->sw_sync_tl->obj);
+	obj->sw_sync_tl = NULL;
+	kfree(obj);
+}
+
 static uint64_t gensym_ctr;
 static void gensym(char *dst)
 {
@@ -167,6 +237,8 @@
 	gensym_ctr++;
 }
 
+/* |goldfish_sync_timeline_create| assumes that |global_sync_state->mutex_lock|
+ * is held. */
 static struct goldfish_sync_timeline_obj*
 goldfish_sync_timeline_create(void)
 {
@@ -188,22 +260,31 @@
 	res = kzalloc(sizeof(struct goldfish_sync_timeline_obj), GFP_KERNEL);
 	res->sw_sync_tl = res_sync_tl;
 	res->current_time = 0;
+	kref_init(&res->kref);
 
 	DPRINT("new timeline_obj=0x%p", res);
 	return res;
 }
 
+/* |goldfish_sync_fence_create| assumes that |global_sync_state->mutex_lock|
+ * is held. */
 static int
-goldfish_sync_fence_create(struct sw_sync_timeline *tl, uint32_t val)
+goldfish_sync_fence_create(struct goldfish_sync_timeline_obj *obj,
+							uint32_t val)
 {
 
 	int fd;
 	char fence_name[256];
 	struct sync_pt *syncpt = NULL;
 	struct sync_fence *sync_obj = NULL;
+	struct sw_sync_timeline *tl;
 
 	DTRACE();
 
+	if (!obj) return -1;
+
+	tl = obj->sw_sync_tl;
+
 	syncpt = sw_sync_pt_create(tl, val);
 	if (!syncpt) {
 		ERR("could not create sync point! "
@@ -231,6 +312,7 @@
 
 	DPRINT("installing sync fence into fd %d sync_obj=0x%p", fd, sync_obj);
 	sync_fence_install(sync_obj, fd);
+	kref_get(&obj->kref);
 
 	return fd;
 
@@ -241,28 +323,37 @@
 	return -1;
 }
 
+/* |goldfish_sync_timeline_inc| assumes that |global_sync_state->mutex_lock|
+ * is held. */
 static void
 goldfish_sync_timeline_inc(struct goldfish_sync_timeline_obj *obj, uint32_t inc)
 {
 	DTRACE();
 	/* Just give up if someone else nuked the timeline.
 	 * Whoever it was won't care that it doesn't get signaled. */
-	if (!obj || !obj->sw_sync_tl) return;
+	if (!obj) return;
 
 	DPRINT("timeline_obj=0x%p", obj);
 	sw_sync_timeline_inc(obj->sw_sync_tl, inc);
 	DPRINT("incremented timeline. increment max_time");
 	obj->current_time += inc;
+
+	/* Here, we will end up deleting the timeline object if it
+	 * turns out that this call was a pending increment after
+	 * |goldfish_sync_timeline_destroy| was called. */
+	kref_put(&obj->kref, delete_timeline_obj);
 	DPRINT("done");
 }
 
+/* |goldfish_sync_timeline_destroy| assumes
+ * that |global_sync_state->mutex_lock| is held. */
 static void
 goldfish_sync_timeline_destroy(struct goldfish_sync_timeline_obj *obj)
 {
 	DTRACE();
-	sync_timeline_destroy(&obj->sw_sync_tl->obj);
-	obj->sw_sync_tl = NULL;
-	kfree(obj);
+	/* See description of |goldfish_sync_timeline_obj| for why we
+	 * should not immediately destroy |obj| */
+	kref_put(&obj->kref, delete_timeline_obj);
 }
 
 static inline void
@@ -498,8 +589,7 @@
 			DPRINT("exec CMD_CREATE_SYNC_FENCE: "
 					"handle=0x%llx time_arg=%d",
 					handle, time_arg);
-			sync_fence_fd = goldfish_sync_fence_create(timeline->sw_sync_tl,
-													time_arg);
+			sync_fence_fd = goldfish_sync_fence_create(timeline, time_arg);
 			goldfish_sync_hostcmd_reply(sync_state, CMD_CREATE_SYNC_FENCE,
 										sync_fence_fd,
 										0,
@@ -650,7 +740,7 @@
 		}
 
 		timeline = sync_context_data->timeline;
-		fd_out = goldfish_sync_fence_create(timeline->sw_sync_tl,
+		fd_out = goldfish_sync_fence_create(timeline,
 											timeline->current_time + 1);
 		DPRINT("Created fence with fd %d and current time %u (timeline: 0x%p)",
 			   fd_out,
@@ -665,10 +755,14 @@
 			DPRINT("Error, could not copy to user!!!");
 
 			sys_close(fd_out);
+			/* We won't be doing an increment, kref_put immediately. */
+			kref_put(&timeline->kref, delete_timeline_obj);
 			mutex_unlock(&global_sync_state->mutex_lock);
 			return -EFAULT;
 		}
 
+		/* We are now about to trigger a host-side wait;
+		 * accumulate on |pending_waits|. */
 		goldfish_sync_send_guestcmd(global_sync_state,
 				CMD_TRIGGER_HOST_WAIT,
 				ioctl_data.host_glsync_handle_in,
@@ -876,7 +970,7 @@
 	test_timeline = goldfish_sync_timeline_create();
 	DPRINT("sw_sync_timeline_create -> 0x%p", test_timeline);
 
-	test_fence_fd = goldfish_sync_fence_create(test_timeline->sw_sync_tl, 1);
+	test_fence_fd = goldfish_sync_fence_create(test_timeline, 1);
 	DPRINT("sync_fence_create -> %d", test_fence_fd);
 
 	DPRINT("incrementing test timeline");