ANDROID: Compute dmabuf info for forks from scratch

For forks (!CLONE_VM && !CLONE_FILES) the child's task_dma_buf_info is
created as a copy of the parent's in dup_dma_buf_info. This assumes that
the parent and child share all FD and VMA dmabuf references, however
conditions can exist that result in dmabuf references from a parent to
not propagate to a child via fork:

* madvise(MADV_DONTFORK) (i.e. VM_DONTCOPY) on a dmabuf VMA
* Raciness between copy_files / copy_mm and copy_dmabuf_info where a
  parent thread closes FDs or unmaps

This results in incorrect accounting information in the child, and
potentially a UAF in dup_dma_buf_info when dereferencing a pointer to a
freed dmabuf.

Fix this by scanning the mm_struct and files_struct for all dmabuf
references in the child, and accounting each against the child's new
task_dma_buf_info.

Fixes: 060742b3d06d ("ANDROID: Track per-process dmabuf RSS")
Bug: 424648392
Change-Id: I891edd2956419c2952c3e3ea57a92cb3b25b903a
Signed-off-by: T.J. Mercier <tjmercier@google.com>
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index bcb645e..53b673d 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -510,67 +510,90 @@ static struct task_dma_buf_info *alloc_task_dma_buf_info(void)
 	return dmabuf_info;
 }
 
-static struct task_dma_buf_info *dup_dma_buf_info(struct task_dma_buf_info *from)
-{
-	struct task_dma_buf_info *to;
-	struct task_dma_buf_record *from_rec, *to_rec;
-	unsigned int count;
-	int retries = 0;
+#define COUNT_DMABUF_FDS(files, file_lookup_func) ({ \
+	size_t count = 0; \
+	unsigned int max_fds = files_fdtable(files)->max_fds; \
+	for (unsigned int n = 0; n < max_fds; ++n) { \
+		struct file *file = file_lookup_func(files, n); \
+		if (file && is_dma_buf_file(file)) \
+			++count; \
+	} \
+	count; \
+})
 
-	/* Allocate now before locked section below. */
-	to = alloc_task_dma_buf_info();
-	if (!to)
+static struct task_dma_buf_info *compute_dmabuf_info(struct task_struct *task)
+{
+	struct task_dma_buf_info *dmabuf_info;
+	size_t count_fd = 0, count_vma = 0;
+	size_t count;
+
+	dmabuf_info = alloc_task_dma_buf_info();
+	if (!dmabuf_info)
 		return NULL;
 
-	/* Read required count racily, before obtaining dmabuf_info->lock */
-	count = READ_ONCE(from->dmabuf_count);
-	if (!task_dmabuf_records_preload(count))
-		goto err_list_copy;
+	/*
+	 * RCU not actually needed here because task isn't fully formed yet and nobody else can
+	 * access these structures, however lockdep will complain if we don't hold the lock.
+	 */
+	rcu_read_lock();
+	if (task->files)
+		count_fd = COUNT_DMABUF_FDS(task->files, files_lookup_fd_rcu);
 
-retry:
-	spin_lock(&from->lock);
-	if (from->dmabuf_count > count) {
-		/* We don't have enough reserved records, allocate more */
-		count = from->dmabuf_count;
+	if (task->mm) {
+		struct vm_area_struct *vma;
 
-		spin_unlock(&from->lock);
-		task_dmabuf_records_preload_end();
-		if (!task_dmabuf_records_preload(count))
-			goto err_list_copy;
+		VMA_ITERATOR(vmi, task->mm, 0);
 
-		/* Limit the number of retries to avoid live-lock */
-		if (retries++ > 5) {
-			task_dmabuf_records_preload_end();
-			goto err_list_copy;
+		for_each_vma(vmi, vma)
+			if (vma->vm_file && is_dma_buf_file(vma->vm_file))
+				++count_vma;
+	}
+	rcu_read_unlock();
+
+	/* count can't change underneath us. See comment above. */
+	count = count_fd + count_vma;
+	if (count > 0) {
+		unsigned int max_fds;
+
+		if (!task_dmabuf_records_preload(count)) {
+			kfree(dmabuf_info);
+			return NULL;
 		}
 
-		goto retry;
+		rcu_read_lock();
+		if (count_fd) {
+			max_fds = files_fdtable(task->files)->max_fds;
+			for (unsigned int n = 0; count_fd && n < max_fds; ++n) {
+				struct file *file = files_lookup_fd_rcu(task->files, n);
+
+				if (file && is_dma_buf_file(file)) {
+					__dma_buf_account_task(file->private_data, dmabuf_info,
+							       false);
+					--count_fd;
+				}
+			}
+		}
+
+		if (count_vma) {
+			struct vm_area_struct *vma;
+
+			VMA_ITERATOR(vmi, task->mm, 0);
+
+			for_each_vma(vmi, vma) {
+				if (vma->vm_file && is_dma_buf_file(vma->vm_file)) {
+					__dma_buf_account_task(vma->vm_file->private_data,
+							       dmabuf_info, false);
+					if (--count_vma == 0)
+						break;
+				}
+			}
+		}
+		rcu_read_unlock();
+
+		task_dmabuf_records_preload_end();
 	}
 
-	/* All required records are reserved */
-	list_for_each_entry(from_rec, &from->dmabufs, node) {
-		to_rec = alloc_task_dmabuf_record();
-		WARN_ON(!to_rec);
-		to_rec->dmabuf = from_rec->dmabuf;
-		to_rec->refcnt = from_rec->refcnt;
-		list_add(&to_rec->node, &to->dmabufs);
-		atomic64_inc(&to_rec->dmabuf->nr_task_refs);
-	}
-	to->dmabuf_count = from->dmabuf_count;
-	to->rss = from->rss;
-	to->rss_hwm = to->rss;
-	spin_unlock(&from->lock);
-
-	trim_task_dmabuf_records_locked();
-	task_dmabuf_records_preload_end();
-
-	return to;
-
-err_list_copy:
-	trim_task_dmabuf_records();
-	kfree(to);
-
-	return NULL;
+	return dmabuf_info;
 }
 
 int copy_dmabuf_info(u64 clone_flags, struct task_struct *task)
@@ -622,10 +645,13 @@ int copy_dmabuf_info(u64 clone_flags, struct task_struct *task)
 	}
 
 	/*
-	 * No sharing: Both MM and FD references to dmabufs are duplicated in the child. We
-	 * duplicate the dmabuf accounting info into the child as well here.
+	 * No sharing: Both MM and FD references to dmabufs were already duplicated in the child by
+	 * copy_files and copy_mm. The parent's dmabuf_info has not been synchronized with the child
+	 * since that occurred, and VM_DONTCOPY can also prevent a parent's VMAs from propagating to
+	 * the child. Construct the dmabuf accounting info for the child here based on what actually
+	 * made it into the child's files_struct and mm_struct.
 	 */
-	child_dmabuf_info = dup_dma_buf_info(parent_dmabuf_info);
+	child_dmabuf_info = compute_dmabuf_info(task);
 	if (!child_dmabuf_info)
 		return -ENOMEM;
 
@@ -662,17 +688,6 @@ void put_dmabuf_info(struct task_dma_buf_info *dmabuf_info)
 	kfree(dmabuf_info);
 }
 
-#define COUNT_DMABUF_FDS(file_lookup_func) ({ \
-	size_t count = 0; \
-	unsigned int max_fds = files_fdtable(current->files)->max_fds; \
-	for (unsigned int n = 0; n < max_fds; ++n) { \
-		struct file *file = file_lookup_func(current->files, n); \
-		if (file && is_dma_buf_file(file)) \
-			++count; \
-	} \
-	count; \
-})
-
 /*
  * begin_new_exec is the starting point for the execution of a new program. It involves unsharing
  * files_struct (possibly creating a new one), and installs a new mm_struct. Since this modifies the
@@ -702,7 +717,7 @@ int dma_buf_begin_new_exec(struct files_struct *old_files)
 
 		/* Attempt to count dmabuf FDs locklessly before allocating */
 		rcu_read_lock();
-		num_dmabuf_fds = COUNT_DMABUF_FDS(files_lookup_fd_rcu);
+		num_dmabuf_fds = COUNT_DMABUF_FDS(current->files, files_lookup_fd_rcu);
 		rcu_read_unlock();
 retry:
 		if (!task_dmabuf_records_preload(num_dmabuf_fds))
@@ -711,7 +726,7 @@ int dma_buf_begin_new_exec(struct files_struct *old_files)
 		spin_lock(&my_files->file_lock);
 
 		/* First make sure we have enough preallocated records */
-		num_dmabuf_fds_check = COUNT_DMABUF_FDS(files_lookup_fd_locked);
+		num_dmabuf_fds_check = COUNT_DMABUF_FDS(current->files, files_lookup_fd_locked);
 
 		if (num_dmabuf_fds_check > num_dmabuf_fds) {
 			spin_unlock(&my_files->file_lock);