panfrost: Allocate syncobjs in panfrost_flush

For implementing panfrost_flush, it suffices to wait on only a single
syncobj, not an entire array of them. This lets us wait on it directly,
without coercing to/from syncfds in the middle (although some complexity
may be added later to support Android winsys).

Further, we should let the fence own the syncobj, tying together the
lifetimes and thus removing the connection between syncobjs and
batch_fence.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5995>
diff --git a/src/gallium/drivers/panfrost/pan_compute.c b/src/gallium/drivers/panfrost/pan_compute.c
index c11bab4..4abb414 100644
--- a/src/gallium/drivers/panfrost/pan_compute.c
+++ b/src/gallium/drivers/panfrost/pan_compute.c
@@ -135,7 +135,7 @@
         panfrost_new_job(&batch->pool, &batch->scoreboard,
                         JOB_TYPE_COMPUTE, true, 0, &payload,
                          sizeof(payload), false);
-        panfrost_flush_all_batches(ctx);
+        panfrost_flush_all_batches(ctx, 0);
 }
 
 static void
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 884b803..8be2bb6 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -240,35 +240,18 @@
 {
         struct panfrost_context *ctx = pan_context(pipe);
         struct panfrost_device *dev = pan_device(pipe->screen);
-        struct util_dynarray fences;
+        uint32_t syncobj = 0;
 
-        /* We must collect the fences before the flush is done, otherwise we'll
-         * lose track of them.
-         */
-        if (fence) {
-                util_dynarray_init(&fences, NULL);
-                hash_table_foreach(ctx->batches, hentry) {
-                        struct panfrost_batch *batch = hentry->data;
-
-                        panfrost_batch_fence_reference(batch->out_sync);
-                        util_dynarray_append(&fences,
-                                             struct panfrost_batch_fence *,
-                                             batch->out_sync);
-                }
-        }
+        if (fence)
+                drmSyncobjCreate(dev->fd, 0, &syncobj);
 
         /* Submit all pending jobs */
-        panfrost_flush_all_batches(ctx);
+        panfrost_flush_all_batches(ctx, syncobj);
 
         if (fence) {
-                struct panfrost_fence *f = panfrost_fence_create(ctx, &fences);
+                struct panfrost_fence *f = panfrost_fence_create(ctx, syncobj);
                 pipe->screen->fence_reference(pipe->screen, fence, NULL);
                 *fence = (struct pipe_fence_handle *)f;
-
-                util_dynarray_foreach(&fences, struct panfrost_batch_fence *, fence)
-                        panfrost_batch_fence_unreference(*fence);
-
-                util_dynarray_fini(&fences);
         }
 
         if (dev->debug & PAN_DBG_TRACE)
@@ -279,7 +262,7 @@
 panfrost_texture_barrier(struct pipe_context *pipe, unsigned flags)
 {
         struct panfrost_context *ctx = pan_context(pipe);
-        panfrost_flush_all_batches(ctx);
+        panfrost_flush_all_batches(ctx, 0);
 }
 
 #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c;
@@ -1417,7 +1400,7 @@
 
         case PIPE_QUERY_PRIMITIVES_GENERATED:
         case PIPE_QUERY_PRIMITIVES_EMITTED:
-                panfrost_flush_all_batches(ctx);
+                panfrost_flush_all_batches(ctx, 0);
                 vresult->u64 = query->end - query->start;
                 break;
 
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index 7ba1170..dfe9837 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -82,7 +82,8 @@
 
 struct panfrost_fence {
         struct pipe_reference reference;
-        struct util_dynarray syncfds;
+        uint32_t syncobj;
+        bool signaled;
 };
 
 struct panfrost_streamout {
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 7075b43..c82e268 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -64,16 +64,12 @@
 panfrost_create_batch_fence(struct panfrost_batch *batch)
 {
         struct panfrost_batch_fence *fence;
-        ASSERTED int ret;
 
         fence = rzalloc(NULL, struct panfrost_batch_fence);
         assert(fence);
         pipe_reference_init(&fence->reference, 1);
         fence->ctx = batch->ctx;
         fence->batch = batch;
-        ret = drmSyncobjCreate(pan_device(batch->ctx->base.screen)->fd, 0,
-                               &fence->syncobj);
-        assert(!ret);
 
         return fence;
 }
@@ -81,8 +77,6 @@
 static void
 panfrost_free_batch_fence(struct panfrost_batch_fence *fence)
 {
-        drmSyncobjDestroy(pan_device(fence->ctx->base.screen)->fd,
-                          fence->syncobj);
         ralloc_free(fence);
 }
 
@@ -320,30 +314,12 @@
         return panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
 }
 
-static bool
-panfrost_batch_fence_is_signaled(struct panfrost_batch_fence *fence)
-{
-        if (fence->signaled)
-                return true;
-
-        /* Batch has not been submitted yet. */
-        if (fence->batch)
-                return false;
-
-        int ret = drmSyncobjWait(pan_device(fence->ctx->base.screen)->fd,
-                                 &fence->syncobj, 1, 0, 0, NULL);
-
-        /* Cache whether the fence was signaled */
-        fence->signaled = ret >= 0;
-        return fence->signaled;
-}
-
 static void
 panfrost_bo_access_gc_fences(struct panfrost_context *ctx,
                              struct panfrost_bo_access *access,
 			     const struct panfrost_bo *bo)
 {
-        if (access->writer && panfrost_batch_fence_is_signaled(access->writer)) {
+        if (access->writer) {
                 panfrost_batch_fence_unreference(access->writer);
                 access->writer = NULL;
         }
@@ -356,13 +332,8 @@
                 if (!(*reader))
                         continue;
 
-                if (panfrost_batch_fence_is_signaled(*reader)) {
-                        panfrost_batch_fence_unreference(*reader);
-                        *reader = NULL;
-                } else {
-                        /* Build a new array of only unsignaled fences in-place */
-                        *(new_readers++) = *reader;
-                }
+                panfrost_batch_fence_unreference(*reader);
+                *reader = NULL;
         }
 
         if (!util_dynarray_resize(&access->readers, struct panfrost_batch_fence *,
@@ -995,7 +966,8 @@
 static int
 panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                             mali_ptr first_job_desc,
-                            uint32_t reqs)
+                            uint32_t reqs,
+                            uint32_t out_sync)
 {
         struct panfrost_context *ctx = batch->ctx;
         struct pipe_context *gallium = (struct pipe_context *) ctx;
@@ -1004,7 +976,19 @@
         uint32_t *bo_handles;
         int ret;
 
-        submit.out_sync = batch->out_sync->syncobj;
+        /* If we trace, we always need a syncobj, so make one of our own if we
+         * weren't given one to use. Remember that we did so, so we can free it
+         * after we're done but preventing double-frees if we were given a
+         * syncobj */
+
+        bool our_sync = false;
+
+        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
+                drmSyncobjCreate(dev->fd, 0, &out_sync);
+                our_sync = false;
+        }
+
+        submit.out_sync = out_sync;
         submit.jc = first_job_desc;
         submit.requirements = reqs;
 
@@ -1031,7 +1015,7 @@
         /* Trace the job if we're doing that */
         if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                 /* Wait so we can get errors reported back */
-                drmSyncobjWait(dev->fd, &batch->out_sync->syncobj, 1,
+                drmSyncobjWait(dev->fd, &out_sync, 1,
                                INT64_MAX, 0, NULL);
 
                 /* Trace gets priority over sync */
@@ -1039,21 +1023,31 @@
                 pandecode_jc(submit.jc, dev->quirks & IS_BIFROST, dev->gpu_id, minimal);
         }
 
+        /* Cleanup if we created the syncobj */
+        if (our_sync)
+                drmSyncobjDestroy(dev->fd, out_sync);
+
         return 0;
 }
 
+/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
+ * outsync corresponding to the later of the two (since there will be an
+ * implicit dep between them) */
+
 static int
-panfrost_batch_submit_jobs(struct panfrost_batch *batch)
+panfrost_batch_submit_jobs(struct panfrost_batch *batch, uint32_t out_sync)
 {
         bool has_draws = batch->scoreboard.first_job;
+        bool has_frag = batch->scoreboard.tiler_dep || batch->clear;
         int ret = 0;
 
         if (has_draws) {
-                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, 0);
+                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
+                                0, has_frag ? 0 : out_sync);
                 assert(!ret);
         }
 
-        if (batch->scoreboard.tiler_dep || batch->clear) {
+        if (has_frag) {
                 /* Whether we program the fragment job for draws or not depends
                  * on whether there is any *tiler* activity (so fragment
                  * shaders). If there are draws but entirely RASTERIZER_DISCARD
@@ -1063,7 +1057,8 @@
 
                 mali_ptr fragjob = panfrost_fragment_job(batch,
                                 batch->scoreboard.tiler_dep != 0);
-                ret = panfrost_batch_submit_ioctl(batch, fragjob, PANFROST_JD_REQ_FS);
+                ret = panfrost_batch_submit_ioctl(batch, fragjob,
+                                PANFROST_JD_REQ_FS, out_sync);
                 assert(!ret);
         }
 
@@ -1071,16 +1066,17 @@
 }
 
 static void
-panfrost_batch_submit(struct panfrost_batch *batch)
+panfrost_batch_submit(struct panfrost_batch *batch, uint32_t out_sync)
 {
         assert(batch);
         struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
 
-        /* Submit the dependencies first. */
+        /* Submit the dependencies first. Don't pass along the out_sync since
+         * they are guaranteed to terminate sooner */
         util_dynarray_foreach(&batch->dependencies,
                               struct panfrost_batch_fence *, dep) {
                 if ((*dep)->batch)
-                        panfrost_batch_submit((*dep)->batch);
+                        panfrost_batch_submit((*dep)->batch, 0);
         }
 
         int ret;
@@ -1115,7 +1111,7 @@
 
         panfrost_scoreboard_initialize_tiler(&batch->pool, &batch->scoreboard, polygon_list);
 
-        ret = panfrost_batch_submit_jobs(batch);
+        ret = panfrost_batch_submit_jobs(batch, out_sync);
 
         if (ret && dev->debug & PAN_DBG_MSGS)
                 fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);
@@ -1142,14 +1138,19 @@
         panfrost_free_batch(batch);
 }
 
+/* Submit all batches, applying the out_sync to the currently bound batch */
+
 void
-panfrost_flush_all_batches(struct panfrost_context *ctx)
+panfrost_flush_all_batches(struct panfrost_context *ctx, uint32_t out_sync)
 {
+        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
+        panfrost_batch_submit(batch, out_sync);
+
         hash_table_foreach(ctx->batches, hentry) {
                 struct panfrost_batch *batch = hentry->data;
                 assert(batch);
 
-                panfrost_batch_submit(batch);
+                panfrost_batch_submit(batch, 0);
         }
 
         assert(!ctx->batches->entries);
@@ -1198,7 +1199,7 @@
                 return;
 
         if (access->writer && access->writer->batch)
-                panfrost_batch_submit(access->writer->batch);
+                panfrost_batch_submit(access->writer->batch, 0);
 
         if (!flush_readers)
                 return;
@@ -1206,7 +1207,7 @@
         util_dynarray_foreach(&access->readers, struct panfrost_batch_fence *,
                               reader) {
                 if (*reader && (*reader)->batch)
-                        panfrost_batch_submit((*reader)->batch);
+                        panfrost_batch_submit((*reader)->batch, 0);
         }
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_job.h b/src/gallium/drivers/panfrost/pan_job.h
index 68a1b33..746862e 100644
--- a/src/gallium/drivers/panfrost/pan_job.h
+++ b/src/gallium/drivers/panfrost/pan_job.h
@@ -53,9 +53,6 @@
          */
         struct panfrost_context *ctx;
 
-        /* Sync object backing this fence. */
-        uint32_t syncobj;
-
         /* Cached value of the signaled state to avoid calling WAIT_SYNCOBJs
          * when we know the fence has already been signaled.
          */
@@ -168,7 +165,7 @@
                          uint32_t create_flags, uint32_t access_flags);
 
 void
-panfrost_flush_all_batches(struct panfrost_context *ctx);
+panfrost_flush_all_batches(struct panfrost_context *ctx, uint32_t out_sync);
 
 bool
 panfrost_pending_batches_access_bo(struct panfrost_context *ctx,
diff --git a/src/gallium/drivers/panfrost/pan_screen.c b/src/gallium/drivers/panfrost/pan_screen.c
index a9f7c4e..d5ffdda 100644
--- a/src/gallium/drivers/panfrost/pan_screen.c
+++ b/src/gallium/drivers/panfrost/pan_screen.c
@@ -571,14 +571,13 @@
                          struct pipe_fence_handle **ptr,
                          struct pipe_fence_handle *fence)
 {
+        struct panfrost_device *dev = pan_device(pscreen);
         struct panfrost_fence **p = (struct panfrost_fence **)ptr;
         struct panfrost_fence *f = (struct panfrost_fence *)fence;
         struct panfrost_fence *old = *p;
 
         if (pipe_reference(&(*p)->reference, &f->reference)) {
-                util_dynarray_foreach(&old->syncfds, int, fd)
-                        close(*fd);
-                util_dynarray_fini(&old->syncfds);
+                drmSyncobjDestroy(dev->fd, old->syncobj);
                 free(old);
         }
         *p = f;
@@ -592,68 +591,34 @@
 {
         struct panfrost_device *dev = pan_device(pscreen);
         struct panfrost_fence *f = (struct panfrost_fence *)fence;
-        struct util_dynarray syncobjs;
         int ret;
 
-        /* All fences were already signaled */
-        if (!util_dynarray_num_elements(&f->syncfds, int))
+        if (f->signaled)
                 return true;
 
-        util_dynarray_init(&syncobjs, NULL);
-        util_dynarray_foreach(&f->syncfds, int, fd) {
-                uint32_t syncobj;
-
-                ret = drmSyncobjCreate(dev->fd, 0, &syncobj);
-                assert(!ret);
-
-                ret = drmSyncobjImportSyncFile(dev->fd, syncobj, *fd);
-                assert(!ret);
-                util_dynarray_append(&syncobjs, uint32_t, syncobj);
-        }
-
         uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
         if (abs_timeout == OS_TIMEOUT_INFINITE)
                 abs_timeout = INT64_MAX;
 
-        ret = drmSyncobjWait(dev->fd, util_dynarray_begin(&syncobjs),
-                             util_dynarray_num_elements(&syncobjs, uint32_t),
+        ret = drmSyncobjWait(dev->fd, &f->syncobj,
+                             1,
                              abs_timeout, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
                              NULL);
 
-        util_dynarray_foreach(&syncobjs, uint32_t, syncobj)
-                drmSyncobjDestroy(dev->fd, *syncobj);
-
-        return ret >= 0;
+        f->signaled = (ret >= 0);
+        return f->signaled;
 }
 
 struct panfrost_fence *
 panfrost_fence_create(struct panfrost_context *ctx,
-                      struct util_dynarray *fences)
+                      uint32_t syncobj)
 {
-        struct panfrost_device *device = pan_device(ctx->base.screen);
         struct panfrost_fence *f = calloc(1, sizeof(*f));
         if (!f)
                 return NULL;
 
-        util_dynarray_init(&f->syncfds, NULL);
-
-        /* Export fences from all pending batches. */
-        util_dynarray_foreach(fences, struct panfrost_batch_fence *, fence) {
-                int fd = -1;
-
-                /* The fence is already signaled, no need to export it. */
-                if ((*fence)->signaled)
-                        continue;
-
-                drmSyncobjExportSyncFile(device->fd, (*fence)->syncobj, &fd);
-                if (fd == -1)
-                        fprintf(stderr, "export failed: %m\n");
-
-                assert(fd != -1);
-                util_dynarray_append(&f->syncfds, int, fd);
-        }
-
         pipe_reference_init(&f->reference, 1);
+        f->syncobj = syncobj;
 
         return f;
 }
diff --git a/src/gallium/drivers/panfrost/pan_screen.h b/src/gallium/drivers/panfrost/pan_screen.h
index 6fe6381..8f72185 100644
--- a/src/gallium/drivers/panfrost/pan_screen.h
+++ b/src/gallium/drivers/panfrost/pan_screen.h
@@ -63,7 +63,6 @@
 }
 
 struct panfrost_fence *
-panfrost_fence_create(struct panfrost_context *ctx,
-                      struct util_dynarray *fences);
+panfrost_fence_create(struct panfrost_context *ctx, uint32_t syncobj);
 
 #endif /* PAN_SCREEN_H */