gallium/u_threaded: merge consecutive draw calls within batches

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7056>
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 188c51e..093f9d5 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -87,6 +87,19 @@
    }
 }
 
+static bool
+is_next_call_a_mergeable_draw(struct tc_full_draw_info *first_info,
+                              struct tc_call *next,
+                              struct tc_full_draw_info **next_info)
+{
+   if (next->call_id != TC_CALL_draw_vbo)
+      return false; /* not a draw: *next_info is left unwritten */
+   *next_info = (struct tc_full_draw_info*)&next->payload;
+   return !memcmp((uint32_t*)&first_info->draw + 2, /* skip start and count */
+                  (uint32_t*)&(*next_info)->draw + 2, /* compare all the rest */
+                  sizeof(struct pipe_draw_info) - 8);
+}
+
 static void
 tc_batch_execute(void *job, UNUSED int thread_index)
 {
@@ -98,10 +111,57 @@
 
    assert(!batch->token);
 
-   for (struct tc_call *iter = batch->call; iter != last;
-        iter += iter->num_call_slots) {
+   for (struct tc_call *iter = batch->call; iter != last;) {
       tc_assert(iter->sentinel == TC_SENTINEL);
+
+      /* Draw call merging. */
+      if (iter->call_id == TC_CALL_draw_vbo) {
+         struct tc_call *first = iter;
+         struct tc_call *next = first + first->num_call_slots;
+         struct tc_full_draw_info *first_info =
+            (struct tc_full_draw_info*)&first->payload;
+         struct tc_full_draw_info *next_info;
+
+         /* If at least 2 consecutive draw calls can be merged... */
+         if (next != last && next->call_id == TC_CALL_draw_vbo &&
+             first_info->draw.drawid == 0 &&
+             !first_info->draw.indirect &&
+             !first_info->draw.count_from_stream_output &&
+             is_next_call_a_mergeable_draw(first_info, next, &next_info)) {
+            /* Merge up to 256 draw calls. */
+            struct pipe_draw_start_count multi[256];
+            unsigned num_draws = 2;
+
+            multi[0].start = first_info->draw.start;
+            multi[0].count = first_info->draw.count;
+            multi[1].start = next_info->draw.start;
+            multi[1].count = next_info->draw.count;
+
+            if (next_info->draw.index_size)
+               pipe_resource_reference(&next_info->draw.index.resource, NULL);
+
+            /* Find how many other draws can be merged. */
+            next = next + next->num_call_slots;
+            for (; next != last && num_draws < ARRAY_SIZE(multi) &&
+                 is_next_call_a_mergeable_draw(first_info, next, &next_info);
+                 next += next->num_call_slots, num_draws++) {
+               multi[num_draws].start = next_info->draw.start;
+               multi[num_draws].count = next_info->draw.count;
+
+               if (next_info->draw.index_size)
+                  pipe_resource_reference(&next_info->draw.index.resource, NULL);
+            }
+
+            pipe->multi_draw(pipe, &first_info->draw, multi, num_draws);
+            if (first_info->draw.index_size)
+               pipe_resource_reference(&first_info->draw.index.resource, NULL);
+            iter = next;
+            continue;
+         }
+      }
+
       execute_func[iter->call_id](pipe, &iter->payload);
+      iter += iter->num_call_slots;
    }
 
    tc_batch_check(batch);
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index a2621d4..9a47a07 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -145,6 +145,8 @@
  *    another resource's backing storage. The threaded context uses it to
  *    implement buffer invalidation. This call is always queued.
  *
+ * pipe_context::multi_draw() must be implemented.
+ *
  *
  * Performance gotchas
  * -------------------