gallium/u_threaded: merge consecutive draw calls within batches
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7056>
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 188c51e..093f9d5 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -87,6 +87,19 @@
}
}
+static bool
+is_next_call_a_mergeable_draw(struct tc_full_draw_info *first_info,
+ struct tc_call *next,
+ struct tc_full_draw_info **next_info)
+{
+ return next->call_id == TC_CALL_draw_vbo &&
+ (*next_info = (struct tc_full_draw_info*)&next->payload) &&
+ /* Skip the first 8 bytes of draw (assumed to hold start and count); the rest must match. */
+ memcmp((uint32_t*)&first_info->draw + 2,
+ (uint32_t*)&(*next_info)->draw + 2,
+ sizeof(struct pipe_draw_info) - 8) == 0;
+}
+
static void
tc_batch_execute(void *job, UNUSED int thread_index)
{
@@ -98,10 +111,57 @@
assert(!batch->token);
- for (struct tc_call *iter = batch->call; iter != last;
- iter += iter->num_call_slots) {
+ for (struct tc_call *iter = batch->call; iter != last;) {
tc_assert(iter->sentinel == TC_SENTINEL);
+
+ /* Draw call merging. */
+ if (iter->call_id == TC_CALL_draw_vbo) {
+ struct tc_call *first = iter;
+ struct tc_call *next = first + first->num_call_slots;
+ struct tc_full_draw_info *first_info =
+ (struct tc_full_draw_info*)&first->payload;
+ struct tc_full_draw_info *next_info;
+
+ /* If at least 2 consecutive draw calls can be merged... */
+ if (next != last && next->call_id == TC_CALL_draw_vbo &&
+ first_info->draw.drawid == 0 &&
+ !first_info->draw.indirect &&
+ !first_info->draw.count_from_stream_output &&
+ is_next_call_a_mergeable_draw(first_info, next, &next_info)) {
+ /* Merge up to 256 draw calls. */
+ struct pipe_draw_start_count multi[256];
+ unsigned num_draws = 2;
+
+ multi[0].start = first_info->draw.start;
+ multi[0].count = first_info->draw.count;
+ multi[1].start = next_info->draw.start;
+ multi[1].count = next_info->draw.count;
+
+ if (next_info->draw.index_size)
+ pipe_resource_reference(&next_info->draw.index.resource, NULL);
+
+ /* Find how many other draws can be merged. */
+ next = next + next->num_call_slots;
+ for (; next != last && num_draws < ARRAY_SIZE(multi) &&
+ is_next_call_a_mergeable_draw(first_info, next, &next_info);
+ next += next->num_call_slots, num_draws++) {
+ multi[num_draws].start = next_info->draw.start;
+ multi[num_draws].count = next_info->draw.count;
+
+ if (next_info->draw.index_size)
+ pipe_resource_reference(&next_info->draw.index.resource, NULL);
+ }
+
+ pipe->multi_draw(pipe, &first_info->draw, multi, num_draws);
+ if (first_info->draw.index_size)
+ pipe_resource_reference(&first_info->draw.index.resource, NULL);
+ iter = next;
+ continue;
+ }
+ }
+
execute_func[iter->call_id](pipe, &iter->payload);
+ iter += iter->num_call_slots;
}
tc_batch_check(batch);
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index a2621d4..9a47a07 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -145,6 +145,8 @@
* another resource's backing storage. The threaded context uses it to
* implement buffer invalidation. This call is always queued.
*
+ * pipe_context::multi_draw() must be implemented.
+ *
*
* Performance gotchas
* -------------------