lima: allocate separate bo to store varyings The current strategy using the suballocator with fixed size doesn't scale and causes some programs with a large number of vertices (like some glmark2 scenes) to crash. Change it to dynamically allocate a separate bo to accommodate an arbitrary number of vertices. This also fixes the buffer read/write flags for gp. Signed-off-by: Erico Nunes <nunes.erico@gmail.com> Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com> Reviewed-by: Andreas Baierl <ichgeh@imkreisrum.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/2445>

commit: 270c282a43a2dc30558ebb709d4a25f8dbc71a58 [log] [tgz]
author: Erico Nunes <nunes.erico@gmail.com> Thu Oct 24 00:27:22 2019 +0200
committer: Erico Nunes <nunes.erico@gmail.com> Sat Dec 14 07:44:43 2019 +0100
tree: e6fc0d7f46402b5a42a147aef59174a659272a92
parent: 8bf2b5db786b8608ddd7c83fffa695ae011bf6b3 [diff]
diff --git a/src/gallium/drivers/lima/lima_context.h b/src/gallium/drivers/lima/lima_context.h
index 7a0e7e8..abb3500 100644
--- a/src/gallium/drivers/lima/lima_context.h
+++ b/src/gallium/drivers/lima/lima_context.h

@@ -121,7 +121,6 @@
 };
 
 enum lima_ctx_buff {
-   lima_ctx_buff_sh_varying,
    lima_ctx_buff_sh_gl_pos,
    lima_ctx_buff_sh_gl_point_size,
    lima_ctx_buff_gp_varying_info,
@@ -227,6 +226,7 @@
    struct lima_bo *gp_tile_heap[LIMA_CTX_PLB_MAX_NUM];
    #define gp_tile_heap_size         0x100000
    struct lima_bo *plb_gp_stream;
+   struct lima_bo *sh_varying;
 
    struct hash_table *plb_pp_stream;
    uint32_t plb_index;

diff --git a/src/gallium/drivers/lima/lima_draw.c b/src/gallium/drivers/lima/lima_draw.c
index 02d6baa..2e93e52 100644
--- a/src/gallium/drivers/lima/lima_draw.c
+++ b/src/gallium/drivers/lima/lima_draw.c

@@ -1118,8 +1118,8 @@
 
    if (ctx->vs->num_varyings) {
       render->varying_types = 0x00000000;
-      render->varyings_address =
-         lima_ctx_buff_va(ctx, lima_ctx_buff_sh_varying, LIMA_CTX_BUFF_SUBMIT_PP);
+      render->varyings_address = ctx->sh_varying->va;
+      lima_submit_add_bo(ctx->pp_submit, ctx->sh_varying, LIMA_SUBMIT_BO_READ);
       for (int i = 0, index = 0; i < ctx->vs->num_outputs; i++) {
          int val;
 
@@ -1257,6 +1257,7 @@
 static void
 lima_update_varying(struct lima_context *ctx, const struct pipe_draw_info *info)
 {
+   struct lima_screen *screen = lima_screen(ctx->base.screen);
    struct lima_vs_shader_state *vs = ctx->vs;
 
    uint32_t *varying =
@@ -1290,9 +1291,14 @@
 
    vs->varying_stride = align(offset, 16);
 
-   if (vs->num_varyings)
-      lima_ctx_buff_alloc(ctx, lima_ctx_buff_sh_varying,
-                          vs->varying_stride * info->count, false);
+   if (vs->num_varyings) {
+      /* sh_varying can be too large for the suballocators, so create a
+       * separate bo for it. The bo cache should prevent a performance hit. */
+      ctx->sh_varying = lima_bo_create(screen,
+                                       vs->varying_stride * info->count, 0);
+      assert(ctx->sh_varying);
+      lima_submit_add_bo(ctx->gp_submit, ctx->sh_varying, LIMA_SUBMIT_BO_WRITE);
+   }
 
    for (int i = 0; i < vs->num_outputs; i++) {
       struct lima_varying_info *v = vs->varying + i;
@@ -1313,9 +1319,7 @@
          varying[n++] = 0x2021;
       } else {
          /* Varying */
-         varying[n++] =
-            lima_ctx_buff_va(ctx, lima_ctx_buff_sh_varying, LIMA_CTX_BUFF_SUBMIT_GP) +
-            v->offset;
+         varying[n++] = ctx->sh_varying->va + v->offset;
          varying[n++] = (vs->varying_stride << 11) | (v->components - 1) |
             (v->component_size == 2 ? 0x0C : 0);
       }
@@ -1396,6 +1400,11 @@
    lima_pack_render_state(ctx, info);
    lima_pack_plbu_cmd(ctx, info);
 
+   if (ctx->sh_varying) {
+      lima_bo_unreference(ctx->sh_varying); /* held by submit */
+      ctx->sh_varying = NULL;
+   }
+
    ctx->dirty = 0;
 }
commit	270c282a43a2dc30558ebb709d4a25f8dbc71a58	[log] [tgz]
author	Erico Nunes <nunes.erico@gmail.com>	Thu Oct 24 00:27:22 2019 +0200
committer	Erico Nunes <nunes.erico@gmail.com>	Sat Dec 14 07:44:43 2019 +0100
tree	e6fc0d7f46402b5a42a147aef59174a659272a92
parent	8bf2b5db786b8608ddd7c83fffa695ae011bf6b3 [diff]