panfrost: XML-ify the local storage descriptor
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6797>
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 5a872ae..ddeb4f5 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -57,21 +57,25 @@
{
struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
- struct mali_shared_memory shared = {
- .shared_workgroup_count = ~0,
- };
+ struct panfrost_transfer t =
+ panfrost_pool_alloc_aligned(&batch->pool,
+ MALI_LOCAL_STORAGE_LENGTH,
+ 64);
- if (batch->stack_size) {
- struct panfrost_bo *stack =
- panfrost_batch_get_scratchpad(batch, batch->stack_size,
- dev->thread_tls_alloc,
- dev->core_count);
+ pan_pack(t.cpu, LOCAL_STORAGE, ls) {
+ ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
+ if (batch->stack_size) {
+ struct panfrost_bo *stack =
+ panfrost_batch_get_scratchpad(batch, batch->stack_size,
+ dev->thread_tls_alloc,
+ dev->core_count);
- shared.stack_shift = panfrost_get_stack_shift(batch->stack_size);
- shared.scratchpad = stack->gpu;
+ ls.tls_size = panfrost_get_stack_shift(batch->stack_size);
+ ls.tls_base_pointer = stack->gpu;
+ }
}
- return panfrost_pool_upload_aligned(&batch->pool, &shared, sizeof(shared), 64);
+ return t.gpu;
}
void
@@ -950,15 +954,18 @@
struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
shared_size,
1);
+ struct panfrost_transfer t =
+ panfrost_pool_alloc_aligned(&batch->pool,
+ MALI_LOCAL_STORAGE_LENGTH,
+ 64);
- struct mali_shared_memory shared = {
- .shared_memory = bo->gpu,
- .shared_workgroup_count = log2_instances,
- .shared_shift = util_logbase2(single_size) + 1
+ pan_pack(t.cpu, LOCAL_STORAGE, ls) { /* point the descriptor at the shared-memory BO allocated above */
+ ls.wls_base_pointer = bo->gpu;
+ ls.wls_instances = 1u << log2_instances; /* field is declared modifier="log2": pass the instance count, the packer takes the log2 itself */
+ ls.wls_size_scale = util_logbase2(single_size) + 1;
+ }
- return panfrost_pool_upload_aligned(&batch->pool, &shared,
- sizeof(shared), 64);
+ return t.gpu;
}
static mali_ptr
diff --git a/src/gallium/drivers/panfrost/pan_mfbd.c b/src/gallium/drivers/panfrost/pan_mfbd.c
index deccd3a..fad3f02 100644
--- a/src/gallium/drivers/panfrost/pan_mfbd.c
+++ b/src/gallium/drivers/panfrost/pan_mfbd.c
@@ -498,18 +498,24 @@
mfbd.msaa.sample_locations = panfrost_emit_sample_locations(batch);
mfbd.tiler_meta = panfrost_batch_get_tiler_meta(batch, vertex_count);
} else {
- if (batch->stack_size) {
- unsigned shift = panfrost_get_stack_shift(batch->stack_size);
- struct panfrost_bo *bo = panfrost_batch_get_scratchpad(batch,
- batch->stack_size,
- dev->thread_tls_alloc,
- dev->core_count);
- mfbd.shared_memory.stack_shift = shift;
- mfbd.shared_memory.scratchpad = bo->gpu;
+ struct mali_local_storage_packed lsp;
+
+ pan_pack(&lsp, LOCAL_STORAGE, ls) {
+ if (batch->stack_size) {
+ unsigned shift =
+ panfrost_get_stack_shift(batch->stack_size);
+ struct panfrost_bo *bo =
+ panfrost_batch_get_scratchpad(batch,
+ batch->stack_size,
+ dev->thread_tls_alloc,
+ dev->core_count);
+ ls.tls_size = shift;
+ ls.tls_base_pointer = bo->gpu;
+ }
+
+ ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
}
-
- mfbd.shared_memory.shared_workgroup_count = ~0;
-
+ mfbd.shared_memory = lsp;
mfbd.tiler = panfrost_emit_midg_tiler(batch, vertex_count);
}
diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c
index d9173ba..ce68740 100644
--- a/src/gallium/drivers/panfrost/pan_sfbd.c
+++ b/src/gallium/drivers/panfrost/pan_sfbd.c
@@ -207,11 +207,6 @@
struct mali_single_framebuffer framebuffer = {
.width = MALI_POSITIVE(width),
.height = MALI_POSITIVE(height),
- .shared_memory = {
- .stack_shift = shift,
- .shared_workgroup_count = ~0,
- .scratchpad = panfrost_batch_get_scratchpad(batch, shift, dev->thread_tls_alloc, dev->core_count)->gpu,
- },
.format = {
.unk3 = 0x3,
},
@@ -219,6 +214,18 @@
.tiler = panfrost_emit_midg_tiler(batch, vertex_count),
};
+ struct mali_local_storage_packed lsp;
+ pan_pack(&lsp, LOCAL_STORAGE, ls) {
+ ls.tls_size = shift;
+ ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
+ ls.tls_base_pointer =
+ panfrost_batch_get_scratchpad(batch,
+ shift,
+ dev->thread_tls_alloc,
+ dev->core_count)->gpu;
+ }
+ framebuffer.shared_memory = lsp;
+
return framebuffer;
}
diff --git a/src/panfrost/bifrost/test/bi_submit.c b/src/panfrost/bifrost/test/bi_submit.c
index c953e87..408171f 100644
--- a/src/panfrost/bifrost/test/bi_submit.c
+++ b/src/panfrost/bifrost/test/bi_submit.c
@@ -169,11 +169,10 @@
memcpy(attr->cpu + 1024, iattr, sz_attr);
struct panfrost_bo *shmem = bit_bo_create(dev, 4096);
- struct mali_shared_memory shmemp = {
- .shared_workgroup_count = 0x1f,
- };
- memcpy(shmem->cpu, &shmemp, sizeof(shmemp));
+ pan_pack(shmem->cpu, LOCAL_STORAGE, cfg) {
+ cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
+ }
pan_pack(shader_desc->cpu, STATE, cfg) {
cfg.shader.shader = shader->gpu;
diff --git a/src/panfrost/include/panfrost-job.h b/src/panfrost/include/panfrost-job.h
index 634f62b..5be6253 100644
--- a/src/panfrost/include/panfrost-job.h
+++ b/src/panfrost/include/panfrost-job.h
@@ -630,37 +630,6 @@
unsigned unk3 : 4;
};
-/* Shared structure at the start of framebuffer descriptors, or used bare for
- * compute jobs, configuring stack and shared memory */
-
-struct mali_shared_memory {
- u32 stack_shift : 4;
- u32 unk0 : 28;
-
- /* Configuration for shared memory for compute shaders.
- * shared_workgroup_count is logarithmic and may be computed for a
- * compute shader using shared memory as:
- *
- * shared_workgroup_count = MAX2(ceil(log2(count_x)) + ... + ceil(log2(count_z), 10)
- *
- * For compute shaders that don't use shared memory, or non-compute
- * shaders, this is set to ~0
- */
-
- u32 shared_workgroup_count : 5;
- u32 shared_unk1 : 3;
- u32 shared_shift : 4;
- u32 shared_zero : 20;
-
- mali_ptr scratchpad;
-
- /* For compute shaders, the RAM backing of workgroup-shared memory. For
- * fragment shaders on Bifrost, apparently multisampling locations */
-
- mali_ptr shared_memory;
- mali_ptr unknown1;
-} __attribute__((packed));
-
/* Configures multisampling on Bifrost fragment jobs */
struct bifrost_multisampling {
@@ -671,7 +640,7 @@
} __attribute__((packed));
struct mali_single_framebuffer {
- struct mali_shared_memory shared_memory;
+ struct mali_local_storage_packed shared_memory;
struct mali_sfbd_format format;
u32 clear_flags;
@@ -876,7 +845,7 @@
struct mali_framebuffer {
union {
- struct mali_shared_memory shared_memory;
+ struct mali_local_storage_packed shared_memory;
struct bifrost_multisampling msaa;
};
diff --git a/src/panfrost/lib/decode.c b/src/panfrost/lib/decode.c
index 12f9f8e..463a779 100644
--- a/src/panfrost/lib/decode.c
+++ b/src/panfrost/lib/decode.c
@@ -446,41 +446,6 @@
pandecode_log("},\n");
}
-static void
-pandecode_shared_memory(const struct mali_shared_memory *desc, bool is_compute)
-{
- pandecode_prop("stack_shift = 0x%x", desc->stack_shift);
-
- if (desc->unk0)
- pandecode_prop("unk0 = 0x%x", desc->unk0);
-
- if (desc->shared_workgroup_count != 0x1F) {
- pandecode_prop("shared_workgroup_count = %d", desc->shared_workgroup_count);
- if (!is_compute)
- pandecode_msg("XXX: wrong workgroup count for noncompute\n");
- }
-
- if (desc->shared_unk1 || desc->shared_shift) {
- pandecode_prop("shared_unk1 = %X", desc->shared_unk1);
- pandecode_prop("shared_shift = %X", desc->shared_shift);
-
- if (!is_compute)
- pandecode_msg("XXX: shared memory configured in noncompute shader");
- }
-
- if (desc->shared_zero) {
- pandecode_msg("XXX: shared memory zero tripped\n");
- pandecode_prop("shared_zero = 0x%" PRIx32, desc->shared_zero);
- }
-
- if (desc->shared_memory && !is_compute)
- pandecode_msg("XXX: shared memory used in noncompute shader\n");
-
- MEMORY_PROP(desc, scratchpad);
- MEMORY_PROP(desc, shared_memory);
- MEMORY_PROP(desc, unknown1);
-}
-
static struct pandecode_fbd
pandecode_sfbd(uint64_t gpu_va, int job_no, bool is_fragment, unsigned gpu_id)
{
@@ -494,13 +459,7 @@
pandecode_log("struct mali_single_framebuffer framebuffer_%"PRIx64"_%d = {\n", gpu_va, job_no);
pandecode_indent++;
-
- pandecode_log(".shared_memory = {\n");
- pandecode_indent++;
- pandecode_shared_memory(&s->shared_memory, false);
- pandecode_indent--;
- pandecode_log("},\n");
-
+ DUMP_CL(LOCAL_STORAGE, &s->shared_memory, "Local Storage:\n");
pandecode_sfbd_format(s->format);
info.width = s->width + 1;
@@ -599,13 +558,8 @@
pandecode_compute_fbd(uint64_t gpu_va, int job_no)
{
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing(gpu_va);
- const struct mali_shared_memory *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
-
- pandecode_log("struct mali_shared_memory shared_%"PRIx64"_%d = {\n", gpu_va, job_no);
- pandecode_indent++;
- pandecode_shared_memory(s, true);
- pandecode_indent--;
- pandecode_log("},\n");
+ const struct mali_local_storage_packed *PANDECODE_PTR_VAR(s, mem, (mali_ptr) gpu_va);
+ DUMP_CL(LOCAL_STORAGE, s, "Local Storage:\n");
}
/* Extracts the number of components associated with a Mali format */
@@ -872,11 +826,8 @@
pandecode_indent--;
pandecode_log("},\n");
} else {
- pandecode_log(".shared_memory = {\n");
- pandecode_indent++;
- pandecode_shared_memory(&fb->shared_memory, is_compute);
- pandecode_indent--;
- pandecode_log("},\n");
+ struct mali_local_storage_packed ls = fb->shared_memory;
+ DUMP_CL(LOCAL_STORAGE, &ls, "Local Storage:\n");
}
info.width = fb->width1 + 1;
diff --git a/src/panfrost/lib/midgard.xml b/src/panfrost/lib/midgard.xml
index 6fb896d..e27f880 100644
--- a/src/panfrost/lib/midgard.xml
+++ b/src/panfrost/lib/midgard.xml
@@ -561,4 +561,15 @@
<field name="Scissor Maximum Y" size="16" start="7:16" type="uint"/>
</struct>
+ <struct name="Local Storage" size="8"> <!-- Thread-local (TLS) and workgroup-local (WLS) storage descriptor; replaces the hand-written struct mali_shared_memory -->
+ <field name="TLS Size" size="5" start="0:0" type="uint"/>
+ <field name="TLS Initial Stack Pointer Offset" size="27" start="0:5" type="uint"/>
+ <field name="WLS Instances" size="5" start="1:0" type="uint" modifier="log2" prefix="MALI_LOCAL_STORAGE"> <!-- log2-packed: callers set the instance count itself, not its logarithm -->
+ <value name="No Workgroup Mem" value="0x80000000"/> <!-- log2(0x80000000) = 31 = 0x1f, i.e. all five field bits set -->
+ </field>
+ <field name="WLS Size Base" size="2" start="1:5" type="uint"/>
+ <field name="WLS Size Scale" size="5" start="1:8" type="uint"/>
+ <field name="TLS Base Pointer" size="64" start="2:0" type="address"/>
+ <field name="WLS Base Pointer" size="64" start="4:0" type="address"/>
+ </struct>
</panxml>