pan/bi: Add load_output support
load_output is mapped to the LD_TILE instruction. Note that multi-sample
render targets are not supported yet (the sample index is hardcoded to 0).
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7151>
diff --git a/src/panfrost/bifrost/bi_pack.c b/src/panfrost/bifrost/bi_pack.c
index f6e18fc..86b7efc 100644
--- a/src/panfrost/bifrost/bi_pack.c
+++ b/src/panfrost/bifrost/bi_pack.c
@@ -831,6 +831,8 @@
}
case BI_LOAD_VAR_ADDRESS:
return pan_pack_add_lea_attr_imm(clause, bundle.add, regs);
+ case BI_LOAD_TILE:
+ return pan_pack_add_ld_tile(clause, bundle.add, regs);
case BI_MINMAX:
if (bundle.add->op.minmax == BI_MINMAX_MIN) {
if (bundle.add->dest_type == nir_type_float32)
diff --git a/src/panfrost/bifrost/bi_print.c b/src/panfrost/bifrost/bi_print.c
index f95295c..a677c3b 100644
--- a/src/panfrost/bifrost/bi_print.c
+++ b/src/panfrost/bifrost/bi_print.c
@@ -62,6 +62,7 @@
case BI_LOAD_ATTR: return "load_attr";
case BI_LOAD_VAR: return "load_var";
case BI_LOAD_VAR_ADDRESS: return "load_var_address";
+ case BI_LOAD_TILE: return "load_tile";
case BI_MINMAX: return "minmax";
case BI_MOV: return "mov";
case BI_SELECT: return "select";
diff --git a/src/panfrost/bifrost/bi_schedule.c b/src/panfrost/bifrost/bi_schedule.c
index 3fd1dfb..4a05527 100644
--- a/src/panfrost/bifrost/bi_schedule.c
+++ b/src/panfrost/bifrost/bi_schedule.c
@@ -77,6 +77,9 @@
case BI_BLEND:
return BIFROST_MESSAGE_BLEND;
+ case BI_LOAD_TILE:
+ return BIFROST_MESSAGE_TILE;
+
case BI_ATEST:
return BIFROST_MESSAGE_ATEST;
diff --git a/src/panfrost/bifrost/bi_tables.c b/src/panfrost/bifrost/bi_tables.c
index 90862c8e..19352e5 100644
--- a/src/panfrost/bifrost/bi_tables.c
+++ b/src/panfrost/bifrost/bi_tables.c
@@ -45,6 +45,7 @@
[BI_LOAD_ATTR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_LOAD_VAR_ADDRESS] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
+ [BI_LOAD_TILE] = BI_SCHED_HI_LATENCY | BI_SCHED_ADD | BI_VECTOR | BI_DATA_REG_DEST,
[BI_MINMAX] = BI_SCHED_ADD | BI_NO_ABS_ABS_FP16_FMA | BI_MODS,
[BI_MOV] = BI_SCHED_ALL,
[BI_FMOV] = BI_MODS | BI_SCHED_ALL,
diff --git a/src/panfrost/bifrost/bifrost.h b/src/panfrost/bifrost/bifrost.h
index 55148c8..6d03a58 100644
--- a/src/panfrost/bifrost/bifrost.h
+++ b/src/panfrost/bifrost/bifrost.h
@@ -540,4 +540,15 @@
unsigned mask : 4;
} __attribute__((packed));
+#define BIFROST_MEGA_SAMPLE 128
+#define BIFROST_ALL_SAMPLES 255
+#define BIFROST_CURRENT_PIXEL 255 /* stored in .y by bi_emit_ld_output to load the current pixel */
+
+struct bifrost_pixel_indices { /* four 8-bit fields; used as the PixelIndices operand of BI_LOAD_TILE */
+ unsigned sample : 8; /* NOTE(review): presumably where BIFROST_MEGA_SAMPLE / BIFROST_ALL_SAMPLES go - confirm */
+ unsigned rt : 8; /* NOTE(review): presumably the render target index - confirm */
+ unsigned x : 8;
+ unsigned y : 8; /* BIFROST_CURRENT_PIXEL here selects the current pixel */
+} __attribute__((packed));
+
#endif
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index 3e3df21..b8191d9 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -105,6 +105,46 @@
}
static void
+bi_emit_ld_output(bi_context *ctx, nir_intrinsic_instr *instr)
+{ /* Builds and emits one BI_LOAD_TILE instruction for a load_output intrinsic. */
+ assert(ctx->is_blend); /* load_output is only handled when compiling a blend shader */
+
+ bi_instruction ins = {
+ .type = BI_LOAD_TILE,
+ .vector_channels = instr->num_components,
+ .dest = pan_dest_index(&instr->dest),
+ .dest_type = nir_type_float16,
+ .src = {
+ /* PixelIndices: read from the instruction constant, filled from pix below */
+ BIR_INDEX_CONSTANT,
+ /* PixelCoverage: we simply pass r60 which contains the cumulative
+ * coverage bitmap
+ */
+ BIR_INDEX_REGISTER | 60,
+ /* InternalConversionDescriptor (see src/panfrost/lib/midgard.xml for more
+ * details); presumably the upper half of the constant, ORed in below - TODO confirm
+ */
+ BIR_INDEX_CONSTANT | 32
+ },
+ .src_types = { nir_type_uint32, nir_type_uint32, nir_type_uint32 },
+ };
+
+ /* We want to load the current pixel; the fields left out of the
+ * initializer (sample, rt, x) are zero. FIXME: The sample to load is
+ * therefore hardcoded to 0. This should be addressed for multi-sample FBs.
+ */
+ struct bifrost_pixel_indices pix = {
+ .y = BIFROST_CURRENT_PIXEL,
+ };
+ memcpy(&ins.constant.u64, &pix, sizeof(pix)); /* pix occupies the first sizeof(pix) bytes of the constant */
+
+ /* Only keep the conversion part of the blend descriptor (its upper 32 bits). */
+ ins.constant.u64 |= ctx->blend_desc & 0xffffffff00000000ULL;
+
+ bi_emit(ctx, ins);
+}
+
+static void
bi_emit_ld_vary(bi_context *ctx, nir_intrinsic_instr *instr)
{
bi_instruction ins = bi_load(BI_LOAD_VAR, instr);
@@ -488,6 +528,10 @@
bi_emit_sysval(ctx, &instr->instr, 1, 8);
break;
+ case nir_intrinsic_load_output:
+ bi_emit_ld_output(ctx, instr);
+ break;
+
case nir_intrinsic_load_viewport_scale:
case nir_intrinsic_load_viewport_offset:
case nir_intrinsic_load_num_work_groups:
diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h
index 350a0d4..7a2a1fd 100644
--- a/src/panfrost/bifrost/compiler.h
+++ b/src/panfrost/bifrost/compiler.h
@@ -66,6 +66,7 @@
BI_LOAD_ATTR,
BI_LOAD_VAR,
BI_LOAD_VAR_ADDRESS,
+ BI_LOAD_TILE,
BI_MINMAX,
BI_MOV,
BI_REDUCE_FMA,