panfrost: add atomic ops infrastructure

Signed-off-by: Italo Nicola <italonicola@collabora.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6439>
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 06ed3f6..5c22ba7 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -667,6 +667,15 @@
         return false;
 }
 
+#define ATOMIC_CASE_IMPL(ctx, instr, nir, op, is_shared) \
+        case nir_intrinsic_##nir: \
+                emit_atomic(ctx, instr, is_shared, midgard_op_##op); \
+                break;
+/* ATOMIC_CASE_IMPL: one switch case calling emit_atomic. ATOMIC_CASE: shared + global pair. */
+#define ATOMIC_CASE(ctx, instr, nir, op) \
+        ATOMIC_CASE_IMPL(ctx, instr, shared_atomic_##nir, atomic_##op, true); \
+        ATOMIC_CASE_IMPL(ctx, instr, global_atomic_##nir, atomic_##op, false);
+
 #define ALU_CASE(nir, _op) \
 	case nir_op_##nir: \
 		op = midgard_alu_op_##_op; \
@@ -1370,6 +1379,60 @@
         emit_mir_instruction(ctx, ins);
 }
 
+/* Emit one MIR load/store instruction for a NIR atomic intrinsic. If is_shared
+ * is false the access is global, since SSBOs are lowered to globals by a NIR pass. */
+static void
+emit_atomic(
+        compiler_context *ctx,
+        nir_intrinsic_instr *instr,
+        bool is_shared,
+        midgard_load_store_op op)
+{
+        unsigned bitsize = nir_src_bit_size(instr->src[1]);
+        nir_alu_type type =
+                (op == midgard_op_atomic_imin || op == midgard_op_atomic_imax) ?
+                nir_type_int : nir_type_uint; /* only imin/imax are typed as signed */
+
+        unsigned dest = nir_dest_index(&instr->dest);
+        unsigned val = nir_src_index(ctx, &instr->src[1]);
+        emit_explicit_constant(ctx, val, val);
+        /* The operand starts out in slot 3; the other source slots are set to ~0. */
+        midgard_instruction ins = {
+                .type = TAG_LOAD_STORE_4,
+                .mask = 0xF,
+                .dest = dest,
+                .src = { ~0, ~0, ~0, val },
+                .src_types = { 0, 0, 0, type | bitsize },
+                .op = op
+        };
+
+        nir_src *src_offset = nir_get_io_offset_src(instr);
+
+        /* cmpxchg takes an extra value in arg_2, so the address goes in src[1] instead */
+        if (op == midgard_op_atomic_cmpxchg) {
+                unsigned addr = nir_src_index(ctx, src_offset);
+
+                ins.src[1] = addr;
+                ins.src_types[1] = nir_type_uint | nir_src_bit_size(*src_offset);
+
+                unsigned xchg_val = nir_src_index(ctx, &instr->src[2]);
+                emit_explicit_constant(ctx, xchg_val, xchg_val);
+                /* NIR src[1] moves to slot 2; NIR src[2] replaces it in slot 3. */
+                ins.src[2] = val;
+                ins.src_types[2] = type | bitsize;
+                ins.src[3] = xchg_val;
+
+                if (is_shared)
+                        ins.load_store.arg_1 |= 0x6E; /* NOTE(review): undocumented magic bits, confirm meaning */
+        } else {
+                mir_set_offset(ctx, &ins, src_offset, is_shared);
+        }
+
+        mir_set_intr_mask(&instr->instr, &ins, true);
+
+        emit_mir_instruction(ctx, ins);
+}
+
 static void
 emit_varying_read(
         compiler_context *ctx,
diff --git a/src/panfrost/midgard/midgard_emit.c b/src/panfrost/midgard/midgard_emit.c
index a0fbe20..213b702 100644
--- a/src/panfrost/midgard/midgard_emit.c
+++ b/src/panfrost/midgard/midgard_emit.c
@@ -499,6 +499,15 @@
                 ldst.reg = SSA_REG_FROM_FIXED(ins->dest);
         }
 
+        /* Atomic opcode swizzles have a special meaning:
+         *   - The first two bits say which component of the implicit register should be used
+         *   - The next two bits say if the implicit register is r26 or r27 */
+        if (OP_IS_ATOMIC(ins->op)) {
+                ldst.swizzle = 0;
+                ldst.swizzle |= ins->swizzle[3][0] & 3;
+                ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2;
+        }
+
         if (ins->src[1] != ~0) {
                 unsigned src = SSA_REG_FROM_FIXED(ins->src[1]);
                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
@@ -855,7 +864,9 @@
                 for (unsigned i = 0; i < bundle->instruction_count; ++i) {
                         mir_pack_ldst_mask(bundle->instructions[i]);
 
-                        mir_pack_swizzle_ldst(bundle->instructions[i]);
+                        /* Atomic ops don't use this swizzle the same way as other ops */
+                        if (!OP_IS_ATOMIC(bundle->instructions[i]->op))
+                                mir_pack_swizzle_ldst(bundle->instructions[i]);
 
                         /* Apply a constant offset */
                         unsigned offset = bundle->instructions[i]->constants.u32[0];