r600/sfn: Handle mem barrier and image barrier by using ACK

When reading from and writing to the same image in a shader, the
memory_barrier can possibly be handled by emitting an ack-write and then
waiting for the ack when the memory barrier is set.

Not sure how well this goes with the synchronization across all shader
invocations though.

Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7142>
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
index ddeb3a9..9200307 100644
--- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.cpp
@@ -100,6 +100,11 @@
       return emit_image_size(intr);
    case nir_intrinsic_get_ssbo_size:
       return emit_buffer_size(intr);
+   case nir_intrinsic_memory_barrier:
+   case nir_intrinsic_memory_barrier_image:
+   case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_group_memory_barrier:
+      return make_stores_ack_and_waitack();
    default:
       return false;
    }
@@ -352,17 +357,21 @@
    auto values = vec_from_nir_with_fetch_constant(instr->src[0],
          (1 << nir_src_num_components(instr->src[0])) - 1, {0,1,2,3}, true);
 
-   emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
-                                       values, addr_vec, m_ssbo_image_offset, rat_id, 1,
-                                       1, 0, false));
+   auto store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
+                                   values, addr_vec, m_ssbo_image_offset, rat_id, 1,
+                                   1, 0, false);
+   emit_instruction(store);
+   m_store_ops.push_back(store);
 
    for (unsigned i = 1; i < nir_src_num_components(instr->src[0]); ++i) {
       emit_instruction(new AluInstruction(op1_mov, temp2.reg_i(0), from_nir(instr->src[0], i), write));
       emit_instruction(new AluInstruction(op2_add_int, addr_vec.reg_i(0),
                                           {addr_vec.reg_i(0), Value::one_i}, last_write));
-      emit_instruction(new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
-                                          temp2, addr_vec, 0, rat_id, 1,
-                                          1, 0, false));
+      store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED,
+                                 temp2, addr_vec, 0, rat_id, 1,
+                                 1, 0, false);
+      emit_instruction(store);
+      m_store_ops.push_back(store);
    }
 #endif
    return true;
@@ -392,6 +401,8 @@
 
    auto store = new RatInstruction(cf_mem_rat, RatInstruction::STORE_TYPED, value, coord, imageid,
                                    image_offset, 1, 0xf, 0, false);
+
+   m_store_ops.push_back(store);
    emit_instruction(store);
    return true;
 }
@@ -617,6 +628,19 @@
    return true;
 }
 
+bool EmitSSBOInstruction::make_stores_ack_and_waitack()
+{
+   // Barrier handling: with no recorded stores no WaitAck is emitted.
+   if (m_store_ops.empty())
+      return true;
+   // Mark every recorded store as needing an ACK, then emit one WaitAck.
+   for (RatInstruction *pending : m_store_ops)
+      pending->set_ack();
+   emit_instruction(new WaitAck(0));
+   m_store_ops.clear();
+   return true;
+}
+
 GPRVector EmitSSBOInstruction::make_dest(const nir_intrinsic_instr* ir)
 {
    GPRVector::Values v;
diff --git a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
index f289c5d..56e0e31 100644
--- a/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
+++ b/src/gallium/drivers/r600/sfn/sfn_emitssboinstruction.h
@@ -37,6 +37,8 @@
 
    bool fetch_return_value(const nir_intrinsic_instr *intrin);
 
+   bool make_stores_ack_and_waitack();
+
    ESDOp get_opcode(nir_intrinsic_op opcode);
    RatInstruction::ERatOp get_rat_opcode(const nir_intrinsic_op opcode, pipe_format format) const;
 
@@ -47,6 +49,7 @@
    bool m_require_rat_return_address;
    GPRVector m_rat_return_address;
    int m_ssbo_image_offset;
+   std::vector<RatInstruction *> m_store_ops;
 };
 
 }
diff --git a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
index 2a3d408..46d4280 100644
--- a/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
+++ b/src/gallium/drivers/r600/sfn/sfn_instruction_gds.h
@@ -168,6 +168,8 @@
 
    int data_swz(int chan) const {return m_data.chan_i(chan);}
 
+   void set_ack() {m_need_ack = true; } // Flag this instruction as needing an ACK.
+
 private:
 
    bool is_equal_to(const Instruction& lhs) const override;
diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
index 60e4bc2..5799616 100644
--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
@@ -678,8 +678,8 @@
    case nir_intrinsic_control_barrier:
    case nir_intrinsic_memory_barrier_tcs_patch:
    case nir_intrinsic_memory_barrier_shared:
-   case nir_intrinsic_memory_barrier:
    case nir_intrinsic_memory_barrier_buffer:
+   case nir_intrinsic_memory_barrier:
    case nir_intrinsic_memory_barrier_image:
    case nir_intrinsic_group_memory_barrier:
       return emit_barrier(instr);