intel/compiler: Clear accumulator register before EOT

v2: (Francisco Jerez)
- Drop vec4 changes.
- Handle explicit acc0 operand and implicit one.
- Make sure instruction is SIMD16, prediction is off and default mask
  control set to true.

v3: (Francisco Jerez)
- Clear accumulator only when it's written.
- Use BRW_MASK_DISABLE instead of true.
- Use correct width for brw_acc_reg().
- Fix last_inst_offset.

v4: (Francisco Jerez)
- Don't check for last instruction for accummulator write.

Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Reviewed-by: Matt Turner <mattst88@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3376>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3376>
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index fc19a32..7c8f7bd 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1711,6 +1711,7 @@
     */
    int spill_count = 0, fill_count = 0;
    int loop_count = 0, send_count = 0;
+   bool is_accum_used = false;
 
    struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
 
@@ -1741,6 +1742,23 @@
          last_insn_offset = p->next_insn_offset;
       }
 
+      /* GEN:BUG:14010017096:
+       *
+       * Clear accumulator register before end of thread.
+       */
+      if (inst->eot && is_accum_used && devinfo->gen >= 12) {
+         brw_set_default_exec_size(p, BRW_EXECUTE_16);
+         brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+         brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+         brw_MOV(p, brw_acc_reg(8), brw_imm_f(0.0f));
+         last_insn_offset = p->next_insn_offset;
+      }
+
+      if (!is_accum_used && !inst->eot) {
+         is_accum_used = inst->writes_accumulator_implicitly(devinfo) ||
+                         inst->dst.is_accumulator();
+      }
+
       if (unlikely(debug_flag))
          disasm_annotate(disasm_info, inst, p->next_insn_offset);