Revert "nir: replace lower_ffma and fuse_ffma with has_ffma"

This reverts commit 939ddf3f67de2ed1700c093e60cf95c1b72ff20b.

Intel has a separate pass for fusing FFMAs selectively. We split these flags in commit 1b72c31e1f1947123d8c236b56e230f030f60cf9 and the reasoning still stands. The patch being reverted was just a cleanup, so there should be no issue with reverting it.

Acked-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6849>

commit: 140f53e64646668c09dd4f09d2df9c17ce5d0d5f [log] [tgz]
author: Kenneth Graunke <kenneth@whitecape.org> Thu Sep 24 08:46:31 2020 -0700
committer: Kenneth Graunke <kenneth@whitecape.org> Thu Sep 24 13:11:50 2020 -0700
tree: 99c8edcfd0f91b91299ab8ddabb779f42935fe11
parent: d8cdcd4adf7a1209da7afe47056118183e2b3529 [diff]
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index cebefc8..3df8ccd 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c

@@ -70,6 +70,9 @@
 	.lower_unpack_unorm_4x8 = true,
 	.lower_extract_byte = true,
 	.lower_extract_word = true,
+	.lower_ffma16 = true,
+	.lower_ffma32 = true,
+	.lower_ffma64 = true,
 	.lower_fpow = true,
 	.lower_mul_2x32_64 = true,
 	.lower_rotate = true,
@@ -112,6 +115,9 @@
 	.lower_unpack_half_2x16 = true,
 	.lower_extract_byte = true,
 	.lower_extract_word = true,
+	.lower_ffma16 = true,
+	.lower_ffma32 = true,
+	.lower_ffma64 = true,
 	.lower_fpow = true,
 	.lower_mul_2x32_64 = true,
 	.lower_rotate = true,

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 1a847cf..25b3230 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c

@@ -2874,6 +2874,9 @@
         .lower_unpack_half_2x16 = true,
         .lower_fdiv = true,
         .lower_find_lsb = true,
+	.lower_ffma16 = true,
+	.lower_ffma32 = true,
+	.lower_ffma64 = true,
         .lower_flrp32 = true,
         .lower_fpow = true,
         .lower_fsat = true,

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3cc2750..1741f4f 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h

@@ -3055,9 +3055,12 @@
 
 typedef struct nir_shader_compiler_options {
    bool lower_fdiv;
-   bool has_ffma16;
-   bool has_ffma32;
-   bool has_ffma64;
+   bool lower_ffma16;
+   bool lower_ffma32;
+   bool lower_ffma64;
+   bool fuse_ffma16;
+   bool fuse_ffma32;
+   bool fuse_ffma64;
    bool lower_flrp16;
    bool lower_flrp32;
    /** Lowers flrp when it does not support doubles */

diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c
index de8b8fd..d9c4587 100644
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c

@@ -370,11 +370,11 @@
    unsigned bit_size = nir_dest_bit_size(alu->dest.dest);
 
    if (bit_size == 16)
-      have_ffma = bld->shader->options->has_ffma16;
+      have_ffma = !bld->shader->options->lower_ffma16;
    else if (bit_size == 32)
-      have_ffma = bld->shader->options->has_ffma32;
+      have_ffma = !bld->shader->options->lower_ffma32;
    else if (bit_size == 64)
-      have_ffma = bld->shader->options->has_ffma64;
+      have_ffma = !bld->shader->options->lower_ffma64;
    else
       unreachable("invalid bit_size");
 

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 4e6eaae..be836f9 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py

@@ -193,13 +193,13 @@
    (('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
    (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
    (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
-   (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma16'),
-   (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma32'),
-   (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), '!options->has_ffma64'),
+   (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
+   (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
+   (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
    # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
-   (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma16'),
-   (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma32'),
-   (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->has_ffma64'),
+   (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
+   (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
+   (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
 
    (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
     ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
@@ -2032,9 +2032,9 @@
    (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
    (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
    (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
-   (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma16'),
-   (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma32'),
-   (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->has_ffma64'),
+   (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
+   (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
+   (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),
 
    # These are duplicated from the main optimizations table.  The late
    # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create

diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 0d3e02e..f33f048 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c

@@ -47,9 +47,9 @@
 		.lower_usub_borrow = true,
 		.lower_mul_high = true,
 		.lower_mul_2x32_64 = true,
-		.has_ffma16 = true,
-		.has_ffma32 = true,
-		.has_ffma64 = true,
+		.fuse_ffma16 = true,
+		.fuse_ffma32 = true,
+		.fuse_ffma64 = true,
 		.vertex_id_zero_based = true,
 		.lower_extract_byte = true,
 		.lower_extract_word = true,
@@ -99,9 +99,9 @@
 		.lower_usub_borrow = true,
 		.lower_mul_high = true,
 		.lower_mul_2x32_64 = true,
-		.has_ffma16 = true,
-		.has_ffma32 = true,
-		.has_ffma64 = true,
+		.fuse_ffma16 = true,
+		.fuse_ffma32 = true,
+		.fuse_ffma64 = true,
 		.vertex_id_zero_based = false,
 		.lower_extract_byte = true,
 		.lower_extract_word = true,

diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index d7ae245..3cc7dce 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c

@@ -1004,9 +1004,9 @@
       .lower_fpow = true,
       .lower_sub = true,
       .lower_ftrunc = true,
-      .has_ffma16 = true,
-      .has_ffma32 = true,
-      .has_ffma64 = true,
+      .fuse_ffma16 = true,
+      .fuse_ffma32 = true,
+      .fuse_ffma64 = true,
       .lower_bitops = true,
       .lower_all_io_to_temps = true,
       .vertex_id_zero_based = true,

diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index ac95abf..6cf95d5 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c

@@ -35,9 +35,9 @@
 	.lower_fmod = true,
 	.lower_fdiv = true,
 	.lower_fceil = true,
-	.has_ffma16 = true,
-	.has_ffma32 = true,
-	.has_ffma64 = true,
+	.fuse_ffma16 = true,
+	.fuse_ffma32 = true,
+	.fuse_ffma64 = true,
 	/* .fdot_replicates = true, it is replicated, but it makes things worse */
 	.lower_all_io_to_temps = true,
 	.vertex_id_zero_based = true, /* its not implemented anyway */

diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 0b5e9b6..30a3f52 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c

@@ -42,6 +42,9 @@
 #include "ir/lima_ir.h"
 
 static const nir_shader_compiler_options vs_nir_options = {
+   .lower_ffma16 = true,
+   .lower_ffma32 = true,
+   .lower_ffma64 = true,
    .lower_fpow = true,
    .lower_ffract = true,
    .lower_fdiv = true,
@@ -59,6 +62,9 @@
 };
 
 static const nir_shader_compiler_options fs_nir_options = {
+   .lower_ffma16 = true,
+   .lower_ffma32 = true,
+   .lower_ffma64 = true,
    .lower_fpow = true,
    .lower_fdiv = true,
    .lower_fmod = true,

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 6ab9dd9..218d80e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c

@@ -549,6 +549,9 @@
    .lower_bitfield_insert_to_shifts = true,
    .lower_bitfield_extract_to_shifts = true,
    .lower_sub = true,
+   .lower_ffma16 = true,
+   .lower_ffma32 = true,
+   .lower_ffma64 = true,
    .lower_fmod = true,
    .lower_hadd = true,
    .lower_add_sat = true,

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index d06818f..64453ed 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

@@ -3206,6 +3206,12 @@
 {
    nir_shader_compiler_options op = {};
    op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
+   op.lower_ffma16 = false;
+   op.lower_ffma32 = false;
+   op.lower_ffma64 = false;
+   op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
+   op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
+   op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
    op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
    op.lower_flrp32 = true;
    op.lower_flrp64 = true;

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index e2007f2..29201ee 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c

@@ -923,6 +923,9 @@
 }
 
 static const nir_shader_compiler_options nir_options = {
+   .fuse_ffma16 = false, /* nir doesn't track mad vs fma */
+   .fuse_ffma32 = false, /* nir doesn't track mad vs fma */
+   .fuse_ffma64 = false, /* nir doesn't track mad vs fma */
    .lower_flrp32 = true,
    .lower_flrp64 = true,
    .lower_fpow = false,

diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 73912b2..eab7ce9 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c

@@ -1179,9 +1179,9 @@
 }
 
 const struct nir_shader_compiler_options r600_nir_fs_options = {
-	.has_ffma16 = true,
-	.has_ffma32 = true,
-	.has_ffma64 = true,
+	.fuse_ffma16 = true,
+	.fuse_ffma32 = true,
+	.fuse_ffma64 = true,
 	.lower_scmp = true,
 	.lower_flrp32 = true,
 	.lower_flrp64 = true,
@@ -1205,9 +1205,9 @@
 };
 
 const struct nir_shader_compiler_options r600_nir_options = {
-	.has_ffma16 = true,
-	.has_ffma32 = true,
-	.has_ffma64 = true,
+	.fuse_ffma16 = true,
+	.fuse_ffma32 = true,
+	.fuse_ffma64 = true,
 	.lower_scmp = true,
 	.lower_flrp32 = true,
 	.lower_flrp64 = true,

diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 7a3c6eb..4c68474 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c

@@ -953,9 +953,12 @@
        * gfx9 and newer prefer FMA for F16 because of the packed instruction.
        * gfx10 and older prefer MAD for F32 because of the legacy instruction.
        */
-      .has_ffma16 = sscreen->info.chip_class >= GFX9,
-      .has_ffma32 = sscreen->info.chip_class >= GFX10_3,
-      .has_ffma64 = true,
+      .lower_ffma16 = sscreen->info.chip_class < GFX9,
+      .lower_ffma32 = sscreen->info.chip_class < GFX10_3,
+      .lower_ffma64 = false,
+      .fuse_ffma16 = sscreen->info.chip_class >= GFX9,
+      .fuse_ffma32 = sscreen->info.chip_class >= GFX10_3,
+      .fuse_ffma64 = true,
       .lower_fmod = true,
       .lower_pack_snorm_4x8 = true,
       .lower_pack_unorm_4x8 = true,

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index a650fe7..8bffb1d 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c

@@ -2179,6 +2179,9 @@
         .lower_extract_byte = true,
         .lower_extract_word = true,
         .lower_fdiv = true,
+        .lower_ffma16 = true,
+        .lower_ffma32 = true,
+        .lower_ffma64 = true,
         .lower_flrp32 = true,
         .lower_fmod = true,
         .lower_fpow = true,

diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index a15f7a9..8f0c16c 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c

@@ -126,6 +126,9 @@
 
 static const struct nir_shader_compiler_options nir_options = {
    .lower_all_io_to_temps = true,
+   .lower_ffma16 = true,
+   .lower_ffma32 = true,
+   .lower_ffma64 = true,
    .lower_fdph = true,
    .lower_flrp32 = true,
    .lower_fpow = true,

diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index e000c32..3d0fcbe 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c

@@ -183,9 +183,9 @@
       /* Prior to Gen6, there are no three source operations, and Gen11 loses
        * LRP.
        */
-      nir_options->has_ffma16 = devinfo->gen >= 6;
-      nir_options->has_ffma32 = devinfo->gen >= 6;
-      nir_options->has_ffma64 = devinfo->gen >= 6;
+      nir_options->lower_ffma16 = devinfo->gen < 6;
+      nir_options->lower_ffma32 = devinfo->gen < 6;
+      nir_options->lower_ffma64 = devinfo->gen < 6;
       nir_options->lower_flrp32 = devinfo->gen < 6 || devinfo->gen >= 11;
       nir_options->lower_fpow = devinfo->gen >= 12;
 

diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index a46beed..15b9078 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h

@@ -69,9 +69,9 @@
 
         .lower_bitfield_extract_to_shifts = true,
         .vectorize_io = true,
-	.has_ffma16 = true,
-	.has_ffma32 = true,
-	.has_ffma64 = true,
+	.fuse_ffma16 = true,
+	.fuse_ffma32 = true,
+	.fuse_ffma64 = true,
         .use_interpolated_input_intrinsics = true
 };
 

diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 1da9cff..fabed8b 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h

@@ -36,6 +36,9 @@
  * solution. */
 
 static const nir_shader_compiler_options midgard_nir_options = {
+        .lower_ffma16 = true,
+        .lower_ffma32 = true,
+        .lower_ffma64 = true,
         .lower_scmp = true,
         .lower_flrp16 = true,
         .lower_flrp32 = true,
commit	140f53e64646668c09dd4f09d2df9c17ce5d0d5f	[log] [tgz]
author	Kenneth Graunke <kenneth@whitecape.org>	Thu Sep 24 08:46:31 2020 -0700
committer	Kenneth Graunke <kenneth@whitecape.org>	Thu Sep 24 13:11:50 2020 -0700
tree	99c8edcfd0f91b91299ab8ddabb779f42935fe11
parent	d8cdcd4adf7a1209da7afe47056118183e2b3529 [diff]