freedreno/ir3: Fix duplicated fine derivatives instructions. legalize_block() can get run multiple times, which I didn't notice when adding fine derivs support. Other instruction clones change things such that the legalization won't trigger again, but that didn't apply to the DS.PP legalization. To keep someone else from tripping over this, split the one-shot legalization out of the iterative sync flag application. Fixes failures in dEQP-VK.glsl.derivate.dfdxfine.* Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3198 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5699>

commit: 3d7d5d220b5e5c2b48fa522b16ced5be0edad94f [log] [tgz]
author: Eric Anholt <eric@anholt.net> Tue Jun 30 12:17:10 2020 -0700
committer: Marge Bot <eric+marge@anholt.net> Sat Jul 18 00:43:44 2020 +0000
tree: 77647d8ef97bbb335705e2efaee4ef8d78db4ed9
parent: 862d85a63ff6942bc0d30ea3bb42a66433d3c936 [diff]
diff --git a/.gitlab-ci/deqp-freedreno-a630-fails.txt b/.gitlab-ci/deqp-freedreno-a630-fails.txt
index daf0960..274cfd6 100644
--- a/.gitlab-ci/deqp-freedreno-a630-fails.txt
+++ b/.gitlab-ci/deqp-freedreno-a630-fails.txt

@@ -5,7 +5,6 @@
 dEQP-GLES31.functional.stencil_texturing.render.depth24_stencil8_draw
 dEQP-VK.binding_model.descriptorset_random.sets4.constant.ubolimitlow.sbolimithigh.imglimithigh.noiub.uab.frag.ialimitlow.0
 dEQP-VK.draw.output_location.array.b8g8r8a8-unorm-mediump-output-vec3
-dEQP-VK.glsl.derivate.fwidthfine.uniform_loop.vec3_mediump
 dEQP-VK.glsl.linkage.varying.struct.mat3x2
 dEQP-VK.graphicsfuzz.mat-array-deep-control-flow
 dEQP-VK.pipeline.spec_constant.graphics.geometry.composite.array.array_vec4

diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c
index de8387f..6cd2789 100644
--- a/src/freedreno/ir3/disasm-a3xx.c
+++ b/src/freedreno/ir3/disasm-a3xx.c

@@ -1263,6 +1263,9 @@
 	OPC(5, OPC_DSYPP_1,      dsypp.1),
 	OPC(5, OPC_RGETPOS,      rgetpos),
 	OPC(5, OPC_RGETINFO,     rgetinfo),
+	/* macros are needed here for ir3_print */
+	OPC(5, OPC_DSXPP_MACRO,  dsxpp.macro),
+	OPC(5, OPC_DSYPP_MACRO,  dsypp.macro),
 
 
 	/* category 6: */

diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h
index 822d9dd..d271508 100644
--- a/src/freedreno/ir3/instr-a3xx.h
+++ b/src/freedreno/ir3/instr-a3xx.h

@@ -185,6 +185,9 @@
 	OPC_DSYPP_1         = _OPC(5, 25),
 	OPC_RGETPOS         = _OPC(5, 26),
 	OPC_RGETINFO        = _OPC(5, 27),
+	/* cat5 meta instructions, placed above the cat5 opc field's size */
+	OPC_DSXPP_MACRO     = _OPC(5, 32),
+	OPC_DSYPP_MACRO     = _OPC(5, 33),
 
 	/* category 6: */
 	OPC_LDG             = _OPC(6, 0),        /* load-global */

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 634089c..a198e4c 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h

@@ -1663,9 +1663,9 @@
 
 /* cat5 instructions: */
 INSTR1(DSX)
-INSTR1(DSXPP_1)
+INSTR1(DSXPP_MACRO)
 INSTR1(DSY)
-INSTR1(DSYPP_1)
+INSTR1(DSYPP_MACRO)
 INSTR1F(3D, DSX)
 INSTR1F(3D, DSY)
 INSTR1(RGETPOS)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index acfe434..10a51eb 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c

@@ -478,7 +478,7 @@
 		dst[0]->cat5.type = TYPE_F32;
 		break;
 	case nir_op_fddx_fine:
-		dst[0] = ir3_DSXPP_1(b, src[0], 0);
+		dst[0] = ir3_DSXPP_MACRO(b, src[0], 0);
 		dst[0]->cat5.type = TYPE_F32;
 		break;
 	case nir_op_fddy:
@@ -488,7 +488,7 @@
 		break;
 		break;
 	case nir_op_fddy_fine:
-		dst[0] = ir3_DSYPP_1(b, src[0], 0);
+		dst[0] = ir3_DSYPP_MACRO(b, src[0], 0);
 		dst[0]->cat5.type = TYPE_F32;
 		break;
 	case nir_op_flt:

diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index c4d90c1..69efdba 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c

@@ -233,13 +233,6 @@
 			list_addtail(&n->node, &block->instr_list);
 		}
 
-		if (n->opc == OPC_DSXPP_1 || n->opc == OPC_DSYPP_1) {
-			struct ir3_instruction *op_p = ir3_instr_clone(n);
-			op_p->flags = IR3_INSTR_P;
-
-			ctx->so->need_fine_derivatives = true;
-		}
-
 		if (is_sfu(n))
 			regmask_set(&state->needs_ss, n->regs[0]);
 
@@ -360,6 +353,42 @@
 	return true;
 }
 
+/* Expands dsxpp and dsypp macros to:
+ *
+ * dsxpp.1 dst, src
+ * dsxpp.1.p dst, src
+ *
+ * We apply this after flags syncing, as we don't want to sync in between the
+ * two (which might happen if dst == src).  We do it before nop scheduling
+ * because that needs to count actual instructions.
+ */
+static bool
+apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
+{
+	struct list_head instr_list;
+
+	/* remove all the instructions from the list, we'll be adding
+	 * them back in as we go
+	 */
+	list_replace(&block->instr_list, &instr_list);
+	list_inithead(&block->instr_list);
+
+	foreach_instr_safe (n, &instr_list) {
+		list_addtail(&n->node, &block->instr_list);
+
+		if (n->opc == OPC_DSXPP_MACRO || n->opc == OPC_DSYPP_MACRO) {
+			n->opc = (n->opc == OPC_DSXPP_MACRO) ? OPC_DSXPP_1 : OPC_DSYPP_1;
+
+			struct ir3_instruction *op_p = ir3_instr_clone(n);
+			op_p->flags = IR3_INSTR_P;
+
+			ctx->so->need_fine_derivatives = true;
+		}
+	}
+
+	return true;
+}
+
 /* NOTE: branch instructions are always the last instruction(s)
  * in the block.  We take advantage of this as we resolve the
  * branches, since "if (foo) break;" constructs turn into
@@ -752,6 +781,11 @@
 	block_sched(ir);
 	if (so->type == MESA_SHADER_FRAGMENT)
 		kill_sched(ir, so);
+
+	foreach_block (block, &ir->block_list) {
+		progress |= apply_fine_deriv_macro(ctx, block);
+	}
+
 	nop_sched(ir);
 
 	do {
commit	3d7d5d220b5e5c2b48fa522b16ced5be0edad94f	[log] [tgz]
author	Eric Anholt <eric@anholt.net>	Tue Jun 30 12:17:10 2020 -0700
committer	Marge Bot <eric+marge@anholt.net>	Sat Jul 18 00:43:44 2020 +0000
tree	77647d8ef97bbb335705e2efaee4ef8d78db4ed9
parent	862d85a63ff6942bc0d30ea3bb42a66433d3c936 [diff]