panfrost: Simplify depth/stencil/alpha

The alpha test is lowered by the frontend so we can drop that unused
code path, and stencil state can be computed at CSO create time to
reduce draw-time complexity.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6195>
diff --git a/src/gallium/drivers/panfrost/pan_assemble.c b/src/gallium/drivers/panfrost/pan_assemble.c
index 4a3bf79..2dbed04 100644
--- a/src/gallium/drivers/panfrost/pan_assemble.c
+++ b/src/gallium/drivers/panfrost/pan_assemble.c
@@ -160,11 +160,7 @@
         s->info.stage = stage;
 
         /* Call out to Midgard compiler given the above NIR */
-
-        panfrost_program program = {
-                .alpha_ref = state->alpha_state.ref_value
-        };
-
+        panfrost_program program = {};
         memcpy(program.rt_formats, state->rt_formats, sizeof(program.rt_formats));
 
         if (dev->quirks & IS_BIFROST) {
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index 8b4fe46..e17dae9 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -350,72 +350,6 @@
 }
 
 static unsigned
-panfrost_translate_compare_func(enum pipe_compare_func in)
-{
-        switch (in) {
-        case PIPE_FUNC_NEVER:
-                return MALI_FUNC_NEVER;
-
-        case PIPE_FUNC_LESS:
-                return MALI_FUNC_LESS;
-
-        case PIPE_FUNC_EQUAL:
-                return MALI_FUNC_EQUAL;
-
-        case PIPE_FUNC_LEQUAL:
-                return MALI_FUNC_LEQUAL;
-
-        case PIPE_FUNC_GREATER:
-                return MALI_FUNC_GREATER;
-
-        case PIPE_FUNC_NOTEQUAL:
-                return MALI_FUNC_NOT_EQUAL;
-
-        case PIPE_FUNC_GEQUAL:
-                return MALI_FUNC_GEQUAL;
-
-        case PIPE_FUNC_ALWAYS:
-                return MALI_FUNC_ALWAYS;
-
-        default:
-                unreachable("Invalid func");
-        }
-}
-
-static unsigned
-panfrost_translate_stencil_op(enum pipe_stencil_op in)
-{
-        switch (in) {
-        case PIPE_STENCIL_OP_KEEP:
-                return MALI_STENCIL_OP_KEEP;
-
-        case PIPE_STENCIL_OP_ZERO:
-                return MALI_STENCIL_OP_ZERO;
-
-        case PIPE_STENCIL_OP_REPLACE:
-               return MALI_STENCIL_OP_REPLACE;
-
-        case PIPE_STENCIL_OP_INCR:
-                return MALI_STENCIL_OP_INCR_SAT;
-
-        case PIPE_STENCIL_OP_DECR:
-                return MALI_STENCIL_OP_DECR_SAT;
-
-        case PIPE_STENCIL_OP_INCR_WRAP:
-                return MALI_STENCIL_OP_INCR_WRAP;
-
-        case PIPE_STENCIL_OP_DECR_WRAP:
-                return MALI_STENCIL_OP_DECR_WRAP;
-
-        case PIPE_STENCIL_OP_INVERT:
-                return MALI_STENCIL_OP_INVERT;
-
-        default:
-                unreachable("Invalid stencil op");
-        }
-}
-
-static unsigned
 translate_tex_wrap(enum pipe_tex_wrap w)
 {
         switch (w) {
@@ -518,19 +452,6 @@
 }
 
 static void
-panfrost_make_stencil_state(const struct pipe_stencil_state *in,
-                            void *out)
-{
-        pan_pack(out, STENCIL, cfg) {
-                cfg.mask = in->valuemask;
-                cfg.compare_function = panfrost_translate_compare_func(in->func);
-                cfg.stencil_fail = panfrost_translate_stencil_op(in->fail_op);
-                cfg.depth_fail = panfrost_translate_stencil_op(in->zfail_op);
-                cfg.depth_pass = panfrost_translate_stencil_op(in->zpass_op);
-        }
-}
-
-static void
 panfrost_frag_meta_rasterizer_update(struct panfrost_context *ctx,
                                      struct mali_shader_meta *fragmeta)
 {
@@ -578,43 +499,37 @@
 panfrost_frag_meta_zsa_update(struct panfrost_context *ctx,
                               struct mali_shader_meta *fragmeta)
 {
-        const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
+        const struct panfrost_zsa_state *so = ctx->depth_stencil;
         int zfunc = PIPE_FUNC_ALWAYS;
 
-        if (!zsa) {
+        if (!so) {
                 /* If stenciling is disabled, the state is irrelevant */
                 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST, false);
                 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK, false);
         } else {
                 SET_BIT(fragmeta->unknown2_4, MALI_STENCIL_TEST,
-                        zsa->stencil[0].enabled);
-                panfrost_make_stencil_state(&zsa->stencil[0],
-                                            &fragmeta->stencil_front);
-                fragmeta->stencil_mask_front = zsa->stencil[0].writemask;
+                        so->base.stencil[0].enabled);
 
-                /* Bottom 8-bits of stencil state is the stencil ref, ref is no
-                 * more than 8-bits. Be extra careful. */
-                fragmeta->stencil_front.opaque[0] |= ctx->stencil_ref.ref_value[0];
+                fragmeta->stencil_mask_front = so->stencil_mask_front;
+                fragmeta->stencil_mask_back = so->stencil_mask_back;
+
+                /* Bottom bits for stencil ref, exactly one word */
+                fragmeta->stencil_front.opaque[0] = so->stencil_front.opaque[0] | ctx->stencil_ref.ref_value[0];
 
                 /* If back-stencil is not enabled, use the front values */
 
-                if (zsa->stencil[1].enabled) {
-                        panfrost_make_stencil_state(&zsa->stencil[1],
-                                                    &fragmeta->stencil_back);
-                        fragmeta->stencil_mask_back = zsa->stencil[1].writemask;
-                        fragmeta->stencil_back.opaque[0] |= ctx->stencil_ref.ref_value[1];
-                } else {
+                if (so->base.stencil[1].enabled)
+                        fragmeta->stencil_back.opaque[0] = so->stencil_back.opaque[0] | ctx->stencil_ref.ref_value[1];
+                else
                         fragmeta->stencil_back = fragmeta->stencil_front;
-                        fragmeta->stencil_mask_back = fragmeta->stencil_mask_front;
-                }
 
-                if (zsa->depth.enabled)
-                        zfunc = zsa->depth.func;
+                if (so->base.depth.enabled)
+                        zfunc = so->base.depth.func;
 
                 /* Depth state (TODO: Refactor) */
 
                 SET_BIT(fragmeta->unknown2_3, MALI_DEPTH_WRITEMASK,
-                        zsa->depth.writemask);
+                        so->base.depth.writemask);
         }
 
         fragmeta->unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
@@ -864,7 +779,7 @@
                  * depends on if depth/stencil is used for the draw or not.
                  * Just one of depth OR stencil is enough to trigger this. */
 
-                const struct pipe_depth_stencil_alpha_state *zsa = ctx->depth_stencil;
+                const struct pipe_depth_stencil_alpha_state *zsa = &ctx->depth_stencil->base;
                 bool zs_enabled = fs->writes_depth || fs->writes_stencil;
 
                 if (zsa) {
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.h b/src/gallium/drivers/panfrost/pan_cmdstream.h
index 067542d..7847338 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.h
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.h
@@ -114,4 +114,20 @@
 mali_ptr
 panfrost_emit_sample_locations(struct panfrost_batch *batch);
 
+static inline unsigned
+panfrost_translate_compare_func(enum pipe_compare_func in)
+{ /* Translate a Gallium pipe_compare_func into the matching MALI_FUNC_* value */
+        switch (in) {
+        case PIPE_FUNC_NEVER: return MALI_FUNC_NEVER;
+        case PIPE_FUNC_LESS: return MALI_FUNC_LESS;
+        case PIPE_FUNC_EQUAL: return MALI_FUNC_EQUAL;
+        case PIPE_FUNC_LEQUAL: return MALI_FUNC_LEQUAL;
+        case PIPE_FUNC_GREATER: return MALI_FUNC_GREATER;
+        case PIPE_FUNC_NOTEQUAL: return MALI_FUNC_NOT_EQUAL;
+        case PIPE_FUNC_GEQUAL: return MALI_FUNC_GEQUAL;
+        case PIPE_FUNC_ALWAYS: return MALI_FUNC_ALWAYS;
+        default: unreachable("Invalid func"); /* any value outside the eight cases above */
+        }
+}
+
 #endif /* __PAN_CMDSTREAM_H__ */
diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index bb4ba3b..400d83d 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -636,25 +636,9 @@
 {
         struct panfrost_device *dev = pan_device(ctx->base.screen);
         struct pipe_rasterizer_state *rasterizer = &ctx->rasterizer->base;
-        struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;
 
         bool is_fragment = (type == PIPE_SHADER_FRAGMENT);
 
-        if (is_fragment && (alpha->enabled || variant->alpha_state.enabled)) {
-                /* Make sure enable state is at least the same */
-                if (alpha->enabled != variant->alpha_state.enabled) {
-                        return false;
-                }
-
-                /* Check that the contents of the test are the same */
-                bool same_func = alpha->func == variant->alpha_state.func;
-                bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;
-
-                if (!(same_func && same_ref)) {
-                        return false;
-                }
-        }
-
         if (variant->outputs_read) {
                 struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
 
@@ -787,8 +771,6 @@
                                 &variants->variants[variant];
 
                 if (type == PIPE_SHADER_FRAGMENT) {
-                        v->alpha_state = ctx->depth_stencil->alpha;
-
                         struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
                         for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
                                 enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
@@ -1133,11 +1115,58 @@
                 ctx->base.bind_fs_state(&ctx->base, fs);
 }
 
+static inline unsigned
+pan_pipe_to_stencil_op(enum pipe_stencil_op in)
+{ /* Translate a Gallium pipe_stencil_op into the matching MALI_STENCIL_OP_* value */
+        switch (in) {
+        case PIPE_STENCIL_OP_KEEP: return MALI_STENCIL_OP_KEEP;
+        case PIPE_STENCIL_OP_ZERO: return MALI_STENCIL_OP_ZERO;
+        case PIPE_STENCIL_OP_REPLACE: return MALI_STENCIL_OP_REPLACE;
+        case PIPE_STENCIL_OP_INCR: return MALI_STENCIL_OP_INCR_SAT; /* plain INCR maps to the _SAT variant */
+        case PIPE_STENCIL_OP_DECR: return MALI_STENCIL_OP_DECR_SAT; /* plain DECR maps to the _SAT variant */
+        case PIPE_STENCIL_OP_INCR_WRAP: return MALI_STENCIL_OP_INCR_WRAP;
+        case PIPE_STENCIL_OP_DECR_WRAP: return MALI_STENCIL_OP_DECR_WRAP;
+        case PIPE_STENCIL_OP_INVERT: return MALI_STENCIL_OP_INVERT;
+        default: unreachable("Invalid stencil op"); /* any value outside the eight cases above */
+        }
+}
+
+static inline void
+pan_pipe_to_stencil(const struct pipe_stencil_state *in, void *out)
+{ /* Pack one face of Gallium stencil state (in) into a STENCIL descriptor at out */
+        pan_pack(out, STENCIL, cfg) {
+                cfg.mask = in->valuemask; /* valuemask only; the writemask is not part of this descriptor */
+                cfg.compare_function = panfrost_translate_compare_func(in->func);
+                cfg.stencil_fail = pan_pipe_to_stencil_op(in->fail_op);
+                cfg.depth_fail = pan_pipe_to_stencil_op(in->zfail_op);
+                cfg.depth_pass = pan_pipe_to_stencil_op(in->zpass_op);
+        } /* no reference value is set here; the draw path ORs it into opaque[0] */
+}
+
 static void *
 panfrost_create_depth_stencil_state(struct pipe_context *pipe,
-                                    const struct pipe_depth_stencil_alpha_state *depth_stencil)
+                                    const struct pipe_depth_stencil_alpha_state *zsa)
 {
-        return mem_dup(depth_stencil, sizeof(*depth_stencil));
+        struct panfrost_zsa_state *so = CALLOC_STRUCT(panfrost_zsa_state); /* NOTE(review): result is used below without a NULL check */
+        so->base = *zsa; /* keep a copy of the Gallium template alongside the packed state */
+
+        pan_pipe_to_stencil(&zsa->stencil[0], &so->stencil_front); /* both faces are packed unconditionally */
+        pan_pipe_to_stencil(&zsa->stencil[1], &so->stencil_back);
+
+        so->stencil_mask_front = zsa->stencil[0].writemask;
+
+        if (zsa->stencil[1].enabled)
+                so->stencil_mask_back = zsa->stencil[1].writemask;
+        else
+                so->stencil_mask_back = so->stencil_mask_front; /* back face disabled: reuse the front writemask */
+
+        /* Alpha test is lowered by the frontend, so it must never arrive enabled */
+        assert(!zsa->alpha.enabled);
+
+        /* TODO: Bounds test should be easy; until then it is rejected here */
+        assert(!zsa->depth.bounds_test);
+
+        return so;
 }
 
 static void
@@ -1145,22 +1174,8 @@
                                   void *cso)
 {
         struct panfrost_context *ctx = pan_context(pipe);
-        struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
-        ctx->depth_stencil = depth_stencil;
-
-        if (!depth_stencil)
-                return;
-
-        /* Alpha does not exist in the hardware (it's not in ES3), so it's
-         * emulated in the fragment shader */
-
-        if (depth_stencil->alpha.enabled) {
-                /* We need to trigger a new shader (maybe) */
-                ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
-        }
-
-        /* Bounds test not implemented */
-        assert(!depth_stencil->depth.bounds_test);
+        struct panfrost_zsa_state *zsa = cso;
+        ctx->depth_stencil = zsa;
 }
 
 static void
diff --git a/src/gallium/drivers/panfrost/pan_context.h b/src/gallium/drivers/panfrost/pan_context.h
index f079162..5e0bce3 100644
--- a/src/gallium/drivers/panfrost/pan_context.h
+++ b/src/gallium/drivers/panfrost/pan_context.h
@@ -162,7 +162,7 @@
         struct pipe_viewport_state pipe_viewport;
         struct pipe_scissor_state scissor;
         struct pipe_blend_color blend_color;
-        struct pipe_depth_stencil_alpha_state *depth_stencil;
+        struct panfrost_zsa_state *depth_stencil;
         struct pipe_stencil_ref stencil_ref;
         unsigned sample_mask;
         unsigned min_samples;
@@ -177,8 +177,7 @@
 };
 
 /* Variants bundle together to form the backing CSO, bundling multiple
- * shaders with varying emulated features baked in (alpha test
- * parameters, etc) */
+ * shaders with varying emulated features baked in */
 
 /* A shader state corresponds to the actual, current variant of the shader */
 struct panfrost_shader_state {
@@ -217,9 +216,6 @@
         unsigned sysval_count;
         unsigned sysval[MAX_SYSVAL_COUNT];
 
-        /* Information on this particular shader variant */
-        struct pipe_alpha_state alpha_state;
-
         uint16_t point_sprite_mask;
         unsigned point_sprite_upper_left : 1;
 
@@ -261,6 +257,16 @@
         struct mali_attr_meta hw[PIPE_MAX_ATTRIBS];
 };
 
+struct panfrost_zsa_state {
+        struct pipe_depth_stencil_alpha_state base; /* copy of the Gallium state this CSO was created from */
+
+        /* Stencil descriptors packed at CSO create time; the stencil ref is OR'd into opaque[0] at draw time */
+        struct mali_stencil_packed stencil_front;
+        struct mali_stencil_packed stencil_back;
+        u8 stencil_mask_front; /* stencil[0].writemask */
+        u8 stencil_mask_back; /* stencil[1].writemask if the back face is enabled, else the front mask */
+};
+
 struct panfrost_sampler_state {
         struct pipe_sampler_state base;
         struct mali_sampler_descriptor midgard_hw;
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 13075e6..0b6ffd0 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -1211,12 +1211,12 @@
         if (ctx->rasterizer && ctx->rasterizer->base.multisample)
                 batch->requirements |= PAN_REQ_MSAA;
 
-        if (ctx->depth_stencil && ctx->depth_stencil->depth.writemask) {
+        if (ctx->depth_stencil && ctx->depth_stencil->base.depth.writemask) {
                 batch->requirements |= PAN_REQ_DEPTH_WRITE;
                 batch->draws |= PIPE_CLEAR_DEPTH;
         }
 
-        if (ctx->depth_stencil && ctx->depth_stencil->stencil[0].enabled)
+        if (ctx->depth_stencil && ctx->depth_stencil->base.stencil[0].enabled)
                 batch->draws |= PIPE_CLEAR_STENCIL;
 }
 
diff --git a/src/gallium/drivers/panfrost/pan_sfbd.c b/src/gallium/drivers/panfrost/pan_sfbd.c
index 6425dfa..4f83099 100644
--- a/src/gallium/drivers/panfrost/pan_sfbd.c
+++ b/src/gallium/drivers/panfrost/pan_sfbd.c
@@ -170,7 +170,7 @@
         fb->depth_stride = rsrc->slices[level].stride;
 
         /* No stencil? Job done. */
-        if (!ctx->depth_stencil || !ctx->depth_stencil->stencil[0].enabled)
+        if (!ctx->depth_stencil || !ctx->depth_stencil->base.stencil[0].enabled)
                 return;
 
         if (panfrost_is_z24s8_variant(surf->format)) {
diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h
index 4a318f1..ec28c48 100644
--- a/src/panfrost/midgard/compiler.h
+++ b/src/panfrost/midgard/compiler.h
@@ -311,9 +311,6 @@
         /* Count of instructions emitted from NIR overall, across all blocks */
         int instruction_count;
 
-        /* Alpha ref value passed in */
-        float alpha_ref;
-
         unsigned quadword_count;
 
         /* Bitmask of valid metadata */
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 50260d5..592e4dc 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -2847,7 +2847,6 @@
         ctx->nir = nir;
         ctx->stage = nir->info.stage;
         ctx->is_blend = is_blend;
-        ctx->alpha_ref = program->alpha_ref;
         ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
         ctx->blend_input = ~0;
         ctx->blend_src1 = ~0;
diff --git a/src/panfrost/util/pan_ir.h b/src/panfrost/util/pan_ir.h
index ba190db..57d8129 100644
--- a/src/panfrost/util/pan_ir.h
+++ b/src/panfrost/util/pan_ir.h
@@ -107,9 +107,6 @@
          * (register spilling), or zero if no spilling is used */
         unsigned tls_size;
 
-        /* IN: For a fragment shader with a lowered alpha test, the ref value */
-        float alpha_ref;
-
         /* IN: Render target formats for output load/store lowering */
         enum pipe_format rt_formats[8];
 } panfrost_program;