v3d/compiler: implement nir_op_fquantize2f16

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6766>
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 72c0d37..475f5cd 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1160,6 +1160,27 @@
                 vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_H);
                 break;
 
+        case nir_op_fquantize2f16: {
+                /* F32 -> F16 -> F32 round trip (PACK_L, then UNPACK_L) */
+                struct qreg tmp = vir_FMOV(c, src[0]);
+                vir_set_pack(c->defs[tmp.index], V3D_QPU_PACK_L);
+                tmp = vir_FMOV(c, tmp);
+                vir_set_unpack(c->defs[tmp.index], 0, V3D_QPU_UNPACK_L);
+
+                /* Denorm check: |src| vs 2^-14, the smallest normal F16 */
+                struct qreg abs_src = vir_FMOV(c, src[0]);
+                vir_set_unpack(c->defs[abs_src.index], 0, V3D_QPU_UNPACK_ABS);
+                struct qreg threshold = vir_uniform_f(c, ldexpf(1.0, -14));
+                vir_set_pf(vir_FCMP_dest(c, vir_nop_reg(), abs_src, threshold),
+                                         V3D_QPU_PF_PUSHC);
+
+                /* Denorms return +/-0: src with only its sign bit kept */
+                struct qreg zero =
+                        vir_AND(c, src[0], vir_uniform_ui(c, 0x80000000));
+                result = vir_FMOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, tmp, zero));
+                break;
+        }
+
         default:
                 fprintf(stderr, "unknown NIR ALU inst: ");
                 nir_print_instr(&instr->instr, stderr);
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index ddb3d14..b829173 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -850,6 +850,7 @@
 void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf);
 void vir_set_unpack(struct qinst *inst, int src,
                     enum v3d_qpu_input_unpack unpack);
+void vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack);
 
 struct qreg vir_get_temp(struct v3d_compile *c);
 void vir_emit_last_thrsw(struct v3d_compile *c);
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 805f1f3..0c837fd 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -210,6 +210,17 @@
 }
 
 void
+vir_set_pack(struct qinst *inst, enum v3d_qpu_output_pack pack)
+{       /* Store pack as the output_pack of inst's add or mul ALU op. */
+        if (vir_is_add(inst)) {
+                inst->qpu.alu.add.output_pack = pack;
+        } else {
+                assert(vir_is_mul(inst)); /* not an add op: must be a mul */
+                inst->qpu.alu.mul.output_pack = pack;
+        }
+}
+
+void
 vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
 {
         if (vir_is_add(inst)) {