iris: Fill out compute caps and enable clover support

This commit enables clover support for iris.  It is intended as a
compiler developer tool and not as a new OpenCL implementation from
Intel.  If you want competent OpenCL, we have a different open-source
driver for that built on our LLVM-based IGC compiler stack.  However,
using clover with iris is becoming increasingly useful as a compiler
development tool and I'm getting tired of carrying the patches in a
private branch.

By default, clover will not initialize on iris.  To enable clover, set
the IRIS_ENABLE_CLOVER environment variable to "1" or "true".  As we've
done with the semi-sketchy platform support in ANV, it dumps a very loud
WARNING to stderr when enabled.  Use at your own risk.

NOTE: To anyone intending to benchmark this, the performance is going to
be terrible and that is expected.  This is in no way representative of
the Intel/NIR compiler stack.  As it currently stands, clover passes
-O0 to clang when compiling OpenCL C to make SPIRV-LLVM-Translator work.
When compiling the SPIR-V, clover currently doesn't run any NIR
optimizations before it lowers memory access so any NIR optimizations
iris attempts to do are severely hampered.  One day, clover will get a
NIR optimization loop or the ability to hand things off to the driver
pre-lowering but today is not that day.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7047>
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 6b5eb7e..985eecf 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -96,6 +96,28 @@
    gen_uuid_compute_driver_id((uint8_t *)uuid, devinfo, PIPE_UUID_SIZE);
 }
 
+static bool
+iris_enable_clover()
+{
+   static int enable = -1;
+   if (enable < 0)
+      enable = env_var_as_boolean("IRIS_ENABLE_CLOVER", false);
+   return enable;
+}
+
+static void
+iris_warn_clover()
+{
+   static bool warned = false;
+   if (warned)
+      return;
+
+   warned = true;
+   fprintf(stderr, "WARNING: OpenCL support via iris+clover is incomplete.\n"
+                   "For a complete and conformant OpenCL implementation, use\n"
+                   "https://github.com/intel/compute-runtime instead\n");
+}
+
 static const char *
 iris_get_name(struct pipe_screen *pscreen)
 {
@@ -441,8 +463,12 @@
       return 0;
    case PIPE_SHADER_CAP_PREFERRED_IR:
       return PIPE_SHADER_IR_NIR;
-   case PIPE_SHADER_CAP_SUPPORTED_IRS:
-      return (1 << PIPE_SHADER_IR_NIR);
+   case PIPE_SHADER_CAP_SUPPORTED_IRS: {
+      int irs = 1 << PIPE_SHADER_IR_NIR;
+      if (iris_enable_clover())
+         irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
+      return irs;
+   }
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
       return 1;
@@ -479,6 +505,13 @@
 } while (0)
 
    switch (param) {
+   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+      /* This gets queried on clover device init and is never queried by the
+       * OpenGL state tracker.
+       */
+      iris_warn_clover();
+      RET((uint32_t []){ 64 });
+
    case PIPE_COMPUTE_CAP_IR_TARGET:
       if (ret)
          strcpy(ret, "gen");
@@ -504,19 +537,33 @@
       /* MaxComputeSharedMemorySize */
       RET((uint64_t []) { 64 * 1024 });
 
+   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+      RET((uint32_t []) { 0 });
+
    case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
       RET((uint32_t []) { BRW_SUBGROUP_SIZE });
 
-   case PIPE_COMPUTE_CAP_ADDRESS_BITS:
-   case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
    case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
-   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
    case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+      RET((uint64_t []) { 1 << 30 }); /* TODO */
+
+   case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+      RET((uint32_t []) { 400 }); /* TODO */
+
+   case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: {
+      unsigned total_num_subslices = 0;
+      for (unsigned i = 0; i < devinfo->num_slices; i++)
+         total_num_subslices += devinfo->num_subslices[i];
+      RET((uint32_t []) { total_num_subslices });
+   }
+
    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+      /* MaxComputeSharedMemorySize */
+      RET((uint64_t []) { 64 * 1024 });
+
    case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
-      // XXX: I think these are for Clover...
-      return 0;
+      /* We could probably allow more; this is the OpenCL minimum */
+      RET((uint64_t []) { 1024 });
 
    default:
       unreachable("unknown compute param");