nouveau: enable HMM

v2: move declarations into libdrm
v3: fix typos
    rework handling of how much memory to reserve
v4: remove unused parameter
    unmap cutout on error and when the screen is destroyed
v5: move into screen_create
    enable HMM only if CL gets enabled

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5906>
diff --git a/meson.build b/meson.build
index faa2679..2b30ab0 100644
--- a/meson.build
+++ b/meson.build
@@ -1355,7 +1355,7 @@
 
 _drm_amdgpu_ver = '2.4.100'
 _drm_radeon_ver = '2.4.71'
-_drm_nouveau_ver = '2.4.66'
+_drm_nouveau_ver = '2.4.102'
 _drm_intel_ver = '2.4.75'
 _drm_ver = '2.4.81'
 
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 702d88b..e725f37 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -8,6 +8,7 @@
 #include "util/format/u_format_s3tc.h"
 #include "util/u_string.h"
 
+#include "os/os_mman.h"
 #include "util/os_time.h"
 
 #include <stdio.h>
@@ -15,6 +16,7 @@
 #include <stdlib.h>
 
 #include <nouveau_drm.h>
+#include <xf86drm.h>
 
 #include "nouveau_winsys.h"
 #include "nouveau_screen.h"
@@ -28,6 +30,11 @@
 /* XXX this should go away */
 #include "frontend/drm_driver.h"
 
+/* Even though GPUs might allow addresses with more bits, some engines do not.
+ * Stick with 40 for compatibility.
+ */
+#define NV_GENERIC_VM_LIMIT_SHIFT 39
+
 int nouveau_mesa_debug = 0;
 
 static const char *
@@ -173,6 +180,16 @@
                         cache_id, driver_flags);
 }
 
+static void*
+reserve_vma(uintptr_t start, uint64_t reserved_size)
+{
+   void *reserved = os_mmap((void*)start, reserved_size, PROT_NONE,
+                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+   if (reserved == MAP_FAILED)
+      return NULL;
+   return reserved;
+}
+
 int
 nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
 {
@@ -217,6 +234,46 @@
       size = sizeof(nvc0_data);
    }
 
+   screen->has_svm = false;
+   /* we only care about HMM with OpenCL enabled */
+   if (dev->chipset > 0x130 && screen->force_enable_cl) {
+      /* Before being able to enable SVM we need to carve out some memory for
+       * driver bo allocations. Let's just base the size on the available VRAM.
+       *
+       * 40 bit is the biggest we care about and for 32 bit systems we don't
+       * want to allocate all of the available memory either.
+       *
+       * Also we align the size we want to reserve to the next POT to make use
+       * of hugepages.
+       */
+      const int vram_shift = util_logbase2_ceil64(dev->vram_size);
+      const int limit_bit =
+         MIN2(sizeof(void*) * 8 - 1, NV_GENERIC_VM_LIMIT_SHIFT);
+      screen->svm_cutout_size =
+         BITFIELD64_BIT(MIN2(sizeof(void*) == 4 ? 26 : NV_GENERIC_VM_LIMIT_SHIFT, vram_shift));
+
+      size_t start = screen->svm_cutout_size;
+      do {
+         screen->svm_cutout = reserve_vma(start, screen->svm_cutout_size);
+         if (!screen->svm_cutout) {
+            start += screen->svm_cutout_size;
+            continue;
+         }
+
+         struct drm_nouveau_svm_init svm_args = {
+            .unmanaged_addr = (uint64_t)screen->svm_cutout,
+            .unmanaged_size = screen->svm_cutout_size,
+         };
+
+         ret = drmCommandWrite(screen->drm->fd, DRM_NOUVEAU_SVM_INIT,
+                               &svm_args, sizeof(svm_args));
+         screen->has_svm = !ret;
+         if (!screen->has_svm)
+            os_munmap(screen->svm_cutout, screen->svm_cutout_size);
+         break;
+      } while ((start + screen->svm_cutout_size) < BITFIELD64_MASK(limit_bit));
+   }
+
    /*
     * Set default VRAM domain if not overridden
     */
@@ -230,16 +287,16 @@
    ret = nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS,
                             data, size, &screen->channel);
    if (ret)
-      return ret;
+      goto err;
 
    ret = nouveau_client_new(screen->device, &screen->client);
    if (ret)
-      return ret;
+      goto err;
    ret = nouveau_pushbuf_new(screen->client, screen->channel,
                              4, 512 * 1024, 1,
                              &screen->pushbuf);
    if (ret)
-      return ret;
+      goto err;
 
    /* getting CPU time first appears to be more accurate */
    screen->cpu_gpu_time_delta = os_time_get();
@@ -281,6 +338,11 @@
                                        &mm_config);
    screen->mm_VRAM = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
    return 0;
+
+err:
+   if (screen->svm_cutout)
+      os_munmap(screen->svm_cutout, screen->svm_cutout_size);
+   return ret;
 }
 
 void
@@ -290,6 +352,8 @@
 
    if (screen->force_enable_cl)
       glsl_type_singleton_decref();
+   if (screen->has_svm)
+      os_munmap(screen->svm_cutout, screen->svm_cutout_size);
 
    nouveau_mm_destroy(screen->mm_GART);
    nouveau_mm_destroy(screen->mm_VRAM);
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 4046422..2eb6320 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -70,6 +70,9 @@
 
    bool prefer_nir;
    bool force_enable_cl;
+   bool has_svm;
+   void *svm_cutout;
+   size_t svm_cutout_size;
 
 #ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
    union {