intel/nir: Allow splitting a single load into up to 32 loads

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6405>
diff --git a/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c b/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
index f67a414..c26ea0b 100644
--- a/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
+++ b/src/intel/compiler/brw_nir_lower_mem_access_bit_sizes.c
@@ -109,8 +109,10 @@
       result = nir_extract_bits(b, &load, 1, load_offset * 8,
                                 num_components, bit_size);
    } else {
-      /* Otherwise, we have to break it into smaller loads */
-      nir_ssa_def *loads[8];
+      /* Otherwise, we have to break it into smaller loads.  We could end up
+       * with as many as 32 loads if we're loading a u64vec16 from scratch.
+       */
+      nir_ssa_def *loads[32];
       unsigned num_loads = 0;
       int load_offset = 0;
       while (load_offset < bytes_read) {