i965/fs: Expose arbitrary pull constant load sizes to the IR.

Change the FS generator to ask the dataport for enough owords worth of
constants to fill the execution size of the instruction -- Which means
that the visitor now needs to set the execution size correctly for
uniform pull constant load instructions, which we were kind of
neglecting until now.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6141bfb..8536a13 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2256,7 +2256,7 @@
 }
 
 /**
- * Read a float[4] vector from the data port constant cache.
+ * Read float[4] vectors from the data port constant cache.
  * Location (in buffer) should be a multiple of 16.
  * Used for fetching shader constants.
  */
@@ -2270,6 +2270,7 @@
    const unsigned target_cache =
       (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
        BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+   const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
 
    /* On newer hardware, offset is in units of owords. */
    if (devinfo->gen >= 6)
@@ -2278,11 +2279,12 @@
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
    brw_push_insn_state(p);
-   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
    brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
+   brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
    /* set message header global offset field (reg 0, element 2) */
@@ -2291,6 +2293,7 @@
 			       mrf.nr,
 			       2), BRW_REGISTER_TYPE_UD),
 	   brw_imm_ud(offset));
+   brw_pop_insn_state(p);
 
    brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
@@ -2305,15 +2308,13 @@
       brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
    }
 
-   brw_set_dp_read_message(p,
-			   insn,
-			   bind_table_index,
-			   BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+   brw_set_dp_read_message(p, insn, bind_table_index,
+                           BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
 			   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
 			   target_cache,
 			   1, /* msg_length */
                            true, /* header_present */
-			   1); /* response_length (1 reg, 2 owords!) */
+			   DIV_ROUND_UP(exec_size, 8)); /* response_length */
 
    brw_pop_insn_state(p);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b22dc9a..977fd8c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2121,7 +2121,7 @@
 
          assert(inst->src[i].stride == 0);
 
-         const fs_builder ubld = ibld.exec_all().group(8, 0);
+         const fs_builder ubld = ibld.exec_all().group(4, 0);
          struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
          ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                    dst, brw_imm_ud(index), offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8b9fa8e..93f4c41 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1127,6 +1127,7 @@
                                                   struct brw_reg index,
                                                   struct brw_reg offset)
 {
+   assert(type_sz(dst.type) == 4);
    assert(inst->mlen != 0);
 
    assert(index.file == BRW_IMMEDIATE_VALUE &&
@@ -1149,27 +1150,25 @@
 {
    assert(index.type == BRW_REGISTER_TYPE_UD);
    assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+   assert(type_sz(dst.type) == 4);
 
    if (index.file == BRW_IMMEDIATE_VALUE) {
       const uint32_t surf_index = index.ud;
 
       brw_push_insn_state(p);
-      brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
       brw_set_default_mask_control(p, BRW_MASK_DISABLE);
       brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
-      brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4);
       brw_pop_insn_state(p);
 
-      brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD)));
-      brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD)));
-      brw_set_dp_read_message(p, send,
-                              surf_index,
-                              BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+      brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
+      brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+      brw_set_dp_read_message(p, send, surf_index,
+                              BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                               GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
                               GEN6_SFID_DATAPORT_CONSTANT_CACHE,
                               1, /* mlen */
                               true, /* header */
-                              1); /* rlen */
+                              DIV_ROUND_UP(inst->size_written, REG_SIZE));
 
    } else {
       struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1188,17 +1187,15 @@
       /* dst = send(payload, a0.0 | <descriptor>) */
       brw_inst *insn = brw_send_indirect_message(
          p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
-         vec4(retype(dst, BRW_REGISTER_TYPE_UD)),
-         vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr);
-      brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
-      brw_set_dp_read_message(p, insn,
-                              0, /* surface */
-                              BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+         retype(dst, BRW_REGISTER_TYPE_UD),
+         retype(payload, BRW_REGISTER_TYPE_UD), addr);
+      brw_set_dp_read_message(p, insn, 0 /* surface */,
+                              BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
                               GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
                               GEN6_SFID_DATAPORT_CONSTANT_CACHE,
                               1, /* mlen */
                               true, /* header */
-                              1); /* rlen */
+                              DIV_ROUND_UP(inst->size_written, REG_SIZE));
 
       brw_pop_insn_state(p);
    }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index bfb286b..7df7423 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4059,7 +4059,9 @@
           * and we have to split it if necessary.
           */
          const unsigned type_size = type_sz(dest.type);
-         const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+         const fs_builder ubld = bld.exec_all().group(4, 0);
+         const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
+
          for (unsigned c = 0; c < instr->num_components;) {
             const unsigned base = const_offset->u32[0] + c * type_size;
 
@@ -4067,9 +4069,8 @@
             const unsigned count = MIN2(instr->num_components - c,
                                         (16 - base % 16) / type_size);
 
-            bld.exec_all()
-               .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-                     packed_consts, surf_index, brw_imm_ud(base & ~15));
+            ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+                      packed_consts, surf_index, brw_imm_ud(base & ~15));
 
             const fs_reg consts =
                retype(byte_offset(packed_consts, base & 15), dest.type);