i965/vec4: fix scratch reads for 64-bit data

v2: Set up for a 64-bit scratch read by checking the type size of the
    correct register

v3: Use byte_offset() instead of offset()

Reviewed-by: Matt Turner <mattst88@gmail.com>
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index d4fe4fa..2edbabb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1482,7 +1482,18 @@
    src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
                                       reg_offset);
 
-   emit_before(block, inst, SCRATCH_READ(temp, index));
+   if (type_sz(orig_src.type) < 8) {
+      emit_before(block, inst, SCRATCH_READ(temp, index));
+   } else {
+      dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
+      dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
+      emit_before(block, inst, SCRATCH_READ(shuffled_float, index));
+      index = get_scratch_offset(block, inst, orig_src.reladdr, reg_offset + 1);
+      vec4_instruction *last_read =
+         SCRATCH_READ(byte_offset(shuffled_float, REG_SIZE), index);
+      emit_before(block, inst, last_read);
+      shuffle_64bit_data(temp, src_reg(shuffled), false, block, last_read);
+   }
 }
 
 /**
@@ -1548,7 +1559,8 @@
 
    /* Now handle scratch access on src */
    if (src.file == VGRF && scratch_loc[src.nr] != -1) {
-      dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+      dst_reg temp = dst_reg(this, type_sz(src.type) == 8 ?
+         glsl_type::dvec4_type : glsl_type::vec4_type);
       emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
       src.nr = temp.nr;
       src.offset %= REG_SIZE;