freedreno/ir3: Fix the type of half-float indirect uniform loads.

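Previously create_uniform_indirect() always emitted its MOV with u32 source
and destination types, even when it should have been loading a 16-bit
value.  Take the type as a parameter so that half-float indirect uniform
loads use TYPE_F16.  This likely didn't bite us because we only do mediump
in FS and CS so far, and indirect uniforms are usually in a VS (and usually
highp).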

Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6179>
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 0a7ab73..2f54232 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1409,14 +1409,14 @@
 }
 
 static inline struct ir3_instruction *
-create_uniform_indirect(struct ir3_block *block, int n,
+create_uniform_indirect(struct ir3_block *block, int n, type_t type,
 		struct ir3_instruction *address)
 {
 	struct ir3_instruction *mov;
 
 	mov = ir3_instr_create(block, OPC_MOV);
-	mov->cat1.src_type = TYPE_U32;
-	mov->cat1.dst_type = TYPE_U32;
+	mov->cat1.src_type = type;
+	mov->cat1.dst_type = type;
 	__ssa_dst(mov);
 	ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
 
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 77c0b80..c3461c8 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -785,8 +785,8 @@
 		base_lo = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz));
 		base_hi = create_uniform(b, ubo + (src0->regs[1]->iim_val * ptrsz) + 1);
 	} else {
-		base_lo = create_uniform_indirect(b, ubo, ir3_get_addr0(ctx, src0, ptrsz));
-		base_hi = create_uniform_indirect(b, ubo + 1, ir3_get_addr0(ctx, src0, ptrsz));
+		base_lo = create_uniform_indirect(b, ubo, TYPE_U32, ir3_get_addr0(ctx, src0, ptrsz));
+		base_hi = create_uniform_indirect(b, ubo + 1, TYPE_U32, ir3_get_addr0(ctx, src0, ptrsz));
 
 		/* NOTE: since relative addressing is used, make sure constlen is
 		 * at least big enough to cover all the UBO addresses, since the
@@ -1524,6 +1524,7 @@
 			src = ir3_get_src(ctx, &intr->src[0]);
 			for (int i = 0; i < dest_components; i++) {
 				dst[i] = create_uniform_indirect(b, idx + i,
+						nir_dest_bit_size(intr->dest) == 16 ? TYPE_F16 : TYPE_F32,
 						ir3_get_addr0(ctx, src[0], 1));
 			}
 			/* NOTE: if relative addressing is used, we set