llvmpipe/draw: handle UBOs that are < 16 bytes.

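It is unclear whether this is a bug in the API user (the test) or not,
but some CTS tests fail because they use an 8-byte constant buffer: the
constant count was computed with a truncating division by 16, which
yields zero for such a buffer. Round the count up instead.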

Fixes: KHR-GLES31.core.layout_binding.block_layout_binding_block_VertexShader

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index b96a5c9..0eeaf78 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -289,8 +289,13 @@
    unsigned i;
 
    for (i = 0; i < ARRAY_SIZE(llvm->jit_context.vs_constants); ++i) {
+      /*
+       * Rounding this up could be an issue, as the shader expects 16-byte
+       * allocations. The likely fix is to move to the LOAD intrinsic in
+       * the future and remove the vec4 constraint.
+       */
       int num_consts =
-         draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4);
+         DIV_ROUND_UP(draw->pt.user.vs_constants_size[i], (sizeof(float) * 4));
       llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
       llvm->jit_context.num_vs_constants[i] = num_consts;
       if (num_consts == 0) {
@@ -308,7 +313,7 @@
 
    for (i = 0; i < ARRAY_SIZE(llvm->gs_jit_context.constants); ++i) {
       int num_consts =
-         draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4);
+         DIV_ROUND_UP(draw->pt.user.gs_constants_size[i], (sizeof(float) * 4));
       llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
       llvm->gs_jit_context.num_constants[i] = num_consts;
       if (num_consts == 0) {
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index e6fa082..002c8b8 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -1230,7 +1230,7 @@
          }
 
          num_constants =
-            setup->constants[i].stored_size / (sizeof(float) * 4);
+            DIV_ROUND_UP(setup->constants[i].stored_size, (sizeof(float) * 4));
          setup->fs.current.jit_context.num_constants[i] = num_constants;
          setup->dirty |= LP_SETUP_NEW_FS;
       }