Merge "mesa3d: Merge remote-tracking branch 'aosp/upstream-19.1' into aosp/master"
diff --git a/VERSION b/VERSION
index d11ab1e..6cb37a8 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-19.1.2
+19.1.3
diff --git a/docs/relnotes/19.1.3.html b/docs/relnotes/19.1.3.html
new file mode 100644
index 0000000..abf0a89
--- /dev/null
+++ b/docs/relnotes/19.1.3.html
@@ -0,0 +1,191 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 19.1.3 Release Notes / July 23, 2019</h1>
+
+<p>
+Mesa 19.1.3 is a bug fix release which fixes bugs found since the 19.1.2 release.
+</p>
+<p>
+Mesa 19.1.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+<h2>SHA256 checksums</h2>
+<pre>
+845460b2225d15c15d4a9743dec798ff0b7396b533011d43e774e67f7825b7e0  mesa-19.1.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109203">Bug 109203</a> - [cfl dxvk] GPU Crash Launching Monopoly Plus (Iris Plus 655 / Wine + DXVK)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109524">Bug 109524</a> - &quot;Invalid glsl version in shading_language_version()&quot; when trying to run directX games using wine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110309">Bug 110309</a> - [icl][bisected] regression on piglit arb_gpu_shader_int 64.execution.fs-ishl-then-* tests</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110663">Bug 110663</a> - threads_posix.h:96: undefined reference to `pthread_once'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110955">Bug 110955</a> - Mesa 18.2.8 implementation error: Invalid GLSL version in shading_language_version()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=111010">Bug 111010</a> - Cemu Shader Cache Corruption Displaying Solid Color After commit 11e16ca7ce0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=111071">Bug 111071</a> - SPIR-V shader processing fails with message about &quot;extra dangling SSA sources&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=111075">Bug 111075</a> - Processing of SPIR-V shader causes device hang, sometimes leading to system reboot</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=111097">Bug 111097</a> - Can not detect VK_ERROR_OUT_OF_DATE_KHR or VK_SUBOPTIMAL_KHR when window resizing</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>radv: Handle cmask being disallowed by addrlib.</li>
+  <li>anv: Add android dependencies on android.</li>
+  <li>radv: Only save the descriptor set if we have one.</li>
+</ul>
+
+<p>Caio Marcelo de Oliveira Filho (2):</p>
+<ul>
+  <li>anv: Fix pool allocator when first alloc needs to grow</li>
+  <li>spirv: Fix stride calculation when lowering Workgroup to offsets</li>
+</ul>
+
+<p>Chia-I Wu (2):</p>
+<ul>
+  <li>anv: fix VkExternalBufferProperties for unsupported handles</li>
+  <li>anv: fix VkExternalBufferProperties for host allocation</li>
+</ul>
+
+<p>Connor Abbott (1):</p>
+<ul>
+  <li>nir: Add a helper to determine if an intrinsic can be reordered</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>radv: fix crash in shader tracing.</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>freedreno: Fix assertion failures in context setup in shader-db mode.</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>softpipe: Remove unused static function</li>
+</ul>
+
+<p>Ian Romanick (4):</p>
+<ul>
+  <li>intel/vec4: Reswizzle VF immediates too</li>
+  <li>nir: Add unit tests for nir_opt_comparison_pre</li>
+  <li>nir: Use nir_src_bit_size instead of alu1-&gt;dest.dest.ssa.bit_size</li>
+  <li>mesa: Set minimum possible GLSL version</li>
+</ul>
+
+<p>Jason Ekstrand (13):</p>
+<ul>
+  <li>nir/instr_set: Expose nir_instrs_equal()</li>
+  <li>nir/loop_analyze: Fix phi-of-identical-alu detection</li>
+  <li>nir: Add more helpers for working with const values</li>
+  <li>nir/loop_analyze: Handle bit sizes correctly in calculate_iterations</li>
+  <li>nir/loop_analyze: Bail if we encounter swizzles</li>
+  <li>anv: Set Stateless Data Port Access MOCS</li>
+  <li>nir/opt_if: Clean up single-src phis in opt_if_loop_terminator</li>
+  <li>nir,intel: Add support for lowering 64-bit nir_opt_extract_*</li>
+  <li>anv: Account for dynamic stencil write disables in the PMA fix</li>
+  <li>nir/regs_to_ssa: Handle regs in phi sources properly</li>
+  <li>nir/loop_analyze: Refactor detection of limit vars</li>
+  <li>nir: Add some helpers for chasing SSA values properly</li>
+  <li>nir/loop_analyze: Properly handle swizzles in loop conditions</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 19.1.2</li>
+  <li>Update version to 19.1.3</li>
+</ul>
+
+<p>Lepton Wu (1):</p>
+<ul>
+  <li>virgl: Set meta data for textures from handle.</li>
+</ul>
+
+<p>Lionel Landwerlin (6):</p>
+<ul>
+  <li>vulkan/overlay: fix command buffer stats</li>
+  <li>vulkan/overlay: fix crash on freeing NULL command buffer</li>
+  <li>anv: fix crash in vkCmdClearAttachments with unused attachment</li>
+  <li>vulkan/wsi: update swapchain status on vkQueuePresent</li>
+  <li>anv: report timestampComputeAndGraphics true</li>
+  <li>anv: fix format mapping for depth/stencil formats</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: don't set READ_ONLY for const_uploader to fix bindless texture hangs</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (1):</p>
+<ul>
+  <li>anv: fix alphaToCoverage when there is no color attachment</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: fix VGT_GS_MODE if VS uses the primitive ID</li>
+</ul>
+
+<p>Sergii Romantsov (1):</p>
+<ul>
+  <li>meta: memory leak of CopyPixels usage</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>mesa: save/restore SSO flag when using ARB_get_program_binary</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>meson: Add dep_thread dependency.</li>
+</ul>
+
+<p>Yevhenii Kolesnikov (1):</p>
+<ul>
+  <li>meta: leaking of BO with DrawPixels</li>
+</ul>
+
+
+</div>
+</body>
+</html>
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 92409d1..4233d1b 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -860,6 +860,11 @@
 	uint32_t clear_value_size = 0;
 	radv_image_get_cmask_info(device, image, &image->cmask);
 
+	if (!image->cmask.size)
+		return;
+
+	assert(image->cmask.alignment);
+
 	image->cmask.offset = align64(image->size, image->cmask.alignment);
 	/* + 8 for storing the clear values */
 	if (!image->clear_value_offset) {
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index ec4fc4a..0606d49 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -81,7 +81,7 @@
 
 	if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
 		state->old_descriptor_set0 = descriptors_state->sets[0];
-		if (!state->old_descriptor_set0)
+		if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0)
 			state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
 	}
 
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 5a34a85..5201f46 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -3610,9 +3610,10 @@
 
 unsigned
 radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+				gl_shader_stage stage,
 				const struct nir_shader *nir)
 {
-	switch (nir->info.stage) {
+	switch (stage) {
 	case MESA_SHADER_TESS_CTRL:
 		return chip_class >= CIK ? 128 : 64;
 	case MESA_SHADER_GEOMETRY:
@@ -3623,6 +3624,8 @@
 		return 0;
 	}
 
+	if (!nir)
+		return chip_class >= GFX9 ? 128 : 64;
 	unsigned max_workgroup_size = nir->info.cs.local_size[0] *
 		nir->info.cs.local_size[1] *
 		nir->info.cs.local_size[2];
@@ -3689,7 +3692,8 @@
 	for (int i = 0; i < shader_count; ++i) {
 		ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
 		                              radv_nir_get_max_workgroup_size(ctx.options->chip_class,
-		                                                            shaders[i]));
+									      shaders[i]->info.stage,
+									      shaders[i]));
 	}
 
 	create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index f80948b..29840e5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2930,8 +2930,11 @@
                                    struct radv_pipeline *pipeline)
 {
 	const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
-
 	uint32_t vgt_primitiveid_en = false;
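+	/* Editor's note: with tessellation enabled the tess eval shader is the
+	 * stage that feeds the rest of the pipeline in place of the vertex
+	 * shader, so its uses_prim_id flag is the one that matters below. */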
+	const struct radv_shader_variant *vs =
+		pipeline->shaders[MESA_SHADER_TESS_EVAL] ?
+		pipeline->shaders[MESA_SHADER_TESS_EVAL] :
+		pipeline->shaders[MESA_SHADER_VERTEX];
 	uint32_t vgt_gs_mode = 0;
 
 	if (radv_pipeline_has_gs(pipeline)) {
@@ -2940,7 +2943,7 @@
 
 		vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
 		                             pipeline->device->physical_device->rad_info.chip_class);
-	} else if (outinfo->export_prim_id) {
+	} else if (outinfo->export_prim_id || vs->info.info.uses_prim_id) {
 		vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
 		vgt_primitiveid_en = true;
 	}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index b0bcb57..31c829d 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1994,6 +1994,7 @@
 			     const struct radv_nir_compiler_options *options);
 
 unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+					 gl_shader_stage stage,
 					 const struct nir_shader *nir);
 
 /* radv_shader_info.h */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index ea9f3d9..1f9fa48 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -765,7 +765,7 @@
 				     lds_increment);
 	} else if (stage == MESA_SHADER_COMPUTE) {
 		unsigned max_workgroup_size =
-				radv_nir_get_max_workgroup_size(chip_class, variant->nir);
+			radv_nir_get_max_workgroup_size(chip_class, stage, variant->nir);
 		lds_per_wave = (conf->lds_size * lds_increment) /
 			       DIV_ROUND_UP(max_workgroup_size, 64);
 	}
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index a8faeb9..18aa44a 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -299,4 +299,16 @@
       link_with : libmesa_util,
     )
   )
+
+  test(
+    'comparison_pre',
+    executable(
+      'comparison_pre',
+      files('tests/comparison_pre_tests.cpp'),
+      c_args : [c_vis_args, c_msvc_compat_args, no_override_init_args],
+      include_directories : [inc_common],
+      dependencies : [dep_thread, idep_gtest, idep_nir],
+      link_with : libmesa_util,
+    )
+  )
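+
+  # Editor's note: assuming a configured build directory, this suite can be
+  # run on its own with `meson test -C <builddir> comparison_pre`.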
 endif
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 5b75585..5c1e0e8 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -1204,6 +1204,41 @@
    return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
 }
 
+nir_const_value
+nir_const_value_for_float(double f, unsigned bit_size)
+{
+   nir_const_value v;
+   memset(&v, 0, sizeof(v));
+
+   switch (bit_size) {
+   case 16:
+      v.u16 = _mesa_float_to_half(f);
+      break;
+   case 32:
+      v.f32 = f;
+      break;
+   case 64:
+      v.f64 = f;
+      break;
+   default:
+      unreachable("Invalid bit size");
+   }
+
+   return v;
+}
+
+double
+nir_const_value_as_float(nir_const_value value, unsigned bit_size)
+{
+   switch (bit_size) {
+   case 16: return _mesa_half_to_float(value.u16);
+   case 32: return value.f32;
+   case 64: return value.f64;
+   default:
+      unreachable("Invalid bit size");
+   }
+}
+
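+/* Illustrative round trip (editor's sketch, not part of the patch):
+ *
+ *    nir_const_value v = nir_const_value_for_float(1.5, 16);
+ *    double d = nir_const_value_as_float(v, 16);
+ *
+ * 1.5 is exactly representable as a half float, so d == 1.5; other values
+ * come back rounded by the _mesa_float_to_half() conversion. */
+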
 int64_t
 nir_src_comp_as_int(nir_src src, unsigned comp)
 {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 37161e8..8898139 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -140,6 +140,106 @@
       arr[i] = c[i].m; \
 } while (false)
 
+static inline nir_const_value
+nir_const_value_for_raw_uint(uint64_t x, unsigned bit_size)
+{
+   nir_const_value v;
+   memset(&v, 0, sizeof(v));
+
+   switch (bit_size) {
+   case 1:  v.b   = x;  break;
+   case 8:  v.u8  = x;  break;
+   case 16: v.u16 = x;  break;
+   case 32: v.u32 = x;  break;
+   case 64: v.u64 = x;  break;
+   default:
+      unreachable("Invalid bit size");
+   }
+
+   return v;
+}
+
+static inline nir_const_value
+nir_const_value_for_int(int64_t i, unsigned bit_size)
+{
+   nir_const_value v;
+   memset(&v, 0, sizeof(v));
+
+   assert(bit_size <= 64);
+   if (bit_size < 64) {
+      assert(i >= (-(1ll << (bit_size - 1))));
+      assert(i < (1ll << (bit_size - 1)));
+   }
+
+   return nir_const_value_for_raw_uint(i, bit_size);
+}
+
+static inline nir_const_value
+nir_const_value_for_uint(uint64_t u, unsigned bit_size)
+{
+   nir_const_value v;
+   memset(&v, 0, sizeof(v));
+
+   assert(bit_size <= 64);
+   if (bit_size < 64)
+      assert(u < (1ull << bit_size));
+
+   return nir_const_value_for_raw_uint(u, bit_size);
+}
+
+static inline nir_const_value
+nir_const_value_for_bool(bool b, unsigned bit_size)
+{
+   /* Booleans use a 0/-1 convention */
+   return nir_const_value_for_int(-(int)b, bit_size);
+}
+
+/* This one isn't inline because it requires half-float conversion */
+nir_const_value nir_const_value_for_float(double b, unsigned bit_size);
+
+static inline int64_t
+nir_const_value_as_int(nir_const_value value, unsigned bit_size)
+{
+   switch (bit_size) {
+   /* int1_t uses 0/-1 convention */
+   case 1:  return -(int)value.b;
+   case 8:  return value.i8;
+   case 16: return value.i16;
+   case 32: return value.i32;
+   case 64: return value.i64;
+   default:
+      unreachable("Invalid bit size");
+   }
+}
+
+static inline uint64_t
+nir_const_value_as_uint(nir_const_value value, unsigned bit_size)
+{
+   switch (bit_size) {
+   case 1:  return value.b;
+   case 8:  return value.u8;
+   case 16: return value.u16;
+   case 32: return value.u32;
+   case 64: return value.u64;
+   default:
+      unreachable("Invalid bit size");
+   }
+}
+
+static inline bool
+nir_const_value_as_bool(nir_const_value value, unsigned bit_size)
+{
+   int64_t i = nir_const_value_as_int(value, bit_size);
+
+   /* Booleans of any size use 0/-1 convention */
+   assert(i == 0 || i == -1);
+
+   return i;
+}
+
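+/* Editor's sketch of the 0/-1 boolean convention:
+ *
+ *    nir_const_value v = nir_const_value_for_bool(true, 32);
+ *    assert(nir_const_value_as_int(v, 32) == -1);
+ *    assert(nir_const_value_as_bool(v, 32));
+ */
+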
+/* This one isn't inline because it requires half-float conversion */
+double nir_const_value_as_float(nir_const_value value, unsigned bit_size);
+
 typedef struct nir_constant {
    /**
     * Value of the constant.
@@ -1416,6 +1516,16 @@
 void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
                                  nir_ssa_def *handle, bool bindless);
 
+/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
+static inline bool
+nir_intrinsic_can_reorder(nir_intrinsic_instr *instr)
+{
+   const nir_intrinsic_info *info =
+      &nir_intrinsic_infos[instr->intrinsic];
+   return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+          (info->flags & NIR_INTRINSIC_CAN_REORDER);
+}
+
 /**
  * \group texture information
  *
@@ -1815,6 +1925,85 @@
                 nir_parallel_copy_instr, instr,
                 type, nir_instr_type_parallel_copy)
 
+typedef struct {
+   nir_ssa_def *def;
+   unsigned comp;
+} nir_ssa_scalar;
+
+static inline bool
+nir_ssa_scalar_is_const(nir_ssa_scalar s)
+{
+   return s.def->parent_instr->type == nir_instr_type_load_const;
+}
+
+static inline nir_const_value
+nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
+{
+   assert(s.comp < s.def->num_components);
+   nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
+   return load->value[s.comp];
+}
+
+#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix)                     \
+static inline type                                                   \
+nir_ssa_scalar_as_##suffix(nir_ssa_scalar s)                         \
+{                                                                    \
+   return nir_const_value_as_##suffix(                               \
+      nir_ssa_scalar_as_const_value(s), s.def->bit_size);            \
+}
+
+NIR_DEFINE_SCALAR_AS_CONST(int64_t,    int)
+NIR_DEFINE_SCALAR_AS_CONST(uint64_t,   uint)
+NIR_DEFINE_SCALAR_AS_CONST(bool,       bool)
+NIR_DEFINE_SCALAR_AS_CONST(double,     float)
+
+#undef NIR_DEFINE_SCALAR_AS_CONST
+
+static inline bool
+nir_ssa_scalar_is_alu(nir_ssa_scalar s)
+{
+   return s.def->parent_instr->type == nir_instr_type_alu;
+}
+
+static inline nir_op
+nir_ssa_scalar_alu_op(nir_ssa_scalar s)
+{
+   return nir_instr_as_alu(s.def->parent_instr)->op;
+}
+
+static inline nir_ssa_scalar
+nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
+{
+   nir_ssa_scalar out = { NULL, 0 };
+
+   nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
+   assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
+
+   /* Our component must be written */
+   assert(s.comp < s.def->num_components);
+   assert(alu->dest.write_mask & (1u << s.comp));
+
+   assert(alu->src[alu_src_idx].src.is_ssa);
+   out.def = alu->src[alu_src_idx].src.ssa;
+
+   if (nir_op_infos[alu->op].input_sizes[alu_src_idx] == 0) {
+      /* The ALU src is unsized so the source component follows the
+       * destination component.
+       */
+      out.comp = alu->src[alu_src_idx].swizzle[s.comp];
+   } else {
+      /* This is a sized source so all source components work together to
+       * produce all the destination components.  Since we need to return a
+       * scalar, this only works if the source is a scalar.
+       */
+      assert(nir_op_infos[alu->op].input_sizes[alu_src_idx] == 1);
+      out.comp = alu->src[alu_src_idx].swizzle[0];
+   }
+   assert(out.comp < out.def->num_components);
+
+   return out;
+}
+
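+/* Editor's sketch: chasing through a swizzle.  Given
+ *
+ *    vec4 32 ssa_1 = fadd ssa_0, ssa_0
+ *    vec1 32 ssa_2 = mov ssa_1.w
+ *
+ * chasing component 0 of ssa_2 through the mov's source returns
+ * { ssa_1, 3 }: an unsized ALU source follows the destination component
+ * through its swizzle. */
+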
 /*
  * Control flow
  *
@@ -2196,6 +2385,7 @@
    nir_lower_minmax64 = (1 << 10),
    nir_lower_shift64 = (1 << 11),
    nir_lower_imul_2x32_64 = (1 << 12),
+   nir_lower_extract64 = (1 << 13),
 } nir_lower_int64_options;
 
 typedef enum {
@@ -2785,6 +2975,7 @@
 
 bool nir_src_is_dynamically_uniform(nir_src src);
 bool nir_srcs_equal(nir_src src1, nir_src src2);
+bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src);
 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
@@ -3487,6 +3678,9 @@
 bool nir_lower_ssa_defs_to_regs_block(nir_block *block);
 bool nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl);
 
+/* This is here for unit tests. */
+bool nir_opt_comparison_pre_impl(nir_function_impl *impl);
+
 bool nir_opt_comparison_pre(nir_shader *shader);
 
 bool nir_opt_algebraic(nir_shader *shader);
@@ -3535,6 +3729,7 @@
                              bool indirect_load_ok, bool expensive_alu_ok);
 
 bool nir_opt_remove_phis(nir_shader *shader);
+bool nir_opt_remove_phis_block(nir_block *block);
 
 bool nir_opt_shrink_load(nir_shader *shader);
 
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index bd62bc9..e2a0b32 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -25,6 +25,64 @@
 #include "nir_vla.h"
 #include "util/half_float.h"
 
+static bool
+src_is_ssa(nir_src *src, void *data)
+{
+   (void) data;
+   return src->is_ssa;
+}
+
+static bool
+dest_is_ssa(nir_dest *dest, void *data)
+{
+   (void) data;
+   return dest->is_ssa;
+}
+
+static inline bool
+instr_each_src_and_dest_is_ssa(const nir_instr *instr)
+{
+   if (!nir_foreach_dest((nir_instr *)instr, dest_is_ssa, NULL) ||
+       !nir_foreach_src((nir_instr *)instr, src_is_ssa, NULL))
+      return false;
+
+   return true;
+}
+
+/* This function determines if uses of an instruction can safely be rewritten
+ * to use another identical instruction instead. Note that this function must
+ * be kept in sync with hash_instr() and nir_instrs_equal() -- only
+ * instructions that pass this test will be handed on to those functions, and
+ * conversely they must handle everything that this function returns true for.
+ */
+static bool
+instr_can_rewrite(const nir_instr *instr)
+{
+   /* We only handle SSA. */
+   assert(instr_each_src_and_dest_is_ssa(instr));
+
+   switch (instr->type) {
+   case nir_instr_type_alu:
+   case nir_instr_type_deref:
+   case nir_instr_type_tex:
+   case nir_instr_type_load_const:
+   case nir_instr_type_phi:
+      return true;
+   case nir_instr_type_intrinsic:
+      return nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr));
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+   case nir_instr_type_ssa_undef:
+      return false;
+   case nir_instr_type_parallel_copy:
+   default:
+      unreachable("Invalid instruction type");
+   }
+
+   return false;
+}
+
+
 #define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data))
 
 static uint32_t
@@ -430,12 +488,16 @@
       if (const2 == NULL)
          return false;
 
+      if (nir_src_bit_size(alu1->src[src1].src) !=
+          nir_src_bit_size(alu2->src[src2].src))
+         return false;
+
       /* FINISHME: Apply the swizzle? */
       return nir_const_value_negative_equal(const1,
                                             const2,
                                             nir_ssa_alu_instr_src_components(alu1, src1),
                                             nir_op_infos[alu1->op].input_types[src1],
-                                            alu1->dest.dest.ssa.bit_size);
+                                            nir_src_bit_size(alu1->src[src1].src));
    }
 
    uint8_t alu1_swizzle[4] = {0};
@@ -503,9 +565,11 @@
  * the same hash for (ignoring collisions, of course).
  */
 
-static bool
+bool
 nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
 {
+   assert(instr_can_rewrite(instr1) && instr_can_rewrite(instr2));
+
    if (instr1->type != instr2->type)
       return false;
 
@@ -701,68 +765,6 @@
    unreachable("All cases in the above switch should return");
 }
 
-static bool
-src_is_ssa(nir_src *src, void *data)
-{
-   (void) data;
-   return src->is_ssa;
-}
-
-static bool
-dest_is_ssa(nir_dest *dest, void *data)
-{
-   (void) data;
-   return dest->is_ssa;
-}
-
-static inline bool
-instr_each_src_and_dest_is_ssa(nir_instr *instr)
-{
-   if (!nir_foreach_dest(instr, dest_is_ssa, NULL) ||
-       !nir_foreach_src(instr, src_is_ssa, NULL))
-      return false;
-
-   return true;
-}
-
-/* This function determines if uses of an instruction can safely be rewritten
- * to use another identical instruction instead. Note that this function must
- * be kept in sync with hash_instr() and nir_instrs_equal() -- only
- * instructions that pass this test will be handed on to those functions, and
- * conversely they must handle everything that this function returns true for.
- */
-
-static bool
-instr_can_rewrite(nir_instr *instr)
-{
-   /* We only handle SSA. */
-   assert(instr_each_src_and_dest_is_ssa(instr));
-
-   switch (instr->type) {
-   case nir_instr_type_alu:
-   case nir_instr_type_deref:
-   case nir_instr_type_tex:
-   case nir_instr_type_load_const:
-   case nir_instr_type_phi:
-      return true;
-   case nir_instr_type_intrinsic: {
-      const nir_intrinsic_info *info =
-         &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
-      return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
-             (info->flags & NIR_INTRINSIC_CAN_REORDER);
-   }
-   case nir_instr_type_call:
-   case nir_instr_type_jump:
-   case nir_instr_type_ssa_undef:
-      return false;
-   case nir_instr_type_parallel_copy:
-   default:
-      unreachable("Invalid instruction type");
-   }
-
-   return false;
-}
-
 static nir_ssa_def *
 nir_instr_get_dest_ssa_def(nir_instr *instr)
 {
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index 0ae9533..d484c14 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -32,7 +32,10 @@
    basic_induction
 } nir_loop_variable_type;
 
-struct nir_basic_induction_var;
+typedef struct nir_basic_induction_var {
+   nir_alu_instr *alu;                      /* The def of the alu-operation */
+   nir_ssa_def *def_outside_loop;           /* The phi-src outside the loop */
+} nir_basic_induction_var;
 
 typedef struct {
    /* A link for the work list */
@@ -57,13 +60,6 @@
 
 } nir_loop_variable;
 
-typedef struct nir_basic_induction_var {
-   nir_op alu_op;                           /* The type of alu-operation    */
-   nir_loop_variable *alu_def;              /* The def of the alu-operation */
-   nir_loop_variable *invariant;            /* The invariant alu-operand    */
-   nir_loop_variable *def_outside_loop;     /* The phi-src outside the loop */
-} nir_basic_induction_var;
-
 typedef struct {
    /* The loop we store information for */
    nir_loop *loop;
@@ -274,6 +270,44 @@
    }
 }
 
+/* If all of the instruction sources point to identical ALU instructions (as
+ * per nir_instrs_equal), return one of the ALU instructions.  Otherwise,
+ * return NULL.
+ */
+static nir_alu_instr *
+phi_instr_as_alu(nir_phi_instr *phi)
+{
+   nir_alu_instr *first = NULL;
+   nir_foreach_phi_src(src, phi) {
+      assert(src->src.is_ssa);
+      if (src->src.ssa->parent_instr->type != nir_instr_type_alu)
+         return NULL;
+
+      nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);
+      if (first == NULL) {
+         first = alu;
+      } else {
+         if (!nir_instrs_equal(&first->instr, &alu->instr))
+            return NULL;
+      }
+   }
+
+   return first;
+}
+
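+/* Editor's sketch: for an if/else where both arms step a value identically,
+ *
+ *    if ... {
+ *       vec1 32 ssa_4 = iadd ssa_2, ssa_3
+ *    } else {
+ *       vec1 32 ssa_5 = iadd ssa_2, ssa_3
+ *    }
+ *    vec1 32 ssa_6 = phi ssa_4, ssa_5
+ *
+ * phi_instr_as_alu() returns one of the two equal iadds, so the search for
+ * induction variables can look through the phi. */
+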
+static bool
+alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx)
+{
+   assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0);
+   assert(alu->dest.dest.is_ssa);
+   for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) {
+      if (alu->src[src_idx].swizzle[i] != i)
+         return false;
+   }
+
+   return true;
+}
+
 static bool
 compute_induction_information(loop_info_state *state)
 {
@@ -298,6 +332,7 @@
       nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr);
       nir_basic_induction_var *biv = rzalloc(state, nir_basic_induction_var);
 
+      nir_loop_variable *alu_src_var = NULL;
       nir_foreach_phi_src(src, phi) {
          nir_loop_variable *src_var = get_loop_var(src->src.ssa, state);
 
@@ -313,60 +348,44 @@
          if (is_var_phi(src_var)) {
             nir_phi_instr *src_phi =
                nir_instr_as_phi(src_var->def->parent_instr);
-
-            nir_op alu_op = nir_num_opcodes; /* avoid uninitialized warning */
-            nir_ssa_def *alu_srcs[2] = {0};
-            nir_foreach_phi_src(src2, src_phi) {
-               nir_loop_variable *src_var2 =
-                  get_loop_var(src2->src.ssa, state);
-
-               if (!src_var2->in_if_branch || !is_var_alu(src_var2))
+            nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi);
+            if (src_phi_alu) {
+               src_var = get_loop_var(&src_phi_alu->dest.dest.ssa, state);
+               if (!src_var->in_if_branch)
                   break;
-
-               nir_alu_instr *alu =
-                  nir_instr_as_alu(src_var2->def->parent_instr);
-               if (nir_op_infos[alu->op].num_inputs != 2)
-                  break;
-
-               if (alu->src[0].src.ssa == alu_srcs[0] &&
-                   alu->src[1].src.ssa == alu_srcs[1] &&
-                   alu->op == alu_op) {
-                  /* Both branches perform the same calculation so we can use
-                   * one of them to find the induction variable.
-                   */
-                  src_var = src_var2;
-               } else {
-                  alu_srcs[0] = alu->src[0].src.ssa;
-                  alu_srcs[1] = alu->src[1].src.ssa;
-                  alu_op = alu->op;
-               }
             }
          }
 
-         if (!src_var->in_loop) {
-            biv->def_outside_loop = src_var;
-         } else if (is_var_alu(src_var)) {
+         if (!src_var->in_loop && !biv->def_outside_loop) {
+            biv->def_outside_loop = src_var->def;
+         } else if (is_var_alu(src_var) && !biv->alu) {
+            alu_src_var = src_var;
             nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr);
 
             if (nir_op_infos[alu->op].num_inputs == 2) {
-               biv->alu_def = src_var;
-               biv->alu_op = alu->op;
-
                for (unsigned i = 0; i < 2; i++) {
-                  /* Is one of the operands const, and the other the phi */
-                  if (alu->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const &&
-                      alu->src[1-i].src.ssa == &phi->dest.ssa)
-                     biv->invariant = get_loop_var(alu->src[i].src.ssa, state);
+                  /* Is one of the operands const, and the other the phi.  The
+                   * phi source can't be swizzled in any way.
+                   */
+                  if (nir_src_is_const(alu->src[i].src) &&
+                      alu->src[1-i].src.ssa == &phi->dest.ssa &&
+                      alu_src_has_identity_swizzle(alu, 1 - i))
+                     biv->alu = alu;
                }
             }
+
+            if (!biv->alu)
+               break;
+         } else {
+            biv->alu = NULL;
+            break;
          }
       }
 
-      if (biv->alu_def && biv->def_outside_loop && biv->invariant &&
-          is_var_constant(biv->def_outside_loop)) {
-         assert(is_var_constant(biv->invariant));
-         biv->alu_def->type = basic_induction;
-         biv->alu_def->ind = biv;
+      if (biv->alu && biv->def_outside_loop &&
+          biv->def_outside_loop->parent_instr->type == nir_instr_type_load_const) {
+         alu_src_var->type = basic_induction;
+         alu_src_var->ind = biv;
          var->type = basic_induction;
          var->ind = biv;
 
@@ -493,7 +512,7 @@
 
 static bool
 guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
-                 nir_loop_variable *basic_ind)
+                 nir_ssa_scalar basic_ind)
 {
    unsigned min_array_size = 0;
 
@@ -514,8 +533,10 @@
                find_array_access_via_induction(state,
                                                nir_src_as_deref(intrin->src[0]),
                                                &array_idx);
-            if (basic_ind == array_idx &&
+            if (array_idx && basic_ind.def == array_idx->def &&
                 (min_array_size == 0 || min_array_size > array_size)) {
+               /* Array indices are scalars */
+               assert(basic_ind.def->num_components == 1);
                min_array_size = array_size;
             }
 
@@ -526,8 +547,10 @@
                find_array_access_via_induction(state,
                                                nir_src_as_deref(intrin->src[1]),
                                                &array_idx);
-            if (basic_ind == array_idx &&
+            if (array_idx && basic_ind.def == array_idx->def &&
                 (min_array_size == 0 || min_array_size > array_size)) {
+               /* Array indices are scalars */
+               assert(basic_ind.def->num_components == 1);
                min_array_size = array_size;
             }
          }
@@ -535,7 +558,8 @@
    }
 
    if (min_array_size) {
-      limit_val->i32 = min_array_size;
+      *limit_val = nir_const_value_for_uint(min_array_size,
+                                            basic_ind.def->bit_size);
       return true;
    }
 
@@ -543,71 +567,84 @@
 }
 
 static bool
-try_find_limit_of_alu(nir_loop_variable *limit, nir_const_value *limit_val,
+try_find_limit_of_alu(nir_ssa_scalar limit, nir_const_value *limit_val,
                       nir_loop_terminator *terminator, loop_info_state *state)
 {
-   if(!is_var_alu(limit))
+   if (!nir_ssa_scalar_is_alu(limit))
       return false;
 
-   nir_alu_instr *limit_alu = nir_instr_as_alu(limit->def->parent_instr);
-
-   if (limit_alu->op == nir_op_imin ||
-       limit_alu->op == nir_op_fmin) {
-      limit = get_loop_var(limit_alu->src[0].src.ssa, state);
-
-      if (!is_var_constant(limit))
-         limit = get_loop_var(limit_alu->src[1].src.ssa, state);
-
-      if (!is_var_constant(limit))
-         return false;
-
-      *limit_val = nir_instr_as_load_const(limit->def->parent_instr)->value[0];
-
-      terminator->exact_trip_count_unknown = true;
-
-      return true;
+   nir_op limit_op = nir_ssa_scalar_alu_op(limit);
+   if (limit_op == nir_op_imin || limit_op == nir_op_fmin) {
+      for (unsigned i = 0; i < 2; i++) {
+         nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(limit, i);
+         if (nir_ssa_scalar_is_const(src)) {
+            *limit_val = nir_ssa_scalar_as_const_value(src);
+            terminator->exact_trip_count_unknown = true;
+            return true;
+         }
+      }
    }
 
    return false;
 }
 
-static int32_t
-get_iteration(nir_op cond_op, nir_const_value *initial, nir_const_value *step,
-              nir_const_value *limit)
+static nir_const_value
+eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0)
 {
-   int32_t iter;
+   assert(nir_op_infos[op].num_inputs == 1);
+   nir_const_value dest;
+   nir_const_value *src[1] = { &src0 };
+   nir_eval_const_opcode(op, &dest, 1, bit_size, src);
+   return dest;
+}
+
+static nir_const_value
+eval_const_binop(nir_op op, unsigned bit_size,
+                 nir_const_value src0, nir_const_value src1)
+{
+   assert(nir_op_infos[op].num_inputs == 2);
+   nir_const_value dest;
+   nir_const_value *src[2] = { &src0, &src1 };
+   nir_eval_const_opcode(op, &dest, 1, bit_size, src);
+   return dest;
+}
+
+static int32_t
+get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
+              nir_const_value limit, unsigned bit_size)
+{
+   nir_const_value span, iter;
 
    switch (cond_op) {
    case nir_op_ige:
    case nir_op_ilt:
    case nir_op_ieq:
-   case nir_op_ine: {
-      int32_t initial_val = initial->i32;
-      int32_t span = limit->i32 - initial_val;
-      iter = span / step->i32;
+   case nir_op_ine:
+      span = eval_const_binop(nir_op_isub, bit_size, limit, initial);
+      iter = eval_const_binop(nir_op_idiv, bit_size, span, step);
       break;
-   }
+
    case nir_op_uge:
-   case nir_op_ult: {
-      uint32_t initial_val = initial->u32;
-      uint32_t span = limit->u32 - initial_val;
-      iter = span / step->u32;
+   case nir_op_ult:
+      span = eval_const_binop(nir_op_isub, bit_size, limit, initial);
+      iter = eval_const_binop(nir_op_udiv, bit_size, span, step);
       break;
-   }
+
    case nir_op_fge:
    case nir_op_flt:
    case nir_op_feq:
-   case nir_op_fne: {
-      float initial_val = initial->f32;
-      float span = limit->f32 - initial_val;
-      iter = span / step->f32;
+   case nir_op_fne:
+      span = eval_const_binop(nir_op_fsub, bit_size, limit, initial);
+      iter = eval_const_binop(nir_op_fdiv, bit_size, span, step);
+      iter = eval_const_unop(nir_op_f2i64, bit_size, iter);
       break;
-   }
+
    default:
       return -1;
    }
 
-   return iter;
+   uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size);
+   return iter_u64 > INT_MAX ? -1 : (int)iter_u64;
 }
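+
+/* Worked example (editor's note): for (i = 0; i < 10; i += 2) has
+ * initial = 0, limit = 10 and step = 2 with cond_op nir_op_ilt, giving
+ * span = isub(10, 0) = 10 and iter = idiv(10, 2) = 5 iterations. */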
 
 static bool
@@ -618,18 +655,18 @@
 {
    assert(nir_op_infos[cond_op].num_inputs == 2);
 
-   nir_const_value iter_src = {0, };
+   nir_const_value iter_src;
    nir_op mul_op;
    nir_op add_op;
    switch (induction_base_type) {
    case nir_type_float:
-      iter_src.f32 = (float) iter_int;
+      iter_src = nir_const_value_for_float(iter_int, bit_size);
       mul_op = nir_op_fmul;
       add_op = nir_op_fadd;
       break;
    case nir_type_int:
    case nir_type_uint:
-      iter_src.i32 = iter_int;
+      iter_src = nir_const_value_for_int(iter_int, bit_size);
       mul_op = nir_op_imul;
       add_op = nir_op_iadd;
       break;
@@ -662,14 +699,12 @@
 
 static int
 calculate_iterations(nir_const_value *initial, nir_const_value *step,
-                     nir_const_value *limit, nir_loop_variable *alu_def,
-                     nir_alu_instr *cond_alu, nir_op alu_op, bool limit_rhs,
+                     nir_const_value *limit, nir_alu_instr *alu,
+                     nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs,
                      bool invert_cond)
 {
    assert(initial != NULL && step != NULL && limit != NULL);
 
-   nir_alu_instr *alu = nir_instr_as_alu(alu_def->def->parent_instr);
-
    /* nir_op_isub should have been lowered away by this point */
    assert(alu->op != nir_op_isub);
 
@@ -701,12 +736,16 @@
     * condition and if so we assume we need to step the initial value.
     */
    unsigned trip_offset = 0;
-   if (cond_alu->src[0].src.ssa == alu_def->def ||
-       cond_alu->src[1].src.ssa == alu_def->def) {
+   nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr);
+   if (cond_alu->src[0].src.ssa == &alu->dest.dest.ssa ||
+       cond_alu->src[1].src.ssa == &alu->dest.dest.ssa) {
       trip_offset = 1;
    }
 
-   int iter_int = get_iteration(alu_op, initial, step, limit);
+   assert(nir_src_bit_size(alu->src[0].src) ==
+          nir_src_bit_size(alu->src[1].src));
+   unsigned bit_size = nir_src_bit_size(alu->src[0].src);
+   int iter_int = get_iteration(alu_op, *initial, *step, *limit, bit_size);
 
    /* If iter_int is negative the loop is ill-formed or the conditional is
     * unsigned with a huge iteration count so don't bother going any further.
@@ -723,9 +762,6 @@
     *
     *    for (float x = 0.0; x != 0.9; x += 0.2);
     */
-   assert(nir_src_bit_size(alu->src[0].src) ==
-          nir_src_bit_size(alu->src[1].src));
-   unsigned bit_size = nir_src_bit_size(alu->src[0].src);
    for (int bias = -1; bias <= 1; bias++) {
       const int iter_bias = iter_int + bias;
 
@@ -740,9 +776,9 @@
 }
 
 static nir_op
-inverse_comparison(nir_alu_instr *alu)
+inverse_comparison(nir_op alu_op)
 {
-   switch (alu->op) {
+   switch (alu_op) {
    case nir_op_fge:
       return nir_op_flt;
    case nir_op_ige:
@@ -769,95 +805,97 @@
 }
 
 static bool
-is_supported_terminator_condition(nir_alu_instr *alu)
+is_supported_terminator_condition(nir_ssa_scalar cond)
 {
+   if (!nir_ssa_scalar_is_alu(cond))
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(cond.def->parent_instr);
    return nir_alu_instr_is_comparison(alu) &&
           nir_op_infos[alu->op].num_inputs == 2;
 }
 
 static bool
-get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
-                             nir_loop_variable **limit,
+get_induction_and_limit_vars(nir_ssa_scalar cond,
+                             nir_ssa_scalar *ind,
+                             nir_ssa_scalar *limit,
+                             bool *limit_rhs,
                              loop_info_state *state)
 {
-   bool limit_rhs = true;
+   nir_ssa_scalar rhs, lhs;
+   lhs = nir_ssa_scalar_chase_alu_src(cond, 0);
+   rhs = nir_ssa_scalar_chase_alu_src(cond, 1);
 
-   /* We assume that the limit is the "right" operand */
-   *ind = get_loop_var(alu->src[0].src.ssa, state);
-   *limit = get_loop_var(alu->src[1].src.ssa, state);
-
-   if ((*ind)->type != basic_induction) {
-      /* We had it the wrong way, flip things around */
-      *ind = get_loop_var(alu->src[1].src.ssa, state);
-      *limit = get_loop_var(alu->src[0].src.ssa, state);
-      limit_rhs = false;
+   if (get_loop_var(lhs.def, state)->type == basic_induction) {
+      *ind = lhs;
+      *limit = rhs;
+      *limit_rhs = true;
+      return true;
+   } else if (get_loop_var(rhs.def, state)->type == basic_induction) {
+      *ind = rhs;
+      *limit = lhs;
+      *limit_rhs = false;
+      return true;
+   } else {
+      return false;
    }
-
-   return limit_rhs;
 }
 
-static void
-try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
-                                 nir_loop_variable **ind,
-                                 nir_loop_variable **limit,
+static bool
+try_find_trip_count_vars_in_iand(nir_ssa_scalar *cond,
+                                 nir_ssa_scalar *ind,
+                                 nir_ssa_scalar *limit,
                                  bool *limit_rhs,
                                  loop_info_state *state)
 {
-   assert((*alu)->op == nir_op_ieq || (*alu)->op == nir_op_inot);
+   const nir_op alu_op = nir_ssa_scalar_alu_op(*cond);
+   assert(alu_op == nir_op_ieq || alu_op == nir_op_inot);
 
-   nir_ssa_def *iand_def = (*alu)->src[0].src.ssa;
+   nir_ssa_scalar iand = nir_ssa_scalar_chase_alu_src(*cond, 0);
 
-   if ((*alu)->op == nir_op_ieq) {
-      nir_ssa_def *zero_def = (*alu)->src[1].src.ssa;
+   if (alu_op == nir_op_ieq) {
+      nir_ssa_scalar zero = nir_ssa_scalar_chase_alu_src(*cond, 1);
 
-      if (iand_def->parent_instr->type != nir_instr_type_alu ||
-          zero_def->parent_instr->type != nir_instr_type_load_const) {
-
+      if (!nir_ssa_scalar_is_alu(iand) || !nir_ssa_scalar_is_const(zero)) {
          /* Maybe we had it the wrong way, flip things around */
-         iand_def = (*alu)->src[1].src.ssa;
-         zero_def = (*alu)->src[0].src.ssa;
+         nir_ssa_scalar tmp = zero;
+         zero = iand;
+         iand = tmp;
 
          /* If we still didn't find what we need then return */
-         if (zero_def->parent_instr->type != nir_instr_type_load_const)
-            return;
+         if (!nir_ssa_scalar_is_const(zero))
+            return false;
       }
 
       /* If the loop is not breaking on (x && y) == 0 then return */
-      nir_const_value *zero =
-         nir_instr_as_load_const(zero_def->parent_instr)->value;
-      if (zero[0].i32 != 0)
-         return;
+      if (nir_ssa_scalar_as_uint(zero) != 0)
+         return false;
    }
 
-   if (iand_def->parent_instr->type != nir_instr_type_alu)
-      return;
+   if (!nir_ssa_scalar_is_alu(iand))
+      return false;
 
-   nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr);
-   if (iand->op != nir_op_iand)
-      return;
+   if (nir_ssa_scalar_alu_op(iand) != nir_op_iand)
+      return false;
 
    /* Check if iand src is a terminator condition and try get induction var
     * and trip limit var.
     */
-   nir_ssa_def *src = iand->src[0].src.ssa;
-   if (src->parent_instr->type == nir_instr_type_alu) {
-      *alu = nir_instr_as_alu(src->parent_instr);
-      if (is_supported_terminator_condition(*alu))
-         *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
-   }
+   bool found_induction_var = false;
+   for (unsigned i = 0; i < 2; i++) {
+      nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(iand, i);
+      if (is_supported_terminator_condition(src) &&
+          get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) {
+         *cond = src;
+         found_induction_var = true;
 
-   /* Try the other iand src if needed */
-   if (*ind == NULL || (*ind && (*ind)->type != basic_induction) ||
-       !is_var_constant(*limit)) {
-      src = iand->src[1].src.ssa;
-      if (src->parent_instr->type == nir_instr_type_alu) {
-         nir_alu_instr *tmp_alu = nir_instr_as_alu(src->parent_instr);
-         if (is_supported_terminator_condition(tmp_alu)) {
-            *alu = tmp_alu;
-            *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
-         }
+         /* If we've found one with a constant limit, stop. */
+         if (nir_ssa_scalar_is_const(*limit))
+            return true;
       }
    }
+
+   return found_induction_var;
 }
 
 /* Run through each of the terminators of the loop and try to infer a possible
@@ -877,8 +915,10 @@
    list_for_each_entry(nir_loop_terminator, terminator,
                        &state->loop->info->loop_terminator_list,
                        loop_terminator_link) {
+      assert(terminator->nif->condition.is_ssa);
+      nir_ssa_scalar cond = { terminator->nif->condition.ssa, 0 };
 
-      if (terminator->conditional_instr->type != nir_instr_type_alu) {
+      if (!nir_ssa_scalar_is_alu(cond)) {
          /* If we get here the loop is dead and will get cleaned up by the
           * nir_opt_dead_cf pass.
           */
@@ -886,43 +926,35 @@
          continue;
       }
 
-      nir_alu_instr *alu = nir_instr_as_alu(terminator->conditional_instr);
-      nir_op alu_op = alu->op;
+      nir_op alu_op = nir_ssa_scalar_alu_op(cond);
 
       bool limit_rhs;
-      nir_loop_variable *basic_ind = NULL;
-      nir_loop_variable *limit;
-      if (alu->op == nir_op_inot || alu->op == nir_op_ieq) {
-         nir_alu_instr *new_alu = alu;
-         try_find_trip_count_vars_in_iand(&new_alu, &basic_ind, &limit,
-                                          &limit_rhs, state);
+      nir_ssa_scalar basic_ind = { NULL, 0 };
+      nir_ssa_scalar limit;
+      if ((alu_op == nir_op_inot || alu_op == nir_op_ieq) &&
+          try_find_trip_count_vars_in_iand(&cond, &basic_ind, &limit,
+                                           &limit_rhs, state)) {
 
          /* The loop is exiting on (x && y) == 0 so we need to get the
           * inverse of x or y (i.e. which ever contained the induction var) in
           * order to compute the trip count.
           */
-         if (basic_ind && basic_ind->type == basic_induction) {
-            alu = new_alu;
-            alu_op = inverse_comparison(alu);
-            trip_count_known = false;
-            terminator->exact_trip_count_unknown = true;
-         }
+         alu_op = inverse_comparison(nir_ssa_scalar_alu_op(cond));
+         trip_count_known = false;
+         terminator->exact_trip_count_unknown = true;
       }
 
-      if (!basic_ind) {
-         if (!is_supported_terminator_condition(alu)) {
-            trip_count_known = false;
-            continue;
+      if (!basic_ind.def) {
+         if (is_supported_terminator_condition(cond)) {
+            get_induction_and_limit_vars(cond, &basic_ind,
+                                         &limit, &limit_rhs, state);
          }
-
-         limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
-                                                  state);
       }
 
       /* The comparison has to have a basic induction variable for us to be
        * able to find trip counts.
        */
-      if (basic_ind->type != basic_induction) {
+      if (!basic_ind.def) {
          trip_count_known = false;
          continue;
       }
@@ -931,9 +963,8 @@
 
       /* Attempt to find a constant limit for the loop */
       nir_const_value limit_val;
-      if (is_var_constant(limit)) {
-         limit_val =
-            nir_instr_as_load_const(limit->def->parent_instr)->value[0];
+      if (nir_ssa_scalar_is_const(limit)) {
+         limit_val = nir_ssa_scalar_as_const_value(limit);
       } else {
          trip_count_known = false;
 
@@ -955,17 +986,38 @@
       * That's all that's needed to calculate the trip-count
        */
 
-      nir_const_value *initial_val =
-         nir_instr_as_load_const(basic_ind->ind->def_outside_loop->
-                                    def->parent_instr)->value;
+      nir_basic_induction_var *ind_var =
+         get_loop_var(basic_ind.def, state)->ind;
 
-      nir_const_value *step_val =
-         nir_instr_as_load_const(basic_ind->ind->invariant->def->
-                                    parent_instr)->value;
+      /* The basic induction var might be a vector but, because we guarantee
+       * earlier that the phi source has an identity swizzle, we can take the
+       * component from basic_ind.
+       */
+      nir_ssa_scalar initial_s = { ind_var->def_outside_loop, basic_ind.comp };
+      nir_ssa_scalar alu_s = { &ind_var->alu->dest.dest.ssa, basic_ind.comp };
 
-      int iterations = calculate_iterations(initial_val, step_val,
+      nir_const_value initial_val = nir_ssa_scalar_as_const_value(initial_s);
+
+      /* We are guaranteed by earlier code that at least one of these sources
+       * is a constant but we don't know which.
+       */
+      nir_const_value step_val;
+      memset(&step_val, 0, sizeof(step_val));
+      UNUSED bool found_step_value = false;
+      assert(nir_op_infos[ind_var->alu->op].num_inputs == 2);
+      for (unsigned i = 0; i < 2; i++) {
+         nir_ssa_scalar alu_src = nir_ssa_scalar_chase_alu_src(alu_s, i);
+         if (nir_ssa_scalar_is_const(alu_src)) {
+            found_step_value = true;
+            step_val = nir_ssa_scalar_as_const_value(alu_src);
+            break;
+         }
+      }
+      assert(found_step_value);
+
+      int iterations = calculate_iterations(&initial_val, &step_val,
                                             &limit_val,
-                                            basic_ind->ind->alu_def, alu,
+                                            ind_var->alu, cond,
                                             alu_op, limit_rhs,
                                             terminator->continue_from_then);
 
diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
index b3b78c6..84ec2a7 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -629,6 +629,34 @@
    return nir_bcsel(b, n_is_neg, nir_ineg(b, r), r);
 }
 
+static nir_ssa_def *
+lower_extract(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *c)
+{
+   assert(op == nir_op_extract_u8 || op == nir_op_extract_i8 ||
+          op == nir_op_extract_u16 || op == nir_op_extract_i16);
+
+   const int chunk = nir_src_as_uint(nir_src_for_ssa(c));
+   const int chunk_bits =
+      (op == nir_op_extract_u8 || op == nir_op_extract_i8) ? 8 : 16;
+   const int num_chunks_in_32 = 32 / chunk_bits;
+
+   nir_ssa_def *extract32;
+   if (chunk < num_chunks_in_32) {
+      extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_x(b, x),
+                                   nir_imm_int(b, chunk),
+                                   NULL, NULL);
+   } else {
+      extract32 = nir_build_alu(b, op, nir_unpack_64_2x32_split_y(b, x),
+                                   nir_imm_int(b, chunk - num_chunks_in_32),
+                                   NULL, NULL);
+   }
+
+   if (op == nir_op_extract_i8 || op == nir_op_extract_i16)
+      return lower_i2i64(b, extract32);
+   else
+      return lower_u2u64(b, extract32);
+}
+
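+/* Editor's sketch: extract_u8 with chunk 5 selects byte 5 of the 64-bit
+ * value, i.e. byte 1 of the high dword, so the lowering emits roughly
+ *
+ *    u2u64(extract_u8(unpack_64_2x32_split_y(x), 1))
+ *
+ * with lower_u2u64()/lower_i2i64() doing the final widening to 64 bits. */
+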
 nir_lower_int64_options
 nir_lower_int64_op_to_options_mask(nir_op opcode)
 {
@@ -685,6 +713,11 @@
    case nir_op_ishr:
    case nir_op_ushr:
       return nir_lower_shift64;
+   case nir_op_extract_u8:
+   case nir_op_extract_i8:
+   case nir_op_extract_u16:
+   case nir_op_extract_i16:
+      return nir_lower_extract64;
    default:
       return 0;
    }
@@ -779,6 +812,11 @@
       return lower_ishr64(b, src[0], src[1]);
    case nir_op_ushr:
       return lower_ushr64(b, src[0], src[1]);
+   case nir_op_extract_u8:
+   case nir_op_extract_i8:
+   case nir_op_extract_u16:
+   case nir_op_extract_i16:
+      return lower_extract(b, alu->op, src[0], src[1]);
    default:
       unreachable("Invalid ALU opcode to lower");
    }
diff --git a/src/compiler/nir/nir_lower_regs_to_ssa.c b/src/compiler/nir/nir_lower_regs_to_ssa.c
index 0db11ff..76ed128 100644
--- a/src/compiler/nir/nir_lower_regs_to_ssa.c
+++ b/src/compiler/nir/nir_lower_regs_to_ssa.c
@@ -251,9 +251,17 @@
 
    nir_foreach_block(block, impl) {
       nir_foreach_instr(instr, block) {
-         if (instr->type == nir_instr_type_alu) {
+         switch (instr->type) {
+         case nir_instr_type_alu:
             rewrite_alu_instr(nir_instr_as_alu(instr), &state);
-         } else {
+            break;
+
+         case nir_instr_type_phi:
+            /* We rewrite sources as a separate pass */
+            nir_foreach_dest(instr, rewrite_dest, &state);
+            break;
+
+         default:
             nir_foreach_src(instr, rewrite_src, &state);
             nir_foreach_dest(instr, rewrite_dest, &state);
          }
@@ -262,6 +270,28 @@
       nir_if *following_if = nir_block_get_following_if(block);
       if (following_if)
          rewrite_if_condition(following_if, &state);
+
+      /* Handle phi sources that source from this block.  We have to do this
+       * as a separate pass because the phi builder assumes that uses and
+       * defs are processed in an order that respects dominance.  When we have
+       * loops, a phi source may be a back-edge so we have to handle it as if
+       * it were one of the last instructions in the predecessor block.
+       */
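+      /* Editor's illustration: given a register-based loop counter
+       *
+       *    loop {
+       *       block block_1:
+       *       vec1 32 ssa_2 = phi block_0: ssa_0, block_1: r0
+       *       r0 = iadd ssa_2, ssa_1
+       *    }
+       *
+       * the back-edge phi source reads r0, which is only written at the
+       * bottom of block_1, so it is rewritten here while visiting block_1
+       * rather than when the phi itself is reached. */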
+      for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
+         if (block->successors[i] == NULL)
+            continue;
+
+         nir_foreach_instr(instr, block->successors[i]) {
+            if (instr->type != nir_instr_type_phi)
+               break;
+
+            nir_phi_instr *phi = nir_instr_as_phi(instr);
+            nir_foreach_phi_src(phi_src, phi) {
+               if (phi_src->pred == block)
+                  rewrite_src(&phi_src->src, &state);
+            }
+         }
+      }
    }
 
    nir_phi_builder_finish(phi_build);
diff --git a/src/compiler/nir/nir_opt_comparison_pre.c b/src/compiler/nir/nir_opt_comparison_pre.c
index eee4962..a7a227c 100644
--- a/src/compiler/nir/nir_opt_comparison_pre.c
+++ b/src/compiler/nir/nir_opt_comparison_pre.c
@@ -346,7 +346,7 @@
    return progress;
 }
 
-static bool
+bool
 nir_opt_comparison_pre_impl(nir_function_impl *impl)
 {
    struct block_queue bq;
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index e7d3f8e..aeae2ad 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -152,11 +152,7 @@
          break;
 
       case nir_instr_type_intrinsic: {
-         const nir_intrinsic_info *info =
-            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
-
-         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
-             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+         if (nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr))) {
             instr->pass_flags = 0;
          } else {
             instr->pass_flags = GCM_INSTR_PINNED;
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index f674185..912580b 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -1040,6 +1040,13 @@
    if (!nir_is_trivial_loop_if(nif, break_blk))
       return false;
 
+   /* Even though this if statement has a jump on one side, we may still have
+    * phis afterwards.  Single-source phis can be produced by loop unrolling
+    * or dead control-flow passes and are perfectly legal.  Run a quick phi
+    * removal on the block after the if to clean up any such phis.
+    */
+   nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)));
+
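+   /* Editor's note: a phi with a single source, e.g.
+    *
+    *    vec1 32 ssa_5 = phi block_4: ssa_2
+    *
+    * is just a copy of ssa_2, so the call above folds it away before the
+    * continue branch is moved. */
+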
    /* Finally, move the continue from branch after the if-statement. */
    nir_cf_list tmp;
    nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk),
diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c
index 9efbf42..b03a0ab 100644
--- a/src/compiler/nir/nir_opt_remove_phis.c
+++ b/src/compiler/nir/nir_opt_remove_phis.c
@@ -139,6 +139,14 @@
    return progress;
 }
 
+bool
+nir_opt_remove_phis_block(nir_block *block)
+{
+   nir_builder b;
+   nir_builder_init(&b, nir_cf_node_get_function(&block->cf_node));
+   return remove_phis_block(block, &b);
+}
+
 static bool
 nir_opt_remove_phis_impl(nir_function_impl *impl)
 {
diff --git a/src/compiler/nir/tests/comparison_pre_tests.cpp b/src/compiler/nir/tests/comparison_pre_tests.cpp
new file mode 100644
index 0000000..fe1cc23
--- /dev/null
+++ b/src/compiler/nir/tests/comparison_pre_tests.cpp
@@ -0,0 +1,531 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "nir.h"
+#include "nir_builder.h"
+
+class comparison_pre_test : public ::testing::Test {
+protected:
+   comparison_pre_test()
+   {
+      static const nir_shader_compiler_options options = { };
+      nir_builder_init_simple_shader(&bld, NULL, MESA_SHADER_VERTEX, &options);
+
+      v1 = nir_imm_vec4(&bld, -2.0, -1.0,  1.0,  2.0);
+      v2 = nir_imm_vec4(&bld,  2.0,  1.0, -1.0, -2.0);
+      v3 = nir_imm_vec4(&bld,  3.0,  4.0,  5.0,  6.0);
+   }
+
+   ~comparison_pre_test()
+   {
+      ralloc_free(bld.shader);
+   }
+
+   struct nir_builder bld;
+
+   nir_ssa_def *v1;
+   nir_ssa_def *v2;
+   nir_ssa_def *v3;
+
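+   /* Swizzle index arrays: replicate component 0 (x) or component 3 (w)
+    * across all four channels.
+    */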
+   const uint8_t xxxx[4] = { 0, 0, 0, 0 };
+   const uint8_t wwww[4] = { 3, 3, 3, 3 };
+};
+
+TEST_F(comparison_pre_test, a_lt_b_vs_neg_a_plus_b)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 1 ssa_6 = flt ssa_5, ssa_3
+    *
+    * if ssa_6 {
+    *    vec1 32 ssa_7 = fneg ssa_5
+    *    vec1 32 ssa_8 = fadd ssa_7, ssa_3
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 32 ssa_9 = fneg ssa_5
+    * vec1 32 ssa_10 = fadd ssa_3, ssa_9
+    * vec1 32 ssa_11 = load_const (0.0)
+    * vec1 1 ssa_12 = flt ssa_11, ssa_10
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    *    vec1 32 ssa_7 = fneg ssa_5
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, a, one);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, nir_fneg(&bld, a), one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, a_lt_b_vs_a_minus_b)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 1 ssa_6 = flt ssa_3, ssa_5
+    *
+    * if ssa_6 {
+    *    vec1 32 ssa_7 = fneg ssa_5
+    *    vec1 32 ssa_8 = fadd ssa_3, ssa_7
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 32 ssa_9 = fneg ssa_5
+    * vec1 32 ssa_10 = fadd ssa_3, ssa_9
+    * vec1 32 ssa_11 = load_const (0.0)
+    * vec1 1 ssa_12 = flt ssa_10, ssa_11
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    *    vec1 32 ssa_7 = fneg ssa_5
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *b = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, one, b);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, one, nir_fneg(&bld, b));
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, neg_a_lt_b_vs_a_plus_b)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 32 ssa_6 = fneg ssa_5
+    * vec1 1 ssa_7 = flt ssa_6, ssa_3
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_5, ssa_3
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 32 ssa_6 = fneg ssa_5
+    * vec1 32 ssa_9 = fneg ssa_6
+    * vec1 32 ssa_10 = fadd ssa_3, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_11, ssa_10
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, nir_fneg(&bld, a), one);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, a_lt_neg_b_vs_a_plus_b)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 32 ssa_6 = fneg ssa_5
+    * vec1 1 ssa_7 = flt ssa_3, ssa_6
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_3, ssa_5
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec4 32 ssa_4 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_5 = mov ssa_4.x
+    * vec1 32 ssa_6 = fneg ssa_5
+    * vec1 32 ssa_9 = fneg ssa_6
+    * vec1 32 ssa_10 = fadd ssa_3, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_10, ssa_11
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *b = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, one, nir_fneg(&bld, b));
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, one, b);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, imm_lt_b_vs_neg_imm_plus_b)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_3, ssa_6
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_4, ssa_6
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_3
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_11, ssa_10
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, one, a);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, neg_one, a);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, a_lt_imm_vs_a_minus_imm)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_6, ssa_3
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_6, ssa_4
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_3
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_10, ssa_11
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, a, one);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, neg_one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, neg_imm_lt_a_vs_a_plus_imm)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_4, ssa_6
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_6, ssa_3
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_4
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_11, ssa_10
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, neg_one, a);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, a_lt_neg_imm_vs_a_plus_imm)
+{
+   /* Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 1 ssa_7 = flt ssa_6, ssa_4
+    *
+    * if ssa_7 {
+    *    vec1 32 ssa_8 = fadd ssa_6, ssa_3
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec1 32 ssa_3 = load_const ( 1.0)
+    * vec1 32 ssa_4 = load_const (-1.0)
+    * vec4 32 ssa_5 = fadd ssa_0, ssa_2
+    * vec1 32 ssa_6 = mov ssa_5.x
+    * vec1 32 ssa_9 = fneg ssa_4
+    * vec1 32 ssa_10 = fadd ssa_6, ssa_9
+    * vec1 32 ssa_11 = load_const ( 0.0)
+    * vec1 1 ssa_12 = flt ssa_10, ssa_11
+    * vec1 32 ssa_13 = mov ssa_10
+    * vec1 1 ssa_14 = mov ssa_12
+    *
+    * if ssa_14 {
+    * } else {
+    * }
+    */
+   nir_ssa_def *one = nir_imm_float(&bld, 1.0f);
+   nir_ssa_def *neg_one = nir_imm_float(&bld, -1.0f);
+   nir_ssa_def *a = nir_channel(&bld, nir_fadd(&bld, v1, v3), 0);
+
+   nir_ssa_def *flt = nir_flt(&bld, a, neg_one);
+
+   nir_if *nif = nir_push_if(&bld, flt);
+
+   nir_fadd(&bld, a, one);
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_TRUE(nir_opt_comparison_pre_impl(bld.impl));
+}
+
+TEST_F(comparison_pre_test, non_scalar_add_result)
+{
+   /* The optimization pass should not do anything because the result of the
+    * fadd is not a scalar.
+    *
+    * Before:
+    *
+    * vec4 32 ssa_0 = load_const (-2.0, -1.0,  1.0,  2.0)
+    * vec4 32 ssa_1 = load_const ( 2.0,  1.0, -1.0, -2.0)
+    * vec4 32 ssa_2 = load_const ( 3.0,  4.0,  5.0,  6.0)
+    * vec4 32 ssa_3 = fadd ssa_0, ssa_2
+    * vec1 1 ssa_4 = flt ssa_0.x, ssa_3.x
+    *
+    * if ssa_4 {
+    *    vec2 32 ssa_5 = fadd ssa_1.xx, ssa_3.xx
+    * } else {
+    * }
+    *
+    * After:
+    *
+    * No change.
+    */
+   nir_ssa_def *a = nir_fadd(&bld, v1, v3);
+
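+   /* Build the comparison by hand so explicit swizzles and a non-scalar
+    * destination can be set; the nir_builder helpers do not expose these
+    * directly.
+    */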
+   nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
+
+   flt->src[0].src = nir_src_for_ssa(v1);
+   flt->src[1].src = nir_src_for_ssa(a);
+
+   memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
+   memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx));
+
+   nir_builder_alu_instr_finish_and_insert(&bld, flt);
+
+   flt->dest.dest.ssa.num_components = 1;
+   flt->dest.write_mask = 1;
+
+   nir_if *nif = nir_push_if(&bld, &flt->dest.dest.ssa);
+
+   nir_alu_instr *fadd = nir_alu_instr_create(bld.shader, nir_op_fadd);
+
+   fadd->src[0].src = nir_src_for_ssa(v2);
+   fadd->src[1].src = nir_src_for_ssa(a);
+
+   memcpy(&fadd->src[0].swizzle, xxxx, sizeof(xxxx));
+   memcpy(&fadd->src[1].swizzle, xxxx, sizeof(xxxx));
+
+   nir_builder_alu_instr_finish_and_insert(&bld, fadd);
+
+   fadd->dest.dest.ssa.num_components = 2;
+   fadd->dest.write_mask = 3;
+
+   nir_pop_if(&bld, nif);
+
+   EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
+}
diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
index 98df7a8..1a5d4c7 100644
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1432,7 +1432,7 @@
             val->type->align = align;
 
             /* Override any ArrayStride previously set. */
-            val->type->stride = size;
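+            /* For illustration: a struct of size 12 with 16-byte alignment
+             * must use an array stride of 16, so round the size up to the
+             * alignment.
+             */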
+            val->type->stride = vtn_align_u32(size, align);
          }
       }
       break;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
index 59dcaa4..878f67a 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
@@ -79,6 +79,7 @@
 		return NULL;
 
 	pctx = &fd3_ctx->base.base;
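+	/* Set pctx->screen before the rest of the setup; helpers called during
+	 * context initialization expect it to be valid.
+	 */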
+	pctx->screen = pscreen;
 
 	fd3_ctx->base.dev = fd_device_ref(screen->dev);
 	fd3_ctx->base.screen = fd_screen(pscreen);
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
index e9730e9..8960509 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
@@ -79,6 +79,7 @@
 		return NULL;
 
 	pctx = &fd4_ctx->base.base;
+	pctx->screen = pscreen;
 
 	fd4_ctx->base.dev = fd_device_ref(screen->dev);
 	fd4_ctx->base.screen = fd_screen(pscreen);
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.c b/src/gallium/drivers/freedreno/a5xx/fd5_context.c
index 37e02c6..211d2b5 100644
--- a/src/gallium/drivers/freedreno/a5xx/fd5_context.c
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_context.c
@@ -78,6 +78,7 @@
 		return NULL;
 
 	pctx = &fd5_ctx->base.base;
+	pctx->screen = pscreen;
 
 	fd5_ctx->base.dev = fd_device_ref(screen->dev);
 	fd5_ctx->base.screen = fd_screen(pscreen);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c
index 7ba0926..4f696ef 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c
@@ -84,6 +84,7 @@
 		return NULL;
 
 	pctx = &fd6_ctx->base.base;
+	pctx->screen = pscreen;
 
 	fd6_ctx->base.dev = fd_device_ref(screen->dev);
 	fd6_ctx->base.screen = fd_screen(pscreen);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 7dd4a73..2f484f7 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -464,9 +464,7 @@
 						 0, PIPE_USAGE_DEFAULT,
 						 SI_RESOURCE_FLAG_32BIT |
 						 (use_sdma_upload ?
-							  SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA :
-							  (sscreen->cpdma_prefetch_writes_memory ?
-								   0 : SI_RESOURCE_FLAG_READ_ONLY)));
+							  SI_RESOURCE_FLAG_UPLOAD_FLUSH_EXPLICIT_VIA_SDMA : 0));
 	if (!sctx->b.const_uploader)
 		goto fail;
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index a4470e6..45d4eda 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -659,15 +659,6 @@
 }
 
 
-static float
-compute_lambda_vert_explicite_gradients(UNUSED const struct sp_sampler_view *sview,
-                                        UNUSED const float derivs[3][2][TGSI_QUAD_SIZE],
-                                        UNUSED int quad)
-{
-   return 0.0f;
-}
-
-
 compute_lambda_from_grad_func
 softpipe_get_lambda_from_grad_func(const struct pipe_sampler_view *view,
                                    enum pipe_shader_type shader)
diff --git a/src/gallium/drivers/virgl/virgl_resource.c b/src/gallium/drivers/virgl/virgl_resource.c
index ef81f21..6d4c9f5 100644
--- a/src/gallium/drivers/virgl/virgl_resource.c
+++ b/src/gallium/drivers/virgl/virgl_resource.c
@@ -112,6 +112,7 @@
    res->u.b = *templ;
    res->u.b.screen = &vs->base;
    pipe_reference_init(&res->u.b.reference, 1);
+   virgl_resource_layout(&res->u.b, &res->metadata);
 
    res->hw_res = vs->vws->resource_create_from_handle(vs->vws, whandle);
    if (!res->hw_res) {
diff --git a/src/gallium/tests/trivial/meson.build b/src/gallium/tests/trivial/meson.build
index bbb2551..1f912d5 100644
--- a/src/gallium/tests/trivial/meson.build
+++ b/src/gallium/tests/trivial/meson.build
@@ -24,6 +24,7 @@
     '@0@.c'.format(t),
     include_directories : inc_common,
     link_with : [libmesa_util, libgallium, libpipe_loader_dynamic],
+    dependencies : dep_thread,
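+    # dep_thread pulls in the platform threads library so pthread_once
+    # resolves at link time (bug 110663)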
     install : false,
   )
 endforeach
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 4429608..1f401ce 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -141,7 +141,8 @@
                        nir_lower_ineg64 |
                        nir_lower_logic64 |
                        nir_lower_minmax64 |
-                       nir_lower_shift64;
+                       nir_lower_shift64 |
+                       nir_lower_extract64;
       fp64_options |= nir_lower_fp64_full_software;
    }
 
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 7d60665..6308b28 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1204,9 +1204,31 @@
        opcode != BRW_OPCODE_DP3 && opcode != BRW_OPCODE_DP2 &&
        opcode != VEC4_OPCODE_PACK_BYTES) {
       for (int i = 0; i < 3; i++) {
-         if (src[i].file == BAD_FILE || src[i].file == IMM)
+         if (src[i].file == BAD_FILE)
             continue;
 
+         if (src[i].file == IMM) {
+            assert(src[i].type != BRW_REGISTER_TYPE_V &&
+                   src[i].type != BRW_REGISTER_TYPE_UV);
+
+            /* Vector immediate types need to be reswizzled. */
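+            /* For example, swizzling the VF immediate (0.0, 1.0, 2.0, 3.0)
+             * with .zwzw must produce (2.0, 3.0, 2.0, 3.0); each 8-bit
+             * restricted-float component is repacked below.
+             */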
+            if (src[i].type == BRW_REGISTER_TYPE_VF) {
+               const unsigned imm[] = {
+                  (src[i].ud >>  0) & 0x0ff,
+                  (src[i].ud >>  8) & 0x0ff,
+                  (src[i].ud >> 16) & 0x0ff,
+                  (src[i].ud >> 24) & 0x0ff,
+               };
+
+               src[i] = brw_imm_vf4(imm[BRW_GET_SWZ(swizzle, 0)],
+                                    imm[BRW_GET_SWZ(swizzle, 1)],
+                                    imm[BRW_GET_SWZ(swizzle, 2)],
+                                    imm[BRW_GET_SWZ(swizzle, 3)]);
+            }
+
+            continue;
+         }
+
          src[i].swizzle = brw_compose_swizzle(swizzle, src[i].swizzle);
       }
    }
diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
index fa92354..48d4189 100644
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -478,6 +478,11 @@
    if (result != VK_SUCCESS)
       goto fail_mmap_cleanups;
 
+   /* Make the entire pool available at the front.  If back allocation needs
+    * to use this space, the "ends" will be re-arranged.
+    */
+   pool->state.end = pool->size;
+
    return VK_SUCCESS;
 
  fail_mmap_cleanups:
diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 0d3d3f9..96ee66f 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1075,11 +1075,11 @@
 {
    static const union isl_color_value color_value = { .u32 = { 0, } };
    const struct anv_subpass *subpass = cmd_buffer->state.subpass;
-   const uint32_t att_idx = subpass->depth_stencil_attachment->attachment;
-
-   if (att_idx == VK_ATTACHMENT_UNUSED)
+   if (!subpass->depth_stencil_attachment)
       return;
 
+   const uint32_t att_idx = subpass->depth_stencil_attachment->attachment;
+   assert(att_idx != VK_ATTACHMENT_UNUSED);
    struct anv_render_pass_attachment *pass_att =
       &cmd_buffer->state.pass->attachments[att_idx];
 
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 132ccc9..ab8dee4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1301,7 +1301,7 @@
       .sampledImageStencilSampleCounts          = sample_counts,
       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
       .maxSampleMaskWords                       = 1,
-      .timestampComputeAndGraphics              = false,
+      .timestampComputeAndGraphics              = true,
       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
       .maxClipDistances                         = 8,
       .maxCullDistances                         = 8,
diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c
index 4e15e62..6573d3e 100644
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -69,6 +69,7 @@
            .aspect = VK_IMAGE_ASPECT_DEPTH_BIT, \
          }, \
       }, \
+      .vk_format = __vk_fmt, \
       .n_planes = 1, \
    }
 
@@ -80,6 +81,7 @@
            .aspect = VK_IMAGE_ASPECT_STENCIL_BIT, \
          }, \
       }, \
+      .vk_format = __vk_fmt, \
       .n_planes = 1, \
    }
 
@@ -798,6 +800,7 @@
    if (format == NULL)
       goto unsupported;
 
+   assert(format->vk_format == info->format);
    format_feature_flags = anv_get_image_format_features(devinfo, info->format,
                                                         format, info->tiling);
 
@@ -977,6 +980,13 @@
       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 };
 
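+/* Host-allocation (userptr) memory can be imported but never re-exported,
+ * hence exportFromImportedHandleTypes is 0.
+ */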
+static const VkExternalMemoryProperties userptr_props = {
+   .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT,
+   .exportFromImportedHandleTypes = 0,
+   .compatibleHandleTypes =
+      VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+};
+
 static const VkExternalMemoryProperties android_buffer_props = {
    .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
                              VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT,
@@ -1159,6 +1169,9 @@
    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
       pExternalBufferProperties->externalMemoryProperties = prime_fd_props;
       return;
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
+      pExternalBufferProperties->externalMemoryProperties = userptr_props;
+      return;
    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID:
       if (physical_device->supported_extensions.ANDROID_external_memory_android_hardware_buffer) {
          pExternalBufferProperties->externalMemoryProperties = android_buffer_props;
@@ -1170,8 +1183,14 @@
    }
 
  unsupported:
+   /* From the Vulkan 1.1.113 spec:
+    *
+    *    compatibleHandleTypes must include at least handleType.
+    */
    pExternalBufferProperties->externalMemoryProperties =
-      (VkExternalMemoryProperties) {0};
+      (VkExternalMemoryProperties) {
+         .compatibleHandleTypes = pExternalBufferInfo->handleType,
+      };
 }
 
 VkResult anv_CreateSamplerYcbcrConversion(
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index b3672ac..4012a6d 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -825,14 +825,24 @@
          continue;
 
       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      /* Unused or out-of-bounds */
-      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid & (1 << rt)))
+      /* Out-of-bounds */
+      if (rt >= MAX_RTS)
          continue;
 
       const unsigned array_len =
          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
       assert(rt + array_len <= max_rt);
 
+      /* Unused */
+      if (!(stage->key.wm.color_outputs_valid & BITFIELD_RANGE(rt, array_len))) {
+         /* If this is the RT at location 0 and alpha to coverage is
+          * enabled, we will have to create a null RT for it, so mark it
+          * as used.
+          */
+         if (rt > 0 || !stage->key.wm.alpha_to_coverage)
+            continue;
+      }
+
       for (unsigned i = 0; i < array_len; i++)
          rt_used[rt + i] = true;
    }
@@ -843,11 +853,22 @@
          continue;
 
       rt_to_bindings[i] = num_rts;
-      rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
-         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
-         .binding = 0,
-         .index = i,
-      };
+
+      if (stage->key.wm.color_outputs_valid & (1 << i)) {
+         rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+            .binding = 0,
+            .index = i,
+         };
+      } else {
+         /* Set up a null render target. */
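+         /* An index of UINT32_MAX is presumably what the binding-table
+          * emission code recognizes as "emit a null surface state".
+          */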
+         rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) {
+            .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
+            .binding = 0,
+            .index = UINT32_MAX,
+         };
+      }
+
       num_rts++;
    }
 
@@ -857,9 +878,11 @@
          continue;
 
       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
-      if (rt >= MAX_RTS ||
-          !(stage->key.wm.color_outputs_valid & (1 << rt))) {
-         /* Unused or out-of-bounds, throw it away */
+
+      if (rt >= MAX_RTS || !rt_used[rt]) {
+         /* Unused or out-of-bounds, throw it away, unless it is the first
+          * RT and we have alpha to coverage enabled.
+          */
          deleted_output = true;
          var->data.mode = nir_var_function_temp;
          exec_node_remove(&var->node);
diff --git a/src/intel/vulkan/gen8_cmd_buffer.c b/src/intel/vulkan/gen8_cmd_buffer.c
index 2e6d9de..762cc37 100644
--- a/src/intel/vulkan/gen8_cmd_buffer.c
+++ b/src/intel/vulkan/gen8_cmd_buffer.c
@@ -355,6 +355,8 @@
     */
    const bool stc_write_en =
       (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
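+      /* A stencil write mask of zero on both faces means no stencil bits
+       * can actually be written, so do not count this as a stencil write.
+       */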
+      (cmd_buffer->state.gfx.dynamic.stencil_write_mask.front ||
+       cmd_buffer->state.gfx.dynamic.stencil_write_mask.back) &&
       pipeline->writes_stencil;
 
    /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 1caa0a8..f3dea6a 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -76,6 +76,8 @@
       sba.GeneralStateMOCS = GENX(MOCS);
       sba.GeneralStateBaseAddressModifyEnable = true;
 
+      sba.StatelessDataPortAccessMOCS = GENX(MOCS);
+
       sba.SurfaceStateBaseAddress =
          anv_cmd_buffer_surface_base_address(cmd_buffer);
       sba.SurfaceStateMOCS = GENX(MOCS);
diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build
index 7fe6609..d0120d8 100644
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -178,6 +178,7 @@
 endif
 
 if with_platform_android
+  anv_deps += dep_android
   anv_flags += '-DVK_USE_PLATFORM_ANDROID_KHR'
   libanv_files += files('anv_android.c')
 else
@@ -235,9 +236,9 @@
     c_args : anv_flags,
   )
 
-  foreach t : ['block_pool_no_free', 'state_pool_no_free',
-               'state_pool_free_list_only', 'state_pool',
-               'state_pool_padding']
+  foreach t : ['block_pool_no_free', 'block_pool_grow_first',
+               'state_pool_no_free', 'state_pool_free_list_only',
+               'state_pool', 'state_pool_padding']
     test(
       'anv_@0@'.format(t),
       executable(
diff --git a/src/intel/vulkan/tests/block_pool_grow_first.c b/src/intel/vulkan/tests/block_pool_grow_first.c
new file mode 100644
index 0000000..aea12b2
--- /dev/null
+++ b/src/intel/vulkan/tests/block_pool_grow_first.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#undef NDEBUG
+
+#include "anv_private.h"
+
+int main(int argc, char **argv)
+{
+   struct anv_instance instance;
+   struct anv_device device = {
+      .instance = &instance,
+   };
+   struct anv_block_pool pool;
+
+   /* Create a pool with initial size smaller than the block allocated, so
+    * that it must grow in the first allocation.
+    */
+   const uint32_t block_size = 16 * 1024;
+   const uint32_t initial_size = block_size / 2;
+
+   anv_block_pool_init(&pool, &device, 4096, initial_size, EXEC_OBJECT_PINNED);
+   assert(pool.size == initial_size);
+
+   uint32_t padding;
+   int32_t offset = anv_block_pool_alloc(&pool, block_size, &padding);
+
+   /* The pool must have grown at least enough to fit the new allocation. */
+   assert(pool.size > initial_size);
+   assert(pool.size >= initial_size + block_size);
+
+   /* The whole initial size is considered padding and the allocation should be
+    * right next to it.
+    */
+   assert(padding == initial_size);
+   assert(offset == initial_size);
+
+   /* Use the memory to ensure it is valid. */
+   void *map = anv_block_pool_map(&pool, offset);
+   memset(map, 22, block_size);
+
+   anv_block_pool_finish(&pool);
+}
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 3515e31..0a8b9bb 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -104,6 +104,8 @@
                                  struct temp_texture *tex);
 static void meta_glsl_clear_cleanup(struct gl_context *ctx,
                                     struct clear_state *clear);
+static void meta_copypix_cleanup(struct gl_context *ctx,
+                                 struct copypix_state *copypix);
 static void meta_decompress_cleanup(struct gl_context *ctx,
                                     struct decompress_state *decompress);
 static void meta_drawpix_cleanup(struct gl_context *ctx,
@@ -422,6 +424,7 @@
    _mesa_make_current(ctx, NULL, NULL);
    _mesa_meta_glsl_blit_cleanup(ctx, &ctx->Meta->Blit);
    meta_glsl_clear_cleanup(ctx, &ctx->Meta->Clear);
+   meta_copypix_cleanup(ctx, &ctx->Meta->CopyPix);
    _mesa_meta_glsl_generate_mipmap_cleanup(ctx, &ctx->Meta->Mipmap);
    cleanup_temp_texture(ctx, &ctx->Meta->TempTex);
    meta_decompress_cleanup(ctx, &ctx->Meta->Decompress);
@@ -1465,6 +1468,8 @@
          /* load image */
          _mesa_TexSubImage2D(tex->Target, 0,
                              0, 0, width, height, format, type, pixels);
+
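+         /* Release the reference taken on the saved unpack buffer object;
+          * holding it past this point would leak the buffer.
+          */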
+         _mesa_reference_buffer_object(ctx, &save_unpack_obj, NULL);
       }
    }
    else {
@@ -1595,6 +1600,17 @@
    }
 }
 
+static void
+meta_copypix_cleanup(struct gl_context *ctx, struct copypix_state *copypix)
+{
+   if (copypix->VAO == 0)
+      return;
+   _mesa_DeleteVertexArrays(1, &copypix->VAO);
+   copypix->VAO = 0;
+   _mesa_reference_buffer_object(ctx, &copypix->buf_obj, NULL);
+}
+
+
 /**
  * Given a bitfield of BUFFER_BIT_x draw buffers, call glDrawBuffers to
  * set GL to only draw to those buffers.
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index 2c3d9a1..92c495f 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -616,6 +616,17 @@
    consts->MaxProgramMatrices = MAX_PROGRAM_MATRICES;
    consts->MaxProgramMatrixStackDepth = MAX_PROGRAM_MATRIX_STACK_DEPTH;
 
+   /* Set the absolute minimum possible GLSL version.  API_OPENGL_CORE can
+    * mean an OpenGL 3.0 forward-compatible context, so that implies a minimum
+    * possible version of 1.30.  Otherwise, the minimum possible version is
+    * 1.20.
+    * Since Mesa unconditionally advertises GL_ARB_shading_language_100 and
+    * GL_ARB_shader_objects, every driver has GLSL 1.20... even if it doesn't
+    * advertise any extensions that enable shader stages (e.g.,
+    * GL_ARB_vertex_shader).
+    */
+   consts->GLSLVersion = api == API_OPENGL_CORE ? 130 : 120;
+   consts->GLSLVersionCompat = consts->GLSLVersion;
+
    /* Assume that if GLSL 1.30+ (or GLSL ES 3.00+) is supported that
     * gl_VertexID is implemented using a native hardware register with OpenGL
     * semantics.
diff --git a/src/mesa/main/program_binary.c b/src/mesa/main/program_binary.c
index 7390fef..39537cf 100644
--- a/src/mesa/main/program_binary.c
+++ b/src/mesa/main/program_binary.c
@@ -178,6 +178,8 @@
                                                       shader->Program);
    }
 
+   blob_write_uint32(blob, sh_prog->SeparateShader);
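+   /* read_program_payload() must read this back in exactly the same order
+    * it is written here.
+    */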
+
    serialize_glsl_program(blob, ctx, sh_prog);
 
    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
@@ -195,6 +197,8 @@
 read_program_payload(struct gl_context *ctx, struct blob_reader *blob,
                      GLenum binary_format, struct gl_shader_program *sh_prog)
 {
+   sh_prog->SeparateShader = blob_read_uint32(blob);
+
    if (!deserialize_glsl_program(blob, ctx, sh_prog))
       return false;
 
diff --git a/src/vulkan/overlay-layer/overlay.cpp b/src/vulkan/overlay-layer/overlay.cpp
index 5b761ee..bc365bf 100644
--- a/src/vulkan/overlay-layer/overlay.cpp
+++ b/src/vulkan/overlay-layer/overlay.cpp
@@ -1857,6 +1857,8 @@
    struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
    struct device_data *device_data = cmd_buffer_data->device;
 
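+   /* Beginning a command buffer implicitly resets it, so drop any stats
+    * accumulated by a previous recording.
+    */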
+   memset(&cmd_buffer_data->stats, 0, sizeof(cmd_buffer_data->stats));
+
    /* We don't record any query in secondary command buffers, just make sure
     * we have the right inheritance.
     */
@@ -2042,6 +2044,10 @@
    for (uint32_t i = 0; i < commandBufferCount; i++) {
       struct command_buffer_data *cmd_buffer_data =
          FIND_CMD_BUFFER_DATA(pCommandBuffers[i]);
+      /* It is legal to free a NULL command buffer. */
+      if (!cmd_buffer_data)
+         continue;
+
       uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool));
       if (count == 1) {
          unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool));
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index 46f1c08..6fa4dab 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -974,6 +974,19 @@
       options |= XCB_PRESENT_OPTION_SUBOPTIMAL;
 #endif
 
+   /* Poll for any available event and update the swapchain status. This could
+    * update the status of the swapchain to SUBOPTIMAL or OUT_OF_DATE if the
+    * associated X11 surface has been resized.
+    */
+   xcb_generic_event_t *event;
+   while ((event = xcb_poll_for_special_event(chain->conn, chain->special_event))) {
+      VkResult result = x11_handle_dri3_present_event(chain, (void *)event);
+      free(event);
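+      /* Negative VkResult values are errors and stop the present; positive
+       * values such as VK_SUBOPTIMAL_KHR only update chain->status below.
+       */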
+      if (result < 0)
+         return x11_swapchain_result(chain, result);
+      x11_swapchain_result(chain, result);
+   }
+
    xshmfence_reset(image->shm_fence);
 
    ++chain->send_sbc;
@@ -1009,6 +1022,10 @@
    struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
    uint64_t timeout = info->timeout;
 
+   /* If the swapchain is in an error state, don't go any further. */
+   if (chain->status < 0)
+      return chain->status;
+
    if (chain->threaded) {
       return x11_acquire_next_image_from_queue(chain, image_index, timeout);
    } else {
@@ -1023,6 +1040,10 @@
 {
    struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
 
+   /* If the swapchain is in an error state, don't go any further. */
+   if (chain->status < 0)
+      return chain->status;
+
    if (chain->threaded) {
       wsi_queue_push(&chain->present_queue, image_index);
       return chain->status;