nir/gcm: Rework the schedule late loop

This fixes a bug in code motion that occurred when the best block was the
same as the schedule-early block.  Because the old loop checked
(lca != def->parent_instr->block) at the top, it exited before ever
comparing that block's loop depth, so the instruction was not moved out of
the loop.  This commit reworks the loop into a simple for loop up the
dominator chain and moves the comparison against def->parent_instr->block
to the end of the loop body, so the schedule-early block is also
considered as a candidate.

Reviewed-by: Matt Turner <mattst88@gmail.com>
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index 77eb8e6..9d7f59c 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -326,12 +326,13 @@
     * as far outside loops as we can get.
     */
    nir_block *best = lca;
-   while (lca != def->parent_instr->block) {
-      assert(lca);
-      if (state->blocks[lca->index].loop_depth <
+   for (nir_block *block = lca; block != NULL; block = block->imm_dom) {
+      if (state->blocks[block->index].loop_depth <
           state->blocks[best->index].loop_depth)
-         best = lca;
-      lca = lca->imm_dom;
+         best = block;
+
+      if (block == def->parent_instr->block)
+         break;
    }
    def->parent_instr->block = best;