8237859: C2: Crash when loads float above range check

Fix control edges of predicates to data nodes when creating pre/main/post loops.

Reviewed-by: neliasso, thartmann, roland
diff --git a/src/hotspot/share/opto/loopPredicate.cpp b/src/hotspot/share/opto/loopPredicate.cpp
index b1c7229..cfc02ee 100644
--- a/src/hotspot/share/opto/loopPredicate.cpp
+++ b/src/hotspot/share/opto/loopPredicate.cpp
@@ -1207,7 +1207,7 @@
     new_predicate_proj = upper_bound_proj;
 
     if (iff->is_RangeCheck()) {
-      new_predicate_proj = insert_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
+      new_predicate_proj = insert_initial_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
     }
 
 #ifndef PRODUCT
@@ -1238,13 +1238,13 @@
 // of the main loop induction variable. Make a copy of the predicates
 // here with an opaque node as a place holder for the value (will be
 // updated by PhaseIdealLoop::clone_skeleton_predicate()).
-ProjNode* PhaseIdealLoop::insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
-                                                    ProjNode* proj, ProjNode *predicate_proj,
-                                                    ProjNode* upper_bound_proj,
-                                                    int scale, Node* offset,
-                                                    Node* init, Node* limit, jint stride,
-                                                    Node* rng, bool &overflow,
-                                                    Deoptimization::DeoptReason reason) {
+ProjNode* PhaseIdealLoop::insert_initial_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
+                                                            ProjNode* proj, ProjNode *predicate_proj,
+                                                            ProjNode* upper_bound_proj,
+                                                            int scale, Node* offset,
+                                                            Node* init, Node* limit, jint stride,
+                                                            Node* rng, bool &overflow,
+                                                            Deoptimization::DeoptReason reason) {
   assert(proj->_con && predicate_proj->_con, "not a range check?");
   Node* opaque_init = new Opaque1Node(C, init);
   register_new_node(opaque_init, upper_bound_proj);
diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp
index e72d5cc..aa039f1 100644
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@@ -1120,6 +1120,19 @@
   return NULL;
 }
 
+#ifdef ASSERT
+void PhaseIdealLoop::ensure_zero_trip_guard_proj(Node* node, bool is_main_loop) {
+  assert(node->is_IfProj(), "must be the zero trip guard If node");
+  Node* zer_bol = node->in(0)->in(1);
+  assert(zer_bol != NULL && zer_bol->is_Bool(), "must be Bool");
+  Node* zer_cmp = zer_bol->in(1);
+  assert(zer_cmp != NULL && zer_cmp->Opcode() == Op_CmpI, "must be CmpI");
+  // For the main loop, the opaque node is the second input to zer_cmp, for the post loop it's the first input node
+  Node* zer_opaq = zer_cmp->in(is_main_loop ? 2 : 1);
+  assert(zer_opaq != NULL && zer_opaq->Opcode() == Op_Opaque1, "must be Opaque1");
+}
+#endif
+
 // Make a copy of the skeleton range check predicates before the main
 // loop and set the initial value of loop as input. After unrolling,
 // the range of values for the induction variable in the main loop can
@@ -1128,10 +1141,16 @@
 // CastII/ConvI2L nodes cause some data paths to die. For consistency,
 // the control paths must die too but the range checks were removed by
 // predication. The range checks that we add here guarantee that they do.
-void PhaseIdealLoop::duplicate_predicates_helper(Node* predicate, Node* start, Node* end,
+void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop_helper(Node* predicate, Node* start, Node* end,
                                                  IdealLoopTree* outer_loop, LoopNode* outer_main_head,
-                                                 uint dd_main_head) {
+                                                 uint dd_main_head, const uint idx_before_pre_post,
+                                                 const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main,
+                                                 Node* zero_trip_guard_proj_post, const Node_List &old_new) {
   if (predicate != NULL) {
+#ifdef ASSERT
+    ensure_zero_trip_guard_proj(zero_trip_guard_proj_main, true);
+    ensure_zero_trip_guard_proj(zero_trip_guard_proj_post, false);
+#endif
     IfNode* iff = predicate->in(0)->as_If();
     ProjNode* uncommon_proj = iff->proj_out(1 - predicate->as_Proj()->_con);
     Node* rgn = uncommon_proj->unique_ctrl_out();
@@ -1147,13 +1166,36 @@
         break;
       if (iff->in(1)->Opcode() == Op_Opaque4) {
         assert(skeleton_predicate_has_opaque(iff), "unexpected");
-        // Clone the predicate twice and initialize one with the initial
+        // Clone the skeleton predicate twice and initialize one with the initial
         // value of the loop induction variable. Leave the other predicate
         // to be initialized when increasing the stride during loop unrolling.
         prev_proj = clone_skeleton_predicate(iff, start, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
         assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()) == (start->Opcode() == Op_Opaque1), "");
         prev_proj = clone_skeleton_predicate(iff, end, predicate, uncommon_proj, current_proj, outer_loop, prev_proj);
         assert(skeleton_predicate_has_opaque(prev_proj->in(0)->as_If()) == (end->Opcode() == Op_Opaque1), "");
+
+        // Rewire any control inputs from the cloned skeleton predicates down to the main and post loop for data nodes that are part of the
+        // main loop (and were cloned to the pre and post loop).
+        for (DUIterator i = predicate->outs(); predicate->has_out(i); i++) {
+          Node* loop_node = predicate->out(i);
+          Node* pre_loop_node = old_new[loop_node->_idx];
+          // Change the control if 'loop_node' is part of the main loop. If there is an old->new mapping and the index of
+          // 'pre_loop_node' is greater than idx_before_pre_post, then we know that 'loop_node' was cloned and is part of
+          // the main loop (and 'pre_loop_node' is part of the pre loop).
+          if (!loop_node->is_CFG() && (pre_loop_node != NULL && pre_loop_node->_idx > idx_after_post_before_pre)) {
+            // 'loop_node' is a data node and part of the main loop. Rewire the control to the projection of the zero-trip guard if node
+            // of the main loop that is immediately preceding the cloned predicates.
+            _igvn.replace_input_of(loop_node, 0, zero_trip_guard_proj_main);
+            --i;
+          } else if (loop_node->_idx > idx_before_pre_post && loop_node->_idx < idx_after_post_before_pre) {
+            // 'loop_node' is a data node and part of the post loop. Rewire the control to the projection of the zero-trip guard if node
+            // of the post loop that is immediately preceding the post loop header node (there are no cloned predicates for the post loop).
+            assert(pre_loop_node == NULL, "a node belonging to the post loop should not have an old_new mapping at this stage");
+            _igvn.replace_input_of(loop_node, 0, zero_trip_guard_proj_post);
+            --i;
+          }
+        }
+
         // Remove the skeleton predicate from the pre-loop
         _igvn.replace_input_of(iff, 1, _igvn.intcon(1));
       }
@@ -1278,9 +1320,11 @@
   return proj;
 }
 
-void PhaseIdealLoop::duplicate_predicates(CountedLoopNode* pre_head, Node* start, Node* end,
+void PhaseIdealLoop::copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* start, Node* end,
                                           IdealLoopTree* outer_loop, LoopNode* outer_main_head,
-                                          uint dd_main_head) {
+                                          uint dd_main_head, const uint idx_before_pre_post,
+                                          const uint idx_after_post_before_pre, Node* zero_trip_guard_proj_main,
+                                          Node* zero_trip_guard_proj_post, const Node_List &old_new) {
   if (UseLoopPredicate) {
     Node* entry = pre_head->in(LoopNode::EntryControl);
     Node* predicate = NULL;
@@ -1296,8 +1340,12 @@
       }
     }
     predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
-    duplicate_predicates_helper(predicate, start, end, outer_loop, outer_main_head, dd_main_head);
-    duplicate_predicates_helper(profile_predicate, start, end, outer_loop, outer_main_head, dd_main_head);
+    copy_skeleton_predicates_to_main_loop_helper(predicate, start, end, outer_loop, outer_main_head, dd_main_head,
+                                                 idx_before_pre_post, idx_after_post_before_pre, zero_trip_guard_proj_main,
+                                                 zero_trip_guard_proj_post, old_new);
+    copy_skeleton_predicates_to_main_loop_helper(profile_predicate, start, end, outer_loop, outer_main_head, dd_main_head,
+                                                 idx_before_pre_post, idx_after_post_before_pre, zero_trip_guard_proj_main,
+                                                 zero_trip_guard_proj_post, old_new);
   }
 }
 
@@ -1347,8 +1395,10 @@
   }
 
   // Add the post loop
+  const uint idx_before_pre_post = Compile::current()->unique();
   CountedLoopNode *post_head = NULL;
   Node *main_exit = insert_post_loop(loop, old_new, main_head, main_end, incr, limit, post_head);
+  const uint idx_after_post_before_pre = Compile::current()->unique();
 
   //------------------------------
   // Step B: Create Pre-Loop.
@@ -1446,7 +1496,9 @@
   assert(castii != NULL, "no castII inserted");
   Node* opaque_castii = new Opaque1Node(C, castii);
   register_new_node(opaque_castii, outer_main_head->in(LoopNode::EntryControl));
-  duplicate_predicates(pre_head, castii, opaque_castii, outer_loop, outer_main_head, dd_main_head);
+  assert(post_head->in(1)->is_IfProj(), "must be zero-trip guard If node projection of the post loop");
+  copy_skeleton_predicates_to_main_loop(pre_head, castii, opaque_castii, outer_loop, outer_main_head, dd_main_head,
+                                        idx_before_pre_post, idx_after_post_before_pre, min_taken, post_head->in(1), old_new);
 
   // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
   // RCE and alignment may change this later.
@@ -1732,7 +1784,7 @@
   return !is_member(_phase->get_loop(n_c));
 }
 
-void PhaseIdealLoop::update_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con) {
+void PhaseIdealLoop::update_main_loop_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con) {
   // Search for skeleton predicates and update them according to the new stride
   Node* entry = ctrl;
   Node* prev_proj = ctrl;
@@ -1840,7 +1892,7 @@
   assert(old_trip_count > 1 &&
       (!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
 
-  update_skeleton_predicates(ctrl, loop_head, init, stride_con);
+  update_main_loop_skeleton_predicates(ctrl, loop_head, init, stride_con);
 
   // Adjust loop limit to keep valid iterations number after unroll.
   // Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp
index d68fbdb..7ee9100 100644
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@@ -783,14 +783,20 @@
   }
 
   Node* cast_incr_before_loop(Node* incr, Node* ctrl, Node* loop);
-  void duplicate_predicates_helper(Node* predicate, Node* start, Node* end, IdealLoopTree* outer_loop,
-                                   LoopNode* outer_main_head, uint dd_main_head);
-  void duplicate_predicates(CountedLoopNode* pre_head, Node* start, Node* end, IdealLoopTree* outer_loop,
-                            LoopNode* outer_main_head, uint dd_main_head);
+
+#ifdef ASSERT
+  void ensure_zero_trip_guard_proj(Node* node, bool is_main_loop);
+#endif
+  void copy_skeleton_predicates_to_main_loop_helper(Node* predicate, Node* start, Node* end, IdealLoopTree* outer_loop, LoopNode* outer_main_head,
+                                                    uint dd_main_head, const uint idx_before_pre_post, const uint idx_after_post_before_pre,
+                                                    Node* zero_trip_guard_proj_main, Node* zero_trip_guard_proj_post, const Node_List &old_new);
+  void copy_skeleton_predicates_to_main_loop(CountedLoopNode* pre_head, Node* start, Node* end, IdealLoopTree* outer_loop, LoopNode* outer_main_head,
+                                             uint dd_main_head, const uint idx_before_pre_post, const uint idx_after_post_before_pre,
+                                             Node* zero_trip_guard_proj_main, Node* zero_trip_guard_proj_post, const Node_List &old_new);
   Node* clone_skeleton_predicate(Node* iff, Node* value, Node* predicate, Node* uncommon_proj,
-                                  Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj);
+                                 Node* current_proj, IdealLoopTree* outer_loop, Node* prev_proj);
   bool skeleton_predicate_has_opaque(IfNode* iff);
-  void update_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con);
+  void update_main_loop_skeleton_predicates(Node* ctrl, CountedLoopNode* loop_head, Node* init, int stride_con);
   void insert_loop_limit_check(ProjNode* limit_check_proj, Node* cmp_limit, Node* bol);
 
 public:
@@ -1154,13 +1160,13 @@
   void loop_predication_follow_branches(Node *c, IdealLoopTree *loop, float loop_trip_cnt,
                                         PathFrequency& pf, Node_Stack& stack, VectorSet& seen,
                                         Node_List& if_proj_list);
-  ProjNode* insert_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
-                                      ProjNode* proj, ProjNode *predicate_proj,
-                                      ProjNode* upper_bound_proj,
-                                      int scale, Node* offset,
-                                      Node* init, Node* limit, jint stride,
-                                      Node* rng, bool& overflow,
-                                      Deoptimization::DeoptReason reason);
+  ProjNode* insert_initial_skeleton_predicate(IfNode* iff, IdealLoopTree *loop,
+                                              ProjNode* proj, ProjNode *predicate_proj,
+                                              ProjNode* upper_bound_proj,
+                                              int scale, Node* offset,
+                                              Node* init, Node* limit, jint stride,
+                                              Node* rng, bool& overflow,
+                                              Deoptimization::DeoptReason reason);
   Node* add_range_check_predicate(IdealLoopTree* loop, CountedLoopNode* cl,
                                   Node* predicate_proj, int scale_con, Node* offset,
                                   Node* limit, jint stride_con, Node* value);
diff --git a/test/hotspot/jtreg/compiler/loopopts/TestRangeCheckPredicatesControl.java b/test/hotspot/jtreg/compiler/loopopts/TestRangeCheckPredicatesControl.java
new file mode 100644
index 0000000..9f00d60
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/loopopts/TestRangeCheckPredicatesControl.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @requires vm.gc.Z & !vm.graal.enabled
+ * @bug 8237859
+ * @summary A LoadP node has a wrong control input (too early) which results in an out-of-bounds read of an object array with ZGC.
+ *
+ * @run main/othervm -XX:+UnlockExperimentalVMOptions -XX:+UseZGC compiler.loopopts.TestRangeCheckPredicatesControl
+ * @run main/othervm -XX:+UnlockExperimentalVMOptions -XX:+UseZGC -XX:+UnlockDiagnosticVMOptions -XX:+StressGCM compiler.loopopts.TestRangeCheckPredicatesControl
+ */
+
+package compiler.loopopts;
+
+public class TestRangeCheckPredicatesControl {
+    static Wrapper w1 = new Wrapper();
+    static Wrapper w2 = new Wrapper();
+    static Wrapper w3 = new Wrapper();
+
+    public static void main(String[] args) {
+        for (int x = 0; x < 10000000; x++) {
+            test(x % 2 == 0);
+            test2(x % 2 == 0, x % 3 == 0);
+            test3(x % 2 == 0);
+            test4(x % 2 == 0);
+        }
+    }
+
+    private static class Wrapper {
+        long longs;
+        int a;
+        public void maybeMaskBits(boolean b) {
+            if (b) {
+                longs &= 0x1F1F1F1F;
+            }
+        }
+
+        public void maybeMaskBits2(boolean b, boolean c) {
+            if (b) {
+                longs &= 0x1F1F1F1F;
+            }
+            if (c) {
+                a += 344;
+            }
+        }
+    }
+
+    private static void test(boolean flag) {
+        Wrapper[] wrappers_array;
+        if (flag) {
+            wrappers_array = new Wrapper[] {w1, w2};
+        } else {
+            wrappers_array = new Wrapper[] {w1, w2, w3};
+        }
+
+        // This loop is first unswitched and then pre/main/post loops are created for both unswitched loops.
+        // Both loops are unrolled once (two iterations in main loop, one in pre loop). As a result, the main
+        // loop contains an access of wrappers_array[1] and wrappers_array[2]. If 'flag' is false, then the
+        // main loop is not entered. But the load for wrappers_array[2] is wrongly scheduled before deciding
+        // if the main loop is executed or not due to a wrong control input of the corresponding LoadP node.
+        // The LoadP node still has a control input from a loop predicate of the original loop to be unswitched.
+        // As a consequence, the access wrappers_array[2] is executed regardless of the value of 'flag' resulting
+        // in a segfault. This fix addresses the problem of not updating the control inputs for data nodes
+        // from a predicate before the pre loop to the main and post loop. The fix for JDK-8240227 updates the
+        // control dependencies to predicates of the original loop to be unswitched to newly cloned predicates
+        // of the unswitched loops.
+        for (int i = 0; i < wrappers_array.length; i++) {
+            wrappers_array[i].maybeMaskBits(flag);
+        }
+    }
+
+    // This test unswitches two times
+    private static void test2(boolean flag, boolean flag2) {
+        Wrapper[] wrappers_array;
+        Wrapper[] wrappers_array2;
+        if (flag) {
+            wrappers_array = new Wrapper[] {w1, w2};
+            wrappers_array2 = new Wrapper[] {w1, w2};
+        } else {
+            wrappers_array = new Wrapper[] {w1, w2, w3};
+            wrappers_array2 = new Wrapper[] {w1, w2, w3};
+        }
+
+        for (int i = 0; i < wrappers_array.length; i++) {
+            wrappers_array[i].maybeMaskBits(flag);
+            wrappers_array2[i].maybeMaskBits2(flag, flag2);
+        }
+    }
+
+    // Test without unswitching but wrong control dependencies for data nodes to predicates before the pre loop.
+    // There is no update for the data nodes belonging to main and post loop after pre/main/post loops are created.
+    private static void test3(boolean flag) {
+        Wrapper[] wrappers_array;
+        if (flag) {
+            wrappers_array = new Wrapper[] {w1, w2};
+        } else {
+            wrappers_array = new Wrapper[] {w1, w2, w3};
+        }
+
+        for (int i = 0; i < wrappers_array.length; i++) {
+            wrappers_array[i].longs &= 0x1F1F1F1F;
+        }
+    }
+
+    private static void test4(boolean flag) {
+        Wrapper[] wrappers_array;
+        Wrapper[] wrappers_array2;
+        if (flag) {
+            wrappers_array = new Wrapper[] {w1, w2};
+            wrappers_array2 = new Wrapper[] {w1, w2};
+        } else {
+            wrappers_array = new Wrapper[] {w1, w2, w3};
+            wrappers_array2 = new Wrapper[] {w1, w2, w3};
+        }
+
+        for (int i = 0; i < wrappers_array.length; i++) {
+            wrappers_array[i].longs &= 0x1F1F1F1F;
+            wrappers_array2[i].longs &= 0x1F1F1F1F;
+        }
+    }
+}