8251925: C2: RenaissanceStressTest fails with assert(!had_error): bad dominance

Reviewed-by: kvn, thartmann
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index a7515d3..5ec83b0 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -69,7 +69,8 @@
   _nlist(arena(), 8, 0, NULL),            // scratch list of nodes
   _stk(arena(), 8, 0, NULL),              // scratch stack of nodes
   _lpt(NULL),                             // loop tree node
-  _lp(NULL),                              // LoopNode
+  _lp(NULL),                              // CountedLoopNode
+  _pre_loop_end(NULL),                    // Pre loop CountedLoopEndNode
   _bb(NULL),                              // basic block
   _iv(NULL),                              // induction var
   _race_possible(false),                  // cases where SDMU is true
@@ -155,10 +156,15 @@
 
   if (cl->is_main_loop()) {
     // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
-    CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
-    if (pre_end == NULL) return;
-    Node *pre_opaq1 = pre_end->limit();
-    if (pre_opaq1->Opcode() != Op_Opaque1) return;
+    CountedLoopEndNode* pre_end = find_pre_loop_end(cl);
+    if (pre_end == NULL) {
+      return;
+    }
+    Node* pre_opaq1 = pre_end->limit();
+    if (pre_opaq1->Opcode() != Op_Opaque1) {
+      return;
+    }
+    set_pre_loop_end(pre_end);
   }
 
   init(); // initialize data structures
@@ -911,8 +917,7 @@
   if (!p.has_iv()) {
     return true;   // no induction variable
   }
-  CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
-  assert(pre_end != NULL, "we must have a correct pre-loop");
+  CountedLoopEndNode* pre_end = pre_loop_end();
   assert(pre_end->stride_is_con(), "pre loop stride is constant");
   int preloop_stride = pre_end->stride_con();
 
@@ -3431,21 +3436,19 @@
 // to align_to_ref will be a position zero in the vector.
 //   (iv + k) mod vector_align == 0
 void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
-  CountedLoopNode *main_head = lp()->as_CountedLoop();
-  assert(main_head->is_main_loop(), "");
-  CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
-  assert(pre_end != NULL, "we must have a correct pre-loop");
-  Node *pre_opaq1 = pre_end->limit();
+  assert(lp()->is_main_loop(), "");
+  CountedLoopEndNode* pre_end = pre_loop_end();
+  Node* pre_opaq1 = pre_end->limit();
   assert(pre_opaq1->Opcode() == Op_Opaque1, "");
-  Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
-  Node *lim0 = pre_opaq->in(1);
+  Opaque1Node* pre_opaq = (Opaque1Node*)pre_opaq1;
+  Node* lim0 = pre_opaq->in(1);
 
   // Where we put new limit calculations
-  Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+  Node* pre_ctrl = pre_loop_head()->in(LoopNode::EntryControl);
 
   // Ensure the original loop limit is available from the
   // pre-loop Opaque1 node.
-  Node *orig_limit = pre_opaq->original_loop_limit();
+  Node* orig_limit = pre_opaq->original_loop_limit();
   assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
 
   SWPointer align_to_ref_p(align_to_ref, this, NULL, false);
@@ -3596,7 +3599,7 @@
 
 //----------------------------get_pre_loop_end---------------------------
 // Find pre loop end from main loop.  Returns null if none.
-CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode* cl) {
+CountedLoopEndNode* SuperWord::find_pre_loop_end(CountedLoopNode* cl) const {
   // The loop cannot be optimized if the graph shape at
   // the loop entry is inappropriate.
   if (!PhaseIdealLoop::is_canonical_loop_entry(cl)) {
@@ -3724,7 +3727,7 @@
   // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
   Node* base = adr->in(AddPNode::Base);
   // The base address should be loop invariant
-  if (!invariant(base)) {
+  if (is_main_loop_member(base)) {
     assert(!valid(), "base address is loop variant");
     return;
   }
@@ -3753,7 +3756,7 @@
       break; // stop looking at addp's
     }
   }
-  if (!invariant(adr)) {
+  if (is_main_loop_member(adr)) {
     assert(!valid(), "adr is loop variant");
     return;
   }
@@ -3783,12 +3786,23 @@
   #endif
 {}
 
+bool SWPointer::is_main_loop_member(Node* n) const {
+  Node* n_c = phase()->get_ctrl(n);
+  return lpt()->is_member(phase()->get_loop(n_c));
+}
 
-bool SWPointer::invariant(Node* n) {
+bool SWPointer::invariant(Node* n) const {
   NOT_PRODUCT(Tracer::Depth dd;)
-  Node *n_c = phase()->get_ctrl(n);
+  Node* n_c = phase()->get_ctrl(n);
   NOT_PRODUCT(_tracer.invariant_1(n, n_c);)
-  return !lpt()->is_member(phase()->get_loop(n_c));
+  bool is_not_member = !is_main_loop_member(n);
+  if (is_not_member && _slp->lp()->is_main_loop()) {
+    // Check that n_c dominates the pre loop head node. If it does not, then we cannot use n as invariant for the pre loop
+    // CountedLoopEndNode check because n_c is either part of the pre loop or between the pre and the main loop (illegal
+    // invariant: Happens, for example, when n_c is a CastII node that prevents data nodes to flow above the main loop).
+    return phase()->is_dominator(n_c, _slp->pre_loop_head());
+  }
+  return is_not_member;
 }
 
 //------------------------scaled_iv_plus_offset--------------------
@@ -3851,7 +3865,7 @@
     NOT_PRODUCT(_tracer.scaled_iv_3(n, _scale);)
     return true;
   }
-  if (_analyze_only && (invariant(n) == false)) {
+  if (_analyze_only && (is_main_loop_member(n))) {
     _nstack->push(n, _stack_idx++);
   }
 
@@ -3937,7 +3951,7 @@
     return false;
   }
 
-  if (_analyze_only && (invariant(n) == false)) {
+  if (_analyze_only && is_main_loop_member(n)) {
     _nstack->push(n, _stack_idx++);
   }
   if (opc == Op_AddI) {
@@ -3970,20 +3984,27 @@
       return true;
     }
   }
-  if (invariant(n)) {
+
+  if (!is_main_loop_member(n)) {
+    // 'n' is loop invariant. Skip range check dependent CastII nodes before checking if 'n' is dominating the pre loop.
     if (opc == Op_ConvI2L) {
       n = n->in(1);
       if (n->Opcode() == Op_CastII &&
           n->as_CastII()->has_range_check()) {
         // Skip range check dependent CastII nodes
-        assert(invariant(n), "sanity");
+        assert(!is_main_loop_member(n), "sanity");
         n = n->in(1);
       }
+
+      // Check if 'n' can really be used as invariant (not in main loop and dominating the pre loop).
+      if (invariant(n)) {
+        _negate_invar = negate;
+        _invar = n;
+        NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)
+        return true;
+      }
     }
-    _negate_invar = negate;
-    _invar = n;
-    NOT_PRODUCT(_tracer.offset_plus_k_10(n, _invar, _negate_invar, _offset);)
-    return true;
+    return false;
   }
 
   NOT_PRODUCT(_tracer.offset_plus_k_11(n);)
@@ -4004,8 +4025,10 @@
 
 //----------------------------tracing------------------------
 #ifndef PRODUCT
-void SWPointer::Tracer::print_depth() {
-  for (int ii = 0; ii<_depth; ++ii) tty->print("  ");
+void SWPointer::Tracer::print_depth() const {
+  for (int ii = 0; ii < _depth; ++ii) {
+    tty->print("  ");
+  }
 }
 
 void SWPointer::Tracer::ctor_1 (Node* mem) {
@@ -4057,7 +4080,7 @@
   }
 }
 
-void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) {
+void SWPointer::Tracer::invariant_1(Node *n, Node *n_c) const {
   if (_slp->do_vector_loop() && _slp->is_debug() && _slp->_lpt->is_member(_slp->_phase->get_loop(n_c)) != (int)_slp->in_bb(n)) {
     int is_member =  _slp->_lpt->is_member(_slp->_phase->get_loop(n_c));
     int in_bb     =  _slp->in_bb(n);
diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp
index 32342a2..b519e51 100644
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@@ -291,11 +291,11 @@
   void unrolling_analysis(int &local_loop_unroll_factor);
 
   // Accessors for SWPointer
-  PhaseIdealLoop* phase()          { return _phase; }
-  IdealLoopTree* lpt()             { return _lpt; }
-  PhiNode* iv()                    { return _iv; }
+  PhaseIdealLoop* phase() const    { return _phase; }
+  IdealLoopTree* lpt() const       { return _lpt; }
+  PhiNode* iv() const              { return _iv; }
 
-  bool early_return()              { return _early_return; }
+  bool early_return() const        { return _early_return; }
 
 #ifndef PRODUCT
   bool     is_debug()              { return _vector_loop_debug > 0; }
@@ -310,7 +310,8 @@
   bool     do_reserve_copy()       { return _do_reserve_copy; }
  private:
   IdealLoopTree* _lpt;             // Current loop tree node
-  LoopNode*      _lp;              // Current LoopNode
+  CountedLoopNode* _lp;            // Current CountedLoopNode
+  CountedLoopEndNode* _pre_loop_end; // Current CountedLoopEndNode of pre loop
   Node*          _bb;              // Current basic block
   PhiNode*       _iv;              // Induction var
   bool           _race_possible;   // In cases where SDMU is true
@@ -330,14 +331,33 @@
   Arena* arena()                   { return _arena; }
 
   Node* bb()                       { return _bb; }
-  void  set_bb(Node* bb)           { _bb = bb; }
-
+  void set_bb(Node* bb)            { _bb = bb; }
   void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; }
+  CountedLoopNode* lp() const      { return _lp; }
+  void set_lp(CountedLoopNode* lp) {
+    _lp = lp;
+    _iv = lp->as_CountedLoop()->phi()->as_Phi();
+  }
+  int iv_stride() const            { return lp()->stride_con(); }
 
-  LoopNode* lp()                   { return _lp; }
-  void      set_lp(LoopNode* lp)   { _lp = lp;
-                                     _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
-  int      iv_stride()             { return lp()->as_CountedLoop()->stride_con(); }
+  CountedLoopNode* pre_loop_head() const {
+    assert(_pre_loop_end != NULL && _pre_loop_end->loopnode() != NULL, "should find head from pre loop end");
+    return _pre_loop_end->loopnode();
+  }
+  void set_pre_loop_end(CountedLoopEndNode* pre_loop_end) {
+    assert(pre_loop_end, "must be valid");
+    _pre_loop_end = pre_loop_end;
+  }
+  CountedLoopEndNode* pre_loop_end() const {
+#ifdef ASSERT
+    assert(_lp != NULL, "sanity");
+    assert(_pre_loop_end != NULL, "should be set when fetched");
+    Node* found_pre_end = find_pre_loop_end(_lp);
+    assert(_pre_loop_end == found_pre_end && _pre_loop_end == pre_loop_head()->loopexit(),
+           "should find the pre loop end and must be the same result");
+#endif
+    return _pre_loop_end;
+  }
 
   int vector_width(Node* n) {
     BasicType bt = velt_basic_type(n);
@@ -531,7 +551,7 @@
   // to align_to_ref will be a position zero in the vector.
   void align_initial_loop_index(MemNode* align_to_ref);
   // Find pre loop end from main loop.  Returns null if none.
-  CountedLoopEndNode* get_pre_loop_end(CountedLoopNode *cl);
+  CountedLoopEndNode* find_pre_loop_end(CountedLoopNode *cl) const;
   // Is the use of d1 in u1 at the same operand position as d2 in u2?
   bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2);
   void init();
@@ -567,11 +587,12 @@
   bool        _analyze_only; // Used in loop unrolling only for swpointer trace
   uint        _stack_idx;    // Used in loop unrolling only for swpointer trace
 
-  PhaseIdealLoop* phase() { return _slp->phase(); }
-  IdealLoopTree*  lpt()   { return _slp->lpt(); }
-  PhiNode*        iv()    { return _slp->iv();  } // Induction var
+  PhaseIdealLoop* phase() const { return _slp->phase(); }
+  IdealLoopTree*  lpt() const   { return _slp->lpt(); }
+  PhiNode*        iv() const    { return _slp->iv();  } // Induction var
 
-  bool invariant(Node* n);
+  bool is_main_loop_member(Node* n) const;
+  bool invariant(Node* n) const;
 
   // Match: k*iv + offset
   bool scaled_iv_plus_offset(Node* n);
@@ -638,7 +659,7 @@
     SuperWord*   _slp;
     static int   _depth;
     int _depth_save;
-    void print_depth();
+    void print_depth() const;
     int  depth() const    { return _depth; }
     void set_depth(int d) { _depth = d; }
     void inc_depth()      { _depth++;}
@@ -664,7 +685,7 @@
     void ctor_5(Node* adr, Node* base,  int i);
     void ctor_6(Node* mem);
 
-    void invariant_1(Node *n, Node *n_c);
+    void invariant_1(Node *n, Node *n_c) const;
 
     void scaled_iv_plus_offset_1(Node* n);
     void scaled_iv_plus_offset_2(Node* n);