7125896: Eliminate nested locks

Nested locks elimination done before lock nodes expansion by looking for outer locks of the same object.

Reviewed-by: never, twisti
diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad
index 6bf396a..3063b2d 100644
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
@@ -10274,24 +10274,24 @@
 // ============================================================================
 // inlined locking and unlocking
 
-instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastLock object box));
 
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Lock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
 
 
-instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{
+instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
   match(Set pcc (FastUnlock object box));
-  effect(KILL scratch, TEMP scratch2);
+  effect(TEMP scratch2, USE_KILL box, KILL scratch);
   ins_cost(100);
 
-  format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
+  format %{ "FASTUNLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
   ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
 %}
diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad
index 076cf3a..ca85f15 100644
--- a/hotspot/src/cpu/x86/vm/x86_32.ad
+++ b/hotspot/src/cpu/x86/vm/x86_32.ad
@@ -13435,20 +13435,20 @@
 // inlined locking and unlocking
 
 
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
+instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
   match( Set cr (FastLock object box) );
-  effect( TEMP tmp, TEMP scr );
+  effect( TEMP tmp, TEMP scr, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
+  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode( Fast_Lock(object,box,tmp,scr) );
   ins_pipe( pipe_slow );
 %}
 
 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
   match( Set cr (FastUnlock object box) );
-  effect( TEMP tmp );
+  effect( TEMP tmp, USE_KILL box );
   ins_cost(300);
-  format %{ "FASTUNLOCK $object, $box, $tmp" %}
+  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
   ins_encode( Fast_Unlock(object,box,tmp) );
   ins_pipe( pipe_slow );
 %}
diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad
index beed258..338db98 100644
--- a/hotspot/src/cpu/x86/vm/x86_64.ad
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad
@@ -11511,13 +11511,13 @@
 // inlined locking and unlocking
 
 instruct cmpFastLock(rFlagsReg cr,
-                     rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
+                     rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
 %{
   match(Set cr (FastLock object box));
-  effect(TEMP tmp, TEMP scr);
+  effect(TEMP tmp, TEMP scr, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastlock $object,$box,$tmp,$scr" %}
+  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
   ins_encode(Fast_Lock(object, box, tmp, scr));
   ins_pipe(pipe_slow);
 %}
@@ -11526,10 +11526,10 @@
                        rRegP object, rax_RegP box, rRegP tmp)
 %{
   match(Set cr (FastUnlock object box));
-  effect(TEMP tmp);
+  effect(TEMP tmp, USE_KILL box);
 
   ins_cost(300);
-  format %{ "fastunlock $object, $box, $tmp" %}
+  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
   ins_encode(Fast_Unlock(object, box, tmp));
   ins_pipe(pipe_slow);
 %}
diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.cpp b/hotspot/src/share/vm/ci/ciTypeFlow.cpp
index a5042d9..6678b4c 100644
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp
@@ -1589,7 +1589,7 @@
   _next = NULL;
   _on_work_list = false;
   _backedge_copy = false;
-  _exception_entry = false;
+  _has_monitorenter = false;
   _trap_bci = -1;
   _trap_index = 0;
   df_init();
@@ -2182,6 +2182,10 @@
         !head->is_clonable_exit(lp))
       continue;
 
+    // Avoid BoxLock merge.
+    if (EliminateNestedLocks && head->has_monitorenter())
+      continue;
+
     // check not already cloned
     if (head->backedge_copy_count() != 0)
       continue;
@@ -2322,6 +2326,10 @@
     // Watch for bailouts.
     if (failing())  return;
 
+    if (str.cur_bc() == Bytecodes::_monitorenter) {
+      block->set_has_monitorenter();
+    }
+
     if (res) {
 
       // We have encountered a trap.  Record it in this block.
diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.hpp b/hotspot/src/share/vm/ci/ciTypeFlow.hpp
index 5cc5c90..8a8d241 100644
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp
@@ -544,15 +544,19 @@
     // Has this block been cloned for a loop backedge?
     bool                             _backedge_copy;
 
+    // This block is entry to irreducible loop.
+    bool                             _irreducible_entry;
+
+    // This block has monitor entry point.
+    bool                             _has_monitorenter;
+
     // A pointer used for our internal work list
-    Block*                           _next;
     bool                             _on_work_list;      // on the work list
+    Block*                           _next;
     Block*                           _rpo_next;          // Reverse post order list
 
     // Loop info
     Loop*                            _loop;              // nearest loop
-    bool                             _irreducible_entry; // entry to irreducible loop
-    bool                             _exception_entry;   // entry to exception handler
 
     ciBlock*     ciblock() const     { return _ciblock; }
     StateVector* state() const     { return _state; }
@@ -689,6 +693,8 @@
     bool   is_loop_head() const          { return _loop && _loop->head() == this; }
     void   set_irreducible_entry(bool c) { _irreducible_entry = c; }
     bool   is_irreducible_entry() const  { return _irreducible_entry; }
+    void   set_has_monitorenter()        { _has_monitorenter = true; }
+    bool   has_monitorenter() const      { return _has_monitorenter; }
     bool   is_visited() const            { return has_pre_order(); }
     bool   is_post_visited() const       { return has_post_order(); }
     bool   is_clonable_exit(Loop* lp);
diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp
index e7a0fff..f73dcbd 100644
--- a/hotspot/src/share/vm/opto/c2_globals.hpp
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp
@@ -426,6 +426,9 @@
   product(bool, EliminateLocks, true,                                       \
           "Coarsen locks when possible")                                    \
                                                                             \
+  product(bool, EliminateNestedLocks, true,                                 \
+          "Eliminate nested locks of the same object when possible")        \
+                                                                            \
   notproduct(bool, PrintLockStatistics, false,                              \
           "Print precise statistics on the dynamic lock usage")             \
                                                                             \
diff --git a/hotspot/src/share/vm/opto/callnode.cpp b/hotspot/src/share/vm/opto/callnode.cpp
index 58d3e37..8c6c5ec 100644
--- a/hotspot/src/share/vm/opto/callnode.cpp
+++ b/hotspot/src/share/vm/opto/callnode.cpp
@@ -400,10 +400,10 @@
       Node *box = mcall->monitor_box(this, i);
       Node *obj = mcall->monitor_obj(this, i);
       if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
-        while( !box->is_BoxLock() )  box = box->in(1);
+        box = BoxLockNode::box_node(box);
         format_helper( regalloc, st, box, "MON-BOX[", i, &scobjs );
       } else {
-        OptoReg::Name box_reg = BoxLockNode::stack_slot(box);
+        OptoReg::Name box_reg = BoxLockNode::reg(box);
         st->print(" MON-BOX%d=%s+%d",
                    i,
                    OptoReg::regname(OptoReg::c_frame_pointer),
@@ -411,8 +411,7 @@
       }
       const char* obj_msg = "MON-OBJ[";
       if (EliminateLocks) {
-        while( !box->is_BoxLock() )  box = box->in(1);
-        if (box->as_BoxLock()->is_eliminated())
+        if (BoxLockNode::box_node(box)->is_eliminated())
           obj_msg = "MON-OBJ(LOCK ELIMINATED)[";
       }
       format_helper( regalloc, st, obj, obj_msg, i, &scobjs );
@@ -1388,7 +1387,8 @@
     if (n != NULL && n->is_Unlock()) {
       UnlockNode *unlock = n->as_Unlock();
       if ((lock->obj_node() == unlock->obj_node()) &&
-          (lock->box_node() == unlock->box_node()) && !unlock->is_eliminated()) {
+          BoxLockNode::same_slot(lock->box_node(), unlock->box_node()) &&
+          !unlock->is_eliminated()) {
         lock_ops.append(unlock);
         return true;
       }
@@ -1432,7 +1432,7 @@
   if (ctrl->is_Lock()) {
     LockNode *lock = ctrl->as_Lock();
     if ((lock->obj_node() == unlock->obj_node()) &&
-            (lock->box_node() == unlock->box_node())) {
+        BoxLockNode::same_slot(lock->box_node(), unlock->box_node())) {
       lock_result = lock;
     }
   }
@@ -1463,7 +1463,8 @@
       if (lock1_node != NULL && lock1_node->is_Lock()) {
         LockNode *lock1 = lock1_node->as_Lock();
         if ((lock->obj_node() == lock1->obj_node()) &&
-            (lock->box_node() == lock1->box_node()) && !lock1->is_eliminated()) {
+            BoxLockNode::same_slot(lock->box_node(), lock1->box_node()) &&
+            !lock1->is_eliminated()) {
           lock_ops.append(lock1);
           return true;
         }
@@ -1507,19 +1508,16 @@
 void AbstractLockNode::create_lock_counter(JVMState* state) {
   _counter = OptoRuntime::new_named_counter(state, NamedCounter::LockCounter);
 }
-#endif
 
-void AbstractLockNode::set_eliminated() {
-  _eliminate = true;
-#ifndef PRODUCT
+void AbstractLockNode::set_eliminated_lock_counter() {
   if (_counter) {
     // Update the counter to indicate that this lock was eliminated.
     // The counter update code will stay around even though the
     // optimizer will eliminate the lock operation itself.
     _counter->set_tag(NamedCounter::EliminatedLockCounter);
   }
-#endif
 }
+#endif
 
 //=============================================================================
 Node *LockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
@@ -1535,7 +1533,7 @@
   // prevents macro expansion from expanding the lock.  Since we don't
   // modify the graph, the value returned from this function is the
   // one computed above.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are locking an unescaped object, the lock/unlock is unnecessary
     //
@@ -1544,16 +1542,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
       return result;
     }
 
@@ -1613,8 +1606,7 @@
         for (int i = 0; i < lock_ops.length(); i++) {
           AbstractLockNode* lock = lock_ops.at(i);
 
-          // Mark it eliminated to update any counters
-          lock->set_eliminated();
+          // Mark it eliminated by coarsening and update any counters
           lock->set_coarsened();
         }
       } else if (ctrl->is_Region() &&
@@ -1632,6 +1624,41 @@
 }
 
 //=============================================================================
+bool LockNode::is_nested_lock_region() {
+  Node* box = box_node();
+  if (!box->is_BoxLock() || box->as_BoxLock()->stack_slot() <= 0)
+    return false; // External lock or it is not Box (Phi node).
+
+  // Ignore complex cases: merged locks or multiple locks.
+  BoxLockNode* box_lock = box->as_BoxLock();
+  Node* obj = obj_node();
+  LockNode* unique_lock = NULL;
+  if (!box_lock->is_simple_lock_region(&unique_lock, obj) ||
+      (unique_lock != this)) {
+    return false;
+  }
+
+  // Look for external lock for the same object.
+  int stk_slot = box_lock->stack_slot();
+  SafePointNode* sfn = this->as_SafePoint();
+  JVMState* youngest_jvms = sfn->jvms();
+  int max_depth = youngest_jvms->depth();
+  for (int depth = 1; depth <= max_depth; depth++) {
+    JVMState* jvms = youngest_jvms->of_depth(depth);
+    int num_mon  = jvms->nof_monitors();
+    // Loop over monitors
+    for (int idx = 0; idx < num_mon; idx++) {
+      Node* obj_node = sfn->monitor_obj(jvms, idx);
+      BoxLockNode* box_node = BoxLockNode::box_node(sfn->monitor_box(jvms, idx));
+      if ((obj_node == obj) && (box_node->stack_slot() < stk_slot)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+//=============================================================================
 uint UnlockNode::size_of() const { return sizeof(*this); }
 
 //=============================================================================
@@ -1649,7 +1676,7 @@
   // modify the graph, the value returned from this function is the
   // one computed above.
   // Escape state is defined after Parse phase.
-  if (can_reshape && EliminateLocks && (!is_eliminated() || is_coarsened())) {
+  if (can_reshape && EliminateLocks && !is_non_esc_obj()) {
     //
     // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
     //
@@ -1658,16 +1685,11 @@
     if (cgr != NULL)
       es = cgr->escape_state(obj_node());
     if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-      if (!is_eliminated()) {
-        // Mark it eliminated to update any counters
-        this->set_eliminated();
-      } else {
-        assert(is_coarsened(), "sanity");
-        // The lock could be marked eliminated by lock coarsening
-        // code during first IGVN before EA. Clear coarsened flag
-        // to eliminate all associated locks/unlocks.
-        this->clear_coarsened();
-      }
+      assert(!is_eliminated() || is_coarsened(), "sanity");
+      // The lock could be marked eliminated by lock coarsening
+      // code during first IGVN before EA. Replace coarsened flag
+      // to eliminate all associated locks/unlocks.
+      this->set_non_esc_obj();
     }
   }
   return result;
diff --git a/hotspot/src/share/vm/opto/callnode.hpp b/hotspot/src/share/vm/opto/callnode.hpp
index 565d5d8..fa9af0c 100644
--- a/hotspot/src/share/vm/opto/callnode.hpp
+++ b/hotspot/src/share/vm/opto/callnode.hpp
@@ -840,8 +840,12 @@
 //------------------------------AbstractLockNode-----------------------------------
 class AbstractLockNode: public CallNode {
 private:
-  bool _eliminate;    // indicates this lock can be safely eliminated
-  bool _coarsened;    // indicates this lock was coarsened
+  enum {
+    Regular = 0,  // Normal lock
+    NonEscObj,    // Lock is used for non escaping object
+    Coarsened,    // Lock was coarsened
+    Nested        // Nested lock
+  } _kind;
 #ifndef PRODUCT
   NamedCounter* _counter;
 #endif
@@ -858,12 +862,13 @@
                                GrowableArray<AbstractLockNode*> &lock_ops);
   LockNode *find_matching_lock(UnlockNode* unlock);
 
+  // Update the counter to indicate that this lock was eliminated.
+  void set_eliminated_lock_counter() PRODUCT_RETURN;
 
 public:
   AbstractLockNode(const TypeFunc *tf)
     : CallNode(tf, NULL, TypeRawPtr::BOTTOM),
-      _coarsened(false),
-      _eliminate(false)
+      _kind(Regular)
   {
 #ifndef PRODUCT
     _counter = NULL;
@@ -873,20 +878,23 @@
   Node *   obj_node() const       {return in(TypeFunc::Parms + 0); }
   Node *   box_node() const       {return in(TypeFunc::Parms + 1); }
   Node *   fastlock_node() const  {return in(TypeFunc::Parms + 2); }
+  void     set_box_node(Node* box) { set_req(TypeFunc::Parms + 1, box); }
+
   const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
 
   virtual uint size_of() const { return sizeof(*this); }
 
-  bool is_eliminated()         {return _eliminate; }
-  // mark node as eliminated and update the counter if there is one
-  void set_eliminated();
+  bool is_eliminated()  const { return (_kind != Regular); }
+  bool is_non_esc_obj() const { return (_kind == NonEscObj); }
+  bool is_coarsened()   const { return (_kind == Coarsened); }
+  bool is_nested()      const { return (_kind == Nested); }
 
-  bool is_coarsened()  { return _coarsened; }
-  void set_coarsened() { _coarsened = true; }
-  void clear_coarsened() { _coarsened = false; }
+  void set_non_esc_obj() { _kind = NonEscObj; set_eliminated_lock_counter(); }
+  void set_coarsened()   { _kind = Coarsened; set_eliminated_lock_counter(); }
+  void set_nested()      { _kind = Nested; set_eliminated_lock_counter(); }
 
   // locking does not modify its arguments
-  virtual bool        may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
+  virtual bool may_modify(const TypePtr *addr_t, PhaseTransform *phase){ return false;}
 
 #ifndef PRODUCT
   void create_lock_counter(JVMState* s);
@@ -936,6 +944,8 @@
   virtual void  clone_jvms() {
     set_jvms(jvms()->clone_deep(Compile::current()));
   }
+
+  bool is_nested_lock_region(); // Is this Lock nested?
 };
 
 //------------------------------Unlock---------------------------------------
diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
index a874d80..514a7ac 100644
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@@ -1842,20 +1842,15 @@
       Node *n = C->macro_node(i);
       if (n->is_AbstractLock()) { // Lock and Unlock nodes
         AbstractLockNode* alock = n->as_AbstractLock();
-        if (!alock->is_eliminated() || alock->is_coarsened()) {
+        if (!alock->is_non_esc_obj()) {
           PointsToNode::EscapeState es = escape_state(alock->obj_node());
           assert(es != PointsToNode::UnknownEscape, "should know");
           if (es != PointsToNode::UnknownEscape && es != PointsToNode::GlobalEscape) {
-            if (!alock->is_eliminated()) {
-              // Mark it eliminated to update any counters
-              alock->set_eliminated();
-            } else {
-              // The lock could be marked eliminated by lock coarsening
-              // code during first IGVN before EA. Clear coarsened flag
-              // to eliminate all associated locks/unlocks and relock
-              // during deoptimization.
-              alock->clear_coarsened();
-            }
+            assert(!alock->is_eliminated() || alock->is_coarsened(), "sanity");
+            // The lock could be marked eliminated by lock coarsening
+            // code during first IGVN before EA. Replace coarsened flag
+            // to eliminate all associated locks/unlocks.
+            alock->set_non_esc_obj();
           }
         }
       }
diff --git a/hotspot/src/share/vm/opto/locknode.cpp b/hotspot/src/share/vm/opto/locknode.cpp
index 23a594e..14e056f 100644
--- a/hotspot/src/share/vm/opto/locknode.cpp
+++ b/hotspot/src/share/vm/opto/locknode.cpp
@@ -49,18 +49,22 @@
 
 //-----------------------------hash--------------------------------------------
 uint BoxLockNode::hash() const {
+  if (EliminateNestedLocks)
+    return NO_HASH; // Each locked region has own BoxLock node
   return Node::hash() + _slot + (_is_eliminated ? Compile::current()->fixed_slots() : 0);
 }
 
 //------------------------------cmp--------------------------------------------
 uint BoxLockNode::cmp( const Node &n ) const {
+  if (EliminateNestedLocks)
+    return (&n == this); // Always fail except on self
   const BoxLockNode &bn = (const BoxLockNode &)n;
   return bn._slot == _slot && bn._is_eliminated == _is_eliminated;
 }
 
-OptoReg::Name BoxLockNode::stack_slot(Node* box_node) {
+BoxLockNode* BoxLockNode::box_node(Node* box) {
   // Chase down the BoxNode
-  while (!box_node->is_BoxLock()) {
+  while (!box->is_BoxLock()) {
     //    if (box_node->is_SpillCopy()) {
     //      Node *m = box_node->in(1);
     //      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_StoreP) {
@@ -68,10 +72,80 @@
     //        continue;
     //      }
     //    }
-    assert(box_node->is_SpillCopy() || box_node->is_Phi(), "Bad spill of Lock.");
-    box_node = box_node->in(1);
+    assert(box->is_SpillCopy() || box->is_Phi(), "Bad spill of Lock.");
+    // Only BoxLock nodes with the same stack slot are merged.
+    // So it is enough to trace one path to find the slot value.
+    box = box->in(1);
   }
-  return box_node->in_RegMask(0).find_first_elem();
+  return box->as_BoxLock();
+}
+
+OptoReg::Name BoxLockNode::reg(Node* box) {
+  return box_node(box)->in_RegMask(0).find_first_elem();
+}
+
+bool BoxLockNode::same_slot(Node* box1, Node* box2) {
+  return box_node(box1)->_slot == box_node(box2)->_slot;
+}
+
+// Is BoxLock node used for one simple lock region (same box and obj)?
+bool BoxLockNode::is_simple_lock_region(LockNode** unique_lock, Node* obj) {
+  LockNode* lock = NULL;
+  bool has_one_lock = false;
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    if (n->is_Phi())
+      return false; // Merged regions
+    if (n->is_AbstractLock()) {
+      AbstractLockNode* alock = n->as_AbstractLock();
+      // Check lock's box since box could be referenced by Lock's debug info.
+      if (alock->box_node() == this) {
+        if (alock->obj_node() == obj) {
+          if ((unique_lock != NULL) && alock->is_Lock()) {
+            if (lock == NULL) {
+              lock = alock->as_Lock();
+              has_one_lock = true;
+            } else if (lock != alock->as_Lock()) {
+              has_one_lock = false;
+            }
+          }
+        } else {
+          return false; // Different objects
+        }
+      }
+    }
+  }
+#ifdef ASSERT
+  // Verify that FastLock and Safepoint reference only this lock region.
+  for (uint i = 0; i < this->outcnt(); i++) {
+    Node* n = this->raw_out(i);
+    if (n->is_FastLock()) {
+      FastLockNode* flock = n->as_FastLock();
+      assert((flock->box_node() == this) && (flock->obj_node() == obj),"");
+    }
+    if (n->is_SafePoint() && n->as_SafePoint()->jvms()) {
+      SafePointNode* sfn = n->as_SafePoint();
+      JVMState* youngest_jvms = sfn->jvms();
+      int max_depth = youngest_jvms->depth();
+      for (int depth = 1; depth <= max_depth; depth++) {
+        JVMState* jvms = youngest_jvms->of_depth(depth);
+        int num_mon  = jvms->nof_monitors();
+        // Loop over monitors
+        for (int idx = 0; idx < num_mon; idx++) {
+          Node* obj_node = sfn->monitor_obj(jvms, idx);
+          Node* box_node = sfn->monitor_box(jvms, idx);
+          if (box_node == this) {
+            assert(obj_node == obj,"");
+          }
+        }
+      }
+    }
+  }
+#endif
+  if (unique_lock != NULL && has_one_lock) {
+    *unique_lock = lock;
+  }
+  return true;
 }
 
 //=============================================================================
diff --git a/hotspot/src/share/vm/opto/locknode.hpp b/hotspot/src/share/vm/opto/locknode.hpp
index 05630e2..b362537 100644
--- a/hotspot/src/share/vm/opto/locknode.hpp
+++ b/hotspot/src/share/vm/opto/locknode.hpp
@@ -49,11 +49,11 @@
 
 //------------------------------BoxLockNode------------------------------------
 class BoxLockNode : public Node {
-public:
   const int _slot;
   RegMask   _inmask;
   bool _is_eliminated;    // indicates this lock was safely eliminated
 
+public:
   BoxLockNode( int lock );
   virtual int Opcode() const;
   virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
@@ -66,11 +66,17 @@
   virtual const class Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
   virtual uint ideal_reg() const { return Op_RegP; }
 
-  static OptoReg::Name stack_slot(Node* box_node);
+  static OptoReg::Name reg(Node* box_node);
+  static BoxLockNode* box_node(Node* box_node);
+  static bool same_slot(Node* box1, Node* box2);
+  int stack_slot() const { return _slot; }
 
-  bool is_eliminated()  { return _is_eliminated; }
+  bool is_eliminated() const { return _is_eliminated; }
   // mark lock as eliminated.
-  void set_eliminated() { _is_eliminated = true; }
+  void set_eliminated()      { _is_eliminated = true; }
+
+  // Is BoxLock node used for one simple lock region?
+  bool is_simple_lock_region(LockNode** unique_lock, Node* obj);
 
 #ifndef PRODUCT
   virtual void format( PhaseRegAlloc *, outputStream *st ) const;
@@ -91,6 +97,7 @@
   }
   Node* obj_node() const { return in(1); }
   Node* box_node() const { return in(2); }
+  void  set_box_node(Node* box) { set_req(2, box); }
 
   // FastLock and FastUnlockNode do not hash, we need one for each correspoding
   // LockNode/UnLockNode to avoid creating Phi's.
diff --git a/hotspot/src/share/vm/opto/macro.cpp b/hotspot/src/share/vm/opto/macro.cpp
index 97c5f11..594bd0a 100644
--- a/hotspot/src/share/vm/opto/macro.cpp
+++ b/hotspot/src/share/vm/opto/macro.cpp
@@ -1789,7 +1789,8 @@
                          slow_call_address);
 }
 
-//-----------------------mark_eliminated_locking_nodes-----------------------
+//-------------------mark_eliminated_box----------------------------------
+//
 // During EA obj may point to several objects but after few ideal graph
 // transformations (CCP) it may point to only one non escaping object
 // (but still using phi), corresponding locks and unlocks will be marked
@@ -1800,62 +1801,145 @@
 // marked for elimination since new obj has no escape information.
 // Mark all associated (same box and obj) lock and unlock nodes for
 // elimination if some of them marked already.
-void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
-  if (!alock->is_eliminated()) {
+void PhaseMacroExpand::mark_eliminated_box(Node* oldbox, Node* obj) {
+  if (oldbox->is_BoxLock() && oldbox->as_BoxLock()->is_eliminated())
+    return;
+
+  if (oldbox->is_BoxLock() &&
+      oldbox->as_BoxLock()->is_simple_lock_region(NULL, obj)) {
+    // Box is used only in one lock region. Mark this box as eliminated.
+    _igvn.hash_delete(oldbox);
+    oldbox->as_BoxLock()->set_eliminated(); // This changes box's hash value
+    _igvn.hash_insert(oldbox);
+
+    for (uint i = 0; i < oldbox->outcnt(); i++) {
+      Node* u = oldbox->raw_out(i);
+      if (u->is_AbstractLock() && !u->as_AbstractLock()->is_non_esc_obj()) {
+        AbstractLockNode* alock = u->as_AbstractLock();
+        // Check lock's box since box could be referenced by Lock's debug info.
+        if (alock->box_node() == oldbox) {
+          assert(alock->obj_node() == obj, "");
+          // Mark eliminated all related locks and unlocks.
+          alock->set_non_esc_obj();
+        }
+      }
+    }
     return;
   }
-  if (!alock->is_coarsened()) { // Eliminated by EA
-      // Create new "eliminated" BoxLock node and use it
-      // in monitor debug info for the same object.
-      BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
-      Node* obj = alock->obj_node();
-      if (!oldbox->is_eliminated()) {
-        BoxLockNode* newbox = oldbox->clone()->as_BoxLock();
+
+  // Create new "eliminated" BoxLock node and use it in monitor debug info
+  // instead of oldbox for the same object.
+  BoxLockNode* box = BoxLockNode::box_node(oldbox);
+  BoxLockNode* newbox = box->clone()->as_BoxLock();
+
+  // Note: BoxLock node is marked eliminated only here and it is used
+  // to indicate that all associated lock and unlock nodes are marked
+  // for elimination.
+  newbox->set_eliminated();
+  transform_later(newbox);
+
+  // Replace old box node with new box for all users of the same object.
+  for (uint i = 0; i < oldbox->outcnt();) {
+    bool next_edge = true;
+
+    Node* u = oldbox->raw_out(i);
+    if (u->is_AbstractLock()) {
+      AbstractLockNode* alock = u->as_AbstractLock();
+      if (alock->obj_node() == obj && alock->box_node() == oldbox) {
+        // Replace Box and mark eliminated all related locks and unlocks.
+        alock->set_non_esc_obj();
+        _igvn.hash_delete(alock);
+        alock->set_box_node(newbox);
+        _igvn._worklist.push(alock);
+        next_edge = false;
+      }
+    }
+    if (u->is_FastLock() && u->as_FastLock()->obj_node() == obj) {
+      FastLockNode* flock = u->as_FastLock();
+      assert(flock->box_node() == oldbox, "sanity");
+      _igvn.hash_delete(flock);
+      flock->set_box_node(newbox);
+      _igvn._worklist.push(flock);
+      next_edge = false;
+    }
+
+    // Replace old box in monitor debug info.
+    if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
+      SafePointNode* sfn = u->as_SafePoint();
+      JVMState* youngest_jvms = sfn->jvms();
+      int max_depth = youngest_jvms->depth();
+      for (int depth = 1; depth <= max_depth; depth++) {
+        JVMState* jvms = youngest_jvms->of_depth(depth);
+        int num_mon  = jvms->nof_monitors();
+        // Loop over monitors
+        for (int idx = 0; idx < num_mon; idx++) {
+          Node* obj_node = sfn->monitor_obj(jvms, idx);
+          Node* box_node = sfn->monitor_box(jvms, idx);
+          if (box_node == oldbox && obj_node == obj) {
+            int j = jvms->monitor_box_offset(idx);
+            _igvn.hash_delete(u);
+            u->set_req(j, newbox);
+            _igvn._worklist.push(u);
+            next_edge = false;
+          }
+        }
+      }
+    }
+    if (next_edge) i++;
+  }
+}
+
+//-----------------------mark_eliminated_locking_nodes-----------------------
+void PhaseMacroExpand::mark_eliminated_locking_nodes(AbstractLockNode *alock) {
+  if (EliminateNestedLocks) {
+    if (alock->is_nested()) {
+       assert(alock->box_node()->as_BoxLock()->is_eliminated(), "sanity");
+       return;
+    } else if (!alock->is_non_esc_obj()) { // Not eliminated or coarsened
+      // Only Lock node has JVMState needed here.
+      if (alock->jvms() != NULL && alock->as_Lock()->is_nested_lock_region()) {
+        // Mark eliminated related nested locks and unlocks.
+        Node* obj = alock->obj_node();
+        BoxLockNode* box_node = alock->box_node()->as_BoxLock();
+        assert(!box_node->is_eliminated(), "should not be marked yet");
         // Note: BoxLock node is marked eliminated only here
         // and it is used to indicate that all associated lock
         // and unlock nodes are marked for elimination.
-        newbox->set_eliminated();
-        transform_later(newbox);
-        // Replace old box node with new box for all users
-        // of the same object.
-        for (uint i = 0; i < oldbox->outcnt();) {
-
-          bool next_edge = true;
-          Node* u = oldbox->raw_out(i);
-          if (u->is_AbstractLock() &&
-              u->as_AbstractLock()->obj_node() == obj &&
-              u->as_AbstractLock()->box_node() == oldbox) {
-            // Mark all associated locks and unlocks.
-            u->as_AbstractLock()->set_eliminated();
-            _igvn.hash_delete(u);
-            u->set_req(TypeFunc::Parms + 1, newbox);
-            next_edge = false;
+        box_node->set_eliminated(); // Box's hash is always NO_HASH here
+        for (uint i = 0; i < box_node->outcnt(); i++) {
+          Node* u = box_node->raw_out(i);
+          if (u->is_AbstractLock()) {
+            alock = u->as_AbstractLock();
+            if (alock->box_node() == box_node) {
+              // Verify that this Box is referenced only by related locks.
+              assert(alock->obj_node() == obj, "");
+              // Mark all related locks and unlocks.
+              alock->set_nested();
+            }
           }
-          // Replace old box in monitor debug info.
-          if (u->is_SafePoint() && u->as_SafePoint()->jvms()) {
-            SafePointNode* sfn = u->as_SafePoint();
-            JVMState* youngest_jvms = sfn->jvms();
-            int max_depth = youngest_jvms->depth();
-            for (int depth = 1; depth <= max_depth; depth++) {
-              JVMState* jvms = youngest_jvms->of_depth(depth);
-              int num_mon  = jvms->nof_monitors();
-              // Loop over monitors
-              for (int idx = 0; idx < num_mon; idx++) {
-                Node* obj_node = sfn->monitor_obj(jvms, idx);
-                Node* box_node = sfn->monitor_box(jvms, idx);
-                if (box_node == oldbox && obj_node == obj) {
-                  int j = jvms->monitor_box_offset(idx);
-                  _igvn.hash_delete(u);
-                  u->set_req(j, newbox);
-                  next_edge = false;
-                }
-              } // for (int idx = 0;
-            } // for (int depth = 1;
-          } // if (u->is_SafePoint()
-          if (next_edge) i++;
-        } // for (uint i = 0; i < oldbox->outcnt();)
-      } // if (!oldbox->is_eliminated())
-  } // if (!alock->is_coarsened())
+        }
+      }
+      return;
+    }
+    // Process locks for non escaping object
+    assert(alock->is_non_esc_obj(), "");
+  } // EliminateNestedLocks
+
+  if (alock->is_non_esc_obj()) { // Lock is used for non escaping object
+    // Look for all locks of this object and mark them and
+    // corresponding BoxLock nodes as eliminated.
+    Node* obj = alock->obj_node();
+    for (uint j = 0; j < obj->outcnt(); j++) {
+      Node* o = obj->raw_out(j);
+      if (o->is_AbstractLock() && o->as_AbstractLock()->obj_node() == obj) {
+        alock = o->as_AbstractLock();
+        Node* box = alock->box_node();
+        // Replace old box node with new eliminated box for all users
+        // of the same object and mark related locks as eliminated.
+        mark_eliminated_box(box, obj);
+      }
+    }
+  }
 }
 
 // we have determined that this lock/unlock can be eliminated, we simply
@@ -1870,7 +1954,7 @@
     return false;
   }
 #ifdef ASSERT
-  if (alock->is_Lock() && !alock->is_coarsened()) {
+  if (!alock->is_coarsened()) {
     // Check that new "eliminated" BoxLock node is created.
     BoxLockNode* oldbox = alock->box_node()->as_BoxLock();
     assert(oldbox->is_eliminated(), "should be done already");
@@ -1962,6 +2046,8 @@
   Node* box = lock->box_node();
   Node* flock = lock->fastlock_node();
 
+  assert(!BoxLockNode::box_node(box)->is_eliminated(), "sanity");
+
   // Make the merge point
   Node *region;
   Node *mem_phi;
@@ -2196,6 +2282,8 @@
   Node* obj = unlock->obj_node();
   Node* box = unlock->box_node();
 
+  assert(!BoxLockNode::box_node(box)->is_eliminated(), "sanity");
+
   // No need for a null check on unlock
 
   // Make the merge point
diff --git a/hotspot/src/share/vm/opto/macro.hpp b/hotspot/src/share/vm/opto/macro.hpp
index 7f0080a..f521e3d 100644
--- a/hotspot/src/share/vm/opto/macro.hpp
+++ b/hotspot/src/share/vm/opto/macro.hpp
@@ -92,6 +92,7 @@
   void process_users_of_allocation(AllocateNode *alloc);
 
   void eliminate_card_mark(Node *cm);
+  void mark_eliminated_box(Node* box, Node* obj);
   void mark_eliminated_locking_nodes(AbstractLockNode *alock);
   bool eliminate_locking_node(AbstractLockNode *alock);
   void expand_lock_node(LockNode *lock);
diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp
index d9317d6..f365353 100644
--- a/hotspot/src/share/vm/opto/output.cpp
+++ b/hotspot/src/share/vm/opto/output.cpp
@@ -924,10 +924,10 @@
         scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding());
       }
 
-      OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
+      OptoReg::Name box_reg = BoxLockNode::reg(box_node);
       Location basic_lock = Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg));
-      while( !box_node->is_BoxLock() )  box_node = box_node->in(1);
-      monarray->append(new MonitorValue(scval, basic_lock, box_node->as_BoxLock()->is_eliminated()));
+      bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated());
+      monarray->append(new MonitorValue(scval, basic_lock, eliminated));
     }
 
     // We dump the object pool first, since deoptimization reads it in first.
diff --git a/hotspot/src/share/vm/opto/parse1.cpp b/hotspot/src/share/vm/opto/parse1.cpp
index 30e3f5c..1505123 100644
--- a/hotspot/src/share/vm/opto/parse1.cpp
+++ b/hotspot/src/share/vm/opto/parse1.cpp
@@ -1819,8 +1819,12 @@
   } else if (jvms->is_stk(idx)) {
     t = block()->stack_type_at(idx - jvms->stkoff());
   } else if (jvms->is_mon(idx)) {
-    assert(!jvms->is_monitor_box(idx), "no phis for boxes");
-    t = TypeInstPtr::BOTTOM; // this is sufficient for a lock object
+    if (EliminateNestedLocks && jvms->is_monitor_box(idx)) {
+      // BoxLock nodes are not commoning. Create Phi.
+      t = o->bottom_type(); // TypeRawPtr::BOTTOM
+    } else {
+      t = TypeInstPtr::BOTTOM; // this is sufficient for a lock object
+    }
   } else if ((uint)idx < TypeFunc::Parms) {
     t = o->bottom_type();  // Type::RETURN_ADDRESS or such-like.
   } else {
diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp
index 8a0dc63..45dcf5d 100644
--- a/hotspot/src/share/vm/runtime/arguments.cpp
+++ b/hotspot/src/share/vm/runtime/arguments.cpp
@@ -3160,6 +3160,9 @@
   if (!UseBiasedLocking || EmitSync != 0) {
     UseOptoBiasInlining = false;
   }
+  if (!EliminateLocks) {
+    EliminateNestedLocks = false;
+  }
 #endif
 
   if (PrintAssembly && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp
index 05771c3..44eeee8 100644
--- a/hotspot/src/share/vm/runtime/deoptimization.cpp
+++ b/hotspot/src/share/vm/runtime/deoptimization.cpp
@@ -211,7 +211,7 @@
 #ifdef COMPILER2
   // Reallocate the non-escaping objects and restore their fields. Then
   // relock objects if synchronization on them was eliminated.
-  if (DoEscapeAnalysis) {
+  if (DoEscapeAnalysis || EliminateNestedLocks) {
     if (EliminateAllocations) {
       assert (chunk->at(0)->scope() != NULL,"expect only compiled java frames");
       GrowableArray<ScopeValue*>* objects = chunk->at(0)->scope()->objects();