Add more tests, spin before inflating monitors

This adds some additional tests/benchmarks to 2029-contended-monitors.

Modify the monitor inflation logic minimally to try pure spinning
before resorting to sched_yield.
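
In sketch, the new contention path looks like this (simplified from the
monitor.cc diff below; max_spins_before_inflation stands for
Runtime::GetMaxSpinsBeforeThinLockInflation(), and the futex-based
inflation path is omitted):

  contention_count++;
  if (contention_count <= kExtraSpinIters + max_spins_before_inflation) {
    if (contention_count > kExtraSpinIters) {
      sched_yield();  // Only yield once the pure-spin budget is exhausted.
    }
    // Otherwise fall through and immediately retry the thin-lock CAS.
  } else {
    // Both spinning and yielding failed; inflate the lock.
  }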

Based on my testing with 2029-contended-monitors, it would be beneficial
to more aggressively avoid lock inflation. But in the end, I couldn't
figure out how to do that effectively without taking more wall clock
time when we actually inflate a lock. That seems like a questionable
trade-off, so this is the low-risk alternative that simply avoids the
clearly dubious choice to immediately spend a microsecond or so on a
sched_yield call whenever the lock is already held.

This is somewhat similar to aosp/666639, but we still try everything
we tried before to avoid inflation. (aosp/666639 with a much higher
value of max_spins would probably be similar. I expect CpuRelax()
there doesn't have much impact either way. We could add it here.)
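
If we did add CpuRelax() here, the natural spot would be the new pure-spin
iterations; a hypothetical sketch (CpuRelax() is assumed to compile to a
pause/yield-style hint, as in aosp/666639):

  if (contention_count <= kExtraSpinIters + max_spins_before_inflation) {
    if (contention_count > kExtraSpinIters) {
      sched_yield();
    } else {
      CpuRelax();  // Hypothetical: ease pipeline/SMT pressure while spinning.
    }
  }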

Bug: 140590186
Test: experimentation with 2029-contended-monitors.
Change-Id: I58349cc2c45dd6ea16c67c3c3bffb791274eb99e
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 8190960..a581131 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1102,6 +1102,7 @@
   obj = FakeLock(obj);
   uint32_t thread_id = self->GetThreadId();
   size_t contention_count = 0;
+  constexpr size_t kExtraSpinIters = 100;
   StackHandleScope<1> hs(self);
   Handle<mirror::Object> h_obj(hs.NewHandle(obj));
 #if !ART_USE_FUTEXES
@@ -1166,16 +1167,15 @@
           // Contention.
           contention_count++;
           Runtime* runtime = Runtime::Current();
-          if (contention_count <= runtime->GetMaxSpinsBeforeThinLockInflation()) {
+          if (contention_count
+              <= kExtraSpinIters + runtime->GetMaxSpinsBeforeThinLockInflation()) {
             // TODO: Consider switching the thread state to kWaitingForLockInflation when we are
             // yielding.  Use sched_yield instead of NanoSleep since NanoSleep can wait much longer
             // than the parameter you pass in. This can cause thread suspension to take excessively
             // long and make long pauses. See b/16307460.
-            // TODO: We should literally spin first, without sched_yield. Sched_yield either does
-            // nothing (at significant expense), or guarantees that we wait at least microseconds.
-            // If the owner is running, I would expect the median lock hold time to be hundreds
-            // of nanoseconds or less.
-            sched_yield();
+            if (contention_count > kExtraSpinIters) {
+              sched_yield();
+            }
           } else {
 #if ART_USE_FUTEXES
             contention_count = 0;
diff --git a/test/2029-contended-monitors/expected.txt b/test/2029-contended-monitors/expected.txt
index 1894ad7..4fa00bc 100644
--- a/test/2029-contended-monitors/expected.txt
+++ b/test/2029-contended-monitors/expected.txt
@@ -6,5 +6,10 @@
 Hold time 2000, shared lock
 Hold time 20000, shared lock
 Hold time 200000, shared lock
+Hold time 2, pause time 18, shared lock
+Hold time 20, pause time 180, shared lock
+Hold time 200, pause time 1800, shared lock
+Hold time 2000, pause time 18000, shared lock
+Hold time 20000, pause time 180000, shared lock
 Hold for 2 msecs while sleeping, shared lock
 Hold for 2 msecs while sleeping, private lock
diff --git a/test/2029-contended-monitors/src/Main.java b/test/2029-contended-monitors/src/Main.java
index 78d2ae4..ffe8e73 100644
--- a/test/2029-contended-monitors/src/Main.java
+++ b/test/2029-contended-monitors/src/Main.java
@@ -44,7 +44,7 @@
     }
   }
 
-  // Increment counter by n, holding log for time roughly propertional to n.
+  // Increment counter by n, holding lock for time roughly proportional to n.
   // N must be even.
   private void holdFor(Object lock, int n) {
     synchronized(lock) {
@@ -56,6 +56,20 @@
     }
   }
 
+  // Increment a local counter by an even number n in a way that takes time
+  // roughly proportional to n.
+  private void spinFor(int n) {
+    int y = -1;
+    int local_counter = 0;
+    for (int i = 0; i < n; ++i) {
+      local_counter += y;
+      y = nextInt(y);
+    }
+    if (local_counter != n) {
+      throw new Error("Bad spinFor result: " + local_counter + " != " + n);
+    }
+  }
+
   private class RepeatedLockHolder implements Runnable {
     RepeatedLockHolder(boolean shared, int n /* even */) {
       sharedLock = shared;
@@ -73,6 +87,24 @@
     private int holdTime;
   }
 
+  private class RepeatedIntermittentLockHolder implements Runnable {
+    RepeatedIntermittentLockHolder(boolean shared, int n /* even */) {
+      sharedLock = shared;
+      holdTime = n;
+    }
+    @Override
+    public void run() {
+      Object myLock = sharedLock ? lock : new Object();
+      int nIters = TOTAL_ITERS / 10 / currentThreadCount / holdTime;
+      for (int i = 0; i < nIters; ++i) {
+        spinFor(9 * holdTime);
+        holdFor(myLock, holdTime);
+      }
+    }
+    private boolean sharedLock;
+    private int holdTime;
+  }
+
   private class SleepyLockHolder implements Runnable {
     SleepyLockHolder(boolean shared) {
       sharedLock = shared;
@@ -154,11 +186,15 @@
   }
 
   private class CheckCounter implements Runnable {
+    private final int expected;
+    public CheckCounter(int e) {
+      expected = e;
+    }
     @Override
     public void run() {
-      if (counter != TOTAL_ITERS) {
+      if (counter != expected) {
         throw new AssertionError("Failed counter postcondition check for "
-            + currentThreadCount + " threads");
+            + currentThreadCount + " threads, expected " + expected + " got " + counter);
       }
     }
   }
@@ -172,7 +208,14 @@
     for (int i = 2; i <= MAX_HOLD_TIME; i *= 10) {
       // i * 8 (max thread count) divides TOTAL_ITERS
       System.out.println("Hold time " + i + ", shared lock");
-      runAll(new RepeatedLockHolder(true, i), () -> { counter = 0; }, new CheckCounter());
+      runAll(new RepeatedLockHolder(true, i), () -> { counter = 0; },
+          new CheckCounter(TOTAL_ITERS));
+    }
+    for (int i = 2; i <= MAX_HOLD_TIME / 10; i *= 10) {
+      // i * 8 (max thread count) divides TOTAL_ITERS / 10
+      System.out.println("Hold time " + i + ", pause time " + (9 * i) + ", shared lock");
+      runAll(new RepeatedIntermittentLockHolder(true, i), () -> { counter = 0; },
+          new CheckCounter(TOTAL_ITERS / 10));
     }
     if (PRINT_TIMES) {
       for (int i = 2; i <= MAX_HOLD_TIME; i *= 1000) {
@@ -183,7 +226,7 @@
       }
     }
     System.out.println("Hold for 2 msecs while sleeping, shared lock");
-    runAll(new SleepyLockHolder(true), () -> { counter = 0; }, new CheckCounter());
+    runAll(new SleepyLockHolder(true), () -> { counter = 0; }, new CheckCounter(TOTAL_ITERS));
     System.out.println("Hold for 2 msecs while sleeping, private lock");
     runAll(new SleepyLockHolder(false), () -> { counter = 0; }, () -> {});
   }