SF: delay region sampling when short on time

In a number of janky traces, particularly at high
frame rates, we've seen the surfaceflinger thread
overrunning its time slot. In some of those cases,
the surfaceflinger thread is doing region-sampling.
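This change causes region-sampling to check how much
time is left until the next vsync before deciding
whether to sample this frame. If less than
timeForRegionSampling (5ms) remains, it defers the
sampling to a later frame. Once the discarded-frame
count reaches maxRegionSamplingSkips (10), it samples
regardless of how much time is left.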

Bug: 133779857
Test: trace inspection from scrolling in various apps
Change-Id: I92c2368e80033c1ba6e27f947a456d14db02064c
diff --git a/services/surfaceflinger/RegionSamplingThread.cpp b/services/surfaceflinger/RegionSamplingThread.cpp
index 66906e9..4fca63a 100644
--- a/services/surfaceflinger/RegionSamplingThread.cpp
+++ b/services/surfaceflinger/RegionSamplingThread.cpp
@@ -44,11 +44,14 @@
 enum class samplingStep {
     noWorkNeeded,
     idleTimerWaiting,
+    waitForQuietFrame,
     waitForZeroPhase,
     waitForSamplePhase,
     sample
 };
 
+constexpr auto timeForRegionSampling = 5000000ns;
+constexpr auto maxRegionSamplingSkips = 10;
 constexpr auto defaultRegionSamplingOffset = -3ms;
 constexpr auto defaultRegionSamplingPeriod = 100ms;
 constexpr auto defaultRegionSamplingTimerTimeout = 100ms;
@@ -215,9 +218,9 @@
 void RegionSamplingThread::checkForStaleLuma() {
     std::lock_guard lock(mThreadControlMutex);
 
-    if (mDiscardedFrames) {
+    if (mDiscardedFrames > 0) {
         ATRACE_INT(lumaSamplingStepTag, static_cast<int>(samplingStep::waitForZeroPhase));
-        mDiscardedFrames = false;
+        mDiscardedFrames = 0;
         mPhaseCallback->startVsyncListener();
     }
 }
@@ -235,13 +238,25 @@
     auto now = std::chrono::nanoseconds(systemTime(SYSTEM_TIME_MONOTONIC));
     if (lastSampleTime + mTunables.mSamplingPeriod > now) {
         ATRACE_INT(lumaSamplingStepTag, static_cast<int>(samplingStep::idleTimerWaiting));
-        mDiscardedFrames = true;
+        if (mDiscardedFrames == 0) mDiscardedFrames++;
         return;
     }
+    if (mDiscardedFrames < maxRegionSamplingSkips) {
+        // If there is relatively little time left for surfaceflinger
+        // until the next vsync deadline, defer this sampling work
+        // to a later frame, when hopefully there will be more time.
+        DisplayStatInfo stats;
+        mScheduler.getDisplayStatInfo(&stats);
+        if (std::chrono::nanoseconds(stats.vsyncTime) - now < timeForRegionSampling) {
+            ATRACE_INT(lumaSamplingStepTag, static_cast<int>(samplingStep::waitForQuietFrame));
+            mDiscardedFrames++;
+            return;
+        }
+    }
 
     ATRACE_INT(lumaSamplingStepTag, static_cast<int>(samplingStep::sample));
 
-    mDiscardedFrames = false;
+    mDiscardedFrames = 0;
     lastSampleTime = now;
 
     mIdleTimer.reset();
diff --git a/services/surfaceflinger/RegionSamplingThread.h b/services/surfaceflinger/RegionSamplingThread.h
index 3c6fcf3..96ffe20 100644
--- a/services/surfaceflinger/RegionSamplingThread.h
+++ b/services/surfaceflinger/RegionSamplingThread.h
@@ -117,7 +117,7 @@
     std::condition_variable_any mCondition;
     bool mRunning GUARDED_BY(mThreadControlMutex) = true;
     bool mSampleRequested GUARDED_BY(mThreadControlMutex) = false;
-    bool mDiscardedFrames GUARDED_BY(mThreadControlMutex) = false;
+    uint32_t mDiscardedFrames GUARDED_BY(mThreadControlMutex) = 0;
     std::chrono::nanoseconds lastSampleTime GUARDED_BY(mThreadControlMutex);
 
     std::mutex mSamplingMutex;