Increase timeout waiting for ExternalStorageService to bind.
We've observed occasions where it can take a long time for the
system_server main thread to come around to handling the bind of the
service, for example because it's handling long-running jobs:
06-04 08:19:27.544 1000 1532 1532 W Looper : Slow dispatch took
2044ms main h=com.android.server.job.JobSchedulerService$JobHandler
c=null m=1
Delays like this can cause us to fail binding to the external storage
service for a user, and reset the whole storage stack, leading to
unexpected results, especially if we time out while trying to mount
user 10 after user 0 has already been successfully mounted.
Increase the timeout to 20 seconds; this gives us a lot more time to
complete the bind. At the same time, we want to avoid holding the vold
lock for too long and/or risking any deadlocks.
Add a WTF for when we do hit the timeout, to get a better grasp on how
common this is.
Bug: 158452122
Test: Boot device
Change-Id: Ia8f6f121448f6b5e484fd3e8cca845fdd89afc23
diff --git a/services/core/java/com/android/server/StorageManagerService.java b/services/core/java/com/android/server/StorageManagerService.java
index 1ce3dfe..cd68fdd 100644
--- a/services/core/java/com/android/server/StorageManagerService.java
+++ b/services/core/java/com/android/server/StorageManagerService.java
@@ -52,7 +52,6 @@
import static com.android.internal.util.XmlUtils.writeIntAttribute;
import static com.android.internal.util.XmlUtils.writeLongAttribute;
import static com.android.internal.util.XmlUtils.writeStringAttribute;
-import static com.android.server.storage.StorageUserConnection.REMOTE_TIMEOUT_SECONDS;
import static org.xmlpull.v1.XmlPullParser.END_DOCUMENT;
import static org.xmlpull.v1.XmlPullParser.START_TAG;
@@ -224,6 +223,9 @@
private static final String ANDROID_VOLD_APP_DATA_ISOLATION_ENABLED_PROPERTY =
"persist.sys.vold_app_data_isolation_enabled";
+ // How long we wait to reset storage, if we failed to call onMount on the
+ // external storage service.
+ public static final int FAILED_MOUNT_RESET_TIMEOUT_SECONDS = 10;
/**
* If {@code 1}, enables the isolated storage feature. If {@code -1},
* disables the isolated storage feature. If {@code 0}, uses the default
@@ -2202,7 +2204,7 @@
} catch (ExternalStorageServiceException e) {
Slog.e(TAG, "Failed to mount volume " + vol, e);
- int nextResetSeconds = REMOTE_TIMEOUT_SECONDS * 2;
+ int nextResetSeconds = FAILED_MOUNT_RESET_TIMEOUT_SECONDS;
Slog.i(TAG, "Scheduling reset in " + nextResetSeconds + "s");
mHandler.removeMessages(H_RESET);
mHandler.sendMessageDelayed(mHandler.obtainMessage(H_RESET),
diff --git a/services/core/java/com/android/server/storage/StorageUserConnection.java b/services/core/java/com/android/server/storage/StorageUserConnection.java
index 94a2502..ed57067 100644
--- a/services/core/java/com/android/server/storage/StorageUserConnection.java
+++ b/services/core/java/com/android/server/storage/StorageUserConnection.java
@@ -62,7 +62,7 @@
public final class StorageUserConnection {
private static final String TAG = "StorageUserConnection";
- public static final int REMOTE_TIMEOUT_SECONDS = 5;
+ public static final int REMOTE_TIMEOUT_SECONDS = 20;
private final Object mLock = new Object();
private final Context mContext;
@@ -202,6 +202,7 @@
try {
if (!latch.await(REMOTE_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
// TODO(b/140025078): Call ActivityManager ANR API?
+ Slog.wtf(TAG, "Failed to bind to the ExternalStorageService for user " + mUserId);
throw new TimeoutException("Latch wait for " + reason + " elapsed");
}
} catch (InterruptedException e) {